Gaussian Mixture Models
Univariate Gaussian Mixture Models
Implementation
<- function(X, K, tol=0.0001, maxit=1000) {
ugmm
# Basics of X
<- length(X)
N
# Initialise means
<- sample(X, K)
mu
# Initialise variances
<- rep(var(X), K)
theta
# Initialise mixing proportions
<- rep(1/K, K)
prop
# Initialise individual label probabilities and weighted densities
<- matrix(0, nrow=N, ncol=K)
g
# Initialise the difference
<- Inf
diff <- 0
old
# Initialise iterations
<- 0
it
# EM Algo
while (it < maxit) {
# Old values
<- mu
oldmu <- theta
oldtheta <- prop
oldprop
# E-step
for (k in 1:K) {
<- prop[k] * dnorm(X, mu[k], sqrt(theta[k]))
g[,k]
}<- g / rowSums(g)
g
# M-step
<- colSums(g)
Nk for (k in 1:K) {
# Calculate mu_k
<- sum(g[,k] * X) / Nk[k]
mu[k]
# Calculate theta^2_k
<- sum(g[,k]*(X - mu[k])^2) / Nk[k]
theta[k]
# Update mixing proportions
<- Nk[k]/N
prop[k]
}
# Calculate log-likelihood
<- numeric(N)
lli for (k in 1:K) {
<- lli + g[,k] * (log(prop[k]) + log(dnorm(X, mu[k], sqrt(theta[k]))))
lli
}<- sum(lli)
ll
# Recalulate difference
<- abs(ll - old)
diff
# Update iterations
<- it+1
it
# Check
if (diff < tol) break
# Assume the loop hasn't ended, update the old ll
<- ll
old
}
return(list(it=it, mu=mu, theta=sqrt(theta), prop=prop))
}
Testing the function:
# Simulate draws from 3 distributions
set.seed(1234)
<- rnorm(1000, 0, 1)
x1 <- rnorm(1500, -4, 2)
x2 <- rnorm(500, 2, 0.5)
x3
# Combine into single vector
<- c(x1, x2, x3)
x
# Density Plot
plot(density(x))
# Estimate clusters
ugmm(x, 3)
## $it
## [1] 231
##
## $mu
## [1] -3.9174651 1.9712504 -0.1162445
##
## $theta
## [1] 2.0231925 0.5045694 0.8715375
##
## $prop
## [1] 0.5054033 0.1863845 0.3082122