A function qualityCriterion::longitudinalData can calculate BIC and AIC for k-means cluster. It first calculates the likelihood for each cluster centres on every individual before merging together with weights on cluster size. The sd of the normal density function is based on RSS.
While the original code gives -BIC instead of BIC, I adopted the code for BIC:
qualityCriterion <- function (traj, clusters)
{
if (nrow(traj) != length(clusters)) {
stop("[qualityCriterion] the cluster and the number of trajectory should be the same.")
}
clusters <- as.integer(clusters)
nbIndiv <- nrow(traj)
nbTime <- ncol(traj)
nbClusters <- length(unique(clusters))
# Cluster frequency
preProba <- as.numeric(table(clusters))
preProba <- preProba / sum(preProba)
# Centers as cluster means
moy <- matrix(, nbClusters, nbTime)
for (i in 1:nbClusters) {
moy[i, ] <- apply(traj[as.numeric(clusters) == i, , drop = FALSE], 2, meanNA)
}
# sd of residuals
ecart <- sqrt(mean(as.numeric(traj - moy[clusters, ])^2, na.rm=TRUE))
# likelihood
vraisIndivXcluster <- matrix(, nbIndiv, nbClusters)
for (i in 1:nbClusters) {
vraisIndivXcluster[, i] <- preProba[i] * apply(dnorm(t(traj), moy[i, ], ecart),
2, prod, na.rm = TRUE
)
}
vraisIndivXcluster <- apply(vraisIndivXcluster, 1, sum)
logVraisemblance <- sum(log(vraisIndivXcluster))
nbParam <- nbClusters * nbTime + 1 # cluster centers and sd
#BIC <- -2 * logVraisemblance + nbParam * log(nbIndiv) # BIC for time series
BIC2 <- -2 * logVraisemblance + nbParam * log(nbIndiv * nbTime) # BIC for independent columns
AIC <- 2 * nbParam - 2 * logVraisemblance
#AICc <- AIC + (2 * nbParam * (nbParam + 1)) / (nbIndiv - nbParam - 1) # AICc for time series
#AICc2 <- AIC + (2 * nbParam * (nbParam + 1)) / (nbIndiv * nbTime - nbParam - 1) # AICc for independent columns
return(list(criters = c(BIC2 = BIC2, AIC = AIC)))
}
Example of 100 individuals, each with 2 data points, forming 3 clusters:
set.seed(1)
dat <- matrix(rnorm(100 * 2), nrow = 100, ncol = 2) # data of 100 individuals
dat[34:66,] <- dat[34:66,] + 4
dat[67:100,] <- dat[67:100,] + 8
plot(dat[,1], dat[,2]) # 3 cluster centers at (0,0), (4,4), (8,8)

As expected, minimum BIC at k = 3 clusters:
# k-means with k = 2:5
clusters_2 <- kmeans(dat, centers = 2)
clusters_3 <- kmeans(dat, centers = 3)
clusters_4 <- kmeans(dat, centers = 4)
clusters_5 <- kmeans(dat, centers = 5)
BIC <- c(qualityCriterion(dat, rep(1, nrow(dat)))$criters["BIC2"],
qualityCriterion(dat, clusters_2$cluster)$criters["BIC2"],
qualityCriterion(dat, clusters_3$cluster)$criters["BIC2"],
qualityCriterion(dat, clusters_4$cluster)$criters["BIC2"],
qualityCriterion(dat, clusters_5$cluster)$criters["BIC2"]
)
plot(1:5, BIC, xlab = "k", ylab = "BIC")
lines(1:5, BIC)
