# K-means clustering in R
# Run a cluster analysis of the genes to stratify them into groups.
library(factoextra)
library(cluster)
# Load the raw data.
# NOTE(review): the "?" in the path looks like a placeholder for the Windows
# user name -- confirm and replace it with the real folder before running.
dat <- read.csv("C:/Users/?/Desktop/dat.csv")

# Drop every row that contains a missing value. This is done on the full
# table so the retained rows are exactly those of na.omit(dat).
complete_rows <- na.omit(dat)

# Keep only numeric columns: scale() stops with "'x' must be numeric" when
# the table contains character or factor columns (e.g. an identifier column).
is_numeric_col <- vapply(complete_rows, is.numeric, logical(1))
features <- complete_rows[, is_numeric_col, drop = FALSE]

# Drop constant columns: their standard deviation is 0, so scale() would turn
# them into NaN and kmeans() would then fail on the NaN values.
has_spread <- vapply(
  features,
  function(column) isTRUE(sd(column) > 0),
  logical(1)
)
features <- features[, has_spread, drop = FALSE]
stopifnot(nrow(features) > 0, ncol(features) > 0)

# Standardise each remaining column to mean 0 and standard deviation 1.
df <- scale(features)
# DETERMINE HOW MANY CLUSTERS IS OPTIMAL ----
# Both diagnostics below use random numbers (random k-means starting centres
# and the reference data sets simulated by clusGap), so fix the seed first;
# otherwise the plots change from run to run.
set.seed(1)

# Elbow plot: number of clusters vs. total within-cluster sum of squares.
# The suggested k is the number of clusters at the bend of the curve.
fviz_nbclust(df, kmeans, method = "wss")

# Gap statistic for k = 1..10, using B = 50 simulated reference data sets;
# nstart = 25 is forwarded to kmeans().
gap_stat <- clusGap(
  df,
  FUN = kmeans,
  nstart = 25,
  K.max = 10,
  B = 50
)

# Plot number of clusters vs. gap statistic.
fviz_gap_stat(gap_stat)
# PERFORM K-MEANS CLUSTERING WITH OPTIMAL K ----
# Fix the seed so the random starting centres, and therefore the resulting
# clusters, are reproducible.
set.seed(1)

# Fit k-means with k = 4 clusters, trying 25 random starts.
km <- kmeans(df, centers = 4, nstart = 25)
km

# Plot the result of the final k-means model.
fviz_cluster(km, data = df)

# km$cluster holds one label per clustered row, and the clustered rows are the
# complete cases of `dat`. Pairing the labels with the unfiltered `dat` fails
# with a length mismatch as soon as one row contains an NA, so use the
# complete-case rows for the summaries below.
dat_complete <- na.omit(dat)
stopifnot(nrow(dat_complete) == length(km$cluster))

# Mean of each (unscaled) numeric variable within each cluster.
is_numeric_col <- vapply(dat_complete, is.numeric, logical(1))
aggregate(
  dat_complete[, is_numeric_col, drop = FALSE],
  by = list(cluster = km$cluster),
  FUN = mean
)

# Add the cluster assignment to the original (complete-case) data.
finaldat <- cbind(dat_complete, cluster = km$cluster)
head(finaldat)
# Fit k-means with k = 3 on the rows of `df` that contain no NA/NaN.
# Rows are filtered with complete.cases() so the input stays a matrix:
# indexing with the logical matrix !is.na(df) flattens it into a plain vector,
# which makes kmeans() cluster individual cell values instead of rows.
# nstart = 25 matches the main fit so the result does not hinge on a single
# random start.
kmeans(df[complete.cases(df), , drop = FALSE], centers = 3, nstart = 25)