Using the data.frame
below
DATA
df <- read.table(text = c("
NA NA NA NA NA NA NA NA NA NA NA NA
0.4748 NA NA NA NA NA NA NA NA NA NA NA
0.905 0.5362 NA NA NA NA NA NA NA NA NA NA
0.0754 0.0118 0.0614 NA NA NA NA NA NA NA NA NA
0.8768 0.3958 0.7952 0.1034 NA NA NA NA NA NA NA NA
0.5708 0.2056 0.4984 0.2356 0.6736 NA NA NA NA NA NA NA
0.2248 0.6204 0.268 0.0014 0.183 0.0768 NA NA NA NA NA NA
0.483 0.9824 0.5314 0.0114 0.3906 0.1968 0.6308 NA NA NA NA NA
0.697 0.732 0.7604 0.0264 0.594 0.3334 0.416 0.7388 NA NA NA NA
0.2918 0.7286 0.3382 0.003 0.2386 0.1122 0.8712 0.7266 0.509 NA NA NA
0.5904 0.8352 0.6704 0.0188 0.4966 0.273 0.5192 0.8328 0.8736 0.5914 NA NA
0.3838 0.8768 0.4476 0.0042 0.3148 0.1498 0.7288 0.873 0.6178 0.8276 0.7432 NA
"), header = F)
colnames(df) <- c( "TK1", "TK2", "TK3", "TK4" , "TK5", "TK6", "TK7", "TK8", "TK9", "TK10", "TK11", "TK12")
rownames(df) <- c( "TK1", "TK2", "TK3", "TK4" , "TK5", "TK6", "TK7", "TK8", "TK9", "TK10", "TK11", "TK12")
df
# TK1 TK2 TK3 TK4 TK5 TK6 TK7 TK8 TK9 TK10 TK11 TK12
#TK1 NA NA NA NA NA NA NA NA NA NA NA NA
#TK2 0.4748 NA NA NA NA NA NA NA NA NA NA NA
#TK3 0.9050 0.5362 NA NA NA NA NA NA NA NA NA NA
#TK4 0.0754 0.0118 0.0614 NA NA NA NA NA NA NA NA NA
#TK5 0.8768 0.3958 0.7952 0.1034 NA NA NA NA NA NA NA NA
#TK6 0.5708 0.2056 0.4984 0.2356 0.6736 NA NA NA NA NA NA NA
#TK7 0.2248 0.6204 0.2680 0.0014 0.1830 0.0768 NA NA NA NA NA NA
#TK8 0.4830 0.9824 0.5314 0.0114 0.3906 0.1968 0.6308 NA NA NA NA NA
#TK9 0.6970 0.7320 0.7604 0.0264 0.5940 0.3334 0.4160 0.7388 NA NA NA NA
#TK10 0.2918 0.7286 0.3382 0.0030 0.2386 0.1122 0.8712 0.7266 0.5090 NA NA NA
#TK11 0.5904 0.8352 0.6704 0.0188 0.4966 0.2730 0.5192 0.8328 0.8736 0.5914 NA NA
#TK12 0.3838 0.8768 0.4476 0.0042 0.3148 0.1498 0.7288 0.8730 0.6178 0.8276 0.7432 NA
I can't change the input data. I will keep getting it in this format with different variables each time based on the user.
I used the code below to create a new variable Relationship
to transfer df
from wide to long format, then arranged the levels of Relation1
and Relationship
variables thanks to akrun's answer to this question. Finally, I created the heatmap as shown below
trial <- df
trial$Relationship <- rownames(df)
trial1 <- subset(trial, select = c(13, 1, 2, 3,4,5,6,7,8,9,10,11,12))
df2 <- gather(trial1, "Relation1", "Strength", 2:13)
df2 <- df2 %>%
dplyr::mutate(Strength1 = round(Strength, digits = 2))%>%
dplyr::select(Relationship,Relation1, Strength1 )
df3 <- df2 %>%
extract(Relationship, into = c("Relationship1", "Relationship2"), "(\\D+)(\\d+)",
remove = FALSE, convert=TRUE) %>%
mutate(Relationship = factor(Relationship, levels = paste0(Relationship1[1],
min(Relationship2):max(Relationship2)))) %>%
select(-Relationship1, -Relationship2) %>%
extract(Relation1, into = c("Relation11", "Relation12"), "(\\D+)(\\d+)",
remove = FALSE, convert=TRUE) %>%
mutate(Relation1 = factor(Relation1, levels = paste0(Relation11[1],
min(Relation12):max(Relation12)))) %>%
select(-Relation11, -Relation12)
df3$Relation1 = with(df3, factor(Relation1, levels = rev(levels(Relation1))))
ggheatmap <- ggplot(df3, aes(Relationship, Relation1, fill = Strength1))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "red", high = "green", mid = "lightgreen",
midpoint = 0.5, limit = c(0,1), space = "Lab",
name="Correlation") + theme_minimal()
ggheatmap +
geom_text(aes(Relationship, Relation1, label = Strength1), color = "black", size = 4) +
labs(x = expression(""),
y=expression(""))
RESULT
Question
I want to make the plotting of heatmap dynamic. So, regardless of the number of variables and observation, a heatmap can be plotted without the need to change the code for different number of variables?
Is there anyway to do that?