I am trying to write a script that generates stacked bar plots showing the percentages of the glycoforms found on an analysed protein. I am using the "turbo" color scale from the viridis package because I need a very wide range of colors to be able to distinguish all the glycoforms.
Edit: Here is an example data set (the code below refers to it as `t`), used to generate a single bar:
# Example data: a 35-row tibble (classes "tbl_df", "tbl", "data.frame") with
# one row per glycoform and four columns:
#   Glycoform  - glycoform name (character)
#   Raw        - numeric values; they sum to 102.14, so presumably the
#                unnormalised measurements (TODO confirm)
#   Percentage - numeric values that sum to 100
#   Plant      - sample label; "A" for every row
# NOTE(review): Raw and Percentage differ only for "Man9" (2.14 vs 0) --
# confirm this is intentional.
# The expression is not assigned to a name here.
structure(list(Glycoform = c("NaNaF", "NaAF", "NaGnF", "NaMF",
"NaNa", "NaA", "NaGn", "NaM", "AAF", "AGnF", "AMF", "AA", "AGn",
"AM/Man4Gn", "Man4A/Man5Gn", "Man5/A", "GnGnXF", "MGnXF", "MMXF",
"GnGnF", "MGnF", "MMF", "GnGnX", "MGnX", "MMX", "GnGn", "MGn",
"MM", "Man9", "Man8", "Man7", "Man6", "Man5", "Man4", "Rest"),
Raw = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81.82,
5.35, 0, 0, 0, 0, 10.69, 0, 0, 0, 0, 0, 2.14, 0, 0, 0, 0,
0, 2.14), Percentage = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 81.82, 5.35, 0, 0, 0, 0, 10.69, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 2.14), Plant = c("A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -35L))
Because it is imperative that each glycoform has the same color in every plot, I fix the order of the glycoform factor levels like so:
# Convert Glycoform to a factor with an explicit, fixed level order, so the
# order does not depend on which glycoforms appear in the data.
t$Glycoform <- factor(
  t$Glycoform,
  levels = c(
    "NaNaF", "NaAF", "NaGnF", "NaMF",
    "NaNa", "NaA", "NaGn", "NaM",
    "AAF", "AGnF", "AMF",
    "AA", "AGn", "AM/Man4Gn", "Man4A/Man5Gn", "Man5/A",
    "GnGnXF", "MGnXF", "MMXF",
    "GnGnF", "MGnF", "MMF",
    "GnGnX", "MGnX", "MMX",
    "GnGn", "MGn", "MM",
    "Man9", "Man8", "Man7", "Man6", "Man5", "Man4",
    "Rest"
  )
)
But because glycoforms that were not found have a percentage value of 0 in my raw data, they still show up in the legend:
library(ggplot2)
library(viridis)
# Stacked bar per plant: one segment per glycoform, filled with the discrete
# "turbo" palette and labelled with the glycoform name.
ggplot(t, aes(x = Plant, y = Percentage, fill = Glycoform, group = Glycoform)) +
  # geom_col() is the direct equivalent of geom_bar(stat = "identity").
  geom_col(color = "white", size = 1.2, width = 0.8) +
  # `discrete` is a logical flag, so pass TRUE rather than the string "TRUE".
  scale_fill_viridis(option = "turbo", discrete = TRUE) +
  # Only rows with a positive percentage get a text label.
  geom_text(
    data = subset(t, Percentage > 0),
    aes(label = Glycoform),
    position = position_stack(vjust = 0.5),
    color = "white",
    size = 4,
    fontface = "bold"
  ) +
  # Lay the fill legend out in two columns.
  guides(fill = guide_legend(ncol = 2))
When I convert the zero values to NA, the legend is fixed, but the color assignment is wrong:
# Replace every cell equal to 0 with NA. This is applied to the whole data
# frame, not only to the Percentage column.
t[t == 0] <- NA

# Same stacked bar plot as before, now drawn from the data containing NAs.
ggplot(t, aes(x = Plant, y = Percentage, fill = Glycoform, group = Glycoform)) +
  # geom_col() is the direct equivalent of geom_bar(stat = "identity").
  geom_col(color = "white", size = 1.2, width = 0.8) +
  # `discrete` is a logical flag, so pass TRUE rather than the string "TRUE".
  scale_fill_viridis(option = "turbo", discrete = TRUE) +
  # Labels are drawn from the full data here (no Percentage > 0 filter).
  geom_text(
    data = t,
    aes(label = Glycoform),
    position = position_stack(vjust = 0.5),
    color = "white",
    size = 4,
    fontface = "bold"
  ) +
  # Lay the fill legend out in two columns.
  guides(fill = guide_legend(ncol = 2))
I have been unable either to lock colors to factor levels in viridis when the unused values are set to NA, or to hide the legend entries for glycoforms with a value of zero when I leave them as zeros. I am an R beginner.