2

This was my earlier question where it was solved using multiple distribution.

I want to plot the continuous variable like age or tumor mutation burden as shown in first figure with a range like a window such 20-30 age group or some mutational burden range

The frequencies are calculated for all the variables of the metadata, but when plotting the age is not mapped to the final plot as show in the second plot.

Does the age need to be converted into other class before plotting?

distribution

library(dplyr)
library(ggplot2)
library(tidyverse)
library(ggnewscale)

plot_meta <- read.csv("figure1/TCGA_data/BEAT_AML/TCGA_AML/full_cleaned_meta.txt",sep = "\t")
head(plot_meta)


colnames(plot_meta)
x <- colnames(plot_meta)
b <- gsub(".", "-", x, fixed = TRUE)
colnames(plot_meta) <- b
colnames(plot_meta)
 

df1 <-
  plot_meta %>%
  mutate(across(everything(), as.character)) %>%
  pivot_longer(everything()) %>%
  count(name, value) %>%
  group_by(name) %>%
  mutate(perc = n / sum(n) * 100)
df1

df1 %>%
  ggplot(aes(name, perc)) +
  geom_col(data = ~ filter(.x, name == "FAB") %>% rename(FAB = value), mapping = aes(fill = FAB)) +
  new_scale_fill() +
  geom_col(data = ~ filter(.x, name == "Sex") %>% rename(Sex = value), mapping = aes(fill = Sex)) +
  new_scale_fill() +
  geom_col(data = ~ filter(.x, name == "Age") %>% rename(Age = value), mapping = aes(fill = Age)) +
  coord_flip()

The issue is I'm not able to plot the continuous distribution like the age as i have plotted only two other variables,what is going wrong here. Any help or suggestion would be really appreciated

My databsubset

a <- dput(head(plot_meta))
structure(list(patient = structure(c(36L, 33L, 122L, 95L, 66L, 
49L), .Label = c("TCGA-AB-2805", "TCGA-AB-2806", "TCGA-AB-2808", 
"TCGA-AB-2810", "TCGA-AB-2811", "TCGA-AB-2812", "TCGA-AB-2813", 
"TCGA-AB-2814", "TCGA-AB-2815", "TCGA-AB-2817", "TCGA-AB-2818", 
"TCGA-AB-2819", "TCGA-AB-2820", "TCGA-AB-2821", "TCGA-AB-2822", 
"TCGA-AB-2823", "TCGA-AB-2825", "TCGA-AB-2826", "TCGA-AB-2828", 
"TCGA-AB-2830", "TCGA-AB-2834", "TCGA-AB-2835", "TCGA-AB-2836", 
"TCGA-AB-2839", "TCGA-AB-2840", "TCGA-AB-2841", "TCGA-AB-2842", 
"TCGA-AB-2843", "TCGA-AB-2844", "TCGA-AB-2845", "TCGA-AB-2846", 
"TCGA-AB-2847", "TCGA-AB-2849", "TCGA-AB-2851", "TCGA-AB-2853", 
"TCGA-AB-2856", "TCGA-AB-2857", "TCGA-AB-2858", "TCGA-AB-2859", 
"TCGA-AB-2861", "TCGA-AB-2862", "TCGA-AB-2863", "TCGA-AB-2865", 
"TCGA-AB-2866", "TCGA-AB-2867", "TCGA-AB-2869", "TCGA-AB-2870", 
"TCGA-AB-2871", "TCGA-AB-2872", "TCGA-AB-2873", "TCGA-AB-2874", 
"TCGA-AB-2875", "TCGA-AB-2876", "TCGA-AB-2877", "TCGA-AB-2878", 
"TCGA-AB-2880", "TCGA-AB-2881", "TCGA-AB-2882", "TCGA-AB-2883", 
"TCGA-AB-2884", "TCGA-AB-2885", "TCGA-AB-2886", "TCGA-AB-2888", 
"TCGA-AB-2889", "TCGA-AB-2890", "TCGA-AB-2891", "TCGA-AB-2892", 
"TCGA-AB-2893", "TCGA-AB-2894", "TCGA-AB-2895", "TCGA-AB-2896", 
"TCGA-AB-2897", "TCGA-AB-2898", "TCGA-AB-2899", "TCGA-AB-2900", 
"TCGA-AB-2901", "TCGA-AB-2908", "TCGA-AB-2910", "TCGA-AB-2911", 
"TCGA-AB-2912", "TCGA-AB-2913", "TCGA-AB-2914", "TCGA-AB-2915", 
"TCGA-AB-2916", "TCGA-AB-2917", "TCGA-AB-2918", "TCGA-AB-2919", 
"TCGA-AB-2920", "TCGA-AB-2921", "TCGA-AB-2924", "TCGA-AB-2925", 
"TCGA-AB-2927", "TCGA-AB-2928", "TCGA-AB-2929", "TCGA-AB-2930", 
"TCGA-AB-2931", "TCGA-AB-2932", "TCGA-AB-2933", "TCGA-AB-2934", 
"TCGA-AB-2935", "TCGA-AB-2936", "TCGA-AB-2937", "TCGA-AB-2938", 
"TCGA-AB-2939", "TCGA-AB-2940", "TCGA-AB-2941", "TCGA-AB-2942", 
"TCGA-AB-2943", "TCGA-AB-2944", "TCGA-AB-2946", "TCGA-AB-2948", 
"TCGA-AB-2949", "TCGA-AB-2950", "TCGA-AB-2952", "TCGA-AB-2955", 
"TCGA-AB-2956", "TCGA-AB-2959", "TCGA-AB-2963", "TCGA-AB-2965", 
"TCGA-AB-2966", "TCGA-AB-2970", "TCGA-AB-2971", "TCGA-AB-2973", 
"TCGA-AB-2975", "TCGA-AB-2976", "TCGA-AB-2977", "TCGA-AB-2979", 
"TCGA-AB-2980", "TCGA-AB-2981", "TCGA-AB-2982", "TCGA-AB-2983", 
"TCGA-AB-2984", "TCGA-AB-2986", "TCGA-AB-2987", "TCGA-AB-2988", 
"TCGA-AB-2990", "TCGA-AB-2991", "TCGA-AB-2992", "TCGA-AB-2994", 
"TCGA-AB-2995", "TCGA-AB-2996", "TCGA-AB-2998", "TCGA-AB-2999", 
"TCGA-AB-3000", "TCGA-AB-3001", "TCGA-AB-3002", "TCGA-AB-3007", 
"TCGA-AB-3008", "TCGA-AB-3009", "TCGA-AB-3011", "TCGA-AB-3012"
), class = "factor"), Sex = structure(c(2L, 2L, 1L, 1L, 2L, 2L
), .Label = c("Female", "Male"), class = "factor"), FAB = structure(c(5L, 
1L, 5L, 3L, 2L, 4L), .Label = c("M0", "M1", "M2", "M3", "M4", 
"M5", "M6", "M7", "nc"), class = "factor"), `Diagnosis-Age` = c(63L, 
39L, 76L, 62L, 42L, 42L), `Bone-Marrow-Blast-Percentage` = c(82L, 
83L, 91L, 72L, 68L, 88L), Cytogenetics = structure(c(75L, 93L, 
51L, 27L, 21L, 57L), .Label = c("37~49,XY,+Y,der(1)add(1)(p13)del(1)(q21q25),-5,der(7)inv(7)(p15q11.2)?inv(7)(q22q32),+17,add(17)(p13),+21,+mar[cp20]", 
"39~47,XX,del(5)(q13q33),-7,der(8)t(8;?8;8)(p23;?p11.2p23;q11.2),der(14)t(1;14)(p12;p11.2)der(1)t(7;16)(p15;q22),+2mar[cp19]", 
"41~44,X,?i(X)(p10),-7,der(12)t(8;12)(q11.2;p11.2),-8 [cp11]/46,XX[8[", 
"42,XY,-5,-7,add(12)(p13),t(14;15)(q10;q10),der(17)t(5;17)(p13;p11.2),-18[6]/40,idem,-11,-add(12)(p13),der(12)t(?;12)(?;p13),-19[6]/41,idem,-der(17)[3]/41,idem,-der(17),+mar1,+mar[3]/41,idem,der(1)der(1)(p12)add(1)(p12),+der(1)(q21)add(1)(q21),-3,-8[2]", 
"43,XY-3,del(5)(q12q33),-7,der(10)t(10;11)(q26;q13),-12,-18,+2mar[20]", 
"44-45,X,-Y,-5,add(16)(q22),-17,-18,iso(21),+mars[cp5]/82-84,XX,-Y,-3,-4,-11,-12,-19,-21,+21[cp5}", 
"44~46,XX,del(11)(q23),der(19)?t(11;19)(q23;p13.1)[cp11]/44~45,XX,-19[cp4]/46,XX [5]", 
"44~47,XX,t(1;15)(q32;q26)[14],del(5)(q13q33)[19],-7[20],+8[7],del(12)(p11.2p11.2)[15],del(17)(q21)[8],der(22)t(1;22)(p13;p11.2)[20],+mar[13][cp20]", 
"44~47,XY,del(5)(q22q35)[20],-7[14],-8[6],der(12)t(10;12)(p11.2q21)[2],add(14)(p12)[11],-17[13],der(17)t(10;17)(q11.2;p13)[14],-18[7],add(18)(p11.2)[7],-21[10],i(21)(q10)[4],-22[4],+mar[10],+mar1x2[6][cp20]", 
"45,X,-X,t(8;21)(q22;q22)[20]", "45,X,-Y, t(8;7;21)(q22;p15;q22[22]/46,XY[3]", 
"45,X,-Y,t(8;21)(q22;q22)[13]/45,idem,del(9)(q22;q32)[7]", "45,X,-Y,t(8;21)(q22;q22)[19]/46,XY[1]", 
"45,X,-Y[3]/46,XY [17]", "45,XX-7[5]-only 5 metaphases", "45,XX,-7,t(9;11)(p22;q23)[19]/46,XX[1]", 
"45,XX,-7[12]/46,XX[8]", "45,XX,-7[20]", "45,XY,-7, t(9;22)(q34;q11.20) [19]/46,XY[1]", 
"45,XY,-7[20]", "45,XY,der(7)(t:7;12)(p11.1;p11.2),-12,-13,+mar[19]/46,XY[1]", 
"45~46,XY,add(X)(q22)[7],Y[4],der(5)t(5;17)(q13;21)[18],-7[18],+8[17],del(12)(q23)[16],-17[18],add(18)(p11.2)[14][cp18]", 
"46, XX[14]", "46, XX[15]", "46, XX[16]", "46, XX[19]", "46, XX[20]", 
"46, XY[15]", "46, XY[20]", "46,XX,1~50dmin[12]/46,idem,der(6)t(6;?)(q22;?)[2]/46,XX[6]", 
"46,XX,9qh+[20]", "46,XX,del(3)(q23q26.2),der(7)t(1:7)(q32;q32),del(10)(q22q25),t(13;16)(q34;p11.2)dup(21)(q22)[cp20]", 
"46,XX,del(5)(q11.2q33)[1]/48~52,idem,+1,+?del(5)(q15q33),+11,+11,?t(12;22)(p13;q12),-13,-17,+i(22)(q10),+i(22)(q10),+mar[cp19]", 
"46,XX,del(5)(q22q33)[4]/46,XX[16]", "46,XX,i(17)(q10)[1]/45,sl-7[2]/48,sl,+13,+19[3]/46,XX[15]", 
"46,XX,inv(16)(p13q22)[15]/46,XX[2]", "46,XX,inv(16)(p13q22)[19]/46,XX[1]", 
"46,XX,inv(16)(p13q22)[20]", "46,XX,inv(16)(p13q22)[5]/46,idem,t(3;3)(p13;q?28)[5]/46,XX[6]", 
"46,XX,t(15;17)(q22;q21.1)[19]/47,idem,+8 [1]", "46,XX,t(15;17)(q22;q21),t(16;19)(p13.3;p13.1)[17]/46,XX[3]", 
"46,XX,t(15;17)(q22;q21)[11]/46,XX[9]", "46,XX,t(15;17)(q22;q21)[12]/46,XX[8]", 
"46,XX,t(15;17)(q22;q21)[20]", "46,XX,t(8;21)(q22;q22)[17]/46,XX[3]", 
"46,XX,t(8;21)(q22;q22)[20]", "46,XX,t(8;21)[15]/46,idem,del(9)(q12q22)[5]", 
"46,XX[15]", "46,XX[18]", "46,XX[19]/46,XX,add(7)(p?22)[1]", 
"46,XX[20]", "46,XX+13,21[cp17]/46,XX[3]", "46,XY,9qh+[19]", 
"46,XY,del(11)(p12)[2]/46,XY[18]", "46,XY,del(20)(q11.2)[23]/92,XXYY,del(20)(q11.2)x2[2]/46,XY[3]", 
"46,XY,del(7)(q21q36)[18]/46,XY[2]", "46,XY,del(9)(q13:q22),t(11:21)(p13;q22),t(15;17)(q22;q210[20]", 
"46,XY,i(17)(q10)[15]/47,XY,idem+13[3]/46,XY[2]", "46,XY,inv(16)(p13;q22)[20]", 
"46,XY,inv(16)(p13q22)[17]/46,XY[3]", "46,XY,inv(16)(p13q22)[9]/46,XY[10]", 
"46,XY,t(11;19)(q23;p13)[17]/46,XY,t(11;19)(q23;p13),inv(12)(p12p13)[3]", 
"46,XY,t(11;19)(q23;p13)[20]", "46,XY,t(15;17)(q22;q21)[19]/46,XY[1]", 
"46,XY,t(15;17)(q22;q21)[20]", "46,XY,t(15;17)(q22:q21)[11]/46,XY[9]", 
"46,XY,t(2;4)(q34;q21)inv(16)(p13q22) [20]", "46,XY,t(6;11)(q27;q23)[15]", 
"46,XY,t(9;11)(p22;q23)[7]/47,XY,t(9;11)(p22;q23)[7]/46,XY[4]", 
"46,XY,t(9;22)(q34;q11.2)[13]/34~37,idem,-3,del(4),-4,-5,-7,-9,-10,t?(11;12),-12,-14,-14,-16,-17,-22[cp6]/46,XY[1]", 
"46,XY,t(9;22)(q34;q11.2[4]/50,idem,+8,+10,+21,+der(22)(t(9;22)(q34;q11.2)[16]", 
"46,XY[13]", "46,XY[15]", "46,XY[19]", "46,XY[20]", "46,XY[30]", 
"46~49,XY,del(3)(p14),del(5)(p11.2q33),del(17)(q21q21),add(21)(p11.2),+22,mar[cp20]", 
"47,XX,+der(5)t(2;5)(p11.2;q11.2)?,t(8;16)(p11.2;p13.3)[19]", 
"47,XX,i(11)(q10)[18]/46,XX [2]", "47,XX,t(15;17)(q22:q21)+mar[20]", 
"47,XX+11 [20]", "47,XX+8 [20]", "47,XXY [17]", "47,XY,+13[5]/46,XY[15]", 
"47,XY,+21 [6]/46,XY[13]", "47,XY,+21[11]/48,XY,+3,+21[8]", "47,XY,+22[10]/47,XY,+8[7]/45,XY,del(3)(p21),del(4)(p12p15),-7,?dup(7)(q11.2q36)[3]", 
"47,XY,+8 [10]/46,XY [10]", "47,XY,+8 [19]", "47,XY,+8 [20]", 
"47,XY,+8[15]/46,+8,-17[3]", "47,XY,+9[10]/46,XY[10]", "47,XY,del(5)(q22q33),t(10;11)(p13~p15;q22~23),i(17)(q10)[3]/46,XY[17]", 
"47,XY,del(7)(q22),+8,t(15;17)(q22;q21)[18]/46,XY,del(7)(q22),t(15;17)(q22;q21)[2]", 
"47,XY+8 [15]/48,XY+8+8[4]/46,XY[1]", "48,XY,+8,+8[16]/46,XY[4]", 
"52~54,XY,+2,+4,+6,+8,del(11)(q23),+19,+19,+21[17]/46,XY[3]", 
"53~56,XY,+1,del(2)(q33q34),+8,+10,+11x2,+13x1-2,+14,del(17)(p11.2),+19,add(21)(q22),+22[cp20]", 
"incomplete-46,XY,del(12)(p11.20[2]/46,XY[3]", "N.D.", "ND", 
"Outside hospital with inv(16)"), class = "factor"), `Cytogenetic-Code--Other-` = structure(c(8L, 
3L, 8L, 8L, 3L, 9L), .Label = c("BCR-ABL1", "CBFB-MYH11", "Complex Cytogenetics", 
"Intermediate Risk Cytogenetic Abnormality", "MLL translocation, poor risk", 
"MLL translocation, t(9;11)", "N.D.", "Normal Karyotype", "PML-RARA", 
"Poor Risk Cytogenetic Abnormality", "RUNX1-RUNX1T1"), class = "factor"), 
    Induction = structure(c(11L, 4L, 1L, 8L, 4L, 9L), .Label = c("7+3", 
    "7+3, dauna", "7+3, IT", "7+3+3", "7+3+3, gleevec", "7+3+3, then 5+2+2", 
    "7+3+3+PSC", "7+3+AMD", "7+3+ATRA", "7+3+dauno", "7+3+Genasense", 
    "7+3+study drug", "7+4+ATRA", "Azacitidine", "CLAM", "Cytarabine only", 
    "Decitabine", "Decitabine then 7+3", "Hydrea & Idarubicin", 
    "Hydrea, ATRA started", "hydrea, didn't get add'l chemo", 
    "LBH/Decitabine", "low dose Ara C", "no treatment", "Revlimid", 
    "Revlmd then Decitbne,7+3,5+2"), class = "factor")), row.names = c(NA, 
6L), class = "data.frame")

my figure

PesKchan
  • 868
  • 6
  • 14
  • 1
    Try with `filter(.x, name == "Diagnosis-Age") ...`. The age category is called `Diagnosis-Age` in your data not `Age`. – stefan Mar 14 '22 at 07:00

1 Answers1

2

Rename Diagnosis-Age and use cut to convert to a factor. Add labels as required for appearance of age groups in legend.

Note I have swapped name and perc in the call to aes to avoid the call to coord_flip.

library(dplyr)
library(ggplot2)
library(tidyr)
library(ggnewscale)

plot_meta <- a
df1 <-
  plot_meta %>%
  rename(Age = `Diagnosis-Age`) %>% 
  mutate(Age = cut(Age, breaks = seq(30, 80, by = 10))) %>% 
  mutate(across(everything(), as.character)) %>%
  pivot_longer(everything()) %>%
  count(name, value) %>%
  group_by(name) %>%
  mutate(perc = n / sum(n) * 100)

df1 %>%
  ggplot(aes(perc, name)) +
  geom_col(data = ~ filter(.x, name == "FAB") %>% rename(FAB = value), mapping = aes(fill = FAB)) +
  new_scale_fill() +
  geom_col(data = ~ filter(.x, name == "Sex") %>% rename(Sex = value), mapping = aes(fill = Sex)) +
  new_scale_fill() +
  geom_col(data = ~ filter(.x, name == "Age") %>% rename(Age = value), mapping = aes(fill = Age))

Created on 2022-03-14 by the reprex package (v2.0.1)

Peter
  • 11,500
  • 5
  • 21
  • 31
  • its runs without any issue another query ,how do i define my color for the graph where do i pass the argument? I would like to add color like the first figure – PesKchan Mar 14 '22 at 09:51
  • 1
    I assume you mean the first figure in the question? You can control fill colours by using `scale_fill_manual` check out help documentation `?scale_fill_manual` – Peter Mar 14 '22 at 09:54
  • yes..."scale_fill_manual" this i have used but never with piping the arguments..let me see first..then – PesKchan Mar 14 '22 at 10:06
  • It may also be worth checking out examples from {ggnewscale} to see how they manage different fill colour schemes for individual legends. In the documentation they use: `new_scale_fill()` to control the colour scheme. – Peter Mar 14 '22 at 10:16
  • okay thanks let me look into it – PesKchan Mar 14 '22 at 10:17