Hello all very new to R and coding in general. My problem is exactly the same as this user`s loop over groups and create plot for each one R but unlike the iris dataset that has only 3 groups under species, my data frame has ~1000 groups.
Basically, I want to have individual scatter plots for every Protein (~1000) with each individual points as the Peptides (x= Peptides, y= Ratio).
I tried both responses that was provided in the question above, but neither worked. The second code in the question generated individual plots for each protein but plotted all the peptides in the dataframe (essentially plotting the same scatter plot over and over again, with changing protein name) as shown here--->How my current output looks like
EDIT Here's a snapshot of my actual dataframe Show in New Window
structure(list(Peptides = c("LAMB1.52_.R..YSDIEPSTEGEVIFR..A.",
"LAMB1.54_.R..YVVLPRPVCFEK..G.", "LAMB1.55_.K..YYYAVYDMVVR..G.",
"LAMB1.56_.K..YYYAVYDMVVR..G.", "LAMC1_.K..AFDITYVR..L.", "LAMC1.3_.R..ATAESASECLPCDCNGR..S.",
"LAMC1.4_.R..CDCHALGSTNGQCDIR..T.", "LAMC1.5_.R..CDQCEENYFYNR..S.",
"LAMC1.6_.K..CLPFFNDR..P.", "LAMC1.7_.K..CLPFFNDRPWR..R.", "LAMC1.8_.K..CLPFFNDRPWRR..A.",
"LAMC1.9_.K..CIYNTAGFYCDR..C.", "LAMC1.10_.R..CQPGFHSLTEAGCR..P.",
"LAMC1.11_.R..CRENFFR..L.", "LAMC1.13_.K..DNVEGFNCER..C.", "LAMC1.14_.K..DVDQNLMDR..L.",
"LAMC1.15_.K..DVDQNLMDR..L.", "LAMC1.16_.K..DYEDLREDMR..G.",
"LAMC1.17_.K..DYEDLREDMR..G.", "LAMC1.18_.K..DYEDLREDMRGK..E.",
"LAMC1.19_.K..DYEDLREDMRGK..E.", "LAMC1.20_.R..EDGPWIPYQYYSGSCENTYSK..A.",
"LAMC1.21_.R..EGFVGNR..C.", "LAMC1.22_.R..EGFVGNRCDQCEENYFYNR..S.",
"LAMC1.24_.K..FHTSRPESFAIYK..R.", "LAMC1.25_.R..HKQEADDIVR..V.",
"LAMC1.27_.R..KQDDADQDMMMAGMASQAAQEAEINAR..K.", "LAMC1.28_.R..KQDDADQDMMMAGMASQAAQEAEINAR..K.",
"LAMC1.29_.R..KQDDADQDMMMAGMASQAAQEAEINAR..K.", "LAMC1.30_.R..KQDDADQDMMMAGMASQAAQEAEINAR..K.",
"LAMC1.32_.R..KQDDADQDMMMAGMASQAAQEAEINARK..A.", "LAMC1.35_.K..KQEAAIMDYNR..D.",
"LAMC1.36_.K..KQEAAIMDYNR..D.", "LAMC1.39_.R..LHEATDYPWR..P.",
"LAMC1.40_.K..LLNNLTSIK..I.", "LAMC1.41_.R..LNTFGDEVFNDPK..V.",
"LAMC1.42_.K..IRGTYSER..S.", "LAMC1.43_.R..LSAEDLVLEGAGLR..V.",
"LAMC1.44_.R..LTGECLK..C.", "LAMC1.45_.K..NISQDLEK..Q.", "LAMC1.51_.R..QDIAVISDSYFPR..Y.",
"LAMC1.52_.K..QEAAIMDYNR..D.", "LAMC1.53_.K..QEAAIMDYNR..D.",
"LAMC1.55_.R..RATAESASECLPCDCNGR..S.", "LAMC1.57_.R..SQECYFDPELYR..S.",
"LAMC1.58_.R..SWPGCQECPACYR..L.", "LAMC1.59_.K..SYYYAISDFAVGGR..C.",
"LAMC1.61_.K..TAAEEALR..K.", "LAMC1.62_.R..TGQCECQPGITGQHCER..C.",
"LAMC1.63_.R..TREDGPWIPYQYYSGSCENTYSK..A.", "LAMC1.64_.R..VSVPLIAQGNSYPSETTVK..Y.",
"LAMC1.66_.R..YFIAPAK..F.", "FLNA.1_.K..AGNNMLLVGVHGPR..T.",
"FLNA.2_.K..AGNNMLLVGVHGPR..T.", "FLNA.3_.K..AGVAPLQVK..V.",
"FLNA.6_.K..ATCAPQHGAPGPGPADASK..V.", "FLNA.8_.R..AYGPGIEPTGNMVK..K.",
"FLNA.13_.K..DAGEGLLAVQITDPEGKPK..K.", "FLNA.16_.R..DVDIIDHHDNTYTVK..Y.",
"FLNA.17_.R..EAGAGGLAIAVEGPSK..A.", "FLNA.18_.R..EATTEFSVDAR..A."
), Ratio = c(1.056754467, 1.174922122, 1.053785481, 1.125303543,
1.124986981, 1.033582206, 0.999034319, 1.06338514, 1.14573856,
1.168731217, 0.803946964, 1.070001334, 0.79258407, 0.971011335,
1.046705546, 0.853712706, 0.90907656, 1.078730919, 1.155442728,
1.64320038, 1.271964691, 1.167744801, 1.107760911, 1.764000051,
1.287814849, 1.3053024, 0.809893271, 0.937671629, 1.02568349,
0.975795003, 1.26120622, 0.669929057, 0.860244941, 0.953860383,
0.852241097, 1.00511006, 1.157047594, 2.365514653, 1.367842325,
0.950098923, 1.180357859, 1.074772699, 1.043318915, 0.97532573,
1.106755776, 1.035721353, 1.089302473, 0.872753968, 1.16134958,
0.986453823, 0.972356049, 1.134263144, 0.564740889, 0.596272445,
0.837378424, 0.799958396, 0.668971848, 0.595255023, 0.657884455,
0.856494001, 0.883430995), Protein = c("LAMB1", "LAMB1", "LAMB1",
"LAMB1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1",
"LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1",
"LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1",
"LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1",
"LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1",
"LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1",
"LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1", "LAMC1",
"FLNA", "FLNA", "FLNA", "FLNA", "FLNA", "FLNA", "FLNA", "FLNA",
"FLNA")), row.names = 40:100, class = "data.frame")
This is the code that I ran, same as the suggested answer in the post I referred to above.
pep_df<-read.csv("Peptides.csv", sep=",", header=TRUE)
protein_list<-unique(pep_df$Protein)
pep_plot <- function(x,y,z) {
ggplot(data=pep_df, aes_string(x=x,y=y, colour=z)) +
geom_point(shape = 21, aes_string(fill = z), colour = "black", size =2)
}
map(protein_list, ~peptide_plot("Peptides", "Ratio",.x))
after running this code now I am now getting this error, cannot even generate the repeating scatter plots anymore.
Error in FUN(X[[i]], ...) : object 'LAMB1' not found
Thanks again, any help will be greatly appreciated.