0

I have this data: (Design contains several tissues and the ones I'll need to consider are pancreas and lung)

head(Design)
                         Individual sex   age RNA.quality..max10.   organ  tissue
GTEX-Y5V6-0526-SM-4VBRV   GTEX-Y5V6   1 60-69                 7.1 Thyroid Thyroid
GTEX-1KXAM-1726-SM-D3LAE GTEX-1KXAM   1 60-69                 8.1 Thyroid Thyroid
GTEX-18A67-0826-SM-7KFTI GTEX-18A67   1 50-59                 7.2 Thyroid Thyroid
GTEX-14BMU-0226-SM-5S2QA GTEX-14BMU   2 20-29                 7.2 Thyroid Thyroid
GTEX-13PVR-0626-SM-5S2RC GTEX-13PVR   2 60-69                 7.3 Thyroid Thyroid
GTEX-1211K-0726-SM-5FQUW GTEX-1211K   2 60-69                 7.0 Thyroid Thyroid
dput(counts[1:10,])

structure(list(`GTEX-Y5V6-0526-SM-4VBRV` = c(0L, 1L, 2L, 1L, 
0L, 0L, 0L, 0L, 0L, 214L), `GTEX-1KXAM-1726-SM-D3LAE` = c(0L, 
0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 205L), `GTEX-18A67-0826-SM-7KFTI` = c(0L, 
0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 164L), `GTEX-14BMU-0226-SM-5S2QA` = c(0L, 
0L, 0L, 12L, 0L, 0L, 0L, 0L, 0L, 108L), `GTEX-13PVR-0626-SM-5S2RC` = c(0L, 
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 100L), `GTEX-1211K-0726-SM-5FQUW` = c(0L, 
0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 174L), `GTEX-1KXAM-0926-SM-CXZKA` = c(2L, 
1L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 99L), `GTEX-18A67-2626-SM-718AD` = c(7L, 
3L, 7L, 2L, 0L, 1L, 5L, 0L, 0L, 116L), `GTEX-14BMU-1126-SM-5RQJ8` = c(0L, 
0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 44L), `GTEX-1211K-1426-SM-5FQTF` = c(4L, 
0L, 5L, 2L, 0L, 0L, 0L, 0L, 0L, 143L), `GTEX-11TT1-0726-SM-5GU5A` = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 57L), `GTEX-1HCUA-1626-SM-A9SMG` = c(0L, 
0L, 0L, 22L, 0L, 0L, 0L, 0L, 0L, 53L), `GTEX-1KXAM-0226-SM-EV7AP` = c(0L, 
0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 75L), `GTEX-18A67-1726-SM-7KFT9` = c(0L, 
0L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 73L), `GTEX-14BMU-0726-SM-73KXS` = c(0L, 
0L, 0L, 40L, 0L, 0L, 0L, 0L, 0L, 74L), `GTEX-13PVR-0726-SM-5S2PX` = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 54L), `GTEX-1211K-1126-SM-5EGGB` = c(0L, 
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 25L), `GTEX-11TT1-0326-SM-5LUAY` = c(0L, 
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 54L), `GTEX-1KXAM-2426-SM-DIPFC` = c(1L, 
0L, 2L, 1L, 0L, 0L, 2L, 0L, 0L, 29L), `GTEX-18A67-0326-SM-7LG5X` = c(0L, 
0L, 5L, 4L, 0L, 0L, 2L, 0L, 1L, 91L), `GTEX-14BMU-2026-SM-5S2W6` = c(0L, 
0L, 2L, 5L, 0L, 0L, 0L, 0L, 0L, 30L), `GTEX-13PVR-2526-SM-5RQIT` = c(0L, 
0L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 14L), `GTEX-1211K-2126-SM-59HJZ` = c(1L, 
0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 51L), `GTEX-Y3I4-2326-SM-4TT81` = c(0L, 
0L, 3L, 0L, 0L, 0L, 1L, 0L, 0L, 38L), `GTEX-1KXAM-0426-SM-DHXKG` = c(0L, 
0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 105L), `GTEX-18A67-1126-SM-7KFSB` = c(1L, 
0L, 0L, 4L, 0L, 0L, 1L, 0L, 0L, 76L), `GTEX-14BMU-0526-SM-73KW4` = c(0L, 
0L, 0L, 11L, 0L, 0L, 0L, 0L, 0L, 53L), `GTEX-1211K-0826-SM-5FQUP` = c(1L, 
0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 104L), `GTEX-11TT1-1626-SM-5EQL7` = c(0L, 
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 113L), `GTEX-ZYFG-0226-SM-5GIDT` = c(1L, 
0L, 2L, 2L, 0L, 0L, 2L, 0L, 0L, 54L), `GTEX-1KXAM-0826-SM-CXZK9` = c(0L, 
0L, 0L, 5L, 0L, 0L, 2L, 0L, 0L, 97L), `GTEX-18A67-2426-SM-7LT95` = c(1L, 
0L, 2L, 0L, 0L, 1L, 3L, 0L, 0L, 69L), `GTEX-14BMU-0926-SM-5S2QB` = c(0L, 
0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 29L), `GTEX-13PVR-1826-SM-5Q5CC` = c(1L, 
0L, 0L, 3L, 0L, 1L, 2L, 0L, 0L, 32L), `GTEX-1211K-0926-SM-5FQTL` = c(0L, 
0L, 0L, 3L, 0L, 0L, 1L, 0L, 0L, 99L), `GTEX-11TT1-0526-SM-5P9JO` = c(0L, 
1L, 2L, 4L, 0L, 0L, 2L, 0L, 0L, 52L), `GTEX-1KXAM-0726-SM-E9U5I` = c(0L, 
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 45L), `GTEX-18A67-2526-SM-7LG5Z` = c(1L, 
0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 91L), `GTEX-14BMU-1026-SM-5RQJ5` = c(1L, 
0L, 1L, 8L, 0L, 0L, 0L, 0L, 0L, 47L), `GTEX-13PVR-2026-SM-73KXT` = c(0L, 
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 27L), `GTEX-1211K-1326-SM-5FQV2` = c(0L, 
0L, 3L, 0L, 0L, 0L, 1L, 1L, 0L, 57L), `GTEX-11TT1-0626-SM-5GU4X` = c(1L, 
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 90L), `GTEX-ZYFG-1826-SM-5GZWX` = c(0L, 
0L, 3L, 2L, 0L, 0L, 2L, 0L, 0L, 91L), `GTEX-1KXAM-1926-SM-D3LAG` = c(0L, 
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 103L), `GTEX-18A67-2226-SM-7LT9Z` = c(0L, 
0L, 2L, 2L, 0L, 0L, 1L, 0L, 1L, 157L), `GTEX-13PVR-1726-SM-5Q5EC` = c(1L, 
0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 34L), `GTEX-1211K-1826-SM-5EGJ2` = c(0L, 
0L, 1L, 3L, 0L, 0L, 0L, 0L, 0L, 49L), `GTEX-11TT1-0926-SM-5GU5M` = c(0L, 
2L, 0L, 3L, 1L, 0L, 0L, 0L, 1L, 49L), `GTEX-1KXAM-1026-SM-CY8IA` = c(0L, 
0L, 1L, 3L, 0L, 0L, 0L, 0L, 0L, 93L), `GTEX-14BMU-1626-SM-5TDE7` = c(0L, 
1L, 3L, 13L, 0L, 0L, 1L, 0L, 0L, 84L), `GTEX-13PVR-2226-SM-7DHKP` = c(0L, 
0L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 75L), `GTEX-1211K-1926-SM-5EQLB` = c(0L, 
1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 114L), `GTEX-11TT1-2126-SM-5GU5Y` = c(2L, 
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 49L), `GTEX-ZT9W-2026-SM-51MRA` = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 70L), `GTEX-1KXAM-2326-SM-CYPTD` = c(0L, 
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 20L), `GTEX-18A67-0226-SM-7LG67` = c(0L, 
0L, 5L, 2L, 0L, 0L, 1L, 0L, 0L, 94L), `GTEX-14BMU-2126-SM-5S2TS` = c(0L, 
0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 50L), `GTEX-13PVR-2426-SM-5RQHN` = c(0L, 
0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 59L), `GTEX-1211K-2226-SM-5FQU6` = c(0L, 
0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L, 81L), `GTEX-11TT1-2426-SM-5EQMK` = c(0L, 
1L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 60L)), row.names = c("ENSG00000243485", 
"ENSG00000237613", "ENSG00000186092", "ENSG00000238009", "ENSG00000222623", 
"ENSG00000241599", "ENSG00000236601", "ENSG00000235146", "ENSG00000223181", 
"ENSG00000237491"), class = "data.frame")

I need to create a DGEList with only some of the genes: pancreas and lung genes (if I am right), in order to do the tasks in the image below: Tasks. First, I need to do a PCA to check whether there is separation between male and female genes. After that, I need to do a differential expression analysis with the function exactTest(). Since exactTest() needs a DGEList to compare pancreas sex 1 genes with pancreas sex 2 genes, and lung sex 1 genes with lung sex 2 genes, I suppose that I can do both after creating the DGEList.

In the end, my problem is that I don't know how to set up the data. If you need anything else I'll be here. Thank you in advance.

# Build an edgeR DGEList from a 12-sample subset of the data and draw an MDS
# plot of those samples.
# Expects `Design` (sample annotation, one row per sample) and `counts`
# (genes in rows, samples in columns) to exist already, and edgeR to be
# attached.

# Positions 13-18 and 25-30 are kept: the same selection as taking 13:30 and
# then dropping positions 7:12 of that range.
# NOTE(review): presumably these are the pancreas and lung samples -- confirm
# against Design$tissue.
KeptSamples <- c(13:18, 25:30)
PancreasLungDesign <- Design[KeptSamples, 1:6]
Counts2 <- counts[, KeptSamples]

# The annotation rows must describe the count columns in the same order.
# Stop here if they do not, instead of printing a logical vector that is easy
# to overlook.
stopifnot(identical(rownames(PancreasLungDesign), colnames(Counts2)))

# Keep the genes that reach at least 10 reads in at least one kept sample.
Expressedgenes2 <- Counts2 >= 10
NumExpressedgenes2 <- rowSums(Expressedgenes2)
FilteredCounts2 <- Counts2[NumExpressedgenes2 > 0, ]

# NOTE(review): the group is the tissue. exactTest() compares two levels of
# this group factor, so a sex-within-tissue comparison would need a group that
# combines tissue and sex -- confirm before running the test.
y2 <- DGEList(counts = FilteredCounts2, group = PancreasLungDesign$tissue)
y2 <- calcNormFactors(y2)

# Per-sample column sums of the normalised counts per million.
colSums(cpm(y2, normalized.lib.sizes = TRUE))

# One hand-assigned colour per sample, in the column order of y2.
SampleColours <- c(
  "green", "green", "blue", "blue", "blue", "green",
  "yellow", "yellow", "red", "red", "yellow", "red"
)
stopifnot(length(SampleColours) == ncol(y2))

# The second positional argument of plotMDS() is `top` (number of genes used
# for the distances). The table of sexes that used to be passed there shrank
# the plot to a handful of genes, so it is dropped and the default is used.
plotMDS(
  y2,
  labels = PancreasLungDesign$tissue,
  col = SampleColours,
  cex = 0.5,
  main = "Principal component analysis sex specific expression"
)
  • Does this help? https://stackoverflow.com/questions/73034981/how-to-subset-dataframe-in-r-based-on-another-data – jared_mamrot Jul 20 '22 at 11:33
  • Yes thank you, so by setting up it I can easily assign the colors for the PCA and check the differential expression of sex specific genes, but for the differential expression analysis, considering that originally I had to compare just the tissues, in order to compare the sex genes, I should just compare the genes of sex 1 with the genes of sex 2 of the same tissue right? this means that I should subset again the dataframes to do it? I am sorry, but yes, is obvious that I am new to that, probably the question is stupid and foregone – Luca Rinaldi Jul 20 '22 at 15:48
  • No such thing as a stupid question, but please review [How do I ask a good question on stackoverflow?](https://stackoverflow.com/help/how-to-ask). We are not a free coding service and this question appears to be [homework/coursework](https://meta.stackoverflow.com/questions/334822/how-do-i-ask-and-answer-homework-questions), so we expect you to show your genuine attempt at solving the question yourself i.e. edit your question to include the code you've tried and note where it has failed. If you follow the posting guidelines you will have a much greater chance of getting help on stackoverflow. – jared_mamrot Jul 20 '22 at 22:55
  • Yes, obviously I am not asking for that. I added the code I used to set up the data for the PCA. I colored sex 1 and sex 2 genes from pancreas green and blue, and sex 1 and 2 genes from lung yellow and red. Sincerely, from the plot that I obtained I don't see that big a difference in differential expression, and I also don't know how I could continue with the exactTest function. I was thinking of doing 2 exact tests with 2 pairs: sex 1 and 2 from pancreas, and another one with sex 1 and 2 from lung, but I am not sure that I am right. I have also posted an image of the plot that I obtained – Luca Rinaldi Jul 21 '22 at 15:12
  • Nevermind I forgot to add the table function with the sex specific genes to the plot to see the differential expression. – Luca Rinaldi Jul 21 '22 at 15:17

0 Answers0