I have one question. Like now i have two files:sampleattributes and genecount.I have filtered sample attributes file and it has a column name sampid and genecount has a column name sampid. I am trying to merge the two files using the common sampid. This is what I have written:
GTEx_Analysis_v8_Annotations_SampleAttributesDS <- read_delim("/new_gtex/GTEx_Analysis_v8_Annotations_SampleAttributesDS.txt",delim = "\t", escape_double = FALSE,trim_ws = TRUE)
sample_attributes <- select(GTEx_Analysis_v8_Annotations_SampleAttributesDS,SAMPID,SMTS,SMTSD,SMAFRZE)
sample_attributes_braindata <- sample_attributes %>% filter(sample_attributes$SMTS == "Brain" & sample_attributes$SMAFRZE == "RNASEQ")
sample_attributes_braindata <- data.frame(sample_attributes_braindata)
GTEx_Analysis_gene_reads <- read_table2("/new_gtex/GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_reads.gct")
GTEx_Analysis_gene_reads <- data.frame(GTEx_Analysis_gene_reads)
gene_count <- data.frame(t(GTEx_Analysis_gene_reads[,-c(1:2)]))
colnames(gene_count) <- GTEx_Analysis_gene_reads$Name
This is how my sample_Attributes_braindata look like:
And this is how my gene_count data look like:
I tried to rename the gene_count first column with GTEX ID using this command:
colnames(gene_count) <- GTEx_Analysis_gene_reads$Name
But its not happening.
I also tried this command to rename the first column with sampid:
colnames(gene_count)[1] <- "SAMPID"
What I want to do it merge the two datasets by the common column SAMPID or GTEXid
genecount2 <- merge(sample_attributes_braindata,gene_count, by=SAMPID)
dput(gene_count[1:5, 1:4])
structure(list(ENSG00000223972.5 = c(0, 0, 0, 0, 0), ENSG00000227232.5 = c(187,
109, 143, 251, 113), ENSG00000278267.1 = c(0, 0, 1, 0, 0), ENSG00000243485.5 = c(1,
0, 0, 1, 0)), row.names = c("GTEX.1117F.0226.SM.5GZZ7", "GTEX.1117F.0426.SM.5EGHI",
"GTEX.1117F.0526.SM.5EGHJ", "GTEX.1117F.0626.SM.5N9CS", "GTEX.1117F.0726.SM.5GIEN"
), class = "data.frame")
dput((sample_attributes_braindata[1:5, 1:4]))
structure(list(SAMPID = c("GTEX-1117F-3226-SM-5N9CT", "GTEX-111FC-3126-SM-5GZZ2",
"GTEX-111FC-3326-SM-5GZYV", "GTEX-1128S-2726-SM-5H12C", "GTEX-1128S-2826-SM-5N9DI"
), SMTS = c("Brain", "Brain", "Brain", "Brain", "Brain"), SMTSD = c("Brain - Cortex",
"Brain - Cortex", "Brain - Cerebellum", "Brain - Cortex", "Brain - Cerebellum"
), SMAFRZE = c("RNASEQ", "RNASEQ", "RNASEQ", "RNASEQ", "RNASEQ"
)), row.names = c(NA, 5L), class = "data.frame")