1

I have 7 samples, I can successfully create a phyloseq file, make a distance matrix and calculate the beta diversity with adonis2. However, I want to add one more sample from an earlier sequencing, so I have to work with a different shared, taxonomy and metadata file. I differentiated them with a ".J" in the end.

##### Load data ####
OTU = read.table("D:/andok_stat/andok_saubsample_corrected.shared", header=TRUE, sep="\t")
tax = read.table("D:/andok_stat/andok_classifyotu.taxonomy", header=TRUE, sep="\t")
meta = read.table("D:/andok_stat/metadatastat-S12.txt", header=TRUE, row.names=1, sep="\t")

OTU.J = read.table("D:/andok_NGS/Juli/andokkalandok.otus.subsample.shared", header=TRUE, sep="\t")
tax.J = read.table("D:/andok_NGS/Juli/andokkalandok.otu.taxonomy.taxonomy", header=TRUE, sep="\t")
meta.J = read.table("D:/andok_stat/J_meta.txt", header=TRUE, row.names=1, sep="\t")

meta.all = read.table("D:/andok_stat/metadatastat-phyloS12.txt", header=TRUE, row.names=1, sep="\t")

##### Clean up data ####
row.names(OTU) = OTU$Group
OTU.clean = OTU[,-which(names(OTU) %in% c("label", "numOtus", "Group"))]

row.names(tax) = tax$OTU
tax.clean = tax[row.names(tax) %in% colnames(OTU.clean),]
tax.clean = separate(tax.clean, Taxonomy, into = c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species", "Strain"), sep=";")
tax.clean = tax.clean[,-which(names(tax.clean) %in% c("Size", "Strain", "OTU"))]

meta$Sampling_place <- as.factor(meta$Sampling_place)
meta$Climate <- as.factor(meta$Climate)


row.names(OTU.J) = OTU.J$Group
OTU.clean.J = OTU.J[,-which(names(OTU.J) %in% c("label", "numOtus", "Group"))]

row.names(tax.J) = tax.J$OTU
tax.clean.J = tax.J[row.names(tax.J) %in% colnames(OTU.clean.J),]
tax.clean.J = separate(tax.clean.J, Taxonomy, into = c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species", "Strain"), sep=";")
tax.clean.J = tax.clean.J[,-which(names(tax.clean.J) %in% c("Size", "Strain", "OTU"))]

meta.J$Sampling_place <- as.factor(meta.J$Sampling_place)
meta.J$Climate <- as.factor(meta.J$Climate)


meta.all$Sampling_place <- as.factor(meta.all$Sampling_place)
meta.all$Climate <- as.factor(meta.all$Climate)

##### Order the data ####
OTU.clean = OTU.clean[order(row.names(OTU.clean)),]
meta = meta[order(row.names(meta)),]

OTU.clean.J = OTU.clean.J[order(row.names(OTU.clean.J)),]
meta.J = meta.J[order(row.names(meta.J)),]

##### Make the test #####

# Create physeq object for my samples
OTU.UF = otu_table(as.matrix(OTU.clean), taxa_are_rows=FALSE)
tax.UF = tax_table(as.matrix(tax.clean))
meta.UF = sample_data(meta)

physeq = phyloseq(OTU.UF, tax.UF, meta.UF)
physeq
phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 1881 taxa and 7 samples ]
sample_data() Sample Data:       [ 7 samples by 4 sample variables ]
tax_table()   Taxonomy Table:    [ 1881 taxa by 7 taxonomic ranks ]

# Create physeq object for the ".J" samples
OTU.UF.J = otu_table(as.matrix(OTU.clean.J), taxa_are_rows=FALSE)
tax.UF.J = tax_table(as.matrix(tax.clean.J))
meta.UF.J = sample_data(meta.J)

physeq.J = phyloseq(OTU.UF.J, tax.UF.J, meta.UF.J)
physeq.J
phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 2039 taxa and 10 samples ]
sample_data() Sample Data:       [ 10 samples by 4 sample variables ]
tax_table()   Taxonomy Table:    [ 2039 taxa by 7 taxonomic ranks ]

# I only need one sample from the ".J" data, therefore I subset physeq.J
sample_names(physeq.J)
sample_variables(physeq.J)
S12 <- subset_samples(physeq.J, Sample_ID == "A59-16PF")
S12
phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 2039 taxa and 1 samples ]
sample_data() Sample Data:       [ 1 samples by 4 sample variables ]
tax_table()   Taxonomy Table:    [ 2039 taxa by 7 taxonomic ranks ]

# Then I merge the previous physeq file with her sample
mergedphyseq <- merge_phyloseq(physeq, S12)
mergedphyseq
phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 2111 taxa and 8 samples ]
sample_data() Sample Data:       [ 8 samples by 4 sample variables ]
tax_table()   Taxonomy Table:    [ 2111 taxa by 7 taxonomic ranks ]

# Check the sample's name  
sample_names(mergedphyseq)

# S12 was succesfully added, so I can start to make the test:
# calculate bray curtis distance matrix
mergedphyseq_bray <- phyloseq::distance(mergedphyseq, method = "bray")
mergedphyseq_bray

I checked the distance matrix results, and it wasn't correct about the S12 sample. The others were correct. (I based that statement on previously created plots: stacked bar charts and an NMDS.) What do you suggest? What am I doing wrong?

vicey
  • 13
  • 3

0 Answers0