I want to use the reactome2py package to analyse pathways by adapting the reference code.
I want to iterate over the dataframe (e.g., df for data_linear_cna.txt).
Reference code: https://colab.research.google.com/drive/1OufIYapCWirfLsudpg0fw1OxD7KTud2y?usp=sharing#scrollTo=e1q6oxLUKGGR
The portion of the code that's throwing the error:
# Excerpt of main(): the comprehension below loops over the DataProcessing
# instance itself (not its ``df`` attribute), which is what raises the
# TypeError shown in the traceback.
cna = DataProcessing(directory + "data_linear_cna.txt")
cna_result = [analysis.identifiers(ids=",".join(d)) for d in cna]
"Full" code:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from reactome2py import analysis, content, utils
def get_pathways(t):
    """Return the sorted pathway names found in the analysis result for token ``t``.

    The result is requested via ``analysis.token`` with
    ``include_disease=False`` so disease pathways are excluded, and the
    ``"name"`` of every entry under the response's ``"pathways"`` key is
    collected.

    Args:
        t: Analysis token passed through as ``token=`` to ``analysis.token``.

    Returns:
        A new list of pathway names in ascending order.
    """
    response = analysis.token(
        token=t,
        species="Homo sapiens",
        page_size="-1",
        page="-1",
        sort_by="ENTITIES_FDR",
        order="ASC",
        resource="TOTAL",
        p_value="1",
        include_disease=False,  # exclude disease pathways
        min_entities=None,
        max_entities=None,
    )
    return sorted(entry["name"] for entry in response["pathways"])
class DataProcessing:
    """Load a tab-separated table and keep a cleaned copy on ``self.df``."""

    def __init__(self, data):
        """Read ``data`` and store the cleaned frame.

        Args:
            data: Path or file-like object handed to ``pd.read_csv`` with a
                tab separator.
        """
        frame = pd.read_csv(data, sep="\t")
        # The Entrez ID column is discarded when present; a missing column
        # is not an error.
        frame = frame.drop(columns="Entrez_Gene_Id", errors="ignore")
        # Keep only the first occurrence of each column label.
        first_occurrence = ~frame.columns.duplicated()
        frame = frame.loc[:, first_occurrence]
        self.df = frame.sort_index()
def main():
    """Submit the CNA table's gene symbols to Reactome and collect the token.

    Loads ``data_linear_cna.txt`` from ``directory`` through
    ``DataProcessing``, sends the identifiers as one comma-separated string
    to ``analysis.identifiers`` and reads ``["summary"]["token"]`` from each
    result.

    Raises:
        KeyError: If the loaded table has no ``"Hugo_Symbol"`` column.
    """
    cna = DataProcessing(directory + "data_linear_cna.txt")
    # DataProcessing defines no __iter__, so looping over the instance raises
    # "TypeError: 'DataProcessing' object is not iterable". The table itself
    # is stored on ``cna.df``.
    # NOTE(review): assumes the identifiers to analyse are the gene symbols in
    # the "Hugo_Symbol" column - confirm against the reference notebook.
    gene_symbols = cna.df["Hugo_Symbol"].dropna().astype(str)
    # Results stay in a list so the token extraction below keeps its shape.
    cna_result = [analysis.identifiers(ids=",".join(gene_symbols))]
    cna_tokens = [r["summary"]["token"] for r in cna_result]
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Traceback:
Traceback (most recent call last):
File "data_gen_files/main.py", line 336, in <module>
main()
File "data_gen_files/main.py", line 133, in main
cna_result = [analysis.identifiers(ids=",".join(d)) for d in cna]
TypeError: 'DataProcessing' object is not iterable
Example dataframe:
# Example frame: each row is one gene symbol followed by five sample values.
# "Entrez_Gene_Id" is left out of the column list because the rows carry no
# Entrez IDs (six values each); DataProcessing drops that column anyway.
pd.DataFrame(
    [
        ["MEOX2", 0.041451, 0.103890, 0.060789, 0.025519, 0.041260],
        ["COX8C", 0.981055, 0.989163, 0.979539, 0.985666, 0.982094],
        ["IMPA2", 0.018580, 0.127843, 0.019699, 0.018484, 0.019477],
        ["TTC8", 0.037853, 0.061488, 0.061730, 0.084045, 0.056650],
        ["TMEM186", 0.858835, 0.718749, 0.887326, 0.862945, 0.909910],
    ],
    columns=[
        "Hugo_Symbol",
        "TCGA-02-0001-01",
        "TCGA-02-0003-01",
        "TCGA-02-0006-01",
        "TCGA-02-0007-01",
        "TCGA-02-0009-01",
    ],
)