Trying to analyse a list of references from my repository in attempting to prioritize articles to read first.
First tried to place an 'eval' before calling file from dir, i.e. ...pd.DataFrame(eval("File") because error message told I tried to do a string, then I got a syntax error, saying that I was doing the file call incorrectly. Don't you normally use "" to call a file in pandas? Help needed, thx...;o)
from collections import Counter
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
sns.set_style("white")
publication_data = pd.DataFrame("C:\\Users\\jcst\\Desktop\\Private\\Python data\\My_Collection_test1.txt")
publication_data.dropna(subset=['EDAT'], inplace=True)
publication_data["Year"] = (
publication_data["EDAT"].astype(str).str[0:4].astype(int)
)
plt.figure(figsize=(10, 10), dpi=600)
# Top 10 authors
plt.subplot(2, 2, 1)
authors_flat = [
author
for authors in list(publication_data["FAU"].dropna())
for author in authors
]
top10authors = pd.DataFrame.from_records(
Counter(authors_flat).most_common(10), columns=["Name", "Count"]
)
sns.barplot(x="Count", y="Name", data=top10authors, palette="RdBu_r")
plt.title("Top 10 Authors")
# Publications over Time
plt.subplot(2, 2, 2)
yearly = pd.DataFrame(publication_data["Year"].value_counts().reset_index())
yearly.columns = ["Year", "Count"]
sns.lineplot(x="Year", y="Count", data=yearly)
plt.title("Publications over Time")
plt.xlim([1986, 2020])
plt.subplot(2, 2, 3)
# TOP 10 Journals
top10journals = pd.DataFrame.from_records(
Counter(publication_data["TA"]).most_common(10),
columns=["Journal", "Count"],
)
sns.barplot(x="Count", y="Journal", data=top10journals, palette="RdBu_r")
plt.title("Top 10 Journals")
# Top associated keywords
plt.subplot(2, 2, 4)
flat_kw = [
_.lower()
for kws in list(publication_data["OT"].dropna())
for kw in kws
for _ in kw.split(" ")
]
top10kw = pd.DataFrame.from_records(
Counter(flat_kw).most_common(10), columns=["Keyword", "Count"]
)
sns.barplot(x="Count", y="Keyword", data=top10kw, palette="RdBu_r")
plt.title("Top 10 Associated Keywords")
plt.subplots_adjust(top=1, bottom=0, left=0, right=1, hspace=0.3, wspace=0.3)
plt.show()
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "C:\Program Files\JetBrains\PyCharm 2019.1.2\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Program Files\JetBrains\PyCharm 2019.1.2\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/jcst/PycharmProjects/TextMining/Test2.py", line 8, in <module>
publication_data = pd.DataFrame(eval("C:\\Users\\jcst\\Desktop\\Private\\Python data\\My_Collection_test1.txt"))
File "<string>", line 1
C:\Users\jcst\Desktop\Private\Python data\My_Collection_test1.txt
^
SyntaxError: invalid syntax