The purpose of this code is to output node and edges file to be used in Gephi. I have used this code as base, which is for Chinese language and I am trying to re-create it to handle English language - https://pythonmana.com/2022/01/202201060336539842.html#%E5%9B%9B%E3%80%81%E5%AF%BC%E5%85%A5Gephi%20%E5%88%B6%E4%BD%9C%E7%BD%91%E7%BB%9C%E5%9B%BE
However, I am getting this error - "cannot unpack non-iterable NoneType object"
Error is coming from this part of the code which is the main function, is there something wrong with it?
if __name__ == '__main__':
os.chdir(r'path')
filePath1 = r'.\node.xlsx'
filePath2 = r'.\edge.xlsx'
df = pd.read_excel(r'path')
df2 = [w for w in df['Translated'] if len(w)>20]
co_ist = [" ".join(my_cut(w)) for w in df2]
node_str, edge_str = build_matrix(co_ist, is_reverse = True)
str2csv(filePath1,node_str,'node')
str2csv(filePath2,edge_str,'edge')
Error results from this particular line -
node_str, edge_str = build_matrix(co_ist, is_reverse = True)
Just adding the codes before the main function(given above) here for better clarity -
def my_cut(text):
word_dict_file = pd.read_excel(r'path')
stop_words = []
with open(r'path')
lines = f.readlines()
for line in lines:
stop_words.append(line.strip())
return [w for w in nltk.word_tokenize(text) if w not in stop_words and len(w)>1] #stop_words[:10]
def str2csv(filePath, s, x):
if x=='node':
with open(filePath, 'w', encoding='UTF-8') as f:
f.write("Label,Weight\r")
f.write(s)
print('Write file successful'+ filePath + 'View in')
else:
with open(filePath, 'w', encoding='UTF-8') as f:
f.write("Source,Target,Weight\r")
f.write(s)
print('Write file successful'+ filePath + 'View in')
#Build dictionary
def sortDictValue(dict, is_reverse):
tups = sorted(dict.items(), key=lambda item: item[1], reverse=is_reverse)
s = ''
for tup in tups: # Merge into csv Comma delimited format required
s = s + tup[0] + ',' + str(tup[1]) + '\n'
return s
#construct co-occurence matrix
def build_matrix(co_authors_list, is_reverse):
node_dict = {}
edge_dict = {}
row_authors_list = []
connect_list = []
global row_authors
for row_authors in co_authors_list:
row_authors_list = row_authors.split(' ')
for index, pre_au in enumerate(row_authors_list):
if pre_au not in node_dict:
node_dict[pre_au] = 1
else:
node_dict[pre_au] += 1
if pre_au == row_authors_list[-1]:
break
connect_list = row_authors_list[index+1:]
for next_au in connect_list:
A, B = pre_au, next_au
if A==B:
continue
if A > B:
A, B = B, A
key = A+','+B
if key not in edge_dict:
edge_dict[key] = 1
else:
edge_dict[key] += 1
node_str = sortDictValue(node_dict, is_reverse)
edge_str = sortDictValue(edge_dict, is_reverse)
return node_str, edge_str