Below is my code:
if t.endswith('.docx'):
def get_files(extension, location):
v_doc = []
for root, dirs, files in os.walk(location):
for t in files:
if t.endswith(extension):
v_doc.append(t)
return v_doc
file_list = get_files('.docx', paths)
#print(file_list)
index = 0
for file in file_list:
index += 1
doc = Document(file)
#print(doc)
column_label = f'column{index}'
data_content = doc.paragraphs
final = []
for f in data_content:
final.append(f.text)
new = [x for x in final if x]
#j = {column_label: new}
#print(j)
df_last = pd.DataFrame(new, columns=
[column_label])
df_last.to_excel('output_dummy.xlsx')
But i get following problem:
column2:
#hello how are you guys?
#i hope you are all doing fine
expected dataframe output:
column1: column2:
#This column is getting replaced by column 2 #hello how are you guys?
#some random dummy text #i hope you are all doing fine
docx1 contans: #This column is getting replaced by column 2 #some random dummy text
docx2 conatins: #hello how are you guys? #i hope you are all doing fine
i know its a silly question. where am i doing this mistake ?