Can anyone tell me what the problem is with my code? I want to segment a big text into smaller texts by words — for example, each segment should contain 60 words.
# Split the source text into 20 segments of 60 words each and write every
# segment to its own numbered file (Eco1.txt .. Eco20.txt).
#
# Fixes vs. the original:
#   - the original never incremented textNum (the `continue` skipped the
#     increment for every textNum != 20), so `while textNum <= 20` looped forever;
#   - the inner `while i < idx + 60` bound grew via `idx += 60` each pass,
#     eventually indexing past the end of `words` (IndexError);
#   - the outer `for idx, w in enumerate(words)` did nothing useful, since
#     rebinding `idx` inside the body does not affect enumerate;
#   - files are now opened with `with`, so handles are closed even on error.
file = r'C:\Users\Nujou\Desktop\Master\thesis\steganalysis\dataset\economy2.txt'
with open(file, 'r', encoding='utf-8-sig') as openFile:
    words = openFile.read().split()  # whitespace-separated tokens
#print (words)

words_per_segment = 60
for textNum in range(1, 21):  # 20 segments, numbered 1..20
    start = (textNum - 1) * words_per_segment
    wordAsText = words[start:start + words_per_segment]
    if not wordAsText:
        break  # source had fewer than 20 * 60 words; stop early
    textSeg = ' '.join(wordAsText)
    print(textNum, textSeg)
    out_path = (r"C:\Users\Nujou\Desktop\Master\thesis\steganalysis\dataset\datasetEco\Eco"
                + str(textNum) + ".txt")
    with open(out_path, "w", encoding='utf-8-sig') as files:
        files.write(textSeg)
My big file (economy2) contains more than 12K words.
EDIT: Thanks for all the responses. I tried what I found here, and it achieved what I needed.
Edited Code:
# Split the source text into 60-word segments and write each segment to its
# own numbered output file (Eco1.txt, Eco2.txt, ...).
#
# Fix vs. the original: the input file handle was never closed (and output
# files were closed manually) — both now use `with` so handles are released
# even if an exception occurs mid-write.
file = r'C:\Users\Nujou\Desktop\Master\thesis\steganalysis\dataset\economy2.txt'
with open(file, 'r', encoding='utf-8-sig') as openFile:
    words = openFile.read().split()
#print (words)
n = 60  # words per segment
segments = [' '.join(words[i:i + n]) for i in range(0, len(words), n)]  # from link
for i, s in enumerate(segments, start=1):  # output files are numbered from 1
    out_path = (r"C:\Users\Nujou\Desktop\Master\thesis\steganalysis\dataset\datasetEco\Eco"
                + str(i) + ".txt")
    with open(out_path, "w", encoding='utf-8-sig') as seg:
        seg.write(s)