import os
import re
from os import listdir
def find_word(text, search):
    """Report whether ``search`` occurs in ``text`` as a whole word or phrase.

    The match is case-insensitive and anchored on word boundaries, so
    ``'word'`` is found in ``'a Word here'`` but not in ``'testword'``.

    Args:
        text: The string to look in.
        search: The literal word or phrase to look for. It is treated as
            plain text, not as a regular expression.

    Returns:
        True if at least one whole-word occurrence exists, otherwise False.
    """
    # Escape the search text so characters such as '.', '(' or '+' are matched
    # literally instead of being interpreted as regex syntax.
    pattern = r'\b' + re.escape(search) + r'\b'
    # re.search stops at the first hit; there is no need to collect them all.
    return re.search(pattern, text, flags=re.IGNORECASE) is not None
# --- Configuration: change these values before running ---------------------
# Put all the transcript .txt files in one folder
# (in my case it was: D:/onr/1738349/txt/).
search = 'word'  # word or phrase to search for; find_word ignores case

# Despite its name this is the path of the output FILE. Keep it outside the
# transcript folder, otherwise the result file itself would be scanned.
result_folder = "D:/onr/1738349/result.txt"

# Folder that holds all the transcript files.
transcript_folder = "D:/onr/1738349/txt"

# How many lines below a matching line its time stamp is read from.
# NOTE(review): the value 2 means one line is skipped between the match and
# its time stamp - confirm this against the transcript layout, and set it to
# 1 if the time stamp directly follows the text line.
TIME_LINE_OFFSET = 2


def _find_matches(lines, word):
    """Yield ``(line_number, time_text)`` for each line that contains ``word``.

    ``line_number`` is 1-based. ``time_text`` is the right-stripped line
    ``TIME_LINE_OFFSET`` lines below the match, or None when the file ends
    before that line.
    """
    for index, line in enumerate(lines):
        if not find_word(line, word):  # whole-word match: 'word' != 'testword'
            continue
        time_index = index + TIME_LINE_OFFSET
        if time_index < len(lines):
            time_text = lines[time_index].rstrip()
        else:
            time_text = None
        yield index + 1, time_text


with open(result_folder, "w") as f:
    # Sorted so the result file has a stable, predictable order.
    for filename in sorted(listdir(transcript_folder)):
        path = os.path.join(transcript_folder, filename)
        if not os.path.isfile(path):
            continue  # sub-directories cannot be opened as transcripts

        with open(path) as currentFile:
            # Read the whole file so looking ahead for the time stamp neither
            # skips lines from the search nor disturbs the line numbering.
            lines = currentFile.readlines()

        for line_number, time_text in _find_matches(lines, search):
            f.write('Found in ({0}) at line number: ({1}) '.format(
                filename, line_number))
            if time_text is not None:
                f.write('time: ({0}) \n'.format(time_text))
            else:
                # Match too close to the end of the file to have a time
                # stamp: still terminate the record so entries stay one
                # per line.
                f.write('\n')
Make sure you follow the comments.
(1) Create result.txt.
(2) Keep all the transcript files in one folder.
(3) Make sure (1) and (2) are not in the same folder.
(4) The directory paths will be different if you are using a Unix-based system (mine is Windows).
(5) Just run this script after making suitable changes; the script will take care of everything (it will find all the transcript files and write the results to a single file for your convenience).
The output (in result.txt) would be:
Found in (fileName.txt) at line number: (#lineNumber) time: (Time)
.......