Google Speech Recognition only works on audio that is a few seconds long, so I split my audio file into chunks. This is the class that splits the audio.
class Split_audio():
    """
    Split a WAV file into chunks on silence and export the chunks to disk.

    Every method receives an ``audio`` object; the only thing read from it is
    ``audio.get_nameAudioFile()``, whose return value is used as the path of
    the WAV file.
    """

    def __init__(self):
        """
        Constructor
        """

    def create_folder(self, audio):
        """
        Create (or recreate) the folder that will hold the chunks.

        The folder is named after the audio file without its extension,
        e.g. ``test.wav`` -> ``test``. When the file name has no extension
        the folder is named ``<file name>_chunks`` so that it cannot collide
        with the audio file itself. An existing folder of that name is
        deleted together with its contents.

        :param audio: object exposing ``get_nameAudioFile()``.
        :return: path of the freshly created, empty folder.
        """
        name = audio.get_nameAudioFile()
        # splitext only inspects the last path component, so a dot in a
        # directory name ("dir.v1/clip") is never mistaken for an extension,
        # and a name without any dot is not truncated by one character.
        folder, extension = os.path.splitext(name)
        if not extension:
            folder = name + "_chunks"
        # if folder exists, overwrite it
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.makedirs(folder)
        return folder

    def split(self, audio, silence_thresh=None, min_silence_len=500):
        """
        Split the audio file on silence.

        :param audio: object exposing ``get_nameAudioFile()``.
        :param silence_thresh: level in dBFS below which audio is treated as
            silence. When ``None`` it defaults to the file's own loudness
            (``dBFS`` truncated to an int) minus 19.
        :param min_silence_len: minimum silence length, in milliseconds,
            passed through to ``split_on_silence``.
        :return: the chunks returned by ``split_on_silence``; an empty list
            when the threshold has to be derived from a digitally silent file.
        """
        sound_file = AudioSegment.from_wav(audio.get_nameAudioFile())
        if silence_thresh is None:
            loudness = sound_file.dBFS
            # pydub reports -inf for digitally silent audio; int(-inf) would
            # raise OverflowError, and there is nothing to split anyway.
            if loudness == float("-inf"):
                return []
            silence_thresh = int(loudness) - 19
        return split_on_silence(
            sound_file,
            silence_thresh=silence_thresh,
            min_silence_len=min_silence_len,
        )

    def export(self, audio, path_folder=None, silence_thresh=None,
               min_silence_len=500):
        """
        Export the chunks as WAV files named ``chunk0.wav``, ``chunk1.wav``...

        :param audio: object exposing ``get_nameAudioFile()``.
        :param path_folder: destination folder. When ``None`` the folder is
            (re)created by ``create_folder``; otherwise it is created only if
            it does not exist yet and its current contents are left alone.
        :param silence_thresh: forwarded to ``split``.
        :param min_silence_len: forwarded to ``split``.
        """
        audio_chunks = self.split(
            audio,
            silence_thresh=silence_thresh,
            min_silence_len=min_silence_len,
        )
        if path_folder is None:
            path_folder = self.create_folder(audio)
        elif not os.path.isdir(path_folder):
            os.makedirs(path_folder)
        for i, chunk in enumerate(audio_chunks):
            out_file = "chunk{0}.wav".format(i)
            # os.path.join picks the right separator for the platform
            path = os.path.join(path_folder, out_file)
            chunk.export(path, format="wav")
I conclude that the quality of the google_recognize output depends on silence_thresh and min_silence_len. After testing on 3 different audio files, I set the values to silence_thresh = dBFS of the audio - 19 and min_silence_len = 500 ms. One month later I retested my code on the same audio. Oops: I got a transcript totally different from the first one. Here are the two results: First result, second result. Any suggestions?