# Import the required libraries
import speech_recognition as sr # Library for speech recognition
import os # Library for interacting with the operating system
from pydub import AudioSegment # Library for working with audio files
from pydub.silence import split_on_silence # Function for splitting audio files based on silence
# Module-level speech recognizer instance, created once at import time;
# transcribe_large_audio_file() below uses it for every audio chunk
recognizer = sr.Recognizer()
def transcribe_large_audio_file(audio_path):
    """
    Split an MP3 file on silence and run speech recognition on each chunk.

    The audio is cut wherever at least 700 ms of silence is found. Each chunk
    is exported as ``chunk<i>.wav`` into the ``audio-chunks`` directory
    (relative to the current working directory) and then passed to the
    module-level ``recognizer``.

    Args:
        audio_path: Path to the MP3 file to transcribe.

    Returns:
        The recognized chunks concatenated into one string, each chunk
        capitalized and followed by ". ". Chunks that could not be
        recognized are left out. After five failed chunks in a row the file
        is abandoned and the text gathered so far is returned.
    """
    # Newer SpeechRecognition releases expose AudioFile / recognize_google,
    # older ones only WavFile / recognize. Prefer the current names and fall
    # back to the legacy ones so the function works with either generation.
    audio_file_cls = getattr(sr, "AudioFile", None) or sr.WavFile
    recognize = (
        getattr(recognizer, "recognize_google", None) or recognizer.recognize
    )

    # Load audio file with pydub
    audio = AudioSegment.from_mp3(audio_path)

    # Split at silent parts of 700 ms or more; the silence threshold is
    # relative to the average loudness of this particular file
    audio_chunks = split_on_silence(
        audio,
        min_silence_len=700,
        silence_thresh=audio.dBFS - 14,
        keep_silence=700,
    )

    # Directory that receives the exported chunk files
    chunks_dir = "audio-chunks"
    os.makedirs(chunks_dir, exist_ok=True)

    pieces = []
    failed_attempts = 0  # consecutive failures; reset after every success

    for i, chunk in enumerate(audio_chunks, start=1):
        # The recognizer reads from a file, so export the chunk as WAV first
        chunk_file_name = os.path.join(chunks_dir, f"chunk{i}.wav")
        chunk.export(chunk_file_name, format="wav")

        # record() reads the whole chunk; listen() is meant for live sources
        # and stops at the first pause it detects
        with audio_file_cls(chunk_file_name) as src:
            listened_audio = recognizer.record(src)

        # Convert audio to text
        try:
            text = recognize(listened_audio)
        except Exception as error:
            # Unintelligible audio and service/network errors are expected
            # here; report them instead of hiding them, and do not intercept
            # KeyboardInterrupt/SystemExit the way a bare except would
            failed_attempts += 1
            print(f"{chunk_file_name} : recognition failed ({error!r})")
            if failed_attempts == 5:
                print(f"Skipping {audio_path} due to too many errors")
                break
        else:
            failed_attempts = 0
            text = f"{text.capitalize()}. "
            print(chunk_file_name, ":", text)
            pieces.append(text)

    # Return the transcription for all chunks
    return "".join(pieces)
# Names of the audio files that have already been handled (starts empty)
processed_files = []
# Folder that is scanned for .mp3 files and that receives the transcripts
output_dir = "C:\\Store\\output"
# Make sure the folder exists; no error is raised if it is already there
os.makedirs(output_dir, exist_ok=True)
# Iterate through all .mp3 files in the directory and transcribe them
with open(os.path.join(output_dir, 'result.txt'), 'w') as result_file:
for file in os.listdir(output_dir):
# Process only .mp3 files that have not been processed before
if file.endswith(".mp3") and file not in processed_files:
mp3_file_path = os.path.join(output_dir, file)
print(f"Processing {mp3_file_path}")
try:
# Transcribe the audio file
transcription = transcribe_large_audio_file(mp3_file_path)
except LookupError as error:
# If there is an error, skip the file and continue with the next one
print(f"Skipping {mp3_file_path} due to error: {error}")
continue
else:
# Save the transcription to a text file with the same name as the audio file
txt_file_path = os.path.join(output_dir, f"{os.path.splitext(file)[0]}.txt")
with open(txt_file_path, 'w') as txt_file:
txt_file.write(transcription)
# Print the transcription and the path to the saved text file
print(transcription)
print(f"Transcription saved to {txt_file_path}")
# Save the transcription to the result