I have a project that needs to get a recorded file and then process by the code and extract the text from file and match the extracted file with the other text and verify it. my problem is: I can't use recorded file in code and it does'nt read the file
init function is the fundamental of code.
verify functtion confirm the matched speech and text.
import argparse
import json
import os
import queue
import random
import sys
from difflib import SequenceMatcher
import numpy as np
import sounddevice as sd
import vosk
q = queue.Queue()
def int_or_str(text):
"""Helper function for argument parsing."""
try:
return int(text)
except ValueError:
return text
def callback(indata, frames, time, status):
"""This is called (from a separate thread) for each audio block."""
if status:
print(status, file=sys.stderr)
q.put(bytes(indata))
def init():
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument(
'-l', '--list-devices', action='store_true',
help='show list of audio devices and exit')
args, remaining = parser.parse_known_args()
if args.list_devices:
print(sd.query_devices())
parser.exit(0)
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
parents=[parser])
parser.add_argument(
'-f', '--filename', type=str, metavar='FILENAME',
help='audio file to store recording to')
parser.add_argument(
'-m', '--model', type=str, metavar='MODEL_PATH',
help='Path to the model')
parser.add_argument(
'-d', '--device', type=int_or_str,
help='input device (numeric ID or substring)')
parser.add_argument(
'-r', '--samplerate', type=int, help='sampling rate')
args = parser.parse_args(remaining)
try:
if args.model is None:
args.model = "model"
if not os.path.exists(args.model):
print("Please download a model for your language from https://alphacephei.com/vosk/models")
print("and unpack as 'model' in the current folder.")
parser.exit(0)
if args.samplerate is None:
device_info = sd.query_devices(args.device, 'input')
# soundfile expects an int, sounddevice provides a float:
args.samplerate = int(device_info['default_samplerate'])
model = vosk.Model(args.model)
if args.filename:
dump_fn = open(args.filename, "wb")
else:
dump_fn = None
except KeyboardInterrupt:
print('\nDone')
parser.exit(0)
except Exception as e:
parser.exit(type(e).__name__ + ': ' + str(e))
return model, args
def verify(random_sentence, model, args):
num, T_num, F_num, num_word = 0, 0, 0, 1
with sd.RawInputStream(samplerate=args.samplerate, blocksize=8000, device=args.device, dtype='int16',
channels=1, callback=callback):
rec = vosk.KaldiRecognizer(model, args.samplerate)
print("{}) ".format(num_word), random_sentence, end='\n')
print('=' * 30, end='\n')
run = True
while run:
data = q.get()
if rec.AcceptWaveform(data):
res = json.loads(rec.FinalResult())
res['text'] = res['text'].replace('ي', 'ی')
if SequenceMatcher(None, random_sentence, res['text']).ratio() > 0.65:
T_num, num, num_word += 1
else:
F_num, num, num_word += 1
run = False
print('=' * 30)
print('True Cases : {}\n False Cases : {}'.format(T_num, F_num))
if __name__ == "__main__":
model, args = init()
verify(random_sentences, model, args)