I am currently creating a bot that uses vosk AI to transcript conversations. so far I am able to make my bot join a call and say who is currently talking.
const { SlashCommandBuilder} = require('discord.js');
const { joinVoiceChannel, createAudioPlayer, NoSubscriberBehavior } = require('@discordjs/voice'); //, getVoiceConnection, VoiceConnectionStatus, createAudioPlayer, createAudioResource, StreamType
const { Readable } = require('stream');
//const { Model, KaldiRecognizer } = require('vosk');
const vosk = require('vosk');
//https://alphacephei.com/vosk/models
const model = new vosk.Model('model/vosk-model-en-us-0.42-gigaspeech');
module.exports = {
data: new SlashCommandBuilder()
.setName('connect')
.setDescription('Join channel to start listening'),
async execute(interaction) {
let talkingUser;
const ch = interaction.guild.channels.cache.find(c => c.name === 'transcript');
const voiceChannel = interaction.member.voice.channel;//(interaction.guild.members.cache.get(interaction.member.user.id)).voice.channel; //gets current user id
if (!voiceChannel){ //if they are in a voice channel return an error
await interaction.reply({content: 'Error: The voice channel does not exist!', ephemeral: true});
return;
}
const voiceConn = joinVoiceChannel({
channelId: voiceChannel.id,
guildId: interaction.guild.id,
adapterCreator: interaction.guild.voiceAdapterCreator,
selfDeaf: false,
});
await interaction.reply({ content: 'Ready to listen!', ephemeral: true });
await performSpeechRecognition(voiceConn, model, ch);
// voiceConn.receiver.speaking.on('start', (userId) => {//'end' works too
// //lis();
// //console.log(voiceConn.receiver.SpeakingMap.on());
// talkingUser = (interaction.guild.members.cache.get(userId)).displayName
// console.log(userId, 'started')
// //ch.send({content: `${talkingUser}: words`});
// })
}
}
async function performSpeechRecognition(voiceConn, model, ch) {
//const recognizer = new KaldiRecognizer(model, 16000);
const recognizer = new vosk.Recognizer(model, 16000);
recognizer.setWords(true);
recognizer.setVoice(voiceConn);
const audioPlayer = createAudioPlayer();
voiceConn.subscribe(audioPlayer);
audioPlayer.on('data', (audioData) => {
recognizer.acceptWaveform(audioData);
});
audioPlayer.on('error', (error) => {
console.error('Audio player error:', error);
});
recognizer.on('result', (result) => {
const text = result.text;
console.log('Recognized text:', text);
// Do whatever you want with the recognized text
// For example, you can send it to a transcript channel:
if (ch) {
ch.send({ content: text });
}
});
recognizer.on('error', (error) => {
console.error('Recognition error:', error);
});
}
I am not sure how to implement vosk, I am getting an error saying Error (VoskAPI:Model():model.cc:122) folder'...' does not contain model files. Make sure you specified the model path properly in Model Constructor. if you are not sure about the relative path, use absolute path specification.
but that's not my issue FIXED
My issue is I am not sure how to get the audio "recorded" to put into vosk in the first place, I did some research online and this is the best I can accomplish
const lis = () => {
const recognizer = new KaldiRecognizer(model, 16000);
recognizer.on('result', (result) => {
const text = result.text;
console.log(`Recognized:`, text);
});
recognizer.on('error', (err) => {
console.error('Recognition error:', err);
});
};
I do not know if I need anything else for Discord to pick up the audio itself as a temp recording I tried other implementations but when I run it I get an error saying
const recognizer = new KaldiRecognizer(model, 16000);
TypeError: KaldiRecognizer is not a constructor
the solution for KaldiRecognizer