Come up with an array of button names. Because SpeechRecognition reports spoken numbers as digits (e.g. `1`, not `one`), use the numeric values rather than their word representations.
// Spoken labels, one entry per button.
var buttonNames = [
  'None',
  '1',
  '2',
  '3',
];
I had trouble giving an embedded Stack Snippet permission to access the microphone (probably because of cross-domain and sandboxing rules), so I put all the code in a userscript. It replaces the page's HTML with your HTML. Click on the document body and recognition will start (open your browser's console to see what it's doing). Then speak one of the button names. Make sure Stack Overflow - or whatever domain you run the userscript on - has permission to use your microphone.
When the `onresult` handler is triggered (when you stop speaking), take the transcript of the most recent result and see if what was said matches any of the `buttonNames`. If so, `querySelectorAll` the buttons in the document, and `.click()` the button at the matching index.
// ==UserScript==
// @name Userscript Speech Recognition
// @namespace CertainPerformance
// @version 1
// @match https://stackoverflow.com/questions/51702275/click-button-using-javascript-speech-recognition-tampermonkey
// @grant none
// ==/UserScript==
// Replacement page content: four buttons (ids radio0..radio3) inside a tall
// container.
const replacementMarkup = `
<div class="radio-container" style="height:1000px">
<div class="col-6">
<button id="radio0">None</button>
</div>
<div class="col-6">
<button id="radio1">One</button>
</div>
<div class="col-6">
<button id="radio2">Two</button>
</div>
<div class="col-6">
<button id="radio3">Three +</button>
</div>
</div>
`;

// Empty the host page's <head>, then swap its <body> for the markup above.
document.head.innerHTML = '';
document.body.innerHTML = replacementMarkup;
// Log every click whose target is a <button>; clicks on anything else are
// ignored.
document.addEventListener('click', (event) => {
  const clicked = event.target;
  if (clicked.matches('button')) {
    console.log('Click detected: ' + clicked.outerHTML);
  }
});
// Resolve the Web Speech API constructors as properties of `window`.
// Reading a missing property yields undefined, whereas referencing a missing
// bare identifier (e.g. `webkitSpeechRecognition`) throws a ReferenceError.
// Going through `window` also keeps the hoisted `var` declarations below from
// shadowing the browser-provided globals when the script runs inside a
// function wrapper.
var SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
var SpeechGrammarList = window.SpeechGrammarList || window.webkitSpeechGrammarList;
var SpeechRecognitionEvent = window.SpeechRecognitionEvent || window.webkitSpeechRecognitionEvent;
// Single recognizer instance that the handlers in this script attach to.
var recognition = new SpeechRecognition();

// Transcripts that trigger a click; the position of a name in this list is
// the position of the button that gets clicked.
var buttonNames = ['None', '1', '2', '3'];
// Start listening when the page is clicked anywhere except on a button.
// Button clicks are skipped so that the synthetic .click() fired by the
// result handler does not restart recognition.
document.body.onclick = function(e) {
  if (e.target.matches('button')) return;
  try {
    recognition.start();
  } catch (err) {
    // start() throws InvalidStateError when recognition is already running
    // (e.g. the body was clicked twice); that is harmless, so just report it.
    // Anything else is unexpected and is rethrown.
    if (!err || err.name !== 'InvalidStateError') throw err;
    console.log('Already listening');
    return;
  }
  console.log('Listening');
};
/**
 * Finds which entry of `names` was spoken.
 *
 * The comparison ignores letter case and surrounding whitespace. A name
 * matches when it equals either the whole transcript or only its last word,
 * so both "2" and "click number 2" select the name '2'.
 *
 * @param {string} transcript - Text reported by the recognizer.
 * @param {string[]} names - Candidate button names.
 * @returns {number} Index of the first matching name, or -1 if none matches.
 */
function findSpokenButtonIndex(transcript, names) {
  const heard = transcript.trim().toLowerCase();
  const lastWord = heard.split(/\s+/).pop();
  return names.findIndex((name) => {
    const candidate = name.trim().toLowerCase();
    return candidate === heard || candidate === lastWord;
  });
}

// When a result arrives, read the transcript of the most recent result and
// click the button whose position matches the spoken name.
recognition.onresult = function(event) {
  const last = event.results.length - 1;
  const speechText = event.results[last][0].transcript;
  console.log('Heard ' + speechText);
  const foundButtonIndex = findSpokenButtonIndex(speechText, buttonNames);
  console.log(foundButtonIndex);
  if (foundButtonIndex === -1) return;
  // The page may contain fewer buttons than names; skip instead of throwing.
  const button = document.querySelectorAll('button')[foundButtonIndex];
  if (button) button.click();
};
// Once the recognizer reports that speech has ended, stop the session.
recognition.onspeechend = () => {
  recognition.stop();
};
// Report a "nomatch" event on the console; the event object itself is unused.
recognition.onnomatch = () => {
  console.log('Not recognized');
};
// Report the event's `error` value on the console.
recognition.onerror = ({ error }) => {
  console.log('Error ' + error);
};
For a more generic solution, where the buttons can have any text inside them and you want to be able to speak the button text and have the appropriate button clicked, you might `querySelectorAll` all buttons on page load, map them to an object with keys corresponding to their text content, and then click `buttonObj[speechText]` if it exists.