When using the Speech Synthesis API in Safari on iOS or Chrome on Android, every voice of a given language sounds the same: whichever voice is selected, the speech is produced in one and the same voice for that language.
https://codepen.io/Dan-MacArthur/pen/dxyzpa
html:
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Lay the page out at device width; this test page is meant to be opened on phones. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Voice Test</title>
  <!-- script.js runs before <body> is parsed, so it defers its DOM lookups to the window "load" event. -->
  <script src="script.js"></script>
  <style>
    /* Replaces the obsolete align="center" attribute. */
    .centered {
      text-align: center;
    }
    .num {
      float: left;
      padding: 1%;
      font-size: 50px;
    }
    #numcontainer {
      width: 100%;
      height: 80px;
    }
    #errorlog {
      height: 500px;
      width: 98%;
      overflow: scroll;
    }
  </style>
</head>
<body>
  <h1 class="centered">Voice Test </h1>
  <p class="centered"> </p>
  <!-- The three numbers that get highlighted one after another while they are spoken. -->
  <div id="numcontainer">
    <div class="num" id="one"> 1 </div>
    <div class="num" id="two"> 2 </div>
    <div class="num" id="three"> 3 </div>
  </div>
  <br>
  <!-- Filled by populateVoiceList() with one <option> per available voice. -->
  <select id="voicelist"> </select>
  <button type="button" onclick="speak()">Play</button>
  <!-- Shows the voice name and the text currently being spoken. -->
  <div id="voiceinfo"></div>
  <br>
  <div id="voicenumbers"> </div>
  <br>
  <p>log:</p>
  <!-- Newest-first log of finished / failed speech runs. -->
  <div id="errorlog"> </div>
</body>
</html>
js:
// Shared state for the demo. The DOM references below are left undefined here
// and assigned inside the window "load" handler.
var synth = window.speechSynthesis; // speech synthesis controller used to list voices and to speak
var voiceSelect; // the voice <select> element
var playbutton; // NOTE(review): never assigned or read in the visible script
var voicenumbers; // the #voicenumbers element
var voices = []; // most recent result of synth.getVoices()
var voiceinfo; // the #voiceinfo element (shows current voice and text)
/**
 * Rebuilds the voice <select> from the voices the browser currently reports.
 *
 * Refreshes the shared `voices` array from `synth.getVoices()`, empties
 * `voiceSelect`, then appends one <option> per voice. Each option is labelled
 * "name (lang)", carries `data-lang` / `data-name` attributes, and keeps the
 * voice object itself on an `option.voice` expando property.
 */
function populateVoiceList() {
  voices = synth.getVoices();

  // This function is also installed as the `onvoiceschanged` handler at
  // script-evaluation time, so it can run before the "load" handler has
  // assigned `voiceSelect`. The load handler calls it again once the element
  // exists, so skipping the DOM work here loses nothing.
  if (!voiceSelect) {
    return;
  }

  voiceSelect.innerHTML = '';

  // `for...of` with a block-scoped binding; the previous `for (i = 0; ...)`
  // loop created an implicit global `i`.
  for (const voice of voices) {
    const option = document.createElement('option');
    option.textContent = voice.name + ' (' + voice.lang + ')';
    option.voice = voice;
    option.setAttribute('data-lang', voice.lang);
    option.setAttribute('data-name', voice.name);
    voiceSelect.appendChild(option);
  }
}
// Once the page has loaded, resolve the DOM references declared at the top of
// the script, fill the voice list, and wire up the optional "play all" button.
window.addEventListener("load", function() {
  voiceSelect = document.querySelector('select');
  voicenumbers = document.querySelector("#voicenumbers");
  voiceinfo = document.querySelector("#voiceinfo");
  populateVoiceList();

  // The accompanying markup has no #playall element (its Play button uses an
  // inline onclick instead), so querySelector returns null there. Calling
  // addEventListener on null threw a TypeError on every page load; only attach
  // the handler when the button actually exists.
  const playAllButton = document.querySelector("#playall");
  if (playAllButton) {
    playAllButton.addEventListener("click", function() {
      // NOTE(review): `voicecount` is not declared anywhere in the visible
      // script, so this assignment creates/overwrites a global - confirm
      // whether it is still needed.
      voicecount = 0;
      Start();
    });
  }
});
// Rebuild the voice list whenever the browser signals that its set of voices
// changed. The assignment is skipped when the `onvoiceschanged` property is
// not defined on speechSynthesis.
if (typeof speechSynthesis.onvoiceschanged !== 'undefined') {
  speechSynthesis.onvoiceschanged = populateVoiceList;
}
/**
 * Adds a range covering offsets 0 to 1 of the #numcontainer element to the
 * window's current selection. Unlike highlightnum1/2/3 it does not clear the
 * ranges that are already selected.
 */
function highlightnum() {
  const container = document.querySelector("#numcontainer");
  const containerRange = new Range();
  containerRange.setStart(container, 0);
  containerRange.setEnd(container, 1);

  const selection = window.getSelection();
  selection.addRange(containerRange);
}
/**
 * Replaces the current window selection with a range covering offsets 0 to 1
 * of the element matched by `selector` (for an element node those offsets are
 * child indexes, so this is its first child).
 *
 * Existing ranges are always cleared first. If no element matches, nothing new
 * is selected instead of letting Range.setStart throw on a null node.
 *
 * @param {string} selector - CSS selector of the element to highlight.
 */
function highlightFirstChild(selector) {
  const selection = window.getSelection();
  selection.removeAllRanges();

  const node = document.querySelector(selector);
  if (!node) {
    return;
  }

  const range = new Range();
  range.setStart(node, 0);
  range.setEnd(node, 1);
  selection.addRange(range);
}

/** Highlights the "1" on the page (element #one). */
function highlightnum1() {
  highlightFirstChild("#one");
}

/** Highlights the "2" on the page (element #two). */
function highlightnum2() {
  highlightFirstChild("#two");
}

/** Highlights the "3" on the page (element #three). */
function highlightnum3() {
  highlightFirstChild("#three");
}
/**
 * Click entry point for the "play all" button: delegates straight to speak()
 * and returns nothing.
 */
function Start() {
  speak();
}
/**
 * Speaks "1", "2" and "3" one after another in the voice selected in the voice
 * list, highlighting the matching number on the page as each one is queued.
 *
 * The voice is resolved once, from the selected <option>'s `voice` property
 * (stored by populateVoiceList), and used for the whole run, so the voice that
 * is requested and the voice named in the info panel and log always agree.
 * Each utterance is queued from the previous utterance's `onend` handler.
 *
 * Side effects: updates #voiceinfo with the voice name and the text currently
 * being spoken, logs each utterance to the console, and prepends a green
 * "finished" line or a red error line to #errorlog.
 *
 * If no option is selected (for example the voice list is still empty) a red
 * line is logged and nothing is spoken.
 */
function speak() {
  const errorLog = document.querySelector("#errorlog");

  // Prepends one coloured line to the on-page log (newest entry first).
  // Building a node avoids re-parsing the log's existing innerHTML.
  function logLine(color, message) {
    const line = document.createElement("p");
    line.style.color = color;
    line.textContent = message;
    errorLog.prepend(line);
  }

  const selectedOption = voiceSelect.options[voiceSelect.selectedIndex];
  const voice = selectedOption ? selectedOption.voice : undefined;
  if (!voice) {
    logLine("red", "no voice selected");
    return;
  }

  const steps = [
    { text: "1", highlight: highlightnum1 },
    { text: "2", highlight: highlightnum2 },
    { text: "3", highlight: highlightnum3 },
  ];

  // Highlights and speaks steps[index], then chains to the next step on end.
  function speakStep(index) {
    const step = steps[index];
    step.highlight();

    const utterance = new SpeechSynthesisUtterance(step.text);
    // Only `lang` and `voice` are set: utterances have no `voiceURI` property,
    // so the assignments that used to be here had no effect.
    utterance.lang = voice.lang;
    utterance.voice = voice;

    utterance.onend = function () {
      if (index + 1 < steps.length) {
        speakStep(index + 1);
      } else {
        logLine("green", "voice: " + voice.name + " finished speaking " + "\n");
      }
    };
    // Every utterance reports failures, not just the first one.
    utterance.onerror = function (event) {
      logLine("red", "voice: " + voice.name + " did not speak " + event.error);
      console.log(event);
    };

    synth.speak(utterance);
    console.log(utterance);
    voiceinfo.innerHTML = "voice: " + voice.name + "<br>" + "text: " + utterance.text;
  }

  speakStep(0);
}
To reproduce: open the CodePen on a mobile device, select any en-US voice and press Play. Then select a different en-US voice and press Play again. Both runs are spoken in the same voice.
I've tried providing the voice URI directly to the utterance and debugging in the web inspector. The URI indicates that the correct voice should be used.
I expect the '1 2 3' to be spoken in the voice selected.
The actual output is '1 2 3' spoken in the same voice for a given language.