Why do mobile devices use the same voice for the same language when using speech synthesis?

Question

When using the speech synthesis API in Safari on iOS or Chrome on Android, every voice offered for a given language sounds the same: whichever voice I select, the same one is used to speak.

https://codepen.io/Dan-MacArthur/pen/dxyzpa

html:

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Voice Test</title>
<!-- script.js runs before the body is parsed, so it waits for the window
     "load" event before looking up any of the elements below. -->
<script src="script.js"></script>
<style>
.num {
    float: left;
    padding: 1%;
    font-size: 50px;
}

#numcontainer {
    width: 100%;
    height: 80px;
}

#errorlog {
    height: 500px;
    width: 98%;
    overflow: scroll;
}

/* Replaces the deprecated align="center" attribute. */
.centered {
    text-align: center;
}
</style>
</head>
<body>
<h1 class="centered">Voice Test </h1>
<p class="centered"> </p>
<!-- The numbers that are highlighted one by one while they are spoken. -->
<div id="numcontainer">
    <div class="num" id="one"> 1 </div>
    <div class="num" id="two"> 2 </div>
    <div class="num" id="three"> 3 </div>
</div>
<br>
<!-- Filled by script.js with one option per available voice. -->
<select id="voicelist" aria-label="Voice"> </select>
<button onclick="speak()">Play</button>
<div id="voiceinfo"></div>
<br>
<div id="voicenumbers"> </div>
<br>
<p>log:</p>
<div id="errorlog"> </div>
</body>
</html>

js:

// Shared state for the voice test page. The element references are left
// undefined here and are assigned inside the window "load" handler.
var synth = window.speechSynthesis; // used for getVoices() and speak()
var voiceSelect; // the page's <select>; filled by populateVoiceList()
var playbutton; // never assigned or read in the code shown here
var voicenumbers; // the #voicenumbers element
var voices = []; // latest result of synth.getVoices()
var voiceinfo; // the #voiceinfo element; shows the current voice and text

/**
 * Refreshes the shared `voices` array from the speech synthesizer and rebuilds
 * the voice <select> with one <option> per voice.
 *
 * Each option is labelled "name (lang)", carries the voice object itself on
 * its `voice` property, and mirrors name/lang into data-* attributes.
 *
 * This function is also installed as the `voiceschanged` handler, which may
 * run before the window "load" handler has looked up the <select>. In that
 * case only `voices` is refreshed; the list is built by the later call made
 * from the load handler.
 */
function populateVoiceList() {
  voices = synth.getVoices();
  if (!voiceSelect) {
    return;
  }
  voiceSelect.innerHTML = '';
  // `let` keeps the counter local; the bare `i = 0` leaked a global and
  // throws a ReferenceError in strict-mode code.
  for (let i = 0; i < voices.length; i++) {
    const voice = voices[i];
    const option = document.createElement('option');
    option.textContent = voice.name + ' (' + voice.lang + ')';
    option.voice = voice;
    option.setAttribute('data-lang', voice.lang);
    option.setAttribute('data-name', voice.name);
    voiceSelect.appendChild(option);
  }
}

// Once the page has loaded, look up the elements the script works with,
// build the initial voice list and wire up the optional "play all" control.
window.addEventListener("load", function() {
    voiceSelect = document.querySelector('select');
    voicenumbers = document.querySelector("#voicenumbers");
    voiceinfo = document.querySelector("#voiceinfo");
    populateVoiceList();

    // querySelector returns null when nothing matches, and the markup that
    // accompanies this script has no #playall element. Calling
    // addEventListener on that null threw a TypeError on every page load.
    var playAll = document.querySelector("#playall");
    if (playAll) {
        playAll.addEventListener("click", function() {
            // Written as an explicit window property: same global as the bare
            // `voicecount = 0`, but also valid in strict mode.
            window.voicecount = 0;
            Start();
        });
    }
});


// Rebuild the voice list every time the browser signals that its set of
// voices changed; skipped where the handler property is not exposed at all.
if (typeof speechSynthesis.onvoiceschanged !== "undefined") {
    speechSynthesis.onvoiceschanged = populateVoiceList;
}

/**
 * Adds a selection range covering the first child node of the element that
 * matches `selector`, which makes the browser render it as highlighted.
 *
 * Private helper shared by the highlightnum* functions.
 *
 * @param {string} selector - CSS selector of the element to highlight.
 * @param {boolean} clearExisting - When true, drop the current selection
 *     before the element is looked up.
 */
function selectFirstChildOf(selector, clearExisting) {
    var selection = window.getSelection();
    if (clearExisting) {
        selection.removeAllRanges();
    }
    var node = document.querySelector(selector);
    if (!node) {
        // Highlighting is cosmetic: a missing element must not throw and
        // abort the caller (speak() highlights before it queues speech).
        return;
    }
    var ran = new Range();
    ran.setStart(node, 0);
    ran.setEnd(node, 1);
    selection.addRange(ran);
}

/**
 * Highlights the first child of #numcontainer. Unlike the numbered variants
 * it does not clear the existing selection first.
 */
function highlightnum() {
    selectFirstChildOf("#numcontainer", false);
}

/** Replaces the current selection with the content of #one. */
function highlightnum1() {
    selectFirstChildOf("#one", true);
}

/** Replaces the current selection with the content of #two. */
function highlightnum2() {
    selectFirstChildOf("#two", true);
}

/** Replaces the current selection with the content of #three. */
function highlightnum3() {
    selectFirstChildOf("#three", true);
}

/**
 * Entry point used by the "play all" click handler; it only delegates to
 * speak() and returns nothing.
 */
function Start() {
    speak();
}


/**
 * Speaks "1", "2" and "3" one after another with the voice currently selected
 * in #voicelist, highlighting the matching number while it is spoken.
 *
 * Each utterance is queued from the `end` handler of the previous one. The
 * selection is re-read for every utterance, so changing the <select> during a
 * run affects the numbers not yet spoken. After the last number a green
 * "finished speaking" entry is prepended to #errorlog; an error on any of the
 * three utterances prepends a red entry.
 *
 * If no option (or no voice) is selected, a red log entry is written and
 * nothing is spoken.
 */
function speak() {
    const steps = [
        { text: "1", highlight: highlightnum1 },
        { text: "2", highlight: highlightnum2 },
        { text: "3", highlight: highlightnum3 }
    ];

    // Prepends a coloured paragraph to the on-page log. The message goes in
    // through textContent so a voice name is never parsed as markup.
    function log(color, message) {
        const errorLog = document.querySelector("#errorlog");
        if (!errorLog) {
            return;
        }
        const p = document.createElement("p");
        p.style.color = color;
        p.textContent = message;
        errorLog.prepend(p);
    }

    // Shows which voice and text were handed to the synthesizer last.
    function showInfo(voiceName, text) {
        voiceinfo.textContent = "";
        voiceinfo.append("voice: " + voiceName, document.createElement("br"), "text: " + text);
    }

    // Highlights and speaks steps[stepIndex], then chains to the next step.
    function speakStep(stepIndex) {
        const step = steps[stepIndex];
        step.highlight();

        const index = document.querySelector("#voicelist").selectedIndex;
        const option = voiceSelect.children[index];
        if (!option || !option.voice) {
            // Empty voice list or nothing selected (selectedIndex === -1).
            log("red", "no voice is selected; nothing was spoken");
            return;
        }

        // Prefer the synthesizer's current voice object at this index and fall
        // back to the one stored on the option when the list has shrunk.
        const voice = window.speechSynthesis.getVoices()[index] || option.voice;

        const utterance = new SpeechSynthesisUtterance(step.text);
        // NOTE(review): voiceURI is kept from the original code; `voice` and
        // `lang` are the properties the current API defines - confirm whether
        // any target browser still reads voiceURI.
        utterance.voiceURI = option.voice.voiceURI;
        utterance.lang = option.voice.lang;
        utterance.voice = voice;

        utterance.onend = function () {
            if (stepIndex + 1 < steps.length) {
                speakStep(stepIndex + 1);
            } else {
                log("green", "voice: " + voice.name + " finished speaking " + "\n");
            }
        };

        // Attached to every utterance, not only the first, so a failure on
        // "2" or "3" is reported as well.
        utterance.onerror = function (event) {
            log("red", " voice: " + voice.name + " did not speak " + event.error);
            console.log(event);
        };

        synth.speak(utterance);
        console.log(utterance);
        showInfo(voice.name, utterance.text);
    }

    speakStep(0);
}

Open the CodePen on a mobile device, select any en-US voice and press Play. Then select a different en-US voice and press Play again. Both voices sound the same.

I've tried providing the voice URI directly to the utterance and debugging in the web inspector. The URI says it should be using the correct voice.

I expect the '1 2 3' to be spoken in the voice selected.

The actual output is '1 2 3' spoken in the same voice for a given language.

Hi Dan, I think your question echoes mine here: https://stackoverflow.com/questions/52975068/speechsynthesis-in-android-chrome-cannot-change-english-voice-from-us-english It's the same case with other pairs e.g. French and Portuguese. — Frazer, Jul 18 '19 at 11:13
I believe it does, but my CodePen demonstrates it better, as you're able to freely change voices. I believe it is a bug within iOS — Daniel MacArthur, Jul 29 '19 at 12:22

score 1 · Answer 1 · answered Apr 22 '20 at 13:45

On Android 5.0.2 and perhaps other versions (on Chrome and Firefox), the only English voice that will play is the one selected in the Android settings:

You can pick which one by going to the Settings app, then Controls->Language and input->Text-to-speech options. Select the gear icon next to Google Text-to-speech Engine, then under Language you can update the exact locale you want to use. If you select "Install voice data" you can even select from a sample of different voices for some locales. You need to restart the device after changing this setting for it to take effect.

On iOS it is a little bit better: you can select one voice per locale...so your codepen works if you change between Karen (en-AU) and Daniel (en-GB), but not if you switch between Karen (en-AU) and Gordon (en-AU). You will always get Karen.

These and other oddities with the speechSynthesis API are listed here.

Why do mobile devices use the same voice for the same language when using speech synthesis?

1 Answer