1

I am trying to invoke the Microsoft Bing Text to Speech API in Unity3D.

This API requires an accessToken and other parameters that will be passed in the request headers. First of all, in order to get the accessToken, I have sent a POST request to the Token API, after that, I have sent the other parameters in the request headers and it worked perfectly (returns a Wav audioclip) in Postman, like so:

enter image description here

To do this in Unity, I used the UnityWebRequest class to send two POST requests (sequentially) and then receive the audio response.

using System.Collections;
using System.Xml.Linq;
using UnityEngine;
using UnityEngine.Networking;

/// <summary>
/// Unity component that speaks <see cref="textToSpeak"/> through the Bing Text to Speech
/// REST API. It first exchanges the subscription key for an access token, then posts an
/// SSML document to the synthesize endpoint and plays the returned WAV on
/// <see cref="audioSource"/>.
/// </summary>
public class TextToSpeech : MonoBehaviour
{
    /// <summary>Audio source used for playback; (re)resolved from this GameObject in <see cref="Speak"/>.</summary>
    public AudioSource audioSource;
    public static readonly string accessUri = "https://api.cognitive.microsoft.com/sts/v1.0/issueToken";
    public static readonly string synthesizeUri = "https://speech.platform.bing.com/synthesize";
    /// <summary>Plain text that is wrapped in SSML and sent to the service.</summary>
    public string textToSpeak = "Oh yeah! Finally Dorot is speaking.";
    private static string accessToken;
    // NOTE(review): placeholder value; a real key should not be committed to source control.
    private static readonly string apiKey = "MY API KEY";
    private string postStringData;

    /// <summary>
    /// Parameterless constructor so the engine can create the component
    /// (e.g. via AddComponent or scene loading).
    /// </summary>
    public TextToSpeech()
    {
    }

    /// <summary>
    /// Kept for backward compatibility. NOTE(review): Unity discourages constructing
    /// MonoBehaviours with <c>new</c>; an instance created this way is not attached to a
    /// GameObject, so <see cref="Speak"/> cannot resolve an AudioSource or start coroutines.
    /// Prefer AddComponent and assigning <see cref="textToSpeak"/> afterwards.
    /// </summary>
    /// <param name="textToSpeak">Text to synthesize.</param>
    public TextToSpeech (string textToSpeak)
    {
        this.textToSpeak = textToSpeak;
    }

    /// <summary>
    /// Resolves the AudioSource on this GameObject and starts the token + synthesize sequence.
    /// </summary>
    public void Speak()
    {
        audioSource = gameObject.GetComponent<AudioSource>();
        StartCoroutine(RequestToken(apiKey));
    }

    /// <summary>
    /// Builds the SSML document for the given text using the en-US male "BenjaminRUS" voice.
    /// </summary>
    /// <param name="textToSpeak">Text placed inside the SSML voice element (XML-escaped by XElement).</param>
    /// <returns>The SSML document serialized as a string.</returns>
    private string GenerateSsml(string textToSpeak)
    {
        var ssmlDoc = new XDocument(
                          new XElement("speak",
                              new XAttribute("version", "1.0"),
                              new XAttribute(XNamespace.Xml + "lang", "en-US"),
                              new XElement("voice",
                                  new XAttribute(XNamespace.Xml + "lang", "en-US"),
                                  new XAttribute(XNamespace.Xml + "gender", "Male"),
                                  new XAttribute("name", "Microsoft Server Speech Text to Speech Voice (en-US, BenjaminRUS)"),
                                  textToSpeak)));
        return ssmlDoc.ToString();
    }

    /// <summary>
    /// Coroutine that requests an access token with the subscription key and, on success,
    /// starts <see cref="Synthesize"/> with the SSML generated from <see cref="textToSpeak"/>.
    /// </summary>
    /// <param name="apiKey">Subscription key sent in the Ocp-Apim-Subscription-Key header.</param>
    public IEnumerator RequestToken(string apiKey)
    {
        // Dispose the request once the token text has been copied out of it.
        using (var tokenRequest = UnityWebRequest.Post(accessUri, "data"))
        {
            tokenRequest.SetRequestHeader("Ocp-Apim-Subscription-Key", apiKey);

            yield return tokenRequest.SendWebRequest();

            // isHttpError only covers HTTP status failures; transport failures
            // (DNS, no connectivity, ...) are reported through isNetworkError.
            if (tokenRequest.isNetworkError || tokenRequest.isHttpError)
            {
                Debug.LogError("HTTP Error: " + tokenRequest.error + " Code: " + tokenRequest.responseCode);
            }
            else
            {
                postStringData = GenerateSsml(textToSpeak);
                accessToken = tokenRequest.downloadHandler.text;
                Debug.Log("Access token: " + accessToken);

                StartCoroutine(Synthesize(postStringData, accessToken));
            }
        }
    }

    /// <summary>
    /// Coroutine that posts the SSML to the synthesize endpoint and plays the returned WAV clip.
    /// </summary>
    /// <param name="text">SSML document to synthesize.</param>
    /// <param name="token">Access token sent as a Bearer Authorization header.</param>
    public IEnumerator Synthesize(string text, string token)
    {
        // UnityWebRequest.Post(string, string) URL-encodes its body, which would corrupt the
        // SSML, so the request is built by hand with a raw UTF-8 upload handler.
        using (var synReq = new UnityWebRequest(synthesizeUri, UnityWebRequest.kHttpVerbPOST))
        {
            synReq.uploadHandler = new UploadHandlerRaw(System.Text.Encoding.UTF8.GetBytes(text));
            // DownloadHandlerAudioClip.GetContent requires this handler type on the request.
            synReq.downloadHandler = new DownloadHandlerAudioClip(synthesizeUri, AudioType.WAV);
            // Chunked transfer encoding is on by default and makes this endpoint answer 408.
            synReq.chunkedTransfer = false;

            synReq.SetRequestHeader("Content-Type", "application/ssml+xml");
            synReq.SetRequestHeader("X-Microsoft-OutputFormat", "riff-16khz-16bit-mono-pcm");
            synReq.SetRequestHeader("X-Search-AppId", "07D3234E49CE426DAA29772419F436CA");
            synReq.SetRequestHeader("X-Search-ClientID", "1ECFAE91408841A480F00935DC390960");
            synReq.SetRequestHeader("User-Agent", "Dorot");
            synReq.SetRequestHeader("Authorization", "Bearer " + token);

            yield return synReq.SendWebRequest();

            if (synReq.isNetworkError || synReq.isHttpError)
            {
                Debug.LogError("HTTP Error: " + synReq.error + " Code: " + synReq.responseCode + " isNetworkError: " + synReq.isNetworkError + " isDone: " + synReq.isDone);
            }
            else
            {
                AudioClip cc = DownloadHandlerAudioClip.GetContent(synReq);
                if (cc == null || audioSource == null)
                {
                    // Nothing to play (undecodable audio) or nowhere to play it.
                    Debug.LogError("Text to speech: no audio clip decoded or no AudioSource assigned.");
                }
                else
                {
                    audioSource.clip = cc;
                    audioSource.Play();
                }
            }
        }
    }
}

As a result, the first API correctly returned the token, but the second returned HTTP status code 408, like so:

enter image description here

How can I solve this problem? Thanks.

Montacer Dkhilali
  • 390
  • 1
  • 6
  • 20
  • 1
    Do you still need the `Ocp-Apim-Subscription-Key` header in the "synthesize" request? – Jon Apr 04 '18 at 19:58
  • 1
    @Jon no, the token replaces the api key. OP, can you post a reproducible example in Github? Your code looks okay to me but I don't know Unity so I can't run it. – Maria Ines Parnisari Apr 05 '18 at 06:56
  • 1
    @Jon, the subscription key is only used to get the access token, which is then sent in the Authorization header. – Montacer Dkhilali Apr 05 '18 at 07:58
  • 1
    @MariaInesParnisari, this is a .NET Core example from GitHub: https://github.com/Azure-Samples/Cognitive-Speech-TTS/blob/master/Samples-Http/NETCore/TTSProgram.cs and another Node.js example: https://github.com/Azure-Samples/Cognitive-Speech-TTS/blob/master/Samples-Http/NodeJS/TTSService.js – Montacer Dkhilali Apr 05 '18 at 08:00

1 Answers1

1

Found the solution.

According to this answer, UnityWebRequest has chunkedTransfer set to true by default. So you need to do:

synReq.chunkedTransfer = false;

BTW: you could write a unit test for this by following this guide: http://www.invisiblerock.com/unity-test-runner (it's what I did to try this out)

Maria Ines Parnisari
  • 16,584
  • 9
  • 85
  • 130