6

I am using the IBM Watson Speech to Text API:

var SpeechToTextV1 = require('watson-developer-cloud/speech-to-text/v1');
var fs = require('fs');
var request = require('request');

var speech_to_text = new SpeechToTextV1({
  "username": "<user name>",
  "password": "<password>"
});

var recognizeStream = speech_to_text.createRecognizeStream();

// request(wavfileURL).pipe(recognizeStream);
// recognizeStream.on('results', function(err, res){
//     console.dir(err)
//     console.dir(res)
//     if (res.results){
//         console.dir(res.results)
//     }
//
// });
request.get(wavfileURL, function (err, res, buffer) {
  var streamer = require('streamifier');
  var params = {
    // From file
    audio: streamer.createReadStream(buffer) ,
    content_type: 'audio/wav; rate=44100'
  };

  speech_to_text.recognize(params, function(err, res) {
    debugger;
    if (err)
      console.log("ERR:",err);
    else {
      console.log("NOT ERR");
      console.log(JSON.stringify(res, null, 2));
      console.dir(res);
    }
  });
});

I call it with the following WAV file https://s3.amazonaws.com/buzzy-audio/adam.ginsburg%40gmail.com/vNixvnC4Xscu8yZ98

And I get the following error:

> ERR: { [Error: unable to transcode data stream audio/wav ->
> audio/x-float-array ] I20170411-18:23:40.576(10)?   code: 400,
> I20170411-18:23:40.576(10)?   code_description: 'Bad Request',
> I20170411-18:23:40.577(10)?   error: 'unable to transcode data stream
> audio/wav -> audio/x-float-array ' }

The content type sample rate seems correct:

fileinfo

Any ideas please?

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
aginsburg
  • 1,223
  • 1
  • 12
  • 22
  • @german just checking the edits... was it just a few spaces... or did I miss something? – aginsburg May 01 '17 at 21:27
  • code indentation mostly – German Attanasio May 01 '17 at 21:31
  • in the api documentation it said that an inactivity timeout could led to an error 400. "The server also closes the connection (response code 400) if no speech is detected for inactivity_timeout seconds of audio (not processing time); use the inactivity_timeout parameter to change the default of 30 seconds. " – jacques May 02 '17 at 08:23
  • Regarding your question @aginsburg, I will try to see if I can make it work but I'm 99% sure it's related to the `content-type` and `content-length` that s3 returns. I bet that you could save the file in a temp folder and then send it to speech to text. In this case, you need to make sure you can override the headers to send to STT and that they match what the API is expecting. – German Attanasio May 02 '17 at 18:15
  • I'm having the problem with audio recordings from Twilio, have you solved this issue? – szx Jun 06 '18 at 22:40
  • It looks like simply appending ".mp3" to the recording URL from Twilio solves this (Watson can understand mp3) – szx Jul 23 '18 at 22:35

1 Answers1

1

This is due to watson thinking that the audio file you're trying to upload is not a audio file, So to protect itself from harful file it raises exception

official explanation : link

corrupted file : file byte code altered (scan before downloading)

Maghil vannan
  • 435
  • 2
  • 6
  • 19