1

I would like to implement the following workflow:

  1. A user speaks to a web browser.
  2. The web browser (Google Chrome) records the user's voice as a WAV file (using Recorder.js) and sends it to a Python Flask server.
  3. The Python server calls the Google Cloud Speech-to-Text API to transcribe the WAV file.
  4. The server sends the transcribed text back to the web browser.

I develop this app locally using Windows 10, WSL, Debian 10/buster, python3.7.6 and Google Chrome.

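I have implemented steps 1, 2, and 3, but not step 4. In step 2, I used XMLHttpRequest() to send the WAV file to the Python Flask server. As a result, in step 4 the page returned by render_template() is not displayed by the browser the way it would be after an ordinary form submission.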

I searched for "XMLHttpRequest() flask return render_template()" and found a solution (How to get the response of XMLHttpRequest?), which is to read

XMLHttpRequest.responseText inside XMLHttpRequest.onreadystatechange once XMLHttpRequest.readyState equals XMLHttpRequest.DONE.

// Show the raw response body in a dialog once the request has completed.
xhr.onreadystatechange = function () {
    if (xhr.readyState != XMLHttpRequest.DONE) {
        return;
    }
    alert(xhr.responseText);
}

Therefore, I added this code to my app.js, but I still can't display the page returned by render_template(). Here is the terminal output.

127.0.0.1 - - [04/Feb/2020 16:55:58] "GET / HTTP/1.1" 200 -
['Transcript: 明日雪が降らないといいです。']
127.0.0.1 - - [04/Feb/2020 16:56:25] "POST / HTTP/1.1" 200 -

Could you give me any information or suggestions? I know that there are a lot of similar Q&As on Stack Overflow. I have read more than 10 articles and tried their solutions, but I haven't been able to solve my problem. Could you tell me exactly how to change my code, please?

Thank you in advance.

Sincerely, Kazu


My directory structure is here.

.
├── app.yaml
├── credentials.json
├── file.wav(uploaded WAV file)
├── main.py
├── Pipfile
├── Pipfile.lock
├── static
│   └── js
│       └── app.js
└── templates
    ├── index.html
    └── result.html

This is app.js.

// Fall back to the prefixed webkitURL (deprecated) when window.URL is missing.
URL = window.URL || window.webkitURL;

// Recording state shared by the button handlers below.
var gumStream;                      // MediaStream returned by getUserMedia(); set in startRecording()
var rec;                            // Recorder.js object; created in startRecording()
var input;                          // MediaStreamAudioSourceNode we'll be recording

// Shim for AudioContext: fall back to the webkit-prefixed constructor when needed.
var AudioContext = window.AudioContext || window.webkitAudioContext;
var audioContext // AudioContext instance; created in startRecording()

// The three control buttons declared in index.html.
var recordButton = document.getElementById("recordButton");
var stopButton = document.getElementById("stopButton");
var pauseButton = document.getElementById("pauseButton");

// Wire each of the three buttons to its click handler.
recordButton.addEventListener("click", startRecording);
stopButton.addEventListener("click", stopRecording);
pauseButton.addEventListener("click", pauseRecording);

/**
 * Click handler for the "Record" button.
 *
 * Requests microphone access and, once granted, creates the AudioContext,
 * the MediaStreamAudioSourceNode and the Recorder.js object, then starts a
 * mono recording. The results are stored in the shared globals
 * `audioContext`, `gumStream`, `input` and `rec` so that the pause/stop
 * handlers can use them.
 *
 * If access is denied (or anything in the setup throws), the failure is
 * logged and the buttons are reset so the user can try again.
 */
function startRecording() {
    console.log("recordButton clicked");

    /*
        Simple constraints object, for more advanced audio features see
        https://addpipe.com/blog/audio-constraints-getusermedia/
    */
    var constraints = { audio: true, video: false };

    /*
        Disable the record button until we get a success or fail from getUserMedia()
    */
    recordButton.disabled = true;
    stopButton.disabled = false;
    pauseButton.disabled = false;

    /*
        We're using the standard promise based getUserMedia()
        https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia
    */
    navigator.mediaDevices.getUserMedia(constraints).then(function(stream) {
        console.log("getUserMedia() success, stream created, initializing Recorder.js ...");

        /*
            Create the audio context after getUserMedia is called:
            sampleRate might change after getUserMedia is called, like it does
            on macOS when recording through AirPods. The sampleRate defaults to
            the one set in your OS for your playback device.
        */
        audioContext = new AudioContext();

        // Update the format label. textContent is used because the value is plain text.
        document.getElementById("formats").textContent =
            "Format: 1 channel pcm @ " + audioContext.sampleRate / 1000 + "kHz";

        /* assign to gumStream for later use (stopRecording stops its audio track) */
        gumStream = stream;

        /* use the stream */
        input = audioContext.createMediaStreamSource(stream);

        /*
            Create the Recorder object and configure it to record mono sound (1 channel).
            Recording 2 channels would double the file size.
        */
        rec = new Recorder(input, { numChannels: 1 });

        // start the recording process
        rec.record();

        console.log("Recording started");

    }).catch(function(err) {
        // This also catches errors thrown by the setup code above, so report
        // the cause instead of silently resetting the buttons.
        console.error("Recording could not be started:", err);

        // enable the record button again so the user can retry
        recordButton.disabled = false;
        stopButton.disabled = true;
        pauseButton.disabled = true;
    });
}

/**
 * Click handler for the "Pause"/"Resume" button.
 *
 * Toggles the shared Recorder.js object between recording and paused, and
 * flips the button label so it always shows the next available action.
 */
function pauseRecording() {
    var isRecording = rec.recording;
    console.log("pauseButton clicked rec.recording=", isRecording);

    if (!isRecording) {
        // currently paused: resume
        rec.record();
        pauseButton.innerHTML = "Pause";
        return;
    }

    // currently recording: pause
    rec.stop();
    pauseButton.innerHTML = "Resume";
}

/**
 * Click handler for the "Stop" button.
 *
 * Resets the controls, stops the recorder and the microphone track, then
 * asks Recorder.js to export the recording as a WAV blob, which is handed
 * to createDownloadLink.
 */
function stopRecording() {
    console.log("stopButton clicked");

    // Only "Record" stays clickable so a new recording can be started.
    recordButton.disabled = false;
    stopButton.disabled = true;
    pauseButton.disabled = true;

    // Restore the label in case the recording was stopped while paused.
    pauseButton.innerHTML = "Pause";

    // Stop capturing audio.
    rec.stop();

    // Release the microphone.
    var micTrack = gumStream.getAudioTracks()[0];
    micTrack.stop();

    // Build the WAV blob and pass it on to createDownloadLink.
    rec.exportWAV(createDownloadLink);
}

/**
 * Adds a list entry for a finished recording to the "recordingsList" element.
 *
 * The entry contains an <audio> player, the file name, a "Save to disk"
 * link and an "Upload" link that POSTs the recording to "/" as the
 * multipart field "audio_data".
 *
 * @param {Blob} blob - WAV data produced by Recorder.js exportWAV().
 */
function createDownloadLink(blob) {

    var url = URL.createObjectURL(blob);
    var au = document.createElement('audio');
    var li = document.createElement('li');
    var link = document.createElement('a');

    // name of the .wav file to use during upload and download (without extension)
    var filename = new Date().toISOString();

    // add controls to the <audio> element
    au.controls = true;
    au.src = url;

    // save to disk link
    link.href = url;
    link.download = filename + ".wav"; // download forces the browser to download the file using the filename
    link.textContent = "Save to disk";

    // add the new audio element to li
    li.appendChild(au);

    // add the filename to the li
    li.appendChild(document.createTextNode(filename + ".wav "));

    // add the save to disk link to li
    li.appendChild(link);

    // upload link
    var upload = document.createElement('a');
    upload.href = "#";
    upload.textContent = "Upload";
    upload.addEventListener("click", function(event) {
        // The link is only a click target: do not follow href="#".
        event.preventDefault();

        var xhr = new XMLHttpRequest();
        xhr.onreadystatechange = function() {
            if (xhr.readyState !== XMLHttpRequest.DONE) {
                return;
            }
            if (xhr.status >= 200 && xhr.status < 300) {
                alert(xhr.responseText);
            } else {
                // Report failures explicitly instead of alerting an error page.
                console.error("Upload failed with HTTP status " + xhr.status);
                alert("Upload failed (HTTP " + xhr.status + ")");
            }
        };
        var fd = new FormData();
        fd.append("audio_data", blob, filename);
        xhr.open("POST", "/", true);
        xhr.send(fd);
    });
    li.appendChild(document.createTextNode(" ")); // add a space in between
    li.appendChild(upload); // add the upload link to li

    // Look the list up explicitly rather than relying on the implicit
    // global that browsers create for elements with an id.
    document.getElementById("recordingsList").appendChild(li);
}

This is main.py.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from flask import Flask
from flask import request
from flask import render_template
from flask import send_file
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
import os
import io

# Flask application object; the route below is registered on it.
app = Flask(__name__)

@app.route("/", methods=['POST', 'GET'])
def index():
    """Serve the recorder page (GET) or transcribe an uploaded WAV file (POST).

    GET renders ``index.html``.

    POST expects a multipart upload in the ``audio_data`` field, sends its
    bytes to the Google Cloud speech client for Japanese (``ja-JP``)
    recognition, and renders ``result.html`` with ``resultsentence``: a list
    containing one ``'Transcript: ...'`` string per recognition result.
    """
    if request.method != "POST":
        return render_template("index.html")

    # Read the upload once and reuse these bytes for recognition. Reading the
    # shared ./file.wav back from disk could pick up another request's audio
    # if two uploads overlap.
    content = request.files['audio_data'].read()

    # Keep a copy of the latest upload on disk; the context manager closes
    # the handle even if the write fails.
    with open('./file.wav', 'wb') as f:
        f.write(content)

    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"
    client = speech.SpeechClient()

    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code='ja-JP',
        enable_automatic_punctuation=True)
    response = client.recognize(config, audio)

    # The first alternative is the most likely one for each portion.
    resultsentence = [
        u'Transcript: {}'.format(result.alternatives[0].transcript)
        for result in response.results
    ]

    print(resultsentence)

    return render_template("result.html", resultsentence=resultsentence)

# Start the Flask server only when this file is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    app.run()

This is index.html.

<!DOCTYPE html>
<html>
  <head>
    <meta charset="UTF-8">
    <title>Simple Recorder.js demo with record, stop and pause - addpipe.com</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
  </head>
  <body>
    <h1>Simple Recorder.js demo</h1>

    <!-- Recording controls; app.js looks these up by id. -->
    <div id="controls">
     <button id="recordButton">Record</button>
     <button id="pauseButton" disabled>Pause</button>
     <button id="stopButton" disabled>Stop</button>
    </div>
    <div id="formats">Format: start recording to see sample rate</div>
    <p><strong>Recordings:</strong></p>
    <!-- app.js appends one <li> per finished recording to this list. -->
    <ol id="recordingsList"></ol>
    <!-- inserting these scripts at the end to be able to use all the elements in the DOM -->
    <!-- Recorder.js is loaded from jsDelivr because the RawGit CDN has been retired. -->
    <script src="https://cdn.jsdelivr.net/gh/mattdiamond/Recorderjs@08e7abd9/dist/recorder.js"></script>
    <!-- url_for keeps the path correct even if the static URL prefix changes. -->
    <script src="{{ url_for('static', filename='js/app.js') }}"></script>
  </body>
</html>

This is result.html.

<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Transcription result</title>
</head>
<body>
    {# resultsentence is the list of transcript strings passed by the index view.
       Render one paragraph per entry instead of printing the Python list repr. #}
    {% for sentence in resultsentence %}
    <p>{{ sentence }}</p>
    {% endfor %}
</body>
</html>
Kazuaki Suzuki
  • 1,025
  • 6
  • 19
  • 33

1 Answers1

3

This occurs because the route that renders the HTML is being called from inside an Ajax request (XMLHttpRequest), so the browser does not display the response on its own. You have two options. The first is to write the HTML from the response into the page yourself, like this:

// Once the request has completed, write the HTML returned by the server
// into the document.
xhr.onreadystatechange = function () {
    if (xhr.readyState != XMLHttpRequest.DONE) {
        return;
    }
    document.write(xhr.responseText);
}
// or using jQuery (which I prefer)

// Send the recording as multipart form data so that Flask can read it
// from request.files['audio_data'].
var fd = new FormData();
fd.append("audio_data", blob);

$.ajax({
    type: "POST",
    url: "/",
    data: fd,
    processData: false, // do not let jQuery serialize the FormData
    contentType: false, // let the browser set the multipart boundary header
    success: function(response) {
        document.write(response);
    }
});

The other way is to build an API: you don't need to call render_template; you only need to return resultsentence from the index function as JSON (the result needs to be in dict format):

@app.route("/", methods=['POST', 'GET'])
def index():
    """Return the transcripts as JSON on POST; render the recorder page on GET."""
    if request.method == "POST":
        # the audio upload / transcription logic that builds resultsentence...

        # Imported here so the snippet is self-contained. jsonify wraps the
        # list in a JSON object, which the frontend can parse directly.
        from flask import jsonify
        return jsonify(resultsentence=resultsentence)
    else:
        return render_template("index.html")

With this approach, you will need to handle the response in the frontend using JavaScript.

Caio Filus
  • 705
  • 3
  • 17
  • Thank you very much, @CaioFilus ! I choose option 1 and it worked perfectly. `xhr.onreadystatechange = function() { if (xhr.readyState == XMLHttpRequest.DONE) { document.write(xhr.responseText); } }` – Kazuaki Suzuki Feb 04 '20 at 23:33