Stream live audio to Node.js server

Question

I'm working on a project and I need to send an audio stream to a Node.js server. I'm able to capture microphone sound with this function:

/**
 * Requests microphone access and plays the captured audio through the first
 * <video> element on the page.
 *
 * The captured MediaStream is also stored on `window.stream` so it can be
 * inspected from the console. Failures are logged, never thrown.
 */
function micCapture() {
    'use strict';

    const constraints = {
        audio: true,
        video: false
    };

    const video = document.querySelector('video');

    function successCallback(stream) {
        window.stream = stream; // stream available to console
        if (video) {
            if ('srcObject' in video) {
                // Standard way to attach a MediaStream to a media element;
                // current browsers reject MediaStream in URL.createObjectURL().
                video.srcObject = stream;
            } else {
                // Legacy browsers only. Resolve the URL API once so the feature
                // test and the call are guaranteed to use the same object.
                const urlApi = window.URL || window.webkitURL;
                video.src = urlApi ? urlApi.createObjectURL(stream) : stream;
            }
        }
        //Send audio stream
        //server.send(stream);
    }

    function errorCallback(error) {
        console.log('navigator.getUserMedia error: ', error);
    }

    // Prefer the promise-based API; the callback-based one is deprecated.
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        navigator.mediaDevices.getUserMedia(constraints).then(successCallback, errorCallback);
        return;
    }

    const legacyGetUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia;
    if (!legacyGetUserMedia) {
        errorCallback(new Error('getUserMedia is not supported in this browser'));
        return;
    }
    // The legacy function must be invoked with navigator as its receiver.
    legacyGetUserMedia.call(navigator, constraints, successCallback, errorCallback);
}

As you can see, I'm able to capture audio and play it on the website.

Now I want to send that audio stream to a Node.js server, and send it back to other clients. Like a voicechat, but I don't want to use WebRTC as I need the stream in the server. How can I achieve this? Can I use socket.io-stream to do this? In the examples I saw, they recorded the audio, and sent a file, but I need "live" audio.

Yes you certainly can use WebSockets to stream audio from client to server ... I suggest you work to code up something then come back with specific answerable programming questions ... above code as you say is not specific to your socket question — Scott Stensland, Jan 18 '18 at 19:00
Thanks for the suggestion. I will code something and update the question. — JCAguilera, Jan 18 '18 at 19:02
I ended up trying WebRTC, but I finally abandoned the project — JCAguilera, Dec 16 '19 at 10:49

Sisir · Answer 1 · 2020-05-29T17:59:53.397

I have recently done live audio upload using socket.io from browser to server. I am going to answer here in case someone else needs it.

    // State shared by handleMicStream(), microphoneProcess() and closeAll().
    let stream; // MediaStream delivered by getUserMedia; set in handleMicStream()
    // Source node for the microphone. Declared here so handleMicStream() and
    // closeAll() do not depend on an implicit global (a ReferenceError in
    // strict/module code, or when closeAll() runs before the mic is granted).
    let input;
    const socket = io();
    const bufferSize = 1024 * 16;
    let audioContext = new AudioContext();
    // createScriptProcessor is deprecated; AudioWorklet is its replacement.
    let processor = audioContext.createScriptProcessor(bufferSize, 1, 1);
    processor.connect(audioContext.destination);

    navigator.mediaDevices.getUserMedia({ video: false, audio: true }).then(handleMicStream).catch(err => {
      console.log('error from getUserMedia', err);
    });

handleMicStream will run when user accepts the permission to use microphone.

  /**
   * Wires the granted microphone stream into the audio graph.
   *
   * Stores the stream and its source node in the shared `stream` / `input`
   * variables, connects the source to `processor`, and forwards every
   * audio-process event to microphoneProcess().
   *
   * @param {MediaStream} streamObj - Stream resolved by getUserMedia.
   */
  function handleMicStream(streamObj) {
    // Remember the stream so it can be stopped later.
    stream = streamObj;

    const micSource = audioContext.createMediaStreamSource(streamObj);
    input = micSource;
    micSource.connect(processor);

    processor.onaudioprocess = function onMicAudio(event) {
      microphoneProcess(event); // receives data from microphone
    };
  }


  /**
   * Handles one audio-process event: reads channel 0 of the event's input
   * buffer, runs it through convertFloat32ToInt16() and emits the result on
   * the socket as a 'micBinaryStream' message.
   *
   * @param {AudioProcessingEvent} e - Event passed to processor.onaudioprocess.
   */
  function microphoneProcess(e) {
    // Only the first channel is forwarded.
    const monoSamples = e.inputBuffer.getChannelData(0);
    // Skip the conversion if the server does not need 16-bit integers.
    socket.emit('micBinaryStream', convertFloat32ToInt16(monoSamples));
  }

/**
 * Converts Web Audio float samples to 16-bit signed integer PCM, keeping one
 * sample out of every `decimation`.
 *
 * @param {Float32Array} buffer - Samples, nominally in the range [-1, 1].
 *   Values outside that range are clamped instead of wrapping around.
 * @param {number} [decimation=3] - Keep every N-th sample (indices 0, N, 2N, ...).
 *   NOTE(review): the default of 3 presumably turns 48 kHz capture into 16 kHz;
 *   confirm against the AudioContext sample rate. No low-pass filter is applied.
 * @returns {ArrayBuffer} Backing buffer of the resulting Int16Array.
 * @throws {RangeError} If `decimation` is not a positive integer.
 */
function convertFloat32ToInt16(buffer, decimation = 3) {
    if (!Number.isInteger(decimation) || decimation < 1) {
        throw new RangeError('decimation must be a positive integer');
    }

    // ceil, not floor: index 0 is always kept, so a length that is not a
    // multiple of `decimation` still yields one extra sample at the end.
    const pcm = new Int16Array(Math.ceil(buffer.length / decimation));

    for (let i = 0; i < pcm.length; i++) {
        const sample = Math.max(-1, Math.min(1, buffer[i * decimation]));
        // Int16 spans -32768..32767, so each sign is scaled by its own limit.
        // Scaling by 0xFFFF would overflow and wrap loud samples.
        pcm[i] = sample < 0 ? sample * 0x8000 : sample * 0x7FFF;
    }

    return pcm.buffer;
}

Have your socket.io server listen for the micBinaryStream event and you should get the data. I needed the data in BINARY16 format for the Google API; if you do not need this, you can skip the call to convertFloat32ToInt16().

Important

When you need to stop listening you MUST disconnect the processor and end the stream. Run the function closeAll() below.

/**
 * Stops microphone capture and tears down the audio graph.
 *
 * Stops every track of the shared `stream`, detaches the audio-process
 * handler, disconnects the source and processor nodes, closes the audio
 * context and resets the shared variables to null. The variables are reset
 * synchronously, so calling this function again is a harmless no-op.
 */
function closeAll() {
    // Stop every track, not only the first, so the browser releases the mic.
    if (stream) {
      stream.getTracks().forEach(track => track.stop());
      stream = null;
    }

    // `input` only exists once the microphone stream has been wired up;
    // typeof avoids a ReferenceError when this runs before that happened.
    const hasInput = typeof input !== 'undefined';
    const source = hasInput ? input : null;

    if (processor) {
      // Stop delivering audio callbacks immediately.
      processor.onaudioprocess = null;

      if (source) {
        try {
          source.disconnect(processor);
        } catch (error) {
          console.warn('Attempt to disconnect input failed.');
        }
      }

      try {
        if (audioContext) {
          processor.disconnect(audioContext.destination);
        } else {
          processor.disconnect();
        }
      } catch (error) {
        console.warn('Attempt to disconnect processor failed.', error);
      }
    }

    const context = audioContext;
    if (hasInput) {
      input = null;
    }
    processor = null;
    audioContext = null;

    // close() rejects on an already-closed context; skip it and never leave
    // the returned promise unhandled.
    if (context && context.state !== 'closed') {
      context.close().catch(error => {
        console.warn('Attempt to close the audio context failed.', error);
      });
    }
  }

you were using this piece of code for Google Speech to Text service? — Shubham, Sep 14 '21 at 11:56
Yes, I think what i did is upload mic to google API and get text. I also have code in my github repo. I will link it here if you need. — Sisir, Sep 15 '21 at 12:12
Thanks for the help man, the above code is working like a charm on EJS but have some issues when I am using Angular. Do add the repo link if possible, it might help other users. Till the time I am going to fix the Angular issue. — Shubham, Sep 17 '21 at 03:49
Looks like ScriptProcessorNode is replaced by audioWorkletNode, but it is a different architecture that keeps the audio out of the main UI thread, it is going to take some experimentation... see https://developer.chrome.com/blog/audio-worklet/ — Dr. Aaron Dishno, May 05 '22 at 17:07
Here is the documentation: https://googlechromelabs.github.io/web-audio-samples/audio-worklet/ — Dr. Aaron Dishno, May 05 '22 at 17:07
I found an example of audioWorkletNode that demonstrates showing the microphone input volume.. https://stackoverflow.com/questions/62702721/how-to-get-microphone-volume-using-audioworklet — Dr. Aaron Dishno, May 05 '22 at 17:52

score 1 · Answer 2 · answered Apr 10 '21 at 23:28

it's an old time question, i see. I'm doing the same thing (except my server doesn't run node.js and is written in C#) and stumbled upon this.

Don't know if someone is still interested but i've elaborated a bit. The current alternative to the deprecated createScriptProcessor is the AudioWorklet interface.

From: https://webaudio.github.io/web-audio-api/#audioworklet

1.32.1. Concepts

The AudioWorklet object allows developers to supply scripts (such as JavaScript or WebAssembly code) to process audio on the rendering thread, supporting custom AudioNodes. This processing mechanism ensures synchronous execution of the script code with other built-in AudioNodes in the audio graph.

You cannot implement interfaces in Javascript as far as i know but you can extend a class derived from it.

And the one we need is: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletProcessor

So i did write a processor that just mirrors the output with the input values and displays them.

/**
 * AudioWorklet processor that copies its first input to its first output
 * unchanged.
 *
 * NOTE: the worklet module must also register this class, e.g.
 * registerProcessor("custom-audio-processor", CustomAudioProcessor), before an
 * AudioWorkletNode can be created with that name.
 */
class CustomAudioProcessor extends AudioWorkletProcessor {
    /**
     * Called by the audio rendering thread for each block of samples.
     *
     * @param {Float32Array[][]} inputs - inputs[n][channel] holds the samples of input n.
     * @param {Float32Array[][]} outputs - outputs[n][channel] is filled by this method.
     * @param {Object} parameters - AudioParam values; unused here.
     * @returns {boolean} Always true, to keep the processor alive.
     */
    process (inputs, outputs, parameters) {
        const input = inputs[0];
        const output = outputs[0];
        // An unconnected input has zero channels; never index past either side.
        const channelCount = Math.min(input.length, output.length);
        for (let channel = 0; channel < channelCount; ++channel) {
            // Just copying all the data from input to output.
            // Do not console.log() per sample here: it runs on the audio
            // thread and will make the app crash.
            output[channel].set(input[channel]);
        }
        return true;
    }
}

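The processor must be registered under a name inside custom-audio-processor.js, with registerProcessor("custom-audio-processor", CustomAudioProcessor), and then placed into the audio pipeline, after the microphone and before the speakers.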

/**
 * Captures the microphone and routes it through the "custom-audio-processor"
 * AudioWorklet to the speakers.
 *
 * Audio starts flowing as soon as the nodes are connected; an
 * AudioDestinationNode has no play() method, so none is called.
 *
 * @returns {Promise<{audioCtx: AudioContext, customAudioProcessor: AudioWorkletNode}|null>}
 *   Resolves with the created context and worklet node, or with null when
 *   setup failed (the error is logged).
 */
function record() {
    const constraints = { audio: true };
    // Kept outside the chain so a later failure can release the microphone.
    let micStream = null;

    return navigator.mediaDevices.getUserMedia(constraints)
        .then(function (stream) {
            micStream = stream;
            const audioCtx = new AudioContext();
            const source = audioCtx.createMediaStreamSource(stream);

            // Returned so the caller's promise waits for the worklet module
            // and any failure reaches the catch handler below.
            return audioCtx.audioWorklet.addModule("custom-audio-processor.js").then(() => {
                const customAudioProcessor = new AudioWorkletNode(audioCtx, "custom-audio-processor");
                source.connect(customAudioProcessor);
                customAudioProcessor.connect(audioCtx.destination);

                const handles = { audioCtx, customAudioProcessor };
                // A context created without a user gesture may start suspended.
                if (audioCtx.state === 'suspended') {
                    return audioCtx.resume().then(() => handles);
                }
                return handles;
            });
        })
        .catch(err => {
            if (micStream) {
                micStream.getTracks().forEach(track => track.stop());
            }
            console.log('error while starting the recording', err);
            return null;
        });
}

Works! Good luck! :)

Stream live audio to Node.js server

2 Answers2

Important