2

There are similar questions for Java and iOS, but I'm wondering about detecting silence in javascript for audio recordings via getUserMedia(). So given:

navigator.mediaDevices.getUserMedia({ audio: true })
  .then((stream) => {
    const recorder = new MediaRecorder(stream);
    const chunks = [];

    // Collect every chunk the recorder emits.
    recorder.addEventListener("dataavailable", (e) => chunks.push(e.data));

    // On stop, assemble the chunks into a Blob and play it back.
    recorder.addEventListener("stop", () => {
      const blob = new Blob(chunks);
      const url = URL.createObjectURL(blob);
      const playback = new Audio(url);
      playback.play();
    });

    recorder.start();
  });

I'm wondering if there is anything that can be checked on the Blob, URL, or Audio objects in the stop event for an absence of audio. In the case of a bad microphone or a virtual device selected - anything along those lines. I was previously checking the blob's size, but silent audio still has a filesize. I can do this on the backend via ffmpeg, but hoping there is a way in pure JS to simplify.

Ryan DuVal
  • 3,774
  • 2
  • 21
  • 15
  • 1
    Might be expensive to do this every `dataavailable` event, but maybe try converting your Blob into an AudioBuffer and read the channel values? If they're all zero (or close to zero) you can presume no audio is detected. Converting to Blob to AudioBuffer here: https://stackoverflow.com/a/61531985/5522641 – AdamInTheOculus Feb 15 '22 at 20:13
  • I think you can using this code suggested by `Kaiido` : https://stackoverflow.com/a/46781986/11966136 – Mahdi Akrami Feb 16 '22 at 18:35

2 Answers

10

With this solution inspired by Visualizations with Web Audio API, you can set minimal required decibels and detect if anything was recorded.

// Minimum level (dB) the analyser reports as non-zero; anything quieter
// reads as 0 in getByteFrequencyData, so it acts as our silence floor.
const MIN_DECIBELS = -45;

navigator.mediaDevices.getUserMedia({ audio: true })
  .then(stream => {
    const mediaRecorder = new MediaRecorder(stream);
    mediaRecorder.start();

    const audioChunks = [];
    mediaRecorder.addEventListener("dataavailable", event => {
      audioChunks.push(event.data);
    });

    // Tap the live stream with an AnalyserNode so we can inspect levels
    // while the MediaRecorder is capturing.
    const audioContext = new AudioContext();
    const audioStreamSource = audioContext.createMediaStreamSource(stream);
    const analyser = audioContext.createAnalyser();
    analyser.minDecibels = MIN_DECIBELS;
    audioStreamSource.connect(analyser);

    const bufferLength = analyser.frequencyBinCount;
    const domainData = new Uint8Array(bufferLength);

    let soundDetected = false;

    // Poll the frequency data once per animation frame until we see any
    // energy above MIN_DECIBELS, then stop polling.
    const detectSound = () => {
      if (soundDetected) {
        return;
      }

      analyser.getByteFrequencyData(domainData);

      for (let i = 0; i < bufferLength; i++) {
        if (domainData[i] > 0) {
          soundDetected = true;
          break; // one non-silent bin is enough; no need to scan the rest
        }
      }

      window.requestAnimationFrame(detectSound);
    };

    window.requestAnimationFrame(detectSound);

    mediaRecorder.addEventListener("stop", () => {
      const audioBlob = new Blob(audioChunks);
      const audioUrl = URL.createObjectURL(audioBlob);
      const audio = new Audio(audioUrl);
      audio.play();

      console.log({ soundDetected });
    });
  })
  .catch(err => {
    // Surface permission denials / missing devices instead of silently
    // leaving the promise rejected.
    console.error("getUserMedia failed:", err);
  });
BorisTB
  • 1,686
  • 1
  • 17
  • 26
  • 1
    Fantastic. I have two followup questions for you: 1. Would you mind explaining the rationale for using -45dB as the threshold? 2. I'm curious how I might adapt this to only analyze the first chunk of data (say, 10-15 seconds). I am using this to detect an incorrectly-selected input, so I'd want to notify the user ASAP and the first batch of data would be sufficient. – Ryan DuVal Feb 21 '22 at 15:56
  • 1
    1. Honestly -45 is just the value that works best for me to ignore white noise and background sounds. 2. I believe in js there's currently no way to slice audio or select n seconds of it. But you can set timeout to stop recording after 10 seconds and then start new recording. – BorisTB Feb 23 '22 at 08:24
  • 1
    FYI There is a question/answer here related to setting the noise floor (e.g. MIN_DECIBELS) in a way that will work in different recording environments. https://stackoverflow.com/questions/72333613/what-is-a-good-algorithm-to-detect-silence-over-a-variety-of-recording-environme/72341235#72341235 – Erik Hermansen May 22 '22 at 20:56
1

This code runs a callback function for every dialog (utterance) it detects. It keeps looping until the user stops it:

// Tuning constants and shared recorder state.
// (Originally implicit globals — declared explicitly so strict mode
// and bundlers don't reject them.)
const VOICE_MIN_DECIBELS    = -35;      // analyser floor: quieter bins read as 0
const DELAY_BETWEEN_DIALOGS = 400;      // ms of silence that ends one dialog
const DIALOG_MAX_LENGTH     = 60*1000;  // hard cap per recording, ms
let MEDIA_RECORDER          = null;     // active MediaRecorder, or null
let IS_RECORDING            = false;    // user-controlled on/off flag

//startRecording: flip the flag on and kick off the record loop.
function startRecording(){
    IS_RECORDING = true;
    record();
}

//stopRecording: flip the flag off and stop the active recorder, if any.
function stopRecording(){
    IS_RECORDING = false;
    if(MEDIA_RECORDER !== null)
        MEDIA_RECORDER.stop();
}

//record:
function record(){
    navigator.mediaDevices.getUserMedia({ audio: true })
    .then(stream => {
        
        //start recording:
        MEDIA_RECORDER = new MediaRecorder(stream);
        MEDIA_RECORDER.start();
        
        //save audio chunks:
        const audioChunks = [];
        MEDIA_RECORDER.addEventListener("dataavailable", event => {
            audioChunks.push(event.data);
        });
        
        //analisys:
        const audioContext      = new AudioContext();
        const audioStreamSource = audioContext.createMediaStreamSource(stream);
        const analyser          = audioContext.createAnalyser();
        analyser.minDecibels    = VOICE_MIN_DECIBELS;
        audioStreamSource.connect(analyser);
        const bufferLength      = analyser.frequencyBinCount;
        const domainData        = new Uint8Array(bufferLength);
        
        //loop:
        const time              = new Date();
        let startTime,
            lastDetectedTime    = time.getTime();
        let anySoundDetected    = false;
        const detectSound       = () => {
            
            //recording stoped by user:
            if(!IS_RECORDING)
                return;

            time = new Date();
            currentTime = time.getTime();
            
            //time out:
            if(currentTime > startTime + DIALOG_MAX_LENGTH){
                MEDIA_RECORDER.stop();
                return;
            }

            //a dialog detected:
            if( anySoundDetected === true &&
                currentTime > lastDetectedTime + DELAY_BETWEEN_DIALOGS
                ){
                MEDIA_RECORDER.stop();
                return;
            }
            
            //check for detection:
            analyser.getByteFrequencyData(domainData);
            for(let i = 0; i < bufferLength; i++)
                if(domainData[i] > 0){
                    anySoundDetected = true;
                    time = new Date();
                    lastDetectedTime = time.getTime();
                }
            
            //continue the loop:
            window.requestAnimationFrame(detectSound);
        };
        window.requestAnimationFrame(detectSound);

        //stop event:
        MEDIA_RECORDER.addEventListener('stop', () => {
            
            //stop all the tracks:
            stream.getTracks().forEach(track => track.stop());
            if(!anySoundDetected) return;
            
            //send to server:
            const audioBlob = new Blob(audioChunks, {'type': 'audio/mp3'});
            doWhateverWithAudio(audioBlob);
            
            //start recording again:
            record();

        });

    });
}

//doWhateverWithAudio:
function doWhateverWithAudio(audioBlob){

    //.... send to server, downlod, etc.

}