PyAudio - Convert stream.read into int to get amplitude

Question

I'm trying to record audio and simultaneously print the amplitude of the recorded signal, so I'm saving all the data returned by stream.read. But when I try to print it, I get a string of bytes rather than integers. I would like to know how to convert these bytes in order to get the amplitude.

This is my code :

import pyaudio
import wave

# Recording parameters.
CHUNK = 1024  # number of frames requested from the stream per read() call
FORMAT = pyaudio.paInt16  # 16-bit integer sample format
CHANNELS = 1
RATE = 44100  # passed to p.open() as the sampling rate
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"  # defined but not used in this snippet

p = pyaudio.PyAudio()

# Open an input (recording) stream with the parameters above.
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK) 

print("* recording")

# Raw buffers returned by stream.read(), one entry per CHUNK frames.
frames = []

# Read enough CHUNK-sized buffers to cover RECORD_SECONDS of audio.
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data) # 2 bytes(16 bits) per channel

print("* done recording")

# Release the stream and the PyAudio instance.
stream.stop_stream()
stream.close()
p.terminate()

# Each entry is still the undecoded buffer from stream.read(), so this prints
# raw bytes rather than integer sample values (the problem being asked about).
for data in frames:
    print(data)

And this is what I obtain :

       ����#  ����
          
 !$
          

                 ��  ���� ��������������������������
           ������  �� ��                                           
��

   �� ������ ����������������������������
                            ��    
                                     ����                                                
��

��

% �� (��)��,��.��%��#��

score 3 · Answer 1 · edited May 23 '17 at 12:24

3

PyAudio is giving you binary-encoded audio frames as bytes in a string. See the answer to this question for how to print a human-readable representation of your frames:

Get an audio sample as float number from pyaudio-stream

edited May 23 '17 at 12:24

Community

1
1

answered Apr 04 '16 at 22:16

NoThatIsTeal

102
1
7

Thanks for your answer. I've just added the line "decoded = numpy.fromstring(data, 'Float32');" in my for loop, but the result is not conclusive. I obtained a list of very small numbers like: 3.67348991e-40 6.42851276e-40 3.67355998e-40 6.42868091e-40 2.75502285e-40 1.10201895e-39 nan 4.59204105e-40 1.19389508e-39 1.37756747e-39 – Utopia Apr 04 '16 at 22:37
You need to use the correct format for your data. try `decoded = numpy.fromstring(data, dtype=numpy.int16)`. I suggest `numpy.int16` since you seem to have defined the stream as consisting of 16 bit integer samples. If you want to try different sample formats, here is the list of those supported by numpy: http://docs.scipy.org/doc/numpy-1.10.1/reference/generated/numpy.dtype.html#numpy.dtype – NoThatIsTeal Apr 06 '16 at 05:36

score 3 · Accepted Answer · edited May 23 '17 at 12:31

You can certainly take inspiration from the following code:

#!/usr/bin/python

# open a microphone in pyAudio and listen for taps

import pyaudio
import struct
import math

# Starting RMS threshold; listen() compares the normalized RMS of each block
# against the (self-adjusting) threshold initialised from this value.
INITIAL_TAP_THRESHOLD = 0.010
FORMAT = pyaudio.paInt16
# Scale factor that maps a signed 16-bit sample onto roughly -1.0 .. 1.0.
SHORT_NORMALIZE = (1.0/32768.0)
# NOTE(review): with 2 channels the block presumably holds interleaved samples,
# so get_rms() averages over both channels together -- confirm.
CHANNELS = 2
RATE = 44100
# Length of one analysis block; multiplied by RATE to get frames per block,
# so this is presumably in seconds.
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)
# if we get this many noisy blocks in a row, increase the threshold
OVERSENSITIVE = 15.0/INPUT_BLOCK_TIME
# if we get this many quiet blocks in a row, decrease the threshold
UNDERSENSITIVE = 120.0/INPUT_BLOCK_TIME
# if the noise was longer than this many blocks, it's not a 'tap'
MAX_TAP_BLOCKS = 0.15/INPUT_BLOCK_TIME

def get_rms( block ):
    """Return the RMS amplitude of a block of raw 16-bit audio samples.

    RMS amplitude is the square root of the mean of the squared sample
    values. Each sample is first scaled by SHORT_NORMALIZE, so the result
    is on the normalized (roughly 0.0 .. 1.0) scale.

    Args:
        block: bytes holding native-endian signed 16-bit samples, as
            returned by a paInt16 stream's read().

    Returns:
        The RMS value as a float; 0.0 for an empty block.

    Raises:
        struct.error: if len(block) is not a multiple of 2.
    """
    # An empty block has no samples; avoid dividing by zero below.
    if not block:
        return 0.0

    # We get one short out for each two bytes in the block. Integer division
    # keeps the count an int on Python 3 as well as Python 2.
    count = len(block) // 2
    shorts = struct.unpack( "%dh" % count, block )

    sum_squares = 0.0
    for sample in shorts:
        # sample is a signed short in +/- 32768; normalize it to 1.0
        n = sample * SHORT_NORMALIZE
        sum_squares += n*n

    return math.sqrt( sum_squares / count )

class TapTester(object):
    """Listen on a microphone stream and report short loud bursts ("taps").

    Each call to listen() reads one block of audio, computes its RMS with
    get_rms() and compares it to an adaptive threshold. A tap is reported
    when a run of 1..MAX_TAP_BLOCKS noisy blocks is followed by a quiet block.
    """

    def __init__(self):
        self.pa = pyaudio.PyAudio()
        self.stream = self.open_mic_stream()
        self.tap_threshold = INITIAL_TAP_THRESHOLD
        # Start above MAX_TAP_BLOCKS so the first quiet block cannot be
        # reported as the end of a tap.
        self.noisycount = MAX_TAP_BLOCKS+1
        self.quietcount = 0
        self.errorcount = 0

    def stop(self):
        """Close the input stream and release the PyAudio instance."""
        self.stream.close()
        self.pa.terminate()

    def find_input_device(self):
        """Return the index of the first device whose name contains "mic" or
        "input", or None (meaning: use the default input device)."""
        for i in range( self.pa.get_device_count() ):
            devinfo = self.pa.get_device_info_by_index(i)
            print( "Device %d: %s"%(i,devinfo["name"]) )

            for keyword in ["mic","input"]:
                if keyword in devinfo["name"].lower():
                    print( "Found an input: device %d - %s"%(i,devinfo["name"]) )
                    return i

        # Only reached when no device name matched a keyword.
        print( "No preferred input found; using default input device." )
        return None

    def open_mic_stream( self ):
        """Open and return an input stream on the device chosen by
        find_input_device(), using the module-level audio settings."""
        device_index = self.find_input_device()

        stream = self.pa.open(   format = FORMAT,
                                 channels = CHANNELS,
                                 rate = RATE,
                                 input = True,
                                 input_device_index = device_index,
                                 frames_per_buffer = INPUT_FRAMES_PER_BLOCK)

        return stream

    def tapDetected(self):
        """Called by listen() whenever a tap is detected."""
        print("Tap!")

    def listen(self):
        """Read one block from the stream and update the tap-detection state.

        Read errors are counted and reported, and the block is skipped.
        """
        try:
            block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        except IOError as e:
            self.errorcount += 1
            print( "(%d) Error recording: %s"%(self.errorcount,e) )
            self.noisycount = 1
            return

        amplitude = get_rms( block )
        if amplitude > self.tap_threshold:
            # noisy block
            self.quietcount = 0
            self.noisycount += 1
            if self.noisycount > OVERSENSITIVE:
                # too many noisy blocks in a row: turn down the sensitivity
                self.tap_threshold *= 1.1
        else:
            # quiet block: a short preceding run of noisy blocks was a tap
            if 1 <= self.noisycount <= MAX_TAP_BLOCKS:
                self.tapDetected()
            self.noisycount = 0
            self.quietcount += 1
            if self.quietcount > UNDERSENSITIVE:
                # too many quiet blocks in a row: turn up the sensitivity
                self.tap_threshold *= 0.9

if __name__ == "__main__":
    tt = TapTester()

    # Process 1000 blocks, then release the audio stream even if
    # listening is interrupted by an exception.
    try:
        for i in range(1000):
            tt.listen()
    finally:
        tt.stop()

It comes from this post: Detect tap with pyaudio from live mic

You can easily adapt it to store the RMS values in a list and plot them.

Thanks for your answer. I just added the function get_rms and saved the values in a list, and everything is fine. I get a list of amplitudes which increase or decrease depending on whether I'm talking or not. — Utopia, Apr 04 '16 at 22:38
Nice. Now if you really want to plot, I suggest you to use pyqt. — FLCcrakers, Apr 04 '16 at 22:48

score 2 · Answer 3 · answered Jan 06 '21 at 19:17

I guess the question is old and I stumbled over it while looking for other answers, but in my project I use something like this.

#Lets assume the constants are defined somewhere

import struct
import pyaudio
import numpy as np

# Fragment taken from inside a class: `self` and `self.CHUNK` (the number of
# frames to read per call) are assumed to be defined by the surrounding code.
self.input = pyaudio.PyAudio().open(
            format=pyaudio.paInt16,
            channels=1,
            rate=44100,
            input=True,
            output=False,
            # presumably the same value as self.CHUNK -- confirm
            frames_per_buffer=1024,
)
# Read self.CHUNK frames as one raw bytes buffer.
wf_data = self.input.read(self.CHUNK)
# str(self.CHUNK) + 'h' builds a struct format such as "1024h": a repeat count
# followed by 'h' (signed 16-bit integer). With one channel and 2-byte samples
# the buffer holds exactly self.CHUNK such values, unpacked here into a tuple.
wf_data = struct.unpack(str(self.CHUNK) + 'h', wf_data)
# Convert the tuple of ints into a NumPy array.
wf_data = np.array(wf_data)

The paInt16 format and the 'h' struct code correspond (both denote signed 16-bit integers). You can figure out which letter matches your PyAudio format here: https://docs.python.org/3/library/struct.html

Credit goes to: https://www.youtube.com/channel/UC2W0aQEPNpU6XrkFCYifRFQ

didn't get this "str(self.CHUNK) + 'h'" could you explain? what is self.CHUNK? — Lorenzo Sciuto, Mar 20 '21 at 15:32

score 0 · Answer 4 · answered Sep 09 '17 at 11:43

0

I think you could do this

import struct

data = stream.read(CHUNK)
# Iterating over the bytes object directly would yield single byte values
# (0-255). The stream in the question uses paInt16, so every sample spans two
# bytes; decode them as signed 16-bit integers before printing.
for sample in struct.unpack("%dh" % (len(data) // 2), data):
    print(sample)

answered Sep 09 '17 at 11:43

score -1 · Answer 5 · answered Mar 26 '21 at 17:49

When dealing with audio you probably want the RMS (root mean square) value of the signal's buffer. I believe it offers a better 'view' of the overall power in an audio signal.

The Python standard library has a module called audioop, which provides a function called rms. (Note: audioop was deprecated in Python 3.11 and removed in Python 3.13.)

import pyaudio
import time
import audioop

def get_rms():
    """Yield the RMS value of successive chunks read from an input stream.

    This is an infinite generator: it opens a mono, 16-bit, 44100 Hz input
    stream and yields one RMS value (as returned by audioop.rms) per chunk
    of CHUNK frames. The stream and the PyAudio instance are released when
    the generator is closed or garbage-collected.

    Note: audioop was deprecated in Python 3.11 and removed in 3.13.
    """
    CHUNK = 8
    WIDTH = 2      # bytes per sample
    CHANNELS = 1
    RATE = 44100

    p = pyaudio.PyAudio()
    # Stays None if p.open() fails, so the cleanup below does not touch an
    # unbound name.
    stream = None

    try:
        stream = p.open(format=p.get_format_from_width(WIDTH),
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        output=False,
                        frames_per_buffer=CHUNK)
        # wait a second to allow the stream to be setup
        time.sleep(1)
        while True:
            # read the data
            data = stream.read(CHUNK, exception_on_overflow=False)
            # The sample width passed to audioop must match the stream's
            # sample width (WIDTH bytes), otherwise the bytes are misread.
            yield audioop.rms(data, WIDTH)
    finally:
        # Close the stream before terminating the PyAudio instance that owns it.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

You can use the function like this

# get_rms() is a generator that loops forever (`while True`), so this loop
# keeps printing RMS values until the program is interrupted.
rms_values = get_rms()
for rms in rms_values:
    print(rms)

PyAudio - Convert stream.read into int to get amplitude

5 Answers5