I'm using Android's AudioRecord to record audio and save it to a WAV file. The problem is that when I play the audio from the WAV file, it sounds fast-forwarded (it plays too fast). In the class I have posted I also have voice recognition set up. Voice recognition works and the audio records, but the rate at which the recorded audio plays back isn't right.

I think it could be a problem with how I'm writing the WAV file, but I'm not sure. Which settings in the WAV header, or in the way I write the audio data to the WAV file, could I change so that the audio plays back at the correct (slower) speed?
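
From what I understand of the RIFF/WAVE format, a player derives the playback speed from the sample-rate and byte-rate fields of the 'fmt ' chunk, so if those don't match the rate the audio was actually captured at, the file plays too fast or too slow. Something like the sketch below (the class name and file path are just placeholders, it isn't part of my recorder class) is how I'd read those fields back out of the finished file and compare the implied duration with how long I actually recorded:

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

public class WavHeaderCheck {
    public static void main(String[] args) throws IOException {
        // Placeholder path to a file produced by the recorder
        File wav = new File("/sdcard/Pictures/SafePhrase/SOUND_test.wav");

        byte[] header = new byte[44];
        try (FileInputStream in = new FileInputStream(wav)) {
            if (in.read(header) != 44) {
                throw new IOException("File too short to contain a WAV header");
            }
        }

        // All multi-byte fields in a RIFF/WAVE header are little-endian
        int sampleRate = (header[24] & 0xff) | ((header[25] & 0xff) << 8)
                | ((header[26] & 0xff) << 16) | ((header[27] & 0xff) << 24);
        int byteRate = (header[28] & 0xff) | ((header[29] & 0xff) << 8)
                | ((header[30] & 0xff) << 16) | ((header[31] & 0xff) << 24);
        long dataLen = wav.length() - 44;

        // A player computes the playback time as dataLen / byteRate, so if this
        // prints a shorter duration than was actually recorded, either the header
        // rate is too high or the data chunk is missing samples.
        System.out.println("Header sample rate: " + sampleRate + " Hz");
        System.out.println("Expected duration:  " + (double) dataLen / byteRate + " s");
    }
}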

Here is the class I'm using. I have omitted some methods from the class to keep the code more readable.

public class SpeechRecognizer {

protected static final String TAG = SpeechRecognizer.class.getSimpleName();

private final Decoder decoder;

private final int sampleRate;
private int bufferSize;
private final AudioRecord recorder;
private boolean record;
private boolean is_recording;

/* Files to record the audio into */
public File pcm_sound_file;
public File wav_sound_file;

private Thread recognizerThread;
public Thread recorder_thread;

private final Handler mainHandler = new Handler(Looper.getMainLooper());

private final Collection<RecognitionListener> listeners = new HashSet<RecognitionListener>();

/**
 * Creates speech recognizer. Recognizer holds the AudioRecord object, so you 
 * need to call {@link release} in order to properly finalize it.
 * 
 * @param config The configuration object
 * @throws IOException thrown if audio recorder can not be created for some reason.
 */
protected SpeechRecognizer(Config config, boolean record) throws IOException {
    this.record = record;
    decoder = new Decoder(config);
    sampleRate = (int)decoder.getConfig().getFloat("-samprate");
    bufferSize = AudioRecord.getMinBufferSize(sampleRate,
            AudioFormat.CHANNEL_IN_MONO,
            AudioFormat.ENCODING_PCM_16BIT);
    recorder = new AudioRecord(
            AudioSource.VOICE_RECOGNITION, sampleRate,
            AudioFormat.CHANNEL_IN_MONO,
            AudioFormat.ENCODING_PCM_16BIT, bufferSize);

    if (recorder.getState() == AudioRecord.STATE_UNINITIALIZED) {
        recorder.release();
        throw new IOException(
                "Failed to initialize recorder. Microphone might be already in use.");
    }
}

/**
 * Stops recognition. All listeners should receive final result if there is
 * any. Does nothing if recognition is not active.
 * 
 * @return true if recognition was actually stopped
 */
public boolean stop() {
    boolean result = stopRecognizerThread();
    if (result) {
        Log.i(TAG, "Stop recognition");
        is_recording = false;
        recorder_thread = null;

        try {
            wav_sound_file = getOutputMediaFile("wav");
            copyWaveFile(pcm_sound_file, wav_sound_file);
        }
        catch (Exception e){
            Log.d("COS", "Failed to convert PCM to WAV");
        }

        final Hypothesis hypothesis = decoder.hyp();
        mainHandler.post(new ResultEvent(hypothesis, true));
    }
    return result;
}


private void copyWaveFile(File inFilename,File outFilename){
    FileInputStream in;
    FileOutputStream out;
    long totalAudioLen;
    long totalDataLen;
    long longSampleRate = sampleRate;
    int channels = 1;
    long byteRate = 16 * sampleRate * channels/8;

    byte[] data = new byte[bufferSize];

    try {
        in = new FileInputStream(inFilename);
        out = new FileOutputStream(outFilename);
        totalAudioLen = in.getChannel().size();
        totalDataLen = totalAudioLen + 36;


        WriteWaveFileHeader(out, totalAudioLen, totalDataLen,
                longSampleRate, channels, byteRate);

        int bytesRead;
        while ((bytesRead = in.read(data)) != -1) {
            out.write(data, 0, bytesRead);
        }

        in.close();
        out.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

private void WriteWaveFileHeader(
        FileOutputStream out, long totalAudioLen,
        long totalDataLen, long longSampleRate, int channels,
        long byteRate) throws IOException {

    byte[] header = new byte[44];

    header[0] = 'R'; // RIFF/WAVE header
    header[1] = 'I';
    header[2] = 'F';
    header[3] = 'F';
    header[4] = (byte) (totalDataLen & 0xff);
    header[5] = (byte) ((totalDataLen >> 8) & 0xff);
    header[6] = (byte) ((totalDataLen >> 16) & 0xff);
    header[7] = (byte) ((totalDataLen >> 24) & 0xff);
    header[8] = 'W';
    header[9] = 'A';
    header[10] = 'V';
    header[11] = 'E';
    header[12] = 'f'; // 'fmt ' chunk
    header[13] = 'm';
    header[14] = 't';
    header[15] = ' ';
    header[16] = 16; // 4 bytes: size of 'fmt ' chunk
    header[17] = 0;
    header[18] = 0;
    header[19] = 0;
    header[20] = 1; // format = 1
    header[21] = 0;
    header[22] = (byte) channels;
    header[23] = 0;
    header[24] = (byte) (longSampleRate & 0xff);
    header[25] = (byte) ((longSampleRate >> 8) & 0xff);
    header[26] = (byte) ((longSampleRate >> 16) & 0xff);
    header[27] = (byte) ((longSampleRate >> 24) & 0xff);
    header[28] = (byte) (byteRate & 0xff);
    header[29] = (byte) ((byteRate >> 8) & 0xff);
    header[30] = (byte) ((byteRate >> 16) & 0xff);
    header[31] = (byte) ((byteRate >> 24) & 0xff);
    header[32] = (byte) (16 / 8); // block align
    header[33] = 0;
    header[34] = 16; // bits per sample
    header[35] = 0;
    header[36] = 'd';
    header[37] = 'a';
    header[38] = 't';
    header[39] = 'a';
    header[40] = (byte) (totalAudioLen & 0xff);
    header[41] = (byte) ((totalAudioLen >> 8) & 0xff);
    header[42] = (byte) ((totalAudioLen >> 16) & 0xff);
    header[43] = (byte) ((totalAudioLen >> 24) & 0xff);

    out.write(header, 0, 44);
}

private void writeAudioDataToFile() {

    byte[] sData = new byte[bufferSize];

    FileOutputStream os = null;
    try {
        pcm_sound_file = getOutputMediaFile("pcm");
        os = new FileOutputStream(pcm_sound_file);
    } catch (Exception e) {
        e.printStackTrace();
        return;
    }

    while (is_recording) {
        // Only write the bytes that were actually read from the recorder
        int bytesRead = recorder.read(sData, 0, bufferSize);
        if (bytesRead > 0) {
            try {
                os.write(sData, 0, bytesRead);
            } catch (Exception e) {e.printStackTrace();}
        }
    }

    try {
        os.close();
    } catch (Exception e) {e.printStackTrace();}
}


private File getOutputMediaFile(String format){
    // To be safe, you should check that the SDCard is mounted
    // using Environment.getExternalStorageState() before doing this.

    File mediaStorageDir = new File(Environment.getExternalStoragePublicDirectory(
            Environment.DIRECTORY_PICTURES), "SafePhrase");
    // This location works best if you want the created files to be shared
    // between applications and persist after your app has been uninstalled.

    // Create the storage directory if it does not exist
    if (! mediaStorageDir.exists()){
        if (! mediaStorageDir.mkdirs()){
            Log.d("COS", "failed to create directory");
            return null;
        }
    }

    // Create a media file name
    String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date());
    File mediaFile;
    mediaFile = new File(mediaStorageDir.getPath() + File.separator +
                "SOUND_"+ timeStamp + "." + format);

    return mediaFile;
}


private final class RecognizerThread extends Thread {

    private int remainingSamples;
    private int timeoutSamples;
    private final static int NO_TIMEOUT = -1;

    public RecognizerThread(int timeout) {
        if (timeout != NO_TIMEOUT)
            this.timeoutSamples = timeout * sampleRate / 1000;
        else
            this.timeoutSamples = NO_TIMEOUT;
        this.remainingSamples = this.timeoutSamples;
    }

    public RecognizerThread() {
        this(NO_TIMEOUT);
    }

    @Override
    public void run() {

        recorder.startRecording();

        /* If the user has asked to record, then create a new recorder thread where the audio
        * will be recorded. */
        if(record) {
            recorder_thread = new Thread(new Runnable() {
                @Override
                public void run() {
                    Log.d("COS", "RECORDING!");
                    writeAudioDataToFile();
                }
            }, "Audio Recorder Thread");
            recorder_thread.start();
        }
        else{
            Log.d("COS", "NOT RECORDING!");
        }

        /* SPEECH RECOGNITION BELOW */

        if (recorder.getRecordingState() == AudioRecord.RECORDSTATE_STOPPED) {
            recorder.stop();
            IOException ioe = new IOException(
                    "Failed to start recording. Microphone might be already in use.");
            mainHandler.post(new OnErrorEvent(ioe));
            return;
        }

        Log.d(TAG, "Starting decoding");

        decoder.startUtt();
        short[] buffer = new short[bufferSize];
        boolean inSpeech = decoder.getInSpeech();

        // Skip the first buffer, usually zeroes
        recorder.read(buffer, 0, buffer.length);

        while (!interrupted()
                && ((timeoutSamples == NO_TIMEOUT) || (remainingSamples > 0))) {
            int nread = recorder.read(buffer, 0, buffer.length);

            if (-1 == nread) {
                throw new RuntimeException("error reading audio buffer");
            } else if (nread > 0) {
                decoder.processRaw(buffer, nread, false, false);

                // int max = 0;
                // for (int i = 0; i < nread; i++) {
                //     max = Math.max(max, Math.abs(buffer[i]));
                // }
                // Log.e("!!!!!!!!", "Level: " + max);

                if (decoder.getInSpeech() != inSpeech) {
                    inSpeech = decoder.getInSpeech();
                    mainHandler.post(new InSpeechChangeEvent(inSpeech));
                }

                if (inSpeech)
                    remainingSamples = timeoutSamples;

                final Hypothesis hypothesis = decoder.hyp();
                mainHandler.post(new ResultEvent(hypothesis, false));
            }

            if (timeoutSamples != NO_TIMEOUT) {
                remainingSamples = remainingSamples - nread;
            }
        }

        recorder.stop();
        decoder.endUtt();

        // Remove all pending notifications.
        mainHandler.removeCallbacksAndMessages(null);

        // If we met timeout signal that speech ended
        if (timeoutSamples != NO_TIMEOUT && remainingSamples <= 0) {
            mainHandler.post(new TimeoutEvent());
        }
    }
}
}