1

We're using pocketsphinx to help us convert .wav files into text files. We don't know why it gives us a strange output as it gives us only <s> and </s> after the conversion. We're using the default dictionary, langauge model and acoustic model given by the cmusphinx community.

Here is the code we're using for the conversion:

package com.example.saling_wika.saling_wika;


import android.app.Activity;
import android.net.Uri;
import android.os.AsyncTask;
import android.os.Bundle;
import android.os.Environment;
import android.util.Log;
import android.widget.Toast;


import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

import edu.cmu.pocketsphinx.Assets;
import edu.cmu.pocketsphinx.Config;
import edu.cmu.pocketsphinx.Decoder;
import edu.cmu.pocketsphinx.Segment;

import static android.support.v7.widget.StaggeredGridLayoutManager.TAG;
import static junit.framework.Assert.fail;





public class ConversionModule extends Activity {
    static {
        System.loadLibrary("pocketsphinx_jni");
    }
    Config c;
    Decoder ps;
    FileInputStream stream;

    @Override
    public void onCreate(Bundle state) {
        super.onCreate(state);
        setContentView(R.layout.main);

        c = Decoder.defaultConfig();

    /*Configuring decoder object*/
        c.setString("-hmm", new File(Environment.getExternalStorageDirectory() + "/Android/data/com.example.saling_wika.saling_wika/files/sync", "en-us-ptm").getPath());
        c.setString("-dict", new File(Environment.getExternalStorageDirectory() + "/Android/data/com.example.saling_wika.saling_wika/files/sync", "cmudict-en-us.dict").getPath());
        c.setString("-lm", new File(Environment.getExternalStorageDirectory() + "/Android/data/com.example.saling_wika.saling_wika/files/sync", "weather.dmp").getPath());
        c.setBoolean("-allphone_ci", true);


        ps = new Decoder(c);


        try {
            final File file = new File(AudioToConvert.pathko);
            Uri uri = Uri.fromFile(file);
            File auxFile = new File(uri.getPath());
            stream = new FileInputStream(auxFile);


        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }




        ps.startUtt();
        byte[] b = new byte[4096];
        try {
            int nbytes;
            while ((nbytes = stream.read(b)) >= 0) {
                ByteBuffer bb = ByteBuffer.wrap(b, 0, nbytes);
                short[] s = new short[nbytes / 2];
                bb.asShortBuffer().get(s);
                ps.processRaw(s, nbytes / 2, false, false);
            }
        } catch (IOException e) {

        }
        ps.endUtt();
        //  System.out.println(ps.hyp().getHypstr());
        Toast.makeText(getApplicationContext(), ps.hyp().getHypstr(), Toast.LENGTH_LONG).show();
        for (Segment seg : ps.seg()) {
            //  System.out.println(seg.getWord());
            Toast.makeText(getApplicationContext(),seg.getWord(), Toast.LENGTH_LONG).show();
        }
        ;


    }
}

1 Answers1

0

As explained on CMUSphinx forum you have multiple issues:

  1. You need to add bb.order(ByteOrder.LITTLE_ENDIAN); as described in Give a file as input to Pocketsphinx on Android

  2. Your input file should have PCM format 16khz 16 bit mono. In case you want to submit some encoded file you need to decode it to raw data first.

Community
  • 1
  • 1
Nikolay Shmyrev
  • 24,897
  • 5
  • 43
  • 87
  • 1
    Hi I used the answer given here: https://stackoverflow.com/a/37640851/1693203, and I am having the same problem. I have an 11 second wav file with a native English speaker saying a short sentence. The file is 16 bit, little endian, 16 kHz and I have removed the wav header, so the file only has the raw audio part. I am getting result [speech] [speech] [speech] . Can you give me a few hints on how to solve this, please? – user13267 Jan 10 '20 at 12:00
  • By the way I get the same result if I use the file keeping the wav header – user13267 Jan 10 '20 at 12:01