1

I am trying to load my own trained data into tesseract.js. Since the file is stored locally, I tried to load everything offline. The code I used is shown below:

<script src="tesseract.js"></script>

<script>
// Serve the Tesseract worker and core scripts from the directory that holds this page.
(function () {
    // Absolute URL of the current page's directory: drop the final path
    // segment (the "**.html" file name) and prefix the origin.
    var baseUrl = (function (loc) {
        var parentSegments = loc.pathname.split('/').slice(0, -1);
        return loc.origin + parentSegments.join('/');
    })(window.location);
    console.log(baseUrl);

    var localPaths = {
        workerPath: baseUrl + '/worker.js',
        // langPath is not passed here; uncomment to load trained data from a local folder.
        //langPath: baseUrl + '/traineddata/',
        corePath: baseUrl + '/index.js',
    };
    window.Tesseract = Tesseract.create(localPaths);
})();
</script>

<script>
/**
 * Clear the #log element, then run Tesseract recognition on `file` using the
 * language currently selected in #langsel.
 *
 * Every progress packet is logged and passed to progressUpdate(); the final
 * result is logged and passed to progressUpdate() as
 * { status: 'done', data: <result> }.
 *
 * @param {*} file - image source handed straight to Tesseract.recognize().
 */
function recognizeFile(file){
    var logElement = document.querySelector("#log");
    logElement.innerHTML = '';

    // Forward intermediate progress packets to the page's progress display.
    function onProgress(packet){
        console.info(packet);
        progressUpdate(packet);
    }

    // Report the finished recognition result.
    function onDone(data){
        console.log(data);
        progressUpdate({ status: 'done', data: data });
    }

    var job = Tesseract.recognize(file, {
        lang: document.querySelector('#langsel').value
    });
    job.progress(onProgress).then(onDone);
}
</script>

The code above works fine if langPath is not set, but when I point langPath to a local folder, Tesseract fails to load anything, with the following error:

Failed loading language 'eng'
Tesseract couldn't load any languages!

...

AdaptedTemplates != NULL:Error:Assert failed:in file ../classify/adaptmatch.cpp, line 190
SCRIPT0: abort() at Error
   at Na (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:36:24)
   at ka (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:511:83)
   at Module.de._abort (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:377:166)
   at $L (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:387:55709)
   at jpa (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:392:22274)
   at lT (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:391:80568)
   at mT (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:391:80698)
   at BS (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:391:69009)
   at bP (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:387:110094)
   at jT (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:391:80280)
   at RJ (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:387:19088)
   at QJ (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:387:17789)
   at zI (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:403:90852)
   at tw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:49079)
   at rw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:48155)
   at lw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:39071)
   at _v (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:22565)
   at aw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:24925)
   at cw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:27237)
   at oj (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:386:24689)
   at Og (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:386:10421)
   at $.prototype.Recognize (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:558:379)
   at Anonymous function (file:///C:/Users/user/Downloads/tesseract.js-master/dist/worker.js:8814:9)
   at Anonymous function (file:///C:/Users/user/Downloads/tesseract.js-master/dist/worker.js:8786:9)
   at xhr.onerror (file:///C:/Users/user/Downloads/tesseract.js-master/dist/worker.js:8429:9)
If this abort() is unexpected, build with -s ASSERTIONS=1 which can give more information.
index.js (8,1)

I have both eng.traineddata and eng.traineddata.gz in the /traineddata folder as apparently the ungzip process is skipped. Is there anything I neglected? Any help is appreciated.

R. Wang
  • 353
  • 4
  • 11
  • Hi, did you find a solution to your question? I am also facing a similar issue. – flamelite Sep 05 '17 at 07:17
  • Not a solution in any sense, but I converted the data to blob and put that in the js file to load it – R. Wang Nov 06 '17 at 14:10
  • So is the Tesseract index file able to read that blob and give correct output? Anyway, I was able to load the language file locally and it's working fine for me now. – flamelite Nov 06 '17 at 14:13

2 Answers

2

I know this question is an old one, but I recently needed to use Tesseract.js in one of my projects. I needed to load the data files locally, so here is what I did.

Instead of creating a new worker, I modified the default worker options. So I didn't use Tesseract.createWorker; I set the paths directly and then used recognize.

 // Override the default worker options in place; no new worker is created.
 (function (options, distUrl) {
     // distUrl is protocol://domain.com + the folder that holds the data files
     options.langPath = distUrl;

     // Optional: the worker and core scripts can be pointed at local copies
     // too (the author notes this was not needed).
     options.workerPath = distUrl + "worker.js"; // location of worker.js
     options.corePath = distUrl + "index.js"; // location of index.js
 })(Tesseract.workerOptions, window.location.origin + "/scripts/tesseract/dist/");

//example lang path would be protocol://domain.com/scripts/tesseract/dist/

In my own setup I set only langPath and left the worker and core paths untouched, pointing to the default CDN; the workerPath and corePath assignments above are optional.

PS: When using local worker.js and core.js paths, I was getting an uncaught error on postMessage() in worker.js. That's why I am using a local path for the language data only. I still don't know how to fix it or why it happens, but you can follow this issue here and here

Hey24sheep
  • 1,172
  • 6
  • 16
0

I solved the problem by taking the corePath file from tesseract.js-core 0.1.0

// Load the worker, language data and core all from this site's /tesseract/ folder.
window.Tesseract = (function (tesseractDir) {
   return Tesseract.create({
      workerPath: tesseractDir + "worker.js", // taken from tesseract.js-1.0.10
      langPath: tesseractDir,
      corePath: tesseractDir + "index.js", // taken from tesseract.js-core-0.1.0
   });
})(window.location.origin + "/tesseract/");

And the gzipped language data from https://github.com/naptha/tessdata/tree/gh-pages/3.02

Jack
  • 1
  • 1