I am trying to push (Post) pdf files to Solr/Tika for text extraction and indexing using Ajax/js. I've gotten the following curl command to work:
curl 'http://localhost:8983/solr/techproducts/update/extract?literal.id=doc1&commit=true' -F "myfile=@/PathToFile/SomeDoc.pdf"
This command puts the desired pdf into the Solr Index, and I can retrieve it just fine. However, I need to be able to do this from a web browsers. After much googling, and a little experimentation I've got the following js code ALMOST working. It returns a 0 status code, and status of Success, but nothing gets committed to the index:
$("#solrPost").click(function(event) {
event.stopPropagation();
event.preventDefault();
/* Read a local pdf file as a blob */
let fileAsBlob = null;
let file = $('#upload_file')[0].files[0];
let myReader = new FileReader();
myReader.onloadend = function() {
fileAsBlob = myReader.result;
sendToSolr(fileAsBlob);
};
fileAsBlob = myReader.readAsArrayBuffer(file);
function sendToSolr(fileAsBlob) {
$.ajax({
url:"http://localhost:8983/solr/techproducts/update/extract?literal.id=doc2&commit=true",
type: 'POST',
data: fileAsBlob,
cache: false,
crossOrigin: true,
dataType: 'jsonp',
jsonp: 'json.wrf',
processData: false,
contentType: false,
success: function(data, status) {
console.log("Ajax.post successful, status: " + data.responseHeader.status + "\t status text: " + status);
console.log("debug");
},
error: function(data, status) {
console.log("Ajax.post error, status: " + data.status + "\t status text:" + data.statusText);
},
done: function(data, status) {
console.log("Ajax.post Done");
}
});
}
This is SO close to working, but I just can't figure out what's going wrong. All indications (From client side) are good, but nothing added to the index. Note:
- The fileReader is working, I see an Array of the same size as the source pdf.
- Even though I specify POST, when I examine the network tab in the browser/debugger, it says GET.
- I've hardcoded the literal.id=doc2 for simplicity, not a long term strategy...
I know there are similar posts, but none address the issue of extracting pdf's using Solr/Tika outside of the provided post script. Thanks in advance.