I would treat the data stream as a moving window of 6 bytes (the length of the string you are searching for).
For example, if you have the following file (in bytes):
23, 34, 45, 67, 76
A moving window of 2 passing over the data will be:
[23, 34]
[34, 45]
[45, 67]
[67, 76]
I propose going over these windows looking for your string.
var Stream = require('stream');
var fs = require('fs');
// Scan ./dump.dmp through a 6-byte sliding window and count the windows
// whose text is exactly '0xEF53'.
var exampleStream = fs.createReadStream("./dump.dmp");
var matchCounter = 0;

// Invoked once per window; bumps the counter when the window's text matches.
var countBufferMatch = function (windowBytes) {
  var isMatch = windowBytes.toString() === '0xEF53';
  if (isMatch) {
    matchCounter += 1;
  }
};

// Invoked once the window stream signals 'end'.
var reportBufferMatches = function () {
  console.log('done scanning the file, found', matchCounter);
};

var byteWindows = windowStream(exampleStream, 6);
byteWindows.on('window', countBufferMatch);
byteWindows.on('end', reportBufferMatches);
/**
 * Turns a stream of byte chunks into a stream of overlapping fixed-size windows.
 *
 * For every run of `windowSize` consecutive bytes seen on `inputStream`
 * (windows may span chunk boundaries) a 'window' event is emitted with a
 * newly allocated Buffer holding those bytes. An 'end' event is emitted when
 * `inputStream` emits 'end'.
 *
 * @param {Object} inputStream - Emitter of 'data' (indexable byte chunks) and 'end' events.
 * @param {number} windowSize - Number of bytes per window; must be a positive integer.
 * @returns {Stream} Emitter of 'window' and 'end' events.
 * @throws {RangeError} If `windowSize` is not a positive integer.
 */
function windowStream(inputStream, windowSize) {
  // A size that can never equal soFar.length would emit nothing and keep
  // every byte of the input in memory, so reject it up front.
  if (!Number.isInteger(windowSize) || windowSize < 1) {
    throw new RangeError('windowSize must be a positive integer');
  }

  var outStream = new Stream();
  var soFar = [];

  inputStream.on('data', function (data) {
    // Walk the chunk in place instead of copying it into an array first.
    for (var i = 0; i < data.length; i++) {
      soFar.push(data[i]);
      if (soFar.length === windowSize) {
        // Buffer.from replaces the deprecated `new Buffer(array)` constructor.
        outStream.emit('window', Buffer.from(soFar));
        // Drop the oldest byte so the next byte completes the next window.
        soFar.shift();
      }
    }
  });

  inputStream.on('end', function () {
    outStream.emit('end');
  });

  return outStream;
}
Usually I'm not a fan of going over bytes when you actually need the underlying string. In UTF-8 there are cases where it might cause some issues, but assuming everything is in English it should be fine. The example can be improved to support these cases by using a string decoder.
EDIT
Here is a UTF-8 version:
var Stream = require('stream');
var fs = require('fs');
// Scan ./dump.dmp, decoded as UTF-8 text, through a 6-character sliding
// window and count the windows equal to '0xEF53'.
var exampleStream = fs.createReadStream("./dump.dmp", {encoding: 'utf8'});
var matchCounter = 0;

// Invoked once per window; bumps the counter when the window string matches.
var countStringMatch = function (candidate) {
  var isMatch = candidate === '0xEF53';
  if (isMatch) {
    matchCounter += 1;
  }
};

// Invoked once the window stream signals 'end'.
var reportStringMatches = function () {
  console.log('done scanning the file, found', matchCounter);
};

var stringWindows = windowStream(exampleStream, 6);
stringWindows.on('window', countStringMatch);
stringWindows.on('end', reportStringMatches);
/**
 * Turns a stream of string chunks into a stream of overlapping fixed-length
 * substrings.
 *
 * For every run of `windowSize` consecutive characters seen on `inputStream`
 * (windows may span chunk boundaries) a 'window' event is emitted with that
 * substring. Length is measured with String#length, i.e. in UTF-16 code
 * units. An 'end' event is emitted when `inputStream` emits 'end'.
 *
 * @param {Object} inputStream - Emitter of 'data' (string chunks) and 'end' events.
 * @param {number} windowSize - Characters per window; must be a positive integer.
 * @returns {Stream} Emitter of 'window' and 'end' events.
 * @throws {RangeError} If `windowSize` is not a positive integer.
 */
function windowStream(inputStream, windowSize) {
  // A size that can never equal soFar.length would emit nothing and keep
  // the whole input concatenated in memory, so reject it up front.
  if (!Number.isInteger(windowSize) || windowSize < 1) {
    throw new RangeError('windowSize must be a positive integer');
  }

  var outStream = new Stream();
  var soFar = "";

  inputStream.on('data', function (data) {
    // Walk the chunk in place instead of copying it into an array first.
    for (var i = 0; i < data.length; i++) {
      soFar += data[i];
      if (soFar.length === windowSize) {
        outStream.emit('window', soFar);
        // Drop the oldest character so the next one completes the next window.
        soFar = soFar.slice(1);
      }
    }
  });

  inputStream.on('end', function () {
    outStream.emit('end');
  });

  return outStream;
}