var crypto = require('crypto');
var sha = crypto.createHash('sha512').update(String(s));
var result = sha.digest('hex');
That's my current code.
How do I do this async? I'm planning to do the sha512 100,000 times.
Node's crypto module does not provide asynchronous SHA512 hashing at present, and although the createHash() stream interface looks asynchronous, it still executes on the main thread and blocks the event loop. There is an open issue for this: https://github.com/nodejs/node/issues/678
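To see why the stream interface does not help, here is a quick sketch (reusing the question's variable s) of the stream-style usage: the digest is delivered through a callback, but the hashing itself still runs synchronously on the main thread during write() and end():

var crypto = require('crypto');

var hash = crypto.createHash('sha512');
hash.on('readable', function () {
    var digest = hash.read();            // the result arrives via the event loop...
    if (digest) {
        console.log(digest.toString('hex'));
    }
});
hash.write(String(s));                   // ...but the hashing work happens right here,
hash.end();                              // synchronously, blocking the event loop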
In the interim, you can use @ronomon/crypto-async to do SHA512 asynchronously and concurrently in the threadpool, without blocking the event loop, for multi-core throughput.
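As a rough sketch of what that could look like, assuming the module exposes a hash(algorithm, sourceBuffer, callback) function (check the module's README for the exact signature and supported algorithm names):

var cryptoAsync = require('@ronomon/crypto-async');

var source = Buffer.from(String(s), 'utf8');
// Assumed API: hash(algorithm, source, callback), executed in the libuv threadpool.
cryptoAsync.hash('SHA-512', source, function (error, hash) {
    if (error) throw error;
    var result = hash.toString('hex');   // hex digest, computed off the event loop
});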
If you cannot find a better solution, this trick may help you:
You can create a standalone SHA-512 generator application that receives your string s on standard input, generates the hash, and writes it to standard output.
From within your app, you can execute it via the child_process module and catch the response with an event handler. There is another Stack Overflow thread about child_process that may come in handy.
This way you can encapsulate the sync function into an async context. :)
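A minimal sketch of that idea, assuming a hypothetical standalone hasher called sha512-generator that reads a string from stdin and prints the hex digest to stdout:

var spawn = require('child_process').spawn;

function hashViaChildProcess(s, callback) {
    var child = spawn('sha512-generator');   // hypothetical external hasher
    var output = '';

    child.stdout.on('data', function (chunk) {
        output += chunk;                     // collect the hex digest from stdout
    });
    child.on('error', callback);
    child.on('close', function () {
        callback(null, output.trim());
    });

    child.stdin.end(String(s));              // feed the input string on stdin
}

Keep in mind that spawning a separate process per string would be very expensive for 100,000 hashes, so you would want to batch many strings into each invocation.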
Node.js runs your JavaScript in a single thread, so if you want to do asynchronous processing, you have to either:
- use a thread implementation provided by a third-party module, or
- fork additional processes and split the work among them.
The method I present below uses the latter approach.
The Node.js API provides a module called cluster that allows you to fork your process, much as you would if you were programming in C.
My approach breaks the input data (the strings you want to hash) into chunks, where each chunk is passed to a child worker process. When the worker finishes work on its chunk, it signals the master process, passing the results back.
The master node keeps running while the workers do their job, so it can do any unrelated asynchronous work without being blocked. When all workers finish, the master is signaled and it is free to further process the final results.
To run my test, you can simply do:
node parhash
My tests ran on an Intel Core i5 4670 with 8 GB of DDR3 RAM.
For your 100,000 strings, 1 worker completed in 450 ms, while 10 workers took 350 ms.
In a test with a million strings, 1 worker did the job in 4.5 seconds, while 10 workers did it in 3.5 seconds.
Here is the code:
var
    crypto = require('crypto'),
    cluster = require('cluster');

var
    STRING_COUNT = 1000000,
    STRINGS_PER_WORKER = 100000,
    WORKER_COUNT = Math.ceil(STRING_COUNT / STRINGS_PER_WORKER),
    chunks = [],
    nextChunkId = 0,
    results = [],
    startTime,
    pendingWorkers = WORKER_COUNT;
/**
 * Generates strings partitioned in WORKER_COUNT chunks.
 * Each of these chunks will later be passed to a child process to be parsed asynchronously.
 *
 * You should replace this with your working data.
 */
function generateDemoStringChunks() {
    var
        si, wi,
        chunk;

    for (wi = 0; wi < WORKER_COUNT; wi++) {
        chunk = [];
        for (si = STRINGS_PER_WORKER * wi; (si < STRINGS_PER_WORKER * (wi + 1)) && (si < STRING_COUNT); si++) {
            chunk.push(si.toString());
        }
        chunks.push(chunk);
    }
}
/**
 * After all workers finish processing, this will be executed.
 *
 * Here you should do whatever you want to process the resulting hashes.
 */
function mergeResults() {
    results.sort(function compare(a, b) {
        return a.id - b.id;
    });

    console.info('Summary:');
    results.forEach(function (result) {
        console.info('\tChunk %d: %d hashes (here is the first hash: "%s")', result.id, result.data.length, result.data[0]);
    });
}
/**
 * This will be called on the master side every time a worker finishes working.
 *
 * @param {object} worker the Worker that finished
 * @param {{id: number, data: [string]}} result the result
 */
function processWorkerResult(worker, result) {
    console.info('Worker %d finished computing %d hashes.', worker.id, result.data.length);
    results.push(result);
    worker.kill();

    if (--pendingWorkers == 0) {
        console.info('Work is done. Whole process took %d seconds.', process.hrtime(startTime)[0]);
        mergeResults();
    }
}
/**
 * Gets a chunk of data available for processing.
 *
 * @returns {{id: number, data: [string]}} the chunk to be passed to the worker
 */
function getNextAvailableChunk() {
    var chunk = {
        id: nextChunkId,
        data: chunks[nextChunkId]
    };
    nextChunkId++;
    return chunk;
}
/**
 * The master node will send a chunk of data every time a worker node
 * signals it's ready to work.
 */
function waitForWorkers() {
    cluster.on('online', function (worker) {
        console.info('Worker %d is online.', worker.id);

        worker.on('message', processWorkerResult.bind(null, worker));
        worker.send(getNextAvailableChunk());
    });
}
/**
 * Start workers.
 */
function spawnWorkers() {
    var wi;

    for (wi = 0; wi < WORKER_COUNT; wi++) {
        cluster.fork();
    }
}
/**
 * The hash function.
 *
 * @param {string} s a string to be hashed
 * @returns {string} the hash string
 */
function hashString(s) {
    return crypto.createHash('sha512').update(s).digest('hex');
}
/**
 * A worker will wait for the master to send a chunk of data and will
 * start processing as soon as it arrives.
 */
function processChunk() {
    cluster.worker.on('message', function (chunk) {
        var
            result = [];

        console.info('Worker %d received chunk %d with a load of %d strings.', cluster.worker.id, chunk.id, chunk.data.length);

        chunk.data.forEach(function hashItem(s) {
            result.push(hashString(s));
        });

        cluster.worker.send({
            id: chunk.id,
            data: result
        });
    });
}
function main() {
    if (cluster.isMaster) {
        /*
            The master node will instantiate all required workers
            and then pass a chunk of data for each one.

            It will then wait for all of them to finish so it can
            merge the results.
        */
        startTime = process.hrtime();

        generateDemoStringChunks();
        spawnWorkers();
        waitForWorkers();
    } else {
        /*
            A worker node will wait for a chunk to arrive and
            then will start processing it. When finished, it will
            send a message back to the master node with the
            resulting hashes.
        */
        console.info('Worker %d is starting.', cluster.worker.id);
        processChunk();
    }
}

main();
I can't tell how well it would perform if it were implemented using threads because I haven't tested it. You could try WebWorker Threads if you want to do a benchmark (note: I haven't tried the WebWorker Threads module yet and I don't guarantee it works - you are on your own here).