
I'm trying to write a script to download images using node.js. This is what I have so far:

var http = require('http')
var fs = require('fs')

var maxLength = 10 // 10mb
var download = function(uri, callback) {
  http.request(uri)
    .on('response', function(res) {
      if (res.headers['content-length'] > maxLength*1024*1024) {
        callback(new Error('Image too large.'))
      } else if (!~[200, 304].indexOf(res.statusCode)) {
        callback(new Error('Received an invalid status code.'))
      } else if (!res.headers['content-type'].match(/image/)) {
        callback(new Error('Not an image.'))
      } else {
        var body = ''
        res.setEncoding('binary')
        res
          .on('error', function(err) {
            callback(err)
          })
          .on('data', function(chunk) {
            body += chunk
          })
          .on('end', function() {
            // What about Windows?!
            var path = '/tmp/' + Math.random().toString().split('.').pop()
            fs.writeFile(path, body, 'binary', function(err) {
              callback(err, path)
            })
          })
      }
    })
    .on('error', function(err) {
      callback(err)
    })
    .end();
}

I, however, want to make this more robust:

  1. Are there libraries that do this and do this better?
  2. Is there a chance that response headers lie (about length, about content type)?
  3. Are there any other status codes I should care about? Should I bother with redirects?
  4. I think I read somewhere that binary encoding is going to be deprecated. What do I do then?
  5. How can I get this to work on Windows? (A sketch for points 4 and 5 is below.)
  6. Any other ways you can make this script better?

Why: this is for a feature similar to imgur, where users give me a URL, I download that image, and rehost it in multiple sizes.
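
A minimal sketch addressing points 4 and 5: collect the chunks as Buffers instead of using the 'binary' string encoding, and build the temp path with os.tmpdir() and path.join() so it also works on Windows (the helper name is just illustrative):

var http = require('http')
var fs = require('fs')
var os = require('os')
var path = require('path')

var downloadToTmp = function(uri, callback) {
  http.get(uri, function(res) {
    var chunks = []
    res.on('data', function(chunk) {
      chunks.push(chunk) // keep raw Buffers; no string encoding involved
    })
    res.on('end', function() {
      // os.tmpdir() resolves to the platform's temp directory, including on Windows
      var file = path.join(os.tmpdir(), Math.random().toString().split('.').pop())
      fs.writeFile(file, Buffer.concat(chunks), function(err) {
        callback(err, file)
      })
    })
  }).on('error', callback)
}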

Shanil Arjuna
Jonathan Ong

7 Answers


I'd suggest using the request module. Downloading a file is as simple as the following code:

var fs = require('fs'),
    request = require('request');

var download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    console.log('content-type:', res.headers['content-type']);
    console.log('content-length:', res.headers['content-length']);

    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};

download('https://www.google.com/images/srpr/logo3w.png', 'google.png', function(){
  console.log('done');
});
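
The HEAD request above only logs the headers. If you also want to enforce the size and content-type checks from the question before piping, a minimal sketch using the same request module (the 10 MB limit and error messages are just illustrative) could look like this:

var fs = require('fs'),
    request = require('request');

var maxLength = 10 * 1024 * 1024; // 10 MB

var download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    if (err) return callback(err);
    if (!/image/.test(res.headers['content-type'] || ''))
      return callback(new Error('Not an image.'));
    if (Number(res.headers['content-length']) > maxLength)
      return callback(new Error('Image too large.'));

    request(uri)
      .on('error', callback)
      .pipe(fs.createWriteStream(filename))
      .on('close', callback);
  });
};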
Cezary Wojtkowski

You can use Axios (a promise-based HTTP client for Node.js) to download images in the order of your choosing in an asynchronous environment:

npm i axios

Then, you can use the following basic example to begin downloading images:

const fs = require('fs');
const axios = require('axios');

/* ============================================================
  Function: Download Image
============================================================ */

const download_image = (url, image_path) =>
  axios({
    url,
    responseType: 'stream',
  })
    .then(
      response =>
        new Promise((resolve, reject) => {
          response.data
            .pipe(fs.createWriteStream(image_path))
            .on('finish', () => resolve())
            .on('error', e => reject(e));
        }),
    )
    // Resolve with a small result object so the calls below can inspect .status / .error
    .then(() => ({ status: true, error: '' }))
    .catch(error => ({ status: false, error: error.toString() }));

/* ============================================================
  Download Images in Order
============================================================ */

(async () => {
  let example_image_1 = await download_image('https://example.com/test-1.png', 'example-1.png');

  console.log(example_image_1.status); // true
  console.log(example_image_1.error); // ''

  let example_image_2 = await download_image('https://example.com/does-not-exist.png', 'example-2.png');

  console.log(example_image_2.status); // false
  console.log(example_image_2.error); // 'Error: Request failed with status code 404'

  let example_image_3 = await download_image('https://example.com/test-3.png', 'example-3.png');

  console.log(example_image_3.status); // true
  console.log(example_image_3.error); // ''
})();
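
If the order doesn't matter, the same helper can run the downloads in parallel with Promise.all (the URLs are placeholders):

(async () => {
  // Fire all downloads at once and wait for all of them to finish
  await Promise.all([
    download_image('https://example.com/test-1.png', 'example-1.png'),
    download_image('https://example.com/test-2.png', 'example-2.png'),
  ]);
  console.log('all downloads finished');
})();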
Beeno Tung
Grant Miller
    Great example ! But barely readable code, try the [standard](https://standardjs.com/) style :D – camwhite Sep 13 '18 at 02:24
    @camwhite I prefer [semicolons](https://standardjs.com/rules.html#semicolons). ;) – Grant Miller Sep 13 '18 at 02:33
    You really should attach 'finish' and 'error' events to the write stream, wrap them in a Promise and return the promise. Otherwise you may try to access an image that hasn't been completely downloaded yet. – jwerre Feb 28 '19 at 19:19
  • Wouldn't the await would make sure the image downloads completely before trying to access? @jwerre – FabricioG Mar 14 '19 at 06:31
  • @jwerre @FabricioG I've updated the function `download_image` to capture the 'finish' and 'error' event for the returned promise – Beeno Tung Jun 11 '19 at 14:52
  • Don't think this is working, copied the example but nope... – Paul Jul 14 '20 at 15:53
  • example_image_1 is always undefined and no error or info to troubleshoot. – Greggory Wiley Feb 09 '22 at 03:54

I ran into this problem a few days ago. For a pure Node.js answer, I would suggest using a Transform stream to merge the chunks together.

var http = require('http'),                                                
    Stream = require('stream').Transform,                                  
    fs = require('fs');                                                    

var url = 'http://www.google.com/images/srpr/logo11w.png';                    

http.request(url, function(response) {                                        
  var data = new Stream();                                                    

  response.on('data', function(chunk) {                                       
    data.push(chunk);                                                         
  });                                                                         

  response.on('end', function() {                                             
    fs.writeFileSync('image.png', data.read());                               
  });                                                                         
}).end();

Newer Node versions don't handle binary strings well, so concatenating the chunks as strings is not a good idea when working with binary data.

Just be careful when using 'data.read()': it empties the stream for the next 'read()' operation. If you want to use the data more than once, store it somewhere.
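
As a comment below suggests, you can also avoid the stream module entirely by collecting the chunks in a plain array and joining them with Buffer.concat; a sketch of the same example:

var http = require('http'),
    fs = require('fs');

var url = 'http://www.google.com/images/srpr/logo11w.png';

http.get(url, function(response) {
  var chunks = [];

  response.on('data', function(chunk) {
    chunks.push(chunk);
  });

  response.on('end', function() {
    // Buffer.concat joins the raw chunks without any string conversion
    fs.writeFileSync('image.png', Buffer.concat(chunks));
  });
});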

Nihey Takizawa
    Why not stream the download directly to disk? – geon May 09 '17 at 07:30
  • had a lot of problems with chunking strings together as it created a corrupt file, but this did it – Shaho May 24 '20 at 19:44
    Alternatively you can use an array for `data`, and replace `data.read()` with `Buffer.concat(data)`. This way we don't need to import the stream module – Coco Liliace Apr 22 '21 at 17:24
    For `https://...` urls, use the `https` module, as seen in [@chandan-chhajer's answer](https://stackoverflow.com/a/49687365/2441655). – Venryx Jun 10 '21 at 09:10
  • How do you check for errors using this? I'm downloading a file which ends up being corrupted somehow... trying to debug this. Is setting the encoding a possible solution? – Steven Jun 12 '21 at 19:08
  • Thanks everyone. Used Nihey's answer as main code. Used ternary operator plus some simple regex to determine whether to use http or https like Venryx mentioned. Used Ching Chang's answer to avoid importing Stream, and it worked perfectly. :) – isaacsan 123 Oct 06 '21 at 20:16
  • Property 'push' does not exist on type 'Stream'. – Liam Pillay Aug 13 '23 at 04:34

const fs = require('fs');
const http = require('http');
const https = require('https');

const downloadImageToUrl = (url, filename) => {
  let client = http;
  if (url.toString().indexOf("https") === 0) {
    client = https;
  }

  return new Promise((resolve, reject) => {
    client.get(url, (res) => {
      res.pipe(fs.createWriteStream(filename))
        .on('error', reject)
        .once('close', () => resolve(filename));
    }).on('error', reject); // also reject on request-level errors (DNS, refused connection, ...)
  });
};

// 'await' only works inside an async function (or an ES module with top-level await),
// so call it from an async context:
(async () => {
  await downloadImageToUrl('https://www.google.com/images/srpr/logo11w.png', 'public/uploads/users/abc.jpg');
})();
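
Note that http.get/https.get will not follow redirects on their own (something the original question asks about). A rough sketch of how the function above could recurse on 3xx responses; the helper name and redirect limit are just illustrative:

const downloadFollowingRedirects = (url, filename, redirectsLeft = 5) => {
  const client = url.toString().indexOf("https") === 0 ? https : http;
  return new Promise((resolve, reject) => {
    client.get(url, (res) => {
      // On a redirect, discard this body and retry with the Location header
      if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
        res.resume();
        if (redirectsLeft === 0) return reject(new Error('Too many redirects'));
        return resolve(downloadFollowingRedirects(res.headers.location, filename, redirectsLeft - 1));
      }
      res.pipe(fs.createWriteStream(filename))
        .on('error', reject)
        .once('close', () => resolve(filename));
    }).on('error', reject);
  });
};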
double-beep
Chandan Chhajer

If you want to show download progress, try this:

var fs = require('fs');
var request = require('request');
var progress = require('request-progress');

module.exports = function (uri, path, onProgress, onResponse, onError, onEnd) {
    progress(request(uri))
    .on('progress', onProgress)
    .on('response', onResponse)
    .on('error', onError)
    .on('end', onEnd)
    .pipe(fs.createWriteStream(path))
};

How to use:

var download = require('../lib/download');

download("https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png", "~/download/logo.png", function (state) {
    console.log("progress", state);
}, function (response) {
    console.log("status code", response.statusCode);
}, function (error) {
    console.log("error", error);
}, function () {
    console.log("done");
});

Note: you should install both the request and request-progress modules using:

npm install request request-progress --save
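
As suggested in the comments, a non-2xx response (a 500, for example) will not trigger the 'error' callback, so the onResponse callback is a good place to check the status code; a minimal example:

function onResponse(response) {
    // e.g. a 500 will not emit 'error', so check the status code here
    if (response.statusCode !== 200) {
        console.error("unexpected status code", response.statusCode);
    }
}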
mateuscb
Fareed Alnamrouti
    This worked great, but wanted to suggest adding a `statusCode` check. A 500 statusCode for example, will not hit the `'on("error", e)`. By adding a `on('response', (response) => console.error(response.statusCode))` it greatly facilitates debugging, – mateuscb Nov 09 '16 at 23:14
    You can edit my answer :) – Fareed Alnamrouti Nov 10 '16 at 02:42

This is an extension of Cezary's answer. If you want to download the image to a specific directory, use this. Also, use const instead of var; it's safer that way.

const fs = require('fs');
const request = require('request');
var download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){    
    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};

download('https://www.google.com/images/srpr/logo3w.png', './images/google.png', function(){
  console.log('done');
});
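
Note that createWriteStream won't create the ./images directory for you; assuming Node 10.12 or newer, you can make sure it exists first:

const fs = require('fs');

// Create the target directory (and any missing parents) before downloading into it
fs.mkdirSync('./images', { recursive: true });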
SpaceDogCS
Ahsan Ahmed

Building on the above, if anyone needs to handle errors in the write/read streams, I used this version. Note the stream.read() call in the write-error handler: it's required so we can finish reading and trigger 'close' on the read stream.

var fs = require('fs'),
    request = require('request');

var download = function(uri, filename, callback){
  request.head(uri, function(err, res, body){
    if (err) callback(err, filename);
    else {
      var stream = request(uri);
      stream.pipe(
          fs.createWriteStream(filename)
            .on('error', function(err){
              callback(err, filename);
              stream.read();
            })
        )
        .on('close', function() {
          callback(null, filename);
        });
    }
  });
};
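
For completeness, it can be called the same way as the earlier download(uri, filename, callback) versions:

download('https://www.google.com/images/srpr/logo3w.png', 'google.png', function(err, filename) {
  if (err) return console.error(err);
  console.log('saved', filename);
});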
VladFr