0

I want to download all the mp3 files contained in this xml, so I created this code using Node.js and JavaScript:

var https = require('https');
var fs = require('fs');
var xml2js = require('xml2js');
var parser = new xml2js.Parser();
var request = require('request');

const xmlUrl = 'https://deejayreloadedpodcast.maxxer.it/podcast/pinocchio.xml';

var download = async function(url, dest, callback) {
    // download if only the file is not existing yet
    if(!fs.existsSync(dest)) {
        await request.get(url)
        .on('error', function(err) {
            console.log(err);
        })
        .pipe(fs.createWriteStream(dest))
        .on('close', callback); 
    }
};

https.get(xmlUrl, function(res) {
    var response_data = '';
    res.setEncoding('utf8');

    res.on('data', function(chunk) {
        response_data += chunk;
    });

    res.on('end', function() {
        parser.parseString(response_data, function(err, result) {
            if(err) {
                console.log('Got error: ' + err.message);
            } 
            else {
                var json = JSON.stringify(result, null, 2);

                var channels = result['rss']['channel'];
                var items = channels[0]['item'];

                var urlsTemp = [];
                var namesTemp = [];
                for(var elem in items) {
                    var obj = items[elem];
                    var name = obj['title'][0];
                    var url = obj['enclosure'][0]['$']['url'];
                    urlsTemp.push(url);
                    namesTemp.push(name);
                }

                var urls = [];
                var names = [];
                for(i in urlsTemp) {
                    urls.push(urlsTemp[i]);
                    names.push(namesTemp[i]);
                }

                for(var i = 10; i < 20/*urls.length*/; i++) {
                    var dirPath = './puntate/';
                    var filename =  names[i] + '.mp3';
                    download(urls[i], dirPath + filename, function() {
                        console.log('Finished downloading \'' + filename);
                    });
                }

            }
        });
    });

    res.on('error', function(err) {
        console.log('Got error: ' + err.message);
    });
});

This code takes the contents of the XML file, processes it by saving the links and file names in two arrays (urls and names) and then downloads the audio files. The problem is that it only works if you download a few mp3s at a time (in the example, there are only 10). If I let it loop from 0 to the full length of the array urls, the program no longer works. It does not generate errors but saves all mp3s with size 0 (ie empty).

Why? I thought the problem was asynchronous code, but I used async/await in the download method. What's the problem?

Thank you


var i = 0; 
var dirPath = './puntate/';
var filename = names[i] + '.mp3';
var fn = function(i) {
    console.log('(A)', i, urls.length);
    download(urls[i], dirPath + filename, function() {
        console.log('Finished downloading \'' + filename);
        console.log('(B)', i, urls.length);
        if(i < urls.length) { 
            i++;
            console.log('(C)', i, urls.length);
            fn(i);
        } 
    });
}
fn(i);

and:

(A) 0 3095
Finished downloading 'Puntata del 17 Settembre 2018.mp3
(B) 0 3095
(C) 1 3095
(A) 1 3095

1 Answers1

0

I suggest you to modify the for loop, since it gives a synchronous function:

for(var i = 10; i < 20/*urls.length*/; i++) {
   var dirPath = './puntate/';
   var filename =  names[i] + '.mp3';
   download(urls[i], dirPath + filename, function() {
       console.log('Finished downloading \'' + filename);
   });
}

to a continuous passing style:

   var i=0; /*i starts from 0*/
   var dirPath = './puntate/';


   var fn=function(i){
          var filename =  names[i] + '.mp3';
          download(urls[i], dirPath + filename, function() {
             console.log('Finished downloading \'' + filename);
             /*if not finish downloading all the links*/
             if(i<urls.length){ 
               i++;
               fn(i);
             } 
          });
       }   
       fn(i);  

Here is enhanced code version:

Improvements:

  • deleted a unneccessary for loop
  • if file already exists, skip it until the next not exists one and print.

var https = require('https');
var fs = require('fs');
var xml2js = require('xml2js');
var parser = new xml2js.Parser();
var request = require('request');
var urls = [];
var names = [];
const xmlUrl = 'https://deejayreloadedpodcast.maxxer.it/podcast/pinocchio.xml';


var download = async function(url, dest, callback) {

    request.get(url)
    .on('error', function(err) {
        console.log(err);
    })
    .pipe(fs.createWriteStream(dest))
    .on('close', callback); 
};

https.get(xmlUrl, function(res) {
    var response_data = '';
    res.setEncoding('utf8');

    res.on('data', function(chunk) {
        response_data += chunk;
    });

    res.on('end', function() {
        parser.parseString(response_data, function(err, result) {
            if(err) {
                console.log('Got error: ' + err.message);
            } 
            else {
                var json = JSON.stringify(result, null, 2);

                var channels = result['rss']['channel'];
                var items = channels[0]['item'];

               // var urlsTemp = []; //you don't need both of temp arrays
               // var namesTemp = []; //push items directly into urls[] and names[]
                for(var elem in items) {
                    var obj = items[elem];
                    var name = obj['title'][0];
                    var url = obj['enclosure'][0]['$']['url'];
                    urls.push(url);
                    names.push(name);
                }

                var i = 0;
                var dirPath = './puntate/';



                var fn = function(i) {
                    var filename = names[i] + '.mp3';
                    var fileExist=fs.existsSync(dirPath + filename);

                    // skip downloading if the file exists already
                    if(fileExist){
                        console.log('File exists', i, urls.length);
                        i++;
                        fn(i);                      
                    }else{  // download if only the file is not existing yet                    
                        console.log('(A)', i, urls.length);
                        download(urls[i], dirPath + filename, function() {
                            console.log('Finished downloading \'' + filename);
                            console.log('(B)', i, urls.length);
                            if(i < urls.length) { 
                                i++;
                                console.log('(C)', i, urls.length);
                                fn(i);
                            } 
                        });                     
                    }

                }
                fn(i);              

            }
        });
    });

    res.on('error', function(err) {
        console.log('Got error: ' + err.message);
    });
});
Janice Zhong
  • 836
  • 7
  • 16
  • It doesn't work. I get the error **fn is not a function** so I edit `var fn = function(i)` to `function fn(i)` but it download only the first mp3 –  Sep 19 '18 at 13:02
  • I shouldn't call fn(i) before I define it, that's why you get a error. I've edited my answer. Try it again :) – Janice Zhong Sep 19 '18 at 13:10
  • It still does not work. It downloads the first mp3 and then stop. I added some prints (see the code added in the main message). It's strange –  Sep 19 '18 at 13:30
  • I think the logic is fine, something might be going on in the `download` function, I had some research, maybe try to replace the `pipe.on('close')` with `pipe.on('finish')`. Here is the [Reference](https://stackoverflow.com/questions/11447872/callback-to-handle-completion-of-pipe) – Janice Zhong Sep 19 '18 at 14:28
  • Thanks for your patience but it's the same. I can download only the first mp3 –  Sep 19 '18 at 14:39
  • Hi buddy, I just found a really simple mistake I made in the previous code, I accidentally put `var filename = names[i] + '.mp3';` outside of the `fn()`, that was something definitely causing the error. I copied your code and test it by myself , everything is on the right track now – Janice Zhong Sep 20 '18 at 10:47
  • I've post a complete code, and have test it by myself, please check :) – Janice Zhong Sep 20 '18 at 14:47