0

I want to scrape YouTube channels. I have a list of 1,500 channels and want to scrape them one by one with Node. The problem is in the loop: I want the loop to pause until the response for the current channel has arrived and been processed before it moves on to the next channel.

async and await

const cheerio = require('cheerio');
const request = require('request');
var fs = require('fs');

/**
 * The asker's attempt: reads ./channels.json and, for every entry, requests the
 * channel's /about page and logs the page <title>.
 *
 * NOTE(review): shown as posted. The snippet does not parse as-is — the closing
 * brace of getTitle is missing after the loop.
 */
async function getTitle(){
    // readFileSync and JSON.parse are synchronous; these `await`s have nothing to wait for.
    var data =  await fs.readFileSync('./channels.json');
    var Parsed_Data = await JSON.parse(data);
    console.log(Parsed_Data);
    // `x` is not declared with const/let/var anywhere in this snippet.
    for (x of Parsed_Data){
        // var url = 'https://www.youtube.com/channel/' + x.id + '/about';
        // console.log(url);
        // request() is handed a callback and is not awaited, so the loop moves on
        // to the next channel without waiting for this response.
        request( 'https://www.youtube.com/channel/' + x.id + '/about', function(err, res, body) {
        // `err` is never checked before `body` is used.
        let $ =  cheerio.load(body);
        console.log($);
        let title =  $('title');
        console.log(title.text());
        }

    )};

I want the title of each channel.

Raj
  • 43
  • 1
  • 9

2 Answers

0

You can do:

/**
 * Fetches a URL with the callback-based `request` library and exposes the
 * outcome as a promise so that callers can `await` it.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<*>} Resolves with the `body` that `request` passes to its
 *   callback; rejects with the error `request` reports.
 */
function getBody(url) {
    return new Promise((resolve, reject) => {
        // Use the `url` argument: the caller's loop variable is not in scope here.
        request(url, (err, res, body) => {
            if (err) {
                reject(err);
                return;
            }
            resolve(body);
        });
    });
}

/**
 * Reads the channel list from ./channels.json and logs the page title of each
 * channel's /about page, strictly one channel at a time.
 *
 * @returns {Promise<void>} Settles once every channel has been processed;
 *   rejects as soon as reading the file, parsing it, or a request fails.
 */
async function getTitle() {
    // readFileSync and JSON.parse are synchronous, so neither needs `await`.
    const data = fs.readFileSync('./channels.json', 'utf8');
    const channels = JSON.parse(data);
    console.log(channels);
    // Declaring the loop variable avoids an implicit global (a ReferenceError in
    // strict mode and ES modules).
    for (const channel of channels) {
        // Awaiting here is what makes the loop wait for the current response
        // before the next request is started.
        const body = await getBody(`https://www.youtube.com/channel/${channel.id}/about`);

        const $ = cheerio.load(body);
        console.log($);
        const title = $('title');
        console.log(title.text());
    }
}
Deepsy
  • 3,769
  • 7
  • 39
  • 71
0

First, install request-promise:

npm i request-promise

Then, change your code into this:

const cheerio = require('cheerio');
const rp = require('request-promise');
var fs = require('fs');

/**
 * Reads the channel list from ./channels.json and logs the page title of each
 * channel's /about page, one channel at a time, using request-promise (`rp`).
 *
 * @returns {Promise<void>} Settles once every channel has been processed;
 *   rejects as soon as reading the file, parsing it, or a request fails.
 */
async function getTitle() {
    // readFileSync and JSON.parse are synchronous, so neither needs `await`.
    const data = fs.readFileSync('./channels.json', 'utf8');
    const channels = JSON.parse(data);
    console.log(channels);
    // Declaring the loop variable avoids an implicit global (a ReferenceError in
    // strict mode and ES modules).
    for (const channel of channels) {
        // `rp` returns a promise, so awaiting it holds the loop until this
        // channel's page has been downloaded.
        const body = await rp(`https://www.youtube.com/channel/${channel.id}/about`);
        const $ = cheerio.load(body);
        console.log($);
        const title = $('title');
        console.log(title.text());
    }
}

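Also, you don't need `await` in front of JSON.parse(data): it is synchronous and returns the parsed value directly rather than a promise. The same is true of fs.readFileSync.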

Alvin Lau
  • 194
  • 2
  • 9