Hi, I was finally able to set up my web scraper and import its data into my web page :)
However, my web page runs on port 3001 and the web scraper runs on port 8080, and I'm a bit confused about how I could set a timer to update the scraper in the background.
Index.js
var scraper = require(__dirname + '/scripts/scraper.js');
var express = require('express');
var path = require('path');
var app = express();
var MongoClient = require('mongodb').MongoClient;
var url = "mongodb://127.0.0.1:27017/test";
// Serve static assets from ./public
app.use(express.static(__dirname + '/public'));
// set the view engine to ejs
app.set('view engine', 'ejs');
// index page: renders every document of mydb.customers through pages/index
app.get('/', function(req, res, next) {
  MongoClient.connect(url, function(err, db) {
    // Throwing inside an async callback would take the whole server down and
    // leave this request hanging; hand the error to Express instead.
    if (err) { return next(err); }
    var dbo = db.db("mydb");
    dbo.collection("customers").find({}).toArray(function(err, result) {
      // Close the connection on success AND on failure so it never leaks.
      db.close();
      if (err) { return next(err); }
      res.render('pages/index', {
        result: result,
      });
    });
  });
});
app.listen(3001);
console.log('navigate to: http://178.62.253.206:3001');
The way it works now, I have to manually navigate to http://178.62.253.206:8080 to trigger an update of the web scraper.
I was hoping to update the scraper silently in the background instead.
scraper.js
var http = require('http');
var request = require('request');
var cheerio = require('cheerio');
var MongoClient = require('mongodb').MongoClient
/**
 * Builds the record stored for one match row.
 *
 * Team, country and league names are lower-cased and then re-capitalised with
 * ucwords(); team names additionally go through soccer(). The kick-off time is
 * passed through subtracthour().
 *
 * @param {object} $row cheerio-wrapped row element
 * @param {number} j    display position assigned to this row
 * @returns {object} record with hteam, ateam, j, statustype, country, league, Kickoff
 */
function readMatch($row, j) {
  const hteam = soccer(ucwords($row.attr('data-home-team').toLowerCase()));
  const ateam = soccer(ucwords($row.attr('data-away-team').toLowerCase()));
  const country = ucwords($row.attr('data-country-name').toLowerCase());
  const league = ucwords($row.attr('data-league-name').toLowerCase());
  const Kickoff = subtracthour($row.attr('data-ko'));
  return {
    hteam: hteam,
    ateam: ateam,
    j: j,
    statustype: $row.attr('data-statustype'),
    country: country,
    league: league,
    Kickoff: Kickoff
  };
}

/** Row filter: keep every row. */
function takeAll() {
  return 'take';
}

/** Row filter: keep rows while they are "sched"; stop at the first row that is not. */
function takeWhileScheduled($row) {
  return $row.attr('data-statustype') !== 'sched' ? 'stop' : 'take';
}

/** Row filter: keep rows that are not "sched" or that carry data-ftr="true"; skip the rest. */
function takeFinished($row) {
  const keep = $row.attr('data-statustype') !== 'sched' || $row.attr('data-ftr') === 'true';
  return keep ? 'take' : 'skip';
}

/**
 * Appends the rows matching `selector` to `arr`, numbering kept rows
 * firstJ, firstJ + 2, firstJ + 4, ...
 *
 * @param {function} $        loaded cheerio document
 * @param {string}   selector CSS selector of the rows to visit
 * @param {number}   firstJ   position given to the first kept row
 * @param {Array}    arr      output array (mutated)
 * @param {function} decide   filter returning 'take', 'skip' or 'stop'
 */
function collectRows($, selector, firstJ, arr, decide) {
  let j = firstJ;
  $(selector).each(function (i, element) {
    const $row = $(element);
    const match = readMatch($row, j);
    const verdict = decide($row);
    if (verdict === 'stop') {
      return false; // returning false ends the .each() iteration
    }
    if (verdict === 'take') {
      arr.push(match);
      j = j + 2;
    }
  });
}

/**
 * Returns the first odd and the first even position after `length` entries.
 *
 * @param {number} length number of entries collected so far
 * @returns {{odd: number, even: number}}
 */
function nextSlots(length) {
  const lengthIsEven = isEven(length) == true;
  return {
    odd: length + (lengthIsEven ? 1 : 2),
    even: length + (lengthIsEven ? 2 : 1)
  };
}

/**
 * Extracts all matches from the loaded page: live matches first, then
 * scheduled ones, then the remaining ("finished") ones. Rows marked e_true and
 * o_true are interleaved through alternating `j` positions and the result is
 * sorted by `j`.
 *
 * @param {function} $ loaded cheerio document
 * @returns {Array<object>} match records sorted by j
 */
function extractMatches($) {
  const arr = [];

  // Step 1: live matches - e_true rows take 1, 3, 5..., o_true rows take 2, 4, 6...
  collectRows($, 'div.match_line.score_row.live_match.e_true ', 1, arr, takeAll);
  collectRows($, 'div.match_line.score_row.live_match.o_true ', 2, arr, takeAll);

  // Step 2: "sched" games. Both start positions are derived from the length
  // BEFORE either loop runs, exactly as the original did.
  let slots = nextSlots(arr.length);
  collectRows($, 'div.match_line.score_row.other_match.e_true', slots.odd, arr, takeWhileScheduled);
  collectRows($, 'div.match_line.score_row.other_match.o_true', slots.even, arr, takeWhileScheduled);

  // Step 3: finished games.
  // NOTE(review): the parity is swapped compared to step 2 (e_true rows now
  // take the even slots). This is preserved from the original - confirm it is intended.
  slots = nextSlots(arr.length);
  collectRows($, 'div.match_line.score_row.other_match.e_true', slots.even, arr, takeFinished);
  collectRows($, 'div.match_line.score_row.other_match.o_true', slots.odd, arr, takeFinished);

  arr.sort(function (a, b) {
    return a.j - b.j;
  });
  return arr;
}

/** True when `err` reports that the collection to drop does not exist. */
function isMissingCollection(err) {
  return err.code === 26 ||
    err.codeName === 'NamespaceNotFound' ||
    /ns not found/i.test(String(err.message));
}

/**
 * Replaces the contents of mydb.customers with `matches`.
 *
 * The drop is awaited before inserting, so the two commands can no longer
 * overlap. A missing collection (first run) is not treated as an error, and an
 * empty `matches` array skips insertMany(), which rejects empty input.
 *
 * @param {Array<object>} matches records to store
 * @param {function(Error=, number=)} done called with an error, or with the number of stored records
 */
function saveMatches(matches, done) {
  MongoClient.connect('mongodb://127.0.0.1:27017/test', function (err, db) {
    if (err) { return done(err); }
    const dbo = db.db("mydb");
    const finish = function (finalErr) {
      db.close();
      if (finalErr) { return done(finalErr); }
      done(null, matches.length);
    };
    // Delete old records
    dbo.dropCollection("customers", function (dropErr, delOK) {
      if (dropErr && !isMissingCollection(dropErr)) { return finish(dropErr); }
      if (delOK) console.log("Collection deleted");
      if (matches.length === 0) { return finish(null); }
      // Add new records
      dbo.collection("customers").insertMany(matches, function (insertErr) {
        if (insertErr) { return finish(insertErr); }
        console.log("Collection created!");
        finish(null);
      });
    });
  });
}

/**
 * Downloads the score page, extracts the matches and stores them in MongoDB.
 * It does not depend on an HTTP request/response pair, so it can be called
 * from any trigger (the server below is just one of them).
 *
 * @param {function(Error=, number=)} done called with an error, or with the number of stored records
 */
function scrapeMatches(done) {
  request('http://www.xscores.com/soccer', function (error, response, html) {
    if (error) { return done(error); }
    if (response.statusCode !== 200) {
      return done(new Error('Unexpected status code ' + response.statusCode));
    }
    let matches;
    try {
      matches = extractMatches(cheerio.load(html));
    } catch (parseErr) {
      // e.g. a row without one of the expected data-* attributes
      return done(parseErr);
    }
    saveMatches(matches, done);
  });
}

// Every request to port 8080 triggers one scrape. The response is now always
// ended (previously the request was left hanging), and failures are reported
// instead of crashing the process.
http.createServer(function (req, res) {
  scrapeMatches(function (err, count) {
    if (err) {
      console.error('Scrape failed:', err);
      res.writeHead(500, { 'Content-Type': 'text/plain' });
      res.end('Scrape failed\n');
      return;
    }
    res.writeHead(200, { 'Content-Type': 'text/plain' });
    res.end('Scraped ' + count + ' matches\n');
  });
}).listen(8080);
console.log('Server is running at http://178.62.253.206:8080/');
/**
 * Tells whether `n` is even.
 *
 * @param {*} n value to test; numeric strings are accepted
 * @returns {boolean|undefined} true/false for numeric input, undefined when
 *   `n` does not loosely equal its own parseFloat() result
 */
function isEven(n) {
  // Loose comparison on purpose: "4" counts as numeric, "abc" and null do not.
  var looksNumeric = (n == parseFloat(n));
  if (!looksNumeric) {
    return undefined;
  }
  var remainder = n % 2;
  return remainder ? false : true;
}
/**
 * Upper-cases the first character of every whitespace-separated word.
 *
 * The previous pattern only matched [a-z], so words starting with a non-ASCII
 * letter (e.g. "örebro") stayed lower-case. Any non-whitespace character at a
 * word start is now passed through toUpperCase(), which leaves digits and
 * punctuation untouched.
 *
 * @param {*} str value to convert; it is coerced to a string first
 * @returns {string} the string with each word capitalised
 */
function ucwords (str) {
  return (str + '').replace(/(?:^|\s)\S/g, function (match) {
    var upper = match.toUpperCase();
    // A few characters expand when upper-cased (e.g. "ß" -> "SS"); keep those as they are.
    return upper.length === match.length ? upper : match;
  });
}
/**
 * Moves a clock time one hour back.
 *
 * Characters 0-1 of `str` are read as the hour and characters 3-4 as the
 * minutes. An hour of "00" wraps around to 23.
 *
 * @param {string} str time text such as "15:30"
 * @returns {string} the shifted time, with the hour zero-padded when it is 9 or less
 */
function subtracthour(str) {
  var minutes = str.slice(3, 5);
  var hourText = str.slice(0, 2);
  // Treat midnight as 24 so that subtracting one hour yields 23.
  var hour = Number(hourText == '00' ? "24" : hourText) - Number(1);
  var paddedHour = hour <= 9 ? "0" + hour : hour;
  return paddedHour + ":" + minutes; // Note this was return str
}
/**
 * Restores the club abbreviation "FF" after ucwords() turned it into "Ff".
 *
 * Only a stand-alone word "Ff" that follows a space is rewritten, and every
 * occurrence is handled. The previous string-pattern replace() changed only
 * the first match and also corrupted words that merely start with "Ff"
 * (" Ffostrasol" became " FFostrasol").
 *
 * @param {string} str team name
 * @returns {string} the name with " Ff" words replaced by " FF"
 */
function soccer(str) {
  return str.replace(/ Ff\b/g, " FF");
}
frederik