I want to set up a server that scrapes data off an existing API, cleans it up and outputs it to my own server where I can manipulate the data further.
So far I've got all the pieces working.
- My server runs.
- I can scrape the data into the correct JSON format
- I have a function that runs when an HTTP POST request is received and writes the JSON to my database.
Now I need to connect the two.
There are 3 main components:
- the server.js file
- The Post controller & schema file. This contains the function that handles the http request and constructs the object and writes to the database.
- I have a separate file that contains a bunch of functions that scrape the data and prepare it in the proper JSON format.
My problem is getting the scraping file to run. At the moment, to test it, I just run `node filename.js`; that worked fine when I was outputting the JSON with console.log. However, now I've written in the HTTP request. The problem is that when I run the server to open the HTTP endpoint, I can't run the file.js that contains the scraper.
Essentially I want the scraper to run on a schedule, and send the data to the endpoint I've set up on the server.
Is there a way to replicate calling `node filename.js` in code? As I understand it, when you run `node filename.js` it just runs down the file, executing code as it finds it. Is there a way to do this? Or do I need to call each function one by one from my server file? Or do I just encapsulate the whole JS file in a master function (my worry would then be that variables become siloed)?
I've posted the code below for reference:
server.js
require("dotenv").config(); // ALLOWS ENVIRONMENT VARIABLES TO BE SET ON PROCESS.ENV SHOULD BE AT TOP
const express = require("express");
const app = express();
const postRoutes = require("./routes/postRoutes.js");
const path = require('path');
// Middleware. Registration order matters: the JSON body parser is added before
// the /posts router so it runs first for those requests.
app.use(express.json()); // parse JSON request bodies in the request object
// Hand every request whose path starts with /posts to the router loaded from ./routes/postRoutes.js
app.use("/posts", postRoutes);
// Global error handler: logs the error's stack, name and code, then answers
// with a generic 500 JSON body.
// IMPORTANT: the parameter list must start with `err` and keep all four
// parameters (including the unused `next`) so it is treated as an error handler.
function handleUnexpectedError(err, req, res, next) {
  const details = [err.stack, err.name, err.code];
  for (const detail of details) {
    console.log(detail);
  }
  const payload = { message: "Something went rely wrong" };
  res.status(500).json(payload);
}
app.use(handleUnexpectedError);
// Listen on the port given in the environment, falling back to 3000.
const PORT = process.env.PORT || 3000;
app.listen(PORT, () => console.log(`Server running on PORT ${PORT}`));
// Requiring the scraper module executes its top-level code once, in this
// process -- the in-code equivalent of running `node battlesGetData.js`.
// The specifier must match the file name's exact casing ("battlesGetData.js"):
// a differently-cased name only resolves on case-insensitive file systems.
// A relative specifier is already resolved against this file's directory, so
// wrapping it in path.resolve(__dirname, ...) is unnecessary.
const battlesGetData = require("./battlesGetData.js");
battlesGetData.js
const fetch = require("node-fetch");
const postController = require("./controllers/postControllers");
//const fs = require('fs');
// Array#filter predicate: true only for the first occurrence of each value,
// i.e. when the element's position equals the index of its first match.
const distinct = (item, position, items) => items.indexOf(item) === position;
/**
 * Fetches the battle history of one player from the Splinterlands API.
 *
 * Failures (network error, non-OK status, unparseable body) are logged and
 * reported as an empty list, so callers can always iterate the result.
 * Previously the error was logged and the function then crashed while reading
 * `.battles` from `undefined`.
 *
 * @param {string} [player=''] - Player name; URL-encoded before it is put in the query string.
 * @param {object} [data={}] - Unused; kept so the signature stays compatible.
 * @returns {Promise<Array>} The `battles` array from the response, or `[]` when
 *   the request fails or the response has no `battles` array.
 */
async function getBattleHistory(player = '', data = {}) {
  // Encode the name so characters such as '&', '#' or spaces cannot alter the query.
  const url = 'https://api2.splinterlands.com/battle/history?player=' + encodeURIComponent(player);
  try {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error('Network response was not ok');
    }
    const history = await response.json();
    return history && Array.isArray(history.battles) ? history.battles : [];
  } catch (error) {
    console.error('There has been a problem with your fetch operation:', error);
    return [];
  }
}
/**
 * Copies the battle-level fields (created_date, match_type, mana_cap, ruleset,
 * inactive) from a battle object, using '' for any field that is missing or falsy.
 *
 * @param {object} x - Battle object to read the fields from.
 * @returns {object} Object holding exactly those five keys.
 */
const extractGeneralInfo = (x) => {
  const fieldNames = ['created_date', 'match_type', 'mana_cap', 'ruleset', 'inactive'];
  const info = {};
  for (const fieldName of fieldNames) {
    info[fieldName] = x[fieldName] || '';
  }
  return info;
};
/**
 * Flattens a team into a single record: the summoner's id and level followed by
 * id, level and abilities for monster slots 1 to 6. Slots with no monster get ''
 * for all three fields.
 *
 * @param {object} team - Object with a `summoner` and a `monsters` array.
 * @returns {object} Flat record with summoner_* and monster_N_* keys.
 */
const extractMonster = (team) => {
  // Read the six slots first; a slot beyond the end of the array is undefined.
  const slots = [0, 1, 2, 3, 4, 5].map((slotIndex) => team.monsters[slotIndex]);
  const record = {
    summoner_id: team.summoner.card_detail_id,
    summoner_level: team.summoner.level,
  };
  slots.forEach((monster, slotIndex) => {
    const prefix = `monster_${slotIndex + 1}`;
    record[`${prefix}_id`] = monster ? monster.card_detail_id : '';
    record[`${prefix}_level`] = monster ? monster.level : '';
    record[`${prefix}_abilities`] = monster ? monster.abilities : '';
  });
  return record;
};
// Players whose battle history is collected on each run.
const usersToGrab = ["rus48-bot", "sbalani"];

// Same port fallback as the server uses, so the scraper posts to wherever the
// server is listening instead of a hard-coded 3000.
const postsUrl = `http://localhost:${process.env.PORT || 3000}/posts/`;

/**
 * Turns one raw battle into the winning side's flat record.
 *
 * @param {object} battle - Raw battle; `details` is a JSON string holding team1/team2.
 * @returns {object|undefined} The winner's record, or undefined when the battle
 *   was a surrender, has no winner matching either player, or has unparseable details.
 */
const toWinnerRecord = (battle) => {
  let details;
  try {
    details = JSON.parse(battle.details);
  } catch (err) {
    // One malformed battle should not discard the whole batch.
    console.error('Skipping battle with unparseable details:', err);
    return undefined;
  }
  if (!details || details.type === 'Surrender' || !battle.winner) {
    return undefined;
  }

  let side;
  if (battle.winner === battle.player_1) {
    side = 1;
  } else if (battle.winner === battle.player_2) {
    side = 2;
  } else {
    return undefined;
  }

  // The player_1 / player_2 fields differ only by the side number.
  return {
    ...extractMonster(details[`team${side}`]),
    ...extractGeneralInfo(battle),
    battle_queue_id: battle[`battle_queue_id_${side}`],
    player_rating_initial: battle[`player_${side}_rating_initial`],
    player_rating_final: battle[`player_${side}_rating_final`],
    winner: battle[`player_${side}`],
  };
};

/**
 * Fetches every listed player's battles, keeps the winners' records and POSTs
 * them as a JSON array to the /posts endpoint.
 *
 * Being a function, it can also be invoked repeatedly (e.g. from a timer) to
 * run the scrape on a schedule.
 *
 * @returns {Promise<void>} Settles once the POST has completed.
 */
async function scrapeAndPost() {
  // Results are derived from the awaited values (in user order) rather than
  // pushed into shared state as each request happens to finish.
  const historiesPerUser = await Promise.all(
    usersToGrab.map((user) => getBattleHistory(user))
  );
  const cleanBattleList = []
    .concat(...historiesPerUser)
    .map(toWinnerRecord)
    .filter((record) => record !== undefined);

  const response = await fetch(postsUrl, {
    method: "post",
    // The body must be serialised: passing the array itself does not produce
    // JSON, so the server's JSON body parser could not read it.
    body: JSON.stringify(cleanBattleList),
    headers: { "Content-Type": "application/json" },
  });
  console.log(response);
}

// Run once when the file is loaded (or executed with `node`), as before.
// The catch prevents an unhandled rejection if scraping or posting fails.
scrapeAndPost().catch((err) => console.log(err));
This is the POST function that gets called
exports.createNewPost = async (req, res, next) => {
/*
let { summoner_id, summoner_level,
monster_1_id, monster_1_level, monster_1_abilities,
monster_2_id, monster_2_level, monster_2_abilities,
monster_3_id, monster_3_level, monster_3_abilities,
monster_4_id, monster_4_level, monster_4_abilities,
monster_5_id, monster_5_level, monster_5_abilities,
monster_6_id, monster_6_level, monster_6_abilities,
created_date, match_type, mana_cap, ruleset, inactive,
battle_queue_id, player_rating_initial, player_rating_final, winner
} = req.body; // using postman this is what allows us to post JSON
let post = new PostBattle(summoner_id, summoner_level,
monster_1_id, monster_1_level, monster_1_abilities,
monster_2_id, monster_2_level, monster_2_abilities,
monster_3_id, monster_3_level, monster_3_abilities,
monster_4_id, monster_4_level, monster_4_abilities,
monster_5_id, monster_5_level, monster_5_abilities,
monster_6_id, monster_6_level, monster_6_abilities,
created_date, match_type, mana_cap, ruleset, inactive,
battle_queue_id, player_rating_initial, player_rating_final, winner); // the title & body defined in the previous line taken from the JSON are now deposited here.
*/
let json = req.body;
for (var obj in json) {
console.log(obj + ": " + json[obj]);
let post = new PostBattle(json[obj].summoner_id, json[obj].summoner_level,
json[obj].monster_1_id, json[obj].monster_1_level, json[obj].monster_1_abilities,
json[obj].monster_2_id, json[obj].monster_2_level, json[obj].monster_2_abilities,
json[obj].monster_3_id, json[obj].monster_3_level, json[obj].monster_3_abilities,
json[obj].monster_4_id, json[obj].monster_4_level, json[obj].monster_4_abilities,
json[obj].monster_5_id, json[obj].monster_5_level, json[obj].monster_5_abilities,
json[obj].monster_6_id, json[obj].monster_6_level, json[obj].monster_6_abilities,
json[obj].created_date, json[obj].match_type, json[obj].mana_cap, json[obj].ruleset, json[obj].inactive,
json[obj].battle_queue_id, json[obj].player_rating_initial, json[obj].player_rating_final, json[obj].winner);
console.log(post);
//let post = new PostBattle(json);
post = await post.save();
console.log("a post is happening");
}