I'm writing a web scraper that uses regex to extract information in a paragraph and store it in an object. Then I add the object to an array. Here's my full code:
function scrapeCourseData(htmlString) {
// scrapes a specific department's course list
var tempArr = [];
console.log(tempArr); // outputs '[]'
$ = cheerio.load(htmlString);
// #coursestextcontainer contains the actual information for every single course listed in a department
$('#coursestextcontainer').find('.courseblock').each(function(i, elem) {
// finds all divs of type courseblock, iterates though each of them,
// extracting course information from children.
console.log('courseblock ' + (i + 1));
var courseText = $('strong', '.courseblocktitle', elem).text(); // Gets the text that will be parsed
var regex = /([A-Z]{4}\s[A-Z]{1,2}\d{4})\s(.*?)(?:\.*)(\d{1,2}(?:\.?|-?)\d{0,2}\spoints?)/g;
var regexGroups = Object.freeze({
NUMBER: 1,
NAME: 2,
CREDITS: 3
});
var match, course;
while ((match = regex.exec(courseText)) !== null) { // when regex.exec returns null, no more matches, and loop stops.
course = {
number: match[regexGroups.NUMBER],
name: match[regexGroups.NAME],
credits: match[regexGroups.CREDITS]
};
tempArr.push(course); // doesn't work-- result is array full of 'null'
console.log(course); // but this outputs as a valid object, e.g. { number: 'AFAS W3030'... }
}
});
console.log("Complete tempArr: " + tempArr); // outputs [object Object],[object Object],[object Object], etc.
for (var j of tempArr) {
dataJSONObject.push(tempArr[j]);
console.log('\ntempArray at ' + j + ': ' + tempArr[j]); // outputs [object Object]: undefined
}
console.log('\n');
}
When I first define tempArr
as []
and output it to the console, I get the expected result []
.
The objects I form from regex matches are also valid as expected at runtime.
However, when I try to push those objects to tempArr
, and then print tempArr
, it outputs as undefined
.
I've been poking around other stackoverflow questions and I'm pretty sure my problem is that when I'm pushing to tempArr
, I'm doing so outside of its scope. I've tried moving around where I declare tempArr
(e.g. by putting it outside its function to make it global), but I still get undefined
after pushing. What am I missing?