0

With the help of this SO question, I am trying to scrape the following website. I would like the two teams and the time for each game. For example, the first entry would be Chicago | Miami | 12:30 PM, and the last entry would be Colorado | Arizona | 10:10 PM. My code is as follows:

/**
 * Fetches the ESPN MLB schedule page for 2018-03-29, extracts chunks of HTML
 * between the schedule-table class marker and `</tbody>`, and tries to build
 * rows that are appended below the last used row of the "Schedule" sheet.
 *
 * NOTE(review): `res` only receives a row when a chunk's team differs from the
 * previously seen one. If that never happens, `res` stays empty and
 * `res[0].length` on the last line throws a TypeError.
 */
function espn_schedule() {
  var url = "http://www.espn.com/mlb/schedule/_/date/20180329";
  var content = UrlFetchApp.fetch(url).getContentText();
  // presumably one array entry per matched table -- confirm against the Parser library
  var scraped = Parser.data(content).from('class="schedule has-team-logos align-left"').to('</tbody>').iterate();
  // Rows that will be written to the sheet.
  var res = [];

  // Row currently being assembled, and the team seen in the previous chunk.
  var temp = [];
  var away_ticker = "";
  scraped.forEach(function(e){
    // build() is called once per chunk, so at most one team and one time are read from each `e`.
    var away_team = Parser.data(e).from('href="mlb/team/_/name/').to('"').build();
    var time = Parser.data(e).from('a data-dateformat="time1"').to('</a>').build();
    // Seed the tracker with the first team seen.
    if (away_ticker == "") away_ticker = away_team;
    // Only flush a row when the team changes; on the first chunk this is always
    // false because the tracker was just seeded with the same value.
    if (away_team != away_ticker) {
      // Insert the previous team at index 1 of the pending row, then store the row.
      temp.splice(1, 0, away_ticker);
      res.push(temp);
      temp = [];
      away_ticker = away_team;
      // The time is pushed into the fresh row, i.e. it is carried into the next flush.
      temp.push(time);
    }
  });
  var ss = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Schedule");
  // Append below the last used row; throws if `res` is empty because res[0] is undefined.
  ss.getRange(ss.getLastRow() + 1, 1, res.length, res[0].length).setValues(res);
}

I get the following error:

TypeError: Cannot read property "length" from undefined. (line 42, file "Code")

Rubén
  • 34,714
  • 9
  • 70
  • 166
cfb_moose
  • 27
  • 1
  • 9
  • First difference I see is that they escaped the `"` in the `.from('class=\"screener-body-table-nw\"')` call. Also please Logger.log(scraped) to see if you are getting anything back there. – Chris Feb 21 '18 at 17:23

1 Answers1

0

Here is a modified solution that works

/**
 * Scrapes the ESPN MLB schedule page for 2018-03-29 and appends one row per
 * game -- [first team, second team, start time] -- below the last used row of
 * the "Schedule" sheet in the active spreadsheet.
 *
 * Uses the Apps Script services UrlFetchApp, SpreadsheetApp and Logger, plus
 * the `Parser` library for from/to text extraction.
 *
 * Nothing is written when no complete game (pair of teams) could be parsed.
 *
 * @throws {Error} If the active spreadsheet has no sheet named "Schedule".
 */
function espn_schedule() {
  var url = "http://www.espn.com/mlb/schedule/_/date/20180329";
  var content = UrlFetchApp.fetch(url).getContentText();

  // Only the first schedule table is needed, so build() is used instead of iterate().
  var e = Parser.data(content).from('class="schedule has-team-logos align-left"').to('</tbody>').build();

  // Every <abbr title="..."> in the table is a team name; every data-date="..."
  // is a game start timestamp that Date can parse.
  var teams = Parser.data(e).from('<abbr title="').to('">').iterate();
  var time = Parser.data(e).from('data-date="').to('">').iterate();

  // Teams are listed in pairs (two per game). Only complete pairs are used so a
  // trailing unpaired entry never produces a row with an undefined cell.
  var res = [];
  var games = Math.floor(teams.length / 2);
  for (var i = 0; i < games; i++) {
    var start = new Date(time[i]);
    res.push([
      teams[2 * i],
      teams[2 * i + 1],
      // A missing or unparsable timestamp yields an empty cell rather than "Invalid Date".
      isNaN(start.getTime()) ? "" : start.toLocaleTimeString('en-US')
    ]);
  }
  Logger.log(res);

  // getRange() needs at least one row and res[0] must exist, so bail out early.
  if (res.length === 0) {
    return;
  }

  var ss = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Schedule");
  if (!ss) {
    throw new Error('Sheet "Schedule" not found in the active spreadsheet');
  }
  ss.getRange(ss.getLastRow() + 1, 1, res.length, res[0].length).setValues(res);
}

Modification explained:
1) Since you only access the first table, you don't need to iterate during parsing; you can just get the first table. For the same reason, you don't need forEach to loop through each element.

// Extract the text between the schedule-table class marker and the closing </tbody>.
var e = Parser.data(content)
        .from('class="schedule has-team-logos align-left"')
        .to('</tbody>')
        .build();   // build() instead of iterate(): only the first table is needed

2) Instead of parsing the HTML link to get the team name, you can use the <abbr title="..."> element to scrape the name. Furthermore, you can iterate over all the team names in the table to get an array of team names.

var teams = Parser.data(e).from('<abbr title="').to('">').iterate();

3) Similarly, you can get the time from the data-date attribute. It gives you a date string that can be read by the Date() constructor. Again, we iterate over the table to get all the times.

var time = Parser.data(e).from('data-date="').to('">').iterate()

4) Finally, we use a for loop to arrange the teams and times into the array called res. This allows the data to be inserted into the sheet directly.

// Walk the flat team list two entries at a time; i/2 is the row (game) index.
for( var i = 0; i<teams.length ; i = i+2) // each pass advances by 2, i.e. by one game
         {
           res[i/2] = []         // start a new row for this game
           res[i/2][0] = teams[i]   // team at the even index (starts at zero)
           res[i/2][1] = teams[i+1] // paired with the team at the following odd index
           res[i/2][2] = new Date(time[i/2]).toLocaleTimeString('en-US') // one timestamp per game, formatted as a time string
         }

Reference:
Date(),Date.toLocaleTimeString()

Edit:
Reason for error, in the below code

Parser.data(e).from('href="mlb/team/_/name/').to('"').build()

you are searching for the string 'href="mlb/team/_/name/', but it should be 'href="/mlb/team/_/name/'. Note the difference: mlb vs. /mlb (the leading slash is missing).

Secondly, in the following code

Parser.data(e).from('a data-dateformat="time1"').to('</a>').build();

The string should be a data-dateFormat. When you inspect the website in the browser, the attribute is shown as dateformat; however, when you fetch the page with UrlFetchApp and log the text, it appears as dateFormat.

Jack Brown
  • 5,802
  • 2
  • 12
  • 27