1

I'm new here and need some advice for my module. I've created the following Module to scrape data value from alexa.com with specific address: alexa.com/siteinfo/clashofclans.com

The specific value is nested within the following table: https://i.stack.imgur.com/DaPmU.jpg

I try to get data "percent of visitor" from US as the image above with value 9.1%, but the code will only work if United States in first row / first position. https://i.stack.imgur.com/kqpKR.jpg

The below VBA code is my attempt at scraping:

Sub ExtractAlexa()
  Dim tickername As String
  Dim doc As HTMLDocument

  ie.Visible = False
  ie.navigate "http://www.alexa.com/siteinfo/clashofclans.com"

  Do
    DoEvents
  Loop Until ie.readyState = READYSTATE_COMPLETE

  Application.Wait (Now + TimeValue("00:00:4"))
  Set doc = ie.document

  Set elems = doc.getElementById("demographics_div_country_table").getElementsByTagName("tr")
  For Each e In elems        
    If e.outerHTML Like "*/topsites/countries/US*" Then
      Sheet2.Range("E11").Value = Trim(doc.getElementsByTagName("td")(1).innerText)
    End If
  Next e

  ie.Quit
End Sub

Please, does anyone know where I am going wrong here? Thank You.

Matthew Wilcoxson
  • 3,432
  • 1
  • 43
  • 48
eros
  • 29
  • 1
  • 6

2 Answers2

1

You can achieve the same thing with CSS selector combination to grab all the table cells from the table by using a descendant combination of parent id element and child td tagged elements. Loop until find the United States string and take the + 1 index to get the %.

Option Explicit
Public Sub GetPercentage()
    Dim sResponse As String, html As HTMLDocument, tds As Object, i As Long

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.alexa.com/siteinfo/clashofclans.com", False
        .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
        .send
        sResponse = StrConv(.responseBody, vbUnicode)
    End With

    Set html = New HTMLDocument
    html.body.innerHTML = sResponse

    Set tds = html.querySelectorAll("#demographics_div_country_table td")
    For i = 0 To tds.Length - 1
        If Trim$(tds.item(i).innerText) = "United States" Then
            ThisWorkbook.Worksheets("Sheet2").Range("E11").Value = tds.item(i + 1).innerText
        Exit Sub
        End If
    Next i
End Sub
QHarr
  • 83,427
  • 12
  • 54
  • 101
0

You've basically nailed it - but I think you have a subtle bug in this line:

Sheet2.Range("E11").Value = Trim(doc.getElementsByTagName("td")(1).innerText)

Should be:

Sheet2.Range("E11").Value = Trim(e.getElementsByTagName("td")(1).innerText)

Also you can use WinHTTP to avoid some of the wrangling with IE:

Public Sub ExtractAlexa()
    Dim oHTML As MSHTML.HTMLDocument
    Dim elems As MSHTML.IHTMLElementCollection
    Dim e As MSHTML.IHTMLElement
    Set oHTML = New MSHTML.HTMLDocument
    With CreateObject("WINHTTP.WinHTTPRequest.5.1")
        .Open "GET", "http://www.alexa.com/siteinfo/clashofclans.com", False
        .send
        oHTML.body.innerHTML = .responseText
    End With
    oHTML.getElementById("demographics_div_country_table").getElementsByTagName ("tr")
    Set elems = oHTML.getElementById("demographics_div_country_table").getElementsByTagName("tr")
    For Each e In elems
        If e.outerHTML Like "*/topsites/countries/US*" Then
            Sheet2.Range("E11").Value = Trim(e.getElementsByTagName("td")(1).innerText)
            Exit For
        End If
    Next e
End Sub
Community
  • 1
  • 1
Ben
  • 355
  • 2
  • 11