0

I have the following code to retrieve some data from a web-table.

Sub Retrieve_ticker_list()

    Dim Stockticker As Long                      'loopvalue (URL link) you want to use

    Dim DownloadInfoSheet As Worksheet
    Set DownloadInfoSheet = ActiveWorkbook.Worksheets("DownloadInfo")

    Dim DataSheet As Worksheet
    Set DataSheet = ActiveWorkbook.Worksheets("Data")

    Dim lastrowStock As Long
    Dim lastrowG As Long

    Dim baseURL As String
    Dim searchResultsURL As String

    lastrowStock = DownloadInfoSheet.Cells(Rows.Count, "C").End(xlUp).Row 'Find last row in Stockticker
    lastrowG = DataSheet.Cells(Rows.Count, "A").End(xlUp).Row + 10 'Find last row in range PART3


    For Stockticker = 2 To lastrowStock          'Loop from page 2 to lastrow

        baseURL = DownloadInfoSheet.Cells(2, "A") 'download from cell A2: 
        searchResultsURL = baseURL & DownloadInfoSheet.Cells(Stockticker, "C").Value 'Add the ticker symbol to the original URL link, example             
        With DataSheet _
             .QueryTables.Add(Connection:="URL;" & searchResultsURL, Destination:=DataSheet.Range(DataSheet.Cells(1, "A"), DataSheet.Cells(lastrowG, "A")))
            .Name = _
                  "Stock Data"
            .BackgroundQuery = True
            .RefreshStyle = xlOverwriteCells
            .TablesOnlyFromHTML = True
            .WebSelectionType = xlSpecifiedTables
            .WebTables = """Rf"""
            .PreserveFormatting = True
            .Refresh BackgroundQuery:=False
            Call Delete_Query_Content_Data 'See code below. I have tried to have this inside and outside the "with" loop
        End With

        Call RunProcess 'calculate adjusted key-ratios
    Next Stockticker

End Sub

I got issues when I try to delete the connection. If the web table exists and is pasted into the workbook, then I can delete the connection with the below code without problem.

However, when the URL is incorrect (invalid stockticker name) the code works (paste blank data), but I'm not able to remove the connection. I can manually go to "Data" -> "Connections" -> "Remove" but it doesn't work with code. Either the connection is not removed (if lastrowG = 1) or I get the following error (lastrowG = ... row + 10):

enter image description here

Code to delete table query connection:

Sub Delete_Query_Content_Data()
' This code works when the URL code is valid, however if the code has an invalid stockticker (i.e. ADPA)
 'it doesn't remove the connection (if I set lastrowG = 1) otherwise it mostly gives the error message 80010108 
'Clear Web Query for "Stock data"
Sheets("Data").Activate
Range("A1").Select
Selection.QueryTable.Delete
Selection.ClearContents

End Sub

I have tried to replace Delete_Query_Content_Data with suggestions from stackoverflow (example "Killing connection in EXCEL vba" and "Excel VBA Export to Excel - Removing Connections") but none of them solves my problem, I still get the error message.

Mikku
  • 6,538
  • 3
  • 15
  • 38
Wizhi
  • 6,424
  • 4
  • 25
  • 47

2 Answers2

1

I would look into actual xmlhttp request as faster retrieval method. For now, although slightly unusual please see a structural re-write of your answer with some notes.

Note:

1) Moving IE object out of loop and making visible before loop. Same for some other variables not affected by loop e.g. baseURL.

2) Reducing selection of values for Last 52 weeks high and low to using css selectors to target the appropriate elements

3) Use With statements where appropriate e.g. for determining lastrowStockTickerPE

4) Remove unnecessary additional wait

5) Remove Set = Nothing where not required as objects will be dereferenced when out of scope

Option Explicit  
Public Sub Retrieve_PE_Low_High()
    Dim DownloadInfoSheet As Worksheet, OutputSheet As Worksheet
    Dim Stockticker As Long, lastrowStockTickerPE As Long
    Dim baseURL As String, searchResultsURL As String
    Dim HTMLDoc As HTMLDocument, oIE As InternetExplorer

    Set DownloadInfoSheet = ActiveWorkbook.Worksheets("DownloadInfo") 'Sheet to retrieve stockticker from
    Set OutputSheet = ActiveWorkbook.Worksheets("Output") 'Where data will be assigned
    Set oIE = New InternetExplorer
    baseURL = DownloadInfoSheet.Cells(3, "A")

    With DownloadInfoSheet
        lastrowStockTickerPE = .Cells(.Rows.Count, "D").End(xlUp).Row
    End With

    With oIE
        .Visible = True

        For Stockticker = lastrowStockTickerPE To lastrowStockTickerPE '<==presumably your endpoint is not always the same as start

            searchResultsURL = baseURL & DownloadInfoSheet.Cells(Stockticker, "C").Value 'Add the ticker symbol to the original URL link, example "https://www.nasdaq.com/symbol" + "ADP" = "https://www.nasdaq.com/symbol/ADP"

            .Navigate2 searchResultsURL

            While .Busy Or .readyState < 4: DoEvents: Wend

            Set HTMLDoc = .document

            Dim high As Object, low As Object
            With HTMLDoc
                On Error Resume Next
                Set high = .querySelector(".infoTable.trading-activitiy tr + tr td:last-child")
                Set low = .querySelector(".infoTable.trading-activitiy tr + tr + tr td:last-child")
                Debug.Print high.innerText, low.innerText
                On Error GoTo 0
                If high Is Nothing Or low Is Nothing Then
                    'dummy
                Else
                    'other code to write to sheet
                End If
            End With
            Set high = Nothing: low = Nothing
        Next Stockticker
        .Quit
    End With
End Sub

An example XMLHTTP request which you can adapt into loop using idea from above. Interestingly, the css selectors to target the elements have to be tweaked slightly.

Option Explicit   
Public Sub GetInfo()
    Dim sResponse As String, html As HTMLDocument, high As Object, low As Object
    Set html = New HTMLDocument
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.nasdaq.com/symbol/AAPL", False
        .send
        html.body.innerHTML = StrConv(.responseBody, vbUnicode)
    End With

    On Error Resume Next
    Set high = html.querySelector(".infoTable.trading-activitiy tr + tr td + td")
    Set low = html.querySelector(".infoTable.trading-activitiy tr + tr + tr td + td")
    Debug.Print high.innerText, low.innerText
    On Error GoTo 0
End Sub
QHarr
  • 83,427
  • 12
  • 54
  • 101
  • Thank you, thank you. This is really impressive and very educational :). Thanks for sharing. I still think, the most tricky part is to locate the correct `html.querySelector()`. Might I ask, how would you print the code in an efficient way to a specific excel sheet in your examples? Would be very interesting to learn :) – Wizhi Jan 07 '19 at 21:32
  • 1
    The selectors can probably be improved I.e. shortened. You can even use the first selector twice via querySelectorAll and index into positions 0 and 1 to get the two values. What do you mean by print code? – QHarr Jan 07 '19 at 21:43
  • Oki. I have some to learn there :). // I was thinking about write to excel sheet the values I got. Of course the `high` / `low` actually contains the value. So I was looking for this: `OutputSheet.Cells(1, 1).Value = high.innerText` // `OutputSheet.Cells(1, 2).Value = low.innerText`. Thanks again for taking your time, much appreciated!! – Wizhi Jan 07 '19 at 22:24
  • 1
    Ok. So you sorted? Any other questions let me know :-) – QHarr Jan 07 '19 at 22:25
0

If any other could benefit I would highly suggest to look for this post: web scraping with vba using XMLHTTP

This is my interpretation of the code provided by Graham Anderson.

I have added:

  • Loop the URL address extension (i.e. nasdaq.com/symbol/ loop this ticker).
  • Added a simple errorhandler (it more skips the error, leaves a note and goes on) to avoid interruption.
  • Direct the code to only copy specific elements back to the sheets. (Saves time instead of printing the whole table and then lookup which value I want to use)

The benefit using HTML/XMLHTTP compared to excels web import (my code in the question) is that number values are directly recognized correctly. With the QueryTables approach I lost zeros since the numbers was in US format ("." as delimiter while I use ","). With the below code the numbers come in fine from the beginning, it save so much time.

Sub Retrieve_PE_Low_High()
Dim Stockticker As Long 'loopvalue (URL extension to link) you want to use
Dim DownloadInfoSheet As Worksheet
Set DownloadInfoSheet = ActiveWorkbook.Worksheets("DownloadInfo") 'Sheet to retrieve stockticker from

Dim OutputSheet As Worksheet
Set OutputSheet = ActiveWorkbook.Worksheets("Output") 'Where data will be assigned
Dim lastrowB As Long

Dim lastrowStockTickerPE As Long
Dim lastrowStockPE As Long

Dim baseURL As String
Dim searchResultsURL As String

lastrowStockTickerPE = DownloadInfoSheet.Cells(Rows.Count, "D").End(xlUp).Row 'Find last row in Stockticker

For Stockticker = lastrowStockTickerPE To lastrowStockTickerPE 'Loop from page 2 to lastrow
    baseURL = DownloadInfoSheet.Cells(3, "A") 'download from cell A2: https://www.nasdaq.com/symbol
    searchResultsURL = baseURL & DownloadInfoSheet.Cells(Stockticker, "C").Value 'Add the ticker symbol to the original URL link, example "https://www.nasdaq.com/symbol" + "ADP" = "https://www.nasdaq.com/symbol/ADP"

    '#Microsoft HTML Object Library - Activate by Tools -> References
    '#Microsoft Internet Controls - Activate by Tools -> References
    Dim HTMLDoc As New HTMLDocument
    Dim AnchorLinks As Object
    Dim TDelements As Object
    Dim tdElement As Object
    Dim AnchorLink As Object
    Dim lRow As Long
    Dim lCol As Long
    Dim oElement As Object
    Dim i As Integer

    Dim oIE As InternetExplorer

    Set oIE = New InternetExplorer

    oIE.navigate searchResultsURL
    oIE.Visible = True

    'Wait for IE to load the web page
    Do Until (oIE.readyState = 4 And Not oIE.Busy)
        DoEvents
    Loop

    'Wait for Javascript to run
    Application.Wait (Now + TimeValue("0:00:15"))

    HTMLDoc.body.innerHTML = oIE.document.body.innerHTML


    With HTMLDoc.body
        Set AnchorLinks = HTMLDoc.getElementsByClassName("infoTable trading-activitiy") 'The "getElementsByClassName" is found by righ-click the element you want to retrieve data. This is the higher node (table)

        For Each AnchorLink In AnchorLinks
            'Debug.Print AnchorLink.innerText
            Set TDelements = AnchorLink.getElementsByTagName("td") 'The "getElementsByTagName" is found by righ-click the element you want to retrieve data. This is the lower node (single value)
        Next AnchorLink

        'lRow = 1
        'Print complete table of "infoTable trading-activitiy" to see what each element has for row.
        'If "High/Low 52 wk price" prints out at row 99, then the element index number is 98.
        'You could also search for items by: Debug.Print TDelements.Item(i).innerText, where i = a number
        'For Each tdElement In TDelements
        '    Debug.Print tdElement.innerText
        '    Cells(lRow, 1).Value = tdElement.innerText
        '    lRow = lRow + 1
        'Next tdElement

        If TDelements Is Nothing Then
            Call Dummy_PE                    'If object "TDelements is not populated/nothing (i.e. URL is not working or getElementsByClassName is not found) go to Dummy_PE
        Else
            lastrowStockPE = OutputSheet.Cells(Rows.Count, "G").End(xlUp).Row 'Find last row in Stockticker
            For i = 5 To 3 Step -1           'Loop through the TDelements items 5 to 3
                Select Case i
                Case 3, 5                    'For TDelements items 3 and 5, copy those to the sheet
                    'Debug.Print TDelements.Item(i).innerText
                    OutputSheet.Cells(lastrowStockPE - 1, 6).Value = TDelements.Item(i).innerText
                    OutputSheet.Cells(lastrowStockPE - 1, 6).NumberFormat = "General"
                    OutputSheet.Cells(lastrowStockPE - 1, 7).ClearContents
                    If OutputSheet.Cells(lastrowStockPE - 1, 6).Value = "" Then
                        OutputSheet.Cells(lastrowStockPE - 1, 2).Font.Color = vbRed
                    End If
                    lastrowStockPE = lastrowStockPE + 1
                End Select
            Next i

        End If

    End With

    oIE.Quit

    Set AnchorLinks = Nothing
    Set AnchorLink = Nothing
    Set TDelements = Nothing
    Set tdElement = Nothing
    Set HTMLDoc = Nothing
    Set olE = Nothing

Next Stockticker
End Sub
Wizhi
  • 6,424
  • 4
  • 25
  • 47