0

I am scraping data from a webpage table and placing the data into a Excel sheet. There are some 1000 pages with 40 results for each page. I need help with making the macro change result page and putting them all into one long list in the same sheet.

Should I add something like this: For Page = 1 To 100 If Page > 1 Then _

Option Explicit

Sub scrape()

    Dim appIE As Object
    Dim ihtml As Object

    Set appIE = CreateObject("internetexplorer.application")

    With appIE

        .Visible = True
        .navigate "http://www.boliga.dk/salg/resultater?so=1&sort=omregnings_dato-d&maxsaledate=today&iPostnr=&gade=&type=Villa&minsaledate=2017"

        While .Busy = True Or .readyState < 4: DoEvents: Wend

        Set ihtml = .document


        Dim allRowOfData As Object

        Set allRowOfData = appIE.document.getElementById("searchresult")

        Dim r As Long, c As Long

        Dim curHTMLRow As Object


        For r = 1 To allRowOfData.Rows.Length - 1

            Set curHTMLRow = allRowOfData.Rows(r)
            Cells(r + 1, c + 1) = curHTMLRow.Cells(7).innerText

        Next r

        .Quit

    End With

    Set appIE = Nothing


End Sub


        pageParameter = "&pageNumber=" & Page
Community
  • 1
  • 1
thomasuponor
  • 23
  • 1
  • 7

1 Answers1

0

For this code, I used UDF ONLY DIGITS designed by @paxdiablo.

Sub scrape()


    Dim appIE As Object
    Dim ihtml As Object
    Dim ThisPage As Long
    Dim TotalPages As Long
    Dim allRowOfData As Object
    Dim r As Long
    Dim curHTMLRow As Object

    ThisPage = 1
    Set appIE = CreateObject("internetexplorer.application")

    With appIE

        .Visible = False
        .navigate "http://www.boliga.dk/salg/resultater?so=1&sort=omregnings_dato-d&maxsaledate=today&iPostnr=&gade=&type=Villa&minsaledate=2017&p=" & ThisPage

        While .Busy = True Or .readyState < 4: DoEvents: Wend

        Set ihtml = .document
        'we get total result number to calculate how many pages
        For Each allRowOfData In appIE.document.getElementsByTagName("h4")
            If Right(allRowOfData.innerText, 10) = "resultater" Then

                TotalPages = Application.WorksheetFunction.RoundUp(onlyDigits(allRowOfData.innerText) / 40, 0)
                Exit For
            End If
        Next

        For ThisPage = 1 To TotalPages Step 1


            .navigate "http://www.boliga.dk/salg/resultater?so=1&sort=omregnings_dato-d&maxsaledate=today&iPostnr=&gade=&type=Villa&minsaledate=2017&p=" & ThisPage
            While .Busy = True Or .readyState < 4: DoEvents: Wend

            Set allRowOfData = Nothing

            Set allRowOfData = appIE.document.getElementById("searchresult")

            For r = 1 To allRowOfData.Rows.Length - 1

                Set curHTMLRow = allRowOfData.Rows(r)

                If ThisPage = 1 Then
                    Cells(r + 1, 1) = curHTMLRow.Cells(7).innerText
                Else
                    Cells(Range("A2").End(xlDown).Row + 1, 1) = curHTMLRow.Cells(7).innerText
                End If

            Next r
        Next ThisPage

        .Quit

    End With

    Set appIE = Nothing


MsgBox "Scrapping done"
End Sub

Private Function onlyDigits(s As String) As String
    ' Variables needed (remember to use "option explicit").   '
    Dim retval As String    ' This is the return string.      '
    Dim i As Integer        ' Counter for character position. '

    ' Initialise return string to empty                       '
    retval = ""

    ' For every character in input string, copy digits to     '
    '   return string.                                        '
    For i = 1 To Len(s)
        If Mid(s, i, 1) >= "0" And Mid(s, i, 1) <= "9" Then
            retval = retval + Mid(s, i, 1)
        End If
    Next

    ' Then return the return string.                          '
    onlyDigits = retval
End Function

That website got more than 1200 pages, so it will take a while. If you want to test how it works first. just make 2 changes in the code above before the long run.

First change. Replace .Visible = False and make it .Visible = True

Second Change: Replace For ThisPage = 1 To TotalPages Step 1 and make it For ThisPage = 1 To (TotalPages - 1222) Step 1. This way it will get you only the first 5 pages. I know you need them all, but this is just if you want to try it.