0

for my master thesis I need to get the Wikipedia-URLs for a list of actors (approximately 20,000) sktneer helped me with my first attempt to get the code running. Thank you again! (see: Get Wikipedia page urls from an Excel list )

One issue thats left, is that google blocks my queries after a couple of actors. (150-200) A thought was to build in the Application.Wait command into the code, so that there would be a pause of 2-3 seconds before every new query.

Would this work and if, can you help me to impement this into the code?

Or is this the wrong way and is there even an easier solution?

sample

Code:

Sub XMLHTTP()
Dim url As String, lastRow As Long

Dim XMLHTTP As Object, html As Object, objResultDiv As Object, objH3 As Object, link As Object
Dim start_time As Date
Dim end_time As Date
Dim i As Long
Dim str_text As String

lastRow = Range("A" & Rows.Count).End(xlUp).Row

Dim cookie As String
Dim result_cookie As String

start_time = Time
Debug.Print "start_time:" & start_time

For i = 2 To lastRow

    url = "https://www.google.de/search?q=" & Cells(i, 1) & "&rnd=" & WorksheetFunction.RandBetween(1, 10000)

    Set XMLHTTP = CreateObject("MSXML2.serverXMLHTTP")
    XMLHTTP.Open "GET", url, False
    XMLHTTP.setRequestHeader "Content-Type", "text/xml"
    XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
    XMLHTTP.send

    If XMLHTTP.Status = 200 Then
        Set html = CreateObject("htmlfile")
        html.body.innerHTML = XMLHTTP.ResponseText
        Set objResultDiv = html.getelementbyid("rso")

        If Not objResultDiv Is Nothing Then
            Set objH3 = objResultDiv.getelementsbytagname("H3")(0)
            Set link = objH3.getelementsbytagname("a")(0)


            str_text = Replace(link.innerHTML, "<EM>", "")
            str_text = Replace(str_text, "</EM>", "")

            Cells(i, 2) = str_text
            Cells(i, 3) = link.href
            DoEvents
        Else
            Cells(i, 2) = "Not Found"
            Cells(i, 3) = "Not Found"
        End If
    Else
        Cells(i, 2) = "Not Found"
        Cells(i, 3) = "Not Found"
    End If
Next

end_time = Time
Debug.Print "end_time:" & end_time

Debug.Print "done" & "Time taken : " & DateDiff("n", start_time, end_time)
MsgBox "done" & "Time taken : " & DateDiff("n", start_time, end_time)
End Sub
braX
  • 11,506
  • 5
  • 20
  • 33
Stgrwld
  • 3
  • 1

0 Answers0