Allegedly there are illegal characters where there are none, and my instance is going nuts on me. The problem occurs at `Dim datastream As Stream = client.OpenRead(url)`.
At first, `Dim url As String = GoogleSearch & MovieName` did not want to accept the HTML format as a string. OK, so I removed `https://`
from the string, and now it is just in the `www.____` format, which should still work with the WebClient. Now it pulls this on me. Why? When tested outside Visual Studio it works.
My input string URL is: www.google.com/search?q=imdb+Orville
which causes the webclient to give this error:
System.ArgumentException: 'Illegal characters in path.'
''' <summary>
''' Form that takes a movie title from <c>RichTextBox1</c>, searches Google, Bing and Ask
''' (in that order) for an IMDb title link, downloads the IMDb page and extracts the
''' title id (<c>tt…</c>) from its canonical link.
''' </summary>
Public Class Form1

    ' NOTE: the Handles clause takes the bare event name; "MyBase.Load()" with parentheses
    ' is not valid there.
    Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
    End Sub

    ''' <summary>True when the last call to parseIMDbPage found a title id.</summary>
    Public Property status As Boolean

    ''' <summary>IMDb title id (e.g. "tt1234567") found by the last lookup, or empty.</summary>
    Public Property Id As String

    ''' <summary>IMDb title URL rebuilt from <see cref="Id"/> by the last successful lookup.</summary>
    Public Property ImdbURL As String

    ' The scheme is mandatory: WebClient interprets a scheme-less string such as
    ' "www.google.com/search?q=..." as a local file path, and the "?" in it then raises
    ' System.ArgumentException: "Illegal characters in path."
    Private GoogleSearch As String = "https://www.google.com/search?q=imdb+"
    Private BingSearch As String = "https://www.bing.com/search?q=imdb+"
    Private AskSearch As String = "https://www.ask.com/web?q=imdb+"

    ''' <summary>
    ''' Returns the trimmed value of capture group <paramref name="i"/> from the first match
    ''' of <paramref name="regex"/> in <paramref name="html"/> (empty string when no match).
    ''' </summary>
    Private Function match(ByVal regex As String, ByVal html As String, Optional ByVal i As Integer = 1) As String
        Return New Regex(regex, RegexOptions.Multiline).Match(html).Groups(i).Value.Trim()
    End Function

    ''' <summary>
    ''' Returns the trimmed value of capture group <paramref name="i"/> for every match of
    ''' <paramref name="regex"/> in <paramref name="html"/>, in match order.
    ''' </summary>
    Private Function matchAll(ByVal regex As String, ByVal html As String, Optional ByVal i As Integer = 1) As ArrayList
        Dim list As ArrayList = New ArrayList()
        For Each m As Match In New Regex(regex, RegexOptions.Multiline).Matches(html)
            list.Add(m.Groups(i).Value.Trim())
        Next
        Return list
    End Function

    ''' <summary>
    ''' Searches for the IMDb title URL of <paramref name="MovieName"/>.
    ''' </summary>
    ''' <param name="MovieName">Free-text title as typed by the user.</param>
    ''' <param name="searchEngine">"google" (default), "bing" or "ask"; any other value uses Google's URL.</param>
    ''' <returns>
    ''' The first IMDb title URL found in the result page. When none is found the search
    ''' falls back google -> bing -> ask, and finally returns an empty string.
    ''' </returns>
    Private Function getIMDbUrl(ByVal MovieName As String, Optional ByVal searchEngine As String = "google") As String
        If String.IsNullOrWhiteSpace(MovieName) Then Return String.Empty

        ' Encode the user text so spaces, "&", "#", etc. cannot break the query string.
        Dim query As String = Uri.EscapeDataString(MovieName.Trim())

        Dim isGoogle As Boolean = String.Equals(searchEngine, "google", StringComparison.OrdinalIgnoreCase)
        Dim isBing As Boolean = String.Equals(searchEngine, "bing", StringComparison.OrdinalIgnoreCase)
        Dim isAsk As Boolean = String.Equals(searchEngine, "ask", StringComparison.OrdinalIgnoreCase)

        Dim url As String = GoogleSearch & query
        If isBing Then url = BingSearch & query
        If isAsk Then url = AskSearch & query

        Dim html As String = getUrlData(url)

        ' Accept http and https links and both 7- and 8-digit title ids.
        Dim imdbUrls As ArrayList = matchAll("<a href=""(https?://www.imdb.com/title/tt\d{7,8}/)"".*?>.*?</a>", html)

        If imdbUrls.Count > 0 Then
            Return CStr(imdbUrls(0))
        ElseIf isGoogle Then
            Return getIMDbUrl(MovieName, "bing")
        ElseIf isBing Then
            Return getIMDbUrl(MovieName, "ask")
        Else
            Return String.Empty
        End If
    End Function

    ''' <summary>
    ''' Downloads <paramref name="url"/> and returns the response body with its lines
    ''' concatenated (line breaks removed).
    ''' </summary>
    ''' <param name="url">
    ''' Address to fetch. If it does not start with http:// or https://, "https://" is
    ''' prepended so WebClient does not treat it as a file path.
    ''' </param>
    ''' <returns>The page text, or an empty string when <paramref name="url"/> is blank.</returns>
    Private Function getUrlData(ByVal url As String) As String
        If String.IsNullOrWhiteSpace(url) Then Return String.Empty

        Dim hasScheme As Boolean =
            url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) OrElse
            url.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
        If Not hasScheme Then url = "https://" & url

        Dim r As Random = New Random()

        ' WebClient, the response stream and the reader all hold resources; dispose them
        ' deterministically instead of leaving them to the finalizer.
        Using client As WebClient = New WebClient()
            client.Headers("X-Forwarded-For") = r.[Next](0, 255) & "." & r.[Next](0, 255) & "." & r.[Next](0, 255) & "." & r.[Next](0, 255)
            client.Headers("User-Agent") = "Mozilla/" & r.[Next](3, 5) & ".0 (Windows NT " & r.[Next](3, 5) & "." & r.[Next](0, 2) & "; rv:2.0.1) Gecko/20100101 Firefox/" & r.[Next](3, 5) & "." & r.[Next](0, 5) & "." & r.[Next](0, 5)

            Using datastream As Stream = client.OpenRead(url)
                Using reader As StreamReader = New StreamReader(datastream)
                    Dim sb As StringBuilder = New StringBuilder()
                    ' Lines are appended without their terminators on purpose: the regexes
                    ' used by this class rely on ".", which does not match a newline.
                    While Not reader.EndOfStream
                        sb.Append(reader.ReadLine())
                    End While
                    Return sb.ToString()
                End Using
            End Using
        End Using
    End Function

    ''' <summary>
    ''' Downloads the IMDb page at <paramref name="imdbUrl"/> and fills <see cref="Id"/>,
    ''' <see cref="status"/> and <see cref="ImdbURL"/> from its canonical link.
    ''' </summary>
    ''' <param name="imdbUrl">IMDb title URL; when empty nothing is downloaded.</param>
    Private Sub parseIMDbPage(ByVal imdbUrl As String)
        ' Start from a clean state so a failed lookup does not report the previous result.
        status = False
        Id = String.Empty

        If String.IsNullOrEmpty(imdbUrl) Then Return

        Dim html As String = getUrlData(imdbUrl)
        Id = match("<link rel=""canonical"" href=""https?://www.imdb.com/title/(tt\d{7,8})/"" />", html)

        If Not String.IsNullOrEmpty(Id) Then
            status = True
            ' "Me." is required: VB is case-insensitive, so a bare "imdbUrl" would bind to
            ' the ByVal parameter and the property would never be updated.
            Me.ImdbURL = "http://www.imdb.com/title/" & Id & "/"
        End If
    End Sub

    ''' <summary>Looks up the title typed into RichTextBox1 and shows the IMDb id found.</summary>
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        Dim TextFromBox As String = RichTextBox1.Text
        Try
            Dim imdbUrl As String = getIMDbUrl(TextFromBox)
            parseIMDbPage(imdbUrl)
            MessageBox.Show(Id)
        Catch ex As WebException
            ' Network/HTTP failures (no connection, 4xx/5xx from a search engine) should not
            ' take down the form; report them instead.
            MessageBox.Show(ex.Message)
        End Try
    End Sub

End Class