You need to activate the WebBrowser advanced features for the parsing procedure to complete successfully. When these features are not enabled, the WebBrowser, in its standard IE7 emulation mode, won't be able to complete the Document. The failure is caused by the high number of scripting errors.
I've added a class with static methods (WebBrowserAdvancedFetures
) to add the required values to the Registry.
WebBrowserAdvancedFetures.ActivateWBAdvancedFeatures
is called in the Form's constructor.
You can roll it back by calling WebBrowserAdvancedFetures.DeactivateWBAdvancedFeatures
.
How this procedure works:
- Instantiate a WebBrowser class (
Private browser As WebBrowser
). We could also use a WebBrowser control (the visible control version that a Form container can host), it's the same thing.
- Subscribe to its DocumentCompleted event. It will be raised each time one of the
HtmlDocuments
inside the main WebBrowser.Document
is completed. Read How to get an HtmlElement value inside Frames/IFrames? for some more details on HtmlDocuments nesting.
- In the
DocumentCompleted
handler, verify that at least one of the Documents is ready to be parsed, checking that WebBrowser.ReadyState
= WebBrowserReadyState.Complete
- When it is, search for the HtmlElements that contain the data we're looking for.
- When all data has been collected, raise an event, to notify that the parsing is completed (this also allows subscribers from other classes to be notified as well, if needed. This requires a custom
EventArgs
class, though) and disable further parsing of the HtmlDocument
(here, this is accomplished by setting a Boolean field).
- Handle the new data (here, just a
String
and a DateTime
object), then reset the fields/variables used in the parsing procedure.
Remember to remove the handlers in the Form.FormClosed
event or in a custom class Dispose()
method:
' Undo the two AddHandler calls made in the Form's constructor.
RemoveHandler DocumentParsingComplete, AddressOf OnDocumentParsingComplete
RemoveHandler browser.DocumentCompleted, AddressOf browser_DocumentCompleted
' Raised by browser_DocumentCompleted once the tracking date has been parsed
' and the tracking number is not empty.
Public Event DocumentParsingComplete As EventHandler(Of EventArgs)
' WebBrowser instance used to load and parse the page; created in the constructor.
Private browser As WebBrowser = Nothing
' InnerText of the first SPAN whose class name contains "tracking-number-value".
Private trackingNumberValue As String = String.Empty
' Date parsed (format dd-MM-yyyy) from the last word of the tracking date SPAN.
Private trackingDateValue As DateTime
' Set to True after a successful parse so that later DocumentCompleted events
' are ignored; reset to False in btnNavigate_Click before a new navigation.
Private documentParsed As Boolean = False
' Passed to WebBrowser.Navigate as the additional-headers argument.
Private userAgent As String = "User-Agent: Mozilla/5.0 (Windows NT 10; Win64; x64; rv:48.0) Gecko/20100101 Firefox/48.0"
''' <summary>
''' Initializes the Form, enables the WebBrowser advanced features for this
''' executable, creates the WebBrowser instance and subscribes the event handlers.
''' </summary>
Public Sub New()
    InitializeComponent()

    ' The registry values are written before the WebBrowser instance is created.
    Dim executableName As String = Path.GetFileName(Application.ExecutablePath)
    WebBrowserAdvancedFetures.ActivateWBAdvancedFeatures(executableName)

    ' Non-visual WebBrowser with script error dialogs suppressed.
    browser = New WebBrowser()
    browser.ScriptErrorsSuppressed = True

    ' Hook the browser notification and the custom "parsing finished" event.
    AddHandler browser.DocumentCompleted, AddressOf browser_DocumentCompleted
    AddHandler DocumentParsingComplete, AddressOf OnDocumentParsingComplete
End Sub
''' <summary>
''' Starts a new navigation: opens a new empty document, re-enables parsing
''' and then navigates to the target URL sending the custom User-Agent header.
''' </summary>
Private Sub btnNavigate_Click(sender As Object, e As EventArgs) Handles btnNavigate.Click
    ' Navigate to an empty address and open a new document first.
    With browser
        .Navigate("")
        .Document.OpenNew(True)
    End With

    ' Let browser_DocumentCompleted parse the next document again.
    documentParsed = False

    ' Load the page in the current window ("_self"), with no post data.
    Dim targetUrl As String = "[Some URL]"
    browser.Navigate(targetUrl, "_self", Nothing, userAgent)
End Sub
''' <summary>
''' Handles DocumentParsingComplete: consumes the parsed tracking number and
''' tracking date, then clears both fields for the next parsing run.
''' </summary>
Private Sub OnDocumentParsingComplete(sender As Object, e As EventArgs)
    ' Consume the parsed values (replace with whatever handling is needed).
    Console.WriteLine(trackingNumberValue)
    Console.WriteLine(trackingDateValue)

    ' Clear the fields so that they can be reused by the next navigation.
    trackingDateValue = Date.MinValue
    trackingNumberValue = String.Empty
End Sub
''' <summary>
''' Handles WebBrowser.DocumentCompleted. When the browser is in the Complete
''' state, the document contains at least one form and no parse has succeeded
''' yet, looks for the tracking number and tracking date SPAN elements and
''' raises DocumentParsingComplete once both values are available.
''' </summary>
''' <param name="sender">The WebBrowser that raised the event.</param>
''' <param name="e">Event data (not used).</param>
Private Sub browser_DocumentCompleted(sender As Object, e As WebBrowserDocumentCompletedEventArgs)
    Dim wb As WebBrowser = DirectCast(sender, WebBrowser)
    If wb.ReadyState <> WebBrowserReadyState.Complete OrElse wb.Document.Forms.Count = 0 OrElse documentParsed Then Return

    ' Query the SPAN elements once; both look-ups below work on this snapshot.
    Dim spans = wb.Document.GetElementsByTagName("SPAN").OfType(Of HtmlElement)().ToList()

    Dim trackingNumberClass As String = "tracking-number-value"
    Dim trackingElement = spans.FirstOrDefault(
        Function(elm) elm.GetAttribute("className").Contains(trackingNumberClass))
    Me.trackingNumberValue = trackingElement?.InnerText

    Dim trackingDateClass As String = "ng-binding ng-scope"
    Dim trackingDateElement = spans.FirstOrDefault(
        Function(elm) elm.GetAttribute("className").Equals(trackingDateClass))

    ' The element may be missing or still empty when this event fires for an
    ' intermediate document: just wait for the next DocumentCompleted.
    Dim dateText As String = trackingDateElement?.InnerText
    If String.IsNullOrWhiteSpace(dateText) Then Return

    ' The date is the last word of the text, possibly followed by a period.
    Dim deliveryDate As String = dateText.Trim().Split().Last().TrimEnd("."c)

    ' TryParseExact instead of ParseExact: text that is not (yet) a date must
    ' not throw from the event handler. The invariant culture keeps the result
    ' independent of the calendar of the current user's culture.
    Dim parsedDate As DateTime
    If Not DateTime.TryParseExact(deliveryDate, "dd-MM-yyyy",
                                  System.Globalization.CultureInfo.InvariantCulture,
                                  System.Globalization.DateTimeStyles.None, parsedDate) Then Return
    Me.trackingDateValue = parsedDate

    If String.IsNullOrEmpty(trackingNumberValue) Then Return

    ' Both values are available: stop further parsing and notify subscribers.
    documentParsed = True
    RaiseEvent DocumentParsingComplete(sender, EventArgs.Empty)
End Sub
Use this class to activate/deactivate the WebBrowser control's advanced features:
Imports Microsoft.Win32
Imports System.Security.AccessControl
''' <summary>
''' Adds or removes the per-executable values under the current user's
''' Internet Explorer FeatureControl registry keys (FEATURE_BROWSER_EMULATION
''' and FEATURE_GPU_RENDERING) used by the WebBrowser control.
''' </summary>
Public Class WebBrowserAdvancedFetures
    Private Shared ReadOnly baseKeyName As String = "Software\Microsoft\Internet Explorer\Main\FeatureControl"
    Private Shared ReadOnly featuresKey As String = baseKeyName & "\FEATURE_BROWSER_EMULATION"
    Private Shared ReadOnly hardwareAccelKey As String = baseKeyName & "\FEATURE_GPU_RENDERING"

    ''' <summary>
    ''' Writes the browser emulation value (11001) and the GPU rendering value (1)
    ''' for the specified executable, creating the registry keys when they don't exist.
    ''' </summary>
    ''' <param name="executableName">File name of the executable, e.g. "MyApp.exe".</param>
    Public Shared Sub ActivateWBAdvancedFeatures(executableName As String)
        ' 11001 is documented by Microsoft as IE11 edge mode for FEATURE_BROWSER_EMULATION.
        SetFeatureValue(featuresKey, executableName, 11001)
        SetFeatureValue(hardwareAccelKey, executableName, 1)
    End Sub

    ''' <summary>
    ''' Removes the values written by <see cref="ActivateWBAdvancedFeatures"/>.
    ''' Does nothing for a key or value that does not exist.
    ''' </summary>
    ''' <param name="executableName">File name of the executable, e.g. "MyApp.exe".</param>
    Public Shared Sub DeactivateWBAdvancedFeatures(executableName As String)
        RemoveFeatureValue(featuresKey, executableName)
        RemoveFeatureValue(hardwareAccelKey, executableName)
    End Sub

    ' Opens (or creates) the key under HKEY_CURRENT_USER and stores a DWORD value in it.
    Private Shared Sub SetFeatureValue(keyPath As String, valueName As String, value As Integer)
        ' CreateSubKey returns the existing key when it is already present.
        Using key As RegistryKey = Registry.CurrentUser.CreateSubKey(keyPath, True)
            key.SetValue(valueName, value, RegistryValueKind.DWord)
        End Using
    End Sub

    ' Deletes a value from the key under HKEY_CURRENT_USER, if the key exists.
    Private Shared Sub RemoveFeatureValue(keyPath As String, valueName As String)
        ' OpenSubKey returns Nothing when the key was never created.
        Using key As RegistryKey = Registry.CurrentUser.OpenSubKey(keyPath, True)
            key?.DeleteValue(valueName, False)
        End Using
    End Sub
End Class