The page makes API calls to update content based on lat and lon of zipcode. You can find this in the network tab e.g. 1 , 2. These API calls return a string containing json with the listings which can be parsed with a json parser after a little string manipulation|regex. This means, in this case, you can issue xhr requests (so no need for I/O of opening browser) and then parse the json. The total number of results is present in the responseText
.
The json parser I use is Jsonconverter.bas: Download raw code from here and add to standard module called jsonConverter
. You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime.
The following shows returning all results (all pages) for a given zipcode - both for By Firm
and By Individual
.
Firms:
Getting Firm
results based on zip.
The API endpoint (construction):
apiUrl = "https://api.brokercheck.finra.org/firm?hl=true&json.wrf=angular.callbacks._6&lat={LAT}&lon={LON}&nrows=100&r=25&sort=score+desc&{START}&wt=json"
uses a query string where you can alter the number of results retrieved by the changing the nrows
param. The limit is 100. The default is 12. If you wish to retrieve all results, you can make subsequent calls in batches of n
e.g. 12 with the appropriate n
cumulative offset adjustment to start
param:
GET /firm?hl=true&json.wrf=angular.callbacks._7&lat=33.864146&lon=-84.114088&nrows=100&r=25&sort=score+desc&start=0&wt=json
GET /firm?hl=true&json.wrf=angular.callbacks._7&lat=33.864146&lon=-84.114088&nrows=100&r=25&sort=score+desc&start=100&wt=json
etc
In order to make less requests I would go with the max of n = 100 and alter the nrows
param before a loop to collect all results, and the start
(offset) param during the loop to get next batch.
Option Explicit
'Firm
Public r As Long
Public Sub GetListings()
'< VBE > Tools > References > Microsoft Scripting Runtime
Dim json As Object, apiUrl As String, re As Object, s As String, latLon()
r = 0
Set re = CreateObject("VBScript.RegExp")
apiUrl = "https://api.brokercheck.finra.org/firm?hl=true&json.wrf=angular.callbacks._6&lat={LAT}&lon={LON}&nrows=100&r=25&sort=score+desc&{START}&wt=json"
Dim xhr As Object, totalResults As Long, numPages As Long
Set xhr = CreateObject("MSXML2.XMLHTTP")
latLon = GetLatLon("30047", xhr, re) '"30047" is the zipcode of interest and could be passed as a constant set at top of module or as a local variable changed set in a loop. over zipcodes
apiUrl = Replace$(Replace$(apiUrl, "{LAT}", latLon(0)), "{LON}", latLon(1))
s = GetApiResults(xhr, Replace$(apiUrl, "{START}", "start=0"), re)
If s = "No match" Then Exit Sub
Set json = JsonConverter.ParseJson(s)("hits")
totalResults = json("total")
numPages = Application.RoundUp(totalResults / 100, 0)
Dim results(), ws As Worksheet, headers(), i As Long
ReDim results(1 To totalResults, 1 To 3)
Set ws = ThisWorkbook.Worksheets("Sheet1")
headers = Array("CRD Number", "Name", "Address")
results = GetFirmListings(results, json("hits"))
If numPages > 1 Then
For i = 2 To numPages
s = GetApiResults(xhr, Replace$(apiUrl, "{START}", "start=" & (i - 1) * 100), re)
If s = "No match" Or InStr(s, "Exceeded limit") > 0 Then Exit For
Set json = JsonConverter.ParseJson(s)("hits")
results = GetFirmListings(results, json("hits"))
Next
End If
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetLatLon(ByVal zip As String, ByVal xhr As Object, ByVal re As Object) As Variant
Dim json As Object, lat As String, lon As String
With xhr
.Open "GET", Replace$("https://api.brokercheck.finra.org/locations?query={ZIP}&results=1", "{ZIP}", zip), False
.send
Set json = JsonConverter.ParseJson(.responseText)("hits")("hits")(1)("_source")
lat = json("latitude")
lon = json("longitude")
GetLatLon = Array(lat, lon)
End With
End Function
Public Function GetApiResults(ByVal xhr As Object, ByVal apiUrl As String, ByVal re As Object) As String
With xhr
.Open "GET", apiUrl, False
.send
GetApiResults = GetJsonString(re, .responseText)
End With
End Function
Public Function GetFirmListings(ByVal results As Variant, ByVal json As Object) As Variant
Dim row As Object, address As Object
Dim addressToParse As String, addressToParse2 As String
'Crd number, name and office address
For Each row In json
r = r + 1
results(r, 1) = row("_source")("firm_source_id")
results(r, 2) = row("_source")("firm_name")
addressToParse = Replace$(row("_source")("firm_ia_address_details"), "\""", Chr$(32))
addressToParse2 = Replace$(row("_source")("firm_address_details"), "\""", Chr$(32))
addressToParse = IIf(addressToParse = vbNullString, addressToParse2, addressToParse)
If addressToParse <> vbNullString Then
Set address = JsonConverter.ParseJson(addressToParse)("officeAddress")
results(r, 3) = Join$(address.items, " ,")
End If
Next
GetFirmListings = results
End Function
Public Function GetJsonString(ByVal re As Object, ByVal responseText As String) As String
With re
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "\((.*)\);" 'regex pattern to get json string
If .Test(responseText) Then
GetJsonString = .Execute(responseText)(0).SubMatches(0)
Else
GetJsonString = "No match"
End If
End With
End Function
Individuals:
At page = 91 there is limit exceeded. 90 requests actually yielded 9,000 results of total 11,960. It may be worth investigating whether that total is actually accurate as this may be the reason for no further results. For example, despite stating currently 11,960 results there are only 75 pages of 12 results per page i.e. only 750 of c.997 expected pages. 750 pages, at 12 results per page, gives 9,000 results which is the actual returned amount. The code below simply ceases looping if "limit exceeded" is found in the response.
I show extracting only specific items from json. There is a lot more info returned e.g. all current employments which can be more than 1. You can explore, for example, the json for the first request (first 100 listing) here.
If you are interested in a specific individual you can also use their CRD in an API call as shown at the very bottom section.
Option Explicit
'Individual
Public r As Long
Public Sub GetListings2()
'< VBE > Tools > References > Microsoft Scripting Runtime
Dim json As Object, apiUrl As String, re As Object, s As String, latLon()
r = 0
Set re = CreateObject("VBScript.RegExp")
apiUrl = "https://api.brokercheck.finra.org/individual?hl=true&includePrevious=false&json.wrf=angular.callbacks._d&lat={LAT}&lon={LON}&nrows=100&r=25&sort=score+desc&{START}&wt=json"
Dim xhr As Object, totalResults As Long, numPages As Long
Set xhr = CreateObject("MSXML2.XMLHTTP")
latLon = GetLatLon("30047", xhr, re)
apiUrl = Replace$(Replace$(apiUrl, "{LAT}", latLon(0)), "{LON}", latLon(1))
s = GetApiResults(xhr, Replace$(apiUrl, "{START}", "start=0"), re)
If s = "No match" Then Exit Sub
Set json = JsonConverter.ParseJson(s)("hits")
totalResults = json("total")
numPages = Application.RoundUp(totalResults / 100, 0)
Dim results(), ws As Worksheet, headers(), i As Long
'example info retrieved. There is a lot more info in json
headers = Array("CRD Number Indiv", "Name", "FINRA registered", "Disclosures", "In industry since")
ReDim results(1 To totalResults, 1 To UBound(headers) + 1)
Set ws = ThisWorkbook.Worksheets("Sheet1")
results = GetIndividualListings(results, json("hits"))
If numPages > 1 Then
For i = 2 To numPages
DoEvents
s = GetApiResults(xhr, Replace$(apiUrl, "{START}", "start=" & (i - 1) * 100), re)
If s = "No match" Or InStr(s, "Exceeded limit") > 0 Then Exit For
Set json = JsonConverter.ParseJson(s)("hits")
results = GetIndividualListings(results, json("hits"))
Next
End If
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetLatLon(ByVal zip As String, ByVal xhr As Object, ByVal re As Object) As Variant
Dim json As Object, lat As String, lon As String
With xhr
.Open "GET", Replace$("https://api.brokercheck.finra.org/locations?query={ZIP}&results=1", "{ZIP}", zip), False 'changed results = 10 to results = 1
.send
Set json = JsonConverter.ParseJson(.responseText)("hits")("hits")(1)("_source")
lat = json("latitude")
lon = json("longitude")
GetLatLon = Array(lat, lon)
End With
End Function
Public Function GetApiResults(ByVal xhr As Object, ByVal apiUrl As String, ByVal re As Object) As String
With xhr
.Open "GET", apiUrl, False
.send
GetApiResults = GetJsonString(re, .responseText)
End With
End Function
Public Function GetIndividualListings(ByVal results As Variant, ByVal json As Object) As Variant
Dim row As Object
'can have numerous current employments. Alter here and below if want more info from json about the individual
For Each row In json
r = r + 1
results(r, 1) = row("_source")("ind_source_id")
results(r, 2) = Replace$(Join$(Array(row("_source")("ind_firstname"), row("_source")("ind_middlename"), row("_source")("ind_lastname")), ", "), ", , ", ", ")
results(r, 3) = row("_source")("ind_approved_finra_registration_count")
results(r, 4) = row("_source")("ind_bc_disclosure_fl")
results(r, 5) = row("_source")("ind_industry_cal_date")
Next
GetIndividualListings = results
End Function
Public Function GetJsonString(ByVal re As Object, ByVal responseText As String) As String
With re
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "\((.*)\);" 'regex pattern to get json string
If .Test(responseText) Then
GetJsonString = .Execute(responseText)(0).SubMatches(0)
Else
GetJsonString = "No match"
End If
End With
End Function
Single individual:
Detailed info for a single individual can be gained from:
https://api.brokercheck.finra.org/individual/1614374?json.wrf=angular.callbacks._h&wt=json