1

I'm trying to scrape multiple search stages of this county website using Python requests. Basically, I want to run a search, filter the results (that code isn't here yet), and then go to the page for a chosen result. I'm getting an ASP.NET error message: "The session based SearchQueue is empty." My code so far might seem long, but I'm including all the form data I'm using in the requests. I'm simply trying to search for the name "Smith".

Basically, I'm making an empty request and grabbing __VIEWSTATE and the other hidden values, then making a search request, which works fine. Then I grab __VIEWSTATE and friends again from the search results page and try to follow a search result using what I believe to be the hdLink value, though I'm not sure. Do you think I'm possibly missing an __EVENTTARGET? I'm driving myself crazy because I don't know what to look for here. I've posted an image of the error page as well. Thank you to anyone who can provide knowledge.

test.py

import CountyFormDataList

import requests
import json

from scrapy import Selector

url = "http://property.franklincountyauditor.com/_web/search/CommonSearch.aspx?mode=OWNER"

# ids of the hidden inputs whose values are echoed back on every postback,
# listed in the order the form-data templates consume them.
HIDDEN_FIELD_IDS = (
    "ScriptManager1_TSM",
    "__VIEWSTATE",
    "__VIEWSTATEGENERATOR",
    "__EVENTVALIDATION",
)


def extract_hidden_fields(html):
    """Return the ``value`` of each input in HIDDEN_FIELD_IDS, in that order.

    The page is parsed once. A field that is not present yields ``None``.
    """
    page = Selector(text=html)
    return [
        page.xpath('//*[@id="{}"]/@value'.format(field_id)).get()
        for field_id in HIDDEN_FIELD_IDS
    ]


def build_payload(template, *values):
    """Fill a form-data template and parse it into a dict.

    ``template`` is the inside of a JSON object with positional ``{}``
    placeholders; ``values`` are substituted in order, the result is wrapped
    in braces and decoded with ``json.loads``.
    """
    return json.loads("{" + template.format(*values) + "}")


form_data = CountyFormDataList.formDataList["CommonSearchASPX"]
cookies = form_data["cookies"]
headers = form_data["headers"]

# Step 1: empty request, only to obtain the initial hidden field values.
# Note: every call here is a stand-alone requests.post; the only cookies sent
# are the ones passed explicitly through `cookies`.
r = requests.post(url)

# Step 2: submit the owner search for "SMITH".
payload = build_payload(
    form_data["search"]["ownerSearch"],
    *extract_hidden_fields(r.text),
    "SMITH"
)
r = requests.post(url, data=payload, cookies=cookies, headers=headers)

# Step 3: follow one search result, using the hidden field values taken from
# the results page and the query string that is appended to hdLink.
payload = build_payload(
    form_data["result"]["resultJSON"],
    *extract_hidden_fields(r.text),
    "SMITH",
    "sIndex=0&idx=1"
)
r = requests.post(url, data=payload, cookies=cookies, headers=headers)

# Context manager closes the file even if the write fails; an explicit
# encoding avoids UnicodeEncodeError on platforms whose default is not UTF-8.
with open("ohioOutput.html", "w", encoding="utf-8") as f:
    f.write(r.text)

CountyFormDataList.py:

# Cookies, headers and form-data templates used to drive CommonSearch.aspx-style
# county property search pages, keyed by site type.
#
# "ownerSearch" and "resultJSON" are str.format templates holding the *inside*
# of a JSON object (no surrounding braces). The caller fills the positional
# "{}" placeholders, wraps the text in "{" ... "}" and parses it with
# json.loads to obtain the POST form data.
formDataList = {
    "CommonSearchASPX" : { # for CommonSearch.aspx websites, example: http://property.franklincountyauditor.com/_web/search/CommonSearch.aspx?mode=OWNER
        "cookies" : { # cookie sent with the search so that the disclaimer is accepted
            'DISCLAIMER': '1'
        },
        # Request headers sent along with the search/result POSTs.
        # NOTE(review): Host/Origin/Referer name auditor.ashtabulacounty.us while
        # the example URL above is property.franklincountyauditor.com — confirm
        # the mismatch is intended.
        # NOTE(review): Content-Length is a fixed "4348", presumably copied from a
        # captured browser request — confirm the HTTP client recomputes it.
        "headers" : {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "en-US,en;q=0.9",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Content-Length": "4348",
            "Content-Type": "application/x-www-form-urlencoded",
            "Host": "auditor.ashtabulacounty.us",
            "Origin": "https://auditor.ashtabulacounty.us",
            "Referer": "https://auditor.ashtabulacounty.us/PT/search/CommonSearch.aspx?mode=OWNER",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "same-origin",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
        },
        "search" : {
            # Owner-name search form. Placeholders, in order:
            # ScriptManager1_TSM, __VIEWSTATE, __VIEWSTATEGENERATOR,
            # __EVENTVALIDATION, inpOwner (the owner name searched for).
            "ownerSearch" : """
                "ScriptManager1_TSM" : "{}",
                "__EVENTTARGET" : "btSearch",
                "__EVENTARGUMENT" : "",
                "__VIEWSTATE" : "{}",
                "__VIEWSTATEGENERATOR" : "{}",
                "__EVENTVALIDATION" : "{}",
                "PageNum": 1,
                "SortBy" : "PARID",
                "SortDir":  "asc",
                "PageSize": 100,
                "hdAction" : "Search",
                "hdIndex": 0,
                "sIndex": -1,
                "hdListType" : "PA",
                "hdJur" : "",
                "hdSelectAllChecked" : "false",
                "inpOwner" : "{}",
                "selSortBy" : "PARID",
                "selSortDir":  "asc",
                "selPageSize": 100,
                "searchOptions$hdBeta" : "",
                "btSearch" : "",
                "hdLink" : "",
                "AkaCfgResults$hdPins" : "",
                "ReportsListParIDs" : "",
                "RadWindow_NavigateUrl_ClientState" : "",
                "mode" : "OWNER",
                "mask" : "",
                "param1" : "",
                "searchimmediate" : ""
            """
        },
        "result" : { # result page, found by clicking a result item on the search page
            # Form posted to open one search result. Placeholders, in order:
            # ScriptManager1_TSM, __VIEWSTATE, __VIEWSTATEGENERATOR,
            # __EVENTVALIDATION, inpOwner, and the query string appended to
            # "../Datalets/Datalet.aspx?" in hdLink.
            # NOTE(review): the sort fields here are "TAXID" / "+asc", whereas the
            # search template uses "PARID" / "asc"; the "+" is presumably a
            # URL-encoded space left over from a captured request — confirm.
            "resultJSON" : """
                "ScriptManager1_TSM" : "{}",
                "__EVENTTARGET" : "",
                "__EVENTARGUMENT" : "",
                "__VIEWSTATE" : "{}",
                "__VIEWSTATEGENERATOR" : "{}",
                "__EVENTVALIDATION" : "{}",
                "PageNum":1,
                "SortBy" : "TAXID",
                "SortDir" : "+asc",
                "PageSize":100,
                "hdAction" : "Link",
                "hdIndex":1,
                "sIndex":-1,
                "hdListType" : "PA",
                "hdJur" : "",
                "hdSelectAllChecked" : "false",
                "inpOwner" : "{}",
                "selSortBy" : "TAXID",
                "selSortDir" : "+asc",
                "selPageSize":100,
                "searchOptions$hdBeta" : "",
                "hdLink" : "../Datalets/Datalet.aspx?{}",
                "AkaCfgResults$hdPins" : "",
                "ReportsListParIDs" : "",
                "RadWindow_NavigateUrl_ClientState" : "",
                "mode" : "OWNER",
                "mask" : "",
                "param1" : "",
                "searchimmediate" : ""
            """
        }
    }
}

Error page result

Pixelknight1398
  • 537
  • 2
  • 10
  • 33

1 Answers1

1

I ended up looking into the Microsoft Docs and then found an article by Alex Ronquillo covering the Python requests Session object, which outlined the information I needed. I modified the code to the following:

import CountyFormDataList

import requests
import json

from scrapy import Selector

# ids of the hidden inputs whose values are echoed back on every postback,
# listed in the order the form-data templates consume them.
HIDDEN_FIELD_IDS = (
    "ScriptManager1_TSM",
    "__VIEWSTATE",
    "__VIEWSTATEGENERATOR",
    "__EVENTVALIDATION",
)


def extract_hidden_fields(html):
    """Return the ``value`` of each input in HIDDEN_FIELD_IDS, in that order.

    The page is parsed once. A field that is not present yields ``None``.
    """
    page = Selector(text=html)
    return [
        page.xpath('//*[@id="{}"]/@value'.format(field_id)).get()
        for field_id in HIDDEN_FIELD_IDS
    ]


def build_payload(template, *values):
    """Fill a form-data template and parse it into a dict.

    ``template`` is the inside of a JSON object with positional ``{}``
    placeholders; ``values`` are substituted in order, the result is wrapped
    in braces and decoded with ``json.loads``.
    """
    return json.loads("{" + template.format(*values) + "}")


form_data = CountyFormDataList.formDataList["CommonSearchASPX"]
cookies = form_data["cookies"]
headers = form_data["headers"]

# All three requests go through one Session so that cookies set by the server
# on an earlier response are sent back on the following requests.
with requests.Session() as session:
    url = "http://property.franklincountyauditor.com/_web/search/CommonSearch.aspx?mode=OWNER"

    # Step 1: empty request, only to obtain the initial hidden field values.
    r = session.post(url)

    # Step 2: submit the owner search for "SMITH".
    payload = build_payload(
        form_data["search"]["ownerSearch"],
        *extract_hidden_fields(r.text),
        "SMITH"
    )
    r = session.post(url, data=payload, cookies=cookies, headers=headers)

    # Step 3: follow one search result, using the hidden field values taken
    # from the results page and the query string that is appended to hdLink.
    payload = build_payload(
        form_data["result"]["resultJSON"],
        *extract_hidden_fields(r.text),
        "SMITH",
        "sIndex=0&idx=1"
    )
    r = session.post(url, data=payload, cookies=cookies, headers=headers)

# Context manager closes the file even if the write fails; an explicit
# encoding avoids UnicodeEncodeError on platforms whose default is not UTF-8.
with open("ohioOutput.html", "w", encoding="utf-8") as f:
    f.write(r.text)

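A simple adaptation such as this, which retains the session, fixed the problem, and the web page appeared to return the correct information. (A `requests.Session` persists cookies across requests, so the cookies the server sets on the first response are sent back with the later requests instead of each request starting from scratch.) I don't fully understand the intricacies behind the scenes, but I'm going to continue working on it. I hope this helps somebody in a similar situation.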

Pixelknight1398
  • 537
  • 2
  • 10
  • 33