I've created a script using Python in combination with Selenium to parse the id, vikey and cbhtmlfragid that are meant to be used as payload in a POST HTTP request. As I found it difficult to scrape id, vikey and cbhtmlfragid using requests alone, I thought to grab them using Selenium so that I can use them while making the POST request.
I'm trying to populate results for the letter a typed in the input box right next to Entity Name Or Identifier. I noticed that the results are populated through a POST request, which I'm trying to reproduce programmatically. To populate the results it is necessary to follow the steps sequentially shown in this image, which ultimately leads to this image.
I've tried with:
import re
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
link = 'https://www.businessregistration.moc.gov.kh/'
post_url = 'https://www.businessregistration.moc.gov.kh/cambodia-master/viewInstance/update.html?id={}'
payload = {
    'QueryString': 'a',
    'SourceAppCode': 'cambodia-br-soleproprietorships',
    'OriginalVersionIdentifier': '',
    'nodeW772-Advanced': 'N',
    '_CBASYNCUPDATE_': 'true',
    '_CBHTMLFRAGNODEID_': 'W762',
    '_CBHTMLFRAGID_': '',
    '_CBHTMLFRAG_': 'true',
    '_CBNODE_': 'W778',
    '_VIKEY_': '',
    '_CBNAME_': 'buttonPush'
}
def get_content(wait, link):
    driver.get(link)
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[data-rel='#appMainNavigation']"))).click()
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[class$='menu-soleproprietorships']"))).click()
    elem = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[class$='menu-brSoleProprietorSearch']")))
    driver.execute_script("arguments[0].click();", elem)
    # the id query parameter of the search page is reused in the post url
    item_id = driver.current_url.split("id=")[1].split("&_timestamp")[0]
    # sessionId, viewInstanceKey and guid are embedded in the page source
    x_catalyst = re.findall(r"sessionId:'(.*?)',", str(driver.page_source), flags=re.DOTALL)[0]
    item = re.findall(r"viewInstanceKey:'(.*?)',", str(driver.page_source), flags=re.DOTALL)[0]
    elem = re.findall(r"guid:(.*?),", str(driver.page_source), flags=re.DOTALL)[0]
    return item_id, x_catalyst, item, elem
def make_post_requests(item_id, x_catalyst, item, elem):
    payload['_VIKEY_'] = item
    payload['_CBHTMLFRAGID_'] = elem
    res = requests.post(post_url.format(item_id), data=payload, headers={
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest',
        'x-catalyst-session-global': x_catalyst
    })
    soup = BeautifulSoup(res.text, "lxml")
    result_count = soup.select_one("[class='appPagerBanner']")
    print(result_count)
if __name__ == '__main__':
    driver = webdriver.Chrome()
    wait = WebDriverWait(driver, 10)
    item_id, x_catalyst, item, elem = get_content(wait, link)
    make_post_requests(item_id, x_catalyst, item, elem)
    driver.quit()
When I execute the above script, I can see that no results come back, so I suppose I went wrong somewhere.
How can I let my script populate the results using a POST request?
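In case it matters, here is a minimal sketch of one variation I'm considering, assuming the POST also needs the cookies that the Selenium session received (the helper name transfer_cookies is just for illustration, not something from the site):

import requests

def transfer_cookies(driver):
    # copy the cookies from the Selenium browser into a requests.Session,
    # so the POST is sent within the same server-side session
    session = requests.Session()
    for cookie in driver.get_cookies():
        session.cookies.set(cookie['name'], cookie['value'])
    return session

The idea would be to call session.post(post_url.format(item_id), data=payload, headers={...}) instead of requests.post(...), but I'm not sure whether that is the missing piece.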