I am scraping this website using curl. It has a form(left one) that need to submit using curl.
What I have done: If you see my code, first curl_exec is working fine and I am getting captcha code and other attributes perfectly. In 2nd curl_exec, when I am posting all attributes in form, then I got error.
My code is here:
$url = 'http://epunjabschool.gov.in/gs_schoolwebsite/Search.aspx';
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_PROXY, $proxy);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 50);
curl_setopt($ch, CURLOPT_TIMEOUT, 50);
$httpCode = curl_getinfo($ch , CURLINFO_HTTP_CODE); // this results 0 every time
$response = curl_exec($ch);
if ($response === false){
$response = curl_error($ch);
echo stripslashes($response);
}
//curl_close($ch);
$dom = new DOMDocument;
@$dom->loadHTML($response);
$tags = $dom->getElementsByTagName('input');
$VIEWSTATE = '';
$EVENTVALIDATION = '';
for($i=0;$i<$tags->length; $i++){
$grab = $tags->item($i);
//echo $grab->getAttribute('value');
if($grab->getAttribute('name') === '__VIEWSTATE'){
$VIEWSTATE = $grab->getAttribute('value');
}
if($grab->getAttribute('name') === '__EVENTVALIDATION'){
$EVENTVALIDATION = $grab->getAttribute('value');
}
}
$domx = new DOMXPath($dom);
$trans_id = $domx->query('//img[@id="imgCaptcha"]');
$imgCaptcha = '';
foreach ($trans_id as $id) {
$imgCaptcha = $id->getAttribute('src');
}
$captch = explode('=', $imgCaptcha);
echo $captcha = $captch[1];
$data = array(
"__VIEWSTATE" => $VIEWSTATE,
"__EVENTVALIDATION" => $EVENTVALIDATION,
"__EVENTARGUMENT" => '',
"__EVENTTARGET" => '',
"__LASTFOCUS" => '',
"txtCaptcha" => $captcha,
"ddlDistrict" => 2,
"ddlEdBlock" => 2,
"ddlManagement" => 1,
"ddlCategory" => 4,
"ddlDistrictBySchoolName" => "Select",
"btnShow" => "Show",
"txtCaptchbySchoolName" => "",
"txtSchoolName" => ""
);
$url = 'http://epunjabschool.gov.in/gs_schoolwebsite/Search.aspx';
$headers = array("Content-Type: application/x-www-form-urlencoded");
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, TRUE);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_POST, TRUE);
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($data));
curl_setopt($ch, CURLOPT_COOKIEJAR, "cookie.txt");
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$response = curl_exec($ch);
if ($response === false){
$response = curl_error($ch);
}
echo stripslashes($response);
Issue: This is error I am getting:
Invalid postback or callback argument. Event validation is enabled using in configuration or <%@ Page EnableEventValidation="true" %> in a page. For security purposes, this feature verifies that arguments to postback or callback events originate from the server control that originally rendered them.