I'm still a bit new to using curl to pull data and I've recently started using Fiddler to help find what options need to be set.
I'm trying to see if I can pull an image from a site. I first hit a search page - I set the search parameters, then start hitting links in the results. When I attempt to go a link in one of the results for an image, I get an empty string returned from curl_exec().
The weird thing is - at one point, it worked - I got the data back and successfully saved the image locally. But then it stopped, and I have no idea what I was doing to have it working. Naturally, everything works OK in the browser. :(
I'm using Simple HTML DOM to parse through results and cUrl for the actual page requests. curl_error() does not show an error, curl_getinfo() thinks everything is OK too. It's probably something trivial, but I'm not sure how to troubleshoot it beyond where I am.
<?php
include 'includes/simple_html_dom.php';
$url = "http://nwweb.co.bell.tx.us/NewWorld.Aegis.WebPortal/Corrections/InmateInquiry.aspx";
// Get Cookie - ASP.NET_SessionId
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, 1);
$r = curl_exec($ch);
preg_match_all('/^Set-Cookie:\s*([^;]*)/mi', $r, $matches);
$cookies = array();
foreach($matches[1] as $item)
{
parse_str($item, $cookie);
$cookies = array_merge($cookies, $cookie);
}
$sessionCookie = "ASP_NET_SessionId=".$cookies['ASP_NET_SessionId'];
// now load up page into Simple HTML DOM and get all inputs - ignore buttons and populate our dates
$startDate = "02%2F01%2F2000";
$endDate = "02%2F07%2F2016";
$getInputs = str_get_html($r);
$inputs = $getInputs->find('input');
$inputs_array = array();
$buttons_array = array();
for ($i=0; $i<count($inputs); $i++)
{
if ($inputs[$i]->type != "submit")
{
$inputs_array[$inputs[$i]->id] = $inputs[$i]->value;
if (stripos($inputs[$i]->id, "FromDate") > 0)
$inputs_array[$inputs[$i]->id] = $startDate;
if (stripos($inputs[$i]->id, "ToDate") > 0)
$inputs_array[$inputs[$i]->id] = $endDate;
}
}
// build up our curl data - includes hidden inputs, our to & from dates, plus the Search button
$curl_data = http_build_query($inputs_array)."&ctl00%24DefaultContent%24uxSearch=Search";
// POST the data, include session cookie
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $curl_data);
curl_setopt($ch, CURLOPT_COOKIE, $sessionCookie);
$response = curl_exec($ch);
// this shows that we can get data
// find the links from the HTML
$htmlDom = str_get_html($response); // load up Simple HTML DOM
// get the table of results
$divTable = $htmlDom->find('div#ctl00_DefaultContent_uxResultsWrapper',0)->find('table',0);
$rows = $divTable->find('tr');
for ($i=1; $i<count($rows);$i++)
{
if ($i>3) break; // limit the length of script for debugging
$link = $rows[$i]->find('td',1)->find('a',0)->href;
// build up query to get inmate details from the link above
$url = "http://nwweb.co.bell.tx.us/NewWorld.Aegis.WebPortal/Corrections/".$link;
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_COOKIE, $sessionCookie);
$page = curl_exec($ch);
$pageData = str_get_html($page);
// Now find the Photo, there's a thumb in div.BookingPhotos
// It is linked to a full size image, the link is of the form http://nwweb.co.bell.tx.us/NewWorld.Aegis.WebPortal/GetImage.aspx?ImageKey=17C030IS, but in the href, it has ../GetImage.aspx?ImageKey=xxxx
$photoLink = $pageData->find('div.BookingPhotos',0)->find('a',0)->href;
// get rid of .. and put the base URL on the front
$imgLink = str_replace("..", "http://nwweb.co.bell.tx.us/NewWorld.Aegis.WebPortal", $photoLink);
// now attempt to pull the image
$ch = curl_init($imgLink);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_COOKIE, $sessionCookie);
// here is the PROBLEM - NO DATA RETURNED
$imgData = curl_exec($ch); // I get a header back, but NO data
}
?>