0

I'm still a bit new to using curl to pull data and I've recently started using Fiddler to help find what options need to be set.

I'm trying to see if I can pull an image from a site. I first hit a search page - I set the search parameters, then start hitting links in the results. When I attempt to go a link in one of the results for an image, I get an empty string returned from curl_exec().

The weird thing is - at one point, it worked - I got the data back and successfully saved the image locally. But then it stopped, and I have no idea what I was doing to have it working. Naturally, everything works OK in the browser. :(

I'm using Simple HTML DOM to parse through results and cUrl for the actual page requests. curl_error() does not show an error, curl_getinfo() thinks everything is OK too. It's probably something trivial, but I'm not sure how to troubleshoot it beyond where I am.

<?php
include 'includes/simple_html_dom.php';

$url = "http://nwweb.co.bell.tx.us/NewWorld.Aegis.WebPortal/Corrections/InmateInquiry.aspx";

// Get Cookie - ASP.NET_SessionId
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, 1);
$r = curl_exec($ch);
preg_match_all('/^Set-Cookie:\s*([^;]*)/mi', $r, $matches);
$cookies = array();
foreach($matches[1] as $item)
{
    parse_str($item, $cookie);
    $cookies = array_merge($cookies, $cookie);
}
$sessionCookie = "ASP_NET_SessionId=".$cookies['ASP_NET_SessionId'];

// now load up page into Simple HTML DOM and get all inputs - ignore buttons and populate our dates
$startDate = "02%2F01%2F2000";
$endDate = "02%2F07%2F2016";

$getInputs = str_get_html($r);
$inputs = $getInputs->find('input');

$inputs_array = array();
$buttons_array = array();

for ($i=0; $i<count($inputs); $i++)
{
    if ($inputs[$i]->type != "submit")
    {
        $inputs_array[$inputs[$i]->id] = $inputs[$i]->value;
        if (stripos($inputs[$i]->id, "FromDate") > 0)
            $inputs_array[$inputs[$i]->id] = $startDate;
        if (stripos($inputs[$i]->id, "ToDate") > 0)
            $inputs_array[$inputs[$i]->id] = $endDate;
    }
}

// build up our curl data - includes hidden inputs, our to & from dates, plus the Search button
$curl_data = http_build_query($inputs_array)."&ctl00%24DefaultContent%24uxSearch=Search"; 

// POST the data, include session cookie
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $curl_data);
curl_setopt($ch, CURLOPT_COOKIE, $sessionCookie);
$response = curl_exec($ch);

// this shows that we can get data
// find the links from the HTML

$htmlDom = str_get_html($response); // load up Simple HTML DOM
// get the table of results
$divTable = $htmlDom->find('div#ctl00_DefaultContent_uxResultsWrapper',0)->find('table',0);
$rows = $divTable->find('tr');
for ($i=1; $i<count($rows);$i++)
{
    if ($i>3) break; // limit the length of script for debugging
    $link = $rows[$i]->find('td',1)->find('a',0)->href;

    // build up query to get inmate details from the link above
    $url = "http://nwweb.co.bell.tx.us/NewWorld.Aegis.WebPortal/Corrections/".$link;
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_COOKIE, $sessionCookie);
    $page = curl_exec($ch);
    $pageData = str_get_html($page);

    // Now find the Photo, there's a thumb in div.BookingPhotos
    // It is linked to a full size image, the link is of the form http://nwweb.co.bell.tx.us/NewWorld.Aegis.WebPortal/GetImage.aspx?ImageKey=17C030IS, but in the href, it has ../GetImage.aspx?ImageKey=xxxx
    $photoLink = $pageData->find('div.BookingPhotos',0)->find('a',0)->href;
    // get rid of .. and put the base URL on the front
    $imgLink = str_replace("..", "http://nwweb.co.bell.tx.us/NewWorld.Aegis.WebPortal", $photoLink);

    // now attempt to pull the image
    $ch = curl_init($imgLink);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_COOKIE, $sessionCookie);

    // here is the PROBLEM - NO DATA RETURNED
    $imgData = curl_exec($ch); // I get a header back, but NO data
}
?>
deweycooter
  • 82
  • 1
  • 2
  • 9
  • Have a look at [this question](http://stackoverflow.com/q/3757071/628267) for some ways to do more in-depth debugging of curl. – John C Feb 07 '16 at 23:22
  • I had seen that one. It suggests using CURLOPT_STDERR to capture debugging info. Unfortunately, I'm getting a HTTP Code of 200, so there's no error being thrown. – deweycooter Feb 08 '16 at 00:15

0 Answers0