I am trying to make a web scraper for a Danish wine site.
But I am having some trouble getting results out of it. I think my problem is in the XPath portion, as I can see from my debugging that it is omitting some strings, but I am not sure.
$title = $ScrapedPageXpath->query('*<h3>');
It could also be that my query is wrong.
I am not a skilled programmer, and this is the first thing I have ever tried to make, so please bear that in mind in your replies.
Below is my code:
<?php
/**
 * Download the body of a URL using cURL.
 *
 * @param string $url Address to request.
 *
 * @return string|false The response body, or false when the transfer could not
 *                      be completed (a warning describing the cause is raised).
 */
function curlGet($url)
{
    $chandle = curl_init();
    if ($chandle === false) {
        trigger_error('curlGet: unable to initialise a cURL handle', E_USER_WARNING);
        return false;
    }

    curl_setopt($chandle, CURLOPT_URL, $url);
    // Return the body from curl_exec() instead of echoing it.
    curl_setopt($chandle, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($chandle, CURLOPT_CONNECTTIMEOUT, 5);
    // CURLOPT_FOLLOWLOCATION is an on/off switch; the number of redirects to
    // follow is capped separately with CURLOPT_MAXREDIRS.
    curl_setopt($chandle, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($chandle, CURLOPT_MAXREDIRS, 5);

    $curlResults = curl_exec($chandle);
    if ($curlResults === false) {
        // Surface the reason for the failure instead of silently returning false.
        trigger_error('curlGet: ' . curl_error($chandle), E_USER_WARNING);
    }

    curl_close($chandle);

    return $curlResults;
}
// Holds the scraped values; starts out empty.
$Winelist = [];
/**
 * Parse an HTML string and return an XPath evaluator over the resulting document.
 *
 * @param string|false|null $item HTML markup to parse. An empty or non-scalar
 *                                value (for example the false returned by a
 *                                failed download) yields an empty document.
 *
 * @return DOMXPath XPath object bound to the parsed document.
 */
function returnXPathObject($item)
{
    $xmlPageDom = new DOMDocument();
    $markup = is_scalar($item) ? (string) $item : '';

    // Collect libxml parse warnings internally rather than hiding every error
    // with the "@" operator; the previous setting is restored afterwards.
    $previousSetting = libxml_use_internal_errors(true);

    // loadHTML() rejects an empty string, so an empty input is left as an
    // empty document that simply matches nothing.
    if ($markup !== '') {
        $xmlPageDom->loadHTML($markup);
    }

    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    return new DOMXPath($xmlPageDom);
}
// Download the shop page and build an XPath evaluator over its markup.
$ScrapedPage = curlGet('http://www.vinhit.dk/shop/');
$ScrapedPageXpath = returnXPathObject($ScrapedPage);

// '*<h3>' is not a valid XPath expression; '//h3' selects every <h3> element
// anywhere in the document.
// NOTE(review): assumes the wine titles live in <h3> elements; confirm against the page markup.
$title = $ScrapedPageXpath->query('//h3');

// query() returns false for a malformed expression, so check before reading ->length.
if ($title !== false && $title->length > 0) {
    // Only the first match is stored.
    $Winelist['title'] = $title->item(0)->nodeValue;
}

print_r($Winelist);