I have been playing around with cURL and XPath for some web scraping. I finally got my code running the way I want, but after trying it on another site it stopped working. The only things I have changed are the XPath and the URL. I'm totally new to this and have only been working with it for a week, so bear with me if it's an obvious mistake.
My code is:
<?php
/*
 * Fetches a Wall Street Journal futures page with cURL, parses it into a
 * DOMDocument and prints the text content of rows 3-6 of the ninth table
 * found under /html/body/div[6]/div[3]/div.
 */

/*----Connection to Database----*/
include('wp-config.php');
// NOTE(review): the mysql_* extension was removed in PHP 7.0; migrate these
// calls to mysqli or PDO. They are left untouched here because the connection
// is not used in this part of the script and other code may rely on it.
// NOTE(review): the database name is hard-coded as "db"; presumably it should
// be the DB_NAME constant from wp-config.php -- confirm.
mysql_connect(DB_HOST, DB_USER, DB_PASSWORD);
mysql_select_db("db");

/*----US Dollar Index----*/
$url = "http://www.wsj.com/mdc/public/page/2_3023-fut_index-futures.html";
$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';

// Make the cURL request
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FAILONERROR, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
$html = curl_exec($ch);

// curl_exec() returns false on failure. An empty body is rejected as well,
// because there is nothing to parse. Explicit comparisons are used so that a
// body consisting of "0" is not mistaken for an error.
if ($html === false || $html === '') {
    echo "<br />cURL error number:" .curl_errno($ch);
    echo "<br />cURL error:" . curl_error($ch);
    curl_close($ch);
    exit;
}
curl_close($ch);

// Parse the html into a DOMDocument.
// Real-world HTML is rarely well formed, so parser warnings are collected
// internally instead of being silenced with the @ operator; the previous
// libxml setting is restored afterwards.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

// Grab the wanted rows on the page.
// Browsers insert an implicit <tbody> into every table of their DOM, so an
// XPath copied from browser developer tools contains a /tbody step.
// DOMDocument keeps the markup as it was served and only has a <tbody> when
// the source really contains one. The query therefore matches rows both
// directly under the table and under an explicit <tbody>.
$xpath = new DOMXPath($dom);
$tablePath = "/html/body/div[6]/div[3]/div/table[9]";
// Alternative row range used previously: [position() >= 1 and position() <=13]
$rowStep = "tr[position() >= 3 and position() <=6]";
$data = $xpath->query($tablePath . "/" . $rowStep . " | " . $tablePath . "/tbody/" . $rowStep);

// Searching for data
$values = array();
if ($data === false || $data->length === 0) {
    // Nothing matched: the page layout most likely differs from the path above.
    echo "<br />No rows matched the XPath query";
} else {
    foreach ($data as $row) {
        $values[] = $row->nodeValue;
    }
}
print_r($values);
?>
</body>
</html>