I must have a memory leak, or something that is eating memory on my server, somewhere in this class. For example, if I call file_get_contents('http://www.theknot.com'), it is either unable to connect to the server even though the site is not down, or MySQL closes the connection, or, in extreme situations, it completely knocks out the server for an amount of time during which we cannot even get a ping. I know the problem is somewhere within the preg_match_all if block, but I don't know what would run away into what I can only assume is a lot of processing on the regex match, due to whatever is within the content that is fetched from the remote site. Any ideas?
<?php
/**
 * Finds images on a remote page that are large enough to be used as link
 * preview thumbnails.
 */
class Utils_Linkpreview extends Zend_Db_table
{
    /** Seconds to wait for the remote page before giving up on it. */
    const FETCH_TIMEOUT = 10;

    /**
     * Downloads the page at $url, extracts the <img> sources that end in a
     * jpg/jpeg/gif/png extension and keeps those whose real dimensions are at
     * least ($width - 10) x ($height - 10) pixels.
     *
     * Note that every candidate image is probed with getimagesize(), which
     * issues one HTTP request per image; that is the expensive part of this
     * method on image-heavy pages.
     *
     * @param string $url    Absolute URL of the page to inspect.
     * @param int    $width  Wanted thumbnail width (10px of slack is allowed).
     * @param int    $height Wanted thumbnail height (10px of slack is allowed).
     *
     * @return array|false List of absolute image URLs (possibly empty), or
     *                     false when the page could not be fetched or holds
     *                     no matching <img> tag.
     */
    public function getPreviews($url, $width = 200, $height = 200)
    {
        // [^>]*? keeps the match inside a single tag and tolerates slashes in
        // attributes that precede src; the i flag accepts <IMG SRC="A.JPG">.
        $regex = '/<img\b[^>]*?src="([^"]+\.(?:jpe?g|gif|png))/i';

        // Without an explicit timeout a slow host can block this request for
        // as long as default_socket_timeout allows.
        $context = stream_context_create(array(
            'http' => array('timeout' => self::FETCH_TIMEOUT),
        ));

        // file_get_contents() reports failure by returning false; the catch
        // only fires when an error handler turns warnings into exceptions, so
        // $data must have a value either way.
        $data = false;
        try {
            $data = file_get_contents($url, false, $context);
        } catch (Exception $e) {
            print "Caught exception when attempting to find images: ". $e->getMessage(). "\n";
        }

        if (!$data || !preg_match_all($regex, $data, $m, PREG_PATTERN_ORDER)) {
            return false;
        }

        // Resolve first and de-duplicate afterwards, so two different relative
        // spellings of the same image are only downloaded once.
        $candidates = array();
        foreach ($m[1] as $src) {
            $absolute = $this->rel2abs($src, $url);
            if ($absolute) {
                $candidates[$absolute] = true;
            }
        }

        $thumbs = array();
        foreach (array_keys($candidates) as $candidate) {
            // Best effort: an unreachable or corrupt image is simply skipped.
            $size = @getimagesize($candidate);
            if ($size && $size[0] >= ($width - 10) && $size[1] >= ($height - 10)) {
                $thumbs[] = $candidate;
            }
        }

        return $thumbs;
    }

    /**
     * Resolves an image reference found on a page against that page's URL.
     *
     * @param string $url  Reference as written in the src attribute.
     * @param string $host Absolute URL of the page the reference came from.
     *
     * @return string|null Absolute URL, or null when it cannot be resolved.
     */
    private function rel2abs($url, $host)
    {
        $url = trim((string) $url);
        if ($url === '') {
            return null;
        }

        if (preg_match('#^https?://#i', $url)) {
            return $url;
        }

        // Other explicit schemes (data:, javascript:, ...) are not links
        // relative to the page; skip them.
        if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $url)) {
            return null;
        }

        $base = parse_url($host);
        if (!is_array($base) || empty($base['host'])) {
            return null;
        }

        $scheme = isset($base['scheme']) ? $base['scheme'] : 'http';
        $root = $scheme . '://' . $base['host'];
        if (isset($base['port'])) {
            $root .= ':' . $base['port'];
        }

        // Protocol-relative reference: //cdn.example.com/a.png
        if (substr($url, 0, 2) === '//') {
            return $scheme . ':' . $url;
        }

        // Host-relative reference: /images/a.png
        if ($url[0] === '/') {
            return $root . $url;
        }

        // Keep any query string / fragment out of the dot-segment handling.
        $cut = strcspn($url, '?#');
        $suffix = (string) substr($url, $cut);
        $relative = (string) substr($url, 0, $cut);

        // Directory of the page; a base without a path (http://example.com)
        // resolves against the site root.
        $path = isset($base['path']) ? $base['path'] : '/';
        $lastSlash = strrpos($path, '/');
        $directory = ($lastSlash === false) ? '/' : substr($path, 0, $lastSlash + 1);

        $segments = array();
        foreach (explode('/', $directory . $relative) as $segment) {
            if ($segment === '..') {
                array_pop($segments);
            } elseif ($segment !== '.' && $segment !== '') {
                $segments[] = $segment;
            }
        }

        return $root . '/' . implode('/', $segments) . $suffix;
    }
}
?>
EDIT - Amal Murali's comment pointed me in a better direction: using PHP's DOMDocument instead of a regex. Thanks bud!
Here is the result:
/**
 * Collects the images of a remote page whose declared width or height
 * attribute is larger than 200 (optionally written with a "px" suffix).
 *
 * Only the HTML attributes are inspected; the images themselves are not
 * downloaded.
 *
 * @param string $url Absolute URL of the page to inspect.
 *
 * @return array|false List of resolved image URLs, or false when the page
 *                     could not be fetched or no image qualifies.
 */
public function getPreviews($url) {
    // Do not let a slow host block the request indefinitely.
    $context = stream_context_create(array(
        'http' => array('timeout' => 10),
    ));

    // file_get_contents() signals failure by returning false; the catch only
    // fires when an error handler converts warnings into exceptions.
    $html = false;
    try {
        $html = file_get_contents($url, false, $context);
    } catch (Exception $e) {
        print "Caught exception when attempting to find images: ". $e->getMessage(). "\n";
    }

    // DOMDocument::loadHTML() rejects an empty document, so stop here.
    if ($html === false || trim($html) === '') {
        return false;
    }

    // Collect libxml's complaints about real-world markup internally instead
    // of silencing the call with @, then restore the previous setting.
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $xpath = new DOMXPath($dom);
    $query = "//img[@width > 200 or substring-before(@width, 'px') > 200 or @height > 200 or substring-before(@height, 'px') > 200]";

    // Start from a real array: appending to false is deprecated in PHP 8.1.
    $thumbs = array();
    foreach ($xpath->query($query) as $node) {
        $src = trim($node->getAttribute("src"));
        if ($src === '') {
            continue;
        }

        // rel2abs() may be unable to resolve a reference; skip those.
        $absolute = $this->rel2abs($src, $url);
        if ($absolute) {
            $thumbs[] = $absolute;
        }
    }

    $thumbs = array_values(array_unique($thumbs));

    return $thumbs ? $thumbs : false;
}