I have a problem that I've been trying to solve for the last couple of days. I have a site where I crawl news, and that part works perfectly. Recently, however, I've run into a problem with my analyzer script: it appears to exceed the time limit my web host has set. Apparently there is a `max_execution_time` of about 1 minute, and my script takes much longer than that. I'm not able to adjust that in the php.ini file, since I'm hosting my website on a public server. What can I do? Do I need to rewrite my script?
I appreciate your help! My script is below:
<?php
/*
 * Analyzer step: counts tag / sub-tag occurrences in crawled articles.
 *
 * Expects an open mysqli connection in $con (created outside this section).
 *
 * Flow, as implemented below:
 *   1. Read the newest `statuz` row; continue only when its status is 2,
 *      otherwise print "Not ready yet".
 *   2. Load every tag with its sub-tags and pre-compile one regex per tag.
 *   3. Insert one `analyzer_queue` row (status 0) per crawler, then walk the
 *      queue rows that have status 0.
 *   4. For each queue row, fetch up to 10 `crawler_results` rows with
 *      status 3, count regex matches in title + content, and store non-zero
 *      counts in `article_tags`.
 *   5. Set the processed article to status 2, the queue row to status 2,
 *      and finally the `statuz` row to status 3.
 *
 * Everything that does not depend on the current article (the tag id and the
 * regex) is computed once up front. The inner article x tag loop therefore
 * issues no SELECT at all, which is what keeps the run time down.
 */

$analyzer_ids = array();

// The newest status row decides whether the analyzer may run at all.
$statusResult = mysqli_query($con, "SELECT id,status FROM statuz ORDER BY id DESC LIMIT 1");
$statusRow = $statusResult ? mysqli_fetch_array($statusResult) : null;
// An empty `statuz` table yields no row; treat that as "not ready".
$status = $statusRow ? $statusRow['status'] : null;
$status_id = $statusRow ? $statusRow['id'] : null;

if ($status == 2) {
    // Fetch all sub-tags in ONE query and group them by tag id, instead of
    // running one query per tag.
    $subTagsByTagId = array();
    $subTagResult = mysqli_query($con, "SELECT tag_id, sub_tag FROM sub_tags");
    while ($subTagResult && ($subTagRow = mysqli_fetch_array($subTagResult))) {
        $subTagsByTagId[$subTagRow['tag_id']][] = strtolower($subTagRow['sub_tag']);
    }

    // Build one pattern per tag: /(?:\btag\b)|(?:\bsub1\b)|(?:\bsub2\b)/i
    // The pattern only depends on the tag, so it is compiled here once and
    // reused for every article.
    $tagPatterns = array();
    $tagResult = mysqli_query($con, "SELECT tag, id FROM tags");
    while ($tagResult && ($tagRow = mysqli_fetch_array($tagResult))) {
        $alternatives = array('(?:\b' . preg_quote($tagRow['tag'], '/') . '\b)');
        if (isset($subTagsByTagId[$tagRow['id']])) {
            foreach ($subTagsByTagId[$tagRow['id']] as $subTag) {
                $alternatives[] = '(?:\b' . preg_quote($subTag, '/') . '\b)';
            }
        }
        $tagPatterns[] = array(
            'tag_id' => $tagRow['id'],
            'tag'    => $tagRow['tag'],
            'regex'  => '/' . implode('|', $alternatives) . '/i',
        );
    }

    // Queue one analyzer job per crawler.
    mysqli_query($con, "INSERT INTO analyzer_queue (crawler_id, status)
(SELECT id,0 FROM crawlers)");

    $queueResult = mysqli_query($con, "SELECT crawler_id,id FROM analyzer_queue WHERE status = '0'");
    while ($queueResult && ($queueRow = mysqli_fetch_array($queueResult))) {
        $analyzer_id = $queueRow['id'];
        $start_crawler_id = $queueRow['crawler_id'];

        // Flag the newest queue row of this crawler as status 1.
        mysqli_query($con, "UPDATE analyzer_queue SET status = '1' WHERE crawler_id = '$start_crawler_id' ORDER BY id DESC LIMIT 1");
        $analyzer_ids[] = $analyzer_id;

        // At most 10 articles per crawler and run.
        $articleResult = mysqli_query($con, "SELECT cr.title, cr.content, cr.id
FROM crawler_results cr
INNER JOIN crawlers c
ON c.newspaper_id = cr.newspaper_id
WHERE c.id = '$start_crawler_id'
AND status = '3'
LIMIT 10");
        while ($articleResult && ($articleRow = mysqli_fetch_array($articleResult))) {
            $article_id = $articleRow['id'];
            $title = strtolower($articleRow['title']);
            $content = strtolower(strip_tags($articleRow['content']));

            // Reset per article so the debug dump never shows another
            // article's numbers (and is defined even when there are no tags).
            $count_total = array();

            foreach ($tagPatterns as $tagPattern) {
                $contentHits = preg_match_all($tagPattern['regex'], $content, $contentMatches);
                $titleHits = preg_match_all($tagPattern['regex'], $title, $titleMatches);
                $total_count = $contentHits + $titleHits;
                $count_total[$tagPattern['tag']] = $total_count;

                // The tag id is already known from the initial tag load; no
                // need to look it up by name again for every article.
                $tag_id = $tagPattern['tag_id'];

                if ($total_count != 0) {
                    mysqli_query($con, "INSERT INTO article_tags (article_id,tag_id,count_tags) VALUES('$article_id','$tag_id','$total_count')");
                }

                echo $total_count;
                echo"<br /><br />";
            }

            echo"<pre>";
            print_r($count_total);
            echo"</pre>";

            mysqli_query($con, "UPDATE crawler_results SET status = '2', analyzer_id = '$analyzer_id' WHERE id = '$article_id'");
        }

        // Flag the newest queue row of this crawler as status 2.
        mysqli_query($con, "UPDATE analyzer_queue SET status = '2' WHERE crawler_id = '$start_crawler_id' ORDER BY id DESC LIMIT 1");
    }

    // With no queue rows the id list is empty and "NOT IN ()" would be a SQL
    // syntax error, so the update is skipped (the failed query changed
    // nothing either).
    if (count($analyzer_ids) > 0) {
        mysqli_query($con, "UPDATE crawler_results SET status = '4' WHERE analyzer_id NOT IN (" . implode(',', $analyzer_ids) . ")");
    }

    mysqli_query($con, "UPDATE statuz SET status = '3' WHERE id = '$status_id'");
    print_r($analyzer_ids);
} else {
    echo"Not ready yet";
}
?>