0

my project was worked good last year but now the same project is not running properly i haven't change any code but now it is not extracting links from google. Did google has changed any setting for extracting links form it.. my codes are follows

<?php

    if(isset($_POST['operation']))
    {
        $op = $_POST['operation'];
        $start = time();

        if($op == 'search_crawler')
        {
            if(isset($_POST['text']))
            {
                include_once('mysqllibrary.php');

                mysqlStart();

                $start = time();

                $text = $_POST['text'];
                //Get the words from the text
                $words = explode(" ",$text);

                $query = "SELECT t1.url url,t1.content content,t2.number_of_clicks number_of_clicks FROM tbllinks t1,tblclicks t2 WHERE t1.id=t2.lid ORDER by t2.number_of_clicks DESC";
                $res = searchDatabase($query);
                $found = 0;

                if(sizeof($res) > 0)
                {
                    for($count=0;$count<sizeof($res);$count++)
                    {
                        $content = $res[$count]['content'];
                        $content_count = 0;
                        $url = $res[$count]['url'];
                        for($count2=0;$count2<sizeof($words);$count2++)
                        {
                            if(strstr(strtolower($content),strtolower($words[$count2])) === false)
                            {
                            }
                            else
                            {
                                $content_count = $content_count + 1;
                                $found = 1;
                            }
                        }

                        if($content_count > 0)
                        {
                            echo "<a href='add_user_url.php?user_id=$user_id&url=$url'>$url</a><br/>";
                        }
                    }
                }

                $end = time();
                if($found == 1)
                {
                    echo "<hr/>Time needed for output from crawler:" . ($end-$start) . " ms<hr/>";
                }
            }
        }
        else if($op == 'search_crawler2')
        {
            if(isset($_POST['text']))
            {
                include_once('simple_html_dom.php');
                $text = $text . " audio video images";
                $text = str_replace(' ','+',$_POST['text']);
                $user_id = $_POST['user_id'];

                $file_name = 'test.txt';
                $ch = curl_init("https://www.google.com/search?q=" . $text);
                $fp = fopen($file_name, "w");

                $start = time();

                curl_setopt($ch, CURLOPT_FILE, $fp);
                curl_setopt($ch, CURLOPT_HEADER, 0);
                curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
                curl_exec($ch);
                curl_close($ch);
                fclose($fp);

                $html = file_get_html($file_name);

                // Find all links 
                foreach($html->find('a') as $element) 
                {
                    $url = $element->href;
                    $index = strpos($url,'q=https://');
                    if($index > 0)
                    {
                        $index2 = strpos($url,'webcache');
                        if($index2 === false)
                        {
                            $index2 = strpos($url,'sa=U');
                            $url = substr($element->href,$index+2,$index2-$index-3);
                            echo "<a href='add_user_url.php?user_id=$user_id&url=$url'>$url</a>";
                            echo '<br/>';
                        }
                    }
                }
                $end = time();
                echo "<hr/>Time needed for search output:" . ($end-$start) . " ms<hr/>";
            }
        }
        else if($op == 'search')
        {
            if(isset($_POST['text']))
            {
                include_once('simple_html_dom.php');

                $text = $text . " audio video images";
                $text = str_replace(' ','+',$_POST['text']);
                $user_id = $_POST['user_id'];

                $file_name = 'test.txt';
                //$ch = curl_init("https://in.yahoo.com/search?q=" . $text);
                $ch = curl_init("https://www.google.com/search?q=" . $text);
                $fp = fopen($file_name, "w");

                curl_setopt($ch, CURLOPT_FILE, $fp);
                curl_setopt($ch, CURLOPT_HEADER, 0);
                curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
                sleep(4);
                curl_exec($ch);
                curl_close($ch);
                fclose($fp);

                $html = file_get_html($file_name);

                // Find all links 
                foreach($html->find('a') as $element) //---this is line no 130
                {
                    $url = $element->href;
                    $index = strpos($url,'q=https://');
                    if($index > 0)
                    {
                        $index2 = strpos($url,'webcache');
                        if($index2 === false)
                        {
                            $index2 = strpos($url,'sa=U');
                            $url = substr($element->href,$index+2,$index2-$index-3);
                            echo "<a href='$url'>$url</a>";
                            echo '<hr/>';
                        }
                    }
                }

                $end = time();
                echo "<hr/>Time needed for search output:" . ($end-$start) . " ms<hr/>";
            }
        }
    }
?>

While running the codes on local host it is showing an error of--

( ! ) Fatal error: Call to a member function find() on a non-object in 
C:\wamp\www\crawler_based_search_engine\ajax_requests.php on line 130

#   Time    Memory  Function    Location
1   0.0151  162928  {main}( )   ..\ajax_requests.php:0

because of this error my program is not fetching the links from the google

h2ooooooo
  • 39,111
  • 8
  • 68
  • 102
Adi
  • 1
  • 1
  • Asking the same question again won't help. Edit your existing one. – Niet the Dark Absol Jan 31 '15 at 18:10
  • As to how you can figure out what happens, why don't you simply output the contents of the page that the server gets and see the *actual* content? Neither the less, Google's TOS disallows you doing this. – h2ooooooo Jan 31 '15 at 18:33

0 Answers0