0

I'm trying to fetch the HTML source of an Instagram user's profile page (see the class below), but the response I get back is only a loading page.

Here's the class:

    /**
     * Fetch a URL with cURL and return the raw response.
     *
     * CURLOPT_HEADER is enabled, so the returned string contains the response
     * headers followed by the body. Redirects are followed.
     *
     * @param string $url Absolute URL to request.
     * @return string Response headers and body as one string.
     * @throws Exception When cURL fails to execute the request.
     */
    public static function getUrl($url) {
        $useragent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36';

        // NOTE: session cookies must never be hard-coded in source; if the
        // request needs authentication, load the values from configuration
        // and pass them through CURLOPT_COOKIE.
        $arrSetHeaders = [
            'origin: https://www.instagram.com',
            'authority: www.instagram.com',
            'method: GET',
            'upgrade-insecure-requests: 1',
            'Host: www.instagram.com',
            // Send the real browser string (previously the literal
            // placeholder "USERAGENT" was sent instead).
            'User-Agent: ' . $useragent,
            'content-type: application/x-www-form-urlencoded',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'accept-language:ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7,uk;q=0.6',
            'Referer: https://www.instagram.com',
            'Connection: keep-alive',
            'cache-control: max-age=0',
        ];

        // Initialize cURL
        $ch = curl_init();

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $arrSetHeaders);
        curl_setopt($ch, CURLOPT_VERBOSE, 0);
        // Include the response headers in the returned string.
        curl_setopt($ch, CURLOPT_HEADER, 1);
        // Keep TLS verification on: the certificate must be valid and must
        // match the requested host.
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

        // Execute the request and get response
        $response = curl_exec($ch);

        // Read the error text before closing, then always release the handle
        // so a failed request does not leak it.
        $error = $response === false ? curl_error($ch) : '';
        curl_close($ch);

        // Throw an error if we could not execute the request
        if ($response === false) {
            throw new Exception($error);
        }

        // Return the response content
        return $response;
    }

    /**
     * Query a nested array using dot notation syntax.
     *
     * Each dot-separated segment of $path is used as a key into the current
     * level. An empty path (null, false or '') returns $array unchanged.
     * A segment that is missing, whose value is null, or that would have to
     * index into a non-array value yields false.
     *
     * @param array $array Nested array to search.
     * @param string|null $path Dot-separated key path, e.g. 'a.0.b'.
     * @return mixed The value found at $path, or false when it cannot be resolved.
     */
    private static function arrayGet($array, $path = '')
    {
        // Compare explicitly instead of using empty(): the path '0' is a
        // valid key and must not be treated as "no path".
        if ($path === null || $path === false || $path === '') {
            return $array;
        }

        $structure = $array;

        foreach (explode('.', (string) $path) as $key) {
            // Require an array at every level; isset() on a string would
            // otherwise silently index into its characters.
            if (!is_array($structure) || !isset($structure[$key])) {
                return false;
            }

            $structure = $structure[$key];
        }

        return $structure;
    }

    /**
     * General scraper method (used for both user and tag searches)
     *
     * Downloads $url, extracts the JSON assigned to `window._sharedData`,
     * and uses $path (dot notation, relative to `entry_data`) to locate a
     * structure holding an `edges` list. Each edge's `node` is mapped to an
     * item with the keys `image`, `url`, `likes` and `comments`; a field that
     * is absent from the node is reported as null.
     *
     * Returns false when the page cannot be fetched, when the shared-data
     * script is not present in the response, or when the expected structure
     * is missing. Returns an array of items (empty when there are no edges)
     * otherwise, truncated to $limit items when a limit is given.
     *
     * @param string $url
     * @param string $path
     * @param int|null $limit
     * @return array|false
     */
    private static function scrape($url, $path, $limit = null)
    {
        try {
            $feed = self::getUrl($url);
        } catch (Exception $e) {
            return false;
        }

        if (!$feed) {
            return false;
        }

        $data = explode('window._sharedData = ', $feed);

        // The marker is absent from the response. Failure is reported through
        // the return value only; nothing is printed from here.
        if (!isset($data[1])) {
            return false;
        }

        $data_json = explode(';</script>', $data[1]);
        $data_obj = json_decode($data_json[0], true);

        if (!is_array($data_obj) || empty($data_obj['entry_data'])) {
            return false;
        }

        $structure = self::arrayGet($data_obj['entry_data'], $path);

        if (!is_array($structure) || !isset($structure['edges'])) {
            return false;
        }

        $media = $structure['edges'];
        $items = [];

        if (!empty($media) && is_array($media)) {
            foreach ($media as $edge) {
                // Skip malformed edges rather than raising notices.
                if (!is_array($edge) || !isset($edge['node']) || !is_array($edge['node'])) {
                    continue;
                }

                $node = $edge['node'];

                $items[] = [
                    'image'    => isset($node['display_url']) ? $node['display_url'] : null,
                    'url'      => isset($node['shortcode'])
                        ? 'https://www.instagram.com/p/' . $node['shortcode'] . '/'
                        : null,
                    'likes'    => isset($node['edge_liked_by']['count'])
                        ? $node['edge_liked_by']['count']
                        : null,
                    'comments' => isset($node['edge_media_to_comment']['count'])
                        ? $node['edge_media_to_comment']['count']
                        : null,
                ];
            }
        }

        return $limit !== null ? array_slice($items, 0, $limit) : $items;
    }

    /**
     * Get a public user's media, with an optional limit.
     *
     * Builds the profile URL for $user and delegates to scrape(). A leading
     * '@' is stripped and the name is URL-encoded so it cannot alter the
     * request path.
     *
     * Returns false when the page cannot be fetched or parsed; otherwise an
     * array of items, which may be empty.
     *
     * @param string $user Instagram username, with or without a leading '@'.
     * @param int|null $limit
     * @return array|false
     */
    public static function getUser($user, $limit = null)
    {
        $handle = rawurlencode(ltrim((string) $user, '@'));

        return self::scrape(
            'https://www.instagram.com/' . $handle . '/',
            'ProfilePage.0.graphql.user.edge_owner_to_timeline_media',
            $limit
        );
    }

    /**
     * Get public media with specified tag, with an optional limit.
     *
     * Builds the tag-explore URL for $tag and delegates to scrape(). A
     * leading '#' is stripped (it would otherwise start a URL fragment) and
     * the tag is URL-encoded, so non-ASCII tags produce a valid URL.
     *
     * Returns false when the page cannot be fetched or parsed; otherwise an
     * array of items, which may be empty.
     *
     * @param string $tag Hashtag, with or without a leading '#'.
     * @param int|null $limit
     * @return array|false
     */
    public static function getTag($tag, $limit = null)
    {
        $hashtag = rawurlencode(ltrim((string) $tag, '#'));

        return self::scrape(
            'https://www.instagram.com/explore/tags/' . $hashtag . '/',
            'TagPage.0.graphql.hashtag.edge_hashtag_to_media',
            $limit
        );
    }
}

And this is how I'm calling it:

// Load the class definition; fail immediately if the file is missing.
require_once __DIR__ . '/InstagramScrapeClass.php';

// getUser() is a static method, so call it on the class; no instance is needed.
$media = InstagramScrapeClass::getUser('maria43');

// getUser() returns false on failure, which print_r() would show as nothing.
if ($media === false) {
    echo 'Could not fetch or parse the profile page.';
} else {
    print_r($media);
}

Any idea how I can do this?

Thank you very much.

0 Answers0