162

I'm trying to make curl follow a redirect but I can't quite get it to work right. I have a string that I want to send as a GET param to a server and get the resulting URL.

Example:

String = Kobold Vermin
Url = www.wowhead.com/search?q=Kobold+Worker

If you go to that url it will redirect you to "www.wowhead.com/npc=257". I want curl to return this URL to my PHP code so that i can extract the "npc=257" and use it.

Current code:

function npcID($name) {
    $urltopost = "http://www.wowhead.com/search?q=" . $name;
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1");
    curl_setopt($ch, CURLOPT_URL, $urltopost);
    curl_setopt($ch, CURLOPT_REFERER, "http://www.wowhead.com");
    curl_setopt($ch, CURLOPT_HTTPHEADER, Array("Content-Type:application/x-www-form-urlencoded"));
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
    return curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
}

This however returns www.wowhead.com/search?q=Kobold+Worker and not www.wowhead.com/npc=257.

I suspect PHP is returning before the external redirect happens. How can I fix this?

Stephen Ostermiller
  • 23,933
  • 14
  • 88
  • 109
Thomas Van Nuffel
  • 1,689
  • 2
  • 12
  • 6
  • 10
    This is one of the top questions for "curl follow redirects". To automatically follow redirects using the `curl` command, pass the `-L` or `--location` flag. E.g. `curl -L http://example.com/` – Rob W Sep 09 '13 at 19:09

7 Answers7

269

To make cURL follow a redirect, use:

curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

Erm... I don't think you're actually executing the curl... Try:

curl_exec($ch);

...after setting the options, and before the curl_getinfo() call.

EDIT: If you just want to find out where a page redirects to, I'd use the advice here, and just use Curl to grab the headers and extract the Location: header from them:

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$result = curl_exec($ch);
if (preg_match('~Location: (.*)~i', $result, $match)) {
   $location = trim($match[1]);
}
Kerem
  • 11,377
  • 5
  • 59
  • 58
Matt Gibson
  • 37,886
  • 9
  • 99
  • 128
  • 2
    This makes php follow the redirect. I dont want to follow the redirect, I just want to know the url of the redirected page. – Thomas Van Nuffel Aug 19 '10 at 08:50
  • 10
    Oh, so you don't actually want to fetch the page? Just find out the location? In that case, I'd suggest the tactic used here: http://zzz.rezo.net/HowTo-Expand-Short-URLs.html -- basically just grab the header from the page that redirects, and grab the Location: header from it. Either way, though, you still need to do the exec() for Curl to actually _do_ anything... – Matt Gibson Aug 19 '10 at 09:03
  • 1
    I suggest taking a look at Luca Camillos solution below, because this solution doesn't take multiple redirections into consideration. – Christian Engel Apr 12 '15 at 20:25
  • this solution opens the new webpage within the same url. I want to change the url also along with posting the parameters to that url. How can I achieve that? – amanpurohit Oct 06 '15 at 07:43
  • @MattGibson when i use $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); with CURLOPT_FOLLOWLOCATION set to true what will be the httpcode. I mean will it be for the first url or for the redirect url – Manigandan Arjunan Dec 10 '15 at 12:00
34

Add this line to curl inizialization

curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

and use getinfo before curl_close

$redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL );

es:

$ch = curl_init($url);
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_BINARYTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT ,0); 
curl_setopt($ch, CURLOPT_TIMEOUT, 60);
$html = curl_exec($ch);
$redirectURL = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL );
curl_close($ch);
miken32
  • 42,008
  • 16
  • 111
  • 154
Luca Camillo
  • 796
  • 9
  • 9
  • 2
    I think this one is the better solution, because it also unfolds multiple redirections. – Christian Engel Apr 12 '15 at 20:24
  • Remember: (ok, duh) POST data will not be re-submitted after a redirect. In my case this happened and I felt stupid afterwards because: just use the appropriate url and it's fixed. – twicejr May 22 '17 at 17:57
  • Using `curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);` is a security vulnerability. It essentially says “Ignore the SSL errors if it’s broken – trust the same as you would an unencrypted URL.”. – Finesse Dec 08 '18 at 02:11
8

The answer above didn't work for me on one of my servers, something to to with basedir, so I re-hashed it a little. The code below works on all my servers.

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
$a = curl_exec($ch);
curl_close( $ch ); 
// the returned headers
$headers = explode("\n",$a);
// if there is no redirection this will be the final url
$redir = $url;
// loop through the headers and check for a Location: str
$j = count($headers);
for($i = 0; $i < $j; $i++){
// if we find the Location header strip it and fill the redir var       
if(strpos($headers[$i],"Location:") !== false){
        $redir = trim(str_replace("Location:","",$headers[$i]));
        break;
    }
}
// do whatever you want with the result
echo $redir;
Taufik Nurrohman
  • 3,329
  • 24
  • 39
GR1NN3R
  • 81
  • 1
  • 1
  • The `Location: ` header is not always to follow a redirect. Also please see a question that is explicitly about this: [curl follow location error](http://stackoverflow.com/questions/2511410/curl-follow-location-error) – hakre Mar 13 '13 at 09:19
5

The chosen answer here is decent but its case sensitive, doesn't protect against relative location: headers (which some sites do) or pages that might actually have the phrase Location: in their content... (which zillow currently does).

A bit sloppy, but a couple quick edits to make this a bit smarter are:

function getOriginalURL($url) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
    $result = curl_exec($ch);
    $httpStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    // if it's not a redirection (3XX), move along
    if ($httpStatus < 300 || $httpStatus >= 400)
        return $url;

    // look for a location: header to find the target URL
    if(preg_match('/location: (.*)/i', $result, $r)) {
        $location = trim($r[1]);

        // if the location is a relative URL, attempt to make it absolute
        if (preg_match('/^\/(.*)/', $location)) {
            $urlParts = parse_url($url);
            if ($urlParts['scheme'])
                $baseURL = $urlParts['scheme'].'://';

            if ($urlParts['host'])
                $baseURL .= $urlParts['host'];

            if ($urlParts['port'])
                $baseURL .= ':'.$urlParts['port'];

            return $baseURL.$location;
        }

        return $location;
    }
    return $url;
}

Note that this still only goes 1 redirection deep. To go deeper, you actually need to get the content and follow the redirects.

broox
  • 3,538
  • 33
  • 25
5

Sometimes you need to get HTTP headers but at the same time you don't want return those headers.**

This skeleton takes care of cookies and HTTP redirects using recursion. The main idea here is to avoid return HTTP headers to the client code.

You can build a very strong curl class over it. Add POST functionality, etc.

<?php

class curl {

  static private $cookie_file            = '';
  static private $user_agent             = '';  
  static private $max_redirects          = 10;  
  static private $followlocation_allowed = true;

  function __construct()
  {
    // set a file to store cookies
    self::$cookie_file = 'cookies.txt';

    // set some general User Agent
    self::$user_agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';

    if ( ! file_exists(self::$cookie_file) || ! is_writable(self::$cookie_file))
    {
      throw new Exception('Cookie file missing or not writable.');
    }

    // check for PHP settings that unfits
    // correct functioning of CURLOPT_FOLLOWLOCATION 
    if (ini_get('open_basedir') != '' || ini_get('safe_mode') == 'On')
    {
      self::$followlocation_allowed = false;
    }    
  }

  /**
   * Main method for GET requests
   * @param  string $url URI to get
   * @return string      request's body
   */
  static public function get($url)
  {
    $process = curl_init($url);    

    self::_set_basic_options($process);

    // this function is in charge of output request's body
    // so DO NOT include HTTP headers
    curl_setopt($process, CURLOPT_HEADER, 0);

    if (self::$followlocation_allowed)
    {
      // if PHP settings allow it use AUTOMATIC REDIRECTION
      curl_setopt($process, CURLOPT_FOLLOWLOCATION, true);
      curl_setopt($process, CURLOPT_MAXREDIRS, self::$max_redirects); 
    }
    else
    {
      curl_setopt($process, CURLOPT_FOLLOWLOCATION, false);
    }

    $return = curl_exec($process);

    if ($return === false)
    {
      throw new Exception('Curl error: ' . curl_error($process));
    }

    // test for redirection HTTP codes
    $code = curl_getinfo($process, CURLINFO_HTTP_CODE);
    if ($code == 301 || $code == 302)
    {
      curl_close($process);

      try
      {
        // go to extract new Location URI
        $location = self::_parse_redirection_header($url);
      }
      catch (Exception $e)
      {
        throw $e;
      }

      // IMPORTANT return 
      return self::get($location);
    }

    curl_close($process);

    return $return;
  }

  static function _set_basic_options($process)
  {

    curl_setopt($process, CURLOPT_USERAGENT, self::$user_agent);
    curl_setopt($process, CURLOPT_COOKIEFILE, self::$cookie_file);
    curl_setopt($process, CURLOPT_COOKIEJAR, self::$cookie_file);
    curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
    // curl_setopt($process, CURLOPT_VERBOSE, 1);
    // curl_setopt($process, CURLOPT_SSL_VERIFYHOST, false);
    // curl_setopt($process, CURLOPT_SSL_VERIFYPEER, false);
  }

  static function _parse_redirection_header($url)
  {
    $process = curl_init($url);    

    self::_set_basic_options($process);

    // NOW we need to parse HTTP headers
    curl_setopt($process, CURLOPT_HEADER, 1);

    $return = curl_exec($process);

    if ($return === false)
    {
      throw new Exception('Curl error: ' . curl_error($process));
    }

    curl_close($process);

    if ( ! preg_match('#Location: (.*)#', $return, $location))
    {
      throw new Exception('No Location found');
    }

    if (self::$max_redirects-- <= 0)
    {
      throw new Exception('Max redirections reached trying to get: ' . $url);
    }

    return trim($location[1]);
  }

}
halfer
  • 19,824
  • 17
  • 99
  • 186
Igor Parra
  • 10,214
  • 10
  • 69
  • 101
0

You can use:

$redirectURL = curl_getinfo($ch,CURLINFO_REDIRECT_URL);
A--C
  • 36,351
  • 10
  • 106
  • 92
  • +1. `CURLINFO_REDIRECT_URL` works for me, but `CURLINFO_EFFECTIVE_URL` is not. [This](https://stackoverflow.com/a/35046466/13433323) is why. – Sprite Oct 10 '21 at 18:44
0

Lot's of regex here, despite the fact i really like them this way might be more stable to me:

$resultCurl=curl_exec($curl); //get curl result
//Optional line if you want to store the http status code
$headerHttpCode=curl_getinfo($curl,CURLINFO_HTTP_CODE);

//let's use dom and xpath
$dom = new \DOMDocument();
libxml_use_internal_errors(true);
$dom->loadHTML($resultCurl, LIBXML_HTML_NODEFDTD);
libxml_use_internal_errors(false);
$xpath = new \DOMXPath($dom);
$head=$xpath->query("/html/body/p/a/@href");

$newUrl=$head[0]->nodeValue;

The location part is a link in the HTML sent by apache. So Xpath is perfect to recover it.

Patrick Valibus
  • 227
  • 4
  • 17