0

I'm trying to get the zip file out of this link with C#: http://dl.opensubtitles.org/en/download/sub/4860863

I've tried: string ResponseText;

        // First attempt: issue a plain GET for the link and read the whole body as a string.
        // (o.link and ResponseText are declared outside this snippet.)
        HttpWebRequest m = (HttpWebRequest)WebRequest.Create(o.link);
        m.Method = WebRequestMethods.Http.Get;

        using (HttpWebResponse response = (HttpWebResponse)m.GetResponse())
        {

            // NOTE(review): StreamReader decodes the body as text. If the server answers with a
            // binary zip archive, decoding it into a string will not round-trip the bytes, which
            // would explain output that "looks weird" - confirm against the response Content-Type.
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {

               ResponseText = reader.ReadToEnd();

                // ResponseText = HttpUtility.HtmlDecode(ResponseText);
                // NOTE(review): xmlr wraps the text in an XML reader but is never read from
                // within this snippet.
                XmlTextReader xmlr = new XmlTextReader(new StringReader(ResponseText));


            }
        }

and

  // Second attempt: request the link and copy the response body into "test.zip".
  // (o.link is declared outside this snippet.)
  WebRequest request = WebRequest.Create(o.link);
        using (WebResponse response = request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        {

            string contentType = response.ContentType;
            // TODO: examine the content type and decide how to name your file
            string filename = "test.zip";

            // Download the file
            // NOTE(review): File.OpenWrite does not truncate an existing file, so a longer
            // leftover test.zip would keep its old trailing bytes.
            using (Stream file = File.OpenWrite(filename))
            {
                // Remark: if the file is very big read it in chunks
                // to avoid loading it into memory
                // NOTE(review): ContentLength can be -1 when the server sends no Content-Length
                // header, in which case this allocation would throw.
                byte[] buffer = new byte[response.ContentLength];
                // NOTE(review): Stream.Read may return fewer bytes than requested, and its return
                // value is ignored here. After a short read the rest of the buffer stays zero,
                // yet the whole buffer is written out below.
                stream.Read(buffer, 0, buffer.Length);
                file.Write(buffer, 0, buffer.Length);
            }
        }

But they all return something weird, nothing that looks like the file I need... I think the link is PHP-generated, but I'm not sure. The OpenSubtitles API is not an option for me. Many thanks.

Perry
  • 11,172
  • 2
  • 27
  • 37
  • try http://stackoverflow.com/questions/307688/how-to-download-a-file-from-a-url-in-c if anything, it would simplify your code. – argaz Jun 25 '13 at 16:25
  • each response and request in http has a header and body, when you get the stream, it means you get the whole (both header and body) stream, you need to manipulate the header (header is 1st, body is 2nd place in stream) to find out where (seek) exactly body stream is placed –  Jun 25 '13 at 16:29
  • That is not correct. Per the MSDN docs on `HttpWebResponse.GetResponseStream`: `Gets the stream that is used to read the body of the response from the server`. So using GetResponseStream is the correct way to read just the body portion of the response. – rossipedia Jun 25 '13 at 16:34

1 Answers1

2

The Content-Type of the response looks fine to me for your link:

Request URL:http://dl.opensubtitles.org/en/download/sub/4860863
Request Method:GET
Status Code:200 OK
Request Headersview:
Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
Accept-Encoding:gzip,deflate,sdch
Accept-Language:en-US,en;q=0.8
Connection:keep-alive
Cookie:PHPSESSID=gk86hdrce96pu06kuajtue45a6; ts=1372177758
Host:dl.opensubtitles.org
User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36
Response Headersview:
Accept-Ranges:bytes
Age:0
Cache-Control:must-revalidate, post-check=0, pre-check=0
Connection:keep-alive
Content-Disposition:attachment; filename="the.dark.knight.(2008).dut.1cd.(4860863).zip"
Content-Length:48473
Content-Transfer-Encoding:Binary
Content-Type:application/zip
Date:Tue, 25 Jun 2013 16:29:45 GMT
Expires:Mon, 1 Apr 2006 01:23:45 GMT
Pragma:public
Set-Cookie:ts=1372177785; expires=Thu, 25-Jul-2013 16:29:45 GMT; path=/
X-Cache:MISS
X-Cache-Backend:web1

I checked your code and tested it with the link: the manual download produced a 48473-byte file, while your code produced 48564 bytes, with zeros after offset 0xDC2, and comparing the two in a hex editor showed many differing parts. We may need to add more request headers before sending the request.

OK, I have now resolved it: send the cookie and read the response in smaller chunks.

/// <summary>
/// Downloads the subtitle archive from the hard-coded OpenSubtitles URL and
/// saves it as "test.zip" in the current working directory.
/// </summary>
/// <param name="sender">The control that raised the click event (unused).</param>
/// <param name="e">The event data (unused).</param>
private void button1_Click(object sender, EventArgs e) {
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri("http://dl.opensubtitles.org/en/download/sub/4860863"));
    // Session cookie copied from a browser request. No User-Agent, Accept or
    // Accept-Encoding header is set on this request.
    request.Headers["Cookie"] = "PHPSESSID=gk86hdrce96pu06kuajtue45a6; ts=1372177758";

    byte[] payload;
    using (WebResponse response = request.GetResponse())
    using (Stream stream = response.GetResponseStream()) {
        // TODO: examine response.ContentType and decide how to name your file

        // A single Stream.Read call may return only part of the body, so the
        // response is drained in a loop by ReadFully. Once it returns the stream
        // is at its end and no further Read is needed.
        payload = ReadFully(stream, 256);
    }

    string filename = "test.zip";

    // File.WriteAllBytes creates the file or overwrites (truncates) an existing
    // one. File.OpenWrite would leave the tail of a longer, older test.zip in
    // place and produce a corrupt archive.
    File.WriteAllBytes(filename, payload);
}

/// <summary>
/// Drains a stream to its end and hands back everything that was read
/// as a single byte array. Any exception raised by the underlying
/// stream reads propagates to the caller.
/// </summary>
/// <param name="stream">The stream to drain</param>
/// <param name="initialLength">Size of the read buffer; values below 1 select 32K</param>
/// <returns>All bytes read from the stream, in order (empty if the stream had none)</returns>
public static byte[] ReadFully(Stream stream, int initialLength) {
    // An unhelpful size hint falls back to 32K.
    int chunkSize = initialLength < 1 ? 32768 : initialLength;
    byte[] chunk = new byte[chunkSize];

    using (MemoryStream collected = new MemoryStream()) {
        // Read may deliver fewer bytes than requested; only a return
        // value of zero (or less) signals the end of the stream.
        for (int got = stream.Read(chunk, 0, chunk.Length);
             got > 0;
             got = stream.Read(chunk, 0, chunk.Length)) {
            collected.Write(chunk, 0, got);
        }

        // ToArray yields an array sized exactly to the bytes collected.
        return collected.ToArray();
    }
}

EDIT: You don't need to set the Cookie header at all; you'll get a different file, but a valid one. I assume the server adds extra info to the file when you revisit it.

arifnpm
  • 357
  • 1
  • 7