0

I have a page where I need to get the substring so I made this code :

static string GetBetween(string message, string start, string end)
        {
            int startIndex = message.IndexOf(start) + start.Length;
            int stopIndex = message.IndexOf(end);
            return message.Substring(startIndex, stopIndex - startIndex);

        }

And this is the code that gets the value:

 string myStr = HttpMethods.Get("https://www.marktplaats.nl/account/login.html", "https://www.marktplaats.nl/account/login.html", ref myCookies);
            string myToken;
            Console.WriteLine("Token: " + (myToken = GetBetween(myStr, "name='xsrf.token' value='", "'>")));

But instead of getting the value between this line (The value changes every time I do a new WebRequest so I need to get the new value every time):

<input type="hidden" name="xsrf.token" value="1576329716873.d7a4f9a4b3f8e63fa5d1fe94730774c34b327ec96cdbb4aaca405de3b96231b3(THIS CHANGES EVERYTIME I LOAD THE PAGE)">

It gives me this output:

ng="nl">
<head>
    <title>= Marktplaats - De plek om nieuwe en tweedehands spullen te kopen en verkopen</title>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
    <meta http-equiv="Content-Language" content="nl">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <meta name="description" content="De grootste advertentiesite van Nederland. Nieuwe en tweedehands goederen en diensten, koopt en verkoopt u op Marktplaats.nl.">
    <link rel="shortcut icon" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/favicon.3e02874c.ico">
    <link rel="apple-touch-icon" sizes="57x57" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/apple-touch-icon-57x57.722d5557.png">
    <link rel="apple-touch-icon" sizes="114x114" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/apple-touch-icon-114x114.fdd2bafc.png">
    <link rel="apple-touch-icon" sizes="72x72" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/apple-touch-icon-72x72.6af4f862.png">
    <link rel="apple-touch-icon" sizes="144x144" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/apple-touch-icon-144x144.319efd6c.png">
    <link rel="apple-touch-icon" sizes="60x60" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/apple-touch-icon-60x60.c37547f0.png">
    <link rel="apple-touch-icon" sizes="120x120" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/apple-touch-icon-120x120.94703796.png">
    <link rel="apple-touch-icon" sizes="76x76" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/apple-touch-icon-76x76.8866913d.png">
    <link rel="apple-touch-icon" sizes="152x152" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/apple-touch-icon-152x152.d5085224.png">
    <link rel="apple-touch-icon" sizes="180x180" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/apple-touch-icon-180x180.8a15fd8d.png">
    <link rel="icon" type="image/png" sizes="192x192" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/favicon-192x192.da693c37.png">
    <link rel="icon" type="image/png" sizes="160x160" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/favicon-160x160.a3b4d12c.png">
    <link rel="icon" type="image/png" sizes="96x96" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/favicon-96x96.cc0ba841.png">
    <link rel="icon" type="image/png" sizes="16x16" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/favicon-16x16.ce3b2c65.png">
    <link rel="icon" type="image/png" sizes="32x32" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/favicon-32x32.595f2e60.png">
    <meta name="theme-color" content="#eda566">
    <meta name="msapplication-TileColor" content="#eda566">
    <meta name="msapplication-square150x150logo" content="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/mstile-150x150.e3e83679.png">
    <meta name="msapplication-square310x310logo" content="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/mstile-310x310.09da1373.png">
    <meta name="msapplication-square70x70logo" content="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/mstile-70x70.2bc50495.png">
    <meta name="msapplication-wide310x150logo" content="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/favicons/mstile-310x150.80f61dab.png">
    <meta name="msapplication-config" content="none">
    <meta name="generation-time-millis" content="1576332442585">
    <meta name="user-logged-in" content="false">
    <link rel="stylesheet" type="text/css" href="//s.marktplaats.com/static-web/identity/vendor/normalize.css/normalize.112272e5.css" />
    <link rel="stylesheet" type="text/css" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/Index.ad7c2c53.css" />

        <script src="//s.marktplaats.com/static-web/identity/vendor/design-sdk/polyfills/polyfills.bce7e2b1.js"></script>
        <script src="//s.marktplaats.com/static-web/identity/vendor/design-sdk/components/polymer.1a8b099b.js"></script>
        <link rel="import" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/themes/mp/CssVars.d3ec1e54.html">
        <style is="custom-style" include="mp-css-vars"></style>
        <link rel="import" href="//s.marktplaats.com/static-web/identity/vendor/design-sdk/organisms/header.63727d14.html">
        <link rel="stylesheet" type="text/css" media="screen" href="//s.marktplaats.com/static-web/identity/css/Login.mp.nlnl.6f8ceda7.css"/>
    <script>
        window.dataLayer = [{"p":{"t":"Login"},"c":{"l1":{"id":-1},"l2":{"id":-1},"c":{"id":-1}},"u":{"li":false,"tg":{"stg":"aur7110_b|aur10242_a|aur6493_a|aur9628_b|aur3815-android_b|alternative-redirect_b|aur9245_a|aur9234_b|aur7712new_c|aur9777_c|aur9214_b|aur7342android_c|an-lmap_b|aur9848_b|aur9247_b|aur7377-order_b|aur8977_c|aur2445_b|aur9583_b|aur10018_a|aur9439_b|aur8030_c|aur2389_s_b|aur5496_a|vos2-150_b|aur7892_b|aur9856_b|aur9629_a|aur9007_b|vos6677_b|aur9752_a|aur10141_a|aur6760_a|aur-cdata345_a|aur10256_a|aur8865_b|aur4923_c|aur6594_a|aur5875_c|aur-cdata673_b|aur7918_b|aur3815_s_b|aur9834_b|aur8682_b|aur9749_a|aur6087_b|aur6503_a|aur8949_a|aur8950_a|aur9863_c|aur8221_b|aur8863_b|aur8808_b|aur5346_b|aur9685_b|aur8722_c|aur9555_b|pie3083-vip_a|aur6745_a|pie-2608-reblaze-challenge-tag_a|aur4627_b|aur10388_b|aur8790_b|aur8721_c|aur6916_a|aur9761_a|unsupported-browser-warning_b|aur3130_b|aur7826_b|aur9996_b|aur7464_b|aur9939_b|aur2389_v_b|aur5496-android_a|aur7958_b|aur10124_b|aur6034_a|aur2862_b|aur8869_b|aur9380_b|aur3186_b|aur8533_b|aur6207_b|aur9287v2_b|aur6768_b|aur9714_b|aur6915_a|aur10022_b|aur9187_b|aur10313_a|aur2805_b"}},"d":{"ua":"Go-http-client\u002F1.1"},"s":{"pn":-1,"ps":-1,"tr":-1,"srt":{},"prc":{"mn":-1,"mx":-1}},"l":{"d":-1},"a":{},"o":{},"m":{}}];
    </script>

        <script src="//s.marktplaats.com/static-web/identity/vendor/raven-js/raven.min.daf037c2.js" crossorigin="anonymous"></script>
        <script type="text/javascript">
            Raven.config('https://e50a08a833ea4c4f8cd6413450461127@sentry.marktplaats.nl/10', {
                release: 'nohash',
                tags: {
                    artifactId: 'identity-frontend',
                    requestId: '998712b4-8022-47fb-bcc6-d18f0c4de521'
                }
            }).install();

        </script>

    <script type='text/javascript

The website is:

https://www.marktplaats.nl/account/login.html

After this error was fixed I got this one this is my whole webrequest:

This is my method

This is my main program.cs

JFoxx64
  • 262
  • 2
  • 11
CSharper
  • 3
  • 4
  • Why do you want to do extensive string processing? – Anand Vaidya Dec 14 '19 at 14:18
  • @AnandVaidya Would like to make an UI for faster advertisement sharing so I need the token to login with my webrequest. – CSharper Dec 14 '19 at 14:21
  • Try to use HtmlAgilitypack. Remember this: DO NOT regex Html content. Try to use html parser instead! – Mr.D Dec 14 '19 at 14:41
  • @Mr.D Thanks for the info I'm kind of new to C# . Can you look at my new error and what I did wrong with my request headers? Thanks in advance :) – CSharper Dec 14 '19 at 15:09
  • Protip: stop using string parsing/regex. Look into parsing the HTML properly (or use a browser object to fetch which will already provide the DOM to traverse). E.g. https://stackoverflow.com/q/56107/298053 – Brad Christie Dec 14 '19 at 15:14
  • @BradChristie thanks for the information can you please look at my webrequest error http 400? I don't know what I did wrong there am I missing some information? The pictures are at the bottom of my post... thanks in advance – CSharper Dec 14 '19 at 15:17
  • I don't see you supply `ContentLength` in your post method. Unless library's changed since I used it, could be reason for 400. – Brad Christie Dec 14 '19 at 15:21
  • @BradChristie I used ContentLength but now I got this error: "The bytes to be written to the stream exceed the specified Content-Length size." – CSharper Dec 14 '19 at 15:27

3 Answers3

1

Use regex.

string myStr = HttpMethods.Get("https://www.marktplaats.nl/account/login.html", "https://www.marktplaats.nl/account/login.html", ref myCookies);
string regex = "<input type\\=\\\"hidden\\\" name=\\\"xsrf\\.token\\\" value\\=\\\"([^\\\"]+)\\\"";

var xsrfToken = Regex.Match(myStr, regex).Groups[1].Value;

Console.WriteLine(xsrfToken);

As for those stating you should never use Regex for parsing HTML, using HTML Agility Pack for something like this is overkill and will likely perform a lot worse too, don't believe me? Hear it from their own developers

https://html-agility-pack.net/knowledge-base/43677736/csharp-htmlagilitypack-vs-regular-expressions-for-extracting-links-from-html

Aydin
  • 15,016
  • 4
  • 32
  • 42
  • I don't get your code because the expectedOutput is different everytime I use a webrequest I just need the code after value for every new webrequest. Thanks anyways – CSharper Dec 14 '19 at 14:36
  • Yes the output is different every time, it's supposed to be, but the text it is grabbing every time is the actual xsrf token. Are you trying to grab the token or are you trying to grab that entire line? If you're trying to grab the token, replace the "testCase" with the html output of the website and it will return the very xsrf token you are searching for – Aydin Dec 14 '19 at 14:39
  • Im trying to get the token everytime. I'm kind of new to C# and trying to get the value of the input field. Can you change your code so I can get the value of the input field? Thanks for the help I really appreciate it. – CSharper Dec 14 '19 at 14:41
  • This worked for me, however I got an error for my webrequest can you maybe take a look at the request for this website I think something is wrong with my headers.. I got an error that says string is not json formatted? Posted pictures off my request in my question. – CSharper Dec 14 '19 at 14:54
  • RegEx is definitely a powerful tool, but with it comes with a lot of weight and it's probably not an efficient way to do such a simple thing. A new developer should also learn how to develop algorithms to hone their problem solving skills. I liked the OP's first attempt because it involved trying to solve the one specific problem. It was close but needed some tweaks. – Jim Berg Dec 14 '19 at 15:16
  • @JimBerg You'd be surprised, I tried your solution, iterated over it 1,000,000 times, it did that in 11.9 seconds, the regex method does it in 6.5 over the same number of iterations – Aydin Dec 14 '19 at 16:47
  • @CSharper for additional problems, you need to open a new question – Aydin Dec 14 '19 at 16:51
  • @Aydin I made a new question can you help me out ? https://stackoverflow.com/questions/59336746/my-webrequest-is-not-working-because-of-exceeding-the-content-length-what-is-wro – CSharper Dec 14 '19 at 17:24
  • @Aydin, I am surprised since my benchmark shows the opposite. I iterate over each 10 million times. First with .NET Framework 4.6.1. I get 8.4 seconds for regex and 1.9 for my solution. I also used a RegEx object instead of the RegEx.Match() static method. Doing that shaved off 2.4 seconds so I get 6 seconds. My input was the example text that CSharper supplied in his question. The single line of text. I also ran the same code in .NET Core 3.1. It's definitely faster. 5 secs for RegEx static, 4.7 for RegEx instance, and 0.8 seconds for my code. – Jim Berg Dec 14 '19 at 18:15
  • @Aydin, I should mention that I'm using Windows 10 on a core i9 9900k with 64GB of RAM and VS2019. – Jim Berg Dec 14 '19 at 18:22
  • 1
    @JimBerg I see, the issue is the sample text being used, I was testing over the actual HTML from the website he is using – Aydin Dec 14 '19 at 18:23
  • FWIW, the issue I have w/ regex is it's very reliant on original format. If they change attribute order, add class tag between, decide to add whitespace for readability--anything, your sln fails. I'd rather take the perf hit, too, if it means writing it once (esp for something this trivial). – Brad Christie Dec 16 '19 at 14:36
  • @Aydin, I used it against the text from the site and got the same results as you. It's absolutely bizarre and counter intuitive that RegEx can be faster, but it's good to know. The big question now is how could it be faster? I would hope that the framework developers would use an efficient string search algorithm rather than a simple scan and compare process. – Jim Berg Dec 16 '19 at 14:48
0

Your call to GetBetween doesn't even pass the correct values. It should be a double-quote, not a single quote since the text you're searching uses double quotes.

You also need verify that you actually find the string by checking if the index is less than zero.

You can use this:

    public static string GetBetween(string source, string start, string end)
    {
        var startPos = source.IndexOf(start, StringComparison.Ordinal);
        if (startPos < 0) return null;
        startPos += start.Length;
        var endPos = source.IndexOf(end, startPos,StringComparison.Ordinal);
        return endPos < 0 ? null : source.Substring(startPos, endPos - startPos - 1);
    }

I return null if the string isn't present.

Adding Benchmark Code for reference: private static string check = "";

    static void Main(string[] args)
    {
        string regex = "<input type\\=\\\"hidden\\\" name=\\\"xsrf\\.token\\\" value\\=\\\"([^\\\"]+)\\\"";
        var reg = new Regex(regex);
        string xsrfToken;
        DateTime start;
        TimeSpan diff;
        var rnd = new Random();

        for (var j = 0; j < 100; j++)
        {
            if (rnd.Next(30) < 10)
            {
                start = DateTime.Now;
                for (var i = 0; i < 10000000; i++)
                    xsrfToken = Regex.Match(check, regex).Groups[1].Value;
                diff = DateTime.Now - start;
                Console.WriteLine("RegEx: " + diff.TotalSeconds);
                continue;
            }

            if (rnd.Next(30) < 20)
            {
                start = DateTime.Now;
                for (var i = 0; i < 10000000; i++)
                    xsrfToken = reg.Match(check).Groups[1].Value;
                diff = DateTime.Now - start;
                Console.WriteLine("RegEx Prepped: " + diff.TotalSeconds);
                continue;
            }

            start = DateTime.Now;
            for (var i = 0; i < 10000000; i++)
                xsrfToken = InBetween(check, "name=\"xsrf.token\" value=\"", ">");
            diff = DateTime.Now - start;
            Console.WriteLine("InBetween: " + diff.TotalSeconds);
        }

        Console.ReadKey();
    }

    public static string InBetween(string source, string start, string end)
    {
        var startPos = source.IndexOf(start, StringComparison.Ordinal);
        if (startPos < 0) return null;
        startPos += start.Length;
        var endPos = source.IndexOf(end, startPos, StringComparison.Ordinal);
        return endPos < 0 ? null : source.Substring(startPos, endPos - startPos - 1);
    }
Jim Berg
  • 609
  • 4
  • 7
  • Thanks for the help, @Aydin helped me out but I have posted a new error that occured.. Maybe you can help me out with this request? – CSharper Dec 14 '19 at 15:00
  • @CSharper Your question is answered, then. Mark his answer as your solution so he gets the reputation points and then ask a new one about your additional problem. As far as I know, these aren't supposed to be open debugging sessions. – Jim Berg Dec 14 '19 at 15:27
0

If string start =

"name=\"xsrf.token\" value=\""

and string end =

"\""

Then:

static string GetBetween(string message, string start, string end)
        {
            //Required to handle possible problems that will arise due to indexing if the string does not exist
            try{

                //Splits the original message by the character array of the string "start", then takes the second index of that string array
                string splitmessage = message.Split(new string[] {start})[1]

                //Splits the remaining message by the character array of string "end", then takes the first index of that string array
                splitmessage = message.Split(new string[] {end})[0]

                return splitmessage;

            } catch(Exeption ex){
                //Handle when the string you're attempting to find is not there
            }

        }

Should return only the value of the input you are looking for.

You will have to handle what happens when that value is not found separately.

JFoxx64
  • 262
  • 2
  • 11