1

I tried to scrape this site:

http://www.finanzen.net/historische-kurse/Daimler

and used the following source code:

from cookies import cookies
import datetime
import requests
import time
import webbrowser


def download(number,
             isin,
             start=datetime.date(1998,1,1),
             end=None,
             dst="raw"):
    """Request the historical Daimler prices page and save it to disk.

    Sends a GET request with the date range as query parameters, prints
    "OK" or "FAIL!" depending on whether the marker date "08.11.2017"
    occurs in the response, and writes the response text to
    ``output.html`` in the current working directory.

    Parameters:
    number -- currently unused
    isin   -- currently unused
    start  -- first day of the requested range (datetime.date)
    end    -- last day of the requested range (datetime.date); ``None``
              means "today", resolved at call time
    dst    -- currently unused
    """
    # ToDo -- I want to give a tag and not an url!
    url = "http://www.finanzen.net/historische-kurse/daimler"

    # Resolve the default here: a ``datetime.date.today()`` default in the
    # signature is evaluated only once, when the function is defined.
    if end is None:
        end = datetime.date.today()

    # The query mirrors the field names of the site's date-range form.
    payload = {"inTag1"    : str(start.day),
               "inMonat1"  : str(start.month),
               "inJahr1"   : str(start.year),
               "inTag2"    : str(end.day),
               "inMonat2"  : str(end.month),
               "inJahr2"   : str(end.year),
               "strBoerse" : "XETRA",
               # NOTE(review): presumably a timestamp token the form
               # expects -- confirm against the site.
               "pkBHTs"    : int(time.time())}

    r = requests.get(url, params=payload, cookies=cookies)

    if "08.11.2017" in r.text:
        print("OK")
    else:
        print("FAIL!")

    # Explicit encoding: the page contains non-ASCII text and the platform
    # default encoding may not be able to represent it.
    with open("output.html", "w", encoding="utf-8") as f:
        f.write(r.text)


if __name__ == "__main__":
    # Manual smoke test: fetch the page once, then show the saved result.
    print("Test: download()")
    download(number=1510210323,
             isin="DE0007100000",
             start=datetime.date(1998, 1, 1),
             end=datetime.date.today(),
             dst="raw")
    print("Done.")

    # Open the file written by download() in the default browser.
    webbrowser.open("output.html")

I have a second script which provides the cookie data from my browser:

# Cookie data copied from the browser, passed to requests as ``cookies=``.
# Every key is listed exactly once: a dict literal silently keeps only the
# last value of a repeated key, so duplicates only hide which value is used
# (for 'NID' the value starting with '110=' is the effective one).
# NOTE(review): several entries (e.g. SID, HSID, SSID, APISID, SAPISID, NID)
# look like third-party session/tracking cookies rather than cookies of the
# scraped site -- confirm they are needed and avoid publishing them.
cookies = {'CAP' : 'data=44a1e1f46fef0411bf06d9bfc501913f',
           'CUID' : 'N,1510569302850:ALHGLuQAAAAPTiwxNTEwNTY5MzAyODUwVdj35/i8kfuLw5RmnsCECh6uWduJEJHPHe44+gmS5k1OeVohiY2UE0s8Toc6Z1KsPkSIOyvb0rHFvfBB5GtZD0BeUVeUq8xKkDIkDqq2RsE7AvdO9c+GoqElRytvxjPuoExKFUZ7sMl3+ugTDvQsjM0q6iEkcfYTCjZcqRhGJ2JicnT0yZI8NIINqvt1OUufo4jtHTgznYHCgSG8lxydqzv+Cax90XRsvKoUEzTfJCxzqryt3rkXiy4IMEOrTMxZOZCoT0HO3hgghkd3XyzOhhr70tLnPbY4GxPkWrcXy4y+7xHwwoX+jmJGiNvEJod8mQF3QkkDSN+uwmTlAgy7Yg==',
           'MI' : '1',
           'OPTOUTMULTI' : '0:0%7Cc2:0',
           '__utma' : '99761801.1635127051.1447939999.1510562258.1510569245.10',
           '__utmb' : '99761801.1.10.1510569245',
           '__utmc' : '99761801',
           '__utmt_UA-1858090-1' : '1',
           '__utmz' : '99761801.1503872185.2.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided)',
           '_ga' : 'GA1.2.1635127051.1447939999',
           '_gid' : 'GA1.2.216894857.1510518215',
           'finpopup2' : 'n=3&d=08%2E09%2E2017',
           'fintargeting' : 'v=1&h=0x000000&hd=131117091117091117091117091117091117',
           'finvisit' : 'v=5&p=16&d=13%2E11%2E2017',
           'mjdkyj' : 'AKsRol8bjClrOgYBI9F53uwo0572ZvzG_ifsQhL0W6CGIPDli067aLH682nhCzAvZJLwgmI_hfnp1G_cO6_R7La4pHyjXqGz7w',
           'utag_main' : 'v_id:015e25c45b49001bad6f158e01b004066002a05e00838$_sn:10$_ss:0$_st:1510571043449$dc_visit:10$dc_visit_dip-main:10$dip_times_empty_enrichment:26$ses_id:1510569240676%3Bexp-session$_pn:1%3Bexp-session$collectCookieMode:3rdParty%3Bexp-session$dc_event:1%3Bexp-session$dip_events_this_session:1%3Bexp-session$dc_event_dip-main:1%3Bexp-session$dc_region_dip-main:eu-central-1%3Bexp-session$dc_region:eu-central-1%3Bexp-session',
           'xdefcc' : 'G18e8ffb3a46fc000807955c49556bf4cc',
           '1P_JAR' : '2017-11-13-10',
           'AID' : 'AJHaeXJrF9XjJOxZm8l4doMQZS4yMOPes3h5NKKcWJZxIyGSOAjtzw',
           'APISID' : 'VILFXotMggFxtb9h/AcPvkxMR5pWR0AzUo',
           'ASPSESSIONIDQSRTTSCC' : 'GLLJJMHAIDCBGEEODOLMADEN',
           'ASPSESSIONIDSSRSRSBD' : 'PGHHNMHANNCJJKIPNALFCCMO',
           'CONSENT' : 'YES+DE.de+20150726-13-0',
           'HSID' : 'AkD2AyVb5Z9wR9QT-',
           'NID' : '110=Chz_C4sXWBfkLwySlpc-od0DauOGbWPjjZ_UYJgVYSeOXLWaQayrm_PHSMfnI4bkHwFQjWq0atuDmXEGq39-uJHLpanLu7kQDs9WTm004KBx7nWacN1-_x_p05gbw8wx',
           'POPUPCHECK' : '1510604610799',
           'SAPISID' : 'e-07IvwMgsqnc1KL/AWrA0YaffjZEXetqg',
           'SID' : 'FwUbfPob8j7z4QAbMUsPD7HI4FwPoOM7wPo9cyTwyAveBe0fy86idhBKLMz8mn93l-pyPA.',
           'SIDCC' : 'AE4kn7_ynLaaOiVILBfgv5-_j3I-18GWRw0_rgIiJGSRP0YLV8zwv3Me80u9dqJI_a58y8xXxMCyLlu7qBw2rQ',
           'SSID' : 'ADQhejhWBta7RIl3q',
           'anj' : """dTM7k!M4/C@-S3@:5]qk`_#I2PQ7s]@!7PT-Q!$Rkf%Tam0H]QFpc6s(!H!k>jP5fr<Pn!Sse@DnFc!m#J[!_?'td[$Y+EO30JW48u`al*p]+pQ%.IaIz(eLt1a=RpN+IIuSI6>2pVG*(l/YR]55R_8*)f_>Bgu2u'nh]2pe>#n88><bUjXaW]vuR5bchvb:ei<F/=Ow02i-18D:GO_nsX`cEj282$Vr6Zz2%R'kHi(@n0veK7e2'oM:Nd`*Ju!l/-nAbGM6F3YP-^KnNXwhV1.yggNVGk!<5]]1?:av7hOQ/L#d9Q``r=MjWhED9SGjg%*!X1%w5kp?T<s?7j=KIM8Y$p1^aigtynxfq/5MBw3X2Bc8E5EILM(^k=p=B+%ra1D^CUB]=ExTXvZx7go4f/B(8iGBZRtY(q7_H=>?rY$IpGd).%<kME<mKK:vp7SPIctG0J8vv3y6MW*)3'5N9/Ju_Oiw98wU4016EdBD9et%n*bF_uZv<jF>'E9@VDsp3i^mu'h2R4Y8:2]ko2YdA.(6d3^+<+4Dyo=Z-Mi./o1lsVmD61*JVd'oKHFeNRD*7d:wbm(7!4vs*Ahw+.XI2d?c4#.oV1%5pnBbLG^!1N1qfcZMzhBp?xs_#Ch3o$L$hlWShx4HF@u5e9Agc*S[s.gz6MTIX6:[Rm`v-pH_<(#8YlG`P_(PHOI*v*N^mA!>5Nmh8U?<WbeUp48=ux1N@quBj7U2(Kz1W7/>7`Q2A-`C7X6N_.bl/V/43$YC8N6zsH:M$KCuLYMOsnBP<3v3h)rg2aA2V?PAMJ71Lc*oW?!I^KQ<CPZ#RByeri1*v-M93nj?6#tLa?5o-[E*.ys9IO/2KA)?C^vETDT%wlq#n=7kK9_U`Q=HTXf]UwV(h8H2]_x^UElV<-Jd.hF$pC#O#+2f1I6i44C<NiDE$C2Lhv=z#pWxmO?o4I*aMM2t])V=p>>i[n-caqAWXA`#7(h2Ka8_dV7bgcPn_h./y#T[!AyHDHC-4!t3+wnhevgtid(Y=^JnIDS6@J$jP$wO`bUt5^neF+TZMJNpbwFfE=<dL.627>80_)>!ViHO9vIz7]ebKf-WPz3-%7p]w_1nPBQvyJ_r*^cg['SV3T/t<?UK_ZqgOio<1W])V^'PW^Ev7^_`McptJ3`gig?Hh$DFMgh[yImN`wByK71`iKGSxl_8Fq!?p7WW)]R:8XVy+fJZ6zYJc@ro-!w@'_WN`A7'O]Slqu3gh/RPX!Ps[gJp9IA439Ki3Ak!II7Lgy997x07G][')<2wjcG9H3f'SwP/wKC>o=F7#3mMF9R#1U[5^SQCb:VD*7$G9pj9i7hJq:DMe)rg`lO-kf#2>mv3gzo_Jf?`*]*afAw3#21o/#YC<A9@/!41/zOjJ'<W2Z@?6EN2's]ZiejVf$y!mpqR7b2[ulW:gR.5Bcv[6>C9'Gl7uJHY1%@j!sP>>!bCYTvKIL>wV*`Hq.2q/FBSY?F%_iT7I%AZ<.`7@*)oo+DWqHvHvnK?4OQEzdX+4g.Mrhj(H<WjfG<4gIa([228)?u)*nhKni6oaXD!1YJEEVg07mn1_Ad:6cO37jyJ3?2L?+)$PaLfTKocjN55L@E+cMm4)k3jv.$z_rTgYiga4/""",
           'i00' : '002963d5e4108c32058a5679f0001%3B5a097588%3B5a3b7d3c',
           'icu' : 'ChgI95w7EAoYAiACKAIwuPaX0AU4AkACSAIQuPaX0AUYAQ..',
           'sess' : '1',
           'uuid2' : '869611879133359501'}

Please let me know if the cookie data above contains any personal information. Thanks.

I ran my script and expected to get the same data as when I submit the form manually in my browser, but this didn't work.

In my console I get:

Test: download()
FAIL!
Done.

The output.html file generated by my script contains "Bitte wählen Sie das Start- und Enddatum des Zeitraumes, für den Sie historische Kursdaten anzeigen möchten." (= "Please choose a start and end date for which you want to show historical prices") instead of the data. This is the site's error message, and it tells me to submit data to the form - but I did send the data to the website, as you can see!

At the moment I don't understand what I need to change to get the data instead of this message. Can you give me a tip, please? Thank you for your efforts, and please excuse my English!

M14
  • 444
  • 6
  • 8
  • If there is Javascript involved, have a look at [Web-scraping JavaScript page with Python](https://stackoverflow.com/questions/8049520/web-scraping-javascript-page-with-python) - you need more than just requests for JavaScript. – bastelflp Nov 13 '17 at 20:44
  • Can you decide if I have a JavaScript problem? I don't know much js, but I thougth that js only appends a hidden input tag, which I send with the other params to that form. – M14 Nov 13 '17 at 21:07
  • At the moment I try to find out if I need JS! Is there a way to find out (e. g. in chrome) what happens, when I submit the form??? – M14 Nov 13 '17 at 22:30

1 Answers1

2

You may choose selenium to get the same result. So I modified your code to achieve the values you tried to get.

# -*- coding: utf-8 -*-
import datetime
import requests
import time
import webbrowser
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import io

def getBoersenIndex(boerse):
    """Return the 1-based position of an exchange in the boersen dropdown.

    The dropdown option is selected by position, so the name has to be
    translated into its index first.

    Keyword arguments:
    boerse -- name of the exchange exactly as shown in the dropdown on
              the website

    Returns the 1-based index, or None when the name is not in the list.
    """
    # Same order as the dropdown; the first entry has index 1.
    dropdownOrder = (
        'Budapest',
        'Berlin',
        'Baader Bank',
        'BX Swiss',
        'Bats',
        'Chi-X',
        'Düsseldorf',
        'Frankfurt',
        'Hamburg',
        'Hannover',
        'Lang und Schwarz',
        'München',
        'Mexiko',
        'Nasdaq OTC',
        'XETRA',
        'Prag',
        'Stuttgart',
        'Swiss Exchange',
        'Tradegate',
        'Wien',
        'Quotrix',
    )

    try:
        position = dropdownOrder.index(boerse)
    except ValueError:
        # Unknown name: no matching dropdown entry.
        return None
    return position + 1


def _select_option(browser, name, position):
    """Click the option at 1-based ``position`` of the <select> called ``name``."""
    xpath = "//select[@name='" + name + "']/option[" + position + "]"
    browser.find_element_by_xpath(xpath).click()


def download(startDay, startMonth, startYear, boerse, url,
             driver_path="chromedriver"):
    """Fill in the historical-prices form with headless Chrome and save the page.

    Opens ``url``, selects the start date, today's date as end date and the
    exchange in the form's dropdowns, clicks the submit button, prints "OK"
    or "FAIL!" depending on whether the marker date "08.11.2017" occurs in
    the resulting page, and writes the page source to ``output.html``.

    Keyword arguments:
    startDay    -- day of the start date, used as dropdown position
    startMonth  -- month of the start date, used as dropdown position
    startYear   -- year of the start date (1998 is the first dropdown entry)
    boerse      -- exchange name as accepted by getBoersenIndex()
    url         -- address of the historical-prices page
    driver_path -- path to the chromedriver executable
    """
    today = datetime.date.today()

    # (select name, 1-based option position as string), in click order.
    selections = [
        ("inTag1", str(startDay)),
        ("inMonat1", str(startMonth)),
        # because the dropdown uses the index as a value starting with 1998 = 1
        ("inJahr1", str(startYear + 1 - 1998)),
        ("inTag2", str(today.day)),
        ("inMonat2", str(today.month)),
        # same as starting year
        ("inJahr2", str(today.year + 1 - 1998)),
        ("strBoerse", str(getBoersenIndex(boerse))),
    ]

    # this is my webdriver implementation, you may use another one
    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    browser = webdriver.Chrome(driver_path, chrome_options=options)

    try:
        browser.get(url)
        time.sleep(1)

        for name, position in selections:
            _select_option(browser, name, position)
        browser.find_element_by_css_selector("span.button").click()

        # Take one snapshot so the check below and the saved file agree.
        page = browser.page_source
    finally:
        # Always shut the headless browser down, even when a lookup fails;
        # otherwise every run leaves a Chrome process behind.
        browser.quit()

    if "08.11.2017" in page:
        print("OK")
    else:
        print("FAIL!")

    with io.open("output.html", "w", encoding='utf8') as f:
        f.write(page)


if __name__ == "__main__":
    # Manual smoke test: XETRA prices from 1 January 1998 until today.
    print("Test: download()")
    download(startDay=1,
             startMonth=1,
             startYear=1998,
             boerse='XETRA',
             url='http://www.finanzen.net/historische-kurse/daimler')
    print("Done.")

    # Open the file written by download() in the default browser.
    webbrowser.open("output.html")
JustOneQuestion
  • 322
  • 1
  • 9
  • I've voted your answer up. I was able to run your script on my system, unfortunately I can only download the first data line but not the whole dataset on the page. I get "14.11.2017 70,..." but not "14.11.2017 ..., 13.11.2017 ..." and so on). But I can download something without an error message on the site. Thank you very much for your effort! – M14 Nov 14 '17 at 20:02
  • i had the same problem even on the website itself. sometimes i had to press the button a second time, because the first time, it only showed one line like you said. try to run the script another time and it should work. – JustOneQuestion Nov 14 '17 at 20:05
  • 1
    I accepted your answer - I will test it tomorrow! Thank you very much! – M14 Nov 14 '17 at 21:24