I'm using Python 2.7.9
I tried to open and read a website, but I get errors like: `11001 getaddrinfo failed` OR `no connection ... the machine actively refused it`.
In fact, whenever I try to open a website in order to read its content, I am never able to open it. I believe the problem is a configuration issue on my system. With webdriver I can open a website, but I don't know how to read that content. Can you help, please?
Here is the code I used, with different possibilities but always with the same errors.
import socket
import os
os.environ['http_proxy'] = '127.0.0.1:8080'
import requests, re
import urllib2
#from urllib2 import urlopen
from bs4 import BeautifulSoup as bs
from HTMLParser import HTMLParser
from six.moves import urllib
# as req
#from urllib.request import urlopen
def news ():
url = "http://www.edureka.co/"
#payload = {'q': 'shape of you'}
#r = requests.get(url, params = payload)
## socket.getaddrinfo('127.0.0.1', 8080)
## r = requests.get(url)
## soup = bs(r.text,"html.parser")
# html = urlopen(url).read()
# soup = BeautifulSoup(html)
#https://www.crummy.com/software/BeautifulSoup/bs4/doc/
#webbrowser.register('chrome', None,
#webbrowser.BackgroundBrowser("C://Program Files (x86)//Google//Chrome//Application//chrome.exe"))
## link = soup.find('a', {'href':re.compile('http://www.edureka.co/')})['href']
#link = "http://www.edureka.co/"
link = 'http://www.edureka.co/'
print(link)
#proxies = {'http': 'http://www.someproxy.com:3128'}
#proxies = {'http': 'http://www.edureka.co/'}
#f = urllib.urlopen(link, proxies={})
#proxy_support = urllib2.ProxyHandler({'http': '127.0.0.1'})
### proxy_support = urllib2.ProxyHandler({})
### opener = urllib2.build_opener(proxy_support)
### urllib2.install_opener(opener)
### in_ = opener.open(link)
### in_.read()
## result = urllib2.urlopen(link)
#result = urllib.request.urlopen(link)
#f = urllib2.Request('http://www.edureka.co/')
socket.getaddrinfo('localhost', 8080)
mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
mysock.connect(("www.edureka.co/", 80))
#mysock.send('GET http://www.edureka.co/ HTTP/1.0\n\n')
## f2 = urllib2.urlopen('http://www.edureka.co/')
## my = f2.read()
#HTMLParser.feed('http://www.edureka.co/')
#import requests
#s = requests.Session()
#url = requests.form['http://www.edureka.co/']
## r = req.get('http://www.edureka.co')
## print(r.status_code)
#req = requests.get('<a href="http://www.edureka.co/">http://www.edureka.co/</a>')
## r.encoding # returns 'utf-8'
## soup = BeautifulSoup(r.text, 'html.parser').get_text()
## print(soup.prettify())