2
from bs4 import BeautifulSoup
import urllib2
from lxml.html import fromstring
import re
import csv

wiki = "http://en.wikipedia.org/wiki/List_of_Test_cricket_records"
header = {'User-Agent': 'Mozilla/5.0'} #Needed to prevent 403 error on Wikipedia
req = urllib2.Request(wiki,headers=header)
page = urllib2.urlopen(req)
soup = BeautifulSoup(page)


csv_out = open("mycsv.csv",'wb')
mywriter = csv.writer(csv_out) 

def parse_rows(rows):
 results = []
 for row in rows:
     tableheaders = row.findall('th')
    if table_headers:
        results.append(headers.get_text() for headers in table_headers])

    table_data = row.find_all('td')
    if table_data:
        results.append([data.gettext() for data in table_data])
return results

# Get table
 try:
     table = soup.find_all('table')[1]
 except AttributeError as e:
     print 'No tables found, exiting'
       # return 1

  # Get rows
 try:
    rows = table.find_all('tr')
 except AttributeError as e:
    print 'No table rows found, exiting'
     #return 1

table_data = parse_rows(rows)

# Print data
for i in table_data:
    print '\t'.join(i)

mywriter.writerow(i) csv_out.close()


UnicodeEncodeError Traceback (most recent call last) in ()

---> 51 mywriter.writerow(d1)

UnicodeEncodeError: 'ascii' codec can't encode character u'\xa0' in position 0: ordinal not in range(128)


I do get the data on the ipython notebook but I cant figure out when the csv file is being written.

What could be the error for ?? please help

Wally
  • 432
  • 6
  • 19
Divya Jose
  • 389
  • 1
  • 4
  • 21
  • I tried replicating your problem but I wasn't able to—I had to clean up the code because the indentation is all messy, and the variable you're trying to write out in the penultimate line (d1) isn't defined anywhere. Can you post the code you're actually running to produce this error? – jgysland Feb 17 '15 at 18:14
  • Yes. d1 is not defined anywhere. but I wrote the line mywriter.writerow([s.encode("utf-8") for s in i]) as suggested by Yannis P. and it worked. – Divya Jose Feb 17 '15 at 23:39

1 Answers1

1

This is a known problem with csv writing in python. You can see a solution here. In your case it all boils down to writing:

mywriter.writerow([s.encode("utf-8") for s in d1])

Alternatively you can use the unicodecsv library for avoiding this trick

Community
  • 1
  • 1
Yannis P.
  • 2,745
  • 1
  • 24
  • 39