0

when i run this code for crawl data twitter

from tqdm import tqdm
from bs4 import BeautifulSoup as bs
import re, csv
def html2csv(fData, fHasil, full=True):
    urlPattern=re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    print('Loading Data: ', flush = True)
    Tweets, Username, waktu, replies, retweets, likes, Language, urlStatus =  [], [], [], [], [], [], [], []
    soup = bs(open(fData,encoding='utf-8', errors = 'ignore', mode='r'),'html.parser')
    data = soup.find_all('li', class_= 'stream-item')
for i,t in tqdm(enumerate(data)):
    T = t.find_all('p',class_='TweetTextSize')[0] # Loading tweet
    Tweets.append(bs(str(T),'html.parser').text)
    U = t.find_all('span',class_='username')
    Username.append(bs(str(U[0]),'html.parser').text)
    T = t.find_all('a',class_='tweet-timestamp')[0]# Loading Time
    waktu.append(bs(str(T),'html.parser').text)
    RP = t.find_all('span',class_='ProfileTweet-actionCountForAria')[0]# Loading reply, retweet & Likes
    replies.append(int((bs(str(RP), "lxml").text.split()[0]).replace('.','').replace(',','')))
    RT = t.find_all('span',class_='ProfileTweet-actionCountForAria')[1]
    RT = int((bs(str(RT), "lxml").text.split()[0]).replace('.','').replace(',',''))
    retweets.append(RT)
    L  = t.find_all('span',class_='ProfileTweet-actionCountForAria')[2]
    likes.append(int((bs(str(L), "lxml").text.split()[0]).replace('.','').replace(',','')))

try:# Loading Bahasa
    L = t.find_all('span',class_='tweet-language')
    Language.append(bs(str(L[0]), "lxml").text)
    except:
        Language.append('')
        url = str(t.find_all('small',class_='time')[0])
        try:
            url = re.findall(urlPattern,url)[0]
            except:
                try:
                    mulai, akhir = url.find('href="/')+len('href="/'), url.find('" title=')
                    url = 'https://twitter.com/' + url[mulai:akhir]
                    except:
                        url = ''
                        urlStatus.append(url)
                        print('Saving Data to "%s" ' %fHasil, flush = True)
                        dfile = open(fHasil, 'w', encoding='utf-8', newline='')
                        if full:
                            dfile.write('Time, Username, Tweet, Replies, Retweets, Likes, Language, urlStatus\n')
                            with dfile:
                                writer = csv.writer(dfile)
                                for i,t in enumerate(Tweets):
                                    writer.writerow([waktu[i],Username[i],t,replies[i],retweets[i],likes[i],Language[i],urlStatus[i]])
                                    else:
                                        with dfile:
                                            writer = csv.writer(dfile)
                                            for i,t in enumerate(Tweets):
                                                writer.writerow([Username[i],t])
                                                dfile.close()
                                                print('All Finished', flush = True)

i got this error

File "<ipython-input-4-4a19b18dc90d>", line 27
    except:
         ^
SyntaxError: invalid syntax
}
anakecil
  • 13
  • 3

1 Answers1

0

In Python, indentation is used to delimit blocks of code. This is different from many other languages that use curly braces {} to delimit blocks such as Java, Javascript, and C. Because of this, Python users must pay close attention to when and how they indent their code because whitespace matters.

When Python encounters a problem with the indentation of your program, it either raises an exception called IndentationError or TabError.[1]

In your case, this is the issue:

try:
    print(x)
    except:              # wrong indentation 
      print("An exception occurred")

You can simply fix it like this:

try:
  print(x)
except:         # correct, try and catch stay at the same level
  print("An exception occurred")

Hope this helps. Good luck.

Harshal Parekh
  • 5,918
  • 4
  • 21
  • 43