I am extracting data from 700+ Telegram groups and channels using Telethon in PyCharm. I am using a for loop to loop through all entities and extract the data needed for my analysis.
The code works fine when the list object "groups_list" contains a lower number of groups and channels. However, when "groups_list" contains all 700+ groups/channels I get the error messages after a few minutes.
My guess is that I am flooding Telegram's API with too many requests. Is there a way I can alter my code so it works more effective?
# Define an infinite number to get all messages in channel/group
infinite_number = float('inf')
# Making an empty list for each column/variable I want to extract from the API
message_id = []
message = []
channel_id =[]
reply_to =[]
time = []
retweet_count = []
view_count = []
user_id = []
retweet = []
# For loop that iterates over list of groups i.e. "list_groups" and extract Telegram data
for x in list_groups:
chats = client.get_messages(x, infinite_number)
if len(chats):
for chat in chats:
message_id.append(chat.id)
message.append(chat.message)
channel_id.append(chat.peer_id)
reply_to.append(chat.reply_to_msg_id)
time.append(chat.date)
retweet_count.append(chat.forwards)
view_count.append(chat.views)
user_id.append(chat.from_id)
retweet.append(chat.fwd_from)
data ={'message_id':message_id, 'message':message, 'channel_id': channel_id, 'reply_to_msg_id':reply_to, 'time':time, 'retweet_count':retweet_count, 'view_count':view_count, 'user_id':user_id, 'retweet':retweet}
# Convert lists til data.frame
df = pd.DataFrame(data)
raise errors.FloodWaitError(request=r, capture=diff)
telethon.errors.rpcerrorlist.FloodWaitError: A wait of 85313 seconds is required (caused by ResolveUsernameRequest)
PS: Political science student new to Python :)