The task is to wrap URLs in an Excel file in HTML tags. For this, I have a function and the following code, which works for one column named ANSWER:
import pandas as pd
import numpy as np
import string
import re
# Matches an http(s) URL up to (not including) the first single quote, comma,
# closing parenthesis, '<', ';' or whitespace character.
_URL_PATTERN = re.compile(r"(?P<url>https?://[^',)<;\s]+)")


def hyperlinksWrapper(myString):
    """Wrap every URL found in *myString* in an HTML ``<a>`` tag.

    Each substring that looks like an http(s) URL is replaced by
    ``<a href="URL">URL</a>``.

    Args:
        myString: The text to process. Values that are not ``str`` (for
            example numbers or NaN coming from a spreadsheet cell) are
            returned unchanged.

    Returns:
        The text with every URL wrapped, or the input itself when it is not
        a string.
    """
    if not isinstance(myString, str):
        return myString
    # A single substitution pass wraps each match exactly once. Replacing the
    # URLs one by one with str.replace would re-wrap markup that was already
    # inserted whenever a URL repeats or is a prefix of another URL.
    return _URL_PATTERN.sub(r'<a href="\g<url>">\g<url></a>', myString)
# Open the original Excel file.
filename = "Excel.xlsx"
df = pd.read_excel(filename)
# Fill the empty cells of the ANSWER column with "n/a", then wrap the URLs in
# every cell of that column. The result is assigned back to the column as a
# whole: chained forms such as df.ANSWER.replace(..., inplace=True) or
# df.ANSWER[i] = ... operate on an intermediate object, which pandas warns
# about and which does not update df when Copy-on-Write is enabled.
df["ANSWER"] = df["ANSWER"].fillna("n/a").map(hyperlinksWrapper)
# Export to Excel (not CSV).
df.to_excel('Excel_refined.xlsx')
The question is: how do I process not just one column, but every cell in every column of the DataFrame, without specifying the exact column names?