So I'm working on this file that converts a CSV to JSON, however I keep getting this error message but can't seem to figure it out. Indentation seems to be correct so I'm a bit lost on where to go with it. Code is below:
Traceback (most recent call last):
File "/home/uwp/widgets/contentFreshness/freshmap.py", line 308, in <module>
main()
File "/home/uwp/widgets/contentFreshness/freshmap.py", line 303, in main
mySite.writeJSONFile(options)
File "/home/uwp/widgets/contentFreshness/freshmap.py", line 247, in writeJSONFile
outputFile.write('"' + str(dateOfCrawl) + '"' )
NameError: global name 'dateOfCrawl' is not defined
Code
class Site:
dateOfCrawl = 0;
def __init__(self,csvFilePath):
self.pageList = [] # ordered list of page IDs
self.pageData={} # dictionary of individual page dictionaries, indexed on page ID
self.titleDict = { } # dictionary of unique titles
self.buildPageData(csvFilePath)
self.homePageId=self.pageList[0] # only use of site.pageList
self.depth=0
def buildPageData(self,csvFilePath):
global dateOfCrawl
# read data from CSV file, build a dictionary of page data, including list of children, in order
lines = csv.reader(open(csvFilePath, "rb"))
for line in lines:
pageURL=line[0]
pageURL=re.sub('\/\Z', '',pageURL) # remove any trailing slash
self.pageData[pageURL]={}
self.pageData[pageURL]["URL"]=pageURL
self.pageData[pageURL]["Title"]=self.cleanTitle(line[1],pageURL)
# when taking the home page and chop its url the parent will be http:/
# which should be avoided by setting it to ''
parent = chopPath(pageURL)
if(parent == 'http:/'):
parent=''
dateOfCrawl = line[2]
self.pageData[pageURL]["Parent"]= parent
self.pageData[pageURL]["Modified"]=line[2]
self.pageData[pageURL]["Children"]=[]
list = self.pageData.keys()
# sort IDs before attempting to match children
self.pageList = self.pageData.keys()
self.pageList.sort()
lineCount = 0
for pageURL in self.pageList:
# record page as child of its parent (parents must already be in the list!)
parentURL=self.pageData[pageURL]["Parent"]
if (lineCount > 0):
while( self.pageData.has_key(parentURL)== False):
if(parentURL == ''):
sys.exit(pageURL + " has no parent at " + parentURL)
parentURL = chopPath(parentURL)
self.pageData[parentURL]["Children"].append(pageURL)
lineCount+=1
self.pageCount=lineCount
def writeJSONFile(self,options):
global dateOfCrawl
outputFile = options ["outputFile"]
#see http://code.google.com/intl/en/apis/visualization/documentation/reference.html#DataTable
outputFile.write('[')
outputFile.write('"' + str(dateOfCrawl) + '"' )
self.homePage.toJSON(options)
outputFile.write(']')
outputFile.close()