I am using pickle to save an object graph by dumping the root. When I load the root it has all the instance variables and connected object nodes. However I am saving all the nodes in a class variable of type dictionary. The class variable is full before being saved but after I unpickle the data it is empty.
Here is the class I am using:
class Page():
__crawled = {}
def __init__(self, title = '', link = '', relatedURLs = []):
self.__title = title
self.__link = link
self.__relatedURLs = relatedURLs
self.__related = []
@property
def relatedURLs(self):
return self.__relatedURLs
@property
def title(self):
return self.__title
@property
def related(self):
return self.__related
@property
def crawled(self):
return self.__crawled
def crawl(self,url):
if url not in self.__crawled:
webpage = urlopen(url).read()
patFinderTitle = re.compile('<title>(.*)</title>')
patFinderLink = re.compile('<link rel="canonical" href="([^"]*)" />')
patFinderRelated = re.compile('<li><a href="([^"]*)"')
findPatTitle = re.findall(patFinderTitle, webpage)
findPatLink = re.findall(patFinderLink, webpage)
findPatRelated = re.findall(patFinderRelated, webpage)
newPage = Page(findPatTitle,findPatLink,findPatRelated)
self.__related.append(newPage)
self.__crawled[url] = newPage
else:
self.__related.append(self.__crawled[url])
def crawlRelated(self):
for link in self.__relatedURLs:
self.crawl(link)
I save it like such:
with open('medTwiceGraph.dat','w') as outf:
pickle.dump(root,outf)
and I load it like such:
def loadGraph(filename): #returns root
with open(filename,'r') as inf:
return pickle.load(inf)
root = loadGraph('medTwiceGraph.dat')
All the data loads except for the class variable __crawled.
What am I doing wrong?