# -*- coding: utf-8 -*-
"""Load an infodump zip archive into simple in-memory record objects.

The archive is expected to contain tab-separated text files named
``usernames.txt`` plus ``postdata_<site>.txt``, ``posttitles_<site>.txt`` and
``tagdata_<site>.txt`` for each of the sites ``askme``, ``mefi``, ``meta`` and
``music``.  ``importfromzipfile`` is the main entry point; it returns an
``infodumpdata`` object holding one dict per file kind, keyed by the first
column of each row.

The code is written so that it runs unchanged on Python 2 and Python 3.
"""
import zipfile
import string


class user:
    """One row of ``usernames.txt``."""

    def __init__(self, id='', date='', username=''):
        # Fixed: userid used to be assigned from ``username``, silently
        # discarding the ``id`` argument.
        self.userid = id
        self.joindate = date
        self.name = username


class post:
    """One row of a ``postdata_*`` file, plus its title and tags."""

    def __init__(self, id='', user='', date='', cat='', commentcount=0,
                 favoritecount=0, deletedstatus='', deletionreason='',
                 posttitle='', tagslist=None):
        self.postid = id
        self.userid = user
        self.datestamp = date
        self.category = cat
        self.comments = commentcount
        self.favorites = favoritecount
        self.deleted = deletedstatus
        self.reason = deletionreason
        self.title = posttitle
        # Fixed: a ``[]`` default was shared by every post created without an
        # explicit list, so assigntags() attached each tag to all posts.
        # A list supplied by the caller is still stored by reference.
        self.tags = [] if tagslist is None else tagslist


class tag:
    """One row of a ``tagdata_*`` file; ``linkid`` is matched against post ids."""

    def __init__(self, id='', link='', date='', name=''):
        self.tagid = id
        self.linkid = link
        self.linkdate = date
        self.tagname = name


class comment:
    """One row of a ``commentdata_*`` file."""

    def __init__(self, id='', post='', user='', date='', favecount=0, best=0,
                 commentlength=0):
        self.commentid = id
        self.postid = post
        self.userid = user
        self.datestamp = date
        self.faves = favecount
        self.bestanswer = best
        self.length = commentlength


def _dictor(value):
    """Return *value*, or a fresh empty dict when *value* is None."""
    return {} if value is None else value


class infodumpdata:
    """Container bundling the user dict and the per-site post/tag/comment dicts.

    Every argument is optional; an omitted argument becomes a new empty dict
    that is private to the instance (the previous ``{}`` defaults were shared
    between all instances).
    """

    def __init__(self, userdict=None, mefipostdict=None, mefitagdict=None,
                 meficommentdict=None, askmepostdict=None, askmetagdict=None,
                 askmecommentdict=None, metapostdict=None, metatagdict=None,
                 metacommentdict=None, musicpostdict=None, musictagdict=None,
                 musiccommentdict=None):
        self.users = _dictor(userdict)
        self.mefiposts = _dictor(mefipostdict)
        self.mefitags = _dictor(mefitagdict)
        self.meficomments = _dictor(meficommentdict)
        self.askmeposts = _dictor(askmepostdict)
        self.askmetags = _dictor(askmetagdict)
        self.askmecomments = _dictor(askmecommentdict)
        self.metaposts = _dictor(metapostdict)
        self.metatags = _dictor(metatagdict)
        self.metacomments = _dictor(metacommentdict)
        self.musicposts = _dictor(musicpostdict)
        self.musictags = _dictor(musictagdict)
        self.musiccomments = _dictor(musiccommentdict)


def _totext(raw):
    """Return *raw* as a native ``str`` (byte strings are decoded on Python 3)."""
    if isinstance(raw, str):
        # Python 2 byte strings and Python 3 text are passed through untouched.
        return raw
    # NOTE(review): assumes the dump is UTF-8 encoded -- confirm.  Undecodable
    # bytes are replaced instead of aborting the whole load.
    return raw.decode("utf-8", "replace")


def _parseuser(fields):
    """Build a ``user`` from the columns of a usernames row."""
    return user(fields[0], fields[1], fields[2].strip())


def _parsepost(fields):
    """Build a ``post`` from the columns of a postdata row.

    Columns 6 (deleted status) and 7 (deletion reason) are passed only when
    present, because stripping the line drops trailing empty columns.
    """
    return post(fields[0], fields[1], fields[2], fields[3],
                int(fields[4]), int(fields[5]), *fields[6:8])


def _parsetitle(fields):
    """Return the title column of a posttitles row, or '' when it is missing."""
    return fields[1] if len(fields) > 1 else ""


def _parsetag(fields):
    """Build a ``tag`` from the columns of a tagdata row."""
    return tag(fields[0], fields[1], fields[2], fields[3])


def _parsecomment(fields):
    """Build a ``comment`` from the columns of a commentdata row."""
    # Fixed: column 4 used to be passed as both faves (as a string) and best
    # answer, and column 1 (the post id) as the comment length.
    # NOTE(review): assumes columns 5 and 6, when present, hold the best-answer
    # flag and the comment length -- confirm against the dump's header line.
    faves = int(fields[4])
    best = int(fields[5]) if len(fields) > 5 else 0
    length = int(fields[6]) if len(fields) > 6 else 0
    return comment(fields[0], fields[1], fields[2], fields[3],
                   faves, best, length)


# (filename marker, row parser) pairs, in the order the markers are tested.
_PARSERS = (
    ("usernames", _parseuser),
    ("postdata", _parsepost),
    ("posttitles", _parsetitle),
    ("tagdata", _parsetag),
    ("commentdata", _parsecomment),
)


def loaddata(thezipfile, filename):
    """Parse one tab-separated member of the archive into a dict.

    thezipfile -- an open ``zipfile.ZipFile``.
    filename   -- member name; the substrings it contains ("usernames",
                  "postdata", "posttitles", "tagdata", "commentdata") select
                  how each row is parsed.

    Returns a dict keyed by the first column of each row.  Values are
    ``user``/``post``/``tag``/``comment`` objects, or plain title strings for
    "posttitles" files.  A filename matching no marker yields an empty dict.
    Blank lines are skipped.  Raises ``KeyError`` (from ``ZipFile.open``) when
    the member does not exist, and ``IndexError``/``ValueError`` on rows with
    missing or non-numeric required columns.
    """
    print("Loading %s..." % filename)
    # The filename never changes, so pick the parsers once rather than per row.
    parsers = [parse for marker, parse in _PARSERS if marker in filename]
    datadict = {}
    datafile = thezipfile.open(filename)
    try:
        datafile.readline()  # first line: timestamp, not data
        datafile.readline()  # second line: column headings, not data
        for line in datafile:
            strippedline = _totext(line).strip()
            if not strippedline:
                continue
            linelist = strippedline.split("\t")
            for parse in parsers:
                datadict[linelist[0]] = parse(linelist)
    finally:
        # The member handle used to be leaked.
        datafile.close()
    return datadict


def assignposttitles(postsdict, titlesdict):
    """Set ``title`` on every post in *postsdict* from *titlesdict*.

    *titlesdict* maps post id to title string.  A post with no entry gets an
    empty title (previously this raised ``KeyError``).
    """
    for entry in postsdict.values():
        entry.title = titlesdict.get(entry.postid, "")


def assigntags(postsdict, tagsdict):
    """Append every tag in *tagsdict* to the ``tags`` list of the post it links to."""
    for thistag in tagsdict.values():
        # Membership test is needed because askmepost 56056 no longer exists
        # but still has tags -- some kind of data loss back in the day?
        if thistag.linkid in postsdict:
            postsdict[thistag.linkid].tags.append(thistag)


def _loadsite(thezipfile, site):
    """Load one site's posts (titles and tags attached) and its tag dict."""
    posts = loaddata(thezipfile, "postdata_%s.txt" % site)
    # The titles dict is only needed long enough to copy titles onto the posts.
    assignposttitles(posts, loaddata(thezipfile, "posttitles_%s.txt" % site))
    tags = loaddata(thezipfile, "tagdata_%s.txt" % site)
    assigntags(posts, tags)
    return posts, tags


def importfromzipfile(zipfileloc):
    """Read the whole archive at *zipfileloc* and return an ``infodumpdata``.

    *zipfileloc* is anything ``zipfile.ZipFile`` accepts (a path or a file-like
    object).  Users are loaded first, then posts, titles and tags for askme,
    mefi, meta and music, in that order.  Comment files are deliberately not
    loaded (the original author noted that this takes forever), so the
    comment dicts of the result stay empty.
    """
    thezipfile = zipfile.ZipFile(zipfileloc, "r")
    try:
        usersdict = loaddata(thezipfile, "usernames.txt")
        askmeposts, askmetags = _loadsite(thezipfile, "askme")
        mefiposts, mefitags = _loadsite(thezipfile, "mefi")
        metaposts, metatags = _loadsite(thezipfile, "meta")
        musicposts, musictags = _loadsite(thezipfile, "music")
    finally:
        # The archive handle used to be leaked.
        thezipfile.close()
    return infodumpdata(userdict=usersdict,
                        mefipostdict=mefiposts, mefitagdict=mefitags,
                        askmepostdict=askmeposts, askmetagdict=askmetags,
                        metapostdict=metaposts, metatagdict=metatags,
                        musicpostdict=musicposts, musictagdict=musictags)


if __name__ == '__main__':
    print("This is a test of how to make a module work.")
    print("See 27a.py")