Published in: Python
e.g.
$ twlogmerge.py twitter-log-user-dir
read file: twitter-log-user-dir/page001.xml
get status: id: 123456789
append status: accum.:1234
...
append status: accum.:3456
write file: twitter-log-user-dir-marged-20090828-130146.xml
Expand |
Embed | Plain Text
import os
import os.path
import sys
from codecs import open
from time import strftime, localtime, time
from xml.dom.minidom import parse, parseString

# Skeleton of the merged document: an empty <statuses> root carrying the
# viewstyle.xsl stylesheet reference.  Statuses from the logs are appended
# under that root.
initXmlStr = '''\
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="viewstyle.xsl"?>
<statuses></statuses>'''

# NOTE: every print() below takes exactly one string argument, so the calls
# produce the same output under Python 2 (print statement) and Python 3.


def _merge_log_files(log_dir):
    """Collect the unique <status> elements from every file in *log_dir*.

    Each regular file in the directory is parsed as XML.  A <status> is
    appended to the merged document the first time its id (the text of the
    first <id> element inside it) is seen; later duplicates are dropped.

    Returns the merged minidom Document.
    """
    dom = parseString(initXmlStr)
    statuses = dom.getElementsByTagName('statuses').item(0)
    seen_ids = set()

    # Sorted so the merge order does not depend on the arbitrary order in
    # which os.listdir() happens to return entries.
    for name in sorted(os.listdir(log_dir)):
        path = log_dir + '/' + name
        if not os.path.isfile(path):
            # Sub-directories cannot be parsed as XML; skip them rather
            # than crash half-way through the merge.
            continue
        print('read file: ' + path)
        local_dom = parse(path)

        for status in local_dom.getElementsByTagName('status'):
            id_node = status.getElementsByTagName('id').item(0)
            status_id = int(id_node.firstChild.data)
            print('get status: id: ' + str(status_id))
            if status_id in seen_ids:
                print('not append status')
                continue
            # appendChild() moves the node out of the per-file document
            # and into the merged one.
            statuses.appendChild(status)
            seen_ids.add(status_id)
            print('append status: accum.:' + str(len(seen_ids)))

    return dom


def main(argv=None):
    """Merge the XML logs in the directory named by ``argv[1]``.

    *argv* defaults to ``sys.argv``.  The merged document is written next
    to the log directory as ``<log-dir>-marged-<YYYYmmdd-HHMMSS>.xml``.

    Returns the path of the file written, or None when the arguments are
    missing or the path does not exist (a message is printed instead).
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print('Usage: ' + argv[0] + ' <log-dir>')
        return None
    if not os.path.exists(argv[1]):
        print('No such file or directory')
        return None

    log_dir = argv[1]
    print('dir: ' + log_dir)

    dom = _merge_log_files(log_dir)

    # normpath() drops a trailing separator so that "logs/" yields
    # "logs-marged-....xml" beside the directory instead of
    # "logs/-marged-....xml" inside it (where a later run would read it
    # back in as a log file).
    out_path = (os.path.normpath(log_dir) + '-marged-'
                + strftime("%Y%m%d-%H%M%S", localtime(time())) + '.xml')
    # The with-block guarantees the file is flushed and closed even if
    # serialisation fails part-way.
    with open(out_path, 'w', 'utf-8') as out_file:
        dom.writexml(out_file)
    print('write file: ' + out_path)
    return out_path


if __name__ == '__main__':
    main()
You need to login to post a comment.
