Revision: 17231
Updated Code
at August 29, 2009 20:45 by mustam
Updated Code
# vim: ts=2 sw=2 expandtab
"""Back up a Twitter user's timeline as timestamped XML files, one per page.

Usage: twlog.py <twitter account name> [page offset]

Every downloaded page gets an ``epoch`` attribute added to each
``created_at`` element and is written into ``twitter-log-<account>/``.
"""
import sys
from os import mkdir
from os.path import exists, join
from xml.dom import Node
from xml.dom.minidom import parseString
from time import strptime, strftime, localtime, time, sleep
from calendar import timegm

try:
  from urllib2 import urlopen  # Python 2
except ImportError:
  from urllib.request import urlopen  # Python 3

try:
  readLine = raw_input  # Python 2
except NameError:
  readLine = input  # Python 3

# Verbosity switch. It has its own name so that defining debug() below does
# not overwrite it (a flag called "debug" would always be truthy).
DEBUG = True

RATE_LIMIT_URL = 'http://twitter.com/account/rate_limit_status.xml'
TIMELINE_URL = 'http://twitter.com/statuses/user_timeline/'


def _rateLimitField(tagName):
  """Download the rate-limit document and return the text of its first tagName element."""
  dom = parseString(urlopen(RATE_LIMIT_URL).read())
  return dom.getElementsByTagName(tagName).item(0).firstChild.data


def getRemain():
  """Return the 'remaining-hits' value of the rate-limit document as an int."""
  return int(_rateLimitField('remaining-hits'))


def getResetTime():
  """Return the 'reset-time' value of the rate-limit document as text."""
  return _rateLimitField('reset-time')


def getIds(dom):
  """Return the integer content of every <id> element in dom, in document order."""
  return [int(node.childNodes[0].data) for node in dom.getElementsByTagName('id')]


def getPage(user, page):
  """Download one page of user's timeline and return the raw XML response body."""
  return urlopen(TIMELINE_URL+user+'.xml?page='+str(page)).read()


def getEpoch(text):
  """Convert a 'Thu Aug 27 22:16:00 +0000 2009' style timestamp to epoch seconds.

  The format only accepts the literal '+0000' offset, so the fields are
  interpreted as UTC (hence timegm rather than mktime).
  """
  return timegm(strptime(text, "%a %b %d %H:%M:%S +0000 %Y"))


def addEpochAttr(dom):
  """Add an 'epoch' attribute (epoch seconds, as text) to every <created_at> in dom."""
  for node in dom.getElementsByTagName('created_at'):
    node.setAttribute('epoch', str(getEpoch(node.firstChild.data)))


def debug(msg):
  """Print msg with a 'DEBUG: ' prefix when DEBUG is enabled."""
  if DEBUG:
    print('DEBUG: '+msg)


def _reportRateLimit():
  """Tell the user that no requests are left and when the limit resets."""
  print('Error: no more request.')
  print('Next reset time(UTC): '+getResetTime())


def _writePage(logDir, page, data):
  """Write one page of XML bytes to a timestamped file inside logDir."""
  name = strftime("%Y%m%d-%H%M%S", localtime(time()))+'-page'+('%04d' % page)+'.xml'
  # Binary mode: toxml("utf-8") yields encoded bytes; "with" closes the handle.
  with open(join(logDir, name), 'wb') as out:
    out.write(data)
  debug('write file: '+name)


def main(argv):
  """Run the backup for the account named in argv[1], starting at page argv[2] (default 1)."""
  if len(argv) < 2:
    print('Usage: '+argv[0]+' <twitter account name> [page offset]')
    return

  user = argv[1]
  print('twitter account name: '+user)
  logDir = 'twitter-log-'+user
  sleepTime = 3

  # CONFIRM
  answer = readLine('Now logging start, take several times. r u OK? [y/N]: ')
  if answer.lower() not in ('y', 'yes'):
    print('Aborted.')
    return

  # START LOGGING
  remain = getRemain()
  if remain <= 0:
    _reportRateLimit()
    return

  if not exists(logDir):
    mkdir(logDir)
    debug('make dir: '+logDir)

  page = int(argv[2]) if len(argv) > 2 else 1
  seenIds = set()
  while True:
    dom = parseString(getPage(user, page))
    addEpochAttr(dom)
    debug('get log: page='+str(page))
    newIds = set(getIds(dom))-seenIds
    seenIds |= newIds
    debug('new tweet: '+str(len(newIds))+', accum. tweet: '+str(len(seenIds)))
    remain = getRemain()
    debug('remain hits: '+str(remain))

    if not newIds:
      break  # a page without unseen ids marks the end of the timeline
    # Save before looking at the rate limit, so a page that was already
    # downloaded is not thrown away when it used up the last request.
    _writePage(logDir, page, dom.toxml("utf-8"))
    if remain <= 0:
      break

    debug('wait '+str(sleepTime)+'sec.')
    sleep(sleepTime)
    page += 1

  if remain <= 0:
    _reportRateLimit()
  else:
    print('OK!')


if __name__ == '__main__':
  main(sys.argv)
Revision: 17230
Updated Code
at August 29, 2009 20:14 by mustam
Updated Code
# vim: ts=2 sw=2 expandtab
"""Back up a Twitter user's timeline as timestamped XML files, one per page.

Usage: twlog.py <twitter account name> [page offset]

Every downloaded page gets an ``epoch`` attribute added to each
``created_at`` element and is written into ``twitter-log-<account>/``.
"""
import sys
from os import mkdir
from os.path import exists, join
import time
from calendar import timegm
from xml.dom import Node
from xml.dom.minidom import parseString

try:
  from urllib2 import urlopen  # Python 2
except ImportError:
  from urllib.request import urlopen  # Python 3

try:
  readLine = raw_input  # Python 2
except NameError:
  readLine = input  # Python 3

# Verbosity switch. It has its own name so that defining debug() below does
# not overwrite it (a flag called "debug" would always be truthy).
DEBUG = True

RATE_LIMIT_URL = 'http://twitter.com/account/rate_limit_status.xml'
TIMELINE_URL = 'http://twitter.com/statuses/user_timeline/'


def _rateLimitField(tagName):
  """Download the rate-limit document and return the text of its first tagName element."""
  dom = parseString(urlopen(RATE_LIMIT_URL).read())
  return dom.getElementsByTagName(tagName).item(0).firstChild.data


def getRemain():
  """Return the 'remaining-hits' value of the rate-limit document as an int."""
  return int(_rateLimitField('remaining-hits'))


def getResetTime():
  """Return the 'reset-time' value of the rate-limit document as text."""
  return _rateLimitField('reset-time')


def getIds(rawdata):
  """Parse rawdata as XML and return the integer content of every <id> element."""
  dom = parseString(rawdata)
  return [int(node.childNodes[0].data) for node in dom.getElementsByTagName('id')]


def getPage(user, page):
  """Download one page of user's timeline and return the raw XML response body."""
  return urlopen(TIMELINE_URL+user+'.xml?page='+str(page)).read()


def getEpoch(text):
  """Convert a 'Thu Aug 27 22:16:00 +0000 2009' style timestamp to epoch seconds.

  The format only accepts the literal '+0000' offset, so the fields are
  interpreted as UTC (hence timegm rather than mktime).
  """
  return timegm(time.strptime(text, "%a %b %d %H:%M:%S +0000 %Y"))


def addEpochAttr(dom):
  """Add an 'epoch' attribute (epoch seconds, as text) to every <created_at> in dom.

  dom must be a parsed document, not the raw XML text.
  """
  for node in dom.getElementsByTagName('created_at'):
    node.setAttribute('epoch', str(getEpoch(node.firstChild.data)))


def debug(msg):
  """Print msg with a 'DEBUG: ' prefix when DEBUG is enabled."""
  if DEBUG:
    print('DEBUG: '+msg)


def _reportRateLimit():
  """Tell the user that no requests are left and when the limit resets."""
  print('Error: no more request.')
  print('Next reset time(UTC): '+getResetTime())


def _writePage(logDir, page, data):
  """Write one page of XML bytes to a timestamped file inside logDir."""
  stamp = time.strftime("%Y%m%d-%H%M%S", time.localtime(time.time()))
  name = stamp+'-page'+('%04d' % page)+'.xml'
  # Binary mode: toxml("utf-8") yields encoded bytes; "with" closes the handle.
  with open(join(logDir, name), 'wb') as out:
    out.write(data)
  debug('write file: '+name)


def main(argv):
  """Run the backup for the account named in argv[1], starting at page argv[2] (default 1)."""
  if len(argv) < 2:
    print('Usage: '+argv[0]+' <twitter account name> [page offset]')
    return

  user = argv[1]
  print('twitter account name: '+user)
  logDir = 'twitter-log-'+user
  sleepTime = 3

  # CONFIRM
  answer = readLine('Now logging start, take several times. r u OK? [y/N]: ')
  if answer.lower() not in ('y', 'yes'):
    print('Aborted.')
    return

  # START LOGGING
  remain = getRemain()
  if remain <= 0:
    _reportRateLimit()
    return

  if not exists(logDir):
    mkdir(logDir)
    debug('make dir: '+logDir)

  page = int(argv[2]) if len(argv) > 2 else 1
  seenIds = set()
  while True:
    rawData = getPage(user, page)
    # addEpochAttr works on a DOM, so parse first and annotate every page.
    dom = parseString(rawData)
    addEpochAttr(dom)
    debug('get log: page='+str(page))
    newIds = set(getIds(rawData))-seenIds
    seenIds |= newIds
    debug('new tweet: '+str(len(newIds))+', accum. tweet: '+str(len(seenIds)))
    remain = getRemain()
    debug('remain hits: '+str(remain))

    if not newIds:
      break  # a page without unseen ids marks the end of the timeline
    # Save before looking at the rate limit, so a page that was already
    # downloaded is not thrown away when it used up the last request.
    _writePage(logDir, page, dom.toxml("utf-8"))
    if remain <= 0:
      break

    debug('wait '+str(sleepTime)+'sec.')
    time.sleep(sleepTime)
    page += 1

  if remain <= 0:
    _reportRateLimit()
  else:
    print('OK!')


if __name__ == '__main__':
  main(sys.argv)
Revision: 17229
Updated Code
at August 28, 2009 10:14 by mustam
Updated Code
"""Back up a Twitter user's timeline as timestamped XML files, one per page.

Usage: twlog.py <twitter account name> [page offset]

Every downloaded page gets an ``epoch`` attribute added to each
``created_at`` element and is written into ``twitter-log-<account>/``.
"""
import sys
from os import mkdir
from os.path import exists, join
import time
from calendar import timegm
from xml.dom import Node
from xml.dom.minidom import parseString

try:
  from urllib2 import urlopen  # Python 2
except ImportError:
  from urllib.request import urlopen  # Python 3

try:
  readLine = raw_input  # Python 2
except NameError:
  readLine = input  # Python 3

# Verbosity switch. It has its own name so that defining debug() below does
# not overwrite it (a flag called "debug" would always be truthy).
DEBUG = True

RATE_LIMIT_URL = 'http://twitter.com/account/rate_limit_status.xml'
TIMELINE_URL = 'http://twitter.com/statuses/user_timeline/'


def _rateLimitField(tagName):
  """Download the rate-limit document and return the text of its first tagName element."""
  statusDom = parseString(urlopen(RATE_LIMIT_URL).read())
  return statusDom.getElementsByTagName(tagName).item(0).firstChild.data


def getRemain():
  """Return the 'remaining-hits' value of the rate-limit document as an int."""
  return int(_rateLimitField('remaining-hits'))


def getResetTime():
  """Return the 'reset-time' value of the rate-limit document as text."""
  return _rateLimitField('reset-time')


def getIds(rawdata):
  """Parse rawdata as XML and return the integer content of every <id> element."""
  dom = parseString(rawdata)
  return [int(node.childNodes[0].data) for node in dom.getElementsByTagName('id')]


def getPage(user, page):
  """Download one page of user's timeline and return the raw XML response body."""
  return urlopen(TIMELINE_URL+user+'.xml?page='+str(page)).read()


def getEpoch(text):
  """Convert a 'Thu Aug 27 22:16:00 +0000 2009' style timestamp to epoch seconds.

  The format only accepts the literal '+0000' offset, so the fields are
  interpreted as UTC (hence timegm rather than mktime).
  """
  return timegm(time.strptime(text, "%a %b %d %H:%M:%S +0000 %Y"))


def addEpochAttr(dom):
  """Add an 'epoch' attribute (epoch seconds, as text) to every <created_at> in dom.

  dom must be a parsed document, not the raw XML text.
  """
  for node in dom.getElementsByTagName('created_at'):
    node.setAttribute('epoch', str(getEpoch(node.firstChild.data)))


def debug(msg):
  """Print msg with a 'DEBUG: ' prefix when DEBUG is enabled."""
  if DEBUG:
    print('DEBUG: '+msg)


def _reportRateLimit():
  """Tell the user that no requests are left and when the limit resets."""
  print('Error: no more request.')
  print('Next reset time(UTC): '+getResetTime())


def _writePage(logDir, page, data):
  """Write one page of XML bytes to a timestamped file inside logDir."""
  stamp = time.strftime("%Y%m%d-%H%M%S", time.localtime(time.time()))
  name = stamp+'-page'+('%04d' % page)+'.xml'
  # Binary mode: toxml("utf-8") yields encoded bytes; "with" closes the handle.
  with open(join(logDir, name), 'wb') as out:
    out.write(data)
  debug('write file: '+name)


def main(argv):
  """Run the backup for the account named in argv[1], starting at page argv[2] (default 1)."""
  if len(argv) < 2:
    print('Usage: '+argv[0]+' <twitter account name> [page offset]')
    return

  user = argv[1]
  print('twitter account name: '+user)
  logDir = 'twitter-log-'+user
  sleepTime = 3

  # CONFIRM
  answer = readLine('Now logging start, take several times. r u OK? [y/N]: ')
  if answer.lower() not in ('y', 'yes'):
    print('Aborted.')
    return

  # START LOGGING
  remain = getRemain()
  if remain <= 0:
    _reportRateLimit()
    return

  if not exists(logDir):
    mkdir(logDir)
    debug('make dir: '+logDir)

  page = int(argv[2]) if len(argv) > 2 else 1
  seenIds = set()
  while True:
    rawData = getPage(user, page)
    # addEpochAttr works on a DOM, so parse first and annotate every page.
    dom = parseString(rawData)
    addEpochAttr(dom)
    debug('get log: page='+str(page))
    newIds = set(getIds(rawData))-seenIds
    seenIds |= newIds
    debug('new tweet: '+str(len(newIds))+', accum. tweet: '+str(len(seenIds)))
    remain = getRemain()
    debug('remain hits: '+str(remain))

    if not newIds:
      break  # a page without unseen ids marks the end of the timeline
    # Save before looking at the rate limit, so a page that was already
    # downloaded is not thrown away when it used up the last request.
    _writePage(logDir, page, dom.toxml("utf-8"))
    if remain <= 0:
      break

    debug('wait '+str(sleepTime)+'sec.')
    time.sleep(sleepTime)
    page += 1

  if remain <= 0:
    _reportRateLimit()
  else:
    print('OK!')


if __name__ == '__main__':
  main(sys.argv)
Revision: 17228
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at August 27, 2009 22:16 by mustam
Initial Code
"""Back up a Twitter user's timeline as timestamped XML files, one per page.

Usage: twlog.py <twitter account name> [page offset]

Each downloaded page is written unchanged into ``twitter-log-<account>/``.
"""
import sys
from os import mkdir
from os.path import exists, join
import time
from xml.dom import Node
from xml.dom.minidom import parseString

try:
  from urllib2 import urlopen  # Python 2
except ImportError:
  from urllib.request import urlopen  # Python 3

try:
  readLine = raw_input  # Python 2
except NameError:
  readLine = input  # Python 3

# Verbosity switch. It has its own name so that defining debug() below does
# not overwrite it (a flag called "debug" would always be truthy).
DEBUG = True

RATE_LIMIT_URL = 'http://twitter.com/account/rate_limit_status.xml'
TIMELINE_URL = 'http://twitter.com/statuses/user_timeline/'


def _rateLimitField(tagName):
  """Download the rate-limit document and return the text of its first tagName element."""
  statusDom = parseString(urlopen(RATE_LIMIT_URL).read())
  return statusDom.getElementsByTagName(tagName).item(0).firstChild.data


def getRemain():
  """Return the 'remaining-hits' value of the rate-limit document as an int."""
  return int(_rateLimitField('remaining-hits'))


def getResetTime():
  """Return the 'reset-time' value of the rate-limit document as text."""
  return _rateLimitField('reset-time')


def getIds(rawdata):
  """Parse rawdata as XML and return the integer content of every <id> element."""
  dom = parseString(rawdata)
  return [int(node.childNodes[0].data) for node in dom.getElementsByTagName('id')]


def getPage(user, page):
  """Download one page of user's timeline and return the raw XML response body."""
  return urlopen(TIMELINE_URL+user+'.xml?page='+str(page)).read()


def debug(msg):
  """Print msg with a 'DEBUG: ' prefix when DEBUG is enabled."""
  if DEBUG:
    print('DEBUG: '+msg)


def _reportRateLimit():
  """Tell the user that no requests are left and when the limit resets."""
  print('Error: no more request.')
  print('Next reset time(UTC): '+getResetTime())


def _writePage(logDir, page, data):
  """Write one downloaded page to a timestamped file inside logDir."""
  stamp = time.strftime("%Y%m%d-%H%M%S", time.localtime(time.time()))
  name = stamp+'-page'+('%04d' % page)+'.xml'
  # Binary mode keeps the response body byte-for-byte; "with" closes the handle.
  with open(join(logDir, name), 'wb') as out:
    out.write(data)
  debug('write file: '+name)


def main(argv):
  """Run the backup for the account named in argv[1], starting at page argv[2] (default 1)."""
  if len(argv) < 2:
    print('Usage: '+argv[0]+' <twitter account name> [page offset]')
    return

  user = argv[1]
  print('twitter account name: '+user)
  logDir = 'twitter-log-'+user
  sleepTime = 3

  # CONFIRM
  answer = readLine('Now logging start, take several times. r u OK? [y/N]: ')
  if answer.lower() not in ('y', 'yes'):
    print('Aborted.')
    return

  # START LOGGING
  remain = getRemain()
  if remain <= 0:
    _reportRateLimit()
    return

  if not exists(logDir):
    mkdir(logDir)
    debug('make dir: '+logDir)

  page = int(argv[2]) if len(argv) > 2 else 1
  seenIds = set()
  while True:
    rawData = getPage(user, page)
    debug('get log: page='+str(page))
    newIds = set(getIds(rawData))-seenIds
    seenIds |= newIds
    debug('new tweet: '+str(len(newIds))+', accum. tweet: '+str(len(seenIds)))
    remain = getRemain()
    debug('remain hits: '+str(remain))

    if not newIds:
      break  # a page without unseen ids marks the end of the timeline
    # Save before looking at the rate limit, so a page that was already
    # downloaded is not thrown away when it used up the last request.
    _writePage(logDir, page, rawData)
    if remain <= 0:
      break

    debug('wait '+str(sleepTime)+'sec.')
    time.sleep(sleepTime)
    page += 1

  if remain <= 0:
    _reportRateLimit()
  else:
    print('OK!')


if __name__ == '__main__':
  main(sys.argv)
Initial URL
Initial Description
e.g. <pre> $ twlog.py user twitter account name: user Now logging start, take several times. r u OK? [y/N]: y DEBUG: make dir: twitter-log-user DEBUG: get log: page=1 DEBUG: new tweet: 21, accum. tweet: 21 DEBUG: remain hits: 149 DEBUG: write file: 20090828-105421-page0001.xml DEBUG: wait 3sec. ... DEBUG: get log: page=7 DEBUG: new tweet: 0, accum. tweet: 115 DEBUG: remain hits: 143 OK! </pre> * cf. [Stylesheet for twitter-log file](http://snipplr.com/view/18927/stylesheet-for-twitterlog-file/) * cf. [Merge twitter-logged files into one xml-file](http://snipplr.com/view/18928/merge-twitterlogged-files-into-one-xmlfile/)
Initial Title
Backup my twitter-log
Initial Tags
Initial Language
Python