Revision: 17231
Updated Code
at August 29, 2009 20:45 by mustam
Updated Code
# vim: ts=2 sw=2 expandtab
import sys
from os import mkdir
from os.path import exists
import time
from urllib2 import urlopen
from xml.dom import Node
from xml.dom.minidom import parseString
from time import strptime, strftime, localtime, time, sleep
from calendar import timegm
debug=True
def getRemain():
  """Return how many API requests are still allowed, as an int.

  Downloads the rate-limit status XML document and converts the text of its
  first ``remaining-hits`` element to an integer.
  """
  url = 'http://twitter.com/account/rate_limit_status.xml'
  response = urlopen(url)
  try:
    raw = response.read()
  finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
  tag = parseString(raw).getElementsByTagName('remaining-hits').item(0)
  return int(tag.firstChild.data)
def getResetTime():
  """Return the text of the ``reset-time`` element of the rate-limit status.

  Downloads the rate-limit status XML document and returns the character
  data of its first ``reset-time`` element unchanged (no date parsing).
  """
  url = 'http://twitter.com/account/rate_limit_status.xml'
  response = urlopen(url)
  try:
    raw = response.read()
  finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
  tag = parseString(raw).getElementsByTagName('reset-time').item(0)
  return tag.firstChild.data
def getIds(dom):
  """Collect the integer value of every ``id`` element in a parsed document.

  dom: a minidom document (or element) to search.
  Returns a list of ints in document order; each value is read from the
  first child node of an ``id`` element.
  """
  return [int(node.childNodes[0].data)
          for node in dom.getElementsByTagName('id')]
def getPage(user, page):
  """Download one page of a user's timeline and return the response body.

  user: account name, inserted into the request URL as given.
  page: page number; converted with str() for the ``page`` query parameter.
  Returns whatever the HTTP response's read() yields (the raw XML).
  """
  url = 'http://twitter.com/statuses/user_timeline/'
  response = urlopen(url+user+'.xml?page='+str(page))
  try:
    return response.read()
  finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
def getEpoch(str):
  """Convert a timestamp string to integer seconds since the Unix epoch.

  str: text such as ``Thu Aug 27 22:16:00 +0000 2009``; the ``+0000`` offset
       is matched literally and the fields are interpreted as UTC by timegm().
  Raises ValueError (from strptime) when the text does not match the format.
  """
  return timegm(strptime(str, "%a %b %d %H:%M:%S +0000 %Y"))
def addEpochAttr(dom):
  """Tag every ``created_at`` element with an ``epoch`` attribute, in place.

  dom: a minidom document (or element); it is modified, nothing is returned.
  The attribute value is the element's text run through getEpoch() and
  converted to a string.
  """
  for node in dom.getElementsByTagName('created_at'):
    seconds = getEpoch(node.firstChild.data)
    node.setAttribute('epoch', str(seconds))
# The module-level ``debug=True`` flag shares its name with the function below,
# so the ``def`` rebinds it and ``if debug:`` inside the function only ever saw
# the (always true) function object.  A separately named switch makes it
# possible to actually turn the messages off.
DEBUG_ENABLED = True


def debug(str):
  """Print ``str`` prefixed with ``DEBUG: `` when DEBUG_ENABLED is true.

  str: the message text (the parameter name is kept for compatibility even
       though it hides the builtin inside this function).
  """
  if DEBUG_ENABLED:
    print('DEBUG: '+str)
### MAIN ####################################################
# Script body: save a Twitter account's timeline, one XML file per page, into
# the directory ``twitter-log-<account>``.
#   sys.argv[1]  account name (required)
#   sys.argv[2]  first page to fetch (optional, defaults to 1)
if len(sys.argv) < 2:
  print('Usage: '+sys.argv[0]+' <twitter account name> [page offset]')
else:
  user = sys.argv[1]
  print('twitter account name: '+user)
  dir = 'twitter-log-'+user
  sleepTime = 3  # seconds to pause between page requests
  # CONFIRM
  s = raw_input('Now logging start, take several times. r u OK? [y/N]: ')
  if s.lower() == 'y' or s.lower() == 'yes':
    # START LOGGING
    remain = getRemain()
    if remain > 0:
      if not exists(dir):
        mkdir(dir)  # make dir
      debug('make dir: '+dir)  # logged whether or not the directory was just created
      idSet = set()  # every id seen so far
      if len(sys.argv) > 2:
        page = int(sys.argv[2])
      else:
        page = 1
      # One loop replaces the fetch code that used to be duplicated before and
      # inside the loop; the order of requests, writes and sleeps is unchanged.
      while True:
        pageStr = '%04d' % page
        rawData = getPage(user, page)  # get log
        dom = parseString(rawData)
        addEpochAttr(dom)
        debug('get log: page='+str(page))
        newIdSet = set(getIds(dom))-idSet
        idSet = idSet | newIdSet
        debug('new tweet: '+str(len(newIdSet))+', accum. tweet: '+str(len(idSet)))
        remain = getRemain()
        debug('remain hits: '+str(remain))
        # Stop on a page with no unseen ids or when the request quota is used
        # up; the page fetched last is not written in either case.
        if not (len(newIdSet) > 0 and remain > 0):
          break
        path = strftime("%Y%m%d-%H%M%S", localtime(time()))+'-page'+pageStr+'.xml'
        f = open(dir+'/'+path, 'w')
        try:
          f.write(dom.toxml("utf-8"))
        finally:
          # Close every page file as soon as it is written instead of leaving
          # the handle open.
          f.close()
        debug('write file: '+path)
        debug('wait '+str(sleepTime)+'sec.')
        sleep(sleepTime)  # wait
        page += 1
      if remain == 0:
        print('Error: no more request.')
        print('Next reset time(UTC): '+getResetTime())
      else:
        print('OK!')
    else:
      print('Error: no more request.')
      print('Next reset time(UTC): '+getResetTime())
  else:
    print('Aborted.')
Revision: 17230
Updated Code
at August 29, 2009 20:14 by mustam
Updated Code
# vim: ts=2 sw=2 expandtab
import sys
from os import mkdir
from os.path import exists
import time
from urllib2 import urlopen
from xml.dom import Node
from xml.dom.minidom import parseString
debug=True
def getRemain():
  """Return how many API requests are still allowed, as an int.

  Downloads the rate-limit status XML document and converts the text of its
  first ``remaining-hits`` element to an integer.
  """
  url = 'http://twitter.com/account/rate_limit_status.xml'
  response = urlopen(url)
  try:
    raw = response.read()
  finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
  tag = parseString(raw).getElementsByTagName('remaining-hits').item(0)
  return int(tag.firstChild.data)
def getResetTime():
  """Return the text of the ``reset-time`` element of the rate-limit status.

  Downloads the rate-limit status XML document and returns the character
  data of its first ``reset-time`` element unchanged (no date parsing).
  """
  url = 'http://twitter.com/account/rate_limit_status.xml'
  response = urlopen(url)
  try:
    raw = response.read()
  finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
  tag = parseString(raw).getElementsByTagName('reset-time').item(0)
  return tag.firstChild.data
def getIds(rawdata):
  """Parse XML text and collect the integer value of every ``id`` element.

  rawdata: XML source text handed to parseString().
  Returns a list of ints in document order; each value is read from the
  first child node of an ``id`` element.
  """
  id_nodes = parseString(rawdata).getElementsByTagName('id')
  return [int(node.childNodes[0].data) for node in id_nodes]
def getPage(user, page):
  """Download one page of a user's timeline and return the response body.

  user: account name, inserted into the request URL as given.
  page: page number; converted with str() for the ``page`` query parameter.
  Returns whatever the HTTP response's read() yields (the raw XML).
  """
  url = 'http://twitter.com/statuses/user_timeline/'
  response = urlopen(url+user+'.xml?page='+str(page))
  try:
    return response.read()
  finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
def addEpochAttr(dom):
  """Tag every ``created_at`` element with an ``epoch`` attribute, in place.

  dom: a minidom document (or element); it is modified, nothing is returned.
  The attribute value is the element's text run through getEpoch() and
  converted to a string.
  NOTE(review): relies on a module-level getEpoch() existing by the time
  this is called -- verify that one is defined.
  """
  for node in dom.getElementsByTagName('created_at'):
    seconds = getEpoch(node.firstChild.data)
    node.setAttribute('epoch', str(seconds))
# The module-level ``debug=True`` flag shares its name with the function below,
# so the ``def`` rebinds it and ``if debug:`` inside the function only ever saw
# the (always true) function object.  A separately named switch makes it
# possible to actually turn the messages off.
DEBUG_ENABLED = True


def debug(str):
  """Print ``str`` prefixed with ``DEBUG: `` when DEBUG_ENABLED is true.

  str: the message text (the parameter name is kept for compatibility even
       though it hides the builtin inside this function).
  """
  if DEBUG_ENABLED:
    print('DEBUG: '+str)
def getEpoch(created_at):
  """Convert a ``created_at`` timestamp string to integer seconds since the epoch.

  addEpochAttr() calls getEpoch(), but this revision of the script never
  defined it, so the first call raised NameError.  The format assumes
  timestamps shaped like ``Thu Aug 27 22:16:00 +0000 2009`` -- TODO confirm
  against real API output.
  """
  # Imported here because the script's import block does not bring in calendar.
  from calendar import timegm
  return timegm(time.strptime(created_at, "%a %b %d %H:%M:%S +0000 %Y"))


### MAIN ####################################################
# Script body: save a Twitter account's timeline, one XML file per page, into
# the directory ``twitter-log-<account>``.
#   sys.argv[1]  account name (required)
#   sys.argv[2]  first page to fetch (optional, defaults to 1)
if len(sys.argv) < 2:
  print('Usage: '+sys.argv[0]+' <twitter account name> [page offset]')
else:
  user = sys.argv[1]
  print('twitter account name: '+user)
  dir = 'twitter-log-'+user
  sleepTime = 3  # seconds to pause between page requests
  # CONFIRM
  s = raw_input('Now logging start, take several times. r u OK? [y/N]: ')
  if s.lower() == 'y' or s.lower() == 'yes':
    # START LOGGING
    remain = getRemain()
    if remain > 0:
      if not exists(dir):
        mkdir(dir)  # make dir
      debug('make dir: '+dir)  # logged whether or not the directory was just created
      idSet = set()  # every id seen so far
      if len(sys.argv) > 2:
        page = int(sys.argv[2])
      else:
        page = 1
      # One loop replaces the fetch code that used to be duplicated before and
      # inside the loop; the order of requests, writes and sleeps is unchanged.
      while True:
        pageStr = '%04d' % page
        rawData = getPage(user, page)  # get log
        # addEpochAttr() needs a parsed DOM; it used to be handed the raw XML
        # text (which has no getElementsByTagName) and only for the first page.
        dom = parseString(rawData)
        addEpochAttr(dom)
        debug('get log: page='+str(page))
        newIdSet = set(getIds(rawData))-idSet
        idSet = idSet | newIdSet
        debug('new tweet: '+str(len(newIdSet))+', accum. tweet: '+str(len(idSet)))
        remain = getRemain()
        debug('remain hits: '+str(remain))
        # Stop on a page with no unseen ids or when the request quota is used
        # up; the page fetched last is not written in either case.
        if not (len(newIdSet) > 0 and remain > 0):
          break
        path = time.strftime("%Y%m%d-%H%M%S", time.localtime(time.time()))+'-page'+pageStr+'.xml'
        f = open(dir+'/'+path, 'w')
        try:
          # Write the annotated document so the epoch attributes reach the file.
          f.write(dom.toxml("utf-8"))
        finally:
          # Close every page file as soon as it is written instead of leaving
          # the handle open.
          f.close()
        debug('write file: '+path)
        debug('wait '+str(sleepTime)+'sec.')
        time.sleep(sleepTime)  # wait
        page += 1
      if remain == 0:
        print('Error: no more request.')
        print('Next reset time(UTC): '+getResetTime())
      else:
        print('OK!')
    else:
      print('Error: no more request.')
      print('Next reset time(UTC): '+getResetTime())
  else:
    print('Aborted.')
Revision: 17229
Updated Code
at August 28, 2009 10:14 by mustam
Updated Code
import sys
from os import mkdir
from os.path import exists
import time
from urllib2 import urlopen
from xml.dom import Node
from xml.dom.minidom import parseString
debug=True
def getRemain():
  """Return how many API requests are still allowed, as an int.

  Downloads the rate-limit status XML document and converts the text of its
  first ``remaining-hits`` element to an integer.
  """
  url = 'http://twitter.com/account/rate_limit_status.xml'
  response = urlopen(url)
  try:
    raw = response.read()
  finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
  tag = parseString(raw).getElementsByTagName('remaining-hits').item(0)
  return int(tag.firstChild.data)
def getResetTime():
  """Return the text of the ``reset-time`` element of the rate-limit status.

  Downloads the rate-limit status XML document and returns the character
  data of its first ``reset-time`` element unchanged (no date parsing).
  """
  url = 'http://twitter.com/account/rate_limit_status.xml'
  response = urlopen(url)
  try:
    raw = response.read()
  finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
  tag = parseString(raw).getElementsByTagName('reset-time').item(0)
  return tag.firstChild.data
def getIds(rawdata):
  """Parse XML text and collect the integer value of every ``id`` element.

  rawdata: XML source text handed to parseString().
  Returns a list of ints in document order; each value is read from the
  first child node of an ``id`` element.
  """
  id_nodes = parseString(rawdata).getElementsByTagName('id')
  return [int(node.childNodes[0].data) for node in id_nodes]
def getPage(user, page):
  """Download one page of a user's timeline and return the response body.

  user: account name, inserted into the request URL as given.
  page: page number; converted with str() for the ``page`` query parameter.
  Returns whatever the HTTP response's read() yields (the raw XML).
  """
  url = 'http://twitter.com/statuses/user_timeline/'
  response = urlopen(url+user+'.xml?page='+str(page))
  try:
    return response.read()
  finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
def addEpochAttr(dom):
  """Tag every ``created_at`` element with an ``epoch`` attribute, in place.

  dom: a minidom document (or element); it is modified, nothing is returned.
  The attribute value is the element's text run through getEpoch() and
  converted to a string.
  NOTE(review): relies on a module-level getEpoch() existing by the time
  this is called -- verify that one is defined.
  """
  for node in dom.getElementsByTagName('created_at'):
    seconds = getEpoch(node.firstChild.data)
    node.setAttribute('epoch', str(seconds))
# The module-level ``debug=True`` flag shares its name with the function below,
# so the ``def`` rebinds it and ``if debug:`` inside the function only ever saw
# the (always true) function object.  A separately named switch makes it
# possible to actually turn the messages off.
DEBUG_ENABLED = True


def debug(str):
  """Print ``str`` prefixed with ``DEBUG: `` when DEBUG_ENABLED is true.

  str: the message text (the parameter name is kept for compatibility even
       though it hides the builtin inside this function).
  """
  if DEBUG_ENABLED:
    print('DEBUG: '+str)
def getEpoch(created_at):
  """Convert a ``created_at`` timestamp string to integer seconds since the epoch.

  addEpochAttr() calls getEpoch(), but this revision of the script never
  defined it, so the first call raised NameError.  The format assumes
  timestamps shaped like ``Thu Aug 27 22:16:00 +0000 2009`` -- TODO confirm
  against real API output.
  """
  # Imported here because the script's import block does not bring in calendar.
  from calendar import timegm
  return timegm(time.strptime(created_at, "%a %b %d %H:%M:%S +0000 %Y"))


### MAIN ####################################################
# Script body: save a Twitter account's timeline, one XML file per page, into
# the directory ``twitter-log-<account>``.
#   sys.argv[1]  account name (required)
#   sys.argv[2]  first page to fetch (optional, defaults to 1)
if len(sys.argv) < 2:
  print('Usage: '+sys.argv[0]+' <twitter account name> [page offset]')
else:
  user = sys.argv[1]
  print('twitter account name: '+user)
  dir = 'twitter-log-'+user
  sleepTime = 3  # seconds to pause between page requests
  # CONFIRM
  s = raw_input('Now logging start, take several times. r u OK? [y/N]: ')
  if s.lower() == 'y' or s.lower() == 'yes':
    # START LOGGING
    remain = getRemain()
    if remain > 0:
      if not exists(dir):
        mkdir(dir)  # make dir
      debug('make dir: '+dir)  # logged whether or not the directory was just created
      idSet = set()  # every id seen so far
      if len(sys.argv) > 2:
        page = int(sys.argv[2])
      else:
        page = 1
      # One loop replaces the fetch code that used to be duplicated before and
      # inside the loop; the order of requests, writes and sleeps is unchanged.
      while True:
        pageStr = '%04d' % page
        rawData = getPage(user, page)  # get log
        # addEpochAttr() needs a parsed DOM; it used to be handed the raw XML
        # text (which has no getElementsByTagName) and only for the first page.
        dom = parseString(rawData)
        addEpochAttr(dom)
        debug('get log: page='+str(page))
        newIdSet = set(getIds(rawData))-idSet
        idSet = idSet | newIdSet
        debug('new tweet: '+str(len(newIdSet))+', accum. tweet: '+str(len(idSet)))
        remain = getRemain()
        debug('remain hits: '+str(remain))
        # Stop on a page with no unseen ids or when the request quota is used
        # up; the page fetched last is not written in either case.
        if not (len(newIdSet) > 0 and remain > 0):
          break
        path = time.strftime("%Y%m%d-%H%M%S", time.localtime(time.time()))+'-page'+pageStr+'.xml'
        f = open(dir+'/'+path, 'w')
        try:
          # Write the annotated document so the epoch attributes reach the file.
          f.write(dom.toxml("utf-8"))
        finally:
          # Close every page file as soon as it is written instead of leaving
          # the handle open.
          f.close()
        debug('write file: '+path)
        debug('wait '+str(sleepTime)+'sec.')
        time.sleep(sleepTime)  # wait
        page += 1
      if remain == 0:
        print('Error: no more request.')
        print('Next reset time(UTC): '+getResetTime())
      else:
        print('OK!')
    else:
      print('Error: no more request.')
      print('Next reset time(UTC): '+getResetTime())
  else:
    print('Aborted.')
Revision: 17228
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at August 27, 2009 22:16 by mustam
Initial Code
import sys
from os import mkdir
from os.path import exists
import time
from urllib2 import urlopen
from xml.dom import Node
from xml.dom.minidom import parseString
debug=True
def getRemain():
  """Return how many API requests are still allowed, as an int.

  Downloads the rate-limit status XML document and converts the text of its
  first ``remaining-hits`` element to an integer.
  """
  url = 'http://twitter.com/account/rate_limit_status.xml'
  response = urlopen(url)
  try:
    raw = response.read()
  finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
  tag = parseString(raw).getElementsByTagName('remaining-hits').item(0)
  return int(tag.firstChild.data)
def getResetTime():
  """Return the text of the ``reset-time`` element of the rate-limit status.

  Downloads the rate-limit status XML document and returns the character
  data of its first ``reset-time`` element unchanged (no date parsing).
  """
  url = 'http://twitter.com/account/rate_limit_status.xml'
  response = urlopen(url)
  try:
    raw = response.read()
  finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
  tag = parseString(raw).getElementsByTagName('reset-time').item(0)
  return tag.firstChild.data
def getIds(rawdata):
  """Parse XML text and collect the integer value of every ``id`` element.

  rawdata: XML source text handed to parseString().
  Returns a list of ints in document order; each value is read from the
  first child node of an ``id`` element.
  """
  id_nodes = parseString(rawdata).getElementsByTagName('id')
  return [int(node.childNodes[0].data) for node in id_nodes]
def getPage(user, page):
  """Download one page of a user's timeline and return the response body.

  user: account name, inserted into the request URL as given.
  page: page number; converted with str() for the ``page`` query parameter.
  Returns whatever the HTTP response's read() yields (the raw XML).
  """
  url = 'http://twitter.com/statuses/user_timeline/'
  response = urlopen(url+user+'.xml?page='+str(page))
  try:
    return response.read()
  finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()
# The module-level ``debug=True`` flag shares its name with the function below,
# so the ``def`` rebinds it and ``if debug:`` inside the function only ever saw
# the (always true) function object.  A separately named switch makes it
# possible to actually turn the messages off.
DEBUG_ENABLED = True


def debug(str):
  """Print ``str`` prefixed with ``DEBUG: `` when DEBUG_ENABLED is true.

  str: the message text (the parameter name is kept for compatibility even
       though it hides the builtin inside this function).
  """
  if DEBUG_ENABLED:
    print('DEBUG: '+str)
### MAIN ####################################################
# Script body: save a Twitter account's timeline, one XML file per page, into
# the directory ``twitter-log-<account>``.
#   sys.argv[1]  account name (required)
#   sys.argv[2]  first page to fetch (optional, defaults to 1)
if len(sys.argv) < 2:
  print('Usage: '+sys.argv[0]+' <twitter account name> [page offset]')
else:
  user = sys.argv[1]
  print('twitter account name: '+user)
  dir = 'twitter-log-'+user
  sleepTime = 3  # seconds to pause between page requests
  # CONFIRM
  s = raw_input('Now logging start, take several times. r u OK? [y/N]: ')
  if s.lower() == 'y' or s.lower() == 'yes':
    # START LOGGING
    remain = getRemain()
    if remain > 0:
      if not exists(dir):
        mkdir(dir)  # make dir
      debug('make dir: '+dir)  # logged whether or not the directory was just created
      idSet = set()  # every id seen so far
      if len(sys.argv) > 2:
        page = int(sys.argv[2])
      else:
        page = 1
      # One loop replaces the fetch code that used to be duplicated before and
      # inside the loop; the order of requests, writes and sleeps is unchanged.
      while True:
        pageStr = '%04d' % page
        rawData = getPage(user, page)  # get log
        debug('get log: page='+str(page))
        newIdSet = set(getIds(rawData))-idSet
        idSet = idSet | newIdSet
        debug('new tweet: '+str(len(newIdSet))+', accum. tweet: '+str(len(idSet)))
        remain = getRemain()
        debug('remain hits: '+str(remain))
        # Stop on a page with no unseen ids or when the request quota is used
        # up; the page fetched last is not written in either case.
        if not (len(newIdSet) > 0 and remain > 0):
          break
        path = time.strftime("%Y%m%d-%H%M%S", time.localtime(time.time()))+'-page'+pageStr+'.xml'
        f = open(dir+'/'+path, 'w')
        try:
          f.write(rawData)
        finally:
          # Close every page file as soon as it is written instead of leaving
          # the handle open.
          f.close()
        debug('write file: '+path)
        debug('wait '+str(sleepTime)+'sec.')
        time.sleep(sleepTime)  # wait
        page += 1
      if remain == 0:
        print('Error: no more request.')
        print('Next reset time(UTC): '+getResetTime())
      else:
        print('OK!')
    else:
      print('Error: no more request.')
      print('Next reset time(UTC): '+getResetTime())
  else:
    print('Aborted.')
Initial URL
Initial Description
e.g.
<pre>
$ twlog.py user
twitter account name: user
Now logging start, take several times. r u OK? [y/N]: y
DEBUG: make dir: twitter-log-user
DEBUG: get log: page=1
DEBUG: new tweet: 21, accum. tweet: 21
DEBUG: remain hits: 149
DEBUG: write file: 20090828-105421-page0001.xml
DEBUG: wait 3sec.
...
DEBUG: get log: page=7
DEBUG: new tweet: 0, accum. tweet: 115
DEBUG: remain hits: 143
OK!
</pre>

* cf. [Stylesheet for twitter-log file](http://snipplr.com/view/18927/stylesheet-for-twitterlog-file/)
* cf. [Merge twitter-logged files into one xml-file](http://snipplr.com/view/18928/merge-twitterlogged-files-into-one-xmlfile/)
Initial Title
Backup my twitter-log
Initial Tags
Initial Language
Python