Tumblr taglist generator: returns your Tumblog's tags in JSON format. Written in Python and runs on Google App Engine.
import cgi
import datetime
import logging
import math
import md5
import sys
import time
import urllib
from xml.dom import minidom

from google.appengine.api import users
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.ext import db
from google.appengine.api import urlfetch
from django.utils import simplejson as json

# Tumblr "read" API endpoint whose tags are served by /tagcloud.
DEFAULT_TUMBLR_API = 'http://im.doquangtu.net/api/read'

# Page size assumed when computing the `start` offset and the page count.
POSTS_PER_PAGE = 20

# Cached tag lists older than this are rebuilt from the Tumblr API.
CACHE_TTL = datetime.timedelta(hours=24)

# Cached content shorter than this (e.g. '' or '{}') is treated as missing.
MIN_CACHED_LENGTH = 5


# Database Classes
class TumblrCache(db.Model):
    """Datastore entity holding the JSON-encoded tag counts for one API URL."""

    # MD5 hex digest of the Tumblr API URL this entry belongs to.
    cache_id = db.StringProperty(required=True)
    # JSON-encoded {tag: count} mapping. TextProperty rather than
    # StringProperty: StringProperty is length-limited and rejects the
    # JSON produced for blogs with more than a handful of tags.
    content = db.TextProperty()
    # Time the content was last refreshed; compared against CACHE_TTL.
    date = db.DateTimeProperty(auto_now_add=True)


class Flush(webapp.RequestHandler):
    """Handler for /flush: deletes every TumblrCache entity."""

    def get(self):
        """Delete all cache entries and reply with 'FLUSHED!'."""
        # NOTE(review): this is a destructive operation reachable through an
        # unauthenticated GET request; consider restricting access.
        self.response.headers['Content-Type'] = 'text/plain'
        out = self.response.out
        for entry in TumblrCache.all():
            entry.delete()
        out.write('FLUSHED!')


class MainPage(webapp.RequestHandler):
    """Handler for /: replies with an empty text/plain response."""

    def get(self):
        """Set the content type; no body is written."""
        self.response.headers['Content-Type'] = 'text/plain'


def _count_tags(posts_node, tag_counts):
    """Add every <tag> found under posts_node's <post> children to tag_counts."""
    for post in posts_node.getElementsByTagName("post"):
        for tag in post.getElementsByTagName("tag"):
            # A tag's text may be split across several text nodes.
            name = "".join(node.nodeValue for node in tag.childNodes
                           if node.nodeType == node.TEXT_NODE).strip()
            tag_counts[name] = tag_counts.get(name, 0) + 1


def _last_page_index(posts_node):
    """Return the zero-based index of the last page, or None if unknown.

    The index is derived from the `total` attribute of the <posts> element.
    """
    try:
        total = float(posts_node.getAttribute("total"))
    except ValueError:
        # Attribute missing (empty string) or not numeric.
        return None
    return int(math.ceil(total / POSTS_PER_PAGE)) - 1


def _fetch_tag_counts(api_url):
    """Page through the Tumblr read API and return a {tag: count} dict.

    Fetching is best effort: on a fetch or parse failure, or when the
    response has no usable <posts> element, paging stops and the counts
    gathered so far are returned.
    """
    tag_counts = {}
    page = 0
    while True:
        url = '%s?filter=text&start=%d' % (api_url, page * POSTS_PER_PAGE)
        try:
            response = urlfetch.fetch(url)
            xdom = minidom.parseString(response.content)
        except Exception:
            # Deliberately broad: any network or XML error ends the crawl
            # without failing the request, but it is logged, not hidden.
            logging.exception('Could not fetch or parse %s', url)
            break

        posts_nodes = xdom.getElementsByTagName("posts")
        if not posts_nodes:
            logging.warning('No <posts> element in response from %s', url)
            break
        posts_node = posts_nodes[0]

        _count_tags(posts_node, tag_counts)

        # Decide whether another page has to be fetched.
        last_page = _last_page_index(posts_node)
        if last_page is None or page >= last_page:
            break
        page += 1
    return tag_counts


class TagCloud(webapp.RequestHandler):
    """Handler for /tagcloud: serves the blog's tag counts as JavaScript."""

    def get(self):
        """Write `var myJSONObject = <json>;` built from cached or fresh data.

        The tag counts are rebuilt from the Tumblr API when the cache entry
        is older than CACHE_TTL or holds no usable content; otherwise the
        cached JSON is served as is.
        """
        self.response.headers['Content-Type'] = 'text/plain'
        out = self.response.out

        # NOTE(review): the original code derived the API URL from the `url`
        # query parameter and then immediately overwrote it with this fixed
        # endpoint, so the parameter has never had any effect. The pinned
        # endpoint is kept; honouring `url` would need validation first,
        # since it would let callers make this app fetch arbitrary URLs.
        tumblr_api = DEFAULT_TUMBLR_API
        cache_id = md5.new(tumblr_api).hexdigest()

        # Reuse the existing cache entry if there is one.
        cache = TumblrCache.gql("WHERE cache_id = :1", cache_id).get()
        if cache is None:
            cache = TumblrCache(cache_id=cache_id)

        content = cache.content or ''
        age = datetime.datetime.utcnow() - cache.date
        if age > CACHE_TTL or len(content) < MIN_CACHED_LENGTH:
            # Cache is missing or older than 24 hours: rebuild and store it.
            content = json.dumps(_fetch_tag_counts(tumblr_api))
            cache.content = content
            cache.date = datetime.datetime.utcnow()
            cache.put()

        # Same output format for fresh and cached data. The original wrote
        # the raw Python dict on a cache miss, which is not valid JSON.
        out.write("var myJSONObject = " + content + ";")


application = webapp.WSGIApplication(
    [('/', MainPage),
     ('/tagcloud', TagCloud),
     ('/flush', Flush)],
    debug=True)


def main():
    """Run the WSGI application under the App Engine CGI adapter."""
    run_wsgi_app(application)


if __name__ == "__main__":
    main()