Posted By

doqtu84 on 02/19/09

Tagged

python

Versions (?)

Last Edited at 02/19/09 04:57am

Statistics

Viewed 1021 times

Favorited by 1 user(s)

Related snippets

Tumblr taglist generator

/ Published in: Python

Tumblr taglist generator returns your Tumblog's tags in JSON format. It is written in Python and runs on Google App Engine.

Expand | Embed | Plain Text

Copy this code and paste it in your HTML

import cgi
 
import md5
 
import time
 
import datetime
 
import math
 
from xml.dom import minidom
 
import urllib
 
import sys
 
 
 
from google.appengine.api import users
 
from google.appengine.ext import webapp
 
from google.appengine.ext.webapp.util import run_wsgi_app
 
from google.appengine.ext import db
 
from google.appengine.api import urlfetch
 
from django.utils import simplejson as json
 
 
 
 
 
# Database Classes
 
class TumblrCache(db.Model):
    """Datastore entity holding the cached, encoded tag list for one API URL."""

    # MD5 hex digest of the Tumblr API URL; the key TagCloud looks entries up by.
    cache_id = db.StringProperty(required=True)

    # JSON-encoded tag counts. This is a TextProperty because StringProperty
    # values are limited to 500 characters, which a blog with many tags
    # exceeds, making put() fail. The field is never used in a query filter,
    # so losing the index costs nothing.
    content = db.TextProperty()

    # Set when the entity is created; TagCloud overwrites it on each refresh.
    date = db.DateTimeProperty(auto_now_add=True)
 
 
 
class Flush(webapp.RequestHandler):
    """Maintenance handler that empties the TumblrCache kind."""

    def get(self):
        """Delete every TumblrCache entity, then answer 'FLUSHED!' as plain text."""
        response = self.response
        response.headers['Content-Type'] = 'text/plain'
        out = response.out

        # Entities are removed one by one while iterating the unfiltered query.
        for entry in TumblrCache.all():
            entry.delete()

        out.write('FLUSHED!')
 
 
 
 
 
class MainPage(webapp.RequestHandler):
    """Handler that answers with an empty plain-text response."""

    def get(self):
        """Set the plain-text content type; no body is written."""
        headers = self.response.headers
        headers['Content-Type'] = 'text/plain'
 
 
 
class TagCloud(webapp.RequestHandler):
    """Serve the tag counts of a Tumblr blog as a JavaScript assignment.

    The response body is ``var myJSONObject = <json>;`` where ``<json>`` maps
    each tag name to the number of times it occurs across the blog's posts.
    The encoded result is stored in ``TumblrCache`` and rebuilt when the
    entry is older than 24 hours or holds fewer than 5 characters.
    """

    # Step size for the ``start`` offset when paging through the read API.
    POSTS_PER_PAGE = 20

    # Maximum age of a cache entry before it is rebuilt.
    CACHE_LIFETIME = datetime.timedelta(hours=24)

    def get(self):
        """Write the tag list, refreshing the cache entry first when stale."""
        self.response.headers['Content-Type'] = 'text/plain'

        # NOTE(review): the original code read the ``url`` request parameter
        # and then unconditionally overwrote the result with this fixed
        # endpoint, so the parameter never had any effect. The fixed endpoint
        # is kept to preserve behaviour; confirm the intent before honouring
        # caller-supplied URLs (doing so would let anyone trigger fetches).
        tumblr_api = 'http://im.doquangtu.net/api/read'
        cache_id = md5.new(tumblr_api).hexdigest()

        # Reuse the stored entry when there is one, otherwise start a new one.
        cache = TumblrCache.gql("WHERE cache_id = :1 LIMIT 1", cache_id).get()
        if cache is None:
            cache = TumblrCache(cache_id=cache_id)

        content = cache.content or ''

        # cache.date is a naive timestamp that the original arithmetic treated
        # as UTC, so it is compared against the current UTC time.
        age = datetime.datetime.utcnow() - cache.date

        if age > self.CACHE_LIFETIME or len(content) < 5:
            # Entry is stale (or effectively empty): rebuild it from Tumblr.
            taglist = self._fetch_tag_counts(tumblr_api)
            content = json.JSONEncoder().encode(taglist)

            cache.content = content
            cache.date = datetime.datetime.utcnow()
            cache.put()

        # Fresh and cached responses share one format. The fresh path used to
        # write the Python dict repr, which is not valid JSON.
        self.response.out.write("var myJSONObject = " + content + ";")

    def _fetch_tag_counts(self, tumblr_api):
        """Page through the Tumblr read API and count how often each tag occurs.

        Fetching is best-effort: a network error, malformed XML or a missing
        ``posts`` element stops paging, and whatever was counted up to that
        point is returned.

        Args:
            tumblr_api: Base URL of the blog's ``/api/read`` endpoint.

        Returns:
            A dict mapping tag text (whitespace-stripped) to its count.
        """
        import logging  # Local import keeps this block self-contained.

        taglist = {}
        page = 0

        while True:
            offset = page * self.POSTS_PER_PAGE
            try:
                result = urlfetch.fetch(
                    tumblr_api + '?filter=text&start=' + str(offset))
                xdom = minidom.parseString(result.content)
                posts_node = xdom.documentElement.getElementsByTagName("posts")[0]
            except Exception:
                # Deliberately broad: any failure ends paging but must not
                # fail the request. Logged so the cause is not lost.
                logging.exception("Tumblr fetch failed at start=%d", offset)
                break

            # Tally every <tag> of every <post> on this page.
            for post in posts_node.getElementsByTagName("post"):
                for tag in post.getElementsByTagName("tag"):
                    name = "".join(
                        node.nodeValue for node in tag.childNodes
                        if node.nodeType == node.TEXT_NODE).strip()
                    taglist[name] = taglist.get(name, 0) + 1

            # Decide whether another page is needed from the reported total.
            try:
                total = float(posts_node.getAttribute("total"))
            except ValueError:
                # Missing or non-numeric total: stop after this page.
                break

            last_page = int(math.ceil(total / self.POSTS_PER_PAGE)) - 1
            if page >= last_page:
                break
            page += 1

        return taglist
 
 
 
# WSGI application: maps each URL path to its request handler class.
application = webapp.WSGIApplication(
    [
        ('/', MainPage),
        ('/tagcloud', TagCloud),
        ('/flush', Flush),
    ],
    debug=True)
 
 
 
 
 
 
 
def main():
    """Hand the module-level WSGI application to run_wsgi_app."""
    run_wsgi_app(application)
 
 
 
# Script entry point: start serving only when executed directly.
if __name__ == '__main__':
    main()

URL: http://www.doquangtu.net

Report this snippet Tweet

Comments

Subscribe to comments

Comment:

You need to login to post a comment.