Posted By

doqtu84 on 02/19/09


Tagged

python tumblr GAE tagcloud


Versions (?)

Who likes this?

1 person has marked this snippet as a favorite

atr000


Tumblr taglist generator


 / Published in: Python
 

URL: http://www.doquangtu.net

Tumblr taglist generator returns your Tumblog's tags in JSON format. It is written in Python and runs on Google App Engine.

  1. import cgi
  2.  
  3. import md5
  4.  
  5. import time
  6.  
  7. import datetime
  8.  
  9. import math
  10.  
  11. from xml.dom import minidom
  12.  
  13. import urllib
  14.  
  15. import sys
  16.  
  17.  
  18.  
  19. from google.appengine.api import users
  20.  
  21. from google.appengine.ext import webapp
  22.  
  23. from google.appengine.ext.webapp.util import run_wsgi_app
  24.  
  25. from google.appengine.ext import db
  26.  
  27. from google.appengine.api import urlfetch
  28.  
  29. from django.utils import simplejson as json
  30.  
  31.  
  32.  
  33.  
  34.  
  35. # Database Classes
  36.  
  37. class TumblrCache(db.Model):
  38.  
  39. cache_id = db.StringProperty(required=True)
  40.  
  41. content = db.StringProperty(multiline=True)
  42.  
  43. date = db.DateTimeProperty(auto_now_add=True)
  44.  
  45.  
  46.  
  47. class Flush(webapp.RequestHandler):
  48.  
  49. def get(self):
  50.  
  51. self.response.headers['Content-Type'] = 'text/plain'
  52.  
  53. w = self.response.out
  54.  
  55.  
  56.  
  57. query = TumblrCache.all()
  58.  
  59. for q in query:
  60.  
  61. q.delete()
  62.  
  63.  
  64.  
  65. w.write('FLUSHED!')
  66.  
  67.  
  68.  
  69.  
  70.  
  71. class MainPage(webapp.RequestHandler):
  72.  
  73. def get(self):
  74.  
  75. self.response.headers['Content-Type'] = 'text/plain'
  76.  
  77.  
  78.  
  79. class TagCloud(webapp.RequestHandler):
  80.  
  81. def get(self):
  82.  
  83. self.response.headers['Content-Type'] = 'text/plain'
  84.  
  85. w = self.response.out
  86.  
  87. # Thu thap cac bien can thiet
  88.  
  89. tumblr_api = cgi.escape(self.request.get('url').encode('utf-8')) + '/api/read'
  90.  
  91. tumblr_api = 'http://im.doquangtu.net/api/read'
  92.  
  93. cache_id = md5.new(tumblr_api).hexdigest()
  94.  
  95. # Neu da co cache, thi tra lai cache
  96.  
  97. try:
  98.  
  99. cache = TumblrCache.gql("WHERE cache_id = :1 LIMIT 1", cache_id)[0]
  100.  
  101. except:
  102.  
  103. cache = TumblrCache(cache_id=cache_id)
  104.  
  105.  
  106.  
  107. content = cache.content
  108.  
  109. if content == None:
  110.  
  111. content = ''
  112.  
  113.  
  114.  
  115. d = (((cache.date.toordinal()-719163)*24+cache.date.hour)*60+cache.date.minute)*60+cache.date.second
  116.  
  117. d = time.time() - d
  118.  
  119.  
  120.  
  121. if d > 24*60*60 or len(content) < 5:
  122.  
  123. # Da qua 24h cache trong he thong
  124.  
  125. form_fields = {
  126.  
  127. "filter": "text"
  128.  
  129. }
  130.  
  131.  
  132.  
  133. loop = True
  134.  
  135. start = 0
  136.  
  137. taglist = {}
  138.  
  139.  
  140.  
  141.  
  142.  
  143.  
  144.  
  145. try:
  146.  
  147. while loop == True:
  148.  
  149. form_fields['start'] = int(start) * 20
  150.  
  151. form_data = urllib.urlencode(form_fields)
  152.  
  153. result = urlfetch.fetch(tumblr_api + '?filter=text&start=' + str(form_fields['start']))
  154.  
  155. result = result.content #.encode('utf-8')
  156.  
  157.  
  158.  
  159. xdom = minidom.parseString(result)
  160.  
  161. try:
  162.  
  163. posts = xdom.firstChild.getElementsByTagName("posts")[0].getElementsByTagName("post")
  164.  
  165. for post in posts:
  166.  
  167. post_id = post.getAttribute("id")
  168.  
  169. tags = post.getElementsByTagName("tag")
  170.  
  171. try:
  172.  
  173. for tag in tags:
  174.  
  175. # Duyet qua tags
  176.  
  177. t = "" . join(t.nodeValue for t in tag.childNodes if t.nodeType == t.TEXT_NODE)
  178.  
  179. t = t.strip()
  180.  
  181. #w.write(t)
  182.  
  183. if t not in taglist:
  184.  
  185. taglist[t] = 1
  186.  
  187. else:
  188.  
  189. taglist[t] = taglist[t] + 1
  190.  
  191. except:
  192.  
  193. # Loi & khong tim thay tags nao
  194.  
  195. i = 0
  196.  
  197. except:
  198.  
  199. # Loi & khong tim thay posts nao
  200.  
  201. i = 0
  202.  
  203.  
  204.  
  205. # kiem tra xem co thoat vong lap hay chua
  206.  
  207. try:
  208.  
  209. posts = xdom.firstChild.getElementsByTagName("posts")[0].getAttribute("total")
  210.  
  211. posts = int(math.ceil(float(posts) / float(20)) - 1)
  212.  
  213. if start + 1 > posts:
  214.  
  215. loop = False
  216.  
  217. else:
  218.  
  219. start = start + 1
  220.  
  221. loop = True
  222.  
  223. except:
  224.  
  225. loop = False
  226.  
  227. except:
  228.  
  229. loop = False
  230.  
  231.  
  232.  
  233. # Viet tags
  234.  
  235. txt = json.JSONEncoder().encode( taglist )
  236.  
  237.  
  238.  
  239. cache.content = txt
  240.  
  241. cache.date = datetime.datetime.today()
  242.  
  243. cache.put()
  244.  
  245.  
  246.  
  247. w.write(taglist)
  248.  
  249. else:
  250.  
  251. # Chua qua 24h cache trong he thong
  252.  
  253. # Tra ve cache data
  254.  
  255. content = cache.content
  256.  
  257. w.write( "var myJSONObject = " + content + ";" )
  258.  
  259.  
  260.  
  261. application = webapp.WSGIApplication(
  262.  
  263. [('/', MainPage),
  264.  
  265. ('/tagcloud', TagCloud),
  266.  
  267. ('/flush', Flush)],
  268.  
  269. debug=True)
  270.  
  271.  
  272.  
  273.  
  274.  
  275.  
  276.  
  277. def main():
  278.  
  279. run_wsgi_app(application)
  280.  
  281.  
  282.  
  283. if __name__ == "__main__":
  284.  
  285. main()

Report this snippet  

Comments

RSS Icon Subscribe to comments
Posted By: karaleonard on March 20, 2009

do i just copy and paste the embed code into my tumblr customization html? is there a certain place you need to paste the code? i just pasted it in and nothing happened, i guess i put it in the wrong place?

You need to login to post a comment.