Google PageRank lookup


/ Published in: Python
Save to your folder(s)

Look up the Google PageRank of a website from Python. Uses the same interface that the Google Toolbar uses to get the value.


Copy this code and paste it in your HTML
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # (C) 2008 Fred Cirera
  4. # ported in Python from the Ruby code by Vsevolod S. Balashov
  5. # http://snippets.dzone.com/posts/show/3284
  6.  
  7. import urllib2
  8. import re
  9. import time
  10. import sys
  11.  
  12. from urllib import urlencode
  13. from pprint import pprint
  14.  
  # Host name that make_url() is called with when building the query URL.
  HOST = "toolbarqueries.google.com"
  16.  
  def mix(a, b, c):
      """Scramble three integers through nine subtract/shift/xor steps.

      The inputs are first reduced modulo 2**32, and every intermediate value
      is kept in the unsigned 32-bit range so that Python's unbounded integers
      behave like fixed-width 32-bit words.

      NOTE(review): the shift schedule (13, 8, 13, 12, 16, 5, 3, 10, 15) looks
      like the ``mix`` step of Bob Jenkins' lookup2 hash -- confirm against
      that reference.

      Args:
          a, b, c: Integers of any size or sign.

      Returns:
          A tuple ``(a, b, c)`` of mixed values, each in ``[0, 2**32)``.
      """
      def u32(v):
          # Emulate unsigned 32-bit wrap-around.
          return v % 0x100000000

      a, b, c = u32(a), u32(b), u32(c)

      # The whole xor result is reduced, not only the subtraction: a left
      # shift of a 32-bit value would otherwise carry bits above bit 31 into
      # ``b``, and the following right shift of ``b`` would pull those stray
      # bits back down into the result.
      a = u32((a - b - c) ^ (c >> 13))
      b = u32((b - c - a) ^ (a << 8))
      c = u32((c - a - b) ^ (b >> 13))

      a = u32((a - b - c) ^ (c >> 12))
      b = u32((b - c - a) ^ (a << 16))
      c = u32((c - a - b) ^ (b >> 5))

      a = u32((a - b - c) ^ (c >> 3))
      b = u32((b - c - a) ^ (a << 10))
      c = u32((c - a - b) ^ (b >> 15))

      return a, b, c
  34.  
  def checksum(iurl):
      """Fold a sequence of small integers into one integer using mix().

      The sequence is consumed in 12-element chunks: each chunk is split into
      three groups of four values, each group is packed little-endian into one
      integer and added to ``a``, ``b`` and ``c``, and the three accumulators
      are then passed through mix().  The remaining tail (fewer than 12
      values) is added the same way, except that the last group is shifted
      left by 8 bits and the total length is added to ``c``.

      Args:
          iurl: Sliceable sequence of integers.  GoogleHash() passes character
              ordinals and lists of byte values.

      Returns:
          The third value returned by the final mix() call.
      """
      def pack4(values):
          # Little-endian packing of at most the first four values.
          return sum(v << (8 * i) for i, v in enumerate(values[:4]))

      a, b, c = 0x9e3779b9, 0x9e3779b9, 0xe6359a60
      lg = len(iurl)
      k = 0

      # Full 12-value chunks.
      while k <= lg - 12:
          a += pack4(iurl[k:k + 4])
          b += pack4(iurl[k + 4:k + 8])
          c += pack4(iurl[k + 8:k + 12])
          a, b, c = mix(a, b, c)
          k += 12

      # Tail: the low 8 bits of ``c`` are left free for the length term.
      a += pack4(iurl[k:k + 4])
      b += pack4(iurl[k + 4:k + 8])
      c += (pack4(iurl[k + 8:]) << 8) + lg
      a, b, c = mix(a, b, c)
      return c
  52.  
  53.  
  def GoogleHash(value):
      """Return the ``ch`` parameter string for *value*.

      The character ordinals of *value* are hashed with checksum(); the result
      is re-encoded, expanded into 20 integers (each 9 less than the previous
      one), split into 80 little-endian bytes and hashed with checksum()
      again.

      Args:
          value: Text to hash; make_url() passes ``"info:" + site_url``.

      Returns:
          The string ``"6"`` followed by the decimal second checksum.
      """
      def to_bytes(i):
          # Four low-order bytes of ``i``, least significant first.
          return [i & 0xff, (i >> 8) & 0xff, (i >> 16) & 0xff, (i >> 24) & 0xff]

      ch = checksum([ord(char) for char in value])
      # ``//`` keeps this an integer division on every Python version; a
      # float result cannot be shifted.
      ch = ((ch % 0x0d) & 7) | ((ch // 7) << 2)
      buf = [byte for i in range(20) for byte in to_bytes(ch - 9 * i)]
      return "6%s" % checksum(buf)
  59.  
  60.  
  def make_url(host, site_url):
      """Build the query URL for *site_url* on *host*.

      Args:
          host: Host name to send the query to.
          site_url: URL of the site being looked up.

      Returns:
          ``http://<host>/search?<query>``, where the query carries
          ``"info:" + site_url`` as ``q`` and its GoogleHash() as ``ch``.
      """
      url = "info:" + site_url
      params = dict(
          client="navclient-auto",
          ch=GoogleHash(url),
          ie="UTF-8",
          oe="UTF-8",
          features="Rank",
          q=url,
      )
      return "http://%s/search?%s" % (host, urlencode(params))
  66.  
  67.  
  68. # Where the fun begins
  69.  
  if __name__ == "__main__":
      # Usage: script [url].  Without exactly one argument, look up a default.
      if len(sys.argv) != 2:
          url = 'http://www.google.com/'
      else:
          url = sys.argv[1]

      # Add a scheme only when none is present; an https:// URL must not be
      # turned into 'http://https://...'.
      if not url.startswith(('http://', 'https://')):
          url = 'http://%s' % url

      req = urllib2.Request(make_url(HOST, url))
      try:
          f = urllib2.urlopen(req)
          try:
              # Only the first line of the reply is used.
              response = f.readline()
          finally:
              f.close()
      except Exception as err:
          # Top-level boundary: report any request failure and exit non-zero.
          print(err)
          sys.exit(1)

      # A usable reply looks like 'Rank_<n>:<n>:<rank>'; anything else is
      # reported as unranked with rank -1.
      found = re.match(r'^Rank_\d+:\d+:(\d+)', response.strip())
      if found is None:
          print("This page is not ranked")
          rank = -1
      else:
          rank = found.group(1)

      print("PageRank: %d\tURL: %s" % (int(rank), url))

URL: http://www.halotis.com/2009/08/02/google-page-range-python-script/

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.