Posted By

whitetiger on 11/09/06


Tagged

regex url php image path google python flickr blog yahoo wiki web uri syntax LaTeX local translate


Versions (?)

Who likes this?

1 person has marked this snippet as a favorite

anayhk


Python - randomYahoo


 / Published in: Python
 

  1. import os
  2. import random
  3. import re
  4. import urllib
  5. import urllib2
  6.  
  class yahooImages(object):
      '''
      Collect image URLs from a Yahoo image-search result page and download them.

      Written for Python 2: it relies on urllib2 and on the Python 2 urllib API
      (quote_plus, unquote_plus, urlretrieve).
      '''

      # Captures the (still percent-encoded) value of every "&imgurl=...&"
      # parameter found in the result page.
      RE_IMAGEURL = re.compile('&imgurl=(.+?)&', re.DOTALL | re.IGNORECASE)

      # Folder, relative to the current working directory, that
      # downloadImages() writes into.
      DOWNLOAD_DIR = 'yahooIMGs'

      def __init__(self):
          # Maps image URL -> 0. Only the keys matter: the dict is used as a
          # set so that duplicate URLs collapse into one entry.
          self.imagesURLs = {}

      def getRandomImages(self, imageName=None):
          '''
          Search Yahoo Images and remember the image URLs found on one result page.

          imageName = search term; when it is None a random word is generated.

          The result offset is picked at random (0, 10, ..., 500). Every URL
          found is added as a key of self.imagesURLs. Nothing is returned.
          A failed request is not reported: it simply adds no URLs.
          '''
          if imageName is None:
              imageName = self._randomWords()

          # Encode the term so that spaces, '&', '#', ... cannot corrupt the query.
          requestURL = 'http://it.search.yahoo.com/search/images?p=%s&b=%d' % (
              urllib.quote_plus(imageName), random.randint(0, 50) * 10)
          requestHeaders = {'User-Agent': 'yahooImages/1.0'}

          htmlPage = ''
          try:
              request = urllib2.Request(requestURL, None, requestHeaders)
              response = urllib2.urlopen(request)
              try:
                  # Cap the download at 500000 bytes.
                  htmlPage = response.read(500000)
              finally:
                  response.close()
          except Exception:
              # Best effort, as before: any fetch error leaves the page empty.
              # Catching Exception instead of using a bare except lets
              # KeyboardInterrupt and SystemExit propagate.
              pass

          for encodedURL in self.RE_IMAGEURL.findall(htmlPage):
              imageURL = urllib.unquote_plus(encodedURL)
              # Only add a scheme when none is present; an https:// link must
              # not be turned into "http://https://...".
              if not imageURL.startswith(('http://', 'https://')):
                  imageURL = 'http://' + imageURL
              self.imagesURLs[imageURL] = 0

      def _randomWords(self):
          '''
          Return a random word of 2 to 7 characters.

          Characters are lowercase ASCII letters and digits; each letter is
          twice as likely to be drawn as each digit.
          '''
          # The alphabet is repeated to weight letters over digits.
          charset = 'abcdefghijklmnopqrstuvwxyz' * 2 + '0123456789'
          length = random.randint(2, 7)
          return ''.join([random.choice(charset) for _ in range(length)])

      def downloadImages(self):
          '''
          Download every collected image into the DOWNLOAD_DIR folder.

          The folder is created when it is missing. A progress line is printed
          for each image. Errors raised by urllib.urlretrieve are not caught,
          so a failing download stops the run.
          '''
          numberIMGs = len(self.imagesURLs)
          if numberIMGs == 0:
              return

          if not os.path.isdir(self.DOWNLOAD_DIR):
              os.makedirs(self.DOWNLOAD_DIR)

          posIMGs = 0
          for imageURL in self.imagesURLs:
              posIMGs += 1
              print('[' + str(posIMGs) + '/' + str(numberIMGs) + '] - ' + imageURL)

              fileName = os.path.split(imageURL)[1]
              if not fileName:
                  # URL ends with '/': there is no last path component to use.
                  fileName = 'image_%d' % posIMGs

              urllib.urlretrieve(imageURL, os.path.join(self.DOWNLOAD_DIR, fileName))
  68.  
  def _main():
      '''Run one search-and-download cycle, then announce completion.'''
      fetcher = yahooImages()

      # No search term given, so a random one is generated.
      fetcher.getRandomImages()
      fetcher.downloadImages()

      print('Finito...')


  # Script entry point: nothing runs when the module is merely imported.
  if __name__ == '__main__':
      _main()

Report this snippet  

You need to login to post a comment.