Use a random user agent for each request


/ Published in: Python
Save to your folder(s)



Copy this code and paste it in your HTML
  1. # You can use this middleware to have a random user agent for every request the spider makes.
  2. # You can define a USER_AGENT_LIST in your settings and the spider will choose a random user agent from that list every time.
  3. #
  4. # You will have to disable the default user agent middleware and add this to your settings file.
  5. #
  6. # DOWNLOADER_MIDDLEWARES = {
  7. # 'scraper.random_user_agent.RandomUserAgentMiddleware': 400,
  8. # 'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None,
  9. # }
  10.  
  11. from scraper.settings import USER_AGENT_LIST
  12. import random
  13. from scrapy import log
  14.  
  15. class RandomUserAgentMiddleware(object):
  16.  
  17. def process_request(self, request, spider):
  18. ua = random.choice(USER_AGENT_LIST)
  19. if ua:
  20. request.headers.setdefault('User-Agent', ua)
  21. #log.msg('>>>> UA %s'%request.headers)
  22.  
  23. # Snippet imported from snippets.scrapy.org (which no longer works)
  24. # author: dushyant
  25. # date : Sep 16, 2011
  26.  

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.