My approach to stats extension


/ Published in: Python
Save to your folder(s)



Copy this code and paste it in your HTML
  1. # Gather global stats and send them via email / print them to stdout when Scrapy is done.
  2.  
  3. """
  4. StatsDump extension sends an email and prints stats to stdout when the engine finishes scraping.
  5.  
  6. Use the STATSMAILER_RCPTS setting to specify the recipient email addresses.
  7. """
  8.  
  9. from scrapy.xlib.pydispatch import dispatcher
  10. from datetime import datetime
  11. from scrapy.stats import stats
  12. from scrapy import signals
  13. from scrapy.mail import MailSender
  14. from scrapy.conf import settings
  15.  
  16. from pprint import pprint
  17.  
  18. class StatsDump(object):
  19. def __init__(self):
  20. self.recipients = settings.getlist("STATSMAILER_RCPTS")
  21.  
  22. dispatcher.connect(self.engine_stopped, signals.engine_stopped)
  23. dispatcher.connect(self.engine_started, signals.engine_started)
  24.  
  25. def engine_started(self):
  26. self.start_time = datetime.now()
  27.  
  28. def engine_stopped(self):
  29. now_time = datetime.now()
  30. stats.set_value('start_time', str(self.start_time))
  31. stats.set_value('finish_time', str(now_time))
  32. stats.set_value('total_time', str(now_time - self.start_time))
  33.  
  34. if self.recipients:
  35. mail = MailSender()
  36. body = "Global stats\n\n"
  37. body += "\n".join("%-50s : %s" % i for i in stats.get_stats().items())
  38. mail.send(self.recipients, "Scrapy stats", body)
  39.  
  40. pprint(stats.get_stats())
  41.  
  42. # Snippet imported from snippets.scrapy.org (which no longer works)
  43. # author: dchaplinsky
  44. # date : Oct 07, 2011
  45.  

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.