Revision: 59320
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at September 1, 2012 07:15 by scrapy
Initial Code
# Gather global stats and send them via email / print them to stdout when Scrapy is done.
"""
StatsDump extension sends an email and prints stats to stdout when an engine
finishes scraping.

Use the STATSMAILER_RCPTS setting to give the recipient mail address(es).
"""
from scrapy.xlib.pydispatch import dispatcher
from datetime import datetime
from scrapy.stats import stats
from scrapy import signals
from scrapy.mail import MailSender
from scrapy.conf import settings
from pprint import pprint


class StatsDump(object):
    """Record engine timing in the global stats, then mail and print them.

    On construction the instance reads the recipient list from the
    STATSMAILER_RCPTS setting and connects its handlers to the
    ``engine_started`` and ``engine_stopped`` signals.
    """

    def __init__(self):
        self.recipients = settings.getlist("STATSMAILER_RCPTS")
        # Stays None until engine_started fires, so engine_stopped can tell
        # "start never observed" apart from a real timestamp instead of
        # failing with AttributeError.
        self.start_time = None
        dispatcher.connect(self.engine_stopped, signals.engine_stopped)
        dispatcher.connect(self.engine_started, signals.engine_started)

    def engine_started(self):
        """Remember the local time at which the engine started."""
        self.start_time = datetime.now()

    def engine_stopped(self):
        """Store timing values in the stats, mail them if configured, print them.

        ``finish_time`` is always stored. ``start_time`` and ``total_time``
        are stored only when the start signal was observed. All three values
        are stored as strings.
        """
        now_time = datetime.now()
        started = self.start_time

        if started is not None:
            stats.set_value('start_time', str(started))
        stats.set_value('finish_time', str(now_time))
        if started is not None:
            stats.set_value('total_time', str(now_time - started))

        if self.recipients:
            mail = MailSender()
            body = "Global stats\n\n"
            # Each item is a (name, value) pair, which fills both placeholders.
            body += "\n".join("%-50s : %s" % i for i in stats.get_stats().items())
            mail.send(self.recipients, "Scrapy stats", body)

        pprint(stats.get_stats())


# Snippet imported from snippets.scrapy.org (which no longer works)
# author: dchaplinsky
# date  : Oct 07, 2011
Initial URL
Initial Description
Initial Title
My approach to stats extension
Initial Tags
Initial Language
Python