Return to Snippet

Revision: 59320
at September 1, 2012 07:15 by scrapy


Initial Code
# Gathers global stats and sends them via email / prints them to stdout when Scrapy is done.

"""
The StatsDump extension sends an email and prints stats to stdout when the engine finishes scraping.

Use the STATSMAILER_RCPTS setting to specify the recipient email addresses.
"""

from scrapy.xlib.pydispatch import dispatcher
from datetime import datetime
from scrapy.stats import stats
from scrapy import signals
from scrapy.mail import MailSender
from scrapy.conf import settings

from pprint import pprint

class StatsDump(object):
    """Record run timing in the global stats, then email and print them.

    On construction the extension reads the ``STATSMAILER_RCPTS`` setting
    and connects its handlers to the ``engine_started`` and
    ``engine_stopped`` signals.  When the engine stops it stores
    ``start_time``, ``finish_time`` and ``total_time`` in the global stats,
    emails the stats to the configured recipients (if any) and
    pretty-prints them to stdout.
    """

    # Set by engine_started(); stays None if that handler never runs, so
    # engine_stopped() can still report instead of raising AttributeError.
    start_time = None

    def __init__(self):
        """Read the recipient list and hook the engine start/stop signals."""
        self.recipients = settings.getlist("STATSMAILER_RCPTS")

        dispatcher.connect(self.engine_stopped, signals.engine_stopped)
        dispatcher.connect(self.engine_started, signals.engine_started)

    def engine_started(self):
        """Remember the moment the engine started."""
        self.start_time = datetime.now()

    def engine_stopped(self):
        """Store timing values in the stats, then mail and print all stats."""
        now_time = datetime.now()
        started = self.start_time

        # The start time is only known if engine_started() ran; the finish
        # time is always recorded.
        if started is not None:
            stats.set_value('start_time', str(started))
        stats.set_value('finish_time', str(now_time))
        if started is not None:
            stats.set_value('total_time', str(now_time - started))

        # Take one snapshot so the email and the stdout dump agree.
        collected = stats.get_stats()

        if self.recipients:
            mail = MailSender()
            # Sort by key so the email body is deterministic, like the
            # key-sorted pprint output below.
            rows = sorted(collected.items(), key=lambda pair: str(pair[0]))
            body = "Global stats\n\n"
            body += "\n".join("%-50s : %s" % row for row in rows)
            mail.send(self.recipients, "Scrapy stats", body)

        pprint(collected)

# Snippet imported from snippets.scrapy.org (which no longer works)
# author: dchaplinsky
# date  : Oct 07, 2011

Initial URL

                                

Initial Description

                                

Initial Title
My approach to stats extension

Initial Tags

                                

Initial Language
Python