Posted By

scrapy on 09/01/12


Tagged

scrapy


Versions (?)

Django and Scrapy without using DjangoItem


 / Published in: Python
 

  1. # # Create the Django project: django-admin.py startproject djangoapp
  2. # # Create the Django app that holds your model: python manage.py startapp websites
  3. # # Edit Scrapy's settings.py, adding a function that points it at the Django environment
  4. # # Create a pipeline that saves items to Django using the model's save() method
  5.  
  6. ***settings.py***
  7.  
  8. import os
  # Dotted paths of the item pipeline classes to enable
  # (presumably consumed by Scrapy when the project starts -- confirm).
  ITEM_PIPELINES = [
      'myapp.pipelines.DjangoPipeline',
  ]
  10.  
  11. # http://stackoverflow.com/questions/4271975/access-django-models-inside-of-scrapy
  def setup_django_env(path):
      """Load the Django ``settings`` module found in *path* and activate it.

      The module is located and loaded with ``imp`` and then handed to
      ``django.core.management.setup_environ``.

      Args:
          path: Directory searched for the ``settings`` module.

      Raises:
          ImportError: If ``imp.find_module`` cannot find ``settings`` in
              *path*, or if Django itself cannot be imported.

      NOTE(review): ``setup_environ`` was deprecated in Django 1.4 and
      ``imp`` is deprecated in Python 3 -- confirm the target versions
      before reusing this helper.
      """
      import imp
      from django.core.management import setup_environ

      settings_file, filename, desc = imp.find_module('settings', [path])
      try:
          project = imp.load_module('settings', settings_file, filename, desc)
      finally:
          # imp.find_module() returns an open file object and leaves closing
          # it to the caller; release it even when load_module() raises.
          if settings_file is not None:
              settings_file.close()

      setup_environ(project)
  20.  
  21.  
  # Absolute path of the directory one level above the directory that
  # contains this settings file.
  current_dir = os.path.abspath(
      os.path.dirname(os.path.dirname(__file__))
  )

  # Point Django at the ``djangoapp`` directory, resolved relative to
  # ``current_dir``.
  setup_django_env(os.path.join(current_dir, '../djangoapp/'))
  24.  
  25. ***pipelines.py***
  import datetime

  from django.db.utils import IntegrityError
  from scrapy.exceptions import DropItem

  from djangoapp.websites.models import Website
  28.  
  class DjangoPipeline(object):
      """Item pipeline that stores each item's first link as a ``Website`` row."""

      def process_item(self, item, spider):
          """Save the item's first link through the Django ORM.

          Args:
              item: Scraped item; ``item['link'][0]`` is the value stored.
              spider: Spider that produced the item (unused here).

          Returns:
              The unchanged *item* once the row has been saved.

          Raises:
              DropItem: If saving raises ``IntegrityError``; the message
                  reports the link as a duplicate.
          """
          # Read the link once; it is needed for both the row and the
          # error message.
          link = item['link'][0]
          website = Website(link=link, created=datetime.datetime.now())
          try:
              website.save()
          except IntegrityError:
              raise DropItem("Contains duplicate domain: %s" % link)
          return item
  40.  
  41. ***djangoapp/websites/models.py***
  42.  
  43. from django.db import models
  44.  
  class Website(models.Model):
      """Model holding a website's link and its creation timestamp."""

      # unique=True: no two rows may share the same link.
      link = models.CharField(max_length=200, unique=True)
      # 'date created' is passed as the field's first positional argument.
      created = models.DateTimeField('date created')

      def __unicode__(self):
          """Return the link as this object's unicode representation."""
          return u"%s" % self.link
  51.  
  52. # Snippet imported from snippets.scrapy.org (which no longer works)
  53. # author: redtricycle
  54. # date : Nov 27, 2011
  55.  

Report this snippet  

You need to login to post a comment.