scrapy.Spider - python examples

Here are examples of the Python API scrapy.Spider, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

4 Examples 7

3 View Complete Implementation : tests.py
Copyright MIT License
Author : AlexTan-b-z
    def setUp(self):
        """Prepare the scheduler fixture and the spider used by the tests.

        Stores the configuration values on ``self`` and then builds a
        ``Scheduler`` from ``self.server`` (set up outside this method),
        those values and ``SpiderQueue``.
        """
        prefix = 'scrapy_redis:tests:'
        spider_name = 'myspider'

        # Plain configuration values, kept on the instance for the tests.
        self.persist = False
        self.key_prefix = prefix
        self.queue_key = prefix + '%(spider)s:requests'
        self.dupefilter_key = prefix + '%(spider)s:dupefilter'
        self.idle_before_close = 0

        # Positional order matches the original call exactly.
        scheduler_args = (
            self.server,
            self.persist,
            self.queue_key,
            SpiderQueue,
            self.dupefilter_key,
            self.idle_before_close,
        )
        self.scheduler = Scheduler(*scheduler_args)
        self.spider = Spider(spider_name)

3 View Complete Implementation : test_pagestorage.py
Copyright BSD 3-Clause "New" or "Revised" License
Author : scrapy-plugins
    def setUp(self):
        """Create the spider, the mocks and the middleware under test.

        ``sh_scrapy.hsref.hsref`` is patched with a mock before the
        middleware is built through ``PageStorageMiddleware.from_crawler``.
        NOTE(review): the patch is started here; where it is stopped is not
        visible in this view.
        """
        storage_settings = {
            'PAGE_STORAGE_ENABLED': True,
            'PAGE_STORAGE_MODE': 'VERSIONED_CACHE',
            'PAGE_STORAGE_LIMIT': 10,
            'PAGE_STORAGE_ON_ERROR_LIMIT': 5,
        }

        self.spider = Spider('default')

        # Mock standing in for the patched hsref object.
        hsref_mock = mock.Mock()
        self.mocked_hsref = hsref_mock
        self.patch = mock.patch('sh_scrapy.hsref.hsref', hsref_mock)

        # Crawler stand-in that only carries the settings.
        crawler = mock.Mock()
        self.crawler_mock = crawler
        crawler.settings = Settings(storage_settings)

        hsref_mock.project.collections.url = '/test/url'

        # The patch must be active before the middleware is instantiated.
        self.patch.start()
        self.instance = PageStorageMiddleware.from_crawler(crawler)

3 View Complete Implementation : test_pagestorage.py
Copyright BSD 3-Clause "New" or "Revised" License
Author : scrapy-plugins
    def test_save_response_with_trim(self):
        """Check ``save_response`` with and without HTML trimming.

        With the writer's ``maxitemsize`` set to 26, the first call is
        expected to log a "body too large" warning. After ``trim_html`` is
        enabled, the second call is expected to write the page item with the
        surrounding whitespace and NUL bytes removed from the body.
        """
        self.instance._writer.maxitemsize = 26
        self.instance.hsref.job.key = '123/45/67'
        resp = TextResponse(
            'http://resp', request=Request('http://req'), encoding='cp1251',
            body='\r\n\r\n<html><body></body></html>\r\n \0\0\0\0\0')
        # Patching ``logger`` on the class makes every Spider instance log
        # through the ``log`` mock while the block is active.
        with mock.patch.object(Spider, 'logger') as log:
            spider = Spider('default')
            # NOTE(review): this call uses self.spider while the second one
            # below uses the local ``spider``; kept as in the original.
            self.instance.save_response(resp, self.spider)
        # Must be ``assert_called_with``: a misspelled name is auto-created
        # by Mock and called as a no-op, so the check would never run.
        log.warning.assert_called_with(
            "Page not saved, body too large: <http://resp>")
        self.instance.trim_html = True
        self.instance.save_response(resp, spider)
        self.instance._writer.write.assert_called_with(
            {u'body': u'<html><body></body></html>', u'_encoding': u'cp1251',
             u'_type': u'_pageitem',
             u'_key': u'9b4bed7e56103ddf63455ed39145f61f53b3c702',
             u'url': u'http://resp', '_jobid': '123/45/67'})

0 View Complete Implementation : tests.py
Copyright MIT License
Author : AlexTan-b-z
    def setUp(self):
        """Create the spider, the queue key and the queue under test.

        The key is derived from the spider's name; the queue is built from
        ``self.queue_cls`` with ``self.server`` (both provided outside this
        method) and a second, separately constructed spider.
        """
        spider_name = 'myspider'
        self.spider = Spider(spider_name)

        queue_key = 'scrapy_redis:tests:%s:queue' % self.spider.name
        self.key = queue_key

        # The queue receives its own Spider instance, not self.spider.
        self.q = self.queue_cls(self.server, Spider(spider_name), queue_key)