Here are examples of the Python API scrapy.Spider, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
4 Examples
3
View Complete Implementation : tests.py
Copyright MIT License
Author : AlexTan-b-z
Copyright MIT License
Author : AlexTan-b-z
def setUp(self):
    """Create the scheduler under test and a spider named 'myspider'.

    Reads ``self.server``, which this method does not assign; it must
    already be available on the test case.
    """
    prefix = 'scrapy_redis:tests:'

    self.persist = False
    self.key_prefix = prefix
    # The '%(spider)s' placeholder is left unformatted here.
    self.queue_key = prefix + '%(spider)s:requests'
    self.dupefilter_key = prefix + '%(spider)s:dupefilter'
    self.idle_before_close = 0

    scheduler_args = (
        self.server,
        self.persist,
        self.queue_key,
        SpiderQueue,
        self.dupefilter_key,
        self.idle_before_close,
    )
    self.scheduler = Scheduler(*scheduler_args)
    self.spider = Spider('myspider')
3
View Complete Implementation : test_pagestorage.py
Copyright BSD 3-Clause "New" or "Revised" License
Author : scrapy-plugins
Copyright BSD 3-Clause "New" or "Revised" License
Author : scrapy-plugins
def setUp(self):
    """Build a PageStorageMiddleware wired to mocked hsref and crawler objects.

    NOTE(review): the patch on 'sh_scrapy.hsref.hsref' is started here and
    is not stopped in this method; presumably a tearDown stops it - confirm.
    """
    self.spider = Spider('default')

    # Replacement object installed in place of 'sh_scrapy.hsref.hsref'.
    hsref_double = mock.Mock()
    self.mocked_hsref = hsref_double
    self.patch = mock.patch('sh_scrapy.hsref.hsref', hsref_double)

    # Crawler stand-in carrying the page-storage settings.
    page_storage_settings = {
        'PAGE_STORAGE_ENABLED': True,
        'PAGE_STORAGE_MODE': 'VERSIONED_CACHE',
        'PAGE_STORAGE_LIMIT': 10,
        'PAGE_STORAGE_ON_ERROR_LIMIT': 5,
    }
    crawler_double = mock.Mock()
    self.crawler_mock = crawler_double
    crawler_double.settings = Settings(page_storage_settings)

    hsref_double.project.collections.url = '/test/url'

    # The patch is started before the middleware is constructed.
    self.patch.start()
    self.instance = PageStorageMiddleware.from_crawler(crawler_double)
3
View Complete Implementation : test_pagestorage.py
Copyright BSD 3-Clause "New" or "Revised" License
Author : scrapy-plugins
Copyright BSD 3-Clause "New" or "Revised" License
Author : scrapy-plugins
def test_save_response_with_trim(self):
    """Check save_response with and without ``trim_html`` on an oversized body.

    First call (trim_html not yet set by this test): a warning that the page
    was not saved is expected. Second call (trim_html = True): the writer is
    expected to receive the page item with the surrounding whitespace and NUL
    padding removed from the body.
    """
    # The bare '<html><body></body></html>' markup is exactly 26 characters;
    # the response body below is longer because of its padding.
    self.instance._writer.maxitemsize = 26
    self.instance.hsref.job.key = '123/45/67'
    resp = TextResponse(
        'http://resp', request=Request('http://req'), encoding='cp1251',
        body='\r\n\r\n<html><body></body></html>\r\n \0\0\0\0\0')

    # Replace the logger attribute on the Spider class so the warning can be
    # inspected afterwards.
    with mock.patch.object(Spider, 'logger') as log:
        spider = Spider('default')
        self.instance.save_response(resp, self.spider)
    # Was 'astert_called_with': a misspelled name on a Mock is just an
    # auto-created child mock, so the call succeeded and checked nothing.
    log.warning.assert_called_with(
        "Page not saved, body too large: <http://resp>")

    self.instance.trim_html = True
    self.instance.save_response(resp, spider)
    # Same misspelling fixed here so the written item is actually verified.
    self.instance._writer.write.assert_called_with(
        {u'body': u'<html><body></body></html>', u'_encoding': u'cp1251',
         u'_type': u'_pageitem',
         u'_key': u'9b4bed7e56103ddf63455ed39145f61f53b3c702',
         u'url': u'http://resp', '_jobid': '123/45/67'})
0
View Complete Implementation : tests.py
Copyright MIT License
Author : AlexTan-b-z
Copyright MIT License
Author : AlexTan-b-z
def setUp(self):
    """Create the spider, the queue key and the queue under test.

    Reads ``self.queue_cls`` and ``self.server``, neither of which is
    assigned in this method.
    """
    spider_name = 'myspider'
    self.spider = Spider(spider_name)
    self.key = 'scrapy_redis:tests:%s:queue' % self.spider.name
    # The queue gets its own Spider instance, separate from self.spider.
    queue_spider = Spider(spider_name)
    self.q = self.queue_cls(self.server, queue_spider, self.key)