Here are examples of the Python API scrapy.spiders.Request, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
2 Examples
0
View Complete Implementation : book_subject.py
Copyright MIT License
Author : csuldw
Copyright MIT License
Author : csuldw
def start_requests(self):
    """Yield one ``Request`` per start URL, each with a fresh random ``bid`` cookie.

    A new 11-character token drawn from ASCII letters and digits is generated
    for every URL in ``self.start_urls`` and sent as the ``bid`` cookie.
    NOTE(review): presumably this makes each request look like a distinct
    browser session to the target site -- confirm against the site's behavior.

    Yields:
        A ``Request`` for each entry in ``self.start_urls``.
    """
    # The alphabet never changes, so build it once instead of once per character.
    alphabet = string.ascii_letters + string.digits
    for url in self.start_urls:
        bid = ''.join(random.choice(alphabet) for _ in range(11))
        yield Request(url, cookies={'bid': bid})
0
View Complete Implementation : qiubai_spider.py
Copyright Apache License 2.0
Author : ychenracing
Copyright Apache License 2.0
Author : ychenracing
def parse_start_url(self, response):
    """Extract one ``QiubaiItem`` per article on the page, then follow pagination.

    Args:
        response: The fetched page; only its ``xpath`` method is used here.

    Yields:
        A ``QiubaiItem`` for every article ``div`` found under the element
        with id ``content-left`` that has a permalink, followed by a
        ``Request`` for the next page (handled by this same callback) when
        the last pagination entry carries a link.
    """
    content_left_div = response.xpath('//div[@id="content-left"]')
    content_div_list = content_left_div.xpath('./div[@class="article block untagged mb15"]')
    for content_div in content_div_list:
        # NOTE(review): "contentHerf" is kept verbatim; presumably it mirrors
        # the spelling used in the site's own markup -- confirm.
        content_href_div = content_div.xpath('./a[@class="contentHerf"]')
        content_link = content_href_div.xpath('./@href').extract_first()
        if content_link is None:
            # The item id is derived from the permalink. Skip this entry
            # rather than raising, so the rest of the page and the
            # pagination request below are still processed.
            continue

        item = QiubaiItem()

        author_div = content_div.xpath('./div[@class="author clearfix"]')
        # Author details are only read when the author block contains a link.
        if author_div.xpath('./a').extract():
            item['profile_link'] = author_div.xpath('./a/@href').extract_first()
            item['avatar'] = author_div.xpath('./a[@rel]/img/@src').extract_first()
            item['name'] = author_div.xpath('./a[@title]/h2/text()').extract_first()
            gender_div = author_div.xpath('./div[contains(@class, "articleGender")]')
            gender_text = gender_div.xpath('./@class').extract_first()
            # extract_first() returns None when nothing matches; passing None
            # to the regex would raise TypeError, so leave the field as None.
            if gender_text is None:
                item['gender'] = None
            else:
                item['gender'] = self.gender_strip_pattern.sub('', gender_text)
            item['age'] = gender_div.xpath('./text()').extract_first()

        # The id is whatever follows the last '/' in the permalink.
        item['_id'] = content_link.rsplit('/', 1)[-1]
        item['content'] = content_href_div.xpath('./div[@class="content"]').extract_first()
        item['content_link'] = content_link

        stat_div = content_div.xpath('./div[@class="stats"]')
        item['up'] = stat_div.xpath('./span[@class="stats-vote"]/i[@class="number"]/text()').extract_first()
        comment_href = stat_div.xpath('./span[@class="stats-comments"]/a[@class="qiushi_comments"]')
        item['comment_num'] = comment_href.xpath('./i[@class="number"]/text()').extract_first()
        yield item

    # The href of the last pagination entry is treated as the next page.
    next_page = content_left_div.xpath('./ul[@class="pagination"]/li[last()]/a/@href').extract_first()
    if next_page:
        yield Request(self.host + next_page, callback=self.parse_start_url)