scrapy.spiders.Request - python examples

Here are examples of the Python API scrapy.spiders.Request, taken from open source projects. By voting an example up, you can indicate which examples are the most useful and appropriate.

2 Examples 7

0 View Complete Implementation : book_subject.py
Copyright MIT License
Author : csuldw
    def start_requests(self):
        """Yield one ``Request`` per entry in ``self.start_urls``.

        Every request carries its own ``bid`` cookie: a freshly generated
        11-character string drawn from ASCII letters and digits.
        """
        alphabet = string.ascii_letters + string.digits
        for start_url in self.start_urls:
            # A new random token is generated for each URL.
            token_chars = [random.choice(alphabet) for _ in range(11)]
            yield Request(start_url, cookies={'bid': ''.join(token_chars)})

0 View Complete Implementation : qiubai_spider.py
Copyright Apache License 2.0
Author : ychenracing
    def parse_start_url(self, response):
        """Extract one ``QiubaiItem`` per article on the page, then follow pagination.

        Articles are the ``div`` children of ``div#content-left`` whose class
        is exactly ``"article block untagged mb15"``. Author fields
        (``profile_link``, ``avatar``, ``name``, ``gender``, ``age``) are set
        only when the author block contains a link. Entries without a content
        link are skipped, because ``_id`` is derived from that link.

        Args:
            response: Response object exposing ``xpath()``.

        Yields:
            ``QiubaiItem`` instances, followed by a ``Request`` for the next
            page (handled by this same method) when a pagination link exists.
        """
        content_left_div = response.xpath('//div[@id="content-left"]')
        article_divs = content_left_div.xpath(
            './div[@class="article block untagged mb15"]')

        for article_div in article_divs:
            # NOTE: "contentHerf" is reproduced as-is; presumably it is the
            # site's own spelling of the class name -- confirm against the page.
            content_anchor = article_div.xpath('./a[@class="contentHerf"]')
            content_link = content_anchor.xpath('./@href').extract_first()
            if content_link is None:
                # No link means no id; skip this entry instead of raising and
                # losing the remaining articles and the next-page request.
                continue

            item = QiubaiItem()

            author_div = article_div.xpath('./div[@class="author clearfix"]')
            if author_div.xpath('./a').extract():
                item['profile_link'] = author_div.xpath('./a/@href').extract_first()
                item['avatar'] = author_div.xpath('./a[@rel]/img/@src').extract_first()
                item['name'] = author_div.xpath('./a[@title]/h2/text()').extract_first()
                gender_div = author_div.xpath('./div[contains(@class, "articleGender")]')
                gender_text = gender_div.xpath('./@class').extract_first()
                # The gender block may be absent; re.sub() rejects None.
                item['gender'] = (
                    self.gender_strip_pattern.sub('', gender_text)
                    if gender_text is not None else None
                )
                item['age'] = gender_div.xpath('./text()').extract_first()

            # The id is the last path segment of the content link.
            item['_id'] = content_link.rsplit('/', 1)[-1]
            item['content'] = content_anchor.xpath('./div[@class="content"]').extract_first()
            item['content_link'] = content_link

            stat_div = article_div.xpath('./div[@class="stats"]')
            item['up'] = stat_div.xpath(
                './span[@class="stats-vote"]/i[@class="number"]/text()').extract_first()
            comment_anchor = stat_div.xpath(
                './span[@class="stats-comments"]/a[@class="qiushi_comments"]')
            item['comment_num'] = comment_anchor.xpath(
                './i[@class="number"]/text()').extract_first()
            yield item

        # The last pagination entry is taken as the "next page" link.
        next_page = content_left_div.xpath(
            './ul[@class="pagination"]/li[last()]/a/@href').extract_first()
        if next_page:
            yield Request(self.host + next_page, callback=self.parse_start_url)