Here are examples of the Python API scrapy.selector.unified.Selector.xpath.extract_first, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
1 Examples
0
View Complete Implementation : stock_gn_spider.py
Copyright MIT License
Author : foolcage
Copyright MIT License
Author : foolcage
def parse_item(self, response):
    """Parse one page of the 10jqka 'gn' sector listing.

    When ``response.meta['index']`` is 1, the pager text under
    ``#m-page`` (split on ``/`` into two integers) is used to schedule
    one follow-up request per remaining page, each handled by this same
    callback. Every page then yields one ``SectorItem`` per table row.

    Args:
        response: The page response; ``response.meta['index']`` carries
            the page number the request was issued for.

    Yields:
        ``Request`` objects for the pages after the current one (first
        page only), followed by one ``SectorItem`` per ``<tr>`` found
        under ``/html/body/table/tbody``.
    """
    if response.meta['index'] == 1:
        pager_texts = response.selector.xpath('//*[@id="m-page"]/span/text()').extract()
        pager_texts = [text.strip() for text in pager_texts if text.strip()]
        try:
            current_page, page_count = [int(part) for part in pager_texts[0].split('/')]
        except (IndexError, ValueError):
            # Missing or malformed pager text must not stop the rows of
            # this page from being parsed; only pagination is skipped.
            self.logger.warning('no usable pager text on %s; skipping pagination', response.url)
        else:
            for page in range(current_page + 1, page_count + 1):
                yield Request(url=self.get_gn_url(page), headers=TONGHUASHUN_GN_HEADER,
                              meta={'index': page},
                              callback=self.parse_item)

    rows = response.xpath('/html/body/table/tbody//tr').extract()
    try:
        for row_html in rows:
            # Parse the row fragment once and reuse it for every column.
            row = Selector(text=row_html)
            start_date = row.xpath('//td[1]/text()').extract_first()
            name = row.xpath('//td[2]/a/text()').extract_first()
            link = row.xpath('//td[2]/a/@href').extract_first()
            news_satle = row.xpath('//td[3]/a/text()').extract_first()
            news_link = row.xpath('//td[3]/a/@href').extract_first()
            # Take the links from the current row (not the first row of
            # the table); the second-to-last '/'-separated piece of each
            # href is kept.
            leadings = [href.rsplit('/')[-2] for href in row.xpath('//td[4]/a/@href').extract()]
            # NOTE(review): extract() returns a list here, unlike the
            # other columns -- kept as-is in case consumers rely on it.
            count_texts = row.xpath('//td[5]/text()').extract()
            # 'news_satle' is the keyword SectorItem is called with in
            # the original code, so the spelling is preserved.
            yield SectorItem(id='{}_{}_{}'.format('10jqka', 'gn', name), start_date=start_date, name=name,
                             link=link, news_satle=news_satle, news_link=news_link, leadings=leadings,
                             count=count_texts, producer='10jqka', type='gn')
    except Exception as e:
        # Best effort: a failure while parsing rows is logged and ends
        # the row loop for this page instead of propagating.
        self.logger.exception('error parse 10jqka gainian sector url:{} {}'.format(response.url, e))