Here are examples of the Python API scrapy.http.response.text.TextResponse, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
1 Example
0
View Complete Implementation : scrapy_middleware.py
Copyright MIT License
Author : TeamHG-Memex
Copyright MIT License
Author : TeamHG-Memex
def process_response(self, request, response, spider):
    """Feed a crawled text page into the duplicate predictor.

    Responses that are not ``TextResponse`` instances, and requests for
    which ``self.skip(request)`` is true, are returned without any
    processing.  For every other response the URL and the extracted text
    either update the existing ``self.dupe_predictor`` or, while no
    predictor exists yet, are buffered in ``self.initial_queue``.  Once
    the buffer holds at least ``self.initial_queue_limit`` pages, a
    ``DupePredictor`` is built from the buffered texts, every buffered
    page is replayed into it, and the buffer is released.

    Args:
        request: Request that produced ``response``; only passed to
            ``self.skip``.
        response: Downloaded response to inspect.
        spider: Not used by this method.

    Returns:
        The same ``response`` object that was passed in.
    """
    if (not isinstance(response, scrapy.http.response.text.TextResponse)
            or self.skip(request)):
        return response

    url, text = response.url, extract_text(response)
    # perf_counter() is used because the value only measures elapsed time
    # and must not be affected by wall-clock adjustments.
    started = time.perf_counter()

    if self.dupe_predictor:
        self.dupe_predictor.update_model(url, text)
        elapsed = time.perf_counter() - started
        # Only updates slower than 10 ms are worth a log line.
        if elapsed > 0.01:
            logger.debug('Updated model in %.4f s for %s', elapsed, url)
        return response

    # No predictor yet: keep collecting pages until the sample is big enough.
    self.initial_queue.append((url, text))
    if len(self.initial_queue) >= self.initial_queue_limit:
        logger.debug(
            'Gathered enough initial pages, building DupePredictor')
        self.dupe_predictor = DupePredictor(
            texts_sample=[page_text for _, page_text in self.initial_queue])
        # Update model with all the pages we have missed
        for page_url, page_text in self.initial_queue:
            self.dupe_predictor.update_model(page_url, page_text)
        # The buffer is no longer needed once the predictor exists.
        self.initial_queue = None
        logger.debug(
            'Built DupePredictor in %.4f s', time.perf_counter() - started)
    return response