scrapy.utils.python.MutableChain - python examples

Here are examples of the Python API scrapy.utils.python.MutableChain, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

1 Examples 7

0 View Complete Implementation : spidermw.py
Copyright MIT License
Author : wistbean
    def scrape_response(self, scrape_func, response, request, spider):
        """Run ``response`` through the spider middleware chains around ``scrape_func``.

        Three nested helpers implement the flow:

        * ``process_spider_input`` calls every ``process_spider_input``
          middleware method, then ``scrape_func``.
        * ``process_spider_output`` passes the result through every
          ``process_spider_output`` middleware method.
        * ``process_spider_exception`` offers an exception to every
          ``process_spider_exception`` middleware method.

        :param scrape_func: callable invoked as
            ``scrape_func(response_or_failure, request, spider)``.
        :param response: the response handed to the middleware methods.
        :param request: forwarded unchanged to ``scrape_func``.
        :param spider: forwarded to every middleware method and ``scrape_func``.
        :returns: the object returned by ``mustbe_deferred`` with
            ``process_spider_output`` attached as callback and
            ``process_spider_exception`` as errback (presumably a Twisted
            Deferred -- confirm against ``mustbe_deferred``).
        :raises _InvalidOutput: (inside the callback chain) when a middleware
            method returns a value of an unsupported type.
        """
        def fname(f):
            # "ClassName.method_name" of a bound middleware method; only used
            # to build _InvalidOutput messages.
            return '%s.%s' % (
                six.get_method_self(f).__class__.__name__,
                six.get_method_function(f).__name__)

        def process_spider_input(response):
            for method in self.methods['process_spider_input']:
                try:
                    result = method(response=response, spider=spider)
                    if result is not None:
                        raise _InvalidOutput(
                            'Middleware {} must return None or raise an exception, got {}'
                            .format(fname(method), type(result)))
                except _InvalidOutput:
                    # a middleware contract violation must not be downgraded
                    # to an ordinary failure by the handler below
                    raise
                except Exception:
                    # Failure() is built with no arguments inside the handler;
                    # presumably it captures the in-flight exception (confirm
                    # against the Failure implementation in use)
                    return scrape_func(Failure(), request, spider)
            return scrape_func(response, request, spider)

        def process_spider_exception(_failure, start_index=0):
            exception = _failure.value
            # don't handle _InvalidOutput exception
            if isinstance(exception, _InvalidOutput):
                return _failure
            method_list = islice(self.methods['process_spider_exception'], start_index, None)
            for method_index, method in enumerate(method_list, start=start_index):
                if method is None:
                    continue
                result = method(response=response, exception=exception, spider=spider)
                if _isiterable(result):
                    # stop exception handling by handing control over to the
                    # process_spider_output chain if an iterable has been returned
                    return process_spider_output(result, method_index+1)
                elif result is None:
                    continue
                else:
                    raise _InvalidOutput(
                        'Middleware {} must return None or an iterable, got {}'
                        .format(fname(method), type(result)))
            # no middleware produced an iterable: hand the failure back
            return _failure

        def process_spider_output(result, start_index=0):
            # items in this iterable do not need to go through the process_spider_output
            # chain, they went through it already from the process_spider_exception method
            recovered = MutableChain()

            def evaluate_iterable(iterable, index):
                # Lazily re-yield ``iterable``; an exception raised while it is
                # being consumed is offered to the exception chain starting
                # after position ``index``.
                try:
                    for r in iterable:
                        yield r
                except Exception as ex:
                    exception_result = process_spider_exception(Failure(ex), index+1)
                    if isinstance(exception_result, Failure):
                        # nobody recovered: re-raise the original exception
                        raise
                    recovered.extend(exception_result)

            method_list = islice(self.methods['process_spider_output'], start_index, None)
            for method_index, method in enumerate(method_list, start=start_index):
                if method is None:
                    continue
                # the following might fail directly if the output value is not a generator
                try:
                    result = method(response=response, result=result, spider=spider)
                except Exception as ex:
                    exception_result = process_spider_exception(Failure(ex), method_index+1)
                    if isinstance(exception_result, Failure):
                        raise
                    return exception_result
                if _isiterable(result):
                    result = evaluate_iterable(result, method_index)
                else:
                    raise _InvalidOutput(
                        'Middleware {} must return an iterable, got {}'
                        .format(fname(method), type(result)))

            # regular output first, then anything recovered from exceptions
            return chain(result, recovered)

        dfd = mustbe_deferred(process_spider_input, response)
        dfd.addCallbacks(callback=process_spider_output, errback=process_spider_exception)
        return dfd