scrapy.utils.log.configure_logging - Python examples

Here are examples of the Python API scrapy.utils.log.configure_logging, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

17 Examples
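Before the project snippets, here is a minimal, self-contained sketch of the pattern most of them follow: call configure_logging(install_root_handler=False) so that Scrapy does not attach its own root log handler, then configure the standard library logging module directly. The file name and format string below are illustrative only, not taken from any of the projects listed here.

import logging

from scrapy.utils.log import configure_logging

# Skip Scrapy's default root handler so that the basicConfig() call below
# decides where log records go.
configure_logging(install_root_handler=False)
logging.basicConfig(
    filename='spider.log',  # illustrative path, not from the examples below
    format='%(levelname)s %(asctime)s: %(message)s',
    level=logging.DEBUG,
)

logging.info('logging now goes through the handler installed above')

Alternatively, configure_logging(settings) accepts a Settings object or dict and builds Scrapy's handlers from LOG_FILE, LOG_LEVEL and related settings; the config.py and cli.py examples below use that form.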

Example: run_spider.py (3 votes, MIT License, Author: awolfly9)
def runspider(name):
    configure_logging(install_root_handler=False)
    logging.basicConfig(
        filename='log/%s.log' % name,
        format='%(levelname)s %(asctime)s: %(message)s',
        level=logging.DEBUG
    )
    process = CrawlerProcess(get_project_settings())
    try:
        logging.info('runspider start spider:%s' % name)
        process.crawl(name)
        process.start()
    except Exception as e:
        logging.exception('runspider spider:%s exception:%s' % (name, e))

    logging.debug('finish this spider:%s\n\n' % name)

Example: full_analysis.py (3 votes, GNU Lesser General Public License v3.0, Author: awolfly9)
    def runspider(self):
        configure_logging(install_root_handler = False)
        s = get_project_settings()
        runner = CrawlerRunner(settings = s)

        @defer.inlineCallbacks
        def crawl(**spargs):
            yield runner.crawl(JDItemInfoSpider, **spargs)
            yield runner.crawl(JDCommentSpider, **spargs)
            reactor.stop()

        crawl(**self.spargs)
        reactor.run()  # the script will block here until the last crawl call is finished

Example: config.py (3 votes, Apache License 2.0, Author: fhamborg)
    def handle_logging(self):
        """
        To allow devs to log as early as possible, logging will already be
        handled here
        """

        configure_logging(self.get_scrapy_options())

        # Disable duplicates
        self.__scrapy_options["LOG_ENABLED"] = False

        # Now, after the log level is correctly set, let's log them.
        for msg in self.log_output:
            if msg["level"] == "error":
                self.log.error(msg["msg"])
            elif msg["level"] == "info":
                self.log.info(msg["msg"])
            elif msg["level"] == "debug":
                self.log.debug(msg["msg"])

Example: run.py (3 votes, MIT License, Author: matejbasic)
def run():
    configure_logging()
    # importing project settings for further usage
    # mainly because of the middlewares
    settings = get_project_settings()
    runner = CrawlerRunner(settings)

    # running spiders sequentially (non-distributed)
    @defer.inlineCallbacks
    def crawl():
        yield runner.crawl(IPTesterSpider)
        yield runner.crawl(UATesterSpider)
        reactor.stop()

    crawl()
    reactor.run() # block until the last call

Example: cli.py (3 votes, GNU Affero General Public License v3.0, Author: PyFeeds)
@cli.command()
@click.pass_context
def cleanup(ctx):
    """
    Cleanup old cache entries.

    By default, entries older than 90 days will be removed. This value can be
    overridden in the config file.
    """
    settings = ctx.obj["settings"]
    # Manually configure logging since we don't have a CrawlerProcess which
    # would take care of that.
    configure_logging(settings)

    if not settings.getbool("HTTPCACHE_ENABLED"):
        logger.error("Cache is disabled, will not clean up cache dir.")
        return 1

    run_cleanup_cache(settings)

Example: runspider.py (0 votes, MIT License, Author: awolfly9)
def runspider(name, product_id):
    configure_logging(install_root_handler = False)
    logging.basicConfig(
            filename = 'log/%s.log' % product_id,
            format = '%(levelname)s %(asctime)s: %(message)s',
            level = logging.DEBUG
    )
    process = CrawlerProcess(get_project_settings())
    try:
        logging.info('runscrapy start spider:%s' % name)
        data = {
            'product_id': product_id
        }
        process.crawl(name, **data)
        process.start()
    except Exception as e:
        logging.error('runscrapy spider:%s exception:%s' % (name, e))
        pass

    logging.info('finish this spider:%s\n\n' % name)

Example: full_analysis.py (0 votes, GNU Lesser General Public License v3.0, Author: awolfly9)
    def __init__(self, red, key, user):
        self.key = key
        self.red = red

        data = json.loads(user)
        self.product_id = data.get('product_id')
        self.url = data.get('url')
        self.email = data.get('email')
        self.guid = data.get('guid')
        self.spider_name = 'jd_comment'
        self.spargs = data

        self.sql = SqlHelper()
        self.spargs['red'] = self.red
        self.spargs['sql'] = self.sql

        if not os.path.exists('log'):
            os.makedirs('log')

        configure_logging(install_root_handler = False)
        logging.basicConfig(
                filename = 'log/%s.log' % self.product_id,
                format = '%(levelname)s %(asctime)s: %(message)s',
                level = logging.DEBUG
        )

Example: real_time_analysis.py (0 votes, GNU Lesser General Public License v3.0, Author: awolfly9)
    def handle(self, *args, **options):
        reload(sys)
        sys.setdefaultencoding('utf-8')
        os.chdir(sys.path[0])

        spargs = utils.arglist_to_dict(options['spargs'])

        if not os.path.exists('log'):
            os.makedirs('log')

        configure_logging(install_root_handler = False)
        logging.basicConfig(
                filename = 'log/%s.log' % spargs.get('product_id'),
                format = '%(levelname)s %(asctime)s: %(message)s',
                level = logging.ERROR
        )

        guid = spargs.get('guid', '0')
        product_id = spargs.get('product_id', '0')

        if guid == '0' or product_id == '0':
            utils.log('Invalid arguments passed for data analysis, received: spargs:%s' % spargs)
            utils.push_redis(guid = guid, product_id = product_id, info = 'Invalid arguments passed for data analysis, received:%s' % spargs)
            utils.push_redis(guid = guid, product_id = product_id, info = 'finish')
            return

        utils.log('Starting analysis:%s' % spargs)
        sql = SqlHelper()
        red = redis.StrictRedis(host = config.redis_host, port = config.redis_part, db = config.redis_db,
                                password = config.redis_pass)
        spargs['sql'] = sql
        spargs['red'] = red

        # run the spider
        runspider(spargs)

        # start the analysis
        analysis = RealTimeAnalysis(**spargs)
        analysis.run()

Example: real_time_analysis.py (0 votes, GNU Lesser General Public License v3.0, Author: awolfly9)
def runspider(spargs):
    url = spargs.get('url')
    name = spargs.get('name', 'jd')

    if not os.path.exists('log'):
        os.makedirs('log')

    configure_logging(install_root_handler = False)
    logging.basicConfig(
            filename = 'log/%s.log' % name,
            format = '%(levelname)s %(asctime)s: %(message)s',
            level = logging.ERROR
    )
    print "get_project_settings().attributes:", get_project_settings().attributes['SPIDER_MODULES']
    process = CrawlerProcess(get_project_settings())
    start_time = time.time()
    try:
        logging.info('entering the spider')
        process.crawl(name, **spargs)
        process.start()
    except Exception as e:
        process.stop()
        logging.error("url:%s, errorMsg:%s" % (url, e.message))
    finally:
        logging.error("url:%s, errorMsg:%s" % (url, "spider terminated"))

    utils.log('spider crawl time:%s' % str(time.time() - start_time))

Example: run_spider.py (0 votes, GNU Lesser General Public License v3.0, Author: awolfly9)
def runspider(spargs):
    url = spargs.get('url')
    name = spargs.get('name', 'jd')
    guid = spargs.get('guid')
    product_id = spargs.get('product_id')

    if not os.path.exists('log'):
        os.makedirs('log')

    configure_logging(install_root_handler = False)
    logging.basicConfig(
            filename = 'log/%s.log' % name,
            format = '%(levelname)s %(asctime)s: %(message)s',
            level = logging.ERROR
    )
    print "get_project_settings().attributes:", get_project_settings().attributes['SPIDER_MODULES']
    process = CrawlerProcess(get_project_settings())
    start_time = time.time()
    try:
        logging.info('entering the spider')
        process.crawl(name, **spargs)
        process.start()
    except Exception as e:
        process.stop()
        logging.error("url:%s, errorMsg:%s" % (url, e.message))
    finally:
        logging.error("url:%s, errorMsg:%s" % (url, "spider terminated"))

    utils.log('spider crawl time:%s' % str(time.time() - start_time))