Here are examples of the Python API scrapy.utils.log.configure_logging taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
17 Examples
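Before the examples, note the shape of the API itself: configure_logging(settings=None, install_root_handler=True) hooks Python's standard logging up to Scrapy's defaults. Most of the examples below call it with install_root_handler=False so they can attach their own root handler through logging.basicConfig. A minimal sketch of that pattern (the file name app.log is our own placeholder, not taken from any example):

import logging
from scrapy.utils.log import configure_logging

# Let Scrapy configure logging, but skip its root handler so we can add ours.
configure_logging(install_root_handler=False)
logging.basicConfig(
    filename='app.log',  # placeholder path
    format='%(levelname)s %(asctime)s: %(message)s',
    level=logging.INFO,
)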
3
View Complete Implementation : run_spider.py
Copyright MIT License
Author : awolfly9
def runspider(name):
    configure_logging(install_root_handler=False)
    logging.basicConfig(
        filename='log/%s.log' % name,
        format='%(levelname)s %(asctime)s: %(message)s',
        level=logging.DEBUG
    )

    process = CrawlerProcess(get_project_settings())
    try:
        logging.info('runspider start spider:%s' % name)
        process.crawl(name)
        process.start()
    except Exception as e:
        logging.exception('runspider spider:%s exception:%s' % (name, e))

    logging.debug('finish this spider:%s\n\n' % name)
3
View Complete Implementation : full_analysis.py
Copyright GNU Lesser General Public License v3.0
Author : awolfly9
def runspider(self):
    configure_logging(install_root_handler=False)
    s = get_project_settings()
    runner = CrawlerRunner(settings=s)

    @defer.inlineCallbacks
    def crawl(**spargs):
        yield runner.crawl(JDItemInfoSpider, **spargs)
        yield runner.crawl(JDCommentSpider, **spargs)
        reactor.stop()

    crawl(**self.spargs)
    reactor.run()  # the script will block here until the last crawl call is finished
3
View Complete Implementation : config.py
Copyright Apache License 2.0
Author : fhamborg
def handle_logging(self):
    """
    To allow devs to log as early as possible, logging is already
    handled here.
    """
    configure_logging(self.get_scrapy_options())
    # Disable duplicates
    self.__scrapy_options["LOG_ENABLED"] = False

    # Now, after the log level is correctly set, let's log them.
    for msg in self.log_output:
        if msg["level"] == "error":
            self.log.error(msg["msg"])
        elif msg["level"] == "info":
            self.log.info(msg["msg"])
        elif msg["level"] == "debug":
            self.log.debug(msg["msg"])
3
View Complete Implementation : run.py
Copyright MIT License
Author : matejbasic
def run():
    configure_logging()
    # importing project settings for further usage,
    # mainly because of the middlewares
    settings = get_project_settings()
    runner = CrawlerRunner(settings)

    # running spiders sequentially (non-distributed)
    @defer.inlineCallbacks
    def crawl():
        yield runner.crawl(IPTesterSpider)
        yield runner.crawl(UATesterSpider)
        reactor.stop()

    crawl()
    reactor.run()  # block until the last call
3
View Complete Implementation : cli.py
Copyright GNU Affero General Public License v3.0
Author : PyFeeds
@cli.command()
@click.pass_context
def cleanup(ctx):
    """
    Cleanup old cache entries.

    By default, entries older than 90 days will be removed. This value can be
    overridden in the config file.
    """
    settings = ctx.obj["settings"]

    # Manually configure logging since we don't have a CrawlerProcess which
    # would take care of that.
    configure_logging(settings)

    if not settings.getbool("HTTPCACHE_ENABLED"):
        logger.error("Cache is disabled, will not clean up cache dir.")
        return 1

    run_cleanup_cache(settings)
0
View Complete Implementation : runspider.py
Copyright MIT License
Author : awolfly9
def runspider(name, product_id):
    configure_logging(install_root_handler=False)
    logging.basicConfig(
        filename='log/%s.log' % product_id,
        format='%(levelname)s %(asctime)s: %(message)s',
        level=logging.DEBUG
    )

    process = CrawlerProcess(get_project_settings())
    try:
        logging.info('runscrapy start spider:%s' % name)
        data = {
            'product_id': product_id
        }
        process.crawl(name, **data)
        process.start()
    except Exception as e:
        logging.error('runscrapy spider:%s exception:%s' % (name, e))
        pass

    logging.info('finish this spider:%s\n\n' % name)
0
View Complete Implementation : full_analysis.py
Copyright GNU Lesser General Public License v3.0
Author : awolfly9
def __init__(self, red, key, user):
    self.key = key
    self.red = red

    data = json.loads(user)
    self.product_id = data.get('product_id')
    self.url = data.get('url')
    self.email = data.get('email')
    self.guid = data.get('guid')
    self.spider_name = 'jd_comment'
    self.spargs = data

    self.sql = SqlHelper()
    self.spargs['red'] = self.red
    self.spargs['sql'] = self.sql

    if not os.path.exists('log'):
        os.makedirs('log')

    configure_logging(install_root_handler=False)
    logging.basicConfig(
        filename='log/%s.log' % self.product_id,
        format='%(levelname)s %(asctime)s: %(message)s',
        level=logging.DEBUG
    )
0
View Complete Implementation : real_time_analysis.py
Copyright GNU Lesser General Public License v3.0
Author : awolfly9
def handle(self, *args, **options):
    # Python 2 only: force UTF-8 as the default string encoding
    reload(sys)
    sys.setdefaultencoding('utf-8')

    os.chdir(sys.path[0])
    spargs = utils.arglist_to_dict(options['spargs'])

    if not os.path.exists('log'):
        os.makedirs('log')

    configure_logging(install_root_handler=False)
    logging.basicConfig(
        filename='log/%s.log' % spargs.get('product_id'),
        format='%(levelname)s %(asctime)s: %(message)s',
        level=logging.ERROR
    )

    guid = spargs.get('guid', '0')
    product_id = spargs.get('product_id', '0')

    if guid == '0' or product_id == '0':
        utils.log('invalid arguments for analysis, received spargs:%s' % spargs)
        utils.push_redis(guid=guid, product_id=product_id,
                         info='invalid arguments for analysis, received:%s' % spargs)
        utils.push_redis(guid=guid, product_id=product_id, info='finish')
        return

    utils.log('start analysis:%s' % spargs)

    sql = SqlHelper()
    red = redis.StrictRedis(host=config.redis_host, port=config.redis_part, db=config.redis_db,
                            password=config.redis_pass)
    spargs['sql'] = sql
    spargs['red'] = red

    # run the spider
    runspider(spargs)

    # start the real-time analysis
    analysis = RealTimeAnalysis(**spargs)
    analysis.run()
0
View Complete Implementation : real_time_analysis.py
Copyright GNU Lesser General Public License v3.0
Author : awolfly9
def runspider(spargs):
    url = spargs.get('url')
    name = spargs.get('name', 'jd')

    if not os.path.exists('log'):
        os.makedirs('log')

    configure_logging(install_root_handler=False)
    logging.basicConfig(
        filename='log/%s.log' % name,
        format='%(levelname)s %(asctime)s: %(message)s',
        level=logging.ERROR
    )
    print("get_project_settings().attributes:", get_project_settings().attributes['SPIDER_MODULES'])

    process = CrawlerProcess(get_project_settings())
    start_time = time.time()
    try:
        logging.info('entering spider')
        process.crawl(name, **spargs)
        process.start()
    except Exception as e:
        process.stop()
        logging.error("url:%s, errorMsg:%s" % (url, e))
    finally:
        logging.error("url:%s, errorMsg:%s" % (url, "spider terminated"))

    utils.log('spider crawl time:%s' % str(time.time() - start_time))
0
View Complete Implementation : run_spider.py
Copyright GNU Lesser General Public License v3.0
Author : awolfly9
def runspider(spargs):
    url = spargs.get('url')
    name = spargs.get('name', 'jd')
    guid = spargs.get('guid')
    product_id = spargs.get('product_id')

    if not os.path.exists('log'):
        os.makedirs('log')

    configure_logging(install_root_handler=False)
    logging.basicConfig(
        filename='log/%s.log' % name,
        format='%(levelname)s %(asctime)s: %(message)s',
        level=logging.ERROR
    )
    print("get_project_settings().attributes:", get_project_settings().attributes['SPIDER_MODULES'])

    process = CrawlerProcess(get_project_settings())
    start_time = time.time()
    try:
        logging.info('entering spider')
        process.crawl(name, **spargs)
        process.start()
    except Exception as e:
        process.stop()
        logging.error("url:%s, errorMsg:%s" % (url, e))
    finally:
        logging.error("url:%s, errorMsg:%s" % (url, "spider terminated"))

    utils.log('spider crawl time:%s' % str(time.time() - start_time))