scrapy.utils.project.get_project_settings - Python examples

Here are examples of the Python API scrapy.utils.project.get_project_settings, taken from open source projects.

27 Examples
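For orientation before the project examples, here is a minimal sketch of the usual pattern, assuming it is run from inside a Scrapy project directory; the spider name 'example' is hypothetical:

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

# get_project_settings() locates the project's settings module through the
# SCRAPY_SETTINGS_MODULE environment variable (initialized from scrapy.cfg
# when unset) and returns a populated scrapy.settings.Settings instance.
settings = get_project_settings()
print(settings.get('BOT_NAME'))

# The Settings object is then typically handed to a CrawlerProcess or
# CrawlerRunner, as most of the examples below do:
process = CrawlerProcess(settings)
process.crawl('example')  # hypothetical spider name registered in the project
process.start()  # blocks until the crawl finishes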

Example: scrapyctl.py
Copyright GNU General Public License v3.0
Author : aplanas
    def _get_settings(self):
        """Return the current scrapy settings."""
        if 'SCRAPY_SETTINGS_MODULE' not in os.environ:
            _s = settings.SCRAPY_SETTINGS_MODULE
            os.environ['SCRAPY_SETTINGS_MODULE'] = _s
        return get_project_settings()

Example: run_spider.py
Copyright MIT License
Author : awolfly9
def runspider(name):
    configure_logging(install_root_handler=False)
    logging.basicConfig(
        filename='log/%s.log' % name,
        format='%(levelname)s %(asctime)s: %(message)s',
        level=logging.DEBUG
    )
    process = CrawlerProcess(get_project_settings())
    try:
        logging.info('runspider start spider:%s' % name)
        process.crawl(name)
        process.start()
    except Exception as e:
        logging.exception('runspider spider:%s exception:%s' % (name, e))

    logging.debug('finish this spider:%s\n\n' % name)

Example: full_analysis.py
Copyright GNU Lesser General Public License v3.0
Author : awolfly9
    def runspider(self):
        configure_logging(install_root_handler=False)
        s = get_project_settings()
        runner = CrawlerRunner(settings=s)

        @defer.inlineCallbacks
        def crawl(**spargs):
            yield runner.crawl(JDItemInfoSpider, **spargs)
            yield runner.crawl(JDCommentSpider, **spargs)
            reactor.stop()

        crawl(**self.spargs)
        reactor.run()  # the script will block here until the last crawl call is finished

Example: aiqiyi_spider.py
Copyright GNU General Public License v3.0
Author : czs0x55aa
    def __init__(self):
        scrapy.spiders.Spider.__init__(self)

        self.global_settings = get_project_settings()
        if self.global_settings['PLATFORM'] in ['win', 'mac']:
            self.driver = webdriver.PhantomJS(executable_path=self.global_settings['PHANTOMJS_PATH'])
        elif self.global_settings['PLATFORM'] in ['linux']:
            self.driver = webdriver.PhantomJS()
        self.driver.set_page_load_timeout(30)
        self.driver.implicitly_wait(10)

        self.type_id_list = self.global_settings['CRAWLER']['type_id_list']
        self.re_type_id = re.compile(self.global_settings['CRAWLER']['re_type_id'])
        self.url_template = self.global_settings['CRAWLER']['url_template']

Example: parser.py
Copyright MIT License
Author : Gerapy
def get_follow_requests_and_items(project_path, spider_name, args):
    """
    get follows
    :param project_path:
    :param spider_name:
    :param args:
    :return:
    """
    work_cwd = os.getcwd()
    try:
        os.chdir(project_path)
        settings = get_project_settings()
        check_deprecated_settings(settings)
        sp = SpiderParser(settings, spider_name, args)
        results = sp.run()
        return results
    finally:
        os.chdir(work_cwd)

Example: crawler.py
Copyright MIT License
Author : Karmenzind
def init_crawler_runner():
    crochet.setup()
    init_scrapy_env()
    settings = get_project_settings()
    global CRAWLER_RUNNER
    CRAWLER_RUNNER = CrawlerRunner(settings)
    logger.info('Initialized crawler runner: %s' % CRAWLER_RUNNER)

Example: proxy.py
Copyright MIT License
Author : matejbasic
    def import_settings(self):
        settings = get_project_settings()
        self.password = settings['AUTH_PASSWORD']
        self.http_proxy = settings['HTTP_PROXY']
        self.control_port = settings['CONTROL_PORT']
        self.max_req_per_ip = settings['MAX_REQ_PER_IP']

        self.exit_nodes = settings['EXIT_NODES']
        if self.exit_nodes:
            with Controller.from_port(port=self.control_port) as controller:
                controller.authenticate(self.password)
                controller.set_conf('ExitNodes', self.exit_nodes)
                controller.close()

Example: run.py
Copyright MIT License
Author : matejbasic
def run():
    configure_logging()
    # import the project settings, mainly so the configured middlewares are used
    settings = get_project_settings()
    runner = CrawlerRunner(settings)

    # running spiders sequentially (non-distributed)
    @defer.inlineCallbacks
    def crawl():
        yield runner.crawl(IPTesterSpider)
        yield runner.crawl(UATesterSpider)
        reactor.stop()

    crawl()
    reactor.run() # block until the last call

Example: cli.py
Copyright GNU Affero General Public License v3.0
Author : PyFeeds
@cli.command()
def list():
    """List all available spiders."""
    settings = get_project_settings()
    settings["LOG_ENABLED"] = False
    process = CrawlerProcess(settings)
    for s in sorted(process.spider_loader.list()):
        print(s)

Example: launcher.py
Copyright MIT License
Author : Tiago-Lira
    def _set_timeout(self, process):
        with runner.project_environment(process.project):
            loader = _get_spider_loader(get_project_settings())
            spider = loader.load(process.spider)
            timeout = getattr(spider, 'timeout', None)
            if timeout:
                timeout = int(spider.timeout)
                log.msg('Spider has timeout of {} min'.format(timeout))
                reactor.callLater(
                    timeout * 60, self.terminate_process, process)