scrapy.spiderloader.SpiderLoader.from_settings - python examples

Here are examples of the Python API scrapy.spiderloader.SpiderLoader.from_settings, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

2 Examples 7

0 View Complete Implementation : create_schedules.py
Copyright GNU General Public License v3.0
Author : ClipboardProject
def run():
    """Create or update one scheduler job for every enabled spider.

    Loads all spiders known to the project, keeps those whose ``enabled``
    attribute is truthy, fetches the jobs currently registered at
    ``config.scheduler_jobs`` and then, for each enabled spider, either
    updates its existing job (HTTP PUT) or registers a new one (HTTP POST).

    Raises:
        ConnectionError: if the job list could not be fetched after
            ``ATTEMPTS`` tries.
        Exception: if the scheduler rejects an update or creation request;
            the message is the response body.
    """
    settings = project.get_project_settings()
    spider_loader = spiderloader.SpiderLoader.from_settings(settings)
    enabled_spiders = [
        spider
        for spider in (spider_loader.load(name) for name in spider_loader.list())
        if spider.enabled
    ]
    if not enabled_spiders:
        # Nothing to schedule; this also avoids dividing by zero when the
        # interval is split across the spiders below.
        print('No enabled spiders found; nothing to schedule')
        return

    session = HttpUtils.get_session(convert_snake_case=False)

    # Retry the initial fetch a bounded number of times with a fixed pause
    # between attempts.
    last_error = None
    for _ in range(ATTEMPTS):
        try:
            scrapy_jobs = session.get(config.scheduler_jobs)
            break
        except ConnectionError as err:
            last_error = err
            time.sleep(5)
    else:
        # Every attempt failed: fail loudly here instead of hitting an
        # unbound ``scrapy_jobs`` further down.
        raise ConnectionError(
            f'Could not reach scheduler at {config.scheduler_jobs} '
            f'after {ATTEMPTS} attempts'
        ) from last_error

    jobs_json = scrapy_jobs.json()
    # Index the existing jobs of our job type by name for O(1) lookup.
    # NOTE(review): the 'job_clast_string' key and JOB_CLast name look mangled
    # (possibly from "class"); left unchanged -- confirm against the scheduler API.
    jobs_dict = {
        job['name']: job
        for job in jobs_json['jobs']
        if job['job_clast_string'] == JOB_CLast
    }

    # Split the schedule interval evenly across the enabled spiders
    # (presumably used by Schedule to stagger start times -- confirm).
    delta = math.ceil(config.schedule_interval / len(enabled_spiders))

    schedules = [
        Schedule(scraper.name, config.schedule_interval, delta, index)
        for index, scraper in enumerate(enabled_spiders)
    ]

    for schedule in schedules:
        json_payload = {
            'hour': schedule.hour,
            'minute': schedule.minute,
            'name': schedule.name,
            'job_clast_string': JOB_CLast,
            'pub_args': [schedule.name],
        }
        existing_job = jobs_dict.get(schedule.name)
        if existing_job is not None:
            job_id = existing_job['job_id']
            response = session.put(f'{config.scheduler_jobs}/{job_id}', json=json_payload)
            action = 'Updated'
        else:
            response = session.post(config.scheduler_jobs, json=json_payload)
            action = 'Added'

        if not response.ok:
            raise Exception(response.text)
        print(f'{action} schedule for {schedule.name}')

0 View Complete Implementation : runner.py
Copyright GNU General Public License v3.0
Author : ClipboardProject
def run():
    """Crawl with the enabled spiders, then report whether events are available.

    Connects via ``config.connect_to_client()``, loads the project's spiders
    (restricted to ``config.spider_name`` when that is set), runs every one
    whose ``enabled`` attribute is truthy in a single ``CrawlerProcess``, and
    finally queries ``config.get_events`` and prints whether the response
    contained any data.
    """
    config.connect_to_client()

    print('Running event processor...')

    # Build the crawler process exactly once; the original constructed a
    # second, identical instance and discarded the first.
    crawler_process = CrawlerProcess(get_project_settings())

    settings = project.get_project_settings()
    spider_loader = spiderloader.SpiderLoader.from_settings(settings)

    # When no specific spider is requested, every known spider is a candidate.
    wanted_name = config.spider_name
    candidate_names = [
        name
        for name in spider_loader.list()
        if wanted_name is None or name == wanted_name
    ]
    enabled_spiders = [
        spider
        for spider in (spider_loader.load(name) for name in candidate_names)
        if spider.enabled
    ]

    for spider_clast in enabled_spiders:
        crawler_process.crawl(spider_clast)

    crawler_process.start()
    # NOTE(review): start() is expected to block until the crawls finish, so
    # this join() is likely redundant; kept to preserve the original calls.
    crawler_process.join()

    print('Event processor completed')

    session = HttpUtils.get_session()
    events = session.get(config.get_events, params={})

    if events.json():
        print('Data retrieved successfully')
    else:
        print('No data retrieved')