Here are examples of the Python API scrapy.spiderloader.SpiderLoader.from_settings taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
2 Examples
0
View Complete Implementation : create_schedules.py
Copyright GNU General Public License v3.0
Author : ClipboardProject
def run():
    """Create or update a scheduler job for every enabled spider.

    Discovers all spiders via Scrapy's SpiderLoader, spreads their start
    times evenly across ``config.schedule_interval``, then updates (PUT)
    schedules that already exist on the scheduler service and creates
    (POST) the rest.

    Raises:
        ConnectionError: if the scheduler service cannot be reached after
            ``ATTEMPTS`` tries.
        Exception: if the scheduler rejects a create/update request; the
            response body is used as the message.
    """
    settings = project.get_project_settings()
    spider_loader = spiderloader.SpiderLoader.from_settings(settings)
    spider_names = spider_loader.list()
    # Only spiders explicitly flagged as enabled get a schedule.
    enabled_spiders = [s for s in (spider_loader.load(name) for name in spider_names) if s.enabled]
    if not enabled_spiders:
        # Guard: the delta computation below would divide by zero.
        print('No enabled spiders found; nothing to schedule')
        return

    session = HttpUtils.get_session(convert_snake_case=False)
    # Retry the initial fetch: the scheduler service may still be starting up.
    for _ in range(ATTEMPTS):
        try:
            scrapy_jobs = session.get(config.scheduler_jobs)
            break
        except ConnectionError:
            time.sleep(5)
    else:
        # BUG FIX: the original fell through with `scrapy_jobs` unbound after
        # exhausting its retries, producing a confusing NameError on the next
        # line. Fail loudly with the real cause instead.
        raise ConnectionError(f'Could not reach scheduler after {ATTEMPTS} attempts')

    jobs_json = scrapy_jobs.json()
    # Index the existing jobs by name; only jobs of our job class matter.
    # NOTE(review): the key 'job_clast_string' looks scrape-mangled (likely
    # 'job_class_string' originally) but must match the scheduler API — confirm.
    jobs_dict = {job['name']: job for job in jobs_json['jobs'] if job['job_clast_string'] == JOB_CLast}

    # Spread spider start times evenly across the schedule interval.
    delta = math.ceil(config.schedule_interval / len(enabled_spiders))
    schedules = [Schedule(spider.name, config.schedule_interval, delta, index)
                 for index, spider in enumerate(enabled_spiders)]

    for schedule in schedules:
        json_payload = {
            'hour': schedule.hour,
            'minute': schedule.minute,
            'name': schedule.name,
            'job_clast_string': JOB_CLast,
            'pub_args': [schedule.name]
        }
        if schedule.name in jobs_dict:
            # Existing job: update it in place.
            job = jobs_dict[schedule.name]
            response = session.put(f'{config.scheduler_jobs}/{job["job_id"]}', json=json_payload)
            action = 'Updated'
        else:
            # No job under this name yet: create one.
            response = session.post(config.scheduler_jobs, json=json_payload)
            action = 'Added'
        # Consolidated result handling (the original duplicated this per branch).
        if not response.ok:
            raise Exception(response.text)
        print(f'{action} schedule for {schedule.name}')
0
View Complete Implementation : runner.py
Copyright GNU General Public License v3.0
Author : ClipboardProject
def run():
    """Crawl the enabled spiders, then verify that events were recorded.

    If ``config.spider_name`` is set, only that spider runs; otherwise every
    enabled spider runs. After the crawl finishes, queries the event endpoint
    and reports whether any data came back.
    """
    config.connect_to_client()
    print('Running event processor...')
    settings = project.get_project_settings()
    spider_loader = spiderloader.SpiderLoader.from_settings(settings)
    spider_names = spider_loader.list()
    # Restrict to the requested spider (if any), then drop disabled ones.
    spider_classes = [
        s for s in (
            spider_loader.load(name)
            for name in spider_names
            # BUG FIX: compare against None with `is`, not `==`.
            if config.spider_name is None or name == config.spider_name
        )
        if s.enabled
    ]
    # BUG FIX: the original constructed CrawlerProcess twice and discarded
    # the first instance unused. Build it once, after the spider list is known.
    crawler_process = CrawlerProcess(get_project_settings())
    for spider_class in spider_classes:
        crawler_process.crawl(spider_class)
    crawler_process.start()  # blocks until all scheduled crawls finish
    crawler_process.join()
    print('Event processor completed')

    session = HttpUtils.get_session()
    events = session.get(config.get_events, params={})
    # Truthiness check: an empty JSON array means no events were recorded.
    if events.json():
        print('Data retrieved successfully')
    else:
        print('No data retrieved')