Here are examples of the Python API scrapy.exceptions.DropItem, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.
45 Examples
3
View Complete Implementation : pipelines.py
Copyright MIT License
Author : 1dot75cm
Copyright MIT License
Author : 1dot75cm
def process_item(self, item, spider):
    """Insert ``item`` into ``self.collection`` unless its ``nid`` is already known.

    Args:
        item: Mapping-like object that provides an ``'nid'`` key.
        spider: Forwarded to ``log.msg`` as its ``spider`` argument.

    Returns:
        The same ``item`` once it has been inserted.

    Raises:
        DropItem: If ``item['nid']`` is contained in ``self.itemlist``.
    """
    nid = item['nid']
    if nid in self.itemlist:
        raise DropItem('Duplication data!')

    # A plain insert is used here; an upsert keyed on 'nid' was sketched as an
    # alternative: self.collection.update({'nid': item['nid']}, dict(item), upsert=True)
    # Note that this method does not add the nid to self.itemlist.
    record = dict(item)
    self.collection.insert(record)
    log.msg('Goods added to MongoDB database!', level=log.DEBUG, spider=spider)
    return item
3
View Complete Implementation : pipelines.py
Copyright MIT License
Author : banglakit
Copyright MIT License
Author : banglakit
def process_item(self, item, spider):
    """Strip surrounding whitespace from ``item['body']`` and drop empty bodies.

    The body is stripped *before* the emptiness check, so a body made only of
    whitespace is dropped instead of being passed on as an empty string.

    Args:
        item: Mapping-like object that provides a ``'body'`` key.
        spider: Unused.

    Returns:
        The same ``item`` with ``item['body']`` replaced by its stripped value.

    Raises:
        DropItem: If the body is falsy (e.g. ``None`` or ``''``) or contains
            only whitespace.
    """
    body = item['body']
    # Only call strip() on truthy values so None/'' fall straight through
    # to the drop below.
    stripped = body.strip() if body else body
    if not stripped:
        raise DropItem("Empty Body")
    item['body'] = stripped
    return item
3
View Complete Implementation : pipelines.py
Copyright MIT License
Author : brantou
Copyright MIT License
Author : brantou
def process_item(self, item, spider):
    """Let each ``pid`` through once; drop any item whose ``pid`` was seen before.

    Args:
        item: Mapping-like object that provides a ``'pid'`` key.
        spider: Unused.

    Returns:
        The same ``item`` after its ``pid`` has been added to ``self.ids_seen``.

    Raises:
        DropItem: If ``item['pid']`` is already in ``self.ids_seen``.
    """
    pid = item['pid']
    if pid in self.ids_seen:
        raise DropItem("Duplicate item found: %s" % item)
    # First sighting: remember the id so later copies are rejected.
    self.ids_seen.add(pid)
    return item
3
View Complete Implementation : pipelines.py
Copyright GNU General Public License v3.0
Author : chinesehuazhou
Copyright GNU General Public License v3.0
Author : chinesehuazhou
def process_item(self, item, spider):
    """Insert ``item`` into ``self.table`` once every field holds a value.

    A field counts as missing when its value is ``None`` or an empty
    string/container. Falsy scalars such as ``0`` and ``False`` are treated as
    real data and kept.

    Args:
        item: Mapping-like object; it is converted with ``dict(item)``.
        spider: Unused.

    Returns:
        The same ``item`` after it has been inserted.

    Raises:
        DropItem: If any field is missing; the message names that field.
    """
    record = dict(item)
    # Iterating a mapping directly yields only its keys, so the values have to
    # be inspected explicitly for the validation to have any effect.
    for field, value in record.items():
        is_empty = value is None or (hasattr(value, '__len__') and len(value) == 0)
        if is_empty:
            raise DropItem("Missing {0}!".format(field))
    self.table.insert(record)
    return item
3
View Complete Implementation : pipelines.py
Copyright GNU General Public License v3.0
Author : chinesehuazhou
Copyright GNU General Public License v3.0
Author : chinesehuazhou
def item_completed(self, results, item, info):
    """Drop ``item`` when none of its ``results`` entries succeeded.

    Args:
        results: Iterable of ``(ok, value)`` pairs; ``value['path']`` is read
            for every pair whose ``ok`` flag is truthy.
        item: The item the results belong to.
        info: Unused.

    Returns:
        ``item`` unchanged. The collected paths are not stored on it
        (``item['image_paths'] = image_paths`` is intentionally left disabled).

    Raises:
        DropItem: If no pair in ``results`` has a truthy ``ok`` flag.
    """
    image_paths = []
    for succeeded, outcome in results:
        if succeeded:
            image_paths.append(outcome['path'])
    if len(image_paths) == 0:
        raise DropItem("Item contains no images")
    return item
3
View Complete Implementation : pipelines.py
Copyright GNU General Public License v3.0
Author : ClipboardProject
Copyright GNU General Public License v3.0
Author : ClipboardProject
def process_item(self, item, spider):
    """Stamp spider metadata onto ``item``, load it, and filter it by spider rules.

    Steps, in order:
      1. Copy ``spider.organization`` into ``item['organization']`` and, when
         the item has an ``'event_time'`` entry, ``spider.date_format`` into
         ``item['event_time']['date_format']``.
      2. Build an ``EventLoader`` from the item's fields.
      3. Drop the item if ``spider.item_filter(item)`` is falsy.
      4. If the loaded item has an ``'event_time'``, drop it unless
         ``self.time_utils.time_range_is_between`` accepts its start/end
         timestamps against the spider's start/end timestamps.

    Args:
        item: Mapping-like event data; mutated in place in step 1.
        spider: Supplies ``organization``, ``date_format``, ``item_filter``,
            ``start_timestamp`` and ``end_timestamp``.

    Returns:
        ``loader.item`` for events that pass every check.

    Raises:
        DropItem: If the custom filter rejects the item or the event lies
            outside the configured timeframe.
    """
    item['organization'] = spider.organization
    if 'event_time' in item:
        item['event_time']['date_format'] = spider.date_format

    # The loader is built before the custom filter runs, as in the original order.
    loader = EventLoader(**item)

    allowed = spider.item_filter(item)
    if not allowed:
        raise DropItem('Custom item filter did not allow this event')

    # Events without a time cannot be range-checked and pass straight through.
    if 'event_time' not in loader.item:
        return loader.item

    event_time = loader.item['event_time']
    in_timeframe = self.time_utils.time_range_is_between(
        event_time['start_timestamp'],
        event_time['end_timestamp'],
        spider.start_timestamp,
        spider.end_timestamp,
    )
    if not in_timeframe:
        raise DropItem('Event is not in the configured timeframe')
    return loader.item
3
View Complete Implementation : pipelines.py
Copyright Mozilla Public License 2.0
Author : code4romania
Copyright Mozilla Public License 2.0
Author : code4romania
def process_item(self, item, spider):
    """Drop ``item`` if any field listed in ``self.REQUIRED_FIELDS`` is falsy.

    Fields are checked in the order ``self.REQUIRED_FIELDS`` yields them; the
    first one whose ``item.get(field)`` is falsy is logged and reported.

    Args:
        item: Object exposing a ``get(field)`` method.
        spider: Unused.

    Returns:
        The same ``item`` when every required field has a truthy value.

    Raises:
        DropItem: With the message ``"Missing field <name>"`` for the first
            missing field.
    """
    for name in self.REQUIRED_FIELDS:
        if item.get(name):
            continue
        message = "Missing field {}".format(name)
        # NOTE(review): if `logger` is a stdlib logging.Logger, `warn` is a
        # deprecated alias of `warning` - confirm the logger type before switching.
        logger.warn(message)
        raise DropItem(message)
    return item
3
View Complete Implementation : pipelines.py
Copyright GNU General Public License v3.0
Author : czs0x55aa
Copyright GNU General Public License v3.0
Author : czs0x55aa
def process_item(self, item, spider):
    """Store ``item`` via the handler matching its ``level``; drop it on failure.

    Level ``1`` items go to ``self.__insert_vedio`` and level ``2`` items to
    ``self.__update_vedio``. Any other level, or a falsy handler result, is
    treated as a storage failure.

    Args:
        item: Mapping-like object that provides a ``'level'`` key.
        spider: Unused.

    Returns:
        The same ``item`` when the matching handler returns a truthy value.

    Raises:
        DropItem: If no handler matches the level or the handler result is falsy.
    """
    level = item['level']
    if level == 1:
        stored = self.__insert_vedio(item)
    elif level == 2:
        stored = self.__update_vedio(item)
    else:
        # Unknown levels have no handler and fall through to the drop.
        stored = False
    if stored:
        return item
    raise DropItem('fail to store data')
3
View Complete Implementation : pipelines.py
Copyright GNU General Public License v3.0
Author : digitalengineering
Copyright GNU General Public License v3.0
Author : digitalengineering
def process_item(self, item, spider):
    """Reject items whose ``id`` has already passed through this pipeline.

    Args:
        item: Mapping-like object that provides an ``'id'`` key.
        spider: Unused.

    Returns:
        The same ``item`` after its id has been recorded in ``self.ids_seen``.

    Raises:
        DropItem: If ``item['id']`` is already present in ``self.ids_seen``.
    """
    item_id = item['id']
    already_seen = item_id in self.ids_seen
    if already_seen:
        raise DropItem("Duplicate item found: %s" % item)
    self.ids_seen.add(item_id)
    return item
3
View Complete Implementation : pipelines.py
Copyright GNU General Public License v3.0
Author : evilcos
Copyright GNU General Public License v3.0
Author : evilcos
def item_completed(self, results, item, info):
    """Record the paths of successful results on ``item``; drop it if there are none.

    Args:
        results: Iterable of ``(ok, value)`` pairs; ``value['path']`` is read
            for every pair whose ``ok`` flag is truthy.
        item: Mapping-like object that receives the ``'image_paths'`` entry.
        info: Unused.

    Returns:
        ``item`` with ``item['image_paths']`` set to the list of collected
        paths, in the order they appear in ``results``.

    Raises:
        DropItem: If no pair in ``results`` has a truthy ``ok`` flag.
    """
    collected = []
    for ok, payload in results:
        if not ok:
            continue
        collected.append(payload['path'])
    if not collected:
        raise DropItem("Item contains no images")
    item['image_paths'] = collected
    return item