scrapy.exceptions.DropItem - python examples

Here are examples of the Python API scrapy.exceptions.DropItem, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

45 Examples 7

3 View Complete Implementation : pipelines.py
Copyright MIT License
Author : 1dot75cm
    def process_item(self, item, spider):
        """Insert the item into ``self.collection`` unless its 'nid' is known.

        Raises DropItem when ``item['nid']`` is already in ``self.itemlist``;
        otherwise stores a plain-dict copy of the item, logs a debug message
        and returns the item unchanged.
        """
        already_stored = item['nid'] in self.itemlist
        if already_stored:
            raise DropItem('Duplication data!')

        # A disabled alternative upserted the document keyed on 'nid'
        # instead of doing a plain insert.
        document = dict(item)
        self.collection.insert(document)
        log.msg(
            'Goods added to MongoDB database!',
            level=log.DEBUG,
            spider=spider,
        )
        return item

3 View Complete Implementation : pipelines.py
Copyright MIT License
Author : banglakit
    def process_item(self, item, spider):
        """Strip surrounding whitespace from the item's body.

        Raises DropItem when ``item['body']`` is falsy; otherwise the body is
        replaced by its stripped form and the item is returned.
        """
        body = item['body']
        if not body:
            raise DropItem("Empty Body")

        item['body'] = body.strip()
        return item

3 View Complete Implementation : pipelines.py
Copyright MIT License
Author : brantou
    def process_item(self, item, spider):
        """Pass each 'pid' through once; drop items whose 'pid' was seen.

        Records ``item['pid']`` in ``self.ids_seen`` the first time it is
        encountered and returns the item. Raises DropItem on a repeat.
        """
        pid = item['pid']
        if pid not in self.ids_seen:
            self.ids_seen.add(pid)
            return item
        raise DropItem("Duplicate item found: %s" % item)

3 View Complete Implementation : pipelines.py
Copyright GNU General Public License v3.0
Author : chinesehuazhou
    def process_item(self, item, spider):
        """Store the item in ``self.table``, dropping it if a field is empty.

        Args:
            item: mapping-like scraped item; it is copied with ``dict(item)``
                before being inserted.
            spider: the spider that produced the item (unused).

        Returns:
            The unchanged item, after it has been inserted.

        Raises:
            DropItem: if any field of ``item`` holds a falsy value. Nothing
                is inserted in that case.
        """
        # Iterating the item yields field names, so the value must be looked
        # up explicitly; testing the name itself never detects an empty field.
        for field in item:
            if not item[field]:
                raise DropItem("Missing {0}!".format(field))

        self.table.insert(dict(item))
        return item

3 View Complete Implementation : pipelines.py
Copyright GNU General Public License v3.0
Author : chinesehuazhou
    def item_completed(self, results, item, info):
        """Drop the item when none of its downloads succeeded.

        ``results`` is iterated as ``(ok, data)`` pairs; ``data['path']`` is
        read for every pair whose ``ok`` flag is truthy. The collected paths
        are only used for the emptiness check and are not stored on the item.
        """
        downloaded = []
        for succeeded, data in results:
            if succeeded:
                downloaded.append(data['path'])

        if downloaded:
            return item
        raise DropItem("Item contains no images")

3 View Complete Implementation : pipelines.py
Copyright GNU General Public License v3.0
Author : ClipboardProject
    def process_item(self, item, spider):
        """Build an event from the item and keep it only if it passes filters.

        The item is first annotated with the spider's organization and, when
        it has an 'event_time' entry, the spider's date format. An
        ``EventLoader`` is then built from the item's fields.

        Raises DropItem when ``spider.item_filter`` rejects the item, or when
        the loaded event has an 'event_time' that
        ``self.time_utils.time_range_is_between`` reports as outside the
        spider's start/end timestamps. Otherwise returns ``loader.item``.
        """
        item['organization'] = spider.organization
        if 'event_time' in item:
            item['event_time']['date_format'] = spider.date_format
        loader = EventLoader(**item)

        # Give the spider's custom filter a chance to reject the item.
        allowed = spider.item_filter(item)
        if not allowed:
            raise DropItem('Custom item filter did not allow this event')

        # Events without a time are passed through without a range check.
        if 'event_time' not in loader.item:
            return loader.item

        event_time = loader.item['event_time']
        in_timeframe = self.time_utils.time_range_is_between(
            event_time['start_timestamp'],
            event_time['end_timestamp'],
            spider.start_timestamp,
            spider.end_timestamp,
        )
        if not in_timeframe:
            raise DropItem('Event is not in the configured timeframe')
        return loader.item

3 View Complete Implementation : pipelines.py
Copyright Mozilla Public License 2.0
Author : code4romania
    def process_item(self, item, spider):
        """Drop items that lack a value for any required field.

        Args:
            item: scraped item; must support ``.get(field)``.
            spider: the spider that produced the item (unused).

        Returns:
            The unchanged item when every name in ``self.REQUIRED_FIELDS``
            maps to a truthy value.

        Raises:
            DropItem: for the first required field that is missing or falsy;
                the same message is logged as a warning beforehand.
        """
        for field in self.REQUIRED_FIELDS:
            if not item.get(field):
                message = "Missing field {}".format(field)
                # ``warn`` is a deprecated alias; ``warning`` is the
                # supported spelling.
                logger.warning(message)
                raise DropItem(message)
        return item

3 View Complete Implementation : pipelines.py
Copyright GNU General Public License v3.0
Author : czs0x55aa
	def process_item(self, item, spider):
		"""Store the item according to its 'level', dropping it on failure.

		Level 1 items go through ``self.__insert_vedio`` and level 2 items
		through ``self.__update_vedio``. The item is returned when the chosen
		helper returns a truthy value; any other level, or a falsy helper
		result, raises DropItem.
		"""
		stored = False
		if item['level'] == 1:
			stored = self.__insert_vedio(item)
		elif item['level'] == 2:
			stored = self.__update_vedio(item)

		if not stored:
			raise DropItem('fail to store data')
		return item

3 View Complete Implementation : pipelines.py
Copyright GNU General Public License v3.0
Author : digitalengineering
    def process_item(self, item, spider):
        """Let each 'id' through once and drop later items with the same id.

        Raises DropItem when ``item['id']`` is already in ``self.ids_seen``;
        otherwise records the id there and returns the item.
        """
        item_id = item['id']
        seen = self.ids_seen
        if item_id in seen:
            raise DropItem("Duplicate item found: %s" % item)

        seen.add(item_id)
        return item

3 View Complete Implementation : pipelines.py
Copyright GNU General Public License v3.0
Author : evilcos
    def item_completed(self, results, item, info):
        """Attach the paths of successful downloads to the item.

        ``results`` is iterated as ``(ok, data)`` pairs; ``data['path']`` is
        collected for every pair whose ``ok`` flag is truthy. Raises DropItem
        when nothing was collected; otherwise stores the list under
        ``item['image_paths']`` and returns the item.
        """
        collected = []
        for succeeded, data in results:
            if not succeeded:
                continue
            collected.append(data['path'])

        if len(collected) == 0:
            raise DropItem("Item contains no images")

        item['image_paths'] = collected
        return item