Here are examples of the Python API `scraper.items.Manga`, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
25 Examples
3
View Complete Implementation : mangahere.py
Copyright GNU General Public License v3.0
Author : aplanas
def parse_catalog(self, response):
    """Generate the catalog (list of mangas) of the site.

    Yields one request per manga entry; each request carries the
    partially-filled ``Manga`` item in ``meta`` for ``parse_collection``.

    @url http://www.mangahere.cc/mangalist/
    @returns items 0
    @returns request 18000 22000
    """
    # FIX: the attribute name was corrupted ('clast'); the selector
    # must target @class or it matches no manga links at all.
    xp = '//a[@class="manga_info"]'
    for item in response.xpath(xp):
        manga = Manga()
        # URL (relative href resolved against the response URL)
        xp = './@href'
        url = item.xpath(xp).extract_first()
        manga['url'] = response.urljoin(url)
        meta = {'manga': manga}
        yield response.follow(manga['url'], self.parse_collection,
                              meta=meta)
3
View Complete Implementation : mangasee.py
Copyright GNU General Public License v3.0
Author : aplanas
Copyright GNU General Public License v3.0
Author : aplanas
def parse_catalog(self, response):
    """Generate the catalog (list of manga) of the site.

    Yields one request per manga URL; the new ``Manga`` item travels in
    ``meta`` so ``parse_collection`` can finish populating it.

    @url http://mangaseeonline.us/directory/
    @returns items 0
    @returns request 3500-4500
    """
    # FIX: 'clast' was a scraping artifact for 'class'; without the fix
    # the XPath selects nothing.
    xp = '//a[@class="ttip"]/@href'
    for url in response.xpath(xp).extract():
        manga = Manga()
        # URL (resolved to absolute form)
        manga['url'] = response.urljoin(url)
        meta = {'manga': manga}
        yield response.follow(manga['url'], self.parse_collection,
                              meta=meta)
3
View Complete Implementation : mangasee.py
Copyright GNU General Public License v3.0
Author : aplanas
Copyright GNU General Public License v3.0
Author : aplanas
def _parse_subscribe(self, response):
    """Fill in the subscriber count ('rank') of a manga.

    Reuses the ``Manga`` item passed via ``response.meta`` when present;
    otherwise builds a fresh one from the response URL (a shortcut that
    only exists so the scrapy contract test can drive this callback).
    """
    manga = response.meta.get('manga')
    if manga is None:
        # Not correct in general, but good enough for contract testing.
        manga = Manga(url=response.url)
    manga['rank'] = response.xpath(
        '//span[@id="numSubscribe"]/@alt').extract_first()
    return manga
3
View Complete Implementation : mangasee.py
Copyright GNU General Public License v3.0
Author : aplanas
Copyright GNU General Public License v3.0
Author : aplanas
def _parse_latest(self, response):
    """Follow the first entry of the latest-releases list.

    Returns a single request whose ``meta`` carries a fresh ``Manga``
    item for ``parse_collection`` to populate.
    """
    # FIX: 'clast' -> 'class'; the corrupted attribute name made the
    # selector return None.
    xp = '//a[@class="list-link"]/@href'
    url = response.xpath(xp).extract_first()
    url = response.urljoin(url)
    manga = Manga(url=url)
    meta = {'manga': manga}
    return response.follow(url, self.parse_collection, meta=meta)
0
View Complete Implementation : batoto.py
Copyright GNU General Public License v3.0
Author : aplanas
Copyright GNU General Public License v3.0
Author : aplanas
def parse_catalog(self, response):
    """Generate the catalog (list of mangas) of the site.

    Yields a request per manga on the page, then a request for the next
    catalog page (pagination is driven by an inline click handler).

    @url https://bato.to/browse?page=2
    @returns items 0
    @returns request 50 70
    """
    # FIX: 'clast' was a scraping artifact for 'class' in both selectors.
    xp = '//div[@id="series-list"]//div[@class="item-text"]'
    for item in response.xpath(xp):
        manga = Manga()
        # URL
        xp = './a/@href'
        url = item.xpath(xp).extract_first()
        manga['url'] = response.urljoin(url)
        meta = {'manga': manga}
        yield response.follow(url, self.parse_collection, meta=meta)
    # Next page: the page number is embedded in an inline handler like
    # @click="onClickPage(3)".  Guard against no match so an unexpected
    # layout change raises no IndexError here.
    # NOTE(review): body_as_unicode() is deprecated in recent Scrapy in
    # favour of response.text -- confirm the project's Scrapy version.
    re_ = r'@click="onClickPage\((.*)\)"'
    page_numbers = re.findall(re_, response.body_as_unicode())
    next_page_number = page_numbers[-1] if page_numbers else None
    if next_page_number:
        next_url = NEXT_PAGE % next_page_number
        yield response.follow(next_url, self.parse_catalog)
0
View Complete Implementation : batoto.py
Copyright GNU General Public License v3.0
Author : aplanas
Copyright GNU General Public License v3.0
Author : aplanas
def parse_collection(self, response, manga=None):
    """Generate the list of issues for a manga

    @url https://bato.to/series/68329
    @returns items 1
    @returns request 0
    @scrapes url name alt_name author artist reading_direction
    @scrapes status genres rank rank_order description image_urls
    @scrapes issues
    """
    # FIX throughout: the scraped text corrupted 'class' to 'clast' and
    # 'title' to 'satle' inside the XPath strings; restored here so the
    # selectors actually match.
    if 'manga' in response.meta:
        manga = response.meta['manga']
    else:
        manga = Manga(url=response.url)
    # URL
    manga['url'] = response.url
    # Name
    xp = '//h3[@class="item-title"]/a/text()'
    manga['name'] = response.xpath(xp).extract()
    # Alternate name
    xp = '//div[@class="pb-2 alias-set hairlines-fade-bottom"]/text()'
    manga['alt_name'] = response.xpath(xp).extract_first().split('/')
    # Author
    xp = '//div[@class="attr-item"]/b[contains(text(),"%s")]' \
         '/following-sibling::span/*/text()'
    manga['author'] = response.xpath(xp % 'Authors:').extract_first()
    # Artist: the site lists authors and artists in one field; the
    # first entry is taken as author, the rest as artists.
    manga['artist'] = response.xpath(xp % 'Authors:').extract()[1:]
    # Reading direction
    manga['reading_direction'] = 'RL'
    # Status
    xp = '//div[@class="attr-item"]/b[contains(text(),"%s")]' \
         '/following-sibling::span/text()'
    manga['status'] = response.xpath(xp % 'Status:').extract()
    # Genres
    genres = response.xpath(xp % 'Genres:').extract()[-1]
    manga['genres'] = genres.split('/')
    # Rank (drop the thousands separator tail, keep leading digits)
    rank = response.xpath(xp % 'Rank:').extract_first()
    manga['rank'] = rank.split(',')[0]
    # Rank order
    manga['rank_order'] = 'ASC'
    # Description
    xp = '//pre/text()'
    manga['description'] = response.xpath(xp).extract()
    # Cover image
    xp = '//img[@class="shadow-6"]/@src'
    url = response.xpath(xp).extract_first()
    manga['image_urls'] = [response.urljoin(url)]
    # Get language from the title flag (last CSS class of the flag span)
    xp = '//div[@class="mt-4 title-set"]/span/@class'
    language = response.xpath(xp).extract_first()
    language = language.split()[-1]
    # Parse the manga issues list
    manga['issues'] = []
    xp = '//div[@class="main"]/div'
    lines = response.xpath(xp)
    for line in lines:
        issue = Issue(language=language)
        # Name
        xp = './a//text()'
        issue['name'] = line.xpath(xp).extract()
        # Number
        xp = './a/b/text()'
        issue['number'] = line.xpath(xp).re(r'Ch.(\d+)')
        # Order: newest issues come first, so order counts down
        issue['order'] = len(lines) - len(manga['issues'])
        # Release
        xp = './/i/text()'
        issue['release'] = line.xpath(xp).extract()
        # URL
        xp = './a/@href'
        url = line.xpath(xp).extract_first()
        issue['url'] = response.urljoin(url)
        manga['issues'].append(issue)
    return manga
0
View Complete Implementation : batoto.py
Copyright GNU General Public License v3.0
Author : aplanas
Copyright GNU General Public License v3.0
Author : aplanas
def parse_latest(self, response, until=None):
    """Generate the list of new mangas until a date

    @url https://bato.to/latest
    @returns items 0
    @returns request 60
    """
    if not until:
        if 'until' in response.meta:
            until = response.meta['until']
        else:
            until = date.today()
    # Get all manga's URL from the same page and update it via
    # `parse_collection`.
    # FIX: 'clast'/'satle' were scraping artifacts for 'class'/'title'.
    xp = '//a[@class="item-title"]/@href'
    for url in response.xpath(xp).extract():
        url = response.urljoin(url)
        manga = Manga(url=url)
        meta = {'manga': manga}
        yield response.follow(url, self.parse_collection, meta=meta)
0
View Complete Implementation : kissmanga.py
Copyright GNU General Public License v3.0
Author : aplanas
Copyright GNU General Public License v3.0
Author : aplanas
def parse_catalog(self, response):
    """Generate the catalog (list of mangas) of the site.

    Yields a request per manga row in the listing table, then follows
    the "Next" pager link until the catalog is exhausted.

    @url http://kissmanga.com/MangaList?page=200
    @returns items 0
    @returns request 25 60
    """
    # FIX: 'clast' -> 'class' in both selectors (scraping artifact).
    xp = '//table[@class="listing"]/tr/td[1]'
    for item in response.xpath(xp):
        manga = Manga()
        # URL
        xp = 'a/@href'
        manga['url'] = response.urljoin(item.xpath(xp).extract_first())
        meta = {'manga': manga}
        yield response.follow(manga['url'], self.parse_collection,
                              meta=meta)
    # Next page
    xp = '//ul[@class="pager"]/li/a[contains(., "Next")]/@href'
    next_url = response.xpath(xp).extract_first()
    if next_url:
        yield response.follow(next_url, self.parse_catalog)
0
View Complete Implementation : kissmanga.py
Copyright GNU General Public License v3.0
Author : aplanas
Copyright GNU General Public License v3.0
Author : aplanas
def parse_collection(self, response, manga=None):
    """Generate the list of issues for a manga

    @url http://kissmanga.com/Manga/Naruto
    @returns items 1
    @returns request 0
    @scrapes url name alt_name author artist reading_direction
    @scrapes status genres rank rank_order description image_urls
    @scrapes issues
    """
    # FIX throughout: the scraped text corrupted 'class' to 'clast'
    # inside the XPath strings; restored so the selectors match.
    if 'manga' in response.meta:
        manga = response.meta['manga']
    else:
        manga = Manga(url=response.url)
    # URL
    manga['url'] = response.url
    # Name
    xp = '//div[@class="barContent"]//a[@class="bigChar"]/text()'
    manga['name'] = response.xpath(xp).extract()
    # Alternate name
    xp = '//span[@class="info" and contains(text(), "%s")]' \
         '/following-sibling::a/text()'
    manga['alt_name'] = response.xpath(xp % 'Other name:').extract()
    # Author
    manga['author'] = response.xpath(xp % 'Author:').extract()
    # Artist: the site only lists one credit, reused as artist
    manga['artist'] = manga['author']
    # Reading direction
    manga['reading_direction'] = 'RL'
    # Genres
    manga['genres'] = response.xpath(xp % 'Genres:').extract()
    # Status
    xp = '//span[@class="info" and contains(text(), "%s")]' \
         '/following-sibling::text()[1]'
    manga['status'] = response.xpath(xp % 'Status:').extract()
    # Rank
    manga['rank'] = response.xpath(xp % 'Views:').re(r'(\d+).')
    # Rank order
    manga['rank_order'] = 'DESC'
    # Description
    xp = '//p[span[@class="info" and contains(text(), "%s")]]'\
        '/following-sibling::p[1]/text()'
    manga['description'] = response.xpath(xp % 'Summary:').extract()
    # Cover image
    xp = '//div[@id="rightside"]//img/@src'
    url = response.xpath(xp).extract_first()
    manga['image_urls'] = [response.urljoin(url)]
    # Parse the manga issues list
    manga['issues'] = []
    xp = '//table[@class="listing"]/tr[td]'
    lines = response.xpath(xp)
    for line in lines:
        issue = Issue(language='EN')
        # Name
        xp = './/a/text()'
        issue['name'] = line.xpath(xp).extract()
        # Number
        # Some examples that this regex needs to address
        #   1/11 Vol.003 Ch.009.006: Omake 004-koma
        #   21st Century Boys 014
        #   Mob Psycho 100 Ch.099.001: Mob
        #   Mob Psycho 100 Ch.098.002
        #   Fantastic World Vol.001 Ch.002
        #   Black Clover 118 - Mage X
        #   Black Clover 099: Family
        # NOTE(review): '[Cc]haper' looks like a typo for '[Cc]hapter',
        # but it may be deliberate for the site's data -- confirm before
        # changing it.
        xp = './/a/text()'
        number = line.xpath(xp).re(
            r'(?:[Cc]h.|[Ee]p.|[Cc]haper|[Pp]art.)(\d[.\d]+)'
            r'|(\d[.\d]+)[ :-]+'
            r'|(\d[.\d]+)$')
        issue['number'] = number
        # Order: newest issues come first, so order counts down
        issue['order'] = len(lines) - len(manga['issues'])
        # Release
        xp = './td[2]/text()'
        issue['release'] = line.xpath(xp).re(r'\d{1,2}/\d{1,2}/\d{4}')
        # URL
        xp = './/a/@href'
        url = line.xpath(xp).extract_first()
        issue['url'] = response.urljoin(url)
        manga['issues'].append(issue)
    return manga
0
View Complete Implementation : kissmanga.py
Copyright GNU General Public License v3.0
Author : aplanas
Copyright GNU General Public License v3.0
Author : aplanas
def parse_latest(self, response, until=None):
    """Generate the list of new mangas until a date

    @url http://kissmanga.com/
    @returns items 0
    @returns request 25 50
    """
    if not until:
        if 'until' in response.meta:
            until = response.meta['until']
        else:
            until = date.today()
    # Get all manga's URL from the same page and update it via
    # `parse_collection`.
    # FIX: 'clast' -> 'class' (scraping artifact).
    xp = '//div[@class="items"]//a/@href'
    for url in response.xpath(xp).extract():
        url = response.urljoin(url)
        manga = Manga(url=url)
        meta = {'manga': manga}
        yield response.follow(url, self.parse_collection, meta=meta)