Here are examples of the Python API openscrapers.modules.client.agent taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
9 Examples
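Every example below uses the same pattern: client.agent() supplies the User-Agent string for the request headers. A minimal usage sketch, assuming only what the examples themselves show (client.agent() returns a User-Agent string, client.request() fetches a URL; the search URL here is hypothetical):

from openscrapers.modules import client

headers = {'User-Agent': client.agent()}
html = client.request('http://example.com/search?q=foo', headers=headers)  # hypothetical URL

Presumably the helper exists so scrapers don't send requests with an empty or default library User-Agent, which many of the scraped sites reject.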
3 votes
View Complete Implementation : rapidmoviez.py
Copyright GNU General Public License v3.0
Author : a4k-openproject
def search(self, title, year):
    try:
        url = urlparse.urljoin(self.base_link, self.search_link % (urllib.quote_plus(title)))
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        # switch to client.parseDOM() to rid import
        r = dom_parser.parse_dom(r, 'div', {'class': 'list_items'})[0]
        r = dom_parser.parse_dom(r.content, 'li')
        r = [dom_parser.parse_dom(i, 'a', {'class': 'title'}) for i in r]
        r = [(i[0].attrs['href'], i[0].content) for i in r]
        # keep only entries whose title and year match the query
        r = [urlparse.urljoin(self.base_link, i[0]) for i in r if cleantitle.get(title) in cleantitle.get(i[1]) and year in i[1]]
        if r:
            return r[0]
        else:
            return
    except:
        return
0 votes
View Complete Implementation : directstream.py
Copyright GNU General Public License v3.0
Author : a4k-openproject
def google(url):
    try:
        if any(x in url for x in ['youtube.', 'docid=']):
            url = 'https://drive.google.com/file/d/%s/view' % re.compile('docid=([\w-]+)').findall(url)[0]
        netloc = urlparse.urlparse(url.strip().lower()).netloc
        netloc = netloc.split('.google')[0]
        if netloc == 'docs' or netloc == 'drive':
            url = url.split('/preview', 1)[0]
            url = url.replace('drive.google.com', 'docs.google.com')
        headers = {'User-Agent': client.agent()}
        result = client.request(url, output='extended', headers=headers)
        try:
            headers['Cookie'] = result[2]['Set-Cookie']
        except:
            pass
        result = result[0]
        if netloc == 'docs' or netloc == 'drive':
            result = re.compile('"fmt_stream_map",(".+?")').findall(result)[0]
            result = json.loads(result)
            result = [i.split('|')[-1] for i in result.split(',')]
            result = sum([googletag(i, append_height=True) for i in result], [])
        elif netloc == 'photos':
            result = result.replace('\r', '').replace('\n', '').replace('\t', '')
            result = re.compile('"\d*/\d*x\d*.+?","(.+?)"').findall(result)[0]
            result = result.replace('\\u003d', '=').replace('\\u0026', '&')
            result = re.compile('url=(.+?)&').findall(result)
            result = [urllib.unquote(i) for i in result]
            result = sum([googletag(i, append_height=True) for i in result], [])
        elif netloc == 'picasaweb':
            id = re.compile('#(\d*)').findall(url)[0]
            result = re.search('feedPreload:\s*(.*}]}})},', result, re.DOTALL).group(1)
            result = json.loads(result)['feed']['entry']
            if len(result) > 1:
                result = [i for i in result if str(id) in i['link'][0]['href']][0]
            elif len(result) == 1:
                result = result[0]
            result = result['media']['content']
            result = [i['url'] for i in result if 'video' in i['type']]
            result = sum([googletag(i, append_height=True) for i in result], [])
        elif netloc == 'plus':
            id = urlparse.urlparse(url).path.split('/')[-1]
            result = result.replace('\r', '').replace('\n', '').replace('\t', '')
            result = result.split('"%s"' % id)[-1].split(']]')[0]
            result = result.replace('\\u003d', '=').replace('\\u0026', '&')
            result = re.compile('url=(.+?)&').findall(result)
            result = [urllib.unquote(i) for i in result]
            result = sum([googletag(i, append_height=True) for i in result], [])
        result = sorted(result, key=lambda i: i.get('height', 0), reverse=True)
        url = []
        # keep the single best stream per quality tier
        for q in ['4K', '1440p', '1080p', 'HD', 'SD']:
            try:
                url += [[i for i in result if i.get('quality') == q][0]]
            except:
                pass
        for i in url:
            i.pop('height', None)
            # append the request headers to the URL, Kodi-style ('url|headers')
            i.update({'url': i['url'] + '|%s' % urllib.urlencode(headers)})
        if not url: return
        return url
    except:
        return
0 votes
View Complete Implementation : rapidmoviez.py
Copyright GNU General Public License v3.0
Author : a4k-openproject
def sources(self, url, hostDict, hostprDict):
    try:
        self.sources = []
        if url is None:
            return self.sources
        if debrid.status() is False:
            raise Exception()
        self.hostDict = hostDict + hostprDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        # title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = data['year']
        hdlr2 = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else ''
        imdb = data['imdb']
        url = self.search(title, hdlr)
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        if hdlr2 == '':
            r = dom_parser.parse_dom(r, 'ul', {'id': 'releases'})[0]
        else:
            r = dom_parser.parse_dom(r, 'ul', {'id': 'episodes'})[0]
        r = dom_parser.parse_dom(r.content, 'a', req=['href'])
        r = [(i.content, urlparse.urljoin(self.base_link, i.attrs['href'])) for i in r if i and i.content != 'Watch']
        if hdlr2 != '':
            r = [(i[0], i[1]) for i in r if hdlr2.lower() in i[0].lower()]
        threads = []
        for i in r:
            threads.append(workers.Thread(self._get_sources, i[0], i[1]))
        [i.start() for i in threads]
        # [i.join() for i in threads]
        # poll until every worker thread has finished
        alive = [x for x in threads if x.is_alive() is True]
        while alive:
            alive = [x for x in threads if x.is_alive() is True]
            time.sleep(0.1)
        return self.sources
    except:
        source_utils.scraper_error('RAPIDMOVIEZ')
        return self.sources
0 votes
View Complete Implementation : rapidmoviez.py
Copyright GNU General Public License v3.0
Author : a4k-openproject
def _get_sources(self, name, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        name = client.replaceHTMLCodes(name)
        l = dom_parser.parse_dom(r, 'div', {'class': 'ppu2h'})
        s = ''
        for i in l:
            s += i.content
        urls = re.findall(r'''((?:http|ftp|https)://[\w_-]+(?:(?:\.[\w_-]+)+)[\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])''', i.content, flags=re.MULTILINE|re.DOTALL)
        # skip archives and subtitle files
        urls = [i for i in urls if not any(x in i for x in ('.rar', '.zip', '.iso', '.idx', '.sub'))]
        for url in urls:
            if url in str(self.sources):
                continue
            valid, host = source_utils.is_host_valid(url, self.hostDict)
            if not valid:
                continue
            host = client.replaceHTMLCodes(host)
            host = host.encode('utf-8')
            quality, info = source_utils.get_release_quality(name, url)
            try:
                size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', name)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                size = '%.2f GB' % size
                info.append(size)
            except:
                pass
            info = ' | '.join(info)
            self.sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True})
    except:
        source_utils.scraper_error('RAPIDMOVIEZ')
        pass
0 votes
View Complete Implementation : 1337x.py
Copyright GNU General Public License v3.0
Author : a4k-openproject
def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        if '<tbody' not in r:
            return self.items
        posts = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = client.parseDOM(post, 'a', ret='href')[1]
            link = urlparse.urljoin(self.base_link, data)
            name = client.parseDOM(post, 'a')[1]
            t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                raise Exception()
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith('GB') else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except:
                size = '0'
                pass
            self.items.append((name, link, size))
        return self.items
    except:
        source_utils.scraper_error('1337X')
        return self.items
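The size-parsing block repeated in this and the following examples can be pulled out into a standalone helper. A minimal sketch (the function name is mine; it uses the endswith(('GB', 'GiB')) variant from the rapidmoviez example above, so GiB sizes count as gigabytes instead of falling through to the 1024 divisor):

import re

def parse_size_gb(text):
    # grab the first 'NNN MB/GB/MiB/GiB' token and normalize it to GB
    size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', text)[0]
    div = 1 if size.endswith(('GB', 'GiB')) else 1024  # MB/MiB -> divide by 1024
    return '%.2f GB' % (float(re.sub(r'[^0-9.,]', '', size).replace(',', '.')) / div)

print(parse_size_gb('<td>Some.Movie.2019.1080p - 1.4 GB</td>'))  # '1.40 GB'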
0 votes
View Complete Implementation : glodls.py
Copyright GNU General Public License v3.0
Author : a4k-openproject
def _get_items(self, url):
    items = []
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'class': 't-row'})
        posts = [i for i in posts if 'racker:' not in i]
        for post in posts:
            ref = client.parseDOM(post, 'a', ret='href')
            url = [i for i in ref if 'magnet:' in i][0]
            # skip non-English releases
            if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
                continue
            name = client.parseDOM(post, 'a', ret='title')[0]
            t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith('GB') else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except:
                size = '0'
                pass
            items.append((name, url, size))
        return items
    except:
        source_utils.scraper_error('GLODLS')
        return items
0 votes
View Complete Implementation : kickass2.py
Copyright GNU General Public License v3.0
Author : a4k-openproject
def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'})
        for post in posts:
            ref = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0]
            # strip the redirect wrapper from the magnet link
            link = urllib.unquote(ref).decode('utf8').replace('https://mylink.me.uk/?url=', '').replace('https://mylink.cx/?url=', '')
            name = urllib.unquote_plus(re.search('dn=([^&]+)', link).groups()[0])
            t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith('GB') else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except:
                size = '0'
                pass
            self.items.append((name, link, size))
        return self.items
    except:
        source_utils.scraper_error('KICKASS2')
        return self.items
0 votes
View Complete Implementation : limetorrents.py
Copyright GNU General Public License v3.0
Author : a4k-openproject
def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'table', attrs={'class': 'table2'})[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = client.parseDOM(post, 'a', ret='href')[1]
            if '/search/' in data:
                continue
            # Remove non-ASCII characters...freakin limetorrents
            try:
                data = data.encode('ascii', 'ignore')
            except:
                pass
            # some broken links with whitespace
            data = re.sub('\s', '', data).strip()
            link = urlparse.urljoin(self.base_link, data)
            name = client.parseDOM(post, 'a')[1]
            t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith('GB') else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except:
                size = '0'
                pass
            self.items.append((name, link, size))
        return self.items
    except:
        source_utils.scraper_error('LIMETORRENTS')
        return self.items
0 votes
View Complete Implementation : torrentdownloads.py
Copyright GNU General Public License v3.0
Author : a4k-openproject
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            return self._sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']
        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        # TV searches use category '8', movie searches category '4'
        if 'tvshowtitle' in data:
            url = self.search.format('8', urllib.quote(query))
        else:
            url = self.search.format('4', urllib.quote(query))
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        headers = {'User-Agent': client.agent()}
        _html = client.request(url, headers=headers)
        threads = []
        # handle each <item> block of the feed in a worker thread
        for i in re.findall(r'<item>(.+?)</item>', _html, re.DOTALL):
            threads.append(workers.Thread(self._get_items, i))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except:
        source_utils.scraper_error('TORRENTDOWNLOADS')
        return self._sources