Here are examples of the Python API scrapy.utils.python.to_native_str taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
16 Examples
3
View Complete Implementation : cookies.py
Copyright MIT License
Author : wistbean
def _debug_cookie(self, request, spider):
    """Log the Cookie headers being sent with *request* when debugging is enabled."""
    if not self.debug:
        return
    decoded = [to_native_str(header, errors='replace')
               for header in request.headers.getlist('Cookie')]
    if decoded:
        joined = "\n".join("Cookie: {}\n".format(c) for c in decoded)
        logger.debug("Sending cookies to: {}\n{}".format(request, joined),
                     extra={'spider': spider})
3
View Complete Implementation : cookies.py
Copyright MIT License
Author : wistbean
def _debug_set_cookie(self, response, spider):
    """Log the Set-Cookie headers received on *response* when debugging is enabled."""
    if not self.debug:
        return
    decoded = [to_native_str(header, errors='replace')
               for header in response.headers.getlist('Set-Cookie')]
    if decoded:
        joined = "\n".join("Set-Cookie: {}\n".format(c) for c in decoded)
        logger.debug("Received cookies from: {}\n{}".format(response, joined),
                     extra={'spider': spider})
3
View Complete Implementation : robotstxt.py
Copyright MIT License
Author : wistbean
def process_request_2(self, rp, request, spider):
    """Raise IgnoreRequest if the robots.txt parser *rp* forbids fetching *request*.

    A ``None`` parser (robots.txt unavailable) allows the request through.
    """
    if rp is None:
        return
    agent = to_native_str(self._useragent)
    if rp.can_fetch(agent, request.url):
        return
    logger.debug("Forbidden by robots.txt: %(request)s",
                 {'request': request}, extra={'spider': spider})
    self.crawler.stats.inc_value('robotstxt/forbidden')
    raise IgnoreRequest("Forbidden by robots.txt")
3
View Complete Implementation : exporters.py
Copyright MIT License
Author : wistbean
def _build_row(self, values):
    """Yield each value coerced to a native string with ``self.encoding``.

    Values that cannot be converted (TypeError) are passed through unchanged.
    """
    for value in values:
        try:
            yield to_native_str(value, self.encoding)
        except TypeError:
            yield value
3
View Complete Implementation : cookies.py
Copyright MIT License
Author : wistbean
def header_items(self):
    """Return request headers as ``(name, [values])`` pairs of native strings."""
    result = []
    for raw_name, raw_values in self.request.headers.items():
        name = to_native_str(raw_name, errors='replace')
        values = [to_native_str(value, errors='replace') for value in raw_values]
        result.append((name, values))
    return result
3
View Complete Implementation : text.py
Copyright MIT License
Author : wistbean
def _set_url(self, url):
    """Store *url*, converting unicode text to a native str via the response encoding.

    Non-text URLs are delegated to the parent class. On Python 2 a unicode
    URL requires a known encoding, otherwise TypeError is raised.
    """
    if not isinstance(url, six.text_type):
        super(TextResponse, self)._set_url(url)
        return
    if six.PY2 and self.encoding is None:
        raise TypeError("Cannot convert unicode url - %s "
                        "has no encoding" % type(self).__name__)
    self._url = to_native_str(url, self.encoding)
3
View Complete Implementation : text.py
Copyright MIT License
Author : wistbean
def _body_inferred_encoding(self):
    """Detect the body encoding once via ``html_to_unicode`` and cache it.

    Also caches the decoded unicode body as a side effect.
    """
    if self._cached_benc is None:
        ctype = to_native_str(self.headers.get(b'Content-Type', b''))
        detected_encoding, unicode_body = html_to_unicode(
            ctype, self.body,
            auto_detect_fun=self._auto_detect_fun,
            default_encoding=self._DEFAULT_ENCODING)
        self._cached_benc = detected_encoding
        self._cached_ubody = unicode_body
    return self._cached_benc
3
View Complete Implementation : request.py
Copyright MIT License
Author : wistbean
def referer_str(request):
    """ Return Referer HTTP header suitable for logging. """
    referrer = request.headers.get('Referer')
    if referrer is not None:
        return to_native_str(referrer, errors='replace')
    return None
0
View Complete Implementation : lxmlhtml.py
Copyright MIT License
Author : creativecommons
def _extract_links(self, selector, response_url, response_encoding, base_url):
    '''
    Pretty much the same function, just added 'ignore' to to_native_str()
    so that undecodable bytes in URLs are dropped instead of raising.
    '''
    links = []
    # hacky way to get the underlying lxml parsed document
    for el, attr, attr_val in self._iter_links(selector.root):
        # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
        try:
            attr_val = urljoin(base_url, attr_val)
        except ValueError:
            continue  # skipping bogus links
        else:
            url = self.process_attr(attr_val)
            if url is None:
                continue
        # added 'ignore' to encoding errors
        url = to_native_str(url, encoding=response_encoding,
                            errors='ignore')
        # to fix relative links after process_value
        url = urljoin(response_url, url)
        link = Link(url, _collect_string_content(el) or u'',
                    nofollow=rel_has_nofollow(el.get('rel')))
        links.append(link)
    return self._deduplicate_if_needed(links)
0
View Complete Implementation : cookies.py
Copyright MIT License
Author : wistbean
def get_header(self, name, default=None):
    """Return the named request header (or *default*) as a native string."""
    value = self.request.headers.get(name, default)
    return to_native_str(value, errors='replace')