scrapy.http.Request - Python examples

Here are examples of the Python API scrapy.http.Request, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.

145 Examples 7

3 View Complete Implementation : openwrt.py
Copyright MIT License
Author : firmadyne
    def parse(self, response):
        """Follow every usable anchor on the page, tagging it with a version.

        Yields one ``Request`` per ``<a>`` element.  The target is the
        anchor's ``href`` resolved against ``response.url``; the anchor's
        first direct text node is passed through
        ``FirmwareLoader.find_version_period`` and stored in
        ``meta["version"]``.  Each request is handled by ``self.parse_url``.

        Anchors that have no ``href`` attribute or no direct text node are
        skipped: indexing the empty ``extract()`` result would otherwise
        raise ``IndexError`` and stop the remaining links on the page from
        being processed.
        """
        for link in response.xpath("//a"):
            texts = link.xpath("text()").extract()
            hrefs = link.xpath("@href").extract()
            if not texts or not hrefs:
                # Nothing to follow (no href) or nothing to derive the
                # version from (no text node).
                continue

            yield Request(
                url=urlparse.urljoin(response.url, hrefs[0]),
                headers={"Referer": response.url},
                meta={"version": FirmwareLoader.find_version_period(texts[0])},
                callback=self.parse_url)

3 View Complete Implementation : trendnet.py
Copyright MIT License
Author : firmadyne
    def parse_product(self, response):
        """Follow the "downloads" tab of a product page.

        Looks at every anchor inside ``<ul class="etabs">`` and, for each one
        whose first text node contains "downloads" (case-insensitive), yields
        a ``Request`` for its ``href`` handled by ``self.parse_download``.
        ``response.meta["product"]`` is forwarded in the new request's meta.

        Tabs without any text or without an ``href`` are skipped, since they
        can neither match "downloads" nor be followed.
        """
        # The attribute is "class"; the previous "clast" spelling matches no
        # HTML attribute, so the selector could never return a tab.
        for tab in response.xpath("//ul[@class='etabs']//a"):
            texts = tab.xpath(".//text()").extract()
            hrefs = tab.xpath("./@href").extract()
            if not texts or not hrefs:
                continue

            if "downloads" in texts[0].lower():
                yield Request(
                    url=urlparse.urljoin(response.url, hrefs[0]),
                    meta={"product": response.meta["product"]},
                    headers={"Referer": response.url},
                    callback=self.parse_download)

3 View Complete Implementation : actiontec.py
Copyright MIT License
Author : firmadyne
    def parse(self, response):
        """Yield one product-page request per link in the product boxes.

        For every anchor inside ``<div class="newboxes2">`` the first text
        node is used as the product name.  When that text starts with a
        parenthesised token, only the token (with any remaining parentheses
        removed) is kept.  The request targets the anchor's first ``href``
        resolved against ``response.url``, carries the name in
        ``meta["product"]`` and is handled by ``self.parse_product``.
        """
        # The attribute is "class"; the previous "clast" spelling matches no
        # HTML attribute, so the selector could never return a link.
        for link in response.xpath("//div[@class='newboxes2']//a"):
            product = link.xpath(".//text()").extract()[0]
            # some product strings are e.g. "(GT701-WRU) - 54 Mbps Wireless
            # Cable/DSL Router"
            actual = re.match(r"\(([\w ,\\/()-]+?)\)", product)
            if actual:
                product = actual.group(1).replace("(", "").replace(")", "")

            yield Request(
                url=urlparse.urljoin(
                    response.url, link.xpath(".//@href").extract()[0]),
                headers={"Referer": response.url},
                meta={"product": product},
                callback=self.parse_product)

3 View Complete Implementation : tp-link_zh.py
Copyright MIT License
Author : firmadyne
    def parse(self, response):
        """Queue product pages first, then the pagination links.

        Every ``href`` found under the ``mainlist`` table is requested with
        ``self.parse_product`` as callback; every ``href`` directly under the
        ``paging`` div is requested with this method as callback.  All URLs
        are resolved against ``response.url``, which is also sent as Referer.
        """
        base = response.url

        product_links = response.xpath("//table[@id='mainlist']//a/@href")
        for href in product_links.extract():
            target = urlparse.urljoin(base, href)
            yield Request(
                url=target,
                headers={"Referer": base},
                callback=self.parse_product)

        pager_links = response.xpath("//div[@id='paging']/a/@href")
        for href in pager_links.extract():
            target = urlparse.urljoin(base, href)
            yield Request(
                url=target,
                headers={"Referer": base},
                callback=self.parse)

3 View Complete Implementation : huawei_en.py
Copyright MIT License
Author : firmadyne
    def parse_product(self, response):
        """Request the file list of every product in a JSON response.

        The response body is decoded as JSON and iterated; each entry must
        provide ``productId`` and ``productCode``.  One XMLHttpRequest-style
        ``Request`` is yielded per entry, with the product code stored in
        ``meta["product"]`` and ``self.parse_download`` as callback.
        """
        for entry in json.loads(response.body_as_unicode()):
            # Path on the same host; the query carries the product id and
            # the spider's region code.
            path = "/support/services/service/file/list?productID=%s&siteCode=%s" % (
                entry["productId"], self.region)
            target = urlparse.urljoin(response.url, path)
            product_meta = {"product": entry["productCode"]}
            ajax_headers = {
                "Referer": response.url,
                "X-Requested-With": "XMLHttpRequest",
            }
            yield Request(
                url=target,
                meta=product_meta,
                headers=ajax_headers,
                callback=self.parse_download)

3 View Complete Implementation : att.py
Copyright MIT License
Author : firmadyne
    def parse(self, response):
        """Walk the links of a page, emitting firmware items and sub-requests.

        For every ``href`` on the page:

        * ``".."`` and ``"/"`` are ignored;
        * values ending in ``.bin`` or ``.upg`` produce a ``FirmwareImage``
          item carrying the href as ``url`` and ``self.name`` as ``vendor``;
        * any other value containing ``"/"`` is requested again with this
          same method as callback.
        """
        for href in response.xpath("//a/@href").extract():
            if href in ("..", "/"):
                continue

            if href.endswith((".bin", ".upg")):
                loader = FirmwareLoader(item=FirmwareImage(), response=response)
                loader.add_value("url", href)
                loader.add_value("vendor", self.name)
                yield loader.load_item()
                continue

            if "/" in href:
                target = urlparse.urljoin(response.url, href)
                yield Request(
                    url=target,
                    headers={"Referer": response.url},
                    callback=self.parse)

3 View Complete Implementation : trendnet.py
Copyright MIT License
Author : firmadyne
    def parse(self, response):
        """Request the product page behind each labelled ``SUBTYPE_ID`` option.

        Options without any text node are ignored.  For the others, the first
        text node becomes ``meta["product"]`` and the option's ``value``
        attribute, resolved against ``response.url``, is the request URL.
        Requests are handled by ``self.parse_product``.
        """
        for option in response.xpath("//select[@id='SUBTYPE_ID']/option"):
            labels = option.xpath(".//text()")
            if not labels:
                # Unlabelled placeholder entry; nothing to name the product.
                continue

            label = labels.extract()[0]
            value = option.xpath("./@value").extract()[0]
            target = urlparse.urljoin(response.url, value)

            yield Request(
                url=target,
                meta={"product": label},
                headers={"Referer": response.url},
                callback=self.parse_product)

3 View Complete Implementation : huawei_zh.py
Copyright MIT License
Author : firmadyne
    def parse_category(self, response):
        """Request the product list of every category in a JSON response.

        The response body is decoded as JSON and iterated; each entry must
        provide ``productId``.  One XMLHttpRequest-style ``Request`` is
        yielded per entry, handled by ``self.parse_product``.
        """
        for entry in json.loads(response.body_as_unicode()):
            # Path on the same host; the query carries the category's id and
            # the spider's region code.
            path = "/support/services/service/product/list?productID=%s&siteCode=%s" % (
                entry["productId"], self.region)
            target = urlparse.urljoin(response.url, path)
            ajax_headers = {
                "Referer": response.url,
                "X-Requested-With": "XMLHttpRequest",
            }
            yield Request(
                url=target,
                headers=ajax_headers,
                callback=self.parse_product)

3 View Complete Implementation : mercury.py
Copyright MIT License
Author : firmadyne
    def parse(self, response):
        """Yield one listing request per page, from 1 to the last page number.

        The last page number is read as an integer from the text of the last
        anchor inside the element with ``class="pagebar"``.  Each listing
        page is handled by ``self.parse_list``.

        Raises ``IndexError`` if no such anchor text exists and ``ValueError``
        if the text is not an integer.
        """
        # The attribute is "class"; the previous "clast" spelling matches no
        # HTML attribute, so extract()[0] always failed with IndexError.
        end_page = int(response.xpath("//*[@class='pagebar']//a[last()]//text()").extract()[0])
        for cur_page in range(1, end_page + 1):
            url = 'http://service.mercurycom.com.cn/download-tip-software-{}-0-1.html'.format(cur_page)
            yield Request(
                url=url,
                headers={"Referer": response.url},
                callback=self.parse_list)

3 View Complete Implementation : dlink.py
Copyright MIT License
Author : firmadyne
    def parse_product(self, response):
        """Request the file list for each hardware revision of a product.

        Iterates the options of the ``ddlHardWare`` select.  The option's
        first text node is the revision label and its ``value`` attribute is
        sent as the ``ver`` query parameter; options with an empty value are
        ignored.  ``response.meta["product"]`` and the revision label are
        forwarded in meta, and the reply is handled by ``self.parse_json``.
        """
        for option in response.xpath("//select[@id='ddlHardWare']/option"):
            revision = option.xpath(".//text()").extract()[0]
            value = option.xpath("./@value").extract()[0]

            if not value:
                continue

            endpoint = "/ajax/ajax.ashx?action=productfile&ver=%s" % value
            yield Request(
                url=urlparse.urljoin(response.url, endpoint),
                headers={
                    "Referer": response.url,
                    "X-Requested-With": "XMLHttpRequest",
                },
                meta={
                    "product": response.meta["product"],
                    "revision": revision,
                },
                callback=self.parse_json)