

Python http.FormRequest Code Examples

This article collects typical usage examples of scrapy.http.FormRequest in Python. If you are wondering exactly how to use http.FormRequest, or you simply want working examples, the curated code samples below should help. You can also browse further usage examples from the containing module, scrapy.http.


The sections below present 15 code examples of http.FormRequest, sorted by popularity by default.
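
Before diving into the collected examples, here is a minimal, self-contained sketch of the basic pattern. It is an illustration only: the spider name, URLs, and field names below are placeholders and do not come from any of the examples that follow. FormRequest is a Request subclass whose formdata dict is url-encoded into the request body (and the method defaults to POST when formdata is supplied).

import scrapy
from scrapy.http import FormRequest


class FormRequestDemoSpider(scrapy.Spider):
    # Spider name and URLs are illustrative placeholders.
    name = 'formrequest_demo'
    start_urls = ['https://example.com/search']

    def parse(self, response):
        # formdata values are strings; Scrapy url-encodes them into
        # the body, and the request method defaults to POST.
        yield FormRequest(
            'https://example.com/api/search',
            formdata={'query': 'scrapy', 'page': '1'},
            callback=self.parse_results,
        )

    def parse_results(self, response):
        self.logger.info('Fetched result page: %s', response.url)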

Example 1: parse

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse(self, response):
        # small images 200x200
        #urls = response.xpath('//div[@id="thumbsContainer"]//img/@data-original').extract()
        #urls = response.xpath('//img[@class="res-photo-thumbnail thumb-load lazy-photo-inner"]/@data-original').extract()
        #yield {'image_urls': urls}

        # big images 800x600
        #urls = [url.replace('200%3A200', '800%3A600') for url in urls]
        #yield {'image_urls': urls}

        # big images 1900x1200
        #urls = [url.replace('200%3A200', '1900%3A1200') for url in urls]
        #yield {'image_urls': urls}

        data = {
            'res_id': '16761868',  # place ID (alternative: '16780723')
            'offset': '30',    # change it
            'category': 'all',  # or 'food'
            'action': 'fetch_photos',
            'index': '30',
            'limit': '10',  # change it
        }

        url = 'https://www.zomato.com/php/load_more_res_pics.php'
        yield FormRequest(url, callback=self.parse_post, formdata=data) 
Developer: furas, Project: python-examples, Lines of code: 27, Source: main.py

Example 2: parse

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse(self, response):
        for songid in response.xpath('//a/@href').re(r'/song/(\d+)'):
            print('songIds:', songid)
            data = {'songIds': songid}  # 257524668
            yield FormRequest(url=self.songlink_url, formdata=data, callback=self.parse_song)
            # break 
Developer: makelove, Project: Python_Master_Courses, Lines of code: 8, Source: music.py

Example 3: start_requests

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def start_requests(self):
        print('Preparing login')
        return [FormRequest("https://accounts.coursera.org/api/v1/login",
                            # start_requests has no response object, so headers are built without one
                            headers = self.make_header(None),
                            formdata = {
                            "email": "1095511864@qq.com",
                            "password": "HUAZANG.55789260",
                            "webrequest": "true"
                            },
                            callback = self.parse_page
                            )] 
Developer: Andrew-liu, Project: scrapy_example, Lines of code: 13, Source: coursera_spider.py

Example 4: start_requests

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def start_requests(self):
        return [FormRequest(
            "http://www.zhihu.com/login",
            formdata = {'email':'xxx@gmail.com',
                'password':'123456'
                },
            callback = self.after_login
            )] 
Developer: KeithYue, Project: Zhihu_Spider, Lines of code: 10, Source: zhizhu_user_topic_spider.py
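
Examples 4, 6, and 12 all post credentials straight to a fixed login URL. When the login page embeds hidden fields such as a CSRF token (compare the _xsrf value handled by hand in Example 5 below), Scrapy's FormRequest.from_response can build the request from the page's own <form> instead, carrying the hidden inputs along automatically. A hedged sketch, assuming placeholder credentials and that the login form is the first <form> on the page:

import scrapy
from scrapy.http import FormRequest


class LoginDemoSpider(scrapy.Spider):
    # Name, URL, and credentials are illustrative placeholders.
    name = 'login_demo'
    start_urls = ['https://example.com/login']

    def parse(self, response):
        # from_response copies every field of the matched <form>,
        # hidden inputs (e.g. CSRF tokens) included; formdata only
        # overrides the fields supplied here.
        return FormRequest.from_response(
            response,
            formdata={'email': 'user@example.com', 'password': 'secret'},
            callback=self.after_login,
        )

    def after_login(self, response):
        # A simple success check; the marker text is site-specific.
        if b'login failed' in response.body:
            self.logger.error('Login failed')
            return
        self.logger.info('Logged in, landed on %s', response.url)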

Example 5: gen_topic_form

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def gen_topic_form(self, response):
        # yield the beginning topics
        sel = Selector(response)
        for topic_sel in sel.xpath('//div[@id="zh-profile-topic-list"]/div[contains(@class, "zm-profile-section-item")]'):
            # new user-topic relationship
            yield self.get_UT_item(topic_sel, response.url)

        # get the number of topics of one user
        num_topic = sel.xpath('//div[contains(@class, "zm-profile-section-wrap")]/div[contains(@class, "zm-profile-section-head")]//span[contains(@class, "zm-profile-section-name")]/text()')
        number_str = num_topic.extract()[0]
        # print number_str
        p = re.compile(r'\d+')
        m = p.findall(number_str)
        if m:
            num_topic = int(m[0])
            # crawl the remaining topics of a user
            base_line = 20
            if num_topic > 20:
                while num_topic > 0:
                    yield FormRequest(
                            url = response.url,
                            formdata = {
                                'start': '0',
                                'offset': str(base_line),
                                '_xsrf': self.xsrf
                                },
                            callback=self.parse
                            )
                    num_topic = num_topic - 20
                    base_line += 20 
Developer: KeithYue, Project: Zhihu_Spider, Lines of code: 32, Source: zhizhu_user_topic_spider.py

Example 6: start_requests

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def start_requests(self):
        return [FormRequest(
            "http://www.zhihu.com/login",
            formdata = {'email':'example.com',
                'password':'123456'
                },
            callback = self.after_login
            )] 
Developer: KeithYue, Project: Zhihu_Spider, Lines of code: 10, Source: zhihu_spider.py

Example 7: parse

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse(self, response):
    url = "https://downloadcenter.intel.com/SearchResult.aspx?lang=eng"

    search_form = {
      "search_downloads": ".BIO",
      "ctl00$body$submit_search_downloads": "Search downloads",
      "ctl00$body$searchKeyword": "BIO"
    }

    return [FormRequest(url= url, method= "POST",
      formdata= search_form, callback= self.parse_form)] 
Developer: theopolis, Project: uefi-spider, Lines of code: 13, Source: intel_spider.py

Example 8: parse_again

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse_again(self, response):
        sel = Selector(response)

        hidden_fields = {}
        inputs = sel.xpath("//input")
        for ele in inputs:
            input_type = ele.xpath(".//@type").extract()[0]
            # skip non-hidden inputs before touching @value, which they may lack
            if input_type not in ["hidden"]:
                continue
            value = ele.xpath(".//@value").extract()[0]
            name = ele.xpath(".//@name").extract()[0]
            hidden_fields[name] = value

        for product_type in self.product_types:
            ### Create a POST form and apply a generated ScriptManager
            form_data = _select_form(1, product_type)
            for field in hidden_fields:
                ### Replace static fields with page-generated inputs.
                form_data[field] = hidden_fields[field]
            # print(form_data)
            yield FormRequest(formdata= form_data, method= "POST",
                headers= {
                    "Content-Type": "application/x-www-form-urlencoded",
                    #"X-MicrosoftAjax": "Delta=true",
                    "X-Requested-With": "XMLHttpRequest",
                    "User-Agent": self._get_uas()
                },
                url= self.select_urls[0],
                #meta= {"cookiejar": "GLOBAL"},
                callback= self.parse_series)
            # only the first product_type is submitted; this return ends the generator
            return
Developer: theopolis, Project: uefi-spider, Lines of code: 33, Source: asus_spider.py
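
A side note on Example 8: collecting the page's hidden <input> fields and merging them into the outgoing form is essentially what FormRequest.from_response (see the sketch after Example 4) does automatically. The manual route is taken here, presumably because part of the form data is generated outside the page's form by _select_form and only then overlaid with the page-generated values.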

Example 9: parse

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse(self, response):
        ### Generate a search for AMD and Intel chips
        intel_search = self._get_vars(170, 1)
        amd_search   = self._get_vars(171, 1)
        yield FormRequest(url= self.start_urls[0], method= "POST", headers= json_headers,
            formdata= intel_search, callback= self.parse_search)
        yield FormRequest(url= self.start_urls[0], method= "POST", headers= json_headers,
            formdata= amd_search, callback= self.parse_search) 
Developer: theopolis, Project: uefi-spider, Lines of code: 10, Source: msi_spider.py

Example 10: parse_search

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse_search(self, response):
        sel = Selector(response)

        ### Parse each sub-product type.
        searches = []
        product_selector = sel.css(".mr20").xpath("@no")
        if product_selector:
            pno = product_selector.extract()[0]

            products = sel.css(".ProdSel-item")
            for product in products:
                no = product.xpath("@no").extract()[0]
                searches.append((no, pno))
        # print(searches)

        ### Parse the actual products/boards.
        boards = []
        items = sel.css(".Prod-item")
        for item in items:
            title = item.xpath("@title").extract()[0]
            no = item.xpath("@no").extract()[0]
            boards.append((title, no))
        # print(boards)

        for sub_search in searches:
            search_vars = self._get_vars(sub_search[0], sub_search[1])
            yield FormRequest(url= self.start_urls[0], method= "POST", headers= json_headers,
                formdata= search_vars, callback= self.parse_search)

        for board in boards:
            url = "http://us.msi.com/product/mb/%s.html" % board[0]
            item = MsiUpdateLinkItem()
            item["id"] = board[1]
            item["title"] = board[0]
            item["url"] = url

            yield Request(url= "%s#/?div=BIOS" % url, callback= self.parse_board, 
                meta= {"attrs": item})
        pass 
Developer: theopolis, Project: uefi-spider, Lines of code: 41, Source: msi_spider.py

Example 11: parse

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse(self, response):
        hidden = lambda id: response.xpath(
                '/html/body/input[@id="{}"]/@data-value'.
                format(id)).extract_first()

        total_pages = int(hidden('quantidadeTotalPaginas').replace('.',''))

        hashfragment = OrderedDict([
            ('pagina', None),
            ('semente', self.seed or hidden('semente')),
        ])

        formdata = OrderedDict([
            ('tipoOferta', '1'),
            ('paginaAtual', None),
            ('pathName', parse_url(response.url).path),
            ('hashFragment', ''),
        ])

        headers = {'X-Requested-With': 'XMLHttpRequest'}
        url = 'https://www.zapimoveis.com.br/Busca/RetornarBuscaAssincrona/'

        from_page = self.start
        if self.count:
            to_page = min(self.start + self.count - 1, total_pages)
        else:
            to_page = total_pages

        self.crawler.stats.set_value('total_pages', total_pages)
        self.crawler.stats.set_value('selected_pages',
                                     max(0, to_page - from_page + 1))

        for page in range(from_page, to_page + 1):
            hashfragment['pagina'] = formdata['paginaAtual'] = str(page)
            formdata['hashFragment'] = json.dumps(hashfragment,
                                                  separators=(',', ':'))
            yield FormRequest(
                    url,
                    headers=headers,
                    formdata=formdata,
                    callback=self.parse_busca) 
Developer: pauloromeira, Project: realestate-scraper, Lines of code: 43, Source: zapimoveis.py

Example 12: start_requests

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def start_requests(self):
        return [FormRequest(
            "http://www.zhihu.com/login",
            formdata={'email': 'june.chan@foxmail.com',
                      'password': 'czj0617_zhihu'
                      },
            callback=self.after_login
        )] 
Developer: openslack, Project: openslack-crawler, Lines of code: 10, Source: zhihu_ask_spider.py

Example 13: start_requests

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def start_requests(self):
        for i, url in enumerate(self.start_urls):
        yield FormRequest(url, meta={'cookiejar': i},
                          headers=self.headers,
                          cookies=self.cookies,
                          callback=self.parse_item)  # jump to login page
Developer: openslack, Project: openslack-crawler, Lines of code: 8, Source: login1_spider.py

Example 14: parse

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def parse(self, response):
        try:
            for news in response.css('div.news-card'):
                self.urls_parsed += 1
                try:
                    item = ScrapenewsItem()
                    item['image'] = news.css('div.news-card-image::attr(style)').extract_first()[23:-3]
                    item['title'] = news.css('a.clickable>span::text').extract_first()
                    item['content'] = news.css('div[itemprop*=articleBody]::text').extract_first()
                    item['newsDate'] = news.css('span.time::attr(content)').extract_first()[:-5]
                    item['link'] = news.css('div.read-more>a::attr(href)').extract_first()
                    item['source'] = 105
                    yield item
                    self.urls_scraped += 1
                except Exception as e:
                    logger.error(__name__ + " [UNHANDLED] Unable to Extract Data : " + str(e))
                    self.urls_dropped += 1

            #news_id extraction
            pattern = re.compile(r'var min_news_id\s+=\s+"(.*?)"')
            js = response.xpath('//script[@type="text/javascript"]/text()').extract()[-1]
            self.news_id = pattern.search(js).group(1)

            while (self.pages > 1 and not self.infinite):
                yield FormRequest('https://www.inshorts.com/en/ajax/more_news',
                                    formdata={'news-offset' : self.news_id},
                                    callback=self.parse_more_news,
                                    errback=self.errorRequestHandler,
                                    dont_filter=True)
                self.pages -= 1

            while (self.infinite):
                yield FormRequest('https://www.inshorts.com/en/ajax/more_news',
                                    formdata={'news-offset' : self.news_id},
                                    callback=self.parse_more_news,
                                    errback=self.errorRequestHandler,
                                    dont_filter=True)
        except Exception as e:
            logger.error(__name__ + " [UNHANDLED] " + str(e) + " for response url " + response.url) 
Developer: vipulgupta2048, Project: scrape, Lines of code: 41, Source: inshorts.py

Example 15: start_requests

# Module to import: from scrapy import http [as alias]
# Or: from scrapy.http import FormRequest [as alias]
def start_requests(self):
        count = self.sql.get_proxy_count(self.name)
        count_httpbin = self.sql.get_proxy_count(config.httpbin_table)

        ids = self.sql.get_proxy_ids(self.name)
        ids_httpbin = self.sql.get_proxy_ids(config.httpbin_table)

        for i in range(0, count + count_httpbin):
            table = self.name if (i < count) else config.httpbin_table
            id = ids[i] if i < count else ids_httpbin[i - len(ids)]

            proxy = self.sql.get_proxy_with_id(table, id)
            if proxy is None:
                continue

            for url in self.urls:
                cur_time = time.time()
                yield FormRequest(
                        url = url,
                        headers = self.headers,
                        method = 'POST',
                        meta = {
                            'cur_time': cur_time,
                            'download_timeout': self.timeout,
                            'proxy_info': proxy,
                            'table': table,
                            'id': proxy.id,
                            'proxy': 'http://%s:%s' % (proxy.ip, proxy.port),
                            'vali_count': proxy.vali_count,
                        },
                        cookies = {
                            'Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6': '1488937030',
                            '_ga': 'GA1.2.40497390.1488937014',
                            'TG-TRACK-CODE': 'search_code',
                            'index_location_city': '%E5%8C%97%E4%BA%AC',
                            'LGRID': '20170308093710-bf6755eb-039f-11e7-8025-525400f775ce',
                            'Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6': '1488881288,1488936799,1488936947,1488937014',
                            'JSESSIONID': 'BDCBB6167F960CE43AF54B75A651F586',
                            'LGSID': '20170308093653-b59316f0-039f-11e7-9229-5254005c3644',
                            'LGUID': '20170308093653-b593185f-039f-11e7-9229-5254005c3644',
                            'user_trace_token': '20170308093654-723efcfac8fb4c28a670d073d5113e02',
                            'SEARCH_ID': '4db4dc3dea1c46b49018ae5421b53ffa'
                        },
                        formdata = {
                            'first': 'true',
                            'kd': 'ios',
                            'pn': '1',
                        },
                        dont_filter = True,
                        callback = self.success_parse,
                        errback = self.error_parse,
                ) 
Developer: awolfly9, Project: IPProxyTool, Lines of code: 54, Source: lagou.py


Note: the scrapy.http.FormRequest examples in this article were compiled by 纯净天空 (vimsky) from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. Refer to each project's license before redistributing or reusing the code, and do not republish without permission.