

Python scrapy.FormRequest Code Examples

This article collects typical usage examples of scrapy.FormRequest in Python. If you are wondering how exactly to use scrapy.FormRequest, what it is for, or what working examples look like, the curated code examples below may help. You can also explore further usage examples from the scrapy package it belongs to.


Below are 15 code examples of scrapy.FormRequest, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
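
Before diving into the project-specific examples, here is a minimal, self-contained sketch of the two common ways to send a POST form with Scrapy: letting scrapy.FormRequest.from_response fill in a form found on a page, and building a scrapy.FormRequest directly. The spider name, URLs, and form field names below are placeholders, not taken from any of the projects listed here.

import scrapy


class FormExampleSpider(scrapy.Spider):
    """Minimal sketch; example.com and the form field names are placeholders."""
    name = 'form_example'
    start_urls = ['https://example.com/login']

    def parse(self, response):
        # Fill in the login <form> found on the page and submit it as a POST
        yield scrapy.FormRequest.from_response(
            response,
            formdata={'username': 'user', 'password': 'secret'},
            callback=self.after_login,
        )

    def after_login(self, response):
        # Build a POST request directly when there is no form on the page to start from
        yield scrapy.FormRequest(
            'https://example.com/api/search',
            formdata={'query': 'scrapy'},
            callback=self.parse_results,
        )

    def parse_results(self, response):
        yield {'url': response.url, 'length': len(response.text)}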

Example 1: parse

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def parse(self, response):
        'Extract the names of the available courses'

        html = response.body_as_unicode()
        codigo_cursos = [line for line in html.splitlines()
                         if 'var listaProcurar' in line][0]
        json_cursos = codigo_cursos.replace('var listaProcurar =', '').strip()[:-1]
        for curso_busca in json.loads(json_cursos):
            curso_busca = curso_busca['id']
            yield FormRequest(
                callback=self.parse_cidades,
                formdata={'opcao': '1', 'tipo': '3', 'valor': curso_busca},
                meta={'curso_busca': curso_busca},
                method='POST',
                url='http://prounialuno.mec.gov.br/consulta/resultado-procurar/',
            ) 
Developer: turicas, Project: cursos-prouni, Lines of code: 18, Source file: cursos_prouni.py

Example 2: get_news

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def get_news(self, response):
        response.meta['iter_time'] += 1
        news_items = json.loads(response.text)

        if news_items:
            for n in news_items:
                yield {
                    'website': '公視',
                    'url': ARTICLE_PREFIX + n['news_id'],
                    'title': n['subject'],
                    'date': n['news_date'],
                    'content': n['content'],
                    'category': n['program_name']
                }
            yield scrapy.FormRequest(
                url="https://news.pts.org.tw/list/getmore.php",
                callback=self.get_news,
                meta=response.meta,
                formdata={
                    'page': str(response.meta['iter_time'])
                }) 
Developer: TaiwanStat, Project: Taiwan-news-crawlers, Lines of code: 23, Source file: pts_spider.py

Example 3: parse_person_center

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def parse_person_center(self, response):
        """
        Parse the personal center page
        :param response:
        :return:
        """
        if response.url == self.person_center_url:
            print('Reached the personal center page')
            ck = response.xpath('//input[@name="ck"]/@value').get()
            print('Extracted ck: %s' % ck)
            formdata = {
                'ck': ck,
                'signature': '时光如水,岁月如斯'
            }
            # Send a POST request to update the signature
            yield scrapy.FormRequest(self.edit_signature, formdata=formdata)
        else:
            print('Failed to reach the personal center page')
Developer: xingag, Project: spider_python, Lines of code: 20, Source file: douban.py

Example 4: request_inventory_data

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def request_inventory_data(self):
        today = pd.Timestamp.today()
        requests = []
        for date in pd.date_range(start=today.date() - pd.Timedelta(weeks=520), end=today):
            the_dir = get_exchange_cache_path(security_type='future', exchange='dce',
                                              the_date=to_timestamp(date),
                                              data_type="day_inventory") + '.zip'
            if date.dayofweek < 5 and not os.path.exists(the_dir):
                requests.append(FormRequest(
                    url="http://www.dce.com.cn/publicweb/quotesdata/exportMemberDealPosiQuotesBatchData.html",
                    formdata={
                        'batchExportFlag': 'batch',
                        'contract.contract_id': 'all',
                        'contract.variety_id': 'a',
                        'year': str(date.year),
                        'month': str(date.month - 1),
                        'day': str(date.day),
                        'memberDealPosiQuotes.trade_type': '0',
                        'memberDealPosiQuotes.variety': 'all'
                    },
                    callback=self.download_dce_kline_data,
                    meta={'filename': the_dir}))
        return requests
Developer: foolcage, Project: fooltrader, Lines of code: 21, Source file: future_dce_spider.py

Example 5: request_currentyear_kdata

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def request_currentyear_kdata(self):
        today = pd.Timestamp.today()
        requests = []
        for date in pd.date_range(start=today.date() - pd.Timedelta(days=today.dayofyear - 1), end=today):
            the_dir = get_exchange_cache_path(security_type='future', exchange='dce',
                                              the_date=to_timestamp(date),
                                              data_type="day_kdata") + '.xls'
            if date.dayofweek < 5 and not os.path.exists(the_dir):
                requests.append(FormRequest(
                    url="http://www.dce.com.cn/publicweb/quotesdata/exportDayQuotesChData.html",
                    formdata={
                        'year': str(date.year),
                        'month': str(date.month - 1),
                        'day': str(date.day),
                        'dayQuotes.trade_type': '0',
                        'dayQuotes.variety': 'all',
                        'exportType': 'excel'
                    },
                    callback=self.download_dce_kline_data,
                    meta={'filename': the_dir}))
        return requests
Developer: foolcage, Project: fooltrader, Lines of code: 19, Source file: future_dce_spider.py

Example 6: parse_login

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def parse_login(self, response):
        self._check_login_params()
        self._login = False
        form_data = {
            self.username_field: self.username,
            self.password_field: self.password
        }
        if hasattr(self, 'form_xpath'):
            return scrapy.FormRequest.from_response(
                response,
                formxpath=self.form_xpath,
                formdata=form_data,
                callback=self.parse_after_login
            )
        elif hasattr(self, 'form_url'):
            return scrapy.FormRequest(
                self.form_url,
                formdata=form_data,
                callback=self.parse_after_login
            ) 
Developer: aplanas, Project: kmanga, Lines of code: 22, Source file: mangaspider.py

Example 7: parse

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def parse(self, response):
        item_loader = ItemLoader(item=MyItem(), response=response)
        item_loader.default_input_processor = MapCompose(remove_tags)
        #item_loader.add_css("", "")
        #item_loader.add_css("", "")
        #item_loader.add_css("", "")
        yield FormRequest("POST_URL", formdata={'parameter': 'p'},
                                        meta={'item': item_loader.load_item()}, callback=self.populate_field) 
Developer: zseta, Project: scrapy-templates, Lines of code: 10, Source file: post_pass_item.py

Example 8: gen_detail

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def gen_detail(self, **kwargs):
        """
        Build the request that fetches a patent's detail page
        :param patent_id, sipo, data_item, nrdAn, nrdPn:
        :return:
        """
        patent_id = str(kwargs.pop('patent_id'))
        formdata = url_detail.get('form_data')
        formdata['nrdAn'] = patent_id.split('.')[0]
        formdata['cid'] = patent_id
        formdata['sid'] = patent_id

        return FormRequest(
            url=url_detail.get('url'),
            formdata=formdata,
            headers=url_detail.get('headers'),
            callback=self.parse_patent_detail,
            meta={'sipo': kwargs.pop('sipo'), 'data_item': kwargs.pop('data_item'), 'patent_id': patent_id,
                  'law_info': {'nrdAn': kwargs.pop('nrdAn'), 'nrdPn': kwargs.pop('nrdPn')}}
        )
Developer: will4906, Project: PatentCrawler, Lines of code: 22, Source file: patent.py

Example 9: gen_related_info

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def gen_related_info(self, **kwargs):
        """
        Build the request for related information, including legal-status and patent-family data
        :param sipo:
        :param data_item:
        :param nrdAn:
        :param nrdPn:
        :return:
        """
        form_data = url_related_info.get('form_data')
        form_data['literaInfo.nrdAn'] = kwargs.pop('nrdAn')
        form_data['literaInfo.nrdPn'] = kwargs.pop('nrdPn')
        return FormRequest(
            url=url_related_info.get('url'),
            method='POST',
            dont_filter=True,  # duplicate requests may occur here, but we still want the data, so skip the dupe filter
            formdata=form_data,
            callback=self.parse_related_info,
            meta={'sipo': kwargs.pop('sipo'), 'data_item': kwargs.pop('data_item'), 'patent_id': kwargs.pop('patent_id')}
        )
Developer: will4906, Project: PatentCrawler, Lines of code: 22, Source file: patent.py

Example 10: gen_full_text

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def gen_full_text(self, **kwargs):
        """
        Build the request for the patent's full text
        :param patent_id:
        :param sipo:
        :param data_item:
        :return:
        """
        patent_id = str(kwargs.pop('patent_id'))
        form_data = url_full_text.get('form_data')
        form_data['nrdAn'] = patent_id.split('.')[0]
        form_data['cid'] = patent_id
        form_data['sid'] = patent_id
        return FormRequest(
            url=url_full_text.get('url'),
            method='POST',
            dont_filter=True,  # duplicate requests may occur here, but we still want the data, so skip the dupe filter
            formdata=form_data,
            callback=self.parse_full_text,
            meta={'sipo': kwargs.pop('sipo'), 'data_item': kwargs.pop('data_item')}
        )
Developer: will4906, Project: PatentCrawler, Lines of code: 23, Source file: patent.py

Example 11: start_requests

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def start_requests(self):
        """
        Initial requests
        :return:
        """
        for sipo in self.query_list:
            headers = url_search.get('headers')
            search_exp_cn = sipo.search_exp_cn
            logger.info('Search expression --- %s' % search_exp_cn)
            form_data = url_search.get('form_data')
            form_data['searchCondition.searchExp'] = search_exp_cn
            yield FormRequest(
                url=url_search.get('url'),
                callback=self.parse,
                method="POST",
                headers=headers,
                formdata=form_data,
                meta={'sipo': sipo}
            )
Developer: will4906, Project: PatentCrawler, Lines of code: 21, Source file: patent.py

Example 12: _login

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def _login(self, response):
        response = yield scrapy.Request(
            "https://www.{}/login/".format(self.name),
            meta={"cache_expires": timedelta(days=14)},
        )
        response = yield scrapy.FormRequest(
            "https://www.{}/login/".format(self.name),
            formdata=OrderedDict(
                [
                    ("user[control][login]", "true"),
                    ("permanent", "checked"),
                    ("username", self._username),
                    ("password", self._password),
                ]
            ),
            meta={"cache_expires": timedelta(days=14)},
        )
        if response and response.css(".notloggedin"):
            # We tried to login but we failed.
            self.logger.error("Login failed: Username or password wrong") 
Developer: PyFeeds, Project: PyFeeds, Lines of code: 22, Source file: nachrichten_at.py

Example 13: parse_item

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def parse_item(self,response):
        #print('parse_item] url:', response.url)
        #print('parse_item] text:', response.text)

        #for quote in response.xpath('//div[contains(@style,"overflow-x:auto")]'):
        #    for row in quote.xpath('./table[contains(@class,"table-striped")]/tbody/tr'):
        #        link = row.xpath('td[1]/a/@href').extract_first()
        #        yield scrapy.Request(link, callback=self.parse_product)

        for row in response.xpath('//table[@name="MVCGridTable_advancesearchawardedprojectsp"]/tbody/tr'):
            link = row.xpath('.//a/@href').get()
            #title = row.xpath('.//a/text()').get()
            yield scrapy.Request(link, callback=self.parse_product)

        # create request for next page
        onclick = response.xpath('//a[@aria-label="Next page"]/@onclick').get()
        
        if onclick:
            # next page
            self.args['page'] += 1
            args = urllib.parse.urlencode(self.args)
            url = 'https://researchgrant.gov.sg/eservices/mvcgrid?' + args
            yield scrapy.FormRequest(url,
                                     callback=self.parse_item,
                                     method='POST',
                                     formdata=self.params,
                                     headers={'X-Requested-With': 'XMLHttpRequest'})
Developer: furas, Project: python-examples, Lines of code: 25, Source file: test-scrapy.py

Example 14: extract_more_news

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def extract_more_news(self, response):
        # The offset for the next batch of news is stored in an inline script variable
        pattern = re.compile(r'var min_news_id\s+=\s+"(.*?)"')
        script = response.css('script[type*="text/javascript"]').extract()[-1]
        try:
            news_id = pattern.search(script).group(1)
        except AttributeError:
            # fall back to an offset passed in via meta, if any
            news_id = response.meta.get('min_news_id')
        # Scrapy has no synchronous fetch(); yield the request and handle the reply in a callback
        yield scrapy.FormRequest('https://www.inshorts.com/en/ajax/more_news',
                                 formdata={'news_offset': news_id},
                                 callback=self.parse_more_news)

def parse_more_news(self, response):
        # The endpoint returns JSON whose 'html' field holds the rendered news cards
        contents = json.loads(response.text)
        selector = scrapy.Selector(text=contents['html'])
        for news in selector.css('div.news-card'):
            yield {
                'title': news.css('a.clickable>span::text').extract_first(),
                'author': news.css('span.author::text').extract_first(),
                'time': news.css('span.time::text').extract_first(),
                'date': news.css('span[clas*=date]::text').extract_first(),
                'content': news.css('div[itemprop*=articleBody]::text').extract_first(),
                'link': news.css('div.read-more>a::attr(href)').extract_first(),
            }
Developer: vipulgupta2048, Project: scrape, Lines of code: 23, Source file: scrapper.py

Example 15: parse

# Required module: import scrapy [as alias]
# Or: from scrapy import FormRequest [as alias]
def parse(self, response):

        for news in response.css('div.news-card'):
            item = {
                'headline': news.css('a.clickable>span::text').extract_first(),
                'author': news.css('span.author::text').extract_first(),
                'time': news.css('span.time::text').extract_first(),
                'date': news.css('span[clas*=date]::text').extract_first(),
                'body': news.css('div[itemprop*=articleBody]::text').extract_first(),
            }

            yield item

        if self.pages > 1:
            # The offset for the next page lives in an inline script variable
            pattern = re.compile(r'var min_news_id\s+=\s+"(.*?)"')
            script = response.css('script[type*="text/javascript"]').extract()[-1]
            news_id = pattern.search(script).group(1)
            self.pages -= 1
            # scrapy.fetch() does not exist; yield the FormRequest and let Scrapy call parse again
            yield scrapy.FormRequest('https://www.inshorts.com/en/ajax/more_news',
                                     callback=self.parse,
                                     formdata={'news_offset': news_id})
Developer: vipulgupta2048, Project: scrape, Lines of code: 22, Source file: inshorts_scraper.py


Note: The scrapy.FormRequest examples in this article were collected by 纯净天空 from open-source code and documentation hosted on GitHub, MSDocs, and similar platforms. The snippets were selected from open-source projects contributed by their respective authors, and copyright remains with the original authors; for use and distribution, please refer to each project's License. Do not republish without permission.