

Python pyquery.PyQuery Code Examples

This article collects typical usage examples of pyquery.PyQuery (the main class exported by the pyquery package). If you are wondering what pyquery.PyQuery does, how to call it, or what it looks like in real code, the curated examples below may help; they can also serve as a starting point for exploring the rest of the pyquery API.


The sections below present 15 code examples of pyquery.PyQuery, ordered by popularity by default.
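As a quick orientation before the examples, here is a minimal standalone sketch of the typical PyQuery workflow: build a document from an HTML string, query it with CSS selectors, then read text and attributes. The HTML snippet and selectors below are made up for illustration and are not taken from any of the projects listed here.

from pyquery import PyQuery as pq

html = '<div id="main"><a class="link" href="https://example.com">Example</a></div>'
doc = pq(html)                      # build a PyQuery document from an HTML string
link = doc('#main a.link')          # jQuery-style CSS selector lookup
print(link.text())                  # -> Example
print(link.attr('href'))            # -> https://example.com

PyQuery documents can also be built directly from a URL or a file (PyQuery(url=...), PyQuery(filename=...)), a pattern several of the examples below rely on.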

Example 1: check_paste

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def check_paste(self, paste_id):
        paste_url = self.PASTEBIN_URL + paste_id
        try:
            paste_txt = PyQuery(url=paste_url)('#paste_code').text()

            for regex, file, directory in self.regexes:
                if re.match(regex, paste_txt, re.IGNORECASE):
                    Logger().log('Found a matching paste: ' + paste_url + ' (' + file + ')', True, 'CYAN')
                    self.save_result(paste_url, paste_id, file, directory)
                    return True
            Logger().log('Not matching paste: ' + paste_url)
        except KeyboardInterrupt:
            raise
        except:
            Logger().log('Error reading paste (probably a 404 or encoding issue).', True, 'YELLOW')
        return False
Developer: fabiospampinato, Project: pastebin-monitor, Lines of code: 18, Source file: pastebin_crawler.py
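Example 1 builds its document straight from a URL via PyQuery(url=paste_url) rather than from an HTML string. A minimal standalone sketch of that pattern (the URL is illustrative; network errors are not handled):

from pyquery import PyQuery

doc = PyQuery(url='https://example.com')   # pyquery fetches and parses the page itself
print(doc('title').text())                 # text of the page's <title> element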

Example 2: get_ligands

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def get_ligands(self, identifier):
        """List the ligands that can be found in a PDB entry

        :param identifier: a valid PDB identifier (e.g., 4HHB)
        :return: xml document


            >>> from bioservices import PDB
            >>> s = PDB()
            >>> s.get_ligands("4HHB")

        Then, ::

            x = s.get_ligands("4HHB")
            from pyquery import PyQuery as pq
            d = pq(x)


        """

        res = self.services.http_get("rest/ligandInfo", frmt='xml',
                params={'structureId': identifier})
        return res 
Developer: cokelaer, Project: bioservices, Lines of code: 25, Source file: pdb.py

Example 3: pyquery

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def pyquery(self):
        """
        Returns the response as a `PyQuery <http://pyquery.org/>`_ object.

        Only works with HTML and XML responses; other content-types raise
        AttributeError.
        """
        if 'html' not in self.content_type and 'xml' not in self.content_type:
            raise AttributeError(
                "Not an HTML or XML response body (content-type: %s)"
                % self.content_type)
        try:
            from pyquery import PyQuery
        except ImportError:  # pragma: no cover
            raise ImportError(
                "You must have PyQuery installed to use response.pyquery")
        d = PyQuery(self.testbody)
        return d 
Developer: MayOneUS, Project: pledgeservice, Lines of code: 20, Source file: response.py
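The pyquery property in Example 3 appears to mirror WebTest's TestResponse.pyquery (the snippet looks like a vendored copy of that module inside pledgeservice). Assuming it behaves the same way, a minimal test sketch against a throwaway WSGI app might look like this; simple_app is a hypothetical fixture, not part of the original project:

from webtest import TestApp

def simple_app(environ, start_response):
    # Tiny WSGI app returning a fixed HTML page
    start_response('200 OK', [('Content-Type', 'text/html; charset=utf-8')])
    return [b'<html><body><h1 id="title">Hello</h1></body></html>']

app = TestApp(simple_app)
resp = app.get('/')
assert resp.pyquery('#title').text() == 'Hello'   # query the response body with CSS selectors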

Example 4: fetch_note_list

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def fetch_note_list(self):
        url = self.account.user.alt + 'notes'
        notes = []
        while True:
            response = self.fetch_url_content(url)
            if not response:
                break
            dom = PyQuery(response.text)
            note_items = dom('#content .article>.note-container')
            for note_item in note_items:
                notes.append(PyQuery(note_item).attr('data-url'))
            next_page = dom('#content .article>.paginator>.next>a')
            if next_page:
                url = next_page.attr('href')
            else:
                break
        return notes 
Developer: tabris17, Project: doufen, Lines of code: 19, Source file: tasks.py

Example 5: get

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def get(self, douban_id):
        try:
            subject = db.Note.get(db.Note.douban_id == douban_id)
            history = db.NoteHistorical.select().where(db.NoteHistorical.id == subject.id)
        except db.Note.DoesNotExist:
            raise tornado.web.HTTPError(404)

        comments = db.Comment.select().join(db.User).where(
            db.Comment.target_type == 'note',
            db.Comment.target_douban_id == subject.douban_id
        )

        dom = PyQuery(subject.content)
        dom_iframe = dom('iframe')
        dom_iframe.before('<p class="title"><a href="{0}" class="external-link">站外视频</a></p>'.format(dom_iframe.attr('src')))
        dom_iframe.remove()
        dom('a').add_class('external-link')

        self.render('note.html', note=subject, comments=comments, content=dom) 
Developer: tabris17, Project: doufen, Lines of code: 21, Source file: handlers.py

Example 6: get_url_and_date

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def get_url_and_date(I: str, O: str, id_data_output_path: str) -> None:
  '''
  Get image URLs and dates.
  Saved in the resource directory under the names `O` and `id_data_output_path`.
  :param I: path of the input HTML file to parse
  :param O: path of the output file that receives the image URLs
  :param id_data_output_path: path of the output file that receives the (id, date) pairs
  :return: None
  '''
  with open(I, encoding='utf-8') as fin:
    doc = pyquery.PyQuery(fin.read())
  table = doc.attr('id', 'query_result_main')('tbody')
  id_data = []
  with open(O, 'w', encoding='utf-8') as fout:
    for line in table.items():
      for tr in line('tr').items():
        lst = re.findall(ID_PATTERN, tr.text())
        data = re.findall(DATA_PATTERN, tr.text())
        if len(lst) == 0:
          continue
        fout.write('http://www.getchu.com/soft.phtml?id={}&gc=gc\n'.format(lst[-1]))
        id_data.append([lst[-1], data[-1]])
  with open(id_data_output_path, 'w', encoding='utf-8') as fout:
    for each in id_data:
      fout.write('{} {}\n'.format(each[0], each[1])) 
Developer: VincentXWD, Project: create-girls-moe-pytorch, Lines of code: 27, Source file: getchu_get_urls.py

Example 7: parse_reviews_and_users

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def parse_reviews_and_users(self, response):
        if not self.is_tabelog(response):
            return Request(url=response.url, dont_filter=True)

        dom = PyQuery(response.body)
        review_nodes = dom('div.rvw-item')
        business_id = int(re.findall(r'[a-z]+/A\d{4}/A\d{6}/(\d+)/dtlrvwlst/', response.url)[0])

        reviews_and_users = []
        for review_node in review_nodes:
            user_id = self._extract_user_id(review_node)
            review = self._generate_review(review_node, business_id, user_id)
            if review:
                reviews_and_users.append(review)
            user = self._generate_user(review_node, user_id)
            if user:
                reviews_and_users.append(user)
        return reviews_and_users 
Developer: mrorii, Project: tabebot, Lines of code: 20, Source file: tabelog.py

Example 8: query_album_cover_from_xiami

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def query_album_cover_from_xiami(artist_name, album_name):
    if not is_network_connected():
        return None

    if not artist_name and not album_name:
        return False
    xiami_album_search_url = 'http://www.xiami.com/search/album?key=' + artist_name + '+' + album_name
    html = public_curl.get(xiami_album_search_url)
    try:
        search_result_object = PyQuery(html)
        album_info_element = search_result_object(
            'div.albumBlock_list div.album_item100_block p.cover a.CDcover100 img')
        info_href_attr = album_info_element.attr('src')
        if not info_href_attr: return False
        return info_href_attr.replace("_1", "_2")
    except:
        return False 
Developer: dragondjf, Project: QMusic, Lines of code: 19, Source file: cover_query.py

Example 9: getOriginalArticalNums

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def getOriginalArticalNums(self,proxies):
        main_response = requests.get(self.blogurl,proxies=proxies)
        # Check whether the page was fetched successfully (based on the status code)
        if main_response.status_code == 200:
            print('获取成功')  # "fetched successfully"
            self.main_html = main_response.text
            main_doc = pq(self.main_html)
            mainpage_str = main_doc.text() # page text with the HTML tags stripped
            origin_position = mainpage_str.index('原创') # position of the first '原创' ("original") label
            end_position = mainpage_str.index('原创',origin_position+1) # second occurrence; the article count follows it
            self.blog_nums = ''
            # Collect the digits of the blog post count
            for num in range(3,10):
                # Stop at the first whitespace character
                if mainpage_str[end_position + num].isspace() == True:
                    break
                self.blog_nums += mainpage_str[end_position + num]
            print(type(str(self.blog_nums)))
            cur_blog_nums = (int((self.blog_nums))) # current number of blog posts
            return cur_blog_nums # return the number of posts
        else:
            print('爬取失败')  # "crawl failed"
            return 0 # 0 means there are no posts or the crawl failed
Developer: aimi-cn, Project: AILearners, Lines of code: 25, Source file: csdn.py

Example 10: get_seasons_for_league

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def get_seasons_for_league(self, main_league_results_url):
        """
        Params:
            (str) main_league_results_url e.g. https://www.oddsportal.com/hockey/usa/nhl/results/

        Returns:
            (list) urls to each season for given league
        """
        seasons = []
        logger.info('Getting all seasons for league via %s', main_league_results_url)
        if not self.go_to_link(main_league_results_url):
            logger.error('League results URL loaded unsuccessfully %s', main_league_results_url)
            # Going to send back empty list so this is not processed further
            return seasons
        html_source = self.get_html_source()
        html_querying = pyquery(html_source)
        season_links = html_querying.find('div.main-menu2.main-menu-gray > ul.main-filter > li > span > strong > a')
        logger.info('Extracted links to %d seasons', len(season_links))
        for season_link in season_links:
            this_season = Season(season_link.text)
            # Start the Season's list of URLs with just the root one
            this_season_url = self.base_url + season_link.attrib['href']
            this_season.urls.append(this_season_url)
            seasons.append(this_season)
        return seasons 
Developer: gingeleski, Project: odds-portal-scraper, Lines of code: 27, Source file: crawler.py

Example 11: test_report_movement_redirect

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def test_report_movement_redirect(self):
        """
        When showing a report, if it contains transactions the slug of the transaction is transformed into an
        <a> elem, here we test that the <a redirect to an actual change form
        :return:
        """
        self.client.login(username='super', password='secret')
        response = self.client.get(reverse('ra_admin:report', args=('client', 'clientdetailedstatement')),
                                   data={'client_id': self.client1.pk},
                                   HTTP_X_REQUESTED_WITH='XMLHttpRequest')
        data = response.json()
        a_elem = pq(data['data'][0]['slug'])
        doc_type = data['data'][0]['doc_type']
        url = a_elem.attr('href')
        response = self.client.get(url, follow=True)
        self.assertEqual(response.status_code, 200)
        instance = response.context['original']
        self.assertEqual(instance.slug, a_elem.text())
        self.assertEqual(instance.doc_type, doc_type) 
Developer: ra-systems, Project: django-ra-erp, Lines of code: 21, Source file: tests.py

Example 12: get_attribute

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def get_attribute(self, attribute_id):
        """ Returns the attribute object for the given attribute id.

        Args:
            attribute_id (str): the attribute guid

        Returns:
            Attribute: Attribute object for this guid

        Raises:
            MstrClientException: if no attribute id is supplied
        """

        if not attribute_id:
            raise MstrClientException("You must provide an attribute id")
        arguments = {'taskId': 'getAttributeForms', 'attributeID': attribute_id, 'sessionState': self._session}
        response = self._request(arguments)
        d = pq(response)
        return Attribute(d('dssid')[0].text, d('n')[0].text) 
Developer: infoscout, Project: py-mstr, Lines of code: 21, Source file: py_mstr.py

Example 13: fetch_urls

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def fetch_urls(self, queue, quantity):
        while not queue.empty():
            url = queue.get()
            html = self.s.get(url, headers=self.headers).text
            pq = PyQuery(html)
            size = pq.find('tbody tr').size()
            for index in range(size):
                item = pq.find('tbody tr').eq(index)
                ip = item.find('td').eq(0).text()
                port = item.find('td').eq(1).text()
                _type = item.find('td').eq(3).text()
                self.result_arr.append({
                    str(_type).lower(): '{0}://{1}:{2}'.format(str(_type).lower(), ip, port)
                })
                if len(self.result_arr) >= quantity:
                    break 
Developer: MyFaith, Project: Proxies, Lines of code: 18, Source file: Proxies.py

Example 14: get_coin_id

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
def get_coin_id(coin_code):
    """
    This method fetches the name(id) of currency from the given code
    :param coin_code: coin code of a cryptocurrency e.g. btc
    :return: coin-id for the a cryptocurrency on the coinmarketcap.com
    """

    try:
        url = "https://coinmarketcap.com/all/views/all/"

        html = get_url_data(url).text
        raw_data = pq(html)

        coin_code = coin_code.upper()

        for _row in raw_data("tr")[1:]:
            symbol = _row.cssselect("td.text-left.col-symbol")[0].text_content()
            coin_id = _row.values()[0][3:]
            if symbol == coin_code:
                return coin_id
        raise InvalidCoinCode("'{}' coin code is unavailable on coinmarketcap.com".format(coin_code))
    except Exception as e:
        raise e 
Developer: guptarohit, Project: cryptoCMD, Lines of code: 25, Source file: utils.py

Example 15: getTypesL2

# Required import: import pyquery [as alias]
# Or: from pyquery import PyQuery [as alias]
async def getTypesL2(target, types, href):
    """
        Fetch the second-level categories.
    """
    loger.info(colored(f'fetching {href}', 'yellow'))
    resp = await spiderSession.get(href)
    async with trio.open_nursery() as nursery:
        for item in jq(resp.text)("body > div.content-base > section > div > table > tbody > tr").items():
            name = item(
                'td:nth-child(1)>a').text().strip().replace(' ', '_').lower()
            target[name] = {}
            url = urljoin(href, item('td:nth-child(1)>a').attr('href'))
            nums = int(item('td:nth-child(2)').text().strip())
            target[name]['url'] = url
            target[name]['nums'] = nums
            target[name]['UA_list'] = []
            for page in range(1, math.ceil(nums/PERPAGE)+1):
                TASKS.add('__'.join([
                    types,
                    name,
                    f"{url}{page}"
                ])) 
Developer: aoii103, Project: FakeUA, Lines of code: 24, Source file: FakeUA.py


Note: The pyquery.PyQuery examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the License of the corresponding project. Do not reproduce without permission.