当前位置: 首页>>代码示例>>Python>>正文


Python bs4.Tag方法代码示例

本文整理汇总了Python中bs4.Tag方法的典型用法代码示例。如果您正苦于以下问题:Python bs4.Tag方法的具体用法?Python bs4.Tag怎么用?Python bs4.Tag使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块bs4的用法示例。


在下文中一共展示了bs4.Tag方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def __init__(self, input, id_=None, **kwargs):
        """Set up the extractor from raw HTML text or an already-parsed tag.

        :param input: HTML markup as ``str``, or a ``bs4.element.Tag``.
        :param id_: ``id`` attribute used to look the table up when the
            starting element is not itself a ``<table>``.
        :param kwargs: may carry ``transformer``, which is stored on the
            instance; ``str`` is stored when it is absent.
        :raises Exception: if ``input`` is neither ``str`` nor ``Tag``.
        """
        # TODO: should divide this class into two subclasses
        # to deal with string and bs4.Tag separately

        # Reject anything that is neither markup text nor a parsed tag.
        if not isinstance(input, (str, Tag)):
            raise Exception('Unrecognized type. Valid input: str, bs4.element.Tag')

        # Text is parsed first and .find() picks the element to start from;
        # a Tag is used as-is.
        if isinstance(input, str):
            root = BeautifulSoup(input, 'html.parser').find()
        else:
            root = input

        # Either the starting element is the table, or the table is looked up by id.
        self._table = root if root.name == 'table' else root.find(id=id_)

        self._transformer = kwargs.get('transformer', str)
        self._output = []
开发者ID:yuanxu-li,项目名称:html-table-extractor,代码行数:24,代码来源:extractor.py

示例2: append_to

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def append_to(parent, tag, **kwargs):
    """
    Create a new tag and append it to ``parent``.

    :param parent: Element that receives the new child.
    :param tag: Name of the tag to create.
    :param kwargs: Attributes for the new tag.
    :return: The newly created element.
    """
    # Use the soup stored on the parent when the attribute exists; otherwise
    # fall back to the enclosing <html> element.
    owner = parent.soup if hasattr(parent, "soup") else parent.find_parent("html")

    # Tag is instantiated directly rather than through new_tag, otherwise an
    # attribute called "name" would clash with the tag-name argument in bs4.
    element = bs4.Tag(builder=owner.builder, name=tag, attrs=kwargs)
    # Record the soup on the new element as well.
    element.soup = owner

    parent.append(element)
    return element
开发者ID:man-group,项目名称:PyBloqs,代码行数:24,代码来源:html.py

示例3: construct_element

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def construct_element(container=None, content=None, tag=None, element_type=None):
    """
    Build an element, attaching it to ``container`` when one is supplied.

    Without a container the element is created through ``root``; with one it
    is created through ``append_to``. In both cases ``element_type`` is passed
    along as the ``type`` keyword.

    :param container: Container to add the element to, or None.
    :param content: String representation of content (e.g. JS or CSS); left
        unset when None.
    :param tag: Tag name, e.g. "script" or "style"
    :param element_type: E.g. "text/javascript" or "text/css"
    :return: New element.
    """
    element = (
        append_to(container, tag, type=element_type)
        if container is not None
        else root(tag, type=element_type)
    )
    if content is None:
        return element
    element.string = content
    return element
开发者ID:man-group,项目名称:PyBloqs,代码行数:19,代码来源:html.py

示例4: html

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def html(self):
        """Render ``self.markdown`` to an HTML fragment.

        Image ``src`` values are rewritten through ``self._get_static_url``,
        the ``<body>`` wrapper is dropped, and a stylesheet ``<link>`` is put
        in front when the article is a directory that contains
        ``ARTICLE_STYLESHEET_FILENAME``.

        :return: the resulting HTML as a string.
        """
        rendered = markdown.markdown(self.markdown,
                                     extensions=MARKDOWN_EXTENSIONS,
                                     output_format='html5')

        # Point every image at its static URL.
        document = BeautifulSoup(rendered, 'lxml')
        for image in document.find_all('img'):
            image.attrs['src'] = self._get_static_url(image.attrs['src'])

        # Keep only what is inside <body>; fall back to '' when there is none.
        body = document.find('body') or ''
        if isinstance(body, SoupTag):
            body = ''.join([str(node) for node in body.contents])

        # The stylesheet link is only added for directory articles that ship one.
        has_stylesheet = self.is_dir and os.path.exists(
            os.path.join(self.dir_path, ARTICLE_STYLESHEET_FILENAME))
        if not has_stylesheet:
            return body

        href = self._get_static_url(ARTICLE_STYLESHEET_FILENAME)
        return f'<link rel="stylesheet" type="text/css" href="{href}">' + body
开发者ID:briancappello,项目名称:flask-react-spa,代码行数:24,代码来源:article_data.py

示例5: retrieve_erly_iframe_src

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def retrieve_erly_iframe_src(self, video_source_response: CachedResponse) -> Union[str, dict]:
        """Extract the ``src`` of the iframe directly inside ``div.full_screen``.

        :param video_source_response: response whose ``content`` is parsed
            with the ``html.parser`` backend.
        :return: the iframe's ``src`` value on success; otherwise a dict with
            a single ``"fatal_error"`` key naming what was missing. The return
            annotation covers both shapes (it previously claimed ``str`` only).
        """
        # Parse the payload, then select the iframe, as one composed pipeline.
        erly_iframe: Union[Tag, None] = pipe(
            lambda r_content: BeautifulSoup(
                r_content,
                "html.parser"
            ),
            lambda soup: soup.select_one(
                "div.full_screen > iframe"
            )
        )(video_source_response.content)

        if not erly_iframe:
            return {"fatal_error": ".full_screen > iframe wasn't found"}

        erly_iframe_src: Union[str, None] = erly_iframe.get("src")

        # A missing or empty src is reported the same way as a missing iframe.
        if not erly_iframe_src:
            return {"fatal_error": ".full_screen > iframe doesn't have src attribute"}

        return erly_iframe_src
开发者ID:limitedeternity,项目名称:foxford_courses,代码行数:22,代码来源:fns.py

示例6: _find_base_element

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def _find_base_element(element: Tag) -> Optional[Tag]:
    """
    Return the 'base element' of a symbol: the first '<mi>' met in a pre-order
    walk starting at (and including) 'element'. For 'x^2' or 'x_i' that is the
    '<mi>' holding 'x', since the base is the first child of '<msup>' / '<msub>'.
    Returns None when neither the element nor any descendant tag is an '<mi>'.
    """

    BASE_ELEMENT_TAG = "mi"

    # Depth-first, pre-order traversal driven by an explicit stack.
    pending = [element]
    while pending:
        node = pending.pop()
        if node.name == BASE_ELEMENT_TAG:
            return node
        # Push child tags right-to-left so the leftmost child is popped next;
        # non-Tag children (e.g. text nodes) are ignored.
        pending.extend(
            child for child in reversed(list(node.children)) if isinstance(child, Tag)
        )

    return None
开发者ID:allenai,项目名称:scholar-reader,代码行数:24,代码来源:match_symbols.py

示例7: _extract_tokens

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def _extract_tokens(element: Tag) -> List[Token]:
    """
    Return the tokens defined directly by this element: a one-item list when the
    element's name is in TOKEN_TAGS and it passes '_has_s2_token_annotations',
    otherwise an empty list. Grouping nodes such as "<mrow>" and "<msub>" are
    expected to yield nothing here.
    """

    is_annotated_token = element.name in TOKEN_TAGS and _has_s2_token_annotations(element)
    if not is_annotated_token:
        return []

    # Index and character offsets are read from the element's "s2:" attributes.
    token = Token(
        text=element.string,
        token_index=int(element["s2:index"]),
        start=int(element["s2:start"]),
        end=int(element["s2:end"]),
    )
    return [token]
开发者ID:allenai,项目名称:scholar-reader,代码行数:21,代码来源:parse_equation.py

示例8: define

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def define(word, num=1):
    """Look up ``word`` on WordNet Web and return its ``num``-th definition.

    :param word: term to look up.
    :param num: 1-based index of the wanted definition; values below 1 are
        treated as 1.
    :return: ``(text, total)`` where ``total`` is the number of definitions
        found, or ``(error_message, 0)`` when the URL cannot be built, the
        download fails, or fewer than ``num`` definitions exist.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import quote_plus

    if num < 1:
        num = 1
    try:
        # Percent-encode the term so spaces, '&', '#', ... cannot corrupt the query.
        url = ("http://wordnetweb.princeton.edu/perl/webwn?s=" + quote_plus(word)
               + "&sub=Search+WordNet&o2=&o0=&o8=1&o1=1&o7=&o5=&o9=&o6=&o3=&o4=&h=0000000000")
    except (TypeError, UnicodeError) as e:
        print(e)
        # Same (message, count) shape as every other return path.
        return "Couldn't download definition.", 0
    try:
        # Close the HTTP response once the document has been parsed.
        # NOTE(review): assumes `request` is urllib.request - confirm.
        with request.urlopen(url) as response:
            soup = BeautifulSoup(response)
    except Exception:
        return "Network Error: Couldn't download definition.", 0
    if soup.ul is not None:
        # Only element children of the first <ul> that carry some text count.
        definitions = [x.text for x in soup.ul
                       if isinstance(x, Tag) and x.text not in ('\n', '')]
        if len(definitions) >= num:
            entry = f"{definitions[num - 1]}[{num} of {len(definitions)}]"
            # The first three characters are dropped - presumably a fixed
            # prefix in WordNet's markup; verify against a live response.
            return entry[3:].capitalize(), len(definitions)
    return "Couldn't find definition.", 0
开发者ID:wardellbagby,项目名称:HangoutsBot,代码行数:20,代码来源:UtilBot.py

示例9: get_pkd

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def get_pkd(self, *args, **kwargs):
        """Return the de-duplicated PKD entries from the full report.

        The arguments are forwarded to ``self._get_details``. Each entry is a
        dict with ``code``, ``name`` and ``main`` (``True`` when the
        ``pkdprzewazajace`` field equals ``'1'``). An empty list is returned
        when either the details or the report is ``None``.
        """
        details = self._get_details(*args, **kwargs)
        if details is None:
            return []

        summary = BeautifulSoup(details, 'lxml')
        # The 'P' report type is used when the <typ> text contains 'P', else 'F'.
        type_key = 'P' if 'P' in summary.typ.get_text() else 'F'
        report_type = self.pkd_report_type.get(type_key)

        report = self._service(
            'DanePobierzPelnyRaport', summary.regon.get_text(), report_type)
        if report is None:
            return []

        entries = []
        for item in BeautifulSoup(report, 'lxml').find_all('dane'):
            # Key = tag name without its first '_'-separated prefix, with the
            # remaining underscores removed, lower-cased.
            fields = {
                child.name.split('_', 1)[1].replace('_', '').lower(): child.get_text()
                for child in item.children
                if isinstance(child, Tag)
            }
            entries.append({
                'code': fields['pkdkod'],
                'name': fields['pkdnazwa'],
                'main': fields['pkdprzewazajace'] == '1'})

        # Going through a set removes duplicates; the resulting order is arbitrary.
        unique = {tuple(entry.items()) for entry in entries}
        return [dict(pairs) for pairs in unique]
开发者ID:bogdal,项目名称:gusregon,代码行数:22,代码来源:gus.py

示例10: soup_strings

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def soup_strings(soup):
  """Yield whitespace-normalised text chunks from a parsed document.

  A tag whose name is in ``paragraph_tags`` and that contains no further
  paragraph tag is emitted as one chunk (its text joined with single spaces),
  and its descendants are then skipped. Any other non-comment string is
  emitted on its own. Empty chunks are never yielded.

  :param soup: parsed bs4 tree (anything exposing ``.descendants``).
  """
  paragraph_tags = {"caption", "details", "h1", "h2", "h3", "h4", "h5",
                    "h6", "li", "p", "td", "div", "span"}

  # ids of the nodes already covered by the most recently emitted paragraph.
  # Identity is used instead of ``in`` on a list because list membership
  # compares with ==: a later node that merely equals a skipped child (e.g.
  # the same text repeated after the paragraph) was dropped by mistake, and
  # the comparison itself could recurse deeply. The nodes stay alive in the
  # tree while we iterate, so their ids are stable.
  skip_ids = None
  for descendant in soup.descendants:
    # If we've treated a tag as a contiguous paragraph, don't re-emit the
    # children (see below).
    if skip_ids is not None:
      if id(descendant) in skip_ids:
        continue
      skip_ids = None

    # Treat some tags as contiguous paragraphs, regardless of other tags nested
    # inside (like <a> or <b>).
    if isinstance(descendant, bs4.Tag):
      if descendant.name in paragraph_tags:
        if descendant.find_all(paragraph_tags):
          # If there are nested paragraph tags, don't treat it as a single
          # contiguous tag.
          continue
        skip_ids = {id(node) for node in descendant.descendants}
        text = " ".join(descendant.get_text(" ", strip=True).split())
        if text:
          yield text
        continue

    # Comments and anything that is not a string node produce no output.
    if (isinstance(descendant, bs4.Comment) or
        not isinstance(descendant, bs4.NavigableString)):
      continue

    text = " ".join(descendant.strip().split())
    if text:
      yield text
开发者ID:akzaidi,项目名称:fine-lm,代码行数:43,代码来源:get_references_web_single_group.py

示例11: get_last_descendant

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def get_last_descendant(self, node):
        """Return the node that marks the end of ``node``'s subtree.

        That is ``node.next_sibling`` when there is one; otherwise the
        ``next_element`` of the deepest last child reached from ``node``.
        """
        sibling = node.next_sibling
        if sibling is not None:
            return sibling

        # No following sibling: follow the last-child chain as far as it goes.
        deepest = node
        while isinstance(deepest, bs4.Tag) and deepest.contents:
            deepest = deepest.contents[-1]
        return deepest.next_element
开发者ID:facelessuser,项目名称:pyspelling,代码行数:14,代码来源:xml.py

示例12: __init__

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def __init__(self, table):
        """Store the table(s) to process and start with an empty output list.

        :param table: a ``Tag`` (stored as-is), a ``soup`` instance, or a
            ``str`` of HTML; for the latter two the result of
            ``find_all("table")`` is stored.
        :raises Exception: if ``table`` is none of the accepted types.
        """
        # NOTE(review): if `soup` is bs4's BeautifulSoup (a Tag subclass), the
        # first branch also captures parsed documents and the second one is
        # never reached - confirm which behaviour is intended before reordering.
        if isinstance(table, Tag):
            self.table = table
        elif isinstance(table, soup):
            self.table = table.find_all("table")
        elif isinstance(table, str):
            # Parse the markup that was passed in (previously the builtin
            # `str` type itself was handed to the parser).
            self.table = soup(table, 'html.parser').find_all("table")
        else:
            raise Exception('unrecognized type')

        self.output = []
开发者ID:KiriKira,项目名称:scripts,代码行数:13,代码来源:ipip.py

示例13: clean_pullquote_tags

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def clean_pullquote_tags(item: BS4_Tag) -> BS4_Tag:
    """
    Remove the literal "[pullquote]" and "[/pullquote]" markers from the tag's string.

    The markers are deleted, not converted into a <span>. A tag whose
    ``.string`` is None is returned untouched.
    https://stackoverflow.com/a/44593228/1191545

    :param item: tag to clean; modified in place.
    :return: the same ``item``.
    """
    text = item.string
    # bs4 reports .string as None when the tag does not resolve to a single
    # string; there is nothing to clean then, and calling .replace would raise.
    if text is None:
        return item

    for marker in ("[pullquote]", "[/pullquote]"):
        text = text.replace(marker, "")

    item.string = text
    return item
开发者ID:WesternFriend,项目名称:WF-website,代码行数:17,代码来源:import_articles.py

示例14: makeTag

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def makeTag(name, string=None, **kwargs):
    """Create a standalone tag with the given attributes and optional text.

    :param name: tag name.
    :param string: text content; only set when truthy.
    :param kwargs: tag attributes; ``None`` values are replaced by ``""``.
    :return: the new ``Tag``.
    """
    # Normalise before constructing the tag so the result does not depend on
    # whether Tag keeps a reference to, or a copy of, the mapping it is given.
    attrs = {key: "" if value is None else value for key, value in kwargs.items()}
    tag = Tag(name=name, attrs=attrs)
    if string:
        tag.string = string
    return tag
开发者ID:Terrance,项目名称:SkPy,代码行数:10,代码来源:msg.py

示例15: parse_business_hours

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Tag [as 别名]
def parse_business_hours(business_hours_html):
    """Extract the opening hours from a business-hours HTML snippet.

    Starting from the element with class ``glyphicon-time``, the siblings
    following its grandparent are scanned. Sibling tags containing the
    transfer or education icon are skipped; from the others, the stripped
    strings of every ``div.col-xs-10`` are collected, one per line.

    :param business_hours_html: HTML markup to parse.
    :return: ``'*Öffnungszeiten*'`` followed by the collected lines, or ``''``
        when no time icon is present.
    """
    soup = bs4.BeautifulSoup(business_hours_html, 'html.parser')
    time_icon = soup.find(class_='glyphicon-time')
    transfer_icon = soup.find(class_='glyphicon-transfer')
    education_icon = soup.find(class_='glyphicon-education')

    if not time_icon:
        return ''

    # Collect lines in a list and join once instead of growing a string.
    lines = ['*Öffnungszeiten*']
    for sib in time_icon.parent.parent.next_siblings:
        # Text nodes between the rows are not tags and carry no hours.
        if not isinstance(sib, bs4.Tag):
            continue
        if transfer_icon in sib.descendants or education_icon in sib.descendants:
            continue
        for item in sib.find_all('div', class_='col-xs-10'):
            lines.extend(item.stripped_strings)
    return '\n'.join(lines).strip()
开发者ID:ekeih,项目名称:OmNomNom,代码行数:17,代码来源:studierendenwerk.py


注:本文中的bs4.Tag方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。