Python bs4.NavigableString类代码示例

本文整理汇总了Python中bs4.NavigableString类的典型用法代码示例。如果您正苦于以下问题：Python bs4.NavigableString类的具体用法？Python bs4.NavigableString怎么用？Python bs4.NavigableString使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块bs4的用法示例。

在下文中一共展示了bs4.NavigableString类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: print_content

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def print_content(contents):
    """Recursively print a sequence of BeautifulSoup nodes as plain text.

    Text nodes are printed with newlines removed and surrounding whitespace
    stripped. For tags, a marker is printed where one is defined (``img``,
    ``br``, ``li``) and the tag's children are then printed recursively;
    ``noscript`` subtrees are skipped entirely.

    Args:
        contents: Iterable of nodes, e.g. ``tag.contents``.
    """
    for content in contents:
        if isinstance(content, NavigableString):
            print(str(content).replace("\n", "").strip())
            continue

        name = content.name
        if name == "noscript":
            # Skip the element together with everything nested inside it.
            continue
        if name == "img":
            print("[图片]")
        elif name == "br":
            print("")
        elif name == "li":
            # Keep the bullet on the same line as the item's text.
            print("•", end=" ")
        print_content(content.contents)

开发者ID:duduainankai，项目名称:zhihu-terminal，代码行数:25，代码来源:Answer.py

示例2: normalize_text_sections

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def normalize_text_sections(div):
        """Collapse the direct children of ``div`` into a single line of text.

        Nodes whose type is exactly ``NavigableString`` contribute their own
        text, nodes whose type is exactly ``Comment`` contribute nothing, and
        every other node contributes its ``.text``. Each piece is stripped,
        the pieces are joined with single spaces, carriage returns are
        removed and every newline becomes ``', '``.

        Args:
            div: Node exposing a ``contents`` sequence.

        Returns:
            The flattened, stripped text.
        """
        pieces = []
        for node in div.contents:
            node_type = type(node)
            if node_type == NavigableString:
                raw = node
            elif node_type == Comment:
                raw = ''
            else:
                # Every remaining node, ``li`` tags included, is read via .text.
                raw = node.text
            pieces.append(raw.strip())

        flattened = ' '.join(pieces).strip()
        flattened = flattened.replace('\r', '').replace('\n', ', ')
        return flattened.strip()

开发者ID:cobalt-uoft，项目名称:uoft-scrapers，代码行数:21，代码来源:__init__.py

示例3: normalize_text_sections

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def normalize_text_sections(div):
        """Flatten the direct children of ``div`` into one normalized line.

        Each child is reduced to text (exact ``NavigableString`` -> itself,
        exact ``Comment`` -> empty, anything else -> ``.text``), stripped, and
        joined with single spaces. The result then has carriage returns
        removed, newlines replaced by ``', '`` and double spaces replaced by a
        single space (one pass), and is stripped again.

        Args:
            div: Node exposing a ``contents`` sequence.

        Returns:
            The normalized text.
        """
        def text_of(node):
            # Exact type comparison: other string subclasses fall through to .text.
            if type(node) == NavigableString:
                return node
            if type(node) == Comment:
                return ''
            return node.text

        line = ' '.join(text_of(node).strip() for node in div.contents).strip()
        # Order matters: '\r' is dropped before '\n' is expanded to ', '.
        for old, new in (('\r', ''), ('\n', ', '), ('  ', ' ')):
            line = line.replace(old, new)
        return line.strip()

开发者ID:cobalt-uoft，项目名称:uoft-scrapers，代码行数:22，代码来源:__init__.py

示例4: __clear

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def __clear(parent_node, config):
    """Return the text of ``parent_node`` and of everything nested inside it.

    A ``NavigableString`` is returned as its ``.string``. For any other node
    the result starts with a newline when the node's name is listed in
    ``line_elements``, followed by the text of each child in order:
    ``table`` children are rendered by ``parse_table(child, config)`` and all
    other children by a recursive call.

    Args:
        parent_node: Node to flatten.
        config: Passed through unchanged to ``parse_table`` and to the
            recursive calls.

    Returns:
        The concatenated text.
    """
    if isinstance(parent_node, NavigableString):
        return parent_node.string

    pieces = ["\n"] if parent_node.name in line_elements else []
    for child in parent_node.contents:
        if child.name == "table":
            pieces.append(parse_table(child, config))
        else:
            pieces.append(__clear(child, config))
    return "".join(pieces)

开发者ID:newsettle，项目名称:ns4_chatbot，代码行数:21，代码来源:html_clear.py

示例5: process_tag

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def process_tag(node):
    """Build the converted text for ``node`` from its children.

    String children are used verbatim and child tags are processed
    recursively, except that the child tags of a ``table`` node are not
    descended into. The combined text is then handed to the module-level
    function ``convert_<lowercased tag name>`` when such a function exists;
    otherwise the combined text is returned unchanged.

    Args:
        node: Tag-like object with ``children`` and ``name``.

    Returns:
        The converted text.
    """
    no_descend = ['table']

    pieces = []
    for child in node.children:
        if isinstance(child, NavigableString):
            pieces.append(child)
        elif node.name not in no_descend:
            pieces.append(process_tag(child))
    text = ''.join(pieces)

    converter_name = "convert_%s" % node.name.lower()
    try:
        # NOTE: the converter call sits inside the try, so a KeyError raised
        # by the converter itself is suppressed as well.
        text = globals()[converter_name](node, text)
    except KeyError:
        pass

    return text

开发者ID:doakey3，项目名称:DashTable，代码行数:26，代码来源:process_tag.py

示例6: get_children

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def get_children(descendants, parsed):
    """Split the children of ``descendants`` into parsed and pending nodes.

    A ``NavigableString`` argument is appended to ``parsed`` directly. For
    any other node, children are visited in order: children whose ``name`` is
    ``None`` and that come before the first named child are appended to
    ``parsed``; the first named child and everything after it are queued.

    Args:
        descendants: Node to inspect, or ``None``.
        parsed: List that receives the leading name-less nodes (mutated).

    Returns:
        A ``deque`` holding the queued children in reverse order; empty when
        ``descendants`` is ``None`` or a ``NavigableString``.
    """
    pending = deque()
    if descendants is None:
        return pending

    if isinstance(descendants, NavigableString):
        parsed.append(descendants)
    else:
        seen_named = False
        for child in descendants.children:
            if child.name is not None:
                seen_named = True
            # Once a named child has been seen, everything is queued.
            if seen_named:
                pending.append(child)
            else:
                parsed.append(child)

    pending.reverse()
    return pending

开发者ID:jhpyle，项目名称:docassemble，代码行数:24，代码来源:file_docx.py

示例7: soup_strings

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def soup_strings(soup):
  """Yield the text of ``soup`` as whitespace-normalized strings.

  Walks ``soup.descendants`` in order. A tag from ``paragraph_tags`` that
  contains no nested paragraph tag is emitted as one contiguous string and
  its descendants are then skipped. Remaining text nodes are emitted
  individually; comments and empty strings are dropped.

  The skip check is identity-based. An equality-based ``in`` test on a list
  would also drop later text nodes that merely compare equal to a string
  inside the paragraph, and it goes through BeautifulSoup ``__eq__``, which
  can recurse deeply.

  Args:
    soup: Parsed document (or tag) exposing ``descendants``.

  Yields:
    Non-empty strings with runs of whitespace collapsed to single spaces.
  """
  paragraph_tags = {"caption", "details", "h1", "h2", "h3", "h4", "h5",
                    "h6", "li", "p", "td", "div", "span"}

  # ids of the nodes below the paragraph tag that was emitted last; the nodes
  # stay alive as part of the tree, so the ids remain valid during the walk.
  skip_ids = None
  for descendant in soup.descendants:
    # If we've treated a tag as a contiguous paragraph, don't re-emit the
    # children (see below).
    if skip_ids is not None:
      if id(descendant) in skip_ids:
        continue
      skip_ids = None

    # Treat some tags as contiguous paragraphs, regardless of other tags
    # nested inside (like <a> or <b>).
    if isinstance(descendant, bs4.Tag):
      if descendant.name in paragraph_tags:
        if descendant.find_all(paragraph_tags):
          # If there are nested paragraph tags, don't treat it as a single
          # contiguous tag.
          continue
        skip_ids = {id(node) for node in descendant.descendants}
        text = " ".join(descendant.get_text(" ", strip=True).split())
        if text:
          yield text
        continue

    if (isinstance(descendant, bs4.Comment) or
        not isinstance(descendant, bs4.NavigableString)):
      continue

    text = " ".join(descendant.strip().split())
    if text:
      yield text

开发者ID:akzaidi，项目名称:fine-lm，代码行数:43，代码来源:get_references_web_single_group.py

示例8: apply_correction_map

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def apply_correction_map(soup, tag, cor_map):
    """Apply character substitutions to every text node below ``tag``.

    Each mapping in ``cor_map`` is applied in order; within a mapping every
    key found in the text is replaced by its value. A text node is replaced
    in the tree (with a string created by ``soup.new_string``) only when the
    substitutions actually changed it.

    Args:
        soup: Object providing ``new_string``.
        tag: Node whose ``descendants`` are processed.
        cor_map: Iterable of ``{bad: good}`` mappings.
    """
    # Snapshot the descendants first: nodes are replaced while looping.
    for node in list(tag.descendants):
        if not isinstance(node, bs4.NavigableString):
            continue

        original = str(node)
        corrected = original
        for fontset in cor_map:
            for bad, good in fontset.items():
                if bad in corrected:
                    corrected = corrected.replace(bad, good)

        if corrected != original:
            node.replace_with(soup.new_string(corrected))

开发者ID:fake-name，项目名称:ReadableWebProxy，代码行数:14，代码来源:FontRemapProcessors.py

示例9: get_text_lines

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def get_text_lines(parent_node):
    """Split the direct children of ``parent_node`` into lines at ``<br>``.

    Text nodes and the ``.text`` of non-``br`` children are accumulated into
    the current line; every ``br`` child closes the current line and starts a
    new, empty one.

    Args:
        parent_node: Node exposing ``children``.

    Returns:
        List of line strings; always contains at least one (possibly empty)
        entry.
    """
    finished = []
    current = ''
    for child in parent_node.children:
        if isinstance(child, bs4.NavigableString):
            current += str(child)
        elif child.name == 'br':
            finished.append(current)
            current = ''
        else:
            current += child.text
    finished.append(current)
    return finished

开发者ID:jbms，项目名称:beancount-import，代码行数:12，代码来源:amazon_invoice.py

示例10: clean_node

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def clean_node(self, doc, node):
        """Clean ``node`` in place using the cleaner that matches its type.

        ``NavigableString`` nodes go to ``clean_string_node``, ``Tag`` nodes
        to ``clean_tag_node`` and anything else to ``clean_unknown_node``.

        Args:
            doc: The document being cleaned; passed through to the cleaner.
            node: The node to clean.
        """
        if isinstance(node, NavigableString):
            cleaner = self.clean_string_node
        elif isinstance(node, Tag):
            cleaner = self.clean_tag_node
        else:  # pragma: no cover
            # Fallback for a node that is neither a string nor a tag; the
            # original author noted no known example, so this is untested.
            cleaner = self.clean_unknown_node
        cleaner(doc, node)

开发者ID:wagtail，项目名称:wagtail，代码行数:13，代码来源:whitelist.py

示例11: make_catena_input

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def make_catena_input(src, dest):
    """Rewrite the XML document at ``src`` and save it as a ``.tml`` file.

    The document is changed as follows before it is written out:

    * an empty ``TITLE`` element is inserted right after ``DCT``;
    * a ``TIMEX3`` creation-time element (``tid="t0"``) is appended to
      ``DCT``;
    * every ``event`` element is replaced by an ``EVENT`` element carrying
      the same attributes and the event's text;
    * all ``TLINK`` elements are removed.

    Args:
        src: Path of the input XML file. The part after the last ``/`` is
            used as the output file's base name.
        dest: Output location prefix. It is concatenated directly with the
            base name, so a directory must include its trailing separator.

    Raises:
        AttributeError: If the document has no ``DCT`` element.
    """
    # Close the input file deterministically instead of leaking the handle.
    with open(src) as source_file:
        text = source_file.read()
    soup = BeautifulSoup(text, 'xml')

    dct = soup.find('DCT')
    dct.insert_after(soup.new_tag('TITLE'))
    dct.append(soup.new_tag('TIMEX3', functionInDocument="CREATION_TIME", temporalFunction="false", tid="t0", type="DATE", value=""))

    for e in soup.find_all('event'):
        new_e = soup.new_tag('EVENT', **e.attrs)
        new_e.insert(0, NavigableString(e.get_text()))
        e.replace_with(new_e)

    for tlink in soup('TLINK'):
        tlink.extract()

    # Use the ``dest`` argument rather than a module-level ``args`` object.
    with open(dest + src.split('/')[-1] + '.tml', 'w') as f:
        f.write(str(soup))

开发者ID:malllabiisc，项目名称:NeuralDater，代码行数:17，代码来源:make_catena_input.py

示例12: is_navigable_string

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def is_navigable_string(obj):
        """Return ``True`` when ``obj`` is a ``bs4.NavigableString`` instance.

        Instances of ``NavigableString`` subclasses count as well, because
        the check uses ``isinstance``.
        """
        navigable_type = bs4.NavigableString
        return isinstance(obj, navigable_type)

开发者ID:facelessuser，项目名称:soupsieve，代码行数:5，代码来源:css_match.py

示例13: _html2text

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def _html2text(elem):
    """Apply the registered element replacers to ``elem``, children first.

    Child tags are processed recursively; string children need no changes.
    Afterwards, when ``elem`` has a parent and its name has an entry in
    ``_ELEMENT_REPLACER``, that entry is called with ``elem``.

    Args:
        elem: Tag to process.
    """
    for child in elem.children:
        if isinstance(child, Tag):
            _html2text(child)

    if elem.parent and elem.name in _ELEMENT_REPLACER:
        replacer = _ELEMENT_REPLACER[elem.name]
        replacer(elem)

开发者ID:quay，项目名称:quay，代码行数:13，代码来源:html.py

示例14: wrap_elem_content

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def wrap_elem_content(elem, begin, end):
    """Surround the content of ``elem`` with two text markers.

    Args:
        elem: Element to modify in place.
        begin: Text inserted as the first child of ``elem``.
        end: Text appended as the last child of ``elem``.
    """
    opening = NavigableString(begin)
    elem.insert(0, opening)
    closing = NavigableString(end)
    elem.append(closing)

开发者ID:paperswithcode，项目名称:axcell，代码行数:5，代码来源:extract_tables.py

示例15: _insert_anchor

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def _insert_anchor(el, anchor_id, prefix="xxanchor"):
    """Insert a text anchor as the first child of ``el``.

    The anchor text is ``' <prefix>-<anchor_id> '``, with one space on each
    side.

    Args:
        el: Element to modify in place.
        anchor_id: Identifier placed after the prefix.
        prefix: Leading part of the anchor text.
    """
    marker = f' {prefix}-{anchor_id} '
    el.insert(0, NavigableString(marker))

开发者ID:paperswithcode，项目名称:axcell，代码行数:4，代码来源:doc_utils.py

注：本文中的bs4.NavigableString类的用法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。