当前位置: 首页>>代码示例>>Python>>正文


Python bs4.NavigableString 类代码示例

本文整理汇总了 Python 中 bs4.NavigableString 类的典型用法代码示例。如果您正苦于以下问题:bs4.NavigableString 类的具体用法是什么?bs4.NavigableString 怎么用?有哪些 bs4.NavigableString 的使用例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 bs4 的用法示例。


下文共展示了 15 个使用 bs4.NavigableString 类的代码示例,这些例子默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: print_content

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def print_content(contents):
    # Recursively print the text of a sequence of parsed HTML nodes to stdout.
    # NOTE: this block uses Python 2 print statements.
    for content in contents:
        # Read the tag name up front; it is only consulted for non-string nodes.
        name = content.name
        #if not isinstance(content, Tag):
        if isinstance(content, NavigableString):
            # Text node: drop embedded newlines, then print it stripped.
            s = str(content)
            s = s.replace("\n","")
            print s.strip()
        else:
            if name == "img":
                # The bare string literal below is a no-op expression that
                # keeps an older "print the image URL" variant disabled.
                '''
                img = content.find("img")
                if img:
                    print img.get("src")
                '''
                # Placeholder text for images (the literal means "[image]").
                print "[图片]"
            elif name == "br":
                # A line break becomes an empty output line.
                print ""
            elif name == "noscript":
                # Skip <noscript> completely, including everything inside it.
                continue
            elif name == "li":
                # Trailing comma: Python 2 prints the bullet without a newline
                # so the list item's text follows on the same line.
                print "•",
            # Every non-string node except <noscript> has its children printed.
            print_content(content.contents) 
开发者ID:duduainankai,项目名称:zhihu-terminal,代码行数:25,代码来源:Answer.py

示例2: normalize_text_sections

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def normalize_text_sections(div):
    """Flatten the direct children of ``div`` into a single line of text.

    Children whose exact type is ``NavigableString`` contribute their own
    text, children whose exact type is ``Comment`` contribute nothing, and
    every other child contributes its ``.text``.  Each piece is stripped and
    the pieces are joined with single spaces.  Carriage returns are then
    removed and every newline is turned into ``', '``.

    Returns the resulting string with surrounding whitespace removed.
    """
    pieces = []
    for child in div.contents:
        kind = type(child)
        # Exact type comparison on purpose: subclasses of NavigableString
        # are not treated as plain strings here.
        if kind == NavigableString:
            raw = child
        elif kind == Comment:
            raw = ''
        else:
            raw = child.text
        pieces.append(raw.strip())

    joined = ' '.join(pieces).strip()
    return joined.replace('\r', '').replace('\n', ', ').strip()
开发者ID:cobalt-uoft,项目名称:uoft-scrapers,代码行数:21,代码来源:__init__.py

示例3: normalize_text_sections

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def normalize_text_sections(div):
    """Collapse the direct children of ``div`` into one normalized string.

    Each child is reduced to text (see ``_text_of``), stripped, and the
    results are joined with single spaces.  The joined text then has carriage
    returns removed, newlines replaced by ``', '``, and each pair of adjacent
    spaces replaced by one space (a single pass, so longer runs are only
    halved).  The final value is stripped of surrounding whitespace.
    """
    def _text_of(node):
        # Exact-type checks: only objects that are precisely NavigableString
        # are used verbatim, and only exact Comment objects are dropped.
        if type(node) == NavigableString:
            return node
        if type(node) == Comment:
            return ''
        return node.text

    words = ' '.join(_text_of(node).strip() for node in div.contents)
    result = words.strip().replace('\r', '').replace('\n', ', ')
    return result.replace('  ', ' ').strip()
开发者ID:cobalt-uoft,项目名称:uoft-scrapers,代码行数:22,代码来源:__init__.py

示例4: __clear

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def __clear(parent_node,config):
	"""Recursively flatten ``parent_node`` into a single text string.

	A ``NavigableString`` is returned as its own ``.string``.  For any other
	node the result starts with a newline when the node's name is listed in
	``line_elements``; each child then contributes either the output of
	``parse_table(child, config)`` (for ``table`` children) or the result of
	recursing into it.
	"""
	# Leaf case: a string node is its own text.
	if isinstance(parent_node, NavigableString):
		return parent_node.string

	pieces = ["\n"] if parent_node.name in line_elements else []
	for node in parent_node.contents:
		if node.name == "table":
			# Tables are rendered by the dedicated helper, not by recursion.
			pieces.append(parse_table(node, config))
		else:
			pieces.append(__clear(node, config))

	return "".join(pieces)
开发者ID:newsettle,项目名称:ns4_chatbot,代码行数:21,代码来源:html_clear.py

示例5: process_tag

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def process_tag(node):
    """
    Recursively go through a tag's children, converting them, then
    convert the tag itself.

    String children are appended verbatim; other children are processed
    recursively unless ``node`` itself is one of the ``exceptions`` tags, in
    which case they are skipped.  Afterwards a module-level function named
    ``convert_<tag name, lower-cased>`` is looked up and, if present, called
    as ``convert_fn(node, text)`` to produce the final text.

    Returns the converted text, or the concatenated child text unchanged
    when no converter exists for this tag.
    """
    # Non-string children of these tags are not descended into.
    exceptions = ['table']
    descend = node.name not in exceptions

    parts = []
    for element in node.children:
        if isinstance(element, NavigableString):
            parts.append(element)
        elif descend:
            parts.append(process_tag(element))
    text = ''.join(parts)

    # Look the converter up without a try/except around the call itself:
    # only a *missing* converter is tolerated.  A KeyError raised inside a
    # converter is a real error and must not be silently swallowed.
    convert_fn = globals().get("convert_%s" % node.name.lower())
    if convert_fn is not None:
        text = convert_fn(node, text)

    return text
开发者ID:doakey3,项目名称:DashTable,代码行数:26,代码来源:process_tag.py

示例6: get_children

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def get_children(descendants, parsed):
    """Split the children of ``descendants`` into "done" and "still to do".

    * ``None`` yields an empty deque and leaves ``parsed`` untouched.
    * A ``NavigableString`` is appended to ``parsed`` directly.
    * Otherwise the children are walked in order: nameless children that
      appear before the first named child are appended to ``parsed``; the
      first named child and everything after it are queued instead.

    Returns a ``deque`` of the queued children in reverse document order.
    """
    pending = deque()
    if descendants is None:
        return pending

    if isinstance(descendants, NavigableString):
        parsed.append(descendants)
    else:
        seen_element = False
        for child in descendants.children:
            nameless = child.name is None
            if nameless and not seen_element:
                # Leading nameless children are consumed right away.
                parsed.append(child)
                continue
            if not nameless:
                seen_element = True
            pending.append(child)

    pending.reverse()
    return pending
开发者ID:jhpyle,项目名称:docassemble,代码行数:24,代码来源:file_docx.py

示例7: soup_strings

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def soup_strings(soup):
  """Yield whitespace-normalized text fragments from ``soup``.

  Walks ``soup.descendants`` in order.  A tag whose name is in
  ``paragraph_tags`` and which contains no nested paragraph tag is emitted
  as one fragment (its full text with runs of whitespace collapsed), and
  its descendants are then skipped.  Any other non-comment string node is
  emitted on its own.  Empty fragments are never yielded.

  Skipped descendants are tracked by object identity.  Membership tests
  based on ``==`` would compare bs4 nodes by markup, which both drops later
  nodes that merely look like an already-emitted child and can recurse
  deeply on large trees.
  """
  paragraph_tags = {"caption", "details", "h1", "h2", "h3", "h4", "h5",
                    "h6", "li", "p", "td", "div", "span"}

  # Maps id(node) -> node for the descendants of the paragraph tag emitted
  # most recently.  Holding the nodes keeps their ids valid while we skip.
  skip_children = None
  for descendant in soup.descendants:
    # If we've treated a tag as a contiguous paragraph, don't re-emit the
    # children (see below).
    if skip_children is not None:
      if id(descendant) in skip_children:
        continue
      skip_children = None

    # Treat some tags as contiguous paragraphs, regardless of other tags
    # nested inside (like <a> or <b>).
    if isinstance(descendant, bs4.Tag):
      if descendant.name in paragraph_tags:
        if descendant.find_all(paragraph_tags):
          # If there are nested paragraph tags, don't treat it as a single
          # contiguous tag.
          continue
        skip_children = {id(node): node for node in descendant.descendants}
        text = " ".join(descendant.get_text(" ", strip=True).split())
        if text:
          yield text
        continue

    # Only plain text nodes are left to emit; comments and non-paragraph
    # tags produce nothing themselves.
    if (isinstance(descendant, bs4.Comment) or
        not isinstance(descendant, bs4.NavigableString)):
      continue

    text = " ".join(descendant.strip().split())
    if text:
      yield text
开发者ID:akzaidi,项目名称:fine-lm,代码行数:43,代码来源:get_references_web_single_group.py

示例8: apply_correction_map

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def apply_correction_map(soup, tag, cor_map):
	"""Rewrite the text nodes under ``tag`` using the substitutions in ``cor_map``.

	``cor_map`` is iterated as a sequence of mappings.  Every key of every
	mapping that occurs in a text node is replaced by its value, in iteration
	order.  A node whose text changed is replaced in the tree by a new string
	created with ``soup.new_string``; unchanged nodes are left alone.
	"""
	# Snapshot the descendants first: nodes are replaced while we walk.
	for node in list(tag.descendants):
		if not isinstance(node, bs4.NavigableString):
			continue

		before = str(node)
		after = before
		for fontset in cor_map:
			for badc, goodc in fontset.items():
				# str.replace is a no-op when badc is absent.
				after = after.replace(badc, goodc)

		if after != before:
			node.replace_with(soup.new_string(after))
开发者ID:fake-name,项目名称:ReadableWebProxy,代码行数:14,代码来源:FontRemapProcessors.py

示例9: get_text_lines

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def get_text_lines(parent_node):
    """Return the text of ``parent_node``'s direct children split at ``<br>``.

    String children and the ``.text`` of non-``br`` children are appended to
    the current line; each ``br`` child ends the current line and starts a
    new one.  The result always holds at least one (possibly empty) line.
    """
    lines = []
    current = ''
    for child in parent_node.children:
        if isinstance(child, bs4.NavigableString):
            current += str(child)
        elif child.name == 'br':
            # A line break closes the line collected so far.
            lines.append(current)
            current = ''
        else:
            current += child.text
    lines.append(current)
    return lines
开发者ID:jbms,项目名称:beancount-import,代码行数:12,代码来源:amazon_invoice.py

示例10: clean_node

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def clean_node(self, doc, node):
        """Dispatch ``node`` to the cleaning routine that matches its type.

        Strings go to ``clean_string_node``, tags to ``clean_tag_node`` and
        anything else to ``clean_unknown_node``; each is called with
        ``(doc, node)``.
        """
        if isinstance(node, NavigableString):
            handler = self.clean_string_node
        elif isinstance(node, Tag):
            handler = self.clean_tag_node
        # Fallback for an object that derives from neither NavigableString
        # nor Tag; no such input is known, so this path is excluded from
        # coverage.
        else:  # pragma: no cover
            handler = self.clean_unknown_node
        handler(doc, node)
开发者ID:wagtail,项目名称:wagtail,代码行数:13,代码来源:whitelist.py

示例11: make_catena_input

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def make_catena_input(src, dest):
	"""Convert the XML document at ``src`` and write the result under ``dest``.

	The document is parsed with the ``xml`` parser, then:

	* an empty ``TITLE`` tag is inserted after the ``DCT`` element and a
	  creation-time ``TIMEX3`` tag (``tid="t0"``, empty value) is appended
	  inside ``DCT``;
	* every ``event`` element is replaced by an ``EVENT`` element carrying
	  the same attributes and the original element's text;
	* every ``TLINK`` element is removed.

	Parameters:
		src:  path of the input file.
		dest: string prefix for the output path.  The output file is
		      ``dest + <last '/'-separated component of src> + '.tml'``,
		      so a directory should be given with its trailing separator.
	"""
	# Close the input file deterministically instead of leaking the handle.
	with open(src) as f:
		text = f.read()
	soup = BeautifulSoup(text, 'xml')

	dct = soup.find('DCT')
	dct.insert_after(soup.new_tag('TITLE'))
	dct.append(soup.new_tag('TIMEX3', functionInDocument="CREATION_TIME", temporalFunction="false", tid="t0", type="DATE", value=""))

	for e in soup.find_all('event'):
		new_e = soup.new_tag('EVENT', **e.attrs)
		new_e.insert(0, NavigableString(e.get_text()))
		e.replace_with(new_e)

	# Plain loop: extract() is called only for its side effect.
	for link in soup('TLINK'):
		link.extract()

	# Honour the ``dest`` argument rather than a module-level ``args`` object.
	with open(dest + src.split('/')[-1] + '.tml', 'w') as f:
		f.write(str(soup))
开发者ID:malllabiisc,项目名称:NeuralDater,代码行数:17,代码来源:make_catena_input.py

示例12: is_navigable_string

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def is_navigable_string(obj):
        """Return whether ``obj`` is an instance of ``bs4.NavigableString``."""
        string_type = bs4.NavigableString
        return isinstance(obj, string_type)
开发者ID:facelessuser,项目名称:soupsieve,代码行数:5,代码来源:css_match.py

示例13: _html2text

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def _html2text(elem):
    for child in elem.children:
        if isinstance(child, Tag):
            _html2text(child)
        elif isinstance(child, NavigableString):
            # No changes necessary
            continue

    if elem.parent:
        if elem.name in _ELEMENT_REPLACER:
            _ELEMENT_REPLACER[elem.name](elem) 
开发者ID:quay,项目名称:quay,代码行数:13,代码来源:html.py

示例14: wrap_elem_content

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def wrap_elem_content(elem, begin, end):
    """Surround ``elem``'s existing content with two new string nodes.

    ``begin`` is inserted as the first child of ``elem`` and ``end`` is
    appended as its last child, each wrapped in a ``NavigableString``.
    """
    opening = NavigableString(begin)
    elem.insert(0, opening)
    closing = NavigableString(end)
    elem.append(closing)
开发者ID:paperswithcode,项目名称:axcell,代码行数:5,代码来源:extract_tables.py

示例15: _insert_anchor

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import NavigableString [as 别名]
def _insert_anchor(el, anchor_id, prefix="xxanchor"):
    """Insert a text marker as the first child of ``el``.

    The marker is the string ``' <prefix>-<anchor_id> '`` (with one leading
    and one trailing space), wrapped in a ``NavigableString``.
    """
    marker = NavigableString(f' {prefix}-{anchor_id} ')
    el.insert(0, marker)
开发者ID:paperswithcode,项目名称:axcell,代码行数:4,代码来源:doc_utils.py


注:本文中的 bs4.NavigableString 类示例由纯净天空整理自 GitHub/MSDocs 等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。