當前位置: 首頁>>代碼示例>>Python>>正文


Python bs4.NavigableString方法代碼示例

本文整理匯總了Python中bs4.NavigableString方法的典型用法代碼示例。如果您正苦於以下問題:Python bs4.NavigableString方法的具體用法?Python bs4.NavigableString怎麼用?Python bs4.NavigableString使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在bs4的用法示例。


在下文中一共展示了bs4.NavigableString方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: print_content

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def print_content(contents):
    """Recursively print the text found in a sequence of BeautifulSoup nodes.

    Text nodes are printed on their own line with newlines removed and
    surrounding whitespace stripped.  For tags, ``img`` prints a fixed image
    placeholder, ``br`` prints an empty line, ``li`` writes a bullet prefix
    (no newline), and ``noscript`` is skipped together with its children.
    Every tag other than ``noscript`` is then descended into.

    The body uses only constructs that behave the same on Python 2 and
    Python 3 (single-argument ``print(...)`` and ``sys.stdout.write``), so it
    no longer depends on the Python 2 ``print`` statement.

    Args:
        contents: iterable of nodes, e.g. ``tag.contents``.

    Returns:
        None. Output goes to standard output.
    """
    import sys  # local import: only needed for the newline-free bullet prefix

    for content in contents:
        if isinstance(content, NavigableString):
            print(str(content).replace("\n", "").strip())
            continue

        name = content.name
        if name == "noscript":
            # Skip the fallback markup entirely, children included.
            continue
        if name == "img":
            print("[圖片]")
        elif name == "br":
            print("")
        elif name == "li":
            # Bullet followed by a space; the item's text continues the line.
            sys.stdout.write("• ")
        print_content(content.contents)
開發者ID:duduainankai,項目名稱:zhihu-terminal,代碼行數:25,代碼來源:Answer.py

示例2: normalize_text_sections

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def normalize_text_sections(div):
    """Flatten the direct children of ``div`` into a single line of text.

    A child whose exact type is ``NavigableString`` contributes its own text,
    a child whose exact type is ``Comment`` contributes nothing, and any
    other child contributes its ``.text``.  Each contribution is stripped and
    followed by one space.  The joined result is stripped, carriage returns
    are removed, newlines become ``', '`` and the result is stripped again.

    Args:
        div: node exposing a ``contents`` sequence.

    Returns:
        The normalized string (``''`` when ``div`` has no children).
    """
    pieces = []
    for child in div.contents:
        kind = type(child)
        if kind is NavigableString:
            raw = child
        elif kind is Comment:
            # Comments add no text but still add the separator space below.
            raw = ''
        else:
            raw = child.text
        pieces.append(raw.strip() + ' ')

    merged = ''.join(pieces).strip()
    merged = merged.replace('\r', '').replace('\n', ', ')
    return merged.strip()
開發者ID:cobalt-uoft,項目名稱:uoft-scrapers,代碼行數:21,代碼來源:__init__.py

示例3: normalize_text_sections

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def normalize_text_sections(div):
    """Flatten the direct children of ``div`` into a single line of text.

    A child whose exact type is ``NavigableString`` contributes its own text,
    a child whose exact type is ``Comment`` contributes nothing, and any
    other child contributes its ``.text``.  Each contribution is stripped and
    followed by one space.  The joined result is stripped, carriage returns
    are removed, newlines become ``', '``, each pair of spaces is replaced by
    one space (a single pass), and the result is stripped again.

    Args:
        div: node exposing a ``contents`` sequence.

    Returns:
        The normalized string (``''`` when ``div`` has no children).
    """
    def piece_for(child):
        # Exact type checks: subclasses are deliberately not matched here.
        kind = type(child)
        if kind is NavigableString:
            return child
        if kind is Comment:
            return ''
        return child.text

    merged = ''.join(piece_for(child).strip() + ' ' for child in div.contents)
    merged = merged.strip()
    for old, new in (('\r', ''), ('\n', ', '), ('  ', ' ')):
        merged = merged.replace(old, new)
    return merged.strip()
開發者ID:cobalt-uoft,項目名稱:uoft-scrapers,代碼行數:22,代碼來源:__init__.py

示例4: __clear

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def __clear(parent_node,config):
    """Return the text of ``parent_node`` and everything beneath it.

    A ``NavigableString`` yields its ``.string``.  For any other node, a
    newline is emitted first when the node's name is in ``line_elements``;
    then each child is handled in order: ``table`` children are delegated to
    ``parse_table(child, config)`` and all other children are processed
    recursively.

    Args:
        parent_node: the node to convert.
        config: passed through unchanged to ``parse_table`` and recursion.

    Returns:
        The concatenated text.
    """
    if isinstance(parent_node, NavigableString):
        return parent_node.string

    pieces = ["\n"] if parent_node.name in line_elements else []
    for child in parent_node.contents:
        render = parse_table if child.name == "table" else __clear
        pieces.append(render(child, config))
    return "".join(pieces)
開發者ID:newsettle,項目名稱:ns4_chatbot,代碼行數:21,代碼來源:html_clear.py

示例5: process_tag

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def process_tag(node):
    """
    Recursively go through a tag's children, converting them, then
    convert the tag itself.

    Text children are concatenated as-is.  Child tags are processed
    recursively unless ``node`` itself is listed in ``exceptions``.  Finally,
    if this module defines a global named ``convert_<tag name, lowercased>``,
    it is called as ``convert_fn(node, text)`` and its result is returned;
    otherwise the accumulated text is returned unchanged.

    Only the converter *lookup* is allowed to miss silently.  A ``KeyError``
    raised inside a converter now propagates instead of being swallowed.

    Parameters
    ----------
    node : tag-like object exposing ``name`` and ``children``

    Returns
    -------
    text : the converted text for ``node``
    """
    # Child tags of these nodes are not recursed into (presumably their
    # converter handles nested markup itself -- confirm against convert_table).
    exceptions = ['table']
    descend = node.name not in exceptions

    pieces = []
    for element in node.children:
        if isinstance(element, NavigableString):
            pieces.append(element)
        elif descend:
            pieces.append(process_tag(element))
    text = ''.join(pieces)

    convert_fn = globals().get("convert_%s" % node.name.lower())
    if convert_fn is None:
        # No converter registered for this tag name.
        return text
    return convert_fn(node, text)
開發者ID:doakey3,項目名稱:DashTable,代碼行數:26,代碼來源:process_tag.py

示例6: get_children

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def get_children(descendants, parsed):
    """Split the children of ``descendants`` into "done" and "still to do".

    * ``None`` -> an empty deque is returned and ``parsed`` is untouched.
    * A ``NavigableString`` -> it is appended to ``parsed`` and an empty
      deque is returned.
    * Otherwise the children are walked in order: children whose ``name`` is
      ``None`` that occur before the first named child are appended to
      ``parsed``; the first named child and every child after it are
      collected instead.

    Args:
        descendants: ``None``, a ``NavigableString`` or a node with
            ``children``.
        parsed: list that receives the leading unnamed children (mutated in
            place).

    Returns:
        A ``deque`` of the collected children in reverse document order.
    """
    pending = deque()
    if descendants is None:
        return pending

    if isinstance(descendants, NavigableString):
        parsed.append(descendants)
        return pending

    seen_named_child = False
    for node in descendants.children:
        if not seen_named_child and node.name is None:
            parsed.append(node)
            continue
        # From the first named child onwards everything is deferred.
        seen_named_child = True
        pending.append(node)

    pending.reverse()
    return pending
開發者ID:jhpyle,項目名稱:docassemble,代碼行數:24,代碼來源:file_docx.py

示例7: soup_strings

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def soup_strings(soup):
    """Yield whitespace-normalized text snippets from a BeautifulSoup tree.

    The tree is walked via ``soup.descendants``.  A tag whose name is in
    ``paragraph_tags`` and which contains no nested paragraph tag is emitted
    as one contiguous snippet (its ``get_text`` with runs of whitespace
    collapsed), and its descendants are then skipped so they are not emitted
    again.  Paragraph tags that do contain nested paragraph tags, and all
    other tags, emit nothing themselves.  Remaining text nodes are emitted
    individually; comments are ignored.  Empty snippets are never yielded.

    Skipped descendants are tracked by object identity.  Membership by
    equality would also skip an unrelated later text node that merely has
    the same text as a skipped one, and comparing tags with ``==`` can
    recurse deeply.

    Args:
        soup: a parsed document or tag exposing ``descendants``.

    Yields:
        Non-empty strings with internal whitespace collapsed to single
        spaces.
    """
    paragraph_tags = {"caption", "details", "h1", "h2", "h3", "h4", "h5",
                      "h6", "li", "p", "td", "div", "span"}

    skip_ids = None
    for descendant in soup.descendants:
        # If we've treated a tag as a contiguous paragraph, don't re-emit its
        # children (see below).
        if skip_ids is not None:
            if id(descendant) in skip_ids:
                continue
            skip_ids = None

        # Treat some tags as contiguous paragraphs, regardless of other tags
        # nested inside (like <a> or <b>).
        if isinstance(descendant, bs4.Tag):
            if (descendant.name in paragraph_tags and
                    not descendant.find_all(paragraph_tags)):
                # ids stay valid as long as the tree still holds the nodes.
                skip_ids = {id(child) for child in descendant.descendants}
                text = " ".join(
                    descendant.get_text(" ", strip=True).split())
                if text:
                    yield text
            # Tags never fall through to the text-node handling below.
            continue

        if (isinstance(descendant, bs4.Comment) or
                not isinstance(descendant, bs4.NavigableString)):
            continue

        text = " ".join(descendant.strip().split())
        if text:
            yield text
開發者ID:akzaidi,項目名稱:fine-lm,代碼行數:43,代碼來源:get_references_web_single_group.py

示例8: apply_correction_map

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def apply_correction_map(soup, tag, cor_map):
    """Rewrite the text nodes under ``tag`` using character replacements.

    Every ``bs4.NavigableString`` descendant of ``tag`` has each
    ``bad -> good`` pair from each mapping in ``cor_map`` applied in order.
    Nodes whose text changed are replaced in the tree by a new string
    created with ``soup.new_string``; unchanged nodes are left alone.

    Args:
        soup: object providing ``new_string``.
        tag: node whose ``descendants`` are processed.
        cor_map: iterable of dict-like mappings from bad to good substrings.

    Returns:
        None. The tree is modified in place.
    """
    # Snapshot first: nodes are swapped out of the tree while we walk.
    text_nodes = [node for node in list(tag.descendants)
                  if isinstance(node, bs4.NavigableString)]

    for node in text_nodes:
        before = str(node)
        after = before
        for fontset in cor_map:
            for bad, good in fontset.items():
                # replace() is a no-op when ``bad`` does not occur.
                after = after.replace(bad, good)
        if after != before:
            node.replace_with(soup.new_string(after))
開發者ID:fake-name,項目名稱:ReadableWebProxy,代碼行數:14,代碼來源:FontRemapProcessors.py

示例9: get_text_lines

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def get_text_lines(parent_node):
    """Split the direct children of ``parent_node`` into lines at ``<br>``.

    Text nodes and the ``.text`` of non-``br`` tags are appended to the
    current line; each ``br`` child ends the current line and starts a new
    empty one.

    Args:
        parent_node: node exposing ``children``.

    Returns:
        A list of strings with one more entry than there are ``br``
        children (``['']`` when there are no children).
    """
    finished = []
    current = ''
    for child in parent_node.children:
        if isinstance(child, bs4.NavigableString):
            current += str(child)
        elif child.name == 'br':
            finished.append(current)
            current = ''
        else:
            current += child.text
    finished.append(current)
    return finished
開發者ID:jbms,項目名稱:beancount-import,代碼行數:12,代碼來源:amazon_invoice.py

示例10: clean_node

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def clean_node(self, doc, node):
    """Clean a BeautifulSoup document in-place.

    Dispatches on the kind of ``node``: strings go to
    ``clean_string_node``, tags go to ``clean_tag_node`` and anything else
    goes to ``clean_unknown_node``.  Each handler is called as
    ``handler(doc, node)``.
    """
    if isinstance(node, NavigableString):
        handler = self.clean_string_node
    elif isinstance(node, Tag):
        handler = self.clean_tag_node
    # Fallback for a BeautifulSoup object that inherits from neither
    # NavigableString nor Tag. No example of such an object is known, so
    # this branch is untested.
    else:  # pragma: no cover
        handler = self.clean_unknown_node
    handler(doc, node)
開發者ID:wagtail,項目名稱:wagtail,代碼行數:13,代碼來源:whitelist.py

示例11: make_catena_input

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def make_catena_input(src, dest):
    """Rewrite the XML file ``src`` and save the result as a ``.tml`` file.

    Steps, in order:

    1. Parse ``src`` with the ``'xml'`` parser.
    2. Insert an empty ``TITLE`` tag right after the ``DCT`` tag and append
       a ``TIMEX3`` creation-time tag (``tid="t0"``, empty ``value``) inside
       ``DCT``.
    3. Replace every ``event`` tag with an ``EVENT`` tag that carries the
       same attributes and the original tag's text.
    4. Remove every ``TLINK`` tag.
    5. Write the document to ``dest + <last '/'-component of src> + '.tml'``.

    Args:
        src: path of the input file; ``/`` is the separator used to obtain
            the file name.
        dest: output path prefix.  It is concatenated directly with the
            file name, so a directory must include its trailing ``/``.

    Raises:
        AttributeError: if the document has no ``DCT`` tag.
    """
    # Context manager so the input handle is closed promptly.
    with open(src) as infile:
        text = infile.read()
    soup = BeautifulSoup(text, 'xml')

    dct = soup.find('DCT')
    dct.insert_after(soup.new_tag('TITLE'))
    dct.append(soup.new_tag('TIMEX3', functionInDocument="CREATION_TIME", temporalFunction="false", tid="t0", type="DATE", value=""))

    for e in soup.find_all('event'):
        new_e = soup.new_tag('EVENT', **e.attrs)
        new_e.insert(0, NavigableString(e.get_text()))
        e.replace_with(new_e)

    for link in soup('TLINK'):
        link.extract()

    # Use the ``dest`` argument rather than a module-level ``args.dest``.
    with open(dest + src.split('/')[-1] + '.tml', 'w') as f:
        f.write(str(soup))
開發者ID:malllabiisc,項目名稱:NeuralDater,代碼行數:17,代碼來源:make_catena_input.py

示例12: is_navigable_string

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def is_navigable_string(obj):
    """Return ``True`` when ``obj`` is a ``bs4.NavigableString`` instance."""
    string_type = bs4.NavigableString
    return isinstance(obj, string_type)
開發者ID:facelessuser,項目名稱:soupsieve,代碼行數:5,代碼來源:css_match.py

示例13: _html2text

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def _html2text(elem):
    """Apply the registered element replacers to ``elem``, children first.

    Child tags are processed recursively before ``elem`` itself; string
    children need no work.  Afterwards, if ``elem`` has a parent and its
    name has an entry in ``_ELEMENT_REPLACER``, that entry is called with
    ``elem``.
    """
    for node in elem.children:
        # Only tags need processing; text nodes are left untouched.
        if isinstance(node, Tag):
            _html2text(node)

    if elem.parent and elem.name in _ELEMENT_REPLACER:
        replacer = _ELEMENT_REPLACER[elem.name]
        replacer(elem)
開發者ID:quay,項目名稱:quay,代碼行數:13,代碼來源:html.py

示例14: wrap_elem_content

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def wrap_elem_content(elem, begin, end):
    """Surround the content of ``elem`` with two text nodes.

    ``begin`` is inserted as the first child of ``elem`` and ``end`` is
    appended as its last child, each wrapped in a ``NavigableString``.
    """
    leading = NavigableString(begin)
    elem.insert(0, leading)
    trailing = NavigableString(end)
    elem.append(trailing)
開發者ID:paperswithcode,項目名稱:axcell,代碼行數:5,代碼來源:extract_tables.py

示例15: _insert_anchor

# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import NavigableString [as 別名]
def _insert_anchor(el, anchor_id, prefix="xxanchor"):
    """Insert an anchor marker as the first child of ``el``.

    The marker is a text node reading ``<prefix>-<anchor_id>`` with one
    space on each side.
    """
    marker = NavigableString(f' {prefix}-{anchor_id} ')
    el.insert(0, marker)
開發者ID:paperswithcode,項目名稱:axcell,代碼行數:4,代碼來源:doc_utils.py


注:本文中的bs4.NavigableString方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。