当前位置: 首页>>代码示例>>Python>>正文


Python tidy.parseString函数代码示例

本文整理汇总了Python中tidy.parseString函数的典型用法代码示例。如果您正苦于以下问题：Python parseString函数的具体用法？Python parseString怎么用？Python parseString使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了parseString函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_bad_option_values

 def test_bad_option_values(self):
     """Check that malformed option values raise ``tidy.OptionArgError``.

     Each option dict is passed as keyword arguments to
     ``tidy.parseString`` together with ``self.input2``; the raised error
     message must match "missing or malformed argument".
     """
     # ``assertRaisesRegexp`` was removed in Python 3.12. Prefer the current
     # name and fall back to the legacy alias where it is the only one.
     assert_raises_regex = (
         getattr(self, "assertRaisesRegex", None) or self.assertRaisesRegexp
     )
     badopts = [{"indent": "---"}, {"indent_spaces": None}]
     for opts in badopts:
         with assert_raises_regex(
             tidy.OptionArgError, "missing or malformed argument"
         ):
             tidy.parseString(self.input2, **opts)
开发者ID:nijel,项目名称:utidylib,代码行数:7,代码来源:test_tidy.py

示例2: test_bad_options

 def test_bad_options(self):
     """Check that an unknown option name raises ``tidy.InvalidOptionError``.

     Each option dict is passed as keyword arguments to
     ``tidy.parseString`` together with ``self.input2``; the raised error
     message must match "not a valid Tidy option".
     """
     # ``assertRaisesRegexp`` was removed in Python 3.12. Prefer the current
     # name and fall back to the legacy alias where it is the only one.
     assert_raises_regex = (
         getattr(self, "assertRaisesRegex", None) or self.assertRaisesRegexp
     )
     badopts = [{"foo": 1}]
     for opts in badopts:
         with assert_raises_regex(
             tidy.InvalidOptionError, "not a valid Tidy option"
         ):
             tidy.parseString(self.input2, **opts)
开发者ID:nijel,项目名称:utidylib,代码行数:7,代码来源:test_tidy.py

示例3: test_encodings

 def test_encodings(self):
     """Check that ``output_encoding`` selects the bytes Tidy emits.

     The fixture ``foo.htm`` is decoded as UTF-8 and re-encoded as ASCII
     with XML character references. It is then tidied once with latin1
     output and once with utf8 output, and each result is searched for the
     expected byte sequence.
     """
     # Read the fixture inside a context manager so the handle is closed
     # deterministically. Binary mode keeps the bytes exactly as stored
     # before the explicit UTF-8 decode.
     with open('foo.htm', 'rb') as fixture:
         raw = fixture.read()
     foo = raw.decode('utf8').encode('ascii', 'xmlcharrefreplace')

     doc1u = tidy.parseString(foo, input_encoding='ascii',
                              output_encoding='latin1')
     self.assertTrue('\xe9' in str(doc1u))

     doc2u = tidy.parseString(foo, input_encoding='ascii',
                              output_encoding='utf8')
     self.assertTrue('\xc3\xa9' in str(doc2u))
开发者ID:corydodt,项目名称:uTidylib,代码行数:9,代码来源:test_tidy.py

示例4: test_badOptions

 def test_badOptions(self):
     """Fail unless every invalid option set raises ``tidy.TidyLibError``.

     Each option set is passed as keyword arguments to
     ``tidy.parseString`` together with ``self.input2``.
     """
     invalid_option_sets = (
         {'foo': 1},
         {'indent': '---'},
         {'indent_spaces': None},
     )
     for options in invalid_option_sets:
         rejected = False
         try:
             tidy.parseString(self.input2, **options)
         except tidy.TidyLibError:
             rejected = True
         # Only reached without an error when Tidy accepted the options.
         if not rejected:
             self.fail("Invalid option %s should have raised an error" %
                       repr(options))
开发者ID:corydodt,项目名称:uTidylib,代码行数:10,代码来源:test_tidy.py

示例5: test_encodings

 def test_encodings(self):
     """Check that ``output_encoding`` selects the bytes Tidy emits.

     The file at ``self.test_file`` is decoded as UTF-8 and re-encoded as
     ASCII with XML character references. It is then tidied once with
     latin1 output and once with utf8 output, and each result's
     ``getvalue()`` is checked for the expected byte sequence.
     """
     # Close the fixture deterministically instead of leaving the handle to
     # the garbage collector (which emits ResourceWarning on Python 3).
     with open(self.test_file, "rb") as fixture:
         raw = fixture.read()
     text = raw.decode("utf8").encode("ascii", "xmlcharrefreplace")

     doc1u = tidy.parseString(text, input_encoding="ascii", output_encoding="latin1")
     self.assertIn(b"\xe9", doc1u.getvalue())

     doc2u = tidy.parseString(text, input_encoding="ascii", output_encoding="utf8")
     self.assertIn(b"\xc3\xa9", doc2u.getvalue())
开发者ID:nijel,项目名称:utidylib,代码行数:11,代码来源:test_tidy.py

示例6: test_options

 def test_options(self):
     """Exercise several Tidy options via ``parseString`` and ``parse``.

     Two in-memory documents are tidied with the same XHTML options, then
     the file at ``self.test_file`` is parsed with ``char_encoding`` and
     ``alt_text`` set.
     """
     # Options shared by the two parseString calls below.
     xhtml_options = {
         "add_xml_decl": 1,
         "show_errors": 1,
         "newline": "CR",
         "output_xhtml": 1,
     }

     from_fixture = tidy.parseString(self.input1, **xhtml_options)
     self.assertIn("CDATA", str(from_fixture))

     from_fragment = tidy.parseString("<Html>", **xhtml_options)
     self.assertTrue(str(from_fragment).startswith("<?xml"))
     self.assertFalse(len(from_fragment.errors) == 0)
     # newline="CR" was requested, so no "\n" may appear in the output.
     self.assertNotIn("\n", str(from_fragment))

     from_file = tidy.parse(self.test_file, char_encoding="utf8", alt_text="foo")
     self.assertIn('alt="foo"', from_file.gettext())
     self.assertIn("é", from_file.gettext())
开发者ID:nijel,项目名称:utidylib,代码行数:14,代码来源:test_tidy.py

示例7: load_doc_file

def load_doc_file(filename, f):
	"""Tidy one documentation file and insert it into the ``docs`` table.

	``f`` is read in full and decoded as latin1. The title is taken from
	the first ``re_titlematch`` match (empty string when there is none).
	The content is re-encoded as UTF-8, run through Tidy and inserted via
	``curs.execute``. ``pagecount`` is incremented by one afterwards.
	The names ``re_titlematch``, ``quiet``, ``curs``, ``ver`` and
	``pagecount`` come from outside this function.
	"""
	global pagecount

	contents = unicode(f.read(), 'latin1')
	title_match = re_titlematch.search(contents)
	title = title_match.group(1) if title_match else ""
	if not quiet:
		print("--- file: %s (%s) ---" % (filename, title))

	tidyopts = {
		'drop_proprietary_attributes': 1,
		'alt_text': '',
		'hide_comments': 1,
		'output_xhtml': 1,
		'show_body_only': 1,
		'clean': 1,
		'char_encoding': 'utf8',
		'indent': 'auto',
	}
	tidied = tidy.parseString(contents.encode('utf-8'), **tidyopts)

	# Values are bound as query parameters, not interpolated into the SQL.
	row = {
		'f': filename,
		'v': ver,
		't': title,
		'c': str(tidied),
	}
	curs.execute("INSERT INTO docs (file, version, title, content) VALUES (%(f)s, %(v)s, %(t)s, %(c)s)", row)
	pagecount += 1
开发者ID:ChristophBerg,项目名称:pgweb,代码行数:28,代码来源:docload.py

示例8: get_page_title

def get_page_title(content):
  """Return the title of an HTML page, or "" when none can be found.

  The page is tidied to XHTML, entities are rewritten with
  ``ENTITY``/``ENTITY_REP``, and the result is parsed with ``etree``. The
  text of ``head/title`` in the XHTML namespace is returned after a 0.5
  second pause. If any step raises, the error is printed and the title is
  taken from the first ``R_TITLE`` match in ``content`` instead (which may
  already be the tidied text at that point).
  """
  try:
    content = str(tidy.parseString(content, output_xhtml=True, add_xml_decl=True, indent=False, tidy_mark=False))
    content = ENTITY.sub(ENTITY_REP, content)

    root = etree.fromstring(content)

    # A missing <head> or <title> makes find() return None, so the next
    # attribute access raises and control moves to the fallback below.
    head = root.find("{http://www.w3.org/1999/xhtml}head")
    title = head.find("{http://www.w3.org/1999/xhtml}title")
    titletext = title.text

    time.sleep(0.5)

    return titletext

  except Exception as e:
    # Best-effort: report the problem and fall back to a regex search.
    # Single-argument print() emits the same text on Python 2 and 3.
    print("\tHTML Parser Error: " + str(e))

    m = R_TITLE.search(content)
    if m is not None:
      return m.group(1)

    return ""
开发者ID:iand,项目名称:talisians,代码行数:27,代码来源:scanner.py

示例9: issue

def issue(answers_xml):
    """Apply the XSLT stylesheet to the answers and return the result.

    ``answers_xml`` is handed to ``validateAnswers``, whose return value is
    used as the input document for the transform loaded from
    ``XSLT_SOURCE``. The return value is ``transform.tostring(result)``;
    the output is NOT passed through Tidy.
    """
    # validateAnswers supplies the document that is transformed below.
    ctxt = validateAnswers(answers_xml)

    # Build the XSLT transform and apply it.
    transform = lxml.etree.XSLT(
        lxml.etree.parse(XSLT_SOURCE)
        )

    result = transform.apply(ctxt)

    # A Tidy post-processing step used to follow this return, but it could
    # never execute. The dead code was removed; behaviour is unchanged.
    return transform.tostring(result)
开发者ID:cc-archive,项目名称:api,代码行数:25,代码来源:support.py

示例10: tidyhtml

def tidyhtml(html):
    """Run *html* through Tidy with XHTML output and return ``str()`` of it.

    A ``unicode`` input is encoded to UTF-8 first; any other input is
    passed to Tidy unchanged.
    """
    source = html.encode("utf-8") if isinstance(html, unicode) else html
    tidy_options = {
        "output_xhtml": 1,
        "tidy_mark": 0,
        "input_encoding": "utf8",
        "output_encoding": "utf8",
    }
    return str(tidy.parseString(source, **tidy_options))
开发者ID:cc-archive,项目名称:jtoolkit,代码行数:7,代码来源:tidywidget.py

示例11: clean

def clean(txt):
    """Tidy *txt* as strict XHTML and return the result decoded from UTF-8.

    The tidied document is converted with ``str()`` and then decoded to
    ``unicode`` using the ``'utf8'`` codec.
    """
    tidy_options = dict(
        output_xhtml=1,
        add_xml_decl=0,
        indent=0,
        tidy_mark=0,
        doctype="strict",
        wrap=0,
    )
    tidied = tidy.parseString(txt, **tidy_options)
    return unicode(str(tidied), 'utf8')
开发者ID:dnet,项目名称:f33dme,代码行数:7,代码来源:fetch.py

示例12: tidy_html

def tidy_html(html_buffer, cleaning_lib='utidylib'):
    """
    Tidy up the input HTML using one of the installed cleaning
    libraries.

    @param html_buffer: the input HTML to clean up
    @type html_buffer: string
    @param cleaning_lib: choose the preferred library to clean the HTML. One of:
                         - utidylib
                         - beautifulsoup
    @return: a cleaned version of the input HTML
    @note: requires uTidylib or BeautifulSoup to be installed. If the chosen
           library is missing, or the cleaning step raises an error, the
           input X{html_buffer} is returned I{as is}.
    """

    if CFG_TIDY_INSTALLED and cleaning_lib == 'utidylib':
        options = dict(output_xhtml=1,
                       show_body_only=1,
                       merge_divs=0,
                       wrap=0)
        try:
            output = str(tidy.parseString(html_buffer, **options))
        except Exception:
            # Best-effort cleaning: fall back to the raw input on failure.
            # A bare ``except:`` would also swallow KeyboardInterrupt and
            # SystemExit, so only Exception subclasses are caught.
            output = html_buffer
    elif CFG_BEAUTIFULSOUP_INSTALLED and cleaning_lib == 'beautifulsoup':
        try:
            output = str(BeautifulSoup(html_buffer).prettify())
        except Exception:
            # Same best-effort fallback as the uTidylib branch.
            output = html_buffer
    else:
        # Unknown or unavailable library: hand the input back untouched.
        output = html_buffer

    return output
开发者ID:AlbertoPeon,项目名称:invenio,代码行数:32,代码来源:htmlutils.py

示例13: _tidy2

        def _tidy2(text):
            """Tidy *text* as XHTML through uTidyLib.

            The tidied document is converted with ``str()`` and handed to
            ``_in_tag`` together with the tag name ``'body'``; whatever
            ``_in_tag`` returns is the result.
            """
            tidy_options = {
                'output_xhtml': 1,
                'add_xml_decl': 0,
                'indent': 0,
                'tidy_mark': 0,
            }
            document = tidy.parseString(text, **tidy_options)
            return _in_tag(str(document), 'body')
开发者ID:nnevvinn,项目名称:crockersrules,代码行数:7,代码来源:textile.py

示例14: to_xhtml

 def to_xhtml(self, stylesheet_url='', settings=DEFAULT_HTML_OVERRIDES,
              tidy_settings=DEFAULT_TIDY_XHTML_OPTIONS, *args, **kwargs):
     """Render to HTML via ``to_html`` and tidy the result.

     Returns a 2-tuple: ``str()`` of the tidied document and an empty
     list. The second value returned by ``to_html`` is discarded.
     Extra positional and keyword arguments are forwarded to ``to_html``.
     NOTE(review): ``settings`` is accepted but not forwarded to
     ``to_html`` here; confirm whether that is intentional.
     """
     # Any caller-supplied tidy_output is dropped; it is forced to False
     # in the to_html call below.
     kwargs.pop('tidy_output', None)
     html_string, _ = self.to_html(stylesheet_url, tidy_output=False,
                                   *args, **kwargs)
     xhtml = str(tidy.parseString(html_string, **tidy_settings))
     return xhtml, []
开发者ID:pombredanne,项目名称:rst2a,代码行数:7,代码来源:rst2a.py

示例15: run

 def run(self, text):
     """Pass *text* through Tidy and return the result as ``unicode``.

     The encoding is read from ``self.markdown.tidy_options`` under the key
     ``'char_encoding'`` and defaults to ``'utf8'``. It is used both to
     encode the input and to decode Tidy's return value.
     """
     # Tidy does not accept unicode, so encode on the way in and decode on
     # the way out.
     enc = self.markdown.tidy_options.get('char_encoding', 'utf8')
     encoded_text = text.encode(enc)
     tidied = tidy.parseString(encoded_text, **self.markdown.tidy_options)
     return unicode(tidied, encoding=enc)
开发者ID:2770862886,项目名称:Quicksilver,代码行数:7,代码来源:html_tidy.py


注:本文中的tidy.parseString函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。