本文整理汇总了Python中re.html方法的典型用法代码示例。如果您正苦于以下问题:Python re.html方法的具体用法?Python re.html怎么用?Python re.html使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类re的用法示例。
在下文中一共展示了re.html方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: set_html
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def set_html(self, html):
    """
    Store the raw html of this Google Document and refresh the data
    derived from it.

    After the html is stored, two extraction passes run in order:

    1. Content extraction. A regular expression pulls the meat of
       the document out of the html body and cuts off the footer
       that Google appends automatically.
    2. Section extraction. A regular expression looks for h1, h2,
       ... tags and splits the content into sections. Note: the
       Google Document must be written with the heading text
       styles, otherwise the split will not find the sections.

    :param html: The html of the Google Document.
    """
    self._html = html
    # Order matters: per the description above, the sections are cut out
    # of the content extracted by the first pass.
    self._extract_content()
    self._extract_sections()
示例2: _construct_section_tree
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def _construct_section_tree(self):
    """
    Build ``self._section_tree`` from the flat ``self._sections`` list.

    For some weird reason Google Documents doesn't like nesting
    lists, so their table of contents requires a bunch of special
    formatting. Rather than hacking at what they provide, a tree of
    sections is built here from each section's level. The tree is
    later used to construct the html for the table of contents.

    The root is an artificial level-0 section. When a section skips
    one or more levels relative to the current position, empty
    placeholder sections are inserted so that every child is exactly
    one level deeper than its parent.
    """
    root = TreeNode(Section(level=0))
    self._section_tree = root
    cursor = root
    for section in self._sections:
        # Climb until the cursor sits on a node shallower than the section.
        while section['level'] <= cursor.value['level']:
            cursor = cursor.parent
        # Pad any skipped levels with empty placeholder sections.
        while section['level'] > cursor.value['level'] + 1:
            placeholder = Section(level=cursor.value['level'] + 1)
            cursor = cursor.add_child(placeholder)
        # Invariant: the section is now a direct child of the cursor.
        assert section['level'] == cursor.value['level'] + 1
        cursor = cursor.add_child(section)
示例3: _navigation_list
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def _navigation_list(self, node=None):
    """
    Return an html representation of the table of contents for
    this document.

    The html is built recursively: a list item is emitted for each
    tree node whose value carries both a title and an id, followed
    by an unordered list wrapping the node's children when it has
    any. It may be worth double checking that this is the correct
    way to nest html lists.

    :param node: The tree node to render. When omitted, the section
        tree is (re)built and rendering starts from its root.
    :returns: The html string for ``node`` and its descendants.
    """
    if node is None:
        # Top-level call: build the tree first, then render from the root.
        self._construct_section_tree()
        return self._navigation_list(self._section_tree)

    pieces = []
    value = node.value
    if 'title' in value and 'id' in value:
        pieces.append('<li>%s</li>' % value.url())
    if len(node) > 0:
        rendered_children = [self._navigation_list(child) for child in node]
        pieces.append("<ul>%s</ul>" % "\n".join(rendered_children))
    return "".join(pieces)
示例4: test_linux_input_slack
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def test_linux_input_slack(self):
    """
    Check that ``reader.linux_input_slack`` returns the expected log data
    and prints the expected progress output.

    stdout is swapped for an in-memory buffer while the reader runs. The
    previously installed stream is restored in a ``finally`` block, so a
    failure inside the reader cannot leave stdout redirected for the rest
    of the test run.
    """
    expected_captured_output = util.load_from_disk(self.current_directory + "/data/stdout_captured_linux_input_slack")

    captured_output = StringIO.StringIO()
    # Remember whatever stream is currently installed (it may be a test
    # runner's capture object, not the interpreter's original stdout).
    previous_stdout = sys.stdout
    sys.stdout = captured_output
    try:
        log_data = reader.linux_input_slack(self.current_directory + "/data/slackware/", self.starting_date, self.ending_date)
        output = captured_output.getvalue()
    finally:
        sys.stdout = previous_stdout
        captured_output.close()

    # See https://docs.python.org/2/library/re.html for more details.
    # string 'Working on: /any_valid_path/IRCLogParser/test/unit-test/test_lib/test_in_out/data/log/2013/01/04/#kubuntu-devel.txt\n' is replaced by
    # 'Working on: IRCLogParser/test/unit-test/test_lib/test_in_out/data/log/2013/01/04/#kubuntu-devel.txt\n'
    # so the comparison does not depend on where the repository is checked out.
    output = re.sub(r'(?P<begin>.+ )/.+/(?P<constant>IRCLogParser/.+\n)', r'\g<begin>\g<constant>', output)

    self.assertEqual(log_data, self.log_data)
    self.assertEqual(expected_captured_output, output)
示例5: test_linux_input
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def test_linux_input(self):
    """
    Check that ``reader.linux_input`` returns the expected log data and
    prints the expected progress output for ``self.channel_name``.

    stdout is swapped for an in-memory buffer while the reader runs. The
    previously installed stream is restored in a ``finally`` block, so a
    failure inside the reader cannot leave stdout redirected for the rest
    of the test run.
    """
    expected_captured_output = util.load_from_disk(self.current_directory + "/data/stdout_captured_linux_input")

    captured_output = StringIO.StringIO()
    # Remember whatever stream is currently installed (it may be a test
    # runner's capture object, not the interpreter's original stdout).
    previous_stdout = sys.stdout
    sys.stdout = captured_output
    try:
        log_data = reader.linux_input(self.current_directory + "/data/log/", self.channel_name, self.starting_date, self.ending_date)
        output = captured_output.getvalue()
    finally:
        sys.stdout = previous_stdout
        captured_output.close()

    # See https://docs.python.org/2/library/re.html for more details.
    # string 'Working on: /any_valid_path/IRCLogParser/test/unit-test/test_lib/test_in_out/data/log/2013/01/04/#kubuntu-devel.txt\n' is replaced by
    # 'Working on: IRCLogParser/test/unit-test/test_lib/test_in_out/data/log/2013/01/04/#kubuntu-devel.txt\n'
    # so the comparison does not depend on where the repository is checked out.
    output = re.sub(r'(?P<begin>.+ )/.+/(?P<constant>IRCLogParser/.+\n)', r'\g<begin>\g<constant>', output)

    self.assertEqual(log_data, self.log_data)
    self.assertEqual(expected_captured_output, output)
示例6: test_linux_input_all_channels
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def test_linux_input_all_channels(self):
    """
    Check ``reader.linux_input`` when called with the channel list
    ``["ALL"]`` for the dates 2013-1-1 to 2013-1-2.

    Both the returned log data and the text printed to stdout are compared
    against fixtures loaded from disk. stdout is swapped for an in-memory
    buffer while the reader runs, and the previously installed stream is
    restored in a ``finally`` block so a failure inside the reader cannot
    leave stdout redirected for the rest of the test run.
    """
    expected_captured_output = util.load_from_disk(self.current_directory + "/data/stdout_captured_linux_input_all_channels")
    expected_log_data = util.load_from_disk(self.current_directory + "/data/log_data_for_test_linux_input_all_channels")

    captured_output = StringIO.StringIO()
    # Remember whatever stream is currently installed (it may be a test
    # runner's capture object, not the interpreter's original stdout).
    previous_stdout = sys.stdout
    sys.stdout = captured_output
    try:
        log_data = reader.linux_input(self.current_directory + "/data/log_to_test_for_all_channels/", ["ALL"], "2013-1-1", "2013-1-2")
        output = captured_output.getvalue()
    finally:
        sys.stdout = previous_stdout
        captured_output.close()

    # See https://docs.python.org/2/library/re.html for more details.
    # Strip the machine-specific directory prefix in front of
    # 'IRCLogParser/' so the output matches the stored fixture.
    output = re.sub(r'(?P<begin>.+ )/.+/(?P<constant>IRCLogParser/.+\n)', r'\g<begin>\g<constant>', output)

    self.assertEqual(expected_log_data, log_data)
    self.assertEqual(expected_captured_output, output)
示例7: take_screenshot
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def take_screenshot(self, height='410', width='670'):
    """Generate a screenshot of the IBM 3270 Mainframe in an html format.

    The default folder is the log folder of RobotFramework; to change it,
    see the `Set Screenshot Folder` keyword.

    The screenshot is written to the log inside an iframe, by default with
    height=410 and width=670. Both values can be changed by passing them
    to the keyword.

    Examples:
        | Take Screenshot |
        | Take Screenshot | height=500 | width=700 |
    """
    # A millisecond timestamp keeps successive screenshot names distinct.
    timestamp_ms = str(int(round(time.time() * 1000)))
    filename = '%s_%s.%s' % ('screenshot', timestamp_ms, 'html')
    filepath = os.path.join(self.imgfolder, filename)
    self.mf.save_screen(os.path.join(self.output_folder, filepath))
    # The iframe src uses forward slashes even when os.path.join produced
    # backslashes.
    iframe_src = filepath.replace("\\", "/")
    logger.write('<iframe src="%s" height="%s" width="%s"></iframe>' % (iframe_src, height, width),
                 level='INFO', html=True)
示例8: re_group
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def re_group(pattern, group=1, flags=0):
    """
    Returns a :mod:`composable <wex.composed>` callable that
    extracts the specified group using a regular expression.

    The pattern is compiled once, when this factory is called. The
    returned callable yields the requested group for every match found
    in every string produced by flattening its input.

    :param pattern: The regular expression.
    :param group: The group from the `MatchObject <https://docs.python.org/2/library/re.html#re.MatchObject.group>`_.
    :param flags: Flags to use when compiling the
                  `pattern <https://docs.python.org/2/library/re.html#re.compile>`_.
    """
    regex = re.compile(pattern, flags)

    @composable
    def regroup(src):
        for text in flatten(src):
            for found in regex.finditer(text):
                yield found.group(group)

    return regroup
示例9: re_groupdict
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def re_groupdict(pattern, flags=0):
    """
    Returns a :mod:`composable <wex.composed>` callable that
    extracts a group dictionary using a regular expression.

    The pattern is compiled once, when this factory is called. The
    returned callable yields ``match.groupdict()`` for every match found
    in every string produced by flattening its input.

    :param pattern: The regular expression.
    :param flags: Flags to use when compiling the
                  `pattern <https://docs.python.org/2/library/re.html#re.compile>`_.
    """
    # Compile exactly once; the original compiled the same pattern twice.
    compiled = re.compile(pattern, flags)

    @composable
    def redict(src):
        for string in flatten(src):
            for match in compiled.finditer(string):
                yield match.groupdict()

    return redict
示例10: assert_found
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def assert_found(patt, filename, msg=None, encoding='utf-8'):
    '''Assert that regex pattern ``patt`` is found in the file ``filename``.

    :arg patt: The regex pattern to search.
        Any standard Python `regular expression
        <https://docs.python.org/3/library/re.html#regular-expression-syntax>`_
        is accepted.
    :arg filename: The name of the file to examine.
        Any :class:`OSError` raised while processing the file will be
        propagated as a :class:`reframe.core.exceptions.SanityError`.
    :arg msg: Optional error message used instead of the default one.
        It is passed to ``_format`` together with ``patt`` and
        ``filename``.
    :arg encoding: The name of the encoding used to decode the file.
    :returns: ``True`` on success.
    :raises reframe.core.exceptions.SanityError: if assertion fails.
    '''
    num_matches = count(finditer(patt, filename, encoding))
    try:
        evaluate(assert_true(num_matches))
    except SanityError:
        # Replace the generic assertion failure with a message that names
        # the pattern and the file.
        template = msg or "pattern `{0}' not found in `{1}'"
        raise SanityError(_format(template, patt, filename))

    return True
示例11: extractall
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def extractall(patt, filename, tag=0, conv=None, encoding='utf-8'):
    '''Extract all values from the capturing group ``tag`` of a matching regex
    ``patt`` in the file ``filename``.

    :arg patt: The regex pattern to search.
        Any standard Python `regular expression
        <https://docs.python.org/3/library/re.html#regular-expression-syntax>`_
        is accepted.
    :arg filename: The name of the file to examine.
    :arg encoding: The name of the encoding used to decode the file.
    :arg tag: The regex capturing group to be extracted.
        Group ``0`` refers always to the whole match.
        Since the file is processed line by line, this means that group ``0``
        returns the whole line that was matched.
    :arg conv: A callable that takes a single argument and returns a new value.
        If provided, it will be used to convert the extracted values before
        returning them.
    :returns: A list of the extracted values from the matched regex.
    :raises reframe.core.exceptions.SanityError: In case of errors.
    '''
    # Each item produced by ``extractiter`` is passed through ``evaluate``
    # before being stored in the returned list.
    extracted = extractiter(patt, filename, tag, conv, encoding)
    return [evaluate(item) for item in extracted]
示例12: __init__
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def __init__(self, pattern, flags=0):
    """
    Record a regex pattern together with its effective and requested flags.

    :param pattern: The regular expression to compile.
    :param flags: The flags requested by the caller; kept unchanged in
        ``_user_flags``.
    """
    # Compiling is what exposes the *effective* flags: re.compile also adds
    # flags defined inside the pattern, plus implicit ones, to ``.flags``.
    # See https://docs.python.org/3/library/re.html#re.regex.flags
    regex = re.compile(pattern, flags)
    self._user_flags = flags
    self.flags = regex.flags
    self.pattern = regex.pattern
示例13: build_regex_search
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def build_regex_search(search_string):
    """
    Build up a compiled regular expression from the search string.

    Supports the use of flags - ie. search for `nothing/i` will perform a
    case-insensitive regex for `nothing`. Only the ``i`` flag is
    recognised; any other letter in the flag suffix is ignored.

    :param search_string: The user-supplied search text, optionally
        followed by ``/<flag letters>``. May be empty or ``None``.
    :returns: The compiled pattern, or ``None`` when ``search_string`` is
        empty or is not a valid regular expression.
    """
    sspat = None
    valid_flags = {
        'i': re.IGNORECASE
    }
    if search_string:
        try:
            # re.DOTALL lets the splitting regex match search strings that
            # contain newlines. Without it re.match() returns None for such
            # input and ``.groups()`` raises an uncaught AttributeError.
            search_string, flag_letters = re.match(
                r'^(.+?)(?:/([a-z]+))?$', search_string, re.DOTALL
            ).groups()
            flags = 0
            # if flags are given, OR together all the valid flags
            # see https://docs.python.org/3/library/re.html#re.compile
            if flag_letters:
                for letter in flag_letters:
                    if letter in valid_flags:
                        flags = flags | valid_flags[letter]
            sspat = re.compile(search_string, flags)
        except re.error:
            # An invalid user-supplied pattern is reported as "no pattern".
            sspat = None
    return sspat
示例14: to_html
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def to_html(self):
    """
    Return the result of rendering the ``section.html`` template.

    This object is handed to ``render_to_string`` as its second argument
    (presumably the template context -- confirm against the helper).
    """
    rendered = render_to_string('section.html', self)
    return rendered
示例15: _extract_sections
# 需要导入模块: import re [as 别名]
# 或者: from re import html [as 别名]
def _extract_sections(self):
    """
    Split ``self._content`` into sections and store them in
    ``self._sections``.

    Here is an example of what a section header looks like in the
    html of a Google Document:

        <h3 class="c1"><a name="h.699ffpepx6zs"></a><span>Hello World
        </span></h3>

    The content is split with a regular expression that matches the
    above header. ``re.split`` puts the matching groups into the
    resulting list as well as the content between the matches, see
    http://docs.python.org/library/re.html#re.split

    The ugly section id that Google creates is replaced with a
    slugified version of the section title, which makes for pretty
    urls. Sections whose computed level is below 1 are dropped.
    """
    self._sections = []
    header = (r'<h(?P<level>\d) class="[^"]+">'
              r'<a name="(?P<id>[^"]+)"></a>'
              r'<span>(?P<title>[^<]+)</span>'
              r'</h\d>')
    # With three capturing groups the split result is laid out as
    # [preamble, level, id, title, content, level, id, title, content, ...].
    fields = re.split(header, self._content)
    # Start at index 1 to skip the preamble, then walk in strides of four.
    # This replaces the repeated list.pop(0) calls, which were quadratic.
    for start in range(1, len(fields), 4):
        level, anchor, title, content = fields[start:start + 4]
        # Decode only byte strings; already-decoded text is left untouched.
        if isinstance(title, bytes):
            title = title.decode('utf8')
        section = Section(
            # hack: cause we started with h3 in google docs
            level=int(level) - 2,
            id=anchor,
            title=title,
            content=content,
        )
        section['id'] = slugify(section['title'])
        if section['level'] >= 1:
            self._sections.append(section)