本文整理汇总了Python中webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.find方法的典型用法代码示例。如果您正苦于以下问题:Python BeautifulSoup.find方法的具体用法?Python BeautifulSoup.find怎么用?Python BeautifulSoup.find使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类webkitpy.thirdparty.BeautifulSoup.BeautifulSoup
的用法示例。
在下文中一共展示了BeautifulSoup.find方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _parse_bug_page
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
def _parse_bug_page(self, page):
    """Parse a Bugzilla bug XML page into a dictionary of bug fields."""
    soup = BeautifulSoup(page)
    bug_id = int(soup.find("bug_id").string)
    bug = {
        "id": bug_id,
        "title": unicode(soup.find("short_desc").string),
        "reporter_email": str(soup.find("reporter").string),
        "assigned_to_email": str(soup.find("assigned_to").string),
    }
    # One entry per <cc> element; coerced to plain strings like the emails above.
    bug["cc_emails"] = [str(cc_element.string) for cc_element in soup.findAll('cc')]
    bug["attachments"] = [self._parse_attachment_element(attachment, bug_id)
                          for attachment in soup.findAll('attachment')]
    return bug
示例2: test_attachment_parsing
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
def test_attachment_parsing(self):
    """Check that _parse_attachment_element parses the sample attachment XML as expected."""
    bugzilla = Bugzilla()
    soup = BeautifulSoup(self._example_attachment)
    expected = self._expected_example_attachment_parsing
    element = soup.find("attachment")
    parsed = bugzilla._parse_attachment_element(element, expected['bug_id'])
    self.assertTrue(parsed)
    self._assert_dictionaries_equal(parsed, expected)
示例3: test_status_parsing
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
def test_status_parsing(self):
    """Check that every row of the example one-box status table parses to the expected builder dict."""
    buildbot = BuildBot()
    soup = BeautifulSoup(self._example_one_box_status)
    status_table = soup.find("table")
    input_rows = status_table.findAll("tr")
    # Fail loudly if the fixture and the expectations drift out of sync
    # (zip below would otherwise silently truncate to the shorter list).
    self.assertEqual(len(input_rows), len(self._expected_example_one_box_parsings))
    # Walk parsed rows and expected parsings in lockstep instead of
    # indexing both lists with range(len(...)).
    for x, (status_row, expected_parsing) in enumerate(
            zip(input_rows, self._expected_example_one_box_parsings)):
        builder = buildbot._parse_builder_status_from_row(status_row)
        # Make sure we aren't parsing more or less than we expect.
        self.assertEqual(builder.keys(), expected_parsing.keys())
        for key, expected_value in expected_parsing.items():
            self.assertEqual(
                builder[key],
                expected_value,
                "Builder %d parse failure for key: %s: Actual='%s' Expected='%s'"
                % (x, key, builder[key], expected_value))
示例4: _parse_bug_page
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
def _parse_bug_page(self, page):
    """Parse a Bugzilla bug XML page into a dictionary of bug fields."""
    soup = BeautifulSoup(page)

    def text_of(tag_name):
        # String contents of the first tag with the given name.
        return self._string_contents(soup.find(tag_name))

    bug = {}
    bug["id"] = int(soup.find("bug_id").string)
    bug["title"] = text_of("short_desc")
    bug["reporter_email"] = text_of("reporter")
    bug["assigned_to_email"] = text_of("assigned_to")
    bug["cc_emails"] = [self._string_contents(cc) for cc in soup.findAll('cc')]
    bug["attachments"] = [self._parse_attachment_element(attachment, bug["id"])
                          for attachment in soup.findAll('attachment')]
    bug["platform"] = text_of("rep_platform")
    bug["os"] = text_of("op_sys")
    # The first comment lives in the <thetext> following the first <long_desc>.
    bug["long_description"] = self._string_contents(soup.find("long_desc").findNext("thetext"))
    bug["keywords"] = text_of("keywords")
    bug["component"] = text_of("component")
    return bug
示例5: test_failures_from_fail_row
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
def test_failures_from_fail_row(self):
    """Test ORWTResultsHTMLParser._failures_from_fail_row on sample results.html rows."""
    row = BeautifulSoup("<tr><td><a>test.hml</a></td><td><a>expected image</a></td><td><a>25%</a></td></tr>")
    # _failures_from_fail_row does its own findAll over the row's links, so
    # there is no need to pre-extract the test name here (the previous
    # unused local that demonstrated this has been removed).
    failures = OutputCapture().assert_outputs(self, ORWTResultsHTMLParser._failures_from_fail_row, [row])
    self.assertEqual(len(failures), 1)
    self.assertEqual(type(sorted(failures)[0]), test_failures.FailureImageHashMismatch)

    # A second, unrecognized link in the test cell should produce a warning.
    row = BeautifulSoup("<tr><td><a>test.hml</a><a>foo</a></td></tr>")
    expected_stderr = "Unhandled link text in results.html parsing: foo. Please file a bug against webkitpy.\n"
    OutputCapture().assert_outputs(self, ORWTResultsHTMLParser._failures_from_fail_row, [row], expected_stderr=expected_stderr)
示例6: _revisions_for_builder
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
def _revisions_for_builder(self, builder):
    """Scrape (revision, passed) pairs from a builder's status page.

    Returns a list of (int revision, bool passed) tuples, where `passed`
    is True when the third table cell's text contains 'success'. Rows
    without a purely-numeric revision link or without usable status text
    are skipped.
    """
    soup = BeautifulSoup(self._fetch_builder_page(builder))
    revisions = []
    for status_row in soup.find('table').findAll('tr'):
        revision_anchor = status_row.find('a')
        table_cells = status_row.findAll('td')
        # len() < 3 already covers the empty list, so the former separate
        # "not table_cells" check was redundant and has been dropped.
        if len(table_cells) < 3 or not table_cells[2].string:
            continue
        if revision_anchor and revision_anchor.string and re.match(r'^\d+$', revision_anchor.string):
            revisions.append((int(revision_anchor.string), 'success' in table_cells[2].string))
    return revisions
示例7: _parse_bug_dictionary_from_xml
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
def _parse_bug_dictionary_from_xml(self, page):
    """Convert a Bugzilla bug XML page into a dictionary of bug fields."""
    soup = BeautifulSoup(page)
    contents = self._string_contents  # Local alias to keep the lines below short.
    bug = {}
    bug["id"] = int(soup.find("bug_id").string)
    bug["title"] = contents(soup.find("short_desc"))
    bug["bug_status"] = contents(soup.find("bug_status"))
    dup_id = soup.find("dup_id")
    if dup_id:
        # Only present when the bug was resolved as a duplicate.
        bug["dup_id"] = contents(dup_id)
    bug["reporter_email"] = contents(soup.find("reporter"))
    bug["assigned_to_email"] = contents(soup.find("assigned_to"))
    bug["cc_emails"] = [contents(cc) for cc in soup.findAll('cc')]
    bug["attachments"] = [self._parse_attachment_element(attachment, bug["id"])
                          for attachment in soup.findAll('attachment')]
    return bug
示例8: user_dict_from_edit_user_page
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
def user_dict_from_edit_user_page(self, page):
    """Parse a Bugzilla edit-user page into a dict of field name -> value.

    Keys come from each row's <label for="..."> attribute. Group-membership
    rows are folded into a single "groups" entry holding a set of checked
    group names; every other labeled row becomes a stripped unicode string.
    """
    soup = BeautifulSoup(page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    user_table = soup.find("table", {"class": "main"})
    user_dict = {}
    for row in user_table("tr"):
        label_element = row.find("label")
        if not label_element:
            continue  # This must not be a row we know how to parse.
        if row.find("table"):
            continue  # Skip the <tr> holding the groups table.
        key = label_element["for"]
        if "group" in key:
            # All group rows share one "groups" key; re-fetch the set each
            # iteration so checked names accumulate across rows.
            key = "groups"
            value = user_dict.get("groups", set())
            # We must be parsing a "tr" inside the inner group table.
            (group_name, _) = self._group_name_and_string_from_row(row)
            if row.find("input", {"type": "checkbox", "checked": "checked"}):
                value.add(group_name)
        else:
            value = unicode(row.find("td").string).strip()
        user_dict[key] = value
    return user_dict
示例9: _parse_twisted_directory_listing
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
def _parse_twisted_directory_listing(self, page):
    """Parse a twisted.web directory-listing page into per-file row dictionaries."""
    soup = BeautifulSoup(page)
    # HACK: Match only table rows with a class attribute to ignore
    # twisted's header/footer rows.
    row_class_pattern = re.compile(r'\b(?:directory|file)\b')
    file_rows = soup.find('table').findAll('tr', {'class': row_class_pattern})
    return [self._parse_twisted_file_row(row) for row in file_rows]
示例10: TestParser
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
class TestParser(object):
def __init__(self, options, filename):
    """Create a parser for |filename| and immediately load and parse it."""
    self.filename = filename
    self.options = options
    self.host = Host()
    self.filesystem = self.host.filesystem
    # Parsed documents; load_file() populates test_doc.
    self.test_doc = self.ref_doc = None
    self.load_file(filename)
def load_file(self, filename):
    """Parse |filename| into self.test_doc and return the parsed document.

    Stores (and returns) None when the file does not exist. Always resets
    self.ref_doc, since any previously loaded reference belonged to the
    previous file.

    Fix: the original had no return statement, yet analyze_test assigns
    this method's result to self.ref_doc when loading a reference file,
    so the reference document was always None. Returning the parsed
    document is backward compatible and restores that caller's intent.

    NOTE(review): calling this for a reference file still overwrites
    self.test_doc with the reference's parse tree — confirm whether a
    separate is_ref code path (as in newer versions of this class) is
    wanted.
    """
    if self.filesystem.exists(filename):
        self.test_doc = Parser(self.filesystem.read_text_file(filename))
    else:
        self.test_doc = None
    self.ref_doc = None
    return self.test_doc
def analyze_test(self, test_contents=None, ref_contents=None):
    """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """
    test_info = None
    # Nothing loaded and nothing passed in: there is nothing to analyze.
    if test_contents is None and self.test_doc is None:
        return test_info
    # Explicit contents override whatever load_file() parsed earlier.
    if test_contents is not None:
        self.test_doc = Parser(test_contents)
    if ref_contents is not None:
        self.ref_doc = Parser(ref_contents)
    # First check if it's a reftest
    matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
    if matches:
        if len(matches) > 1:
            print 'Warning: Webkit does not support multiple references. Importing the first ref defined in ' + self.filesystem.basename(self.filename)
        ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
        if self.ref_doc is None:
            # NOTE(review): self.ref_doc depends on load_file()'s return
            # value here — confirm load_file actually returns the parsed
            # document rather than falling off the end (returning None).
            self.ref_doc = self.load_file(ref_file)
        test_info = {'test': self.filename, 'reference': ref_file}
        # If the ref file path is relative, we need to check it for
        # relative paths also because when it lands in WebKit, it will be
        # moved down into the test dir.
        #
        # Note: The test files themselves are not checked for support files
        # outside their directories as the convention in the CSSWG is to
        # put all support files in the same dir or subdir as the test.
        #
        # All non-test files in the test's directory tree are normally
        # copied as part of the import as they are assumed to be required
        # support files.
        #
        # *But*, there is exactly one case in the entire css2.1 suite where
        # at test depends on a file that lives in a different directory,
        # which depends on another file that lives outside of its
        # directory. This code covers that case :)
        if matches[0]['href'].startswith('..'):
            support_files = self.support_files(self.ref_doc)
            test_info['refsupport'] = support_files
    elif self.is_jstest():
        test_info = {'test': self.filename, 'jstest': True}
    elif self.options['all'] is True and not('-ref' in self.filename) and not('reference' in self.filename):
        test_info = {'test': self.filename}
    return test_info
def reference_links_of_type(self, reftest_type):
    """Return every element of the test document whose rel attribute equals |reftest_type|."""
    links = self.test_doc.findAll(rel=reftest_type)
    return links
def is_jstest(self):
    """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
    testharness_src = re.compile('[\'\"/]?/resources/testharness')
    # Truthiness (not an explicit None check) deliberately mirrors the
    # original lookup semantics of bool(find(...)).
    return bool(self.test_doc.find(src=testharness_src))
def support_files(self, doc):
""" Searches the file for all paths specified in url()'s, href or src attributes."""
support_files = []
if doc is None:
return support_files
elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
elements_with_href_attributes = doc.findAll(href=re.compile('.*'))
url_pattern = re.compile('url\(.*\)')
urls = []
for url in doc.findAll(text=url_pattern):
url = re.search(url_pattern, url)
url = re.sub('url\([\'\"]', '', url.group(0))
url = re.sub('[\'\"]\)', '', url)
urls.append(url)
src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]
#.........这里部分代码省略.........
示例11: _parse_mks_response_for_mks_id
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
def _parse_mks_response_for_mks_id(response):
    """Extract the integer MKS id from the <value> element of an MKS response.

    Part of the response may be encoded as HTML entities. We need to
    decode such entities so as to retrieve the text of <value>.
    """
    entity_decoded = BeautifulSoup(response, convertEntities=BeautifulSoup.XML_ENTITIES)
    soup = BeautifulSoup(entity_decoded.encode("UTF-8"))
    return int(soup.find("value").string)
示例12: TestParser
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
class TestParser(object):
def __init__(self, filename, host):
    """Create a parser bound to |host|'s filesystem and parse |filename|."""
    self.filename = filename
    self.host = host
    self.filesystem = self.host.filesystem
    # Parsed documents; load_file() fills these in.
    self.test_doc = self.ref_doc = None
    self.load_file(filename)
def load_file(self, filename, is_ref=False):
    """Parse |filename| into self.test_doc (or self.ref_doc when is_ref is True).

    Stores None when the path is not a regular file or when reading or
    parsing fails; failures are logged rather than raised.
    """
    doc = None
    if self.filesystem.isfile(filename):
        try:
            doc = BeautifulSoup(self.filesystem.read_binary_file(filename))
        except IOError:
            _log.error("IOError: Failed to read %s", filename)
        except HTMLParser.HTMLParseError:
            # FIXME: Figure out what to do if we can't parse the file.
            _log.error("HTMLParseError: Failed to parse %s", filename)
    elif self.filesystem.isdir(filename):
        # FIXME: Figure out what is triggering this and what to do about it.
        _log.error("Trying to load %s, which is a directory", filename)
    if is_ref:
        self.ref_doc = doc
    else:
        self.test_doc = doc
def analyze_test(self, test_contents=None, ref_contents=None):
    """Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires.

    Returns: A dict which can have the properties:
        "test": test file name.
        "reference": related reference test file name if this is a reference test.
        "reference_support_info": extra information about the related reference test and any support files.
        "jstest": A boolean, whether this is a JS test.
        If the path doesn't look a test or the given contents are empty,
        then None is returned.
    """
    test_info = None
    # Nothing loaded and nothing passed in: there is nothing to analyze.
    if test_contents is None and self.test_doc is None:
        return test_info
    # Explicit contents override whatever load_file() parsed earlier.
    if test_contents is not None:
        self.test_doc = BeautifulSoup(test_contents)
    if ref_contents is not None:
        self.ref_doc = BeautifulSoup(ref_contents)
    # First check if it's a reftest
    matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
    if matches:
        if len(matches) > 1:
            # FIXME: Is this actually true? We should fix this.
            _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                         self.filesystem.basename(self.filename))
        try:
            ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
        except KeyError:
            # FIXME: Figure out what to do w/ invalid test files.
            # Fix: log the offending file name; the original passed
            # self.filesystem (the filesystem object) to the %s slot.
            _log.error('%s has a reference link but is missing the "href"', self.filename)
            return None
        if self.ref_doc is None:
            self.load_file(ref_file, True)
        test_info = {'test': self.filename, 'reference': ref_file}
        # If the ref file does not live in the same directory as the test file, check it for support files.
        test_info['reference_support_info'] = {}
        if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
            reference_support_files = self.support_files(self.ref_doc)
            if len(reference_support_files) > 0:
                reference_relpath = self.filesystem.relpath(self.filesystem.dirname(
                    self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}
    elif self.is_jstest():
        test_info = {'test': self.filename, 'jstest': True}
    elif 'csswg-test' in self.filename:
        # In csswg-test, all other files should be manual tests.
        # This function isn't called for non-test files in support/.
        test_info = {'test': self.filename}
    elif '-manual.' in self.filesystem.basename(self.filename):
        # WPT has a naming convention for manual tests.
        test_info = {'test': self.filename}
    return test_info
def reference_links_of_type(self, reftest_type):
    """Return all elements in the test document with rel == |reftest_type|."""
    matching_links = self.test_doc.findAll(rel=reftest_type)
    return matching_links
#.........这里部分代码省略.........
示例13: _parse_quips
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
def _parse_quips(self, page):
    """Return the quips listed on Bugzilla's quips page as unicode strings."""
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    # The quips live in the <ul> immediately following the "Existing quips:" text.
    quip_list = soup.find(text=re.compile(r"Existing quips:")).findNext("ul")
    return [unicode(list_item.string) for list_item in quip_list.findAll("li")]
示例14: login_userid_pairs_from_edit_user_results
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
def login_userid_pairs_from_edit_user_results(self, results_page):
    """Extract (login, user id) pairs from Bugzilla's edit-user results table."""
    soup = BeautifulSoup(results_page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    results_table = soup.find(id="admin_table")
    all_pairs = [self._login_and_uid_from_row(row) for row in results_table('tr')]
    # Rows that don't parse yield None; drop them.
    return [pair for pair in all_pairs if pair]
示例15: TestParser
# 需要导入模块: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup import find [as 别名]
class TestParser(object):
def __init__(self, options, filename):
    """Build a parser for |filename| using a fresh Host's filesystem, then load the file."""
    self.options = options
    self.filename = filename
    host = Host()
    self.host = host
    self.filesystem = host.filesystem
    # Parsed documents; load_file() fills these in.
    self.test_doc = self.ref_doc = None
    self.load_file(filename)
def load_file(self, filename, is_ref=False):
    """Parse |filename| into self.test_doc (or self.ref_doc when is_ref is True).

    Stores None when the path is not a regular file or parsing fails;
    failures are logged rather than raised.
    """
    if self.filesystem.isfile(filename):
        try:
            doc = Parser(self.filesystem.read_binary_file(filename))
        # Fix: the original bare "except:" also swallowed SystemExit and
        # KeyboardInterrupt; catching Exception keeps the best-effort
        # behavior while letting those propagate.
        except Exception:
            # FIXME: Figure out what to do if we can't parse the file.
            _log.error("Failed to parse %s", filename)
            doc = None
    else:
        if self.filesystem.isdir(filename):
            # FIXME: Figure out what is triggering this and what to do about it.
            _log.error("Trying to load %s, which is a directory", filename)
        doc = None
    if is_ref:
        self.ref_doc = doc
    else:
        self.test_doc = doc
def analyze_test(self, test_contents=None, ref_contents=None):
    """Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires.

    Returns a dict with 'test' and, depending on the test type, 'reference',
    'reference_support_info', or 'jstest' entries; returns None when the
    file doesn't look like a test or a reference link is malformed.
    """
    test_info = None
    # Nothing loaded and nothing passed in: there is nothing to analyze.
    if test_contents is None and self.test_doc is None:
        return test_info
    # Explicit contents override whatever load_file() parsed earlier.
    if test_contents is not None:
        self.test_doc = Parser(test_contents)
    if ref_contents is not None:
        self.ref_doc = Parser(ref_contents)
    # First check if it's a reftest
    matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
    if matches:
        if len(matches) > 1:
            # FIXME: Is this actually true? We should fix this.
            _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                         self.filesystem.basename(self.filename))
        try:
            ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
        except KeyError:
            # FIXME: Figure out what to do w/ invalid test files.
            # Fixes: dropped the unused "as e" binding, and log the
            # offending file name (the original passed self.filesystem,
            # the filesystem object, to the %s slot).
            _log.error('%s has a reference link but is missing the "href"', self.filename)
            return None
        if self.ref_doc is None:
            self.load_file(ref_file, True)
        test_info = {'test': self.filename, 'reference': ref_file}
        # If the ref file does not live in the same directory as the test file, check it for support files
        test_info['reference_support_info'] = {}
        if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
            reference_support_files = self.support_files(self.ref_doc)
            if len(reference_support_files) > 0:
                reference_relpath = self.filesystem.relpath(self.filesystem.dirname(
                    self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}
    elif self.is_jstest():
        test_info = {'test': self.filename, 'jstest': True}
    elif self.options['all'] is True and '-ref' not in self.filename and 'reference' not in self.filename:
        test_info = {'test': self.filename}
    return test_info
def reference_links_of_type(self, reftest_type):
    """Return every element of the test document whose rel attribute is |reftest_type|."""
    return self.test_doc.findAll(rel=reftest_type)
def is_jstest(self):
    """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
    testharness_pattern = re.compile('[\'\"/]?/resources/testharness')
    found = self.test_doc.find(src=testharness_pattern)
    # bool() of the find() result (not an explicit None check) deliberately
    # mirrors the original lookup semantics.
    return bool(found)
def support_files(self, doc):
""" Searches the file for all paths specified in url()'s or src attributes."""
support_files = []
if doc is None:
return support_files
elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
elements_with_href_attributes = doc.findAll(href=re.compile('.*'))
url_pattern = re.compile('url\(.*\)')
urls = []
#.........这里部分代码省略.........