当前位置: 首页>>代码示例>>Python>>正文


Python BeautifulSoup.BeautifulSoup类代码示例

本文整理汇总了Python中webkitpy.thirdparty.BeautifulSoup.BeautifulSoup的典型用法代码示例。如果您正苦于以下问题:Python BeautifulSoup类的具体用法?Python BeautifulSoup怎么用?Python BeautifulSoup使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了BeautifulSoup类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_attachment_parsing

 def test_attachment_parsing(self):
     # Parse the canned attachment XML and verify every field against the
     # expected dictionary.
     expected = self._expected_example_attachment_parsing
     soup = BeautifulSoup(self._example_attachment)
     attachment_element = soup.find("attachment")
     parsed = Bugzilla()._parse_attachment_element(attachment_element, expected['bug_id'])
     self.assertTrue(parsed)
     self._assert_dictionaries_equal(parsed, expected)
开发者ID:0x4d52,项目名称:JavaScriptCore-X,代码行数:7,代码来源:bugzilla_unittest.py

示例2: _parse_attachment_ids_request_query

    def _parse_attachment_ids_request_query(self, page, since=None):
        """Extract attachment ids from a Bugzilla request-queue page.

        Args:
            page: HTML of the request.cgi results page.
            since: optional datetime; when given, attachments whose table row
                carries an older "YYYY-MM-DD HH:MM" timestamp are skipped.

        Returns: a list of integer attachment ids.
        """
        # Raw strings for regexes; the dot in "attachment.cgi" is escaped so it
        # matches literally instead of any character.
        digits = re.compile(r"\d+")
        attachment_href = re.compile(r"attachment\.cgi\?id=\d+&action=review")
        # If no date is given, return all ids on the page.
        if not since:
            attachment_links = SoupStrainer("a", href=attachment_href)
            return [int(digits.search(tag["href"]).group(0))
                for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)]

        # Parse the main table only.
        date_format = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}")
        mtab = SoupStrainer("table", {"class": "requests"})
        soup = BeautifulSoup(page, parseOnlyThese=mtab)
        patch_ids = []

        for row in soup.findAll("tr"):
            patch_tag = row.find("a", {"href": attachment_href})
            if not patch_tag:
                continue
            patch_id = int(digits.search(patch_tag["href"]).group(0))
            # find(text=...) returns a NavigableString (a str subclass), so it
            # can be fed straight back into the regex search below.
            date_tag = row.find("td", text=date_format)
            if date_tag and datetime.strptime(date_format.search(date_tag).group(0), "%Y-%m-%d %H:%M") < since:
                # Lazy %-args: the message is only formatted if INFO is enabled.
                _log.info("Patch is old: %d (%s)", patch_id, date_tag)
                continue
            patch_ids.append(patch_id)
        return patch_ids
开发者ID:chenbk85,项目名称:webkit2-wincairo,代码行数:27,代码来源:bugzilla.py

示例3: test_convert_vendor_prefix_js_paths

    def test_convert_vendor_prefix_js_paths(self):
        test_html = """<head>
<script src="/common/vendor-prefix.js">
</head>
"""
        fake_dir_path = self.fake_dir_path('adapterjspaths')
        converter = _W3CTestConverter(fake_dir_path, DUMMY_FILENAME)

        oc = OutputCapture()
        oc.capture_output()
        try:
            converter.feed(test_html)
            converter.close()
            converted = converter.output()
        finally:
            oc.restore_output()

        new_html = BeautifulSoup(converted[1])

        # Verify the original paths are gone, and the new paths are present.
        orig_path_pattern = re.compile('\"/common/vendor-prefix.js')
        self.assertEquals(len(new_html.findAll(src=orig_path_pattern)), 0, 'vendor-prefix.js path was not converted')

        resources_dir = converter.path_from_webkit_root("LayoutTests", "resources")
        new_relpath = os.path.relpath(resources_dir, fake_dir_path)
        relpath_pattern = re.compile(new_relpath)
        self.assertEquals(len(new_html.findAll(src=relpath_pattern)), 1, 'vendor-prefix.js relative path not correct')
开发者ID:smil-in-javascript,项目名称:blink,代码行数:27,代码来源:test_converter_unittest.py

示例4: test_status_parsing

    def test_status_parsing(self):
        """Each row of the one-box status table must parse to the expected builder dict."""
        buildbot = BuildBot()

        soup = BeautifulSoup(self._example_one_box_status)
        status_table = soup.find("table")
        input_rows = status_table.findAll("tr")

        # enumerate instead of range(len(...)); x is kept for the failure message.
        for x, status_row in enumerate(input_rows):
            expected_parsing = self._expected_example_one_box_parsings[x]

            builder = buildbot._parse_builder_status_from_row(status_row)

            # Make sure we aren't parsing more or less than we expect.
            # Sort both key lists: dict key order is unspecified, and we only
            # care that the same set of keys was parsed.
            self.assertEqual(sorted(builder.keys()), sorted(expected_parsing.keys()))

            for key, expected_value in expected_parsing.items():
                self.assertEqual(
                    builder[key],
                    expected_value,
                    (
                        "Builder %d parse failure for key: %s: Actual='%s' Expected='%s'"
                        % (x, key, builder[key], expected_value)
                    ),
                )
开发者ID:,项目名称:,代码行数:25,代码来源:

示例5: _parse_result_count

 def _parse_result_count(self, results_page):
     # The count element reads e.g. "Zarro Boogs found.", "One bug found."
     # or "42 bugs found." -- only the first word matters.
     count_element = BeautifulSoup(results_page).find(attrs={'class': 'bz_result_count'})
     first_word = count_element.string.strip().split(" ")[0]
     # Bugzilla spells zero and one out as words; everything else is numeric.
     if first_word == "Zarro":
         return 0
     if first_word == "One":
         return 1
     return int(first_word)
开发者ID:,项目名称:,代码行数:8,代码来源:

示例6: test_failures_from_fail_row

    def test_failures_from_fail_row(self):
        # A row with a test name, an image-diff link and a percentage should
        # yield exactly one image-hash-mismatch failure.
        fail_row = BeautifulSoup("<tr><td><a>test.hml</a></td><td><a>expected image</a></td><td><a>25%</a></td></tr>")
        test_name = unicode(fail_row.find("a").string)
        # Even if the caller has already found the test name, findAll inside _failures_from_fail_row will see it again.
        failures = OutputCapture().assert_outputs(self, ORWTResultsHTMLParser._failures_from_fail_row, [fail_row])
        self.assertEqual(len(failures), 1)
        self.assertEqual(type(sorted(failures)[0]), test_failures.FailureImageHashMismatch)

        # A link with unrecognized text should produce a warning on stderr.
        fail_row = BeautifulSoup("<tr><td><a>test.hml</a><a>foo</a></td></tr>")
        expected_stderr = "Unhandled link text in results.html parsing: foo.  Please file a bug against webkitpy.\n"
        OutputCapture().assert_outputs(self, ORWTResultsHTMLParser._failures_from_fail_row, [fail_row], expected_stderr=expected_stderr)
开发者ID:,项目名称:,代码行数:11,代码来源:

示例7: _revisions_for_builder

 def _revisions_for_builder(self, builder):
     # Scrape (revision, build_succeeded) pairs from the builder's results table.
     page = BeautifulSoup(self._fetch_builder_page(builder))
     parsed = []
     for row in page.find('table').findAll('tr'):
         cells = row.findAll('td')
         # Rows without at least three populated cells carry no build status.
         if not cells or len(cells) < 3 or not cells[2].string:
             continue
         anchor = row.find('a')
         # Only anchors whose text is purely numeric name a revision.
         if anchor and anchor.string and re.match(r'^\d+$', anchor.string):
             parsed.append((int(anchor.string), 'success' in cells[2].string))
     return parsed
开发者ID:SchleunigerAG,项目名称:WinEC7_Qt5.3.1_Fixes,代码行数:11,代码来源:buildbot.py

示例8: verify_test_harness_paths

    def verify_test_harness_paths(self, converter, converted, test_path, num_src_paths, num_href_paths):
        """Assert that testharness src/href paths were rewritten to relative paths.

        Args:
            converter: the converter whose path helpers are consulted.
            converted: converted markup (string or already-parsed soup).
            test_path: directory the relative paths should be computed from.
            num_src_paths: expected count of rewritten src attributes.
            num_href_paths: expected count of rewritten href attributes.
        """
        if isinstance(converted, basestring):
            converted = BeautifulSoup(converted)

        resources_dir = converter.path_from_webkit_root("LayoutTests", "resources")

        # Verify the original paths are gone, and the new paths are present.
        orig_path_pattern = re.compile(r'"/resources/testharness')
        self.assertEqual(len(converted.findAll(src=orig_path_pattern)), 0, 'testharness src path was not converted')
        self.assertEqual(len(converted.findAll(href=orig_path_pattern)), 0, 'testharness href path was not converted')

        new_relpath = os.path.relpath(resources_dir, test_path)
        # Escape regex metacharacters (the dots in "..") so the relative path
        # is matched literally rather than as a pattern.
        relpath_pattern = re.compile(re.escape(new_relpath))
        self.assertEqual(len(converted.findAll(src=relpath_pattern)), num_src_paths, 'testharness src relative path not correct')
        self.assertEqual(len(converted.findAll(href=relpath_pattern)), num_href_paths, 'testharness href relative path not correct')
开发者ID:,项目名称:,代码行数:15,代码来源:

示例9: analyze_test

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires.

        Returns: A dict which can have the properties:
            "test": test file name.
            "reference": related reference test file name if this is a reference test.
            "reference_support_info": extra information about the related reference test and any support files.
            "jstest": A boolean, whether this is a JS test.
            If the given contents are empty, then None is returned.
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = BeautifulSoup(test_contents)

        if ref_contents is not None:
            self.ref_doc = BeautifulSoup(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                # Bug fix: log the file's name, not the FileSystem object.
                _log.error('%s has a reference link but is missing the "href"', self.filename)
                return None

            # Lazily load the reference document when the caller didn't supply it.
            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files.
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(self.filesystem.dirname(
                        self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif self.options['all'] and '-ref' not in self.filename and 'reference' not in self.filename:
            # With --all, treat any non-reference file as a plain test.
            test_info = {'test': self.filename}

        return test_info
开发者ID:ollie314,项目名称:chromium,代码行数:56,代码来源:test_parser.py

示例10: _parse_bug_dictionary_from_xml

 def _parse_bug_dictionary_from_xml(self, page):
     """Build a bug dictionary from Bugzilla's XML bug representation."""
     soup = BeautifulSoup(page)

     def text_of(tag_name):
         # Shorthand: find the tag and pull out its string contents.
         return self._string_contents(soup.find(tag_name))

     bug = {}
     bug["id"] = int(soup.find("bug_id").string)
     bug["title"] = text_of("short_desc")
     bug["bug_status"] = text_of("bug_status")
     # dup_id is only present when the bug was resolved as a duplicate.
     dup_id = soup.find("dup_id")
     if dup_id:
         bug["dup_id"] = self._string_contents(dup_id)
     bug["reporter_email"] = text_of("reporter")
     bug["assigned_to_email"] = text_of("assigned_to")
     bug["cc_emails"] = [self._string_contents(cc) for cc in soup.findAll('cc')]
     bug["attachments"] = [self._parse_attachment_element(el, bug["id"]) for el in soup.findAll('attachment')]
     return bug
开发者ID:,项目名称:,代码行数:14,代码来源:

示例11: user_dict_from_edit_user_page

    def user_dict_from_edit_user_page(self, page):
        """Parse Bugzilla's edituser page into a dict of field label -> value.

        Group-membership checkboxes are collapsed into a single "groups" key
        whose value is the set of checked group names; every other labeled
        field maps to its <td> text.
        """
        # Convert HTML entities so field text comes out as plain characters.
        soup = BeautifulSoup(page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
        user_table = soup.find("table", {"class": "main"})
        user_dict = {}
        for row in user_table("tr"):
            label_element = row.find("label")
            if not label_element:
                continue  # This must not be a row we know how to parse.
            if row.find("table"):
                continue  # Skip the <tr> holding the groups table.

            # The label's "for" attribute names the form field.
            key = label_element["for"]
            if "group" in key:
                # All group rows accumulate into one shared set under "groups".
                key = "groups"
                value = user_dict.get("groups", set())
                # We must be parsing a "tr" inside the inner group table.
                (group_name, _) = self._group_name_and_string_from_row(row)
                # Only checked boxes mean the user is in that group.
                if row.find("input", {"type": "checkbox", "checked": "checked"}):
                    value.add(group_name)
            else:
                value = unicode(row.find("td").string).strip()
            user_dict[key] = value
        return user_dict
开发者ID:sohocoke,项目名称:webkit,代码行数:23,代码来源:bugzilla.py

示例12: _parse_bug_page

 def _parse_bug_page(self, page):
     """Parse Bugzilla's XML bug page into a plain dictionary."""
     soup = BeautifulSoup(page)
     bug_id = int(soup.find("bug_id").string)
     return {
         "id": bug_id,
         "title": unicode(soup.find("short_desc").string),
         "reporter_email": str(soup.find("reporter").string),
         "assigned_to_email": str(soup.find("assigned_to").string),
         "cc_emails": [str(cc.string) for cc in soup.findAll('cc')],
         "attachments": [self._parse_attachment_element(el, bug_id) for el in soup.findAll('attachment')],
     }
开发者ID:mikezit,项目名称:Webkit_Code,代码行数:11,代码来源:bugzilla.py

示例13: _parse_twisted_directory_listing

 def _parse_twisted_directory_listing(self, page):
     """Return parsed entries for each file/directory row in a twisted listing."""
     listing = BeautifulSoup(page)
     # HACK: Twisted marks real entries with a "directory" or "file" class;
     # its header/footer rows lack one, so they are filtered out here.
     entry_class = re.compile(r'\b(?:directory|file)\b')
     entry_rows = listing.find('table').findAll('tr', {'class': entry_class})
     return [self._parse_twisted_file_row(entry_row) for entry_row in entry_rows]
开发者ID:SchleunigerAG,项目名称:WinEC7_Qt5.3.1_Fixes,代码行数:5,代码来源:buildbot.py

示例14: _parse_mks_response_for_mks_id

 def _parse_mks_response_for_mks_id(response):
     """Extract the integer <value> from an MKS response.

     Part of the response may be HTML-entity encoded, so the markup is
     decoded first and then re-parsed before reading the <value> text.
     """
     entity_decoded = BeautifulSoup(response, convertEntities=BeautifulSoup.XML_ENTITIES)
     value_soup = BeautifulSoup(entity_decoded.encode("UTF-8"))
     return int(value_soup.find("value").string)
开发者ID:,项目名称:,代码行数:6,代码来源:

示例15: _parse_quips

 def _parse_quips(self, page):
     """Scrape the quip strings from Bugzilla's quips page."""
     soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
     # The quips live in the <ul> immediately after the "Existing quips:" text.
     quip_items = soup.find(text=re.compile(r"Existing quips:")).findNext("ul").findAll("li")
     return [unicode(item.string) for item in quip_items]
开发者ID:,项目名称:,代码行数:4,代码来源:


注:本文中的webkitpy.thirdparty.BeautifulSoup.BeautifulSoup类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。