本文整理匯總了Python中w3af.core.data.parsers.doc.sgml.SGMLParser類的典型用法代碼示例。如果您正苦於以下問題:Python SGMLParser類的具體用法?Python SGMLParser怎麼用?Python SGMLParser使用的例子?那麼,這裏精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了SGMLParser類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_extract_emails_mailto
def test_extract_emails_mailto(self):
    # Parse a document holding one mailto: anchor and check that the
    # address shows up in get_emails().
    #
    # NOTE(review): the address literals read '[email protected]', which
    # looks like the output of an e-mail obfuscation filter - confirm the
    # real values against the upstream test.
    html = u'<a href="mailto:[email protected]">test</a>'
    parser = SGMLParser(build_http_response(self.url, html))
    parser.parse()

    self.assertEqual(parser.get_emails(), {u'[email protected]'})
示例2: test_mailto_ignored_in_links
def test_mailto_ignored_in_links(self):
    # A mailto: anchor must not appear in the first element of
    # parser.references (the list this test names "parsed").
    html = u'<a href="mailto:[email protected]">a</a>'
    parser = SGMLParser(build_http_response(self.url, html))
    parser.parse()

    parsed_refs, _ = parser.references
    self.assertEqual(parsed_refs, [])
示例3: test_get_emails_filter
def test_get_emails_filter(self):
    # Exercise get_emails() with and without the ``domain`` argument,
    # after seeding the parser's private ``_emails`` set directly.
    #
    # NOTE(review): every address below is the same '[email protected]'
    # placeholder, and the two domain-specific assertions expect the same
    # value for different domains - presumably the real addresses were
    # stripped by an e-mail obfuscation filter. TODO: restore them from
    # the upstream test.
    parser = SGMLParser(build_http_response(self.url, ''))
    parser._emails = {'[email protected]', '[email protected]'}

    # No filter.
    self.assertEqual(parser.get_emails(),
                     {'[email protected]', '[email protected]'})

    # Filtered by domain.
    self.assertEqual(parser.get_emails(domain='w3af.com'),
                     ['[email protected]'])
    self.assertEqual(parser.get_emails(domain='not.com'),
                     ['[email protected]'])
示例4: close
def close(self):
    """
    Invoked by the parser once it has finished.

    Runs SGMLParser.close() and afterwards calls
    self._html_internals_clear().
    """
    SGMLParser.close(self)

    # clear() is intentionally NOT used at this point: it would also run
    # SGMLParser's clear() and throw away the forms, references, etc.
    self._html_internals_clear()
示例5: _handle_textarea_tag_inside_form
def _handle_textarea_tag_inside_form(self, tag, tag_name, attrs):
    """
    Process a textarea tag that was found inside a form.

    Delegates to SGMLParser._handle_textarea_tag_start() and then stores
    the tag's text plus the value looked up in ``attrs`` under the
    'name' / 'id' keys.
    """
    SGMLParser._handle_textarea_tag_start(self, tag, tag_name, attrs)

    # Remember the textarea contents and its identifying attribute
    self._text_area_data = tag.text
    self._text_area_tag_name = get_value_by_key(attrs, 'name', 'id')
示例6: test_mailto_subject_body
def test_mailto_subject_body(self):
    # A mailto: href carrying ?subject=...&body=... must still yield just
    # the address from get_emails().
    #
    # NOTE(review): '[email protected]' looks like an obfuscation
    # placeholder rather than the original address - confirm upstream.
    html = (u'<a href="mailto:[email protected]?subject=testing out mailto'
            u'&body=Just testing">test</a>')
    parser = SGMLParser(build_http_response(self.url, html))
    parser.parse()

    self.assertEqual(parser.get_emails(), {u'[email protected]'})
示例7: test_get_clear_text_body
def test_get_clear_text_body(self):
    # get_clear_text_body() must return the document text with the <b>
    # markup removed.
    html = 'header <b>ABC</b>-<b>DEF</b>-<b>XYZ</b> footer'
    clear_text = 'header ABC-DEF-XYZ footer'
    headers = Headers([('Content-Type', 'text/html')])

    r = build_http_response(self.url, html, headers)
    p = SGMLParser(r)
    p.parse()

    # assertEqual instead of the deprecated assertEquals alias, in line
    # with the other tests in this file.
    self.assertEqual(clear_text, p.get_clear_text_body())
示例8: test_meta_tags
def test_meta_tags(self):
    # Two meta refresh tags (one plain, one carrying a URL) must both be
    # recorded in meta_redirs, and the URL must be reported as a parsed
    # reference.
    body = HTML_DOC % {'head': META_REFRESH + META_REFRESH_WITH_URL,
                       'body': ''}
    resp = build_http_response(self.url, body)
    p = SGMLParser(resp)
    p.parse()

    # This used to be assertTrue(2, len(...)), which can never fail: the
    # constant 2 is truthy and the length was taken as the failure
    # message. Compare the values for real.
    self.assertEqual(2, len(p.meta_redirs))

    # assertIn reports both operands on failure, unlike assertTrue(x in y)
    self.assertIn("2;url=http://crawler.w3af.com/", p.meta_redirs)
    self.assertIn("600", p.meta_redirs)

    self.assertEqual([URL('http://crawler.w3af.com/')], p.references[0])
示例9: test_nested_with_text
def test_nested_with_text(self):
    # Filtering on ('a', 'b') with yield_text=True over an <a> that wraps
    # a <div> is expected to produce a single 'a' Tag whose text is 'foo'.
    html = '<html><a href="/abc">foo<div>bar</div></a></html>'
    url = URL('http://www.w3af.com/')

    headers = Headers()
    headers['content-type'] = 'text/html'

    response = HTTPResponse(200, html, headers, url, url, charset='utf-8')
    parser = SGMLParser(response)

    found = list(parser.get_tags_by_filter(('a', 'b'), yield_text=True))

    self.assertEqual([Tag('a', {'href': '/abc'}, 'foo')], found)
示例10: _handle_script_tag_start
def _handle_script_tag_start(self, tag, tag_name, attrs):
    """
    Process a script tag.

    After delegating to SGMLParser, the stripped tag text (when there is
    any) is run through ReExtract and the references it reports are
    added to self._re_urls.
    """
    SGMLParser._handle_script_tag_start(self, tag, tag_name, attrs)

    script_text = tag.text
    if script_text is None:
        # Nothing to extract from
        return

    extractor = ReExtract(script_text.strip(),
                          self._base_url,
                          self._encoding)
    extractor.parse()

    self._re_urls.update(extractor.get_references())
示例11: test_meta_tags_with_single_quotes
def test_meta_tags_with_single_quotes(self):
    # Same scenario as the plain meta refresh test, but the URL inside
    # the refresh content is wrapped in single quotes.
    head = META_REFRESH + META_REFRESH_WITH_URL_AND_QUOTES
    html = HTML_DOC % {'head': head, 'body': ''}

    parser = SGMLParser(build_http_response(self.url, html))
    parser.parse()

    self.assertEqual(2, len(parser.meta_redirs))
    self.assertIn("2;url='http://crawler.w3af.com/'", parser.meta_redirs)
    self.assertIn("600", parser.meta_redirs)

    # The quotes are expected to be gone from the parsed reference
    self.assertEqual([URL('http://crawler.w3af.com/')],
                     parser.references[0])
示例12: test_none
def test_none(self):
    # Passing None as the filter is expected to yield every tag; only
    # the tag names are compared.
    html = '<html><a href="/abc">foo<div>bar</div></a></html>'
    url = URL('http://www.w3af.com/')

    headers = Headers()
    headers['content-type'] = 'text/html'

    response = HTTPResponse(200, html, headers, url, url, charset='utf-8')
    parser = SGMLParser(response)

    found = list(parser.get_tags_by_filter(None))
    names = [t.name for t in found]

    self.assertEqual(names, ['html', 'body', 'a', 'div'])
示例13: test_reference_with_colon
def test_reference_with_colon(self):
    # An href such as "d:url.html?..." (text followed by a colon) must
    # not be turned into a parsed reference.
    body = ('\n'
            '<html>\n'
            '<a href="d:url.html?id=13&subid=3">foo</a>\n'
            '</html>')
    r = build_http_response(self.url, body)
    p = SGMLParser(r)
    p.parse()
    parsed_refs = p.references[0]

    #
    # Finding zero URLs is the correct behavior based on what
    # I've seen in Opera and Chrome.
    #
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(0, len(parsed_refs))
示例14: _handle_form_tag_start
def _handle_form_tag_start(self, tag, tag_name, attrs):
    """
    Process a form tag.

    Builds a FormParameters object from the tag attributes, appends it
    to self._forms and then replays any "pending inputs" stored in
    self._saved_inputs as if they had been found inside this form.
    """
    SGMLParser._handle_form_tag_start(self, tag, tag_name, attrs)

    method = attrs.get('method', 'GET').upper()
    raw_action = attrs.get('action', None)
    form_encoding = attrs.get('enctype', DEFAULT_FORM_ENCODING)
    autocomplete = attrs.get('autocomplete', None)

    if raw_action is None:
        # No action attribute: use the current URL
        action = self._source_url
    else:
        decoded_action = self._decode_url(raw_action)
        try:
            action = self._base_url.url_join(decoded_action,
                                             encoding=self._encoding)
        except ValueError:
            # An invalid action URL leaves us guessing; the best guess
            # available is the current URL.
            action = self._source_url

    # Build the form object and keep it for later use
    form_params = FormParameters(
        encoding=self._encoding,
        method=method,
        action=action,
        form_encoding=form_encoding,
        attributes=attrs,
        hosted_at_url=self._source_url,
    )
    form_params.set_autocomplete(autocomplete)
    self._forms.append(form_params)

    # Input tags that were seen outside the scope of any form tag are
    # handled now, exactly as if they had appeared AFTER this form tag
    # was opened.
    for pending_attrs in self._saved_inputs:
        self._handle_input_tag_inside_form(tag, 'input', pending_attrs)

    # Everything has been replayed, forget the pending inputs.
    self._saved_inputs = []
示例15: test_parsed_references
def test_parsed_references(self):
    # The *parsed* URLs *must* come from both valid tags and valid tag
    # attributes. Invalid URLs (such as javascript instructions) must be
    # ignored.
    body = ('\n'
            '<html>\n'
            '<a href="/x.py?a=1" Invalid_Attr="/invalid_url.php">\n'
            '<form action="javascript:history.back(1)">\n'
            '<tagX href="/py.py"/>\n'
            '</form>\n'
            '</html>')
    r = build_http_response(self.url, body)
    p = SGMLParser(r)
    p.parse()
    parsed_refs = p.references[0]

    # assertEqual replaces the deprecated assertEquals alias, matching
    # the other tests in this file.
    self.assertEqual(1, len(parsed_refs))
    self.assertEqual('http://w3af.com/x.py?a=1', parsed_refs[0].url_string)