本文整理汇总了Python中scraper.Scraper.matchTag方法的典型用法代码示例。如果您正苦于以下问题：Python Scraper.matchTag方法的具体用法？Python Scraper.matchTag怎么用？Python Scraper.matchTag使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scraper.Scraper的用法示例。
在下文中一共展示了Scraper.matchTag方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testMatchTagWithMoreThenOneAsterisk
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import matchTag [as 别名]
def testMatchTagWithMoreThenOneAsterisk(self):
    """Exercise matchTag with patterns that contain more than one asterisk."""
    # Each entry is (pattern, samples); each sample is
    # (candidate markup, whether matchTag is expected to accept it).
    # A fresh Scraper and expected tree are built per entry, including for
    # the "*(c)" pattern, which appears twice.
    scenarios = (
        ("<a><b>*</b>*</a>", (
            # identical markup
            ("<a><b>*</b>*</a>", True),
            # the trailing asterisk can match the remaining <c>
            ("<a><b>*</b><c></c></a>", True),
        )),
        ("<a><b>*</b>*(c)</a>", (
            # the restricted asterisk can match the remaining <c>
            ("<a><b>*</b><c></c></a>", True),
        )),
        ("<a><b>*</b>*(c)</a>", (
            # only <c> is accepted, so <d> must be rejected
            ("<a><b>*</b><d></d></a>", False),
        )),
    )
    for template, samples in scenarios:
        matcher = Scraper(template)
        expected_tree = BeautifulSoup(template)
        for markup, should_match in samples:
            candidate = BeautifulSoup(markup)
            outcome = matcher.matchTag(
                expected_tree.contents[0], candidate.contents[0]
            )
            if should_match:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)
示例2: testMatchTagWithSubTag
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import matchTag [as 别名]
def testMatchTagWithSubTag(self):
    """Exercise matchTag with a pattern that has exactly one empty sub tag."""
    template = "<a><b></b></a>"
    matcher = Scraper(template)
    expected_tree = BeautifulSoup(template)

    def matches(markup):
        # Parse the candidate, then compare its first node with the
        # first node of the expected tree.
        candidate = BeautifulSoup(markup)
        return matcher.matchTag(expected_tree.contents[0], candidate.contents[0])

    # Identical structure is accepted.
    self.assertTrue(matches("<a><b></b></a>"))
    # A sub tag with a different name is rejected.
    self.assertFalse(matches("<a><c></c></a>"))
    # A sub tag that itself has a further sub tag is rejected.
    self.assertFalse(matches("<a><b><c></c></b></a>"))
示例3: testMatchTagWithText
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import matchTag [as 别名]
def testMatchTagWithText(self):
    """Exercise matchTag with a pattern whose tag holds literal text."""
    template = "<a>text</a>"
    matcher = Scraper(template)
    expected_tree = BeautifulSoup(template)

    # (assertion to apply, candidate markup)
    expectations = (
        (self.assertTrue, "<a>text</a>"),       # same tag and text
        (self.assertFalse, "<a></a>"),          # without text content
        (self.assertFalse, "<a><b></b></a>"),   # sub tag instead of text
    )
    for verify, markup in expectations:
        candidate = BeautifulSoup(markup)
        verify(matcher.matchTag(expected_tree.contents[0], candidate.contents[0]))
示例4: testMatchTagWithoutAttribute
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import matchTag [as 别名]
def testMatchTagWithoutAttribute(self):
    """Exercise matchTag with a simple, empty, attribute-less tag pattern."""
    template = "<a></a>"
    matcher = Scraper(template)
    expected_tree = BeautifulSoup(template)

    # Identical tag is accepted.
    identical = BeautifulSoup("<a></a>")
    verdict = matcher.matchTag(expected_tree.contents[0], identical.contents[0])
    self.assertTrue(verdict)

    # A different tag name is rejected.
    renamed = BeautifulSoup("<b></b>")
    verdict = matcher.matchTag(expected_tree.contents[0], renamed.contents[0])
    self.assertFalse(verdict)

    # A different child count is rejected.
    with_child = BeautifulSoup("<a><b></b></a>")
    verdict = matcher.matchTag(expected_tree.contents[0], with_child.contents[0])
    self.assertFalse(verdict)
示例5: testMatchTagWithAttribute
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import matchTag [as 别名]
def testMatchTagWithAttribute(self):
    """Exercise matchTag with a pattern that carries a single attribute."""
    template = "<a name='abc'></a>"
    matcher = Scraper(template)
    expected_tree = BeautifulSoup(template)

    # (candidate markup, whether matchTag is expected to accept it)
    samples = (
        # same tag and attribute (double-quoted value)
        ('<a name="abc"></a>', True),
        # different attribute name
        ("<a age='abc'></a>", False),
        # different attribute value
        ("<a name='abcd'></a>", False),
        # extra attributes on the candidate are still accepted
        ("<a name='abc' address='111'></a>", True),
    )
    for markup, should_match in samples:
        candidate = BeautifulSoup(markup)
        outcome = matcher.matchTag(expected_tree.contents[0], candidate.contents[0])
        if should_match:
            self.assertTrue(outcome)
        else:
            self.assertFalse(outcome)
示例6: testExtractAsteriskValue
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import matchTag [as 别名]
def testExtractAsteriskValue(self):
    """Check matchTag and extractTag for asterisk patterns bound to ``$content``."""

    def root_of(markup):
        # First top-level node of the parsed markup, used as an expected value.
        return BeautifulSoup(markup).contents[0]

    # --- Unrestricted asterisk: plain text is captured -------------------
    template = "<a>*$content</a>"
    matcher = Scraper(template)
    expected_tree = BeautifulSoup(template)

    candidate = BeautifulSoup("<a>hello world</a>")
    self.assertTrue(matcher.matchTag(expected_tree.contents[0], candidate.contents[0]))
    captured = matcher.extractTag(expected_tree.contents[0], candidate.contents[0])
    self.assertEqual('hello world', captured['content'][0])

    # --- Asterisk restricted to <b> --------------------------------------
    template = "<a>*(b)$content</a>"
    matcher = Scraper(template)
    expected_tree = BeautifulSoup(template)

    # The restriction applies to tags only, not to text.
    candidate = BeautifulSoup("<a>hello world</a>")
    self.assertTrue(matcher.matchTag(expected_tree.contents[0], candidate.contents[0]))
    captured = matcher.extractTag(expected_tree.contents[0], candidate.contents[0])
    self.assertEqual('hello world', captured['content'][0])

    # A tag other than <b> is rejected.
    candidate = BeautifulSoup("<a><c></c></a>")
    self.assertFalse(matcher.matchTag(expected_tree.contents[0], candidate.contents[0]))

    # A single <b> is accepted and captured as a node.
    candidate = BeautifulSoup("<a><b>hello world</b></a>")
    self.assertTrue(matcher.matchTag(expected_tree.contents[0], candidate.contents[0]))
    wanted = root_of('<b>hello world</b>')
    captured = matcher.extractTag(expected_tree.contents[0], candidate.contents[0])
    self.assertEqual(wanted, captured['content'][0])

    # Two <b> tags are accepted and captured in order.
    candidate = BeautifulSoup("<a><b>hello</b><b>world</b></a>")
    self.assertTrue(matcher.matchTag(expected_tree.contents[0], candidate.contents[0]))
    captured = matcher.extractTag(expected_tree.contents[0], candidate.contents[0])
    self.assertEqual(root_of('<b>hello</b>'), captured['content'][0])
    self.assertEqual(root_of('<b>world</b>'), captured['content'][1])

    # --- Prefix asterisk, then <c>, with $content after <c> --------------
    template = "<a>*(b)<c></c>$content</a>"
    matcher = Scraper(template)
    expected_tree = BeautifulSoup(template)

    candidate = BeautifulSoup("<a><b></b><b></b><c></c>hello world</a>")
    self.assertTrue(matcher.matchTag(expected_tree.contents[0], candidate.contents[0]))
    captured = matcher.extractTag(expected_tree.contents[0], candidate.contents[0])
    # Here the captured value is compared directly, not via index [0].
    self.assertEqual('hello world', captured['content'])

    # --- Restricted prefix asterisk, <c>, then a capturing asterisk ------
    template = "<a>*(b)<c></c>*$content</a>"
    matcher = Scraper(template)
    expected_tree = BeautifulSoup(template)

    candidate = BeautifulSoup("<a><b></b><c></c><d>hello world</d></a>")
    self.assertTrue(matcher.matchTag(expected_tree.contents[0], candidate.contents[0]))
    captured = matcher.extractTag(expected_tree.contents[0], candidate.contents[0])
    self.assertEqual(root_of('<d>hello world</d>'), captured['content'][0])

    candidate = BeautifulSoup("<a><b></b><c></c><d>hello</d><d>world</d></a>")
    self.assertTrue(matcher.matchTag(expected_tree.contents[0], candidate.contents[0]))
    captured = matcher.extractTag(expected_tree.contents[0], candidate.contents[0])
    self.assertEqual(root_of('<d>hello</d>'), captured['content'][0])
    self.assertEqual(root_of('<d>world</d>'), captured['content'][1])

    # --- Unrestricted prefix asterisk, <c>, then a capturing asterisk ----
    template = "<a>*<c></c>*$content</a>"
    matcher = Scraper(template)
    expected_tree = BeautifulSoup(template)

    candidate = BeautifulSoup("<a><b></b>some text<c></c><d>hello world</d></a>")
    self.assertTrue(matcher.matchTag(expected_tree.contents[0], candidate.contents[0]))
    captured = matcher.extractTag(expected_tree.contents[0], candidate.contents[0])
    self.assertEqual(root_of('<d>hello world</d>'), captured['content'][0])
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import matchTag [as 别名]
def testMatchTagWithAsterisk(self):
    """Exercise matchTag with plain, restricted and prefix asterisk patterns."""
    # Each entry is (pattern, samples); each sample is
    # (candidate markup, whether matchTag is expected to accept it).
    scenarios = (
        ("<a>*</a>", (
            # identical markup
            ("<a>*</a>", True),
            # asterisk can match anything
            ("<a><b></b></a>", True),
            # asterisk can match nothing at all
            ("<a></a>", True),
        )),
        # restricted asterisk: only accepts tag b, text or nothing
        ("<a>*(b)</a>", (
            ("<a></a>", True),
            ("<a>text</a>", True),
            ("<a><b></b></a>", True),
            ("<a><b></b><b></b></a>", True),
            ("<a><c></c><b></b></a>", False),
        )),
        # restricted asterisk: only accepts tag b, tag c, text or nothing
        ("<a>*(b,c)</a>", (
            ("<a><c></c><b></b></a>", True),
            ("<a><c></c><b></b><d></d></a>", False),
        )),
        # prefix asterisk followed by a mandatory <b>
        ("<a>*<b></b></a>", (
            ("<a><c></c><b></b></a>", True),
            # no tag should appear after tag b
            ("<a><c></c><b></b><d></d></a>", False),
        )),
    )

    # NOTE(review): the scraper is built once from the first pattern and
    # reused for every later pattern; only the expected tree passed to
    # matchTag changes. Sibling tests rebuild the Scraper per pattern.
    # Confirm whether this is intentional.
    matcher = Scraper(scenarios[0][0])

    for template, samples in scenarios:
        expected_tree = BeautifulSoup(template)
        for markup, should_match in samples:
            candidate = BeautifulSoup(markup)
            outcome = matcher.matchTag(
                expected_tree.contents[0], candidate.contents[0]
            )
            if should_match:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)