本文整理汇总了 Python 中 scraper.Scraper.match 方法的典型用法代码示例。如果您正苦于以下问题：Python Scraper.match 方法的具体用法？Python Scraper.match 怎么用？Python Scraper.match 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 scraper.Scraper 的用法示例。
在下文中一共展示了Scraper.match方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testMatchAndExtract
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import match [as 别名]
def testMatchAndExtract(self):
    """Check that a matched node can be fed to ``extract`` to get ``name``.

    Each scenario builds a ``Scraper`` from a pattern, matches it against a
    parsed document, and expects exactly one match whose extraction is a
    one-entry mapping with ``'name'`` equal to ``'abc'``.
    """
    scenarios = (
        # pattern with an empty <a>, document with an empty <a>
        ("<a name='$name'></a>", "<a name='abc'></a>"),
        # pattern with '*' inside <a>, document whose <a> holds a child tag
        ("<a name='$name'>*</a>", "<a name='abc'><b></b></a>"),
    )
    for template, markup in scenarios:
        matcher = Scraper(template)
        document = BeautifulSoup(markup)

        matched = matcher.match(document)
        self.assertEqual(1, len(matched))

        extracted = matcher.extract(matched[0])
        self.assertEqual(1, len(extracted))
        self.assertEqual('abc', extracted['name'])
示例2: scrapestruct
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import match [as 别名]
def scrapestruct(self, context):
    """Scrape row items out of a markup block and store them on ``context``.

    Reads ``context['pc']['dom_row_pattern']`` (the pattern given to
    ``Scraper``) and ``context['blockstr']`` (the markup given to
    ``CustomizedSoup``), matches the pattern against the parsed markup, and
    writes the extraction of at most the first 10 matches to
    ``context['items']`` as a list.

    Raises:
        Exception: if the pattern produces no matches at all.
    """
    row_pattern = context['pc']['dom_row_pattern']
    soupdoc = CustomizedSoup(context['blockstr'])
    scraper = Scraper(row_pattern)
    results = scraper.match(soupdoc)
    if len(results) == 0:  # TBD: scraper needs to be improved
        raise Exception("0 ITEMS SCRAPED WARNING")
    # Build a real list rather than using map(): on Python 3 map() returns a
    # one-shot iterator, which would leave context['items'] without len(),
    # without indexing, and exhausted after a single pass.
    # Slicing with [:10] already caps at the number of available results.
    context['items'] = [scraper.extract(item) for item in results[:10]]
示例3: testMatch
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import match [as 别名]
def testMatch(self):
    """Check how many matches ``<a><b></b></a>`` yields in several documents."""
    matcher = Scraper("<a><b></b></a>")
    # (expected number of matches, document markup)
    expectations = (
        # one match
        (1, "<a><b></b></a>"),
        # one match
        (1, "<div><a><b></b></a></div>"),
        # one match
        (1, "<a><a><b></b></a></a>"),
        # two matches
        (2, "<a><b></b></a><a><b></b></a>"),
        # two matches
        (2, "<a><b></b></a><c><a><b></b></a></c>"),
    )
    for expected_count, markup in expectations:
        document = BeautifulSoup(markup)
        self.assertEqual(expected_count, len(matcher.match(document)))