本文整理汇总了Python中boilerpipe.extract.Extractor.getTitle方法的典型用法代码示例。如果您正苦于以下问题：Python Extractor.getTitle方法的具体用法？Python Extractor.getTitle怎么用？Python Extractor.getTitle使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类boilerpipe.extract.Extractor的用法示例。
在下文中一共展示了Extractor.getTitle方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: run
# 需要导入模块: from boilerpipe.extract import Extractor [as 别名]
# 或者: from boilerpipe.extract.Extractor import getTitle [as 别名]
def run(self):
    """Extract article text for every document in ``self.doc_cursor``.

    For each document whose ``url`` passes ``self.keepText``, an
    ``Extractor`` configured with ``'ArticleExtractor'`` is built for that
    URL. When the extracted text is non-empty the document receives an
    ``extracted_text`` field (prefixed with the title and a space when a
    title is available, in which case ``title`` is stored as well) and is
    written back through ``self.db_collection.save``.

    ``IOError``, ``LookupError`` and Unicode encode/decode errors raised
    while handling one URL are reported on stdout and processing continues
    with the next document. Any other exception propagates to the caller.

    Returns:
        None.
    """
    # NOTE(review): the listing this was taken from had lost its
    # indentation; save() and the 'OK' message are assumed to belong to the
    # non-empty-text branch - confirm against the upstream project.
    for doc in self.doc_cursor:
        url = doc['url']
        if not self.keepText(url):
            continue
        try:
            extractor = Extractor(extractor='ArticleExtractor', url=url)
            extracted_text = extractor.getText()
            if not extracted_text:
                # Nothing usable was extracted; leave the document untouched.
                continue
            title = extractor.getTitle()
            if title is not None:
                doc['title'] = title
                doc['extracted_text'] = title + " " + extracted_text
            else:
                doc['extracted_text'] = extracted_text
            self.db_collection.save(doc)
            print('OK -' + url)
        except IOError as err:
            print("IOError with url " + url)
            print(str(err))
        except LookupError:
            print("LookupError - Maybe not text or weird encoding " + url)
        except (UnicodeDecodeError, UnicodeEncodeError):
            print("UnicodeDecodeError or UnicodeEncodeError- " + url)
示例2: fetch_articles
# 需要导入模块: from boilerpipe.extract import Extractor [as 别名]
# 或者: from boilerpipe.extract.Extractor import getTitle [as 别名]
def fetch_articles(self):
    """Download every URL in ``self.urls`` and fill in the matching entries.

    Requests are issued through ``grequests`` using ``self.header`` as the
    HTTP headers. For each response that is not ``None``, the entry at the
    same index in ``self.entries`` gets its ``'text'`` set to the text
    extracted from the response HTML; if that entry's ``'title'`` contains
    ``'...'`` it is replaced by the title reported by the extractor.

    Returns:
        Always ``True``.
    """
    pending = (grequests.get(link, headers=self.header) for link in self.urls)
    for index, response in enumerate(grequests.map(pending)):
        if response is None:
            # No response object for this request; keep the entry as it is.
            continue
        extractor = Extractor(html=response.text)
        body = extractor.getText()
        entry = self.entries[index]
        entry['text'] = body
        if '...' in entry['title']:
            entry['title'] = extractor.getTitle()
    return True
示例3: predict
# 需要导入模块: from boilerpipe.extract import Extractor [as 别名]
# 或者: from boilerpipe.extract.Extractor import getTitle [as 别名]
def predict(self, url):
    """Return the title found for ``url`` in a ``{'Body', 'Title'}`` dict.

    Args:
        url: Address handed to ``Extractor(url=...)``.

    Returns:
        A dict with two keys. ``'Body'`` is always the empty string.
        ``'Title'`` is the extractor's title, or the empty string when the
        extractor reports ``None`` or could not be constructed.
    """
    print('Predicting %s' % url)
    result = {'Body': '', 'Title': ''}
    try:
        article = Extractor(url=url)
    except Exception:
        # Best-effort: any ordinary failure yields the empty result, while
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        print('Exception %s' % url)
        return result
    title = article.getTitle()
    if title is not None:
        result['Title'] = title
    return result