本文整理汇总了 Python 中 corpus.Corpus.buildCorpusOnDB 方法的典型用法代码示例。如果您正苦于以下问题：Python Corpus.buildCorpusOnDB 方法的具体用法？Python Corpus.buildCorpusOnDB 怎么用？Python Corpus.buildCorpusOnDB 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 corpus.Corpus 的用法示例。
在下文中一共展示了Corpus.buildCorpusOnDB方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: generateData
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import buildCorpusOnDB [as 别名]
def generateData():
    """Run the Twitter feature pipeline over the next-week candidate events.

    Builds a ``Representor`` and a ``Corpus`` for the
    ``next_week_candidate_event_25by25_merged`` collection of the
    ``citybeat`` database, loads the two event lists returned by
    ``loadNextWeekData()``, calls ``GenerateArffFileHeader()`` once, and then
    calls ``printFeatures()`` for every event (the "true" list first,
    followed by the "false" list).
    """
    db_name = 'citybeat'
    collection_name = 'next_week_candidate_event_25by25_merged'

    rep = Representor(None, db_name, collection_name)
    corpus = Corpus()
    corpus.buildCorpusOnDB(db_name, collection_name)

    true_event_list, false_event_list = loadNextWeekData()

    # The header is requested from an instance constructed without an event.
    EventFeatureTwitter(None).GenerateArffFileHeader()
    for event in true_event_list + false_event_list:
        EventFeatureTwitter(event, corpus, rep).printFeatures()
示例2: generateData2
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import buildCorpusOnDB [as 别名]
def generateData2(_182, sparse=False):
    """Run the feature pipeline over the unbalanced candidate-event data.

    Args:
        _182: Passed through unchanged to ``loadUnbalancedData``.
            NOTE(review): the meaning of this argument is not visible here;
            confirm against ``loadUnbalancedData``.
        sparse: When true, ``EventFeatureSparse`` is used with the word list
            and word index returned by ``getCorpusWordList``; otherwise
            ``EventFeatureTwitter`` is used.

    The corpus is built from the ``candidate_event_25by25_merged`` collection
    of the ``citybeat`` database. ``GenerateArffFileHeader`` is called once,
    then ``printFeatures`` is called for every event (the "true" list first,
    followed by the "false" list).
    """
    rep = Representor()
    corpus = Corpus()
    corpus.buildCorpusOnDB('citybeat', 'candidate_event_25by25_merged')
    true_event_list, false_event_list = loadUnbalancedData(_182)

    if sparse:
        # word_index is only defined (and only needed) in sparse mode.
        word_index, word_list = getCorpusWordList(
            rep, true_event_list + false_event_list)
        EventFeatureSparse(None).GenerateArffFileHeader(word_list)
    else:
        EventFeatureTwitter(None).GenerateArffFileHeader()

    for event in true_event_list + false_event_list:
        if sparse:
            EventFeatureSparse(event, corpus, rep).printFeatures(word_index)
        else:
            EventFeatureTwitter(event, corpus, rep).printFeatures()
示例3: min
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import buildCorpusOnDB [as 别名]
k = min(len(photos), k)
# discard the keywords with only one photo
# if k == 1:
# break
res.append([word, fre, photos[0:k]])
return res
def getTopKeywordsAndPhotos(self, num_keywords, num_photos):
    """Return photos associated with the top keywords.

    Args:
        num_keywords: Forwarded to ``_getTopKeywordsWithoutStopwords``.
        num_photos: Forwarded to ``_getRandomPhotosAssociatedWithKeywords``.

    Returns:
        Whatever ``_getRandomPhotosAssociatedWithKeywords`` returns for the
        selected keywords.
    """
    keywords = self._getTopKeywordsWithoutStopwords(num_keywords)
    return self._getRandomPhotosAssociatedWithKeywords(keywords, num_photos)
def getTopKeywordsAndPhotosByTFIDF(self, num_keywords, num_photos):
    """Return photos associated with the keywords the corpus ranks highest.

    Candidate keywords come from ``_getTopKeywordsWithoutStopwords`` and are
    then narrowed to ``num_keywords`` entries by the corpus method
    ``chooseTopWordWithHighestTDIDF``.

    Args:
        num_keywords: Number of keywords requested from the corpus ranking.
        num_photos: Forwarded to ``_getRandomPhotosAssociatedWithKeywords``.

    Returns:
        Whatever ``_getRandomPhotosAssociatedWithKeywords`` returns for the
        selected keywords.
    """
    # Large fixed cap on the candidate pool; presumably meant as
    # "effectively all keywords" -- confirm against the helper.
    candidate_limit = 100000
    keywords = self._getTopKeywordsWithoutStopwords(candidate_limit)
    keywords = self._corpus.chooseTopWordWithHighestTDIDF(keywords, num_keywords)
    return self._getRandomPhotosAssociatedWithKeywords(keywords, num_photos)
if __name__ == '__main__':
    # Demo driver: build a corpus from one collection of the 'citybeat'
    # database, then print the result of getTopKeywordsAndPhotosByTFIDF(10, 0)
    # for every document in that collection.
    collection = 'candidate_event_10by10_merged'
    c = Corpus()
    c.buildCorpusOnDB('citybeat', collection)

    ei = EventInterface()
    ei.setDB('citybeat')
    ei.setCollection(collection)
    events = ei.getAllDocuments()

    for raw_event in events:
        # Use a separate name so the loop variable is not rebound to a
        # different type.
        frontend = EventFrontend(raw_event, c)
        # Single-argument print(...) gives the same output on Python 2 and
        # is also valid on Python 3.
        print(frontend.getTopKeywordsAndPhotosByTFIDF(10, 0))