This page collects typical usage examples of the Python method nl_lib.Concepts.Concepts.getConcepts. If you have been wondering what Concepts.getConcepts does, how to use it, or where to find working examples of it, the curated examples here may help. You can also explore further usage examples of the containing class nl_lib.Concepts.Concepts.
The section below shows 1 code example of the Concepts.getConcepts method; examples are sorted by popularity by default. You can upvote the examples you like or find useful, and your ratings help the system recommend better Python code examples.
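Before the full example, here is a minimal sketch of the typical call pattern. It is an assumption-based illustration, not part of the listing: the pickled store name documents.p and the nested document/concept traversal are taken from Example 1 below, and loadConcepts/getConcepts are used exactly as that example uses them.

from nl_lib.Concepts import Concepts

# Load a previously pickled Concepts collection (file name assumed).
concepts = Concepts.loadConcepts(u"documents.p")

# getConcepts() returns the child Concept objects keyed by name; each
# child exposes getConcepts() again, so the store forms a tree.
for document in concepts.getConcepts().values():
    print(document.name)
    for concept in document.getConcepts().values():
        print(u" - " + concept.name)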
Example 1: Collocations
# Required import: from nl_lib.Concepts import Concepts [as alias]
# Or: from nl_lib.Concepts.Concepts import getConcepts [as alias]

import logging

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.collocations import BigramCollocationFinder, TrigramCollocationFinder
from nltk.metrics import BigramAssocMeasures, TrigramAssocMeasures
from nltk.corpus import stopwords

from nl_lib.Concepts import Concepts

# The original excerpt does not define `logger` or the stopword list `stop`;
# the definitions below are assumptions (nl_lib likely ships its own logger).
logger = logging.getLogger(__name__)
stop = stopwords.words('english')


class Collocations(object):
    concepts = None
    conceptsNGram = None
    conceptsNGramScore = None
    conceptsNGramSubject = None

    conceptFile = u"documents.p"
    ngramFile = u"ngrams.p"
    ngramScoreFile = u"ngramscore.p"
    ngramSubjectFile = u"ngramsubject.p"

    def __init__(self, conceptFile=None):
        if conceptFile is None:
            conceptFile = u"documents.p"

        logger.info(u"Load Concepts from %s " % (conceptFile))
        self.concepts = Concepts.loadConcepts(conceptFile)
        logger.info(u"Loaded Concepts")

        self.conceptsNGram = Concepts(u"n-gram", u"NGRAM")
        self.conceptsNGramScore = Concepts(u"NGram_Score", u"Score")
        self.conceptsNGramSubject = Concepts(u"Subject", u"Subjects")

    def getCollocationConcepts(self):
        return self.conceptsNGram, self.conceptsNGramScore, self.conceptsNGramSubject

    def find_collocations(self):
        lemmatizer = WordNetLemmatizer()

        stopset = set(stop)
        filter_stops = lambda w: len(w) < 3 or w in stopset

        # Collect lemmatized tokens from every concept name in every document;
        # remember which lemmas were tagged as nouns.
        words = list()
        dictWords = dict()

        for document in self.concepts.getConcepts().values():
            logger.debug(document.name)
            for concept in document.getConcepts().values():
                logger.debug(concept.name)

                for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(concept.name)):
                    logger.debug(u"Word: " + word + u" POS: " + pos)
                    lemmaWord = lemmatizer.lemmatize(word.lower())
                    logger.debug(u"Word: " + word + u" Lemma: " + lemmaWord)
                    words.append(lemmaWord)

                    if pos[0] == u"N":
                        dictWords[lemmaWord] = word

        for x in dictWords.keys():
            logger.info(u"noun : %s" % x)

        # Find bigram and trigram collocations, dropping stopwords and short
        # tokens; trigrams must additionally occur at least three times.
        bcf = BigramCollocationFinder.from_words(words)
        tcf = TrigramCollocationFinder.from_words(words)

        bcf.apply_word_filter(filter_stops)
        tcf.apply_word_filter(filter_stops)
        tcf.apply_freq_filter(3)

        # Keep the 100 best bigrams by likelihood ratio and store each one,
        # POS-tagged word by word, in the n-gram Concepts tree.
        listBCF = bcf.nbest(BigramAssocMeasures.likelihood_ratio, 100)

        for bigram in listBCF:
            concept = u' '.join([bg for bg in bigram])
            e = self.conceptsNGram.addConceptKeyType(concept, u"BiGram")
            logger.info(u"Bigram : %s" % concept)
            for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(concept)):
                e.addConceptKeyType(word, pos)

        # Same selection for the 100 best trigrams.
        listTCF = tcf.nbest(TrigramAssocMeasures.likelihood_ratio, 100)

        for trigram in listTCF:
            concept = u' '.join([bg for bg in trigram])
            e = self.conceptsNGram.addConceptKeyType(concept, u"TriGram")
            logger.info(u"Trigram : %s" % concept)
            for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(concept)):
                e.addConceptKeyType(word, pos)

        # Score every bigram, highest likelihood ratio first, and record the
        # score on the stored concept.
        bcfscored = bcf.score_ngrams(BigramAssocMeasures.likelihood_ratio)
        lt = sorted(bcfscored, key=lambda c: c[1], reverse=True)

        for score in lt:
            name = ' '.join([w for w in score[0]])
            count = float(score[1])
            e = self.conceptsNGramScore.addConceptKeyType(name, u"BiGram")
            for x in score[0]:
                e.addConceptKeyType(x, u"BWord")
            e.count = count
            logger.debug(u"bcfscored: %s=%s" % (name, count))

        # Same scoring pass for trigrams.
        tcfscored = tcf.score_ngrams(TrigramAssocMeasures.likelihood_ratio)
        lt = sorted(tcfscored, key=lambda c: c[1], reverse=True)

        for score in lt:
            name = ' '.join([w for w in score[0]])
            count = float(score[1])
            e = self.conceptsNGramScore.addConceptKeyType(name, u"TriGram")
            for x in score[0]:
                e.addConceptKeyType(x, u"TWord")
#......... the remainder of this code is omitted .........
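A hedged usage sketch for Example 1 (a hypothetical driver, not part of the original listing; it assumes documents.p exists and that the NLTK tagger, WordNet, and stopword data are installed):

collocations = Collocations(u"documents.p")
collocations.find_collocations()

# The three Concepts trees built above: raw n-grams, scored n-grams,
# and subjects (the subject tree is filled by the omitted code).
ngrams, scores, subjects = collocations.getCollocationConcepts()
for name in ngrams.getConcepts().keys():
    print(name)  # the extracted bigram/trigram phrases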