本文整理汇总了Python中helper.Helper.saveAllFeaturesExtracted方法的典型用法代码示例。如果您正苦于以下问题：Python Helper.saveAllFeaturesExtracted方法的具体用法？Python Helper.saveAllFeaturesExtracted怎么用？Python Helper.saveAllFeaturesExtracted使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类helper.Helper的用法示例。
在下文中一共展示了Helper.saveAllFeaturesExtracted方法的1个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# Required import: from helper import Helper [as alias]
# or: from helper.Helper import saveAllFeaturesExtracted [as alias]
class FeatureExtraction:
def __init__(self, newsBody, sentiment):
    """Split the news body into sentences and set up extraction state.

    Args:
        newsBody: Text to extract features from. It is split on "." and
            each fragment is stripped of surrounding whitespace.
        sentiment: Value stored unchanged on ``self.sentiment``; it is not
            interpreted here.
    """
    sentences = [sentence.strip() for sentence in newsBody.split(".")]
    # A body ending in "." leaves one empty trailing fragment; drop only
    # that. Slicing off the last fragment unconditionally would silently
    # lose the final sentence of a body that has no trailing period.
    # (str.split always returns at least one element, so [-1] is safe.)
    if not sentences[-1]:
        sentences.pop()
    self.sentences = sentences
    self.sentiment = sentiment
    # Running count of every feature produced by the extraction methods.
    self.featureDistribution = FreqDist()
    self.helperObject = Helper()
def convertToFeatureDist(self, featureSet):
    """Count every feature of ``featureSet`` into the feature distribution.

    Args:
        featureSet: Iterable of hashable features. A feature that occurs
            several times is counted once per occurrence.
    """
    # ``FreqDist.inc`` was removed in NLTK 3.0. Item increment behaves the
    # same on the older dict-based FreqDist (missing samples read as 0) and
    # on the newer Counter-based one, so it works with either version.
    for feature in featureSet:
        self.featureDistribution[feature] += 1
def getBagOfWords(self):
    """Use every single-space-separated token of every sentence as a feature.

    The tokens are gathered in sentence order, counted through
    ``convertToFeatureDist`` and then passed to the helper's
    ``saveAllFeaturesExtracted``.
    """
    # Splitting on a literal ' ' keeps empty tokens for repeated spaces.
    words = [
        token
        for line in self.sentences
        for token in line.split(' ')
    ]
    self.convertToFeatureDist(words)
    self.helperObject.saveAllFeaturesExtracted(words)
def getTwoGrams(self):
    """Turn the 2-grams of every sentence into "first second" features.

    Each sentence is split on single spaces and handed to ``ngrams`` with
    n=2. The joined pairs are counted through ``convertToFeatureDist`` and
    then passed to the helper's ``saveAllFeaturesExtracted``.
    """
    bigramFeatures = []
    for line in self.sentences:
        tokens = line.split(' ')
        bigramFeatures.extend(
            pair[0] + ' ' + pair[1] for pair in ngrams(tokens, 2)
        )
    self.convertToFeatureDist(bigramFeatures)
    self.helperObject.saveAllFeaturesExtracted(bigramFeatures)
def getTwoWordCombination(self):
    """Pair every word with each later word of the same sentence.

    For each sentence (split on single spaces) this emits "first last" for
    every contiguous window of two or more words: all windows of length 2
    first, then length 3, and so on. The features are counted through
    ``convertToFeatureDist`` and then passed to the helper's
    ``saveAllFeaturesExtracted``.
    """
    featureSet = []
    for sentence in self.sentences:
        words = sentence.split(' ')
        # Only the first and last word of each window are used, so pair the
        # two ends directly instead of materialising every n-gram of every
        # length (cubic work) just to discard the middle. Iterating by gap
        # keeps the feature order: shortest windows first, left to right.
        for gap in range(1, len(words)):
            featureSet.extend(
                first + " " + last
                for first, last in zip(words, words[gap:])
            )
    self.convertToFeatureDist(featureSet)
    self.helperObject.saveAllFeaturesExtracted(featureSet)
def getNounPhrases(self):
featureSet = []
# Handbook of NLP - Multiword Expressions, Timothy Baldwin and Su Nam Kim
grammar = r"""
NBAR:
{<NN.*|JJ>*<NN.*>} # Nouns and Adjectives, terminated with Nouns
NP:
{<NBAR>}
{<NBAR><IN><NBAR>} # Above, connected with in/of/etc...
"""
chunker = RegexpParser(grammar)
for sentence in self.sentences:
tokens = word_tokenize(sentence)
if len(tokens) == 0:
continue
else:
pass
tagged = pos_tag(tokens)
tree = chunker.parse(tagged)
terms = []
leafCollection = []
for subtree in tree.subtrees(filter = lambda t : t.node == 'NP'):
leafCollection.append(subtree.leaves())
for leaf in leafCollection:
term = [w for w,t in leaf if len(w) > 2]
phrase = ' '.join(term)
terms.append(phrase)
featureSet += terms
#......... part of the code is omitted here .........