本文整理汇总了Python中corpus.Corpus.loadCorpus方法的典型用法代码示例。如果您正苦于以下问题：Python Corpus.loadCorpus方法的具体用法？Python Corpus.loadCorpus怎么用？Python Corpus.loadCorpus使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类corpus.Corpus的用法示例。
在下文中一共展示了Corpus.loadCorpus方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: applyML2
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import loadCorpus [as 别名]
def applyML2(trainingSetFilename, testSetFilename=None, setPath=CORPUS_PATH):
"""
Uses machine learning approach to classify sentences.
Implements a truly simple 'Leave One Out' function.
"""
# TODO: Add condition to create corpus, if no file exists.
print("Training the classifiers using the set at '{path}{file}'".format(
path=setPath,
file=trainingSetFilename))
#trainingSet = Corpus(trainingSetFilename, corpusPath=CORPUS_PATH)
# trainingSet = Corpus.loadCorpus(filename=trainingSetFilename)
# trainingSet = Corpus.loadCorpus(filename="training_and_validation_set.pk")
trainingSet = Corpus.loadCorpus(filename="shuffled_set.pk")
# for each in trainingSet.reviewIDs[0:10]:
# print(each)
# print()
# Get the ids - which are ordered ironic, regular - and shuffle them.
ids = trainingSet.reviewIDs
random.seed(44)
random.shuffle(ids)
# for each in ids[0:10]:
# print(each)
# print()
# Falls das -new flag nicht gesetzt ist ODER es keine Datei zum laden gibt,
# erstelle den Corpus neu.
print("Extracting features...")
# trainFeatures, trainFeatureVectors = extractFeatures(trainingSet.reviewIDs,
# trainingSet.reviews)
featureConfig = {
"minus Imba": { u"Positive Quotes": (u"\"..\"", scareQuotes),
u"Negative Quotes": (u"\"--\"", scareQuotesNegative),
u"Pos&Punctuation": (u"w+!?", positiveNGramPlusPunctuation),
u"Neg&Punctuation": (u"w-!?", negativeNGramPlusPunctuation),
u"Positive Hyperbole": (u"3w+", positiveStreak),
u"Negative Hyperbole": (u"3w-", negativeStreak),
u"Ellipsis and Punctuation": (u"..?!", ellipsisPlusPunctuation),
u"Positive&Ellipsis": (u"w+..", lambda x: positiveNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
u"Negative&Ellipsis": (u"w-..", lambda x: negativeNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
},
"minus Quotes": {u"Positive Imbalance": (u"w-\u2605 ",
posStarPolarityDiscrepancy),
u"Negative Imbalance": (u"w+\u2606 ",
negStarPolarityDiscrepancy),
u"Pos&Punctuation": (u"w+!?", positiveNGramPlusPunctuation),
u"Neg&Punctuation": (u"w-!?", negativeNGramPlusPunctuation),
u"Positive Hyperbole": (u"3w+", positiveStreak),
u"Negative Hyperbole": (u"3w-", negativeStreak),
u"Ellipsis and Punctuation": (u"..?!", ellipsisPlusPunctuation),
u"Positive&Ellipsis": (u"w+..", lambda x: positiveNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
u"Negative&Ellipsis": (u"w-..", lambda x: negativeNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
},
"minus Pos/Neg&Punctuation": {u"Positive Imbalance": (u"w-\u2605 ",
posStarPolarityDiscrepancy),
u"Negative Imbalance": (u"w+\u2606 ",
negStarPolarityDiscrepancy),
u"Positive Quotes": (u"\"..\"", scareQuotes),
u"Negative Quotes": (u"\"--\"", scareQuotesNegative),
u"Positive Hyperbole": (u"3w+", positiveStreak),
u"Negative Hyperbole": (u"3w-", negativeStreak),
u"Ellipsis and Punctuation": (u"..?!", ellipsisPlusPunctuation),
u"Positive&Ellipsis": (u"w+..", lambda x: positiveNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
u"Negative&Ellipsis": (u"w-..", lambda x: negativeNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
},
"minus Hyperbole": {u"Positive Imbalance": (u"w-\u2605 ",
posStarPolarityDiscrepancy),
u"Negative Imbalance": (u"w+\u2606 ",
negStarPolarityDiscrepancy),
u"Positive Quotes": (u"\"..\"", scareQuotes),
u"Negative Quotes": (u"\"--\"", scareQuotesNegative),
u"Pos&Punctuation": (u"w+!?", positiveNGramPlusPunctuation),
u"Neg&Punctuation": (u"w-!?", negativeNGramPlusPunctuation),
u"Ellipsis and Punctuation": (u"..?!", ellipsisPlusPunctuation),
u"Positive&Ellipsis": (u"w+..", lambda x: positiveNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
u"Negative&Ellipsis": (u"w-..", lambda x: negativeNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
},
"minus Ellipsis and Punctuation": {u"Positive Imbalance": (u"w-\u2605 ",
posStarPolarityDiscrepancy),
u"Negative Imbalance": (u"w+\u2606 ",
negStarPolarityDiscrepancy),
u"Positive Quotes": (u"\"..\"", scareQuotes),
u"Negative Quotes": (u"\"--\"", scareQuotesNegative),
u"Pos&Punctuation": (u"w+!?", positiveNGramPlusPunctuation),
u"Neg&Punctuation": (u"w-!?", negativeNGramPlusPunctuation),
u"Positive Hyperbole": (u"3w+", positiveStreak),
u"Negative Hyperbole": (u"3w-", negativeStreak),
u"Positive&Ellipsis": (u"w+..", lambda x: positiveNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
u"Negative&Ellipsis": (u"w-..", lambda x: negativeNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
},
"minus Pos/Neg&Ellipsis": {u"Positive Imbalance": (u"w-\u2605 ",
posStarPolarityDiscrepancy),
u"Negative Imbalance": (u"w+\u2606 ",
#.........这里部分代码省略.........