当前位置: 首页>>代码示例>>Python>>正文


Python Corpus.loadCorpus方法代码示例

本文整理汇总了Python中corpus.Corpus.loadCorpus方法的典型用法代码示例。如果您正苦于以下问题:Python Corpus.loadCorpus方法的具体用法?Python Corpus.loadCorpus怎么用?Python Corpus.loadCorpus使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类corpus.Corpus的用法示例。


下文共展示了Corpus.loadCorpus方法的1个代码示例,示例默认按受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: applyML2

# Required import: from corpus import Corpus [as alias]
# Note: loadCorpus is a method of the Corpus class, so it cannot be imported on its own; call it as Corpus.loadCorpus(...)
def applyML2(trainingSetFilename, testSetFilename=None, setPath=CORPUS_PATH):
    """
    Uses machine learning approach to classify sentences.
    Implements a truly simple 'Leave One Out' function.
    """
    # TODO: Add condition to create corpus, if no file exists.
    print("Training the classifiers using the set at '{path}{file}'".format(
                                                    path=setPath, 
                                                    file=trainingSetFilename))
    
    #trainingSet = Corpus(trainingSetFilename, corpusPath=CORPUS_PATH)
    # trainingSet = Corpus.loadCorpus(filename=trainingSetFilename)
    # trainingSet = Corpus.loadCorpus(filename="training_and_validation_set.pk")
    trainingSet = Corpus.loadCorpus(filename="shuffled_set.pk")


    # for each in trainingSet.reviewIDs[0:10]:
    #     print(each)
    # print()

    # Get the ids - which are ordered ironic, regular - and shuffle them.
    ids = trainingSet.reviewIDs
    random.seed(44)
    random.shuffle(ids)
    # for each in ids[0:10]:
    #     print(each)
    # print()

    # If the -new flag is not set OR there is no file to load,
    # create the corpus anew.


    print("Extracting features...")
#    trainFeatures, trainFeatureVectors = extractFeatures(trainingSet.reviewIDs, 
#                                                trainingSet.reviews)

    
    featureConfig = {
        "minus Imba": { u"Positive Quotes": (u"\"..\"", scareQuotes),
                        u"Negative Quotes": (u"\"--\"", scareQuotesNegative),
                        u"Pos&Punctuation": (u"w+!?", positiveNGramPlusPunctuation),
                        u"Neg&Punctuation": (u"w-!?", negativeNGramPlusPunctuation),
                        u"Positive Hyperbole": (u"3w+", positiveStreak),
                        u"Negative Hyperbole": (u"3w-", negativeStreak),
                        u"Ellipsis and Punctuation": (u"..?!", ellipsisPlusPunctuation),
                        u"Positive&Ellipsis": (u"w+..", lambda x: positiveNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
                        u"Negative&Ellipsis": (u"w-..", lambda x: negativeNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
        },
        "minus Quotes": {u"Positive Imbalance": (u"w-\u2605 ", 
                            posStarPolarityDiscrepancy),
                        u"Negative Imbalance": (u"w+\u2606 ", 
                            negStarPolarityDiscrepancy),
                        u"Pos&Punctuation": (u"w+!?", positiveNGramPlusPunctuation),
                        u"Neg&Punctuation": (u"w-!?", negativeNGramPlusPunctuation),
                        u"Positive Hyperbole": (u"3w+", positiveStreak),
                        u"Negative Hyperbole": (u"3w-", negativeStreak),
                        u"Ellipsis and Punctuation": (u"..?!", ellipsisPlusPunctuation),
                        u"Positive&Ellipsis": (u"w+..", lambda x: positiveNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
                        u"Negative&Ellipsis": (u"w-..", lambda x: negativeNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
        },
        "minus Pos/Neg&Punctuation": {u"Positive Imbalance": (u"w-\u2605 ", 
                            posStarPolarityDiscrepancy),
                        u"Negative Imbalance": (u"w+\u2606 ", 
                            negStarPolarityDiscrepancy),
                        u"Positive Quotes": (u"\"..\"", scareQuotes),
                        u"Negative Quotes": (u"\"--\"", scareQuotesNegative),
                        u"Positive Hyperbole": (u"3w+", positiveStreak),
                        u"Negative Hyperbole": (u"3w-", negativeStreak),
                        u"Ellipsis and Punctuation": (u"..?!", ellipsisPlusPunctuation),
                        u"Positive&Ellipsis": (u"w+..", lambda x: positiveNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
                        u"Negative&Ellipsis": (u"w-..", lambda x: negativeNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
        },
        "minus Hyperbole": {u"Positive Imbalance": (u"w-\u2605 ", 
                            posStarPolarityDiscrepancy),
                        u"Negative Imbalance": (u"w+\u2606 ", 
                            negStarPolarityDiscrepancy),
                        u"Positive Quotes": (u"\"..\"", scareQuotes),
                        u"Negative Quotes": (u"\"--\"", scareQuotesNegative),
                        u"Pos&Punctuation": (u"w+!?", positiveNGramPlusPunctuation),
                        u"Neg&Punctuation": (u"w-!?", negativeNGramPlusPunctuation),
                        u"Ellipsis and Punctuation": (u"..?!", ellipsisPlusPunctuation),
                        u"Positive&Ellipsis": (u"w+..", lambda x: positiveNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
                        u"Negative&Ellipsis": (u"w-..", lambda x: negativeNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
        },
        "minus Ellipsis and Punctuation": {u"Positive Imbalance": (u"w-\u2605 ", 
                            posStarPolarityDiscrepancy),
                        u"Negative Imbalance": (u"w+\u2606 ", 
                            negStarPolarityDiscrepancy),
                        u"Positive Quotes": (u"\"..\"", scareQuotes),
                        u"Negative Quotes": (u"\"--\"", scareQuotesNegative),
                        u"Pos&Punctuation": (u"w+!?", positiveNGramPlusPunctuation),
                        u"Neg&Punctuation": (u"w-!?", negativeNGramPlusPunctuation),
                        u"Positive Hyperbole": (u"3w+", positiveStreak),
                        u"Negative Hyperbole": (u"3w-", negativeStreak),
                        u"Positive&Ellipsis": (u"w+..", lambda x: positiveNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
                        u"Negative&Ellipsis": (u"w-..", lambda x: negativeNGramPlusPunctuation(x, pattern=r"(\.\.|\. \. \.)$")),
        },
        "minus Pos/Neg&Ellipsis": {u"Positive Imbalance": (u"w-\u2605 ", 
                            posStarPolarityDiscrepancy),
                        u"Negative Imbalance": (u"w+\u2606 ", 
#.........这里部分代码省略.........
开发者ID:romanklinger,项目名称:irony-detection,代码行数:103,代码来源:machineLearning.py


注:本文中的corpus.Corpus.loadCorpus方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。