当前位置: 首页>>代码示例>>Python>>正文


Python SklearnClassifier.classify_many方法代码示例

本文整理汇总了Python中nltk.classify.scikitlearn.SklearnClassifier.classify_many方法的典型用法代码示例。如果您正苦于以下问题:Python SklearnClassifier.classify_many方法的具体用法?Python SklearnClassifier.classify_many怎么用?Python SklearnClassifier.classify_many使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.classify.scikitlearn.SklearnClassifier的用法示例。


在下文中一共展示了SklearnClassifier.classify_many方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: chatBot

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
class chatBot(object):
    """Retrieval-based chatbot trained on the NLTK NPS chat corpus.

    Each incoming message is classified into one of 15 dialogue-act
    categories; the reply is a random corpus post drawn from the category
    that ``mapper`` pairs with the predicted one.
    """

    def __init__(self):
        self.posts = nltk.corpus.nps_chat.xml_posts()
        self.categories = ['Emotion', 'ynQuestion', 'yAnswer', 'Continuer',
                'whQuestion', 'System', 'Accept', 'Clarify', 'Emphasis',
                'nAnswer', 'Greet', 'Statement', 'Reject', 'Bye', 'Other']
        # mapper[i]: index of the response category used when the input is
        # classified as category i.
        self.mapper = [0, 2, 6, 3, 11, 5, 8, 1, 8, 3, 10, 11, 13, 13, 13]
        self.responses = {}    # category index -> list of raw corpus posts
        self.featuresets = []  # (featureset, category index) pairs
        self.train = []
        self.test = []
        self.testSet = []
        self.testSetClass = []
        self.classif = SklearnClassifier(LinearSVC())
        for i in range(0, 15):
            self.responses[i] = []
        for post in self.posts:
            label = self.categories.index(post.get('class'))
            self.featuresets.append((self.tokenize(post.text), label))
            # Collect raw posts per category to draw replies from later.
            self.responses[label].append(post.text)

    def tokenize(self, sentence):
        """Extract a bag-of-words featureset from a message."""
        features = {}
        tokens = nltk.word_tokenize(sentence)
        for t in tokens:
            features['contains(%s)' % t.lower()] = True
        return features

    def talk(self):
        """Interactive loop: classify each user line and print a reply."""
        while 1:
            inp = raw_input("YOU: ")
            features = self.tokenize(inp)
            # classify_many expects a *sequence* of featuresets, so wrap the
            # single featureset in a list and take the first prediction
            # (the original passed the bare dict).
            pp = int(self.classif.classify_many([features])[0])
            r = self.responses[self.mapper[pp]]
            # randint's upper bound is inclusive; len(r) - 1 avoids the
            # IndexError the original randint(0, len(r)) could raise.
            val = randint(0, len(r) - 1)
            print("BOT: " + r[val])

    def trainSet(self):
        """Shuffle the featuresets, train on 90% and classify the 10% held out."""
        shuffle(self.featuresets)
        size = int(len(self.featuresets) * .1)  # 10% is used for the test set
        self.train = self.featuresets[size:]
        self.test = self.featuresets[:size]
        self.classif.train(self.train)

        self.testSet = []
        self.testSetClass = []
        for features, label in self.test:
            self.testSet.append(features)
            self.testSetClass.append(label)
        self.batch = self.classif.classify_many(self.testSet)

    def statistics(self):
        """Print precision/recall/F1 per category for the held-out split."""
        # NOTE(review): target_names lists all 15 categories while labels may
        # contain fewer distinct values -- verify lengths match.
        print (classification_report(self.testSetClass, self.batch, labels=list(set(self.testSetClass)),target_names=self.categories))
开发者ID:donjuma,项目名称:NLP_chatBot,代码行数:62,代码来源:nps.py

示例2: __init__

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
class SKClassifier:
    """Thin wrapper exposing train/test/classify over NLTK's SklearnClassifier."""

    classifier = None

    def __init__(self, cls='SVC'):
        """Build the wrapped estimator selected by name.

        Unknown names fall back to SVC.  The original indexed the dict
        directly, so an unknown name raised KeyError and the explicit SVC
        fallback branch was unreachable; it also instantiated all three
        estimators eagerly.  Mapping to the classes and calling ``.get`` with
        a default fixes both.
        """
        factories = {
            'SVC': SVC,
            'LogisticRegression': LogisticRegression,
            'BernoulliNB': BernoulliNB,
        }
        self.classifier = SklearnClassifier(factories.get(cls, SVC)())

    def train(self, trainset):
        """Train on a list of (featureset, label) pairs."""
        self.classifier.train(trainset)

    def test(self, tagged, featuresets):
        """Predict *featuresets* and return accuracy against *tagged* gold labels."""
        predict = self.classifier.classify_many(featuresets)
        print(predict)  # parenthesised: valid in both Python 2 and 3
        return accuracy_score(tagged, predict)

    def classify(self, featureset):
        """Classify a single featureset."""
        return self.classifier.classify(featureset)

    def classify_many(self, featuresets):
        """Classify a sequence of featuresets."""
        return self.classifier.classify_many(featuresets)
开发者ID:Palazor,项目名称:sentiment,代码行数:28,代码来源:SkClassifier.py

示例3: performCrossValidation

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def performCrossValidation(featureset, labels, foldsCount, sklearnclassifier, uniqLabels):
    """Run stratified k-fold cross-validation and print averaged metrics.

    Parameters:
        featureset: list of (features, label) pairs, index-aligned with labels.
        labels: gold labels used for stratification.
        foldsCount: number of folds.
        sklearnclassifier: scikit-learn estimator to wrap per fold.
        uniqLabels: label order for precision/recall/F-score reporting.
    """
    accuracySum = 0.0
    precisionSums = defaultdict(float)
    recallSums = defaultdict(float)
    fscoreSums = defaultdict(float)
    # StratifiedKFold keeps the label distribution similar across folds
    # (old sklearn cross_validation module, kept as the file uses it).
    crossValidationIterations = cross_validation.StratifiedKFold(labels, n_folds=foldsCount)
    for train, test in crossValidationIterations:
        trainset = [featureset[i] for i in train]
        testset = [featureset[i] for i in test]
        print("before train")
        classifier = SklearnClassifier(sklearnclassifier).train(trainset)

        true = [label for features, label in testset]
        predicted = classifier.classify_many([features for features, label in testset])

        precisions, recalls, fscores, support = precision_recall_fscore_support(true, predicted, pos_label=None, labels=uniqLabels)
        accuracySum += accuracy_score(true, predicted)

        for label, value in zip(uniqLabels, precisions):
            precisionSums[label] += value
        for label, value in zip(uniqLabels, recalls):
            recallSums[label] += value
        for label, value in zip(uniqLabels, fscores):
            fscoreSums[label] += value

    # Typo "accurancy" fixed in the report line.
    print("Average accuracy: {0:.3f}".format(accuracySum/foldsCount))
    # 'precSum' instead of 'sum': the original shadowed the builtin.
    measures = {label: (precSum/foldsCount, recallSums.get(label)/foldsCount, fscoreSums.get(label)/foldsCount)
                for label, precSum in precisionSums.items()}
    for label, (prec, recall, fscore) in measures.items():
        print("Average precision for {0}: {1:.3f}".format(label, prec))
        print("Average recall for {0}: {1:.3f}".format(label, recall))
        print("Average f score for {0}: {1:.3f}".format(label, fscore))
开发者ID:ekedziora,项目名称:sentiment,代码行数:34,代码来源:utils.py

示例4: main3

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def main3():
    """Train a LinearSVC on the module-level trainData, report accuracy and a
    classification report against testData, and display the confusion matrix.

    Relies on module globals: trainData, testData, t_test_skl.
    """
    from nltk.classify.scikitlearn import SklearnClassifier
    from sklearn.svm import LinearSVC
    from sklearn.metrics import confusion_matrix
    from matplotlib import pyplot

    svm = SklearnClassifier(LinearSVC(loss="hinge"))
    svm.train(trainData)
    print("SVM: ", nltk.classify.accuracy(svm, testData))
    # classify_many accepts any iterable of featuresets; pass a generator.
    results = svm.classify_many(item[0] for item in testData)

    print(results)
    from sklearn.metrics import classification_report

    # Full per-label report.
    # NOTE(review): target_names is given the raw label sequence t_test_skl,
    # not one display name per unique label -- looks wrong; verify upstream.
    print(classification_report(t_test_skl, results, labels=list(set(t_test_skl)), target_names=t_test_skl))

    # Compute confusion matrix
    import numpy as np
    cmm = confusion_matrix([x[1] for x in testData], results)

    print(cmm)
    # np.float was removed in NumPy 1.24; the builtin float is the
    # documented replacement and produces the same float64 array.
    cmm = np.array(cmm, dtype=float)
    print(cmm.shape)

    # Show confusion matrix (inline / separate window depending on backend).
    print(pyplot.imshow(cmm, interpolation='nearest'))
开发者ID:listentojohan,项目名称:cjor,代码行数:34,代码来源:Main.py

示例5: learn_model

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def learn_model(data, target):
    """Train a MultinomialNB sentiment classifier on jieba-segmented text.

    Splits (data, target) into 90% train / 10% test, builds bag-of-best-words
    featuresets, trains, evaluates via evaluate_model, and returns
    (classifier, bestwords).
    """
    bestwords = best_of_words(data, target)
    # Split validation: test_size=0.1 gives 90% training / 10% test.
    # (The original comment claimed "80% training, 20% test", which
    # contradicted the code.)
    data_train, data_test, target_train, target_test = cross_validation.train_test_split(
        data, target, test_size=0.1, random_state=43)

    train_feature = []
    test_feature = []
    for doc, label in zip(data_train, target_train):
        words = jieba.cut(doc, cut_all=False)  # segment Chinese text
        # Bag-of-words over the pre-selected informative vocabulary.
        feats = {word: True for word in words if word in bestwords}
        train_feature.append([feats, label])

    for doc in data_test:
        words = jieba.cut(doc, cut_all=False)
        test_feature.append({word: True for word in words if word in bestwords})

    classifier = SklearnClassifier(MultinomialNB())
    classifier.train(train_feature)

    predicted = classifier.classify_many(test_feature)
    evaluate_model(target_test, predicted)

    return classifier, bestwords
开发者ID:cysjtu,项目名称:SentimentAnalysis,代码行数:34,代码来源:nlp_machine_v3.py

示例6: score

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def score(classifier):
    """Wrap *classifier* in an NLTK SklearnClassifier, train it on the
    module-level trainset, and return its accuracy on the test set."""
    wrapped = SklearnClassifier(classifier)
    wrapped.train(trainset)
    predictions = wrapped.classify_many(test)
    return accuracy_score(tag_test, predictions)
开发者ID:vsooda,项目名称:Review-Helpfulness-Prediction,代码行数:9,代码来源:store_sentiment_classifier.py

示例7: SVM

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def SVM(training_set, test_set):
    """Train a LinearSVC on *training_set* and evaluate it on *test_set*.

    Returns (runTrained, accuracy, predictedLabels, trueLabels), where
    runTrained re-applies the trained classifier to another test set.
    """
    classifier = SklearnClassifier(LinearSVC())
    print("Training a new SVM classifier")
    classifier.train(training_set)
    # Compute the test accuracy once and reuse it; the original evaluated
    # nltk.classify.accuracy twice over the same data.
    accuracy = nltk.classify.accuracy(classifier, test_set)
    print("Accuracy of SVM in training:", accuracy)
    trueLabels = [label for data, label in test_set]
    predictedLabels = classifier.classify_many([data for data, label in test_set])

    def runTrained(test_set, hasTags=False):
        """Apply the trained classifier to another test set.

        With hasTags=True, test_set is (data, tag) pairs and the result is
        ((data, prediction) pairs, accuracy); otherwise test_set is bare
        featuresets and only the zipped (data, prediction) pairs return.
        """
        if hasTags:
            tagglessTest_set = [data for data, tag in test_set]
            acc = nltk.classify.accuracy(classifier, test_set)
            print("Accuracy:", acc)
            predictions = classifier.classify_many(tagglessTest_set)
            return ([e for e in zip(tagglessTest_set, predictions)], acc)
        tagglessTest_set = test_set
        predictions = classifier.classify_many(tagglessTest_set)
        return [e for e in zip(tagglessTest_set, predictions)]

    return (runTrained, accuracy, predictedLabels, trueLabels)
开发者ID:dfgerrity,项目名称:AuthorDetector,代码行数:28,代码来源:ClassifierRunner.py

示例8: clf_score

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def clf_score(classifier):
    """Train *classifier* on the module-level train_set and return its
    accuracy over the module-level test featuresets (gold: tag_test)."""
    model = SklearnClassifier(classifier)
    model.train(train_set)
    predictions = model.classify_many(test)
    return accuracy_score(tag_test, predictions)
开发者ID:wac81,项目名称:LSI-for-ChineseDocument,代码行数:9,代码来源:store+sentiment+classifier.py

示例9: score

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def score(trainset, testset, classifier):
    """Train *classifier* on *trainset* and return its accuracy on *testset*,
    a sequence of (featureset, label) pairs."""
    model = SklearnClassifier(classifier)
    # Keep DictVectorizer feature order unsorted (private attribute,
    # reproduced exactly as in the original).
    model._vectorizer.sort = False
    model.train(trainset)
    features, gold = zip(*testset)
    predictions = model.classify_many(features)
    return accuracy_score(gold, predictions)
开发者ID:eleanordong,项目名称:datamining,代码行数:9,代码来源:sentimentexample.py

示例10: coem

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def coem(L1, L2, U1, U2):
    """Co-EM style co-training over two feature views.

    Parameters:
        L1, L2: labelled (featureset, label) lists for views 1 and 2.
        U1, U2: unlabelled featuresets for views 1 and 2 (parallel lists).
    Returns the labels classifier 1 assigns to U1 after the final round.
    """
    pipeline = Pipeline([('tfidf', TfidfTransformer()),
                         ('chi2', SelectKBest(chi2, k=100)),
                         ('nb', MultinomialNB())])
    classifier1 = SklearnClassifier(pipeline)
    classifier1.train(L1)

    # Initial labels for U from the view-1 classifier.
    U1_labels = classifier1.classify_many(U1)

    iterations = 0
    while iterations < 25:
        classifier2 = SklearnClassifier(pipeline)
        # Copy L2 before appending: the original did `L2_train = L2`, which
        # aliased the caller's list, so the appended pseudo-labelled items
        # mutated L2 and accumulated duplicates on every iteration.
        L2_train = list(L2)
        # Add everything in U with labels from classifier 1.
        for i, sub_bow in enumerate(U2):
            L2_train.append((sub_bow, U1_labels[i]))
        classifier2.train(L2_train)
        # Classifier 2 labels U in its own view.
        U2_labels = classifier2.classify_many(U2)

        # Classifier 1 retrains on L1 plus classifier 2's labels on U1.
        # Same aliasing fix as above.
        L1_train = list(L1)
        for i, mail_bow in enumerate(U1):
            L1_train.append((mail_bow, U2_labels[i]))

        classifier1 = SklearnClassifier(pipeline)
        classifier1.train(L1_train)
        U1_labels = classifier1.classify_many(U1)
        # Parenthesised print: valid in both Python 2 and 3.
        print(labels_find_intersection(U1_labels, U2_labels))
        iterations += 1

    return U1_labels
开发者ID:aravindsankar28,项目名称:NLP,代码行数:47,代码来源:train_tfidf.py

示例11: buildClassifier_score

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def buildClassifier_score(trainSet, devtestSet, classifier):
    """Train *classifier* on trainSet and return its accuracy on devtestSet.

    devtestSet is a sequence of (featureset, label) pairs; it is split into
    feature data and gold labels, the estimator is wrapped in NLTK's
    scikit-learn adapter, and the accuracy of the predictions is returned.
    """
    # Split the featurised dev/test set into data and gold labels.
    dev, tag_dev = zip(*devtestSet)
    # Use the scikit-learn estimator through NLTK's interface.
    # (The original's unused `from nltk import compat` import was removed.)
    classifier = SklearnClassifier(classifier)
    classifier.train(trainSet)
    # classify_many replaces the deprecated batch_classify.
    pred = classifier.classify_many(dev)
    # Compare predictions with the human-annotated labels.
    return accuracy_score(tag_dev, pred)
开发者ID:coolspiderghy,项目名称:weibo_scrawler_app,代码行数:12,代码来源:evalueClassier.py

示例12: performTestValidation

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def performTestValidation(trainset, testset, sklearnclassifier, uniqLabels):
    """Train on *trainset*, evaluate on *testset*, and print accuracy plus
    per-label precision, recall, and F-score for each label in uniqLabels."""
    model = SklearnClassifier(sklearnclassifier).train(trainset)
    gold = [label for features, label in testset]
    predicted = model.classify_many([features for features, label in testset])

    precisions, recalls, fscores, support = precision_recall_fscore_support(
        gold, predicted, pos_label=None, labels=uniqLabels)
    acc = accuracy_score(gold, predicted)

    print("Test accuracy: {0:.3f}".format(acc))
    measures = {lbl: (p, r, f)
                for lbl, p, r, f in zip(uniqLabels, precisions, recalls, fscores)}
    for lbl, (p, r, f) in measures.items():
        print("Precision for {0}: {1:.3f}".format(lbl, p))
        print("Recall for {0}: {1:.3f}".format(lbl, r))
        print("F score for {0}: {1:.3f}".format(lbl, f))
开发者ID:ekedziora,项目名称:sentiment,代码行数:16,代码来源:utils.py

示例13: train_model

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def train_model(classifier, name, printout=False):
    """Train *classifier* on the module-level trainData and score it.

    Predicts the module-level test samples (testSam, gold testTag); when
    *printout* is true, prints a full report and pickles the trained model
    under data_path.  Returns the accuracy.
    """
    classifier = SklearnClassifier(classifier)
    classifier.train(trainData)
    predict = classifier.classify_many(testSam)
    accuracy = accuracy_score(testTag, predict)
    if printout:
        # Parenthesised single-argument prints work in both Python 2 and 3.
        print('*******模型: %s的测试结果*********' % name)
        print('\n')
        print('%s`s accuracy is %f' % (name, accuracy))
        print('%s`s score report is \n' % name)
        print(classification_report(testTag, predict))
        print('%s`s confusion is \n' % name)
        print(confusion_matrix(testTag, predict))
        print('\n')
        model_file = data_path + name + ".pkl"
        # Pickle in binary mode and close the handle deterministically;
        # the original opened with mode 'w' and leaked the file object.
        with open(model_file, 'wb') as fout:
            pickle.dump(classifier, fout)
    return accuracy
开发者ID:yyr93520,项目名称:NLPproject,代码行数:20,代码来源:train_model.py

示例14: classifier_score

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def classifier_score(tp, classifier, train_list, test, test_tag):
    """Train the given estimator, pickle it, and score it on the test set.

    Parameters:
        tp: name used for the pickle file ('./Reviews/<tp>.pkl').
        classifier: scikit-learn estimator to wrap and train.
        train_list: training (featureset, label) pairs.
        test: test featuresets; test_tag: gold labels ('pos'/'neg').

    Returns (interval, pos_precision, pos_recall, accuracy).
    """
    starttime = datetime.datetime.now()
    classifier = SklearnClassifier(classifier)
    classifier.train(train_list)
    iohelper.save_objects2pickle(classifier, './Reviews/' + tp + '.pkl')
    pred = classifier.classify_many(test)  # list of predicted labels
    # Map 'pos'/'neg' tags to 1/0 for the binary precision/recall metrics.
    y_true = [1 if tag == 'pos' else 0 for tag in test_tag]
    y_pred = [1 if tag == 'pos' else 0 for tag in pred]
    pos_precision = precision_score(y_true, y_pred)
    pos_recall = recall_score(y_true, y_pred)
    endtime = datetime.datetime.now()
    # total_seconds() covers the whole wall-clock delta; the original read
    # timedelta.microseconds, which only returns the sub-second component
    # and silently dropped everything >= 1 second.  Same intended unit
    # (microseconds / 100) is preserved.
    interval = (endtime - starttime).total_seconds() * 1e6 / 100
    return interval, pos_precision, pos_recall, accuracy_score(test_tag, pred)
开发者ID:JoshuaMichaelKing,项目名称:Stock-SentimentAnalysis,代码行数:20,代码来源:classifiers_score.py

示例15: learn_model

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def learn_model(data,target):
    # preparing data for split validation. 60% training, 40% test
    state=43#randrange(1,23432)+123
    print "statue 6857"
    print state
    data_train,data_test,target_train,target_test = cross_validation.train_test_split(data,target,test_size=0.20,random_state=state)
    #classifier = BernoulliNB().fit(data_train,target_train)
    stop_word_dict={}#build_stop_word_dict()
    sentiment_dict={}#build_sentiment_dict()
    global hinfo_dict
    hinfo_dict=build_hinfo_dict(data,target)
    
        
        
    #print stop_word_dict.keys()
    raw_input("begin train")
    train_feature=[]
    test_feature=[]
    for i in range(len(data_train)):
        print i
        d=data_train[i]
        #d=jieba.cut(d, cut_all=False)
        l=target_train[i]
        tmp=[best_word_feats(d,stop_word_dict,sentiment_dict,hinfo_dict),l]
        train_feature.append(tmp)
        
    for i in range(len(data_test)):
        print i
        d=data_test[i]
        #d=jieba.cut(d, cut_all=False)
        l=target_test[i]
        tmp=best_word_feats(d,stop_word_dict,sentiment_dict,hinfo_dict)
        test_feature.append(tmp)
    
    #BernoulliNB MultinomialNB LogisticRegression  SVC LinearSVC
    print "max_len %d"%(max_len)
    print "min_len %d"%(min_len)
    
    print "avg_len %d"%(sum/cnt)
    
    print "BernoulliNB"
    classifier = SklearnClassifier(BernoulliNB())
    classifier.train(train_feature)
    print "--------------"
    print len(classifier._vectorizer.get_feature_names())
    
    for f in classifier._vectorizer.get_feature_names():
        print f.encode("utf-8")
    
    predicted = classifier.classify_many(test_feature)
    evaluate_model(target_test,predicted)
    
    
    ids=range(len(data_test))
    result=[]
    for p in predicted:
        if p =='positive':
            result.append('1')
        else:
            result.append('-1')
        
    save_predict(data_test, ids, result, "BernoulliNB.xml")
    
    
    """
开发者ID:cysjtu,项目名称:SentimentAnalysis,代码行数:67,代码来源:nlp_machine.py


注:本文中的nltk.classify.scikitlearn.SklearnClassifier.classify_many方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。