当前位置: 首页>>代码示例>>Python>>正文


Python SklearnClassifier.train方法代码示例

本文整理汇总了Python中nltk.classify.scikitlearn.SklearnClassifier.train方法的典型用法代码示例。如果您正苦于以下问题:Python SklearnClassifier.train方法的具体用法?Python SklearnClassifier.train怎么用?Python SklearnClassifier.train使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.classify.scikitlearn.SklearnClassifier的用法示例。


在下文中一共展示了SklearnClassifier.train方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
class SKClassifier:
    """Thin wrapper around NLTK's ``SklearnClassifier``.

    The wrapped scikit-learn estimator is chosen by name; an unrecognised
    name falls back to ``SVC``.
    """

    # Overwritten per instance in __init__; the class-level default is kept
    # so attribute access on the class itself keeps working.
    classifier = None

    def __init__(self, cls='SVC'):
        """Create the wrapped classifier.

        :param cls: 'SVC', 'LogisticRegression' or 'BernoulliNB'. Any other
            value selects the default ``SVC`` estimator.
        """
        # Map names to estimator classes (not instances) so that only the
        # requested estimator is actually constructed.
        estimators = {
            'SVC': SVC,
            'LogisticRegression': LogisticRegression,
            'BernoulliNB': BernoulliNB,
        }
        # Indexing the dict directly raised KeyError for unknown names, so
        # the intended SVC fallback could never run; .get() makes it real.
        self.classifier = SklearnClassifier(estimators.get(cls, SVC)())

    def train(self, trainset):
        """Train the wrapped classifier on ``trainset``."""
        self.classifier.train(trainset)

    def test(self, tagged, featuresets):
        """Classify ``featuresets``, print the predictions and return the
        ``accuracy_score`` of those predictions against ``tagged``."""
        predict = self.classifier.classify_many(featuresets)
        # Single-argument call form prints identically on Python 2 and 3.
        print(predict)
        return accuracy_score(tagged, predict)

    def classify(self, featureset):
        """Return the predicted label for a single ``featureset``."""
        return self.classifier.classify(featureset)

    def classify_many(self, featuresets):
        """Return the predicted labels for every item of ``featuresets``."""
        return self.classifier.classify_many(featuresets)
开发者ID:Palazor,项目名称:sentiment,代码行数:28,代码来源:SkClassifier.py

示例2: classifier_for_lemma

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def classifier_for_lemma(lemma, filenames):
    """Train a random-forest classifier for ``lemma``.

    Loads the training data named by ``filenames`` (its ``bitextfn``,
    ``alignfn`` and ``annotatedfn`` attributes), keeps at most 20,000
    instances, and trains on them.

    Returns the trained ``SklearnClassifier``, or ``None`` when there are no
    training instances or fewer than two distinct labels.
    """
    # XXX: always doing non-null and Random Forest for initial version
    model = SklearnClassifier(RandomForestClassifier(), sparse=False)

    print("loading training data for", lemma)
    load_training_for_word(
        lemma,
        filenames.bitextfn,
        filenames.alignfn,
        filenames.annotatedfn,
    )
    instances = trainingdata.trainingdata_for(lemma, nonnull=True)
    print("got {0} instances for {1}".format(len(instances), lemma))

    # Only the extracted instances are needed from here on, so drop the
    # sentences themselves and reclaim the memory.
    trainingdata.set_examples([], [])
    trainingdata.set_sl_annotated([])
    gc.collect()

    max_instances = 20 * 1000
    if len(instances) > max_instances:
        print("capping to 20k instances to fit in memory")
        instances = instances[:max_instances]

    distinct_labels = {label for (_, label) in instances}
    print("loaded training data for", lemma)

    # A classifier needs data and at least two classes to separate.
    if instances and len(distinct_labels) >= 2:
        model.train(instances)
        return model
    return None
开发者ID:alexrudnick,项目名称:chipa,代码行数:27,代码来源:annotate_clwsd.py

示例3: trainClassifiers

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def trainClassifiers(tweets):
    """Train a Naive Bayes and a linear SVC classifier on ``tweets``.

    The Naive Bayes model is also pickled to
    ``data/trained_classifiers/NBClassifier.pickle``.

    :param tweets: labelled tweets handed to ``apply_features`` together
        with ``extract_features``.
    :return: ``(training_set, NBClassifier, linear_SVC_classifier)``.
    """
    # Generate the training set
    training_set = nltk.classify.util.apply_features(extract_features, tweets)
    print("Training set created!")

    # Train and save the Naive Bayes classifier to a file. The context
    # manager closes the file even if pickling raises.
    NBClassifier = nltk.NaiveBayesClassifier.train(training_set)
    with open('data/trained_classifiers/NBClassifier.pickle', 'wb') as f:
        pickle.dump(NBClassifier, f, 1)
    print("NBClassifier Classifier Trained")

    # Train linear SVC
    linear_SVC_classifier = SklearnClassifier(LinearSVC())
    linear_SVC_classifier.train(training_set)

    # NOTE: a Max Entropy classifier used to be trained here as well; it is
    # disabled and the linear SVC is returned in its place.
    return (training_set, NBClassifier, linear_SVC_classifier)
开发者ID:quiuquio,项目名称:Twitter-Sentiment-Analysis,代码行数:28,代码来源:main2.py

示例4: clf_score

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def clf_score(classifier):
    """Return the accuracy of ``classifier`` wrapped in ``SklearnClassifier``.

    Trains on ``train_set`` and scores predictions for ``test`` against
    ``tag_test``; all three names come from the enclosing scope.
    """
    wrapped = SklearnClassifier(classifier)
    wrapped.train(train_set)
    predicted_labels = wrapped.classify_many(test)
    return accuracy_score(tag_test, predicted_labels)
开发者ID:wac81,项目名称:LSI-for-ChineseDocument,代码行数:9,代码来源:store+sentiment+classifier.py

示例5: score

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def score(classifier):
    """Train ``classifier`` through NLTK's scikit-learn wrapper and return
    its accuracy.

    ``trainset``, ``test`` and ``tag_test`` are read from the enclosing
    scope.
    """
    model = SklearnClassifier(classifier)
    model.train(trainset)
    predictions = model.classify_many(test)
    return accuracy_score(tag_test, predictions)
开发者ID:vsooda,项目名称:Review-Helpfulness-Prediction,代码行数:9,代码来源:store_sentiment_classifier.py

示例6: score

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def score(trainset, testset, classifier):
    """Train ``classifier`` on ``trainset`` and return accuracy on ``testset``.

    ``testset`` is a sequence of ``(featureset, label)`` pairs; it is split
    into parallel feature and label tuples before scoring.
    """
    model = SklearnClassifier(classifier)
    # Turn off the `sort` option on the wrapper's internal vectorizer.
    model._vectorizer.sort = False
    model.train(trainset)

    features, gold_labels = zip(*testset)
    predictions = model.classify_many(features)
    return accuracy_score(gold_labels, predictions)
开发者ID:eleanordong,项目名称:datamining,代码行数:9,代码来源:sentimentexample.py

示例7: learn_model

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def learn_model(data, target):
    """Train a multinomial Naive Bayes sentiment model on segmented text.

    Each document is segmented with ``jieba`` and reduced to a bag of the
    words that appear in ``bestwords`` (as returned by ``best_of_words``).

    :param data: the raw text documents.
    :param target: the label for each document, parallel to ``data``.
    :return: ``(classifier, bestwords)``.
    """
    bestwords = best_of_words(data, target)

    # Hold out 10% of the samples for evaluation (test_size=0.1).
    data_train, data_test, target_train, target_test = \
        cross_validation.train_test_split(
            data, target, test_size=0.1, random_state=43)

    def _features(text):
        # Presence features for the selected words only.
        return {
            word: True
            for word in jieba.cut(text, cut_all=False)
            if word in bestwords
        }

    # Iterate positionally instead of indexing with range(len(...)): after a
    # shuffled split, `x[i]` on an index-labelled container (e.g. a pandas
    # Series) looks up by label and can pair the wrong rows.
    train_feature = [
        (_features(text), label)
        for text, label in zip(data_train, target_train)
    ]
    test_feature = [_features(text) for text in data_test]

    classifier = SklearnClassifier(MultinomialNB())
    classifier.train(train_feature)

    predicted = classifier.classify_many(test_feature)
    evaluate_model(target_test, predicted)

    return classifier, bestwords
开发者ID:cysjtu,项目名称:SentimentAnalysis,代码行数:34,代码来源:nlp_machine_v3.py

示例8: svm

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def svm(train_data, preprocessing=True):
    """Train a ``LinearSVC`` (via NLTK's wrapper) on ``train_data``.

    Each item of ``train_data`` is indexed as ``item[0]`` (the sample) and
    ``item[1]`` (its label) and passed through ``preprocess``. The
    ``preprocessing`` flag is accepted but not consulted.

    Returns the trained classifier.
    """
    samples = [preprocess(item[0], label=item[1]) for item in train_data]
    model = SklearnClassifier(LinearSVC())
    model.train(samples)
    return model
开发者ID:EricSchles,项目名称:text_classify,代码行数:9,代码来源:algorithms.py

示例9: sentiment_classifier

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def sentiment_classifier(debug):
    """Train a linear SVC sentiment classifier from ``training.csv``.

    Reads the ``TweetText`` and ``Sentiment`` columns, converts each tweet
    with ``tweet_to_words`` and trains on the resulting pairs. When
    ``debug`` is truthy, a progress line is printed every 1000 tweets.

    Returns the trained ``SklearnClassifier``.
    """
    frame = pd.read_csv(
        'training.csv',
        delimiter=',',
        quotechar='"',
        escapechar='\\',
        header=0,
    )
    texts = frame['TweetText']
    num_tweets = texts.size

    labelled = []
    for idx in xrange(num_tweets):
        ordinal = idx + 1
        if debug and ordinal % 1000 == 0:
            print("Tweet %d of %d\n" % (ordinal, num_tweets))
        labelled.append((tweet_to_words(texts[idx]), frame['Sentiment'][idx]))

    # An earlier CountVectorizer + RandomForest pipeline was dropped in
    # favour of the linear SVC below.
    model = SklearnClassifier(LinearSVC())
    model.train(labelled)
    return model
开发者ID:greensam,项目名称:am221project,代码行数:29,代码来源:sentiment.py

示例10: evaluate

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def evaluate(train_qs, test_qs, params, d):
    """Train and evaluate a logistic-regression classifier on deep features.

    For every sentence position of every question, the embeddings of all
    words seen so far are averaged and pushed through three ReLU layers;
    the flattened output becomes one feature set labelled ``ans[0]``.

    :param train_qs: iterable of ``(qs, ans)`` pairs used for training.
    :param test_qs: iterable of ``(qs, ans)`` pairs used for testing.
    :param params: ``(W, b, W2, b2, W3, b3, L)`` - layer weights/biases and
        the embedding matrix ``L``, whose columns are indexed by word.
    :param d: embedding dimensionality (number of rows taken from ``L``).

    Prints instance counts and train/test accuracy, then pickles the
    classifier to ``data/deep/classifier``. Returns ``None``.
    """
    (W, b, W2, b2, W3, b3, L) = params

    train_feats = []
    test_feats = []

    for split, feats in ((train_qs, train_feats), (test_qs, test_feats)):

        for qs, ans in split:

            prev_sum = zeros((d, 1))
            history = []

            for dist in qs:

                sent = qs[dist]

                # Input is the running average of every word embedding seen
                # so far. `sum` must be NumPy's here: it is called with the
                # `axis` keyword.
                history += sent
                prev_sum += sum(L[:, sent], axis=1).reshape((d, 1))
                if not history:
                    av = zeros((d, 1))
                else:
                    av = prev_sum / len(history)

                # apply non-linearity
                p = relu(W.dot(av) + b)
                p2 = relu(W2.dot(p) + b2)
                p3 = relu(W3.dot(p2) + b3)

                curr_feats = {}
                for dim, val in ndenumerate(p3):
                    curr_feats['__' + str(dim)] = val

                feats.append((curr_feats, ans[0]))

    # Single-argument print calls produce the same output on Python 2 and 3.
    print('total training instances: %s' % (len(train_feats),))
    print('total testing instances: %s' % (len(test_feats),))
    random.shuffle(train_feats)

    # can modify this classifier / do grid search on regularization
    # parameter using sklearn
    classifier = SklearnClassifier(LogisticRegression(C=10))
    classifier.train(train_feats)

    print('accuracy train: %s'
          % (nltk.classify.util.accuracy(classifier, train_feats),))
    print('accuracy test: %s'
          % (nltk.classify.util.accuracy(classifier, test_feats),))
    print('')

    print('dumping classifier')
    # Context manager guarantees the pickle file is flushed and closed.
    with open('data/deep/classifier', 'wb') as out:
        cPickle.dump(classifier, out, protocol=cPickle.HIGHEST_PROTOCOL)
开发者ID:jankim,项目名称:qb,代码行数:62,代码来源:learn_classifiers.py

示例11: train

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def train(cleanedDataCollection, tagPool):
    """Train a multinomial Naive Bayes classifier and report its quality.

    Feature sets are built with ``extractFeatures`` and split by label into
    "trash" and everything else. The first 10 samples of each group form
    the test set; the remainder is used for training. Accuracy plus
    precision, recall and F-measure for the "useful" label are printed.

    Returns the trained ``SklearnClassifier``.
    """
    featuresets = [
        (extractFeatures(doc, tagPool), label)
        for (doc, label) in cleanedDataCollection
    ]

    trash_samples = []
    other_samples = []
    for labelled in featuresets:
        bucket = trash_samples if labelled[1] == "trash" else other_samples
        bucket.append(labelled)

    held_out = 10
    train_set = trash_samples[held_out:] + other_samples[held_out:]
    test_set = trash_samples[:held_out] + other_samples[:held_out]

    model = SklearnClassifier(MultinomialNB())
    model.train(train_set)
    print("accuracy is: %s" % (accuracy(model, test_set)))

    precision, recall, fMeasure = precision_recall_fmeasure(
        model, test_set, "useful")

    print("precision is: %s" % (precision))
    print("recall is: %s" % (recall))
    print("F-measure is: %s" % (fMeasure))
    return model
开发者ID:iaoshili,项目名称:NLP_Project,代码行数:32,代码来源:InterestingArticleIdentifier.py

示例12: evaluate

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def _metric_or_zero(value):
    """Return ``value`` as a float, treating a ``None`` metric as 0.0."""
    return 0.0 if value is None else float(value)


def _f_measure(precision, recall):
    """Return the harmonic mean of precision and recall (0.0 if both are 0)."""
    total = precision + recall
    return 2 * (precision * recall) / total if total else 0.0


def evaluate(classifier_alo):
    """Train ``classifier_alo`` and print pos/neg precision, recall and F1.

    The estimator is wrapped in ``SklearnClassifier``, trained on
    ``trainFeatures`` and evaluated on ``testFeatures`` (both read from the
    enclosing scope). One line is printed with six values formatted to
    three decimals and separated by two spaces: positive precision,
    positive recall, negative precision, negative recall, positive F1,
    negative F1.

    Metrics that NLTK reports as ``None`` (undefined because a set is
    empty) are printed as 0.000 instead of raising, and an F1 whose
    precision and recall are both zero is printed as 0.000 instead of
    dividing by zero.
    """
    # Use a scikit-learn estimator through the NLTK classifier interface.
    classifier = SklearnClassifier(classifier_alo)
    classifier.train(trainFeatures)  # train the classifier

    # Group sample indices by gold label and by predicted label.
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)
    for i, item in enumerate(testFeatures):
        referenceSets[item[1]].add(i)
        predicted = classifier.classify(item[0])
        testSets[predicted].add(i)

    pos_pre = _metric_or_zero(
        nltk.metrics.precision(referenceSets['pos'], testSets['pos']))
    pos_recall = _metric_or_zero(
        nltk.metrics.recall(referenceSets['pos'], testSets['pos']))
    neg_pre = _metric_or_zero(
        nltk.metrics.precision(referenceSets['neg'], testSets['neg']))
    neg_recall = _metric_or_zero(
        nltk.metrics.recall(referenceSets['neg'], testSets['neg']))

    values = (
        pos_pre,
        pos_recall,
        neg_pre,
        neg_recall,
        _f_measure(pos_pre, pos_recall),
        _f_measure(neg_pre, neg_recall),
    )
    print("  ".join('{0:.3f}'.format(value) for value in values))
开发者ID:delili,项目名称:NLP_Comments_Sentiment_Analysis,代码行数:27,代码来源:process.py

示例13: chatBot

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
class chatBot(object):
    """Dialogue-act chat bot built on the NLTK NPS Chat corpus.

    Posts from the corpus are turned into bag-of-words feature sets labelled
    with the index of their dialogue-act class. A linear SVC predicts the
    class of the user's input, ``mapper`` picks the class to answer with,
    and a random corpus post of that class is printed as the reply.
    """

    def __init__(self):
        """Load the corpus and build feature sets and per-class responses."""
        self.posts = nltk.corpus.nps_chat.xml_posts()
        self.categories = ['Emotion', 'ynQuestion', 'yAnswer', 'Continuer',
                'whQuestion', 'System', 'Accept', 'Clarify', 'Emphasis',
                'nAnswer', 'Greet', 'Statement', 'Reject', 'Bye', 'Other']
        # mapper[predicted class index] -> class index to draw the reply from
        self.mapper = [0, 2, 6, 3, 11, 5, 8, 1, 8, 3, 10, 11, 13, 13, 13]
        self.responses = {}
        self.featuresets = []
        self.train = []
        self.test = []
        self.testSet = []
        self.testSetClass = []
        # Defined up front so statistics() fails clearly, not with an
        # AttributeError, when called before trainSet().
        self.batch = []
        self.classif = SklearnClassifier(LinearSVC())
        for i in range(len(self.categories)):
            self.responses[i] = []
        for post in self.posts:
            # Look the class index up once per post instead of twice.
            label = self.categories.index(post.get('class'))
            self.featuresets.append((self.tokenize(post.text), label))
            # `temp` is kept as an attribute for backward compatibility.
            self.temp = self.responses[label]
            self.temp.append(post.text)

    def tokenize(self, sentence):
        """
            Extracts a set of features from a message.
        """
        features = {}
        tokens = nltk.word_tokenize(sentence)
        for t in tokens:
            features['contains(%s)' % t.lower()] = True
        return features

    def talk(self):
        """Read user input forever and print a reply for each line."""
        import random

        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3

        while True:
            inp = read_line("YOU: ")
            features = self.tokenize(inp)
            # classify() handles a single feature set; the old code passed a
            # bare dict to classify_many() and took element 0.
            pp = int(self.classif.classify(features))
            m = self.mapper[pp]
            r = self.responses[m]
            # random.choice avoids the off-by-one of randint(0, len(r)),
            # whose upper bound is inclusive in the stdlib.
            print("BOT: " + random.choice(r))

    def trainSet(self):
        """Shuffle the data, train on 90% of it and classify the other 10%."""
        shuffle(self.featuresets)
        size = int(len(self.featuresets) * .1) # 10% is used for the test set
        self.train = self.featuresets[size:]
        self.test = self.featuresets[:size]
        self.classif.train(self.train)

        self.testSet = [features for features, _ in self.test]
        self.testSetClass = [label for _, label in self.test]
        self.batch = self.classif.classify_many(self.testSet)

    def statistics(self):
        """Print a classification report for the held-out test set."""
        # Report only the classes present in the test set, and give each its
        # own name; passing all 15 names mislabels rows when some class is
        # missing from the test split.
        labels = sorted(set(self.testSetClass))
        names = [self.categories[label] for label in labels]
        print (classification_report(self.testSetClass, self.batch, labels=labels, target_names=names))
开发者ID:donjuma,项目名称:NLP_chatBot,代码行数:62,代码来源:nps.py

示例14: main3

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def main3():
    """Train a hinge-loss linear SVM and print its evaluation.

    Uses ``trainData``, ``testData`` and ``t_test_skl`` from the enclosing
    scope. Prints the accuracy, the predicted labels, a classification
    report, the confusion matrix and its shape, and finally the image
    object returned by ``pyplot.imshow`` for that matrix.
    """
    from nltk.classify.scikitlearn import SklearnClassifier
    from sklearn.svm import LinearSVC
    from sklearn.metrics import classification_report, confusion_matrix
    from matplotlib import pyplot
    import numpy as np

    svm = SklearnClassifier(LinearSVC(loss="hinge"))
    svm.train(trainData)
    print("SVM: ", nltk.classify.accuracy(svm, testData))
    results = svm.classify_many([item[0] for item in testData])

    print(results)

    # Getting a full report. target_names needs one display name per
    # distinct label, in the same order as `labels`; the old code passed
    # the whole list of true labels instead.
    labels = sorted(set(t_test_skl))
    print(classification_report(
        t_test_skl, results,
        labels=labels,
        target_names=[str(label) for label in labels]))

    # Compute confusion matrix
    cmm = confusion_matrix([x[1] for x in testData], results)

    print(cmm)
    # The builtin float is what the removed `np.float` alias pointed to.
    cmm = np.array(cmm, dtype=float)
    print(cmm.shape)

    # Render the confusion matrix. NOTE(review): pyplot.show() is never
    # called, so no window appears unless the caller shows the figure.
    print(pyplot.imshow(cmm, interpolation='nearest'))
开发者ID:listentojohan,项目名称:cjor,代码行数:34,代码来源:Main.py

示例15: SVM

# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import train [as 别名]
def SVM(training_set, test_set):
    """Train a linear SVM and return a runner plus its evaluation.

    :param training_set: ``(featureset, label)`` pairs to train on.
    :param test_set: ``(featureset, label)`` pairs to evaluate on.
    :return: ``(runTrained, accuracy, predictedLabels, trueLabels)`` where
        ``runTrained`` applies the trained classifier to new data,
        ``accuracy`` is measured on ``test_set``, and the two label lists
        are parallel to ``test_set``.
    """
    classifier = SklearnClassifier(LinearSVC())
    print("Training a new SVM classifier")
    classifier.train(training_set)

    # Measured on test_set; computed once and reused for the return value.
    accuracy = nltk.classify.accuracy(classifier, test_set)
    print("Accuracy of SVM in training:", accuracy)

    trueLabels = [l for d, l in test_set]
    predictedLabels = classifier.classify_many([d for d, t in test_set])

    def runTrained(test_set, hasTags=False):
        """Classify ``test_set`` with the already-trained classifier.

        With ``hasTags`` the items are ``(featureset, tag)`` pairs and the
        result is ``(list of (featureset, prediction), accuracy)``.
        Otherwise the items are bare feature sets and only the list of
        ``(featureset, prediction)`` pairs is returned.
        """
        if hasTags:
            untagged = [data for data, tag in test_set]
            acc = nltk.classify.accuracy(classifier, test_set)
            print("Accuracy:", acc)
            predictions = classifier.classify_many(untagged)
            return (list(zip(untagged, predictions)), acc)

        predictions = classifier.classify_many(test_set)
        return list(zip(test_set, predictions))

    return (runTrained, accuracy, predictedLabels, trueLabels)
开发者ID:dfgerrity,项目名称:AuthorDetector,代码行数:28,代码来源:ClassifierRunner.py


注:本文中的nltk.classify.scikitlearn.SklearnClassifier.train方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。