本文整理匯總了Python中nltk.classify.MaxentClassifier類的典型用法代碼示例。如果您正苦於以下問題:Python MaxentClassifier類的具體用法?Python MaxentClassifier怎麼用?Python MaxentClassifier使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了MaxentClassifier類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: main_function
def main_function():
    """Train a megam-backed MaxentClassifier on tweets and print its accuracy.

    Training and test tweets are fetched through the ``classify`` helpers,
    converted to feature sets with ``process_tweets``, and the classifier
    trained on the former is scored against the latter.  The ten most
    informative features and the accuracy are written to stdout.

    The database connection opened here is closed before returning, also
    when one of the steps raises.
    """
    conn = MySQLdb.connect(host=DATABASES['date_cutoff']['HOST'],
                           user=DATABASES['date_cutoff']['USER'],
                           passwd=DATABASES['date_cutoff']['PASSWORD'],
                           db=DATABASES['date_cutoff']['NAME'])
    try:
        # NOTE(review): the queries below go through ``conn_analysis``, which
        # is not defined in this function, while ``conn`` (opened above) is
        # never queried -- confirm which connection is actually intended.
        training_tweets = classify.get_training_tweets(conn_analysis)
        training_feature_set = process_tweets(training_tweets)

        # megam has to be configured before training with algorithm="megam".
        config_megam('/opt/packages')
        classifier = MaxentClassifier.train(training_feature_set,
                                            algorithm="megam", trace=0)

        test_tweets = classify.get_test_tweets(conn_analysis)
        test_feature_set = process_tweets(test_tweets)

        classifier.show_most_informative_features(10)
        classifier_accuracy = accuracy(classifier, test_feature_set)
        print("classifier accuracy: " + repr(classifier_accuracy))
    finally:
        conn.close()
示例2: __maxent_train
def __maxent_train(fs):
    """Train and return a MaxentClassifier on the feature sets ``fs``.

    ``algorithm``, ``gaussian_prior_sigma``, ``count_cutoff``, ``min_lldelta``
    and ``trace`` are not parameters of this function; they are resolved from
    the surrounding scope at call time and forwarded unchanged.
    """
    training_options = {
        'algorithm': algorithm,
        'gaussian_prior_sigma': gaussian_prior_sigma,
        'count_cutoff': count_cutoff,
        'min_lldelta': min_lldelta,
        'trace': trace,
    }
    return MaxentClassifier.train(fs, **training_options)
示例3: main_function
def main_function():
    """Classify stored tweets with a megam MaxentClassifier and save the guesses.

    A classifier is trained on the training tweets, every tweet returned by
    ``classify.get_tweets_to_classify`` is classified, the guess is stored via
    ``update_tweet_polarity``, and a per-label tally is printed at the end.

    The database connection opened here is closed before returning, also
    when one of the steps raises.
    """
    conn = MySQLdb.connect(host=DATABASES['default']['HOST'],
                           user=DATABASES['default']['USER'],
                           passwd=DATABASES['default']['PASSWORD'],
                           db=DATABASES['default']['NAME'])
    try:
        # NOTE(review): every query below uses ``conn_analysis``, which is not
        # defined in this function, while ``conn`` (opened above) is never
        # queried -- confirm which connection is actually intended.
        training_tweets = classify.get_training_tweets(conn_analysis)
        training_feature_set = classify.process_tweets(training_tweets)

        # megam has to be configured before training with algorithm="megam".
        config_megam('/opt/packages')
        classifier = MaxentClassifier.train(training_feature_set,
                                            algorithm="megam", trace=0)

        # Number of tweets assigned to each label.
        count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
        tweets = classify.get_tweets_to_classify(conn_analysis)
        for tweet in tweets:
            # tweet[0] is passed as the tweet identifier to the helpers; the
            # text is the first column of the first returned row.
            text = classify.get_tweet_text(conn_analysis, tweet[0])[0][0]
            guess = classifier.classify(classify.process_tweet(text))
            update_tweet_polarity(tweet[0], guess, conn_analysis)
            count_table[guess] += 1

        # For the tweets where polarity was determined manually, copy from
        # majority_vote to auto_vote
        fix_manual_tweets(conn_analysis)
        print(count_table)
    finally:
        conn.close()
示例4: train
def train(self, d):
    """
    Fit the maximum entropy classifier on the labeled set ``d``.

    The records are first passed through ``self.__tag_data_set``; the model
    trained on the result is kept on ``self.classifier`` and the number of
    records in ``d`` is logged afterwards.
    """
    tagged_records = self.__tag_data_set(d)
    self.classifier = MaxentClassifier.train(tagged_records)
    record_count = len(d)
    logging.info("Training on %s records complete." % record_count)
示例5: _train
def _train(self, algo='iis', trace=0, max_iter=10):
    """
    Internal helper: build the training set and return a trained NLTK
    maxent classifier.

    ``algo``, ``trace`` and ``max_iter`` are forwarded to
    ``MaxentClassifier.train``.  The rows come from ``train_query``, a name
    resolved outside this method.
    """
    # First collect the raw (text, label) pairs from every row ...
    text_label_pairs = [(row.text, row.quote) for row in train_query]

    # ... then turn each text into its feature representation.
    labeled_features = []
    for text, label in text_label_pairs:
        labeled_features.append((get_features(text), label))

    return MaxentClassifier.train(
        labeled_features,
        algorithm=algo,
        trace=trace,
        max_iter=max_iter,
    )
示例6: classify_maxent
def classify_maxent(X_train, Y_train, X_test):
    """Train a MaxentClassifier on paired samples and label ``X_test``.

    ``X_train[i]`` is paired with ``Y_train[i]`` for every index of
    ``X_train``; the classifier trained on those pairs then predicts one
    label per entry of ``X_test``.

    Returns the labels produced by ``classify_many``.
    """
    # Pair each training sample with the label at the same position.
    labeled_samples = [
        (sample, Y_train[position])
        for position, sample in enumerate(X_train)
    ]
    model = MaxentClassifier.train(labeled_samples)
    return model.classify_many(X_test)
示例7: maxent_train
def maxent_train(self):
    """Train the maxent classifier on ``self.maxent_memes_all`` and dump its weights.

    The trained classifier is stored on ``self.classifier_all``.  Its weights
    are then written to ``lambdas.txt`` in the current working directory, one
    ``weight = <value>`` line per weight (the file is overwritten).
    """
    # Earlier experiments (previously kept as commented-out code) trained
    # with max_iter=250; the active configuration uses 5 iterations.
    self.classifier_all = MaxentClassifier.train(self.maxent_memes_all,
                                                 trace=100, max_iter=5)
    weights = self.classifier_all.weights()

    # The context manager guarantees the file is flushed and closed, even
    # when a write fails part-way through.
    with open("lambdas.txt", "w") as weights_file:
        for weight in weights:
            weights_file.write("weight = %f\n" % weight)
示例8: main_function
def _classify_and_count(conn, tweets, classifier, store_guess):
    """Classify each tweet, persist the guess, and return per-label counts.

    ``store_guess`` is called as ``store_guess(tweet_id, guess, conn)`` for
    every tweet, where ``tweet_id`` is ``tweet[0]``.
    """
    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    for tweet in tweets:
        # The text is the first column of the first returned row.
        text = classify.get_tweet_text(conn, tweet[0])[0][0]
        guess = classifier.classify(classify.process_tweet(text))
        store_guess(tweet[0], guess, conn)
        count_table[guess] += 1
    return count_table


def main_function():
    """Classify the test tweets with Naive Bayes and MaxEnt and print a summary.

    Both classifiers are trained on the same training feature set.  Each one
    labels every test tweet and stores its guess (Naive Bayes through
    ``classify.update_tweet_polarity``, MaxEnt through
    ``update_tweet_polarity_ensemble``); the per-label tallies are printed.
    Finally a matrix indexed as ``full_matrix[actual][guess]`` is built from
    the CHECK_MAJORITY / CHECK_CONSENSUS queries and printed.

    The database connection opened here is closed before returning, also
    when one of the steps raises.
    """
    conn = MySQLdb.connect(host=DATABASES['default']['HOST'],
                           user=DATABASES['default']['USER'],
                           passwd=DATABASES['default']['PASSWORD'],
                           db=DATABASES['default']['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)

        bayes_classifier = NaiveBayesClassifier.train(training_feature_set)
        test_tweets = classify.get_test_tweets(conn)
        count_table = _classify_and_count(conn, test_tweets, bayes_classifier,
                                          classify.update_tweet_polarity)
        print("Naive Bayes")
        print(count_table)

        # megam has to be configured before training with algorithm="megam".
        config_megam('/opt/packages')
        max_ent_classifier = MaxentClassifier.train(training_feature_set,
                                                    algorithm="megam", trace=0)
        count_table = _classify_and_count(conn, test_tweets, max_ent_classifier,
                                          update_tweet_polarity_ensemble)
        print("Maximum Entropy")
        print(count_table)

        # generate the accuracy matrix
        full_matrix = {'+': {'+': 0, '-': 0, 'I': 0, 'O': 0},
                       '-': {'+': 0, '-': 0, 'I': 0, 'O': 0},
                       'I': {'+': 0, '-': 0, 'I': 0, 'O': 0},
                       'O': {'+': 0, '-': 0, 'I': 0, 'O': 0}}
        for tweet in test_tweets:
            result = classify.run_sql(
                conn, classify.Statements.CHECK_CONSENSUS % tweet[0])
            guess = result[0][0]
            actual_result = classify.run_sql(
                conn, classify.Statements.CHECK_MAJORITY % tweet[0])
            actual = actual_result[0][0]
            # Only tweets that have both values contribute to the matrix.
            if guess is not None and actual is not None:
                full_matrix[actual][guess] += 1
        print(full_matrix)
    finally:
        conn.close()
示例9: axentClassifier
def axentClassifier(features_train, features_test):
    """Train a GIS MaxentClassifier and print its evaluation on the test set.

    Prints the sizes of both feature sets, the accuracy computed by
    ``nltk.classify.util.accuracy``, and the two values returned by
    ``precision_recall`` labelled as precision and recall.
    """
    print('train on %d instances, test on %d instances'
          % (len(features_train), len(features_test)))
    classifier = MaxentClassifier.train(features_train, algorithm='gis')
    print('accuracy: %s'
          % (nltk.classify.util.accuracy(classifier, features_test),))
    precisions, recalls = precision_recall(classifier, features_test)
    # These values were previously printed under the labels "accuracy" and
    # "fitness", which duplicated the accuracy line above and hid what the
    # numbers actually are.
    print('precision: %s recall: %s' % (precisions, recalls))
# def sklearnMultinomialNB(features_train, features_test):
# print 'train on %d instances, test on %d instances' % (len(features_train), len(features_test))
# classifier = SklearnClassifier(MultinomialNB())
# classifier.train
# print 'accuracy:', nltk.classify.util.accuracy(classifier, features_test)
示例10: run
def run(training):
    """
    Train a MaxentClassifier on ``training`` and pass it to ``save_classifier``.

    :param training: labelled feature sets handed straight to
        ``MaxentClassifier.train``
    :return: whatever ``save_classifier`` returns for the trained classifier
    """
    print("Training ME Classifier...")
    # An earlier variant built ``training`` here from the movie_reviews corpus
    # (label_feat_from_corps / split_label_feats); it now arrives as an argument.
    trained_model = MaxentClassifier.train(
        training,
        algorithm='GIS',
        trace=0,
        max_iter=10,
        min_lldelta=0.5,
    )
    print("ME Classifier trained...")
    return save_classifier(trained_model)
示例11: trainMaxent
def trainMaxent(featuresets):
    """Train a MaxentClassifier on all of ``featuresets`` and return it.

    The algorithm is the second entry of ``MaxentClassifier.ALGORITHMS`` and
    training runs with ``max_iter=3``.  The 100 most informative features are
    shown before the classifier is returned.
    """
    # No held-out split: a train/test split and an accuracy report existed
    # here only as commented-out code, so every feature set is used to train.
    algorithm_name = MaxentClassifier.ALGORITHMS[1]
    model = MaxentClassifier.train(featuresets, algorithm_name, max_iter=3)
    model.show_most_informative_features(100)
    return model
示例12: train
def train(cls, training_sequence, **kwargs):
    """Build an instance of ``cls`` from a megam-trained MaxentClassifier.

    Recognised keyword arguments:
      * ``feature_detector`` -- callable applied to each tagged token
      * ``gaussian_prior_sigma`` -- defaults to 10
      * ``count_cutoff`` -- defaults to 1
      * ``stopping_condition`` -- passed on as ``min_lldelta``, defaults to 1e-7

    Returns ``cls(encoding, weights)`` using the encoding and weights of the
    trained classifier.
    """
    feature_detector = kwargs.get('feature_detector')

    def _to_labeled_featureset(tagged_token):
        # The tag is the last element of the token.
        label = tagged_token[-1]
        return (feature_detector(tagged_token), label)

    maxent_options = {
        'algorithm': 'megam',
        'gaussian_prior_sigma': kwargs.get('gaussian_prior_sigma', 10),
        'count_cutoff': kwargs.get('count_cutoff', 1),
        'min_lldelta': kwargs.get('stopping_condition', 1e-7),
    }
    # LazyMap wraps the sequence instead of building a list of feature sets here.
    classifier = MaxentClassifier.train(
        LazyMap(_to_labeled_featureset, training_sequence),
        **maxent_options
    )
    return cls(classifier._encoding, classifier.weights())
示例13: trainCorpus
def trainCorpus():
    """Return the multi-label classifier, training and saving it if needed.

    When ``classifier_fname`` already exists the stored classifier is loaded
    and returned.  Otherwise one binary megam MaxentClassifier is trained per
    corpus category, the classifiers are combined into a
    ``MultiBinaryClassifier``, the 'activitiesevents' metrics are printed, and
    the result is saved before being returned.
    """
    if os.path.exists(classifier_fname):
        return LoadClassifier()

    corpus = getDealsCorpus()
    high_info_words = corpus_high_info_words(corpus)

    def detect_features(words):
        # Restrict the features to the high-information words.
        return bag_of_words_in_set(words, high_info_words)

    def train_maxent(feats):
        return MaxentClassifier.train(feats, algorithm='megam',
                                      trace=0, max_iter=10)

    train_feats, test_feats = corpus_train_test_feats(corpus, detect_features)
    label_set = set(corpus.categories())
    binary_classifiers = train_binary_classifiers(train_maxent, train_feats,
                                                  label_set)
    multi_classifier = MultiBinaryClassifier(*binary_classifiers.items())

    multi_p, multi_r, avg_md = multi_metrics(multi_classifier, test_feats)
    print("%s %s %s" % (multi_p['activitiesevents'],
                        multi_r['activitiesevents'],
                        avg_md))

    SaveClassifier(multi_classifier)
    return multi_classifier
示例14: train
def train(self, featureset=None):
    """
    Train a megam maximum entropy classifier and time the training run.

    A supplied ``featureset`` (for example one fold during cross
    validation) is used as-is; a missing or empty one falls back to
    ``self.featureset()``.

    Returns a ``(classifier, seconds)`` tuple: the trained classifier
    followed by the wall-clock training time.
    """
    if not featureset:
        featureset = self.featureset()

    # Time how long it takes to train
    started_at = time.time()
    classifier = MaxentClassifier.train(
        featureset,
        algorithm='megam',
        trace=1,
        gaussian_prior_sigma=1,
    )
    elapsed = time.time() - started_at

    return classifier, elapsed
示例15: parse
def parse():
    """Tag the WSJ_23 sentences with a maxent-based tagger and write WSJ_23.chunk.

    The data comes from ``trainer("WSJ_02-21.pos-chunk", "WSJ_23.pos")``.  A
    ``ClassifierBasedPOSTagger`` backed by ``MaxentClassifier`` is trained on
    the first 10000 training sentences.  For every entry of the second
    returned sequence, ``sents[1]`` is tagged and one ``word<TAB>tag`` line is
    written per ``sents[0]`` item, followed by a blank line.

    The output file is overwritten and is closed when the function returns,
    also when tagging or writing raises.
    """
    trained_sents, tagged_sents = trainer("WSJ_02-21.pos-chunk", "WSJ_23.pos")
    print(len(trained_sents))
    tagger = ClassifierBasedPOSTagger(
        train=trained_sents[:10000],
        classifier_builder=lambda train_feats: MaxentClassifier.train(
            train_feats, trace=0, max_iter=10))

    with open("WSJ_23.chunk", 'w') as chunk_file:
        for sents in tagged_sents:
            words, tags = sents[0], sents[1]
            chunks = tagger.tag(tags)
            # Each tagger result is a pair; its second element is what gets
            # written next to the word.
            for word, chunk in zip(words, chunks):
                chunk_file.write("%s\t%s\n" % (word, chunk[1]))
            # Blank line after each sentence.
            chunk_file.write("\n")