本文整理汇总了Python中corpus.Corpus.load_from_file方法的典型用法代码示例。如果您正苦于以下问题：Python Corpus.load_from_file方法的具体用法？Python Corpus.load_from_file怎么用？Python Corpus.load_from_file使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类corpus.Corpus的用法示例。
在下文中一共展示了Corpus.load_from_file方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_load_and_save
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load_from_file [as 别名]
def test_load_and_save(self):
    """Load and save functions must be inverses.

    Saves self.co to a scratch file, loads it into a fresh Corpus and
    checks that instances, full targets and representations match. The
    scratch file is deleted afterwards so the test leaves nothing behind.
    """
    import os  # local import: the module's import block is not visible here

    filename = 'testing_file'
    try:
        self.co.save_to_file(filename)
        new_co = Corpus()
        new_co.load_from_file(filename)
    finally:
        # Clean up even when saving or loading fails; the existence check
        # covers the case where save_to_file never created the file.
        if os.path.exists(filename):
            os.remove(filename)

    self.assertTrue(_eq_crs_matrix(new_co.instances, self.co.instances))
    for index in range(len(self.co)):
        self.assertEqual(self.co.full_targets[index],
                         new_co.full_targets[index])
        self.assertEqual(self.co.representations[index],
                         new_co.representations[index])
    self.assertIsNotNone(new_co.primary_targets)
示例2: ActivePipeline
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load_from_file [as 别名]
class ActivePipeline(object):
"""Pipe that loads its corpora, restores a session and trains a classifier.

Attributes:
    session_filename: Name of the file storing a session, as passed to the
        constructor (empty string by default).
    emulate: A boolean. If it is set to True, the pipe will search for
        labels in the unlabeled_corpus and in the feature_corpus and will
        only ask the user if there is no information available.
    training_corpus: Corpus loaded from the file self.training_corpus_f.
    unlabeled_corpus: Corpus loaded from the file self.u_corpus_f.
    test_corpus: Corpus loaded from the file self.test_corpus_f.
    feature_corpus: A matrix of shape [n_class, n_feat] with three possible
        values. -1 indicates that the feature was never asked to the user
        for that class, 0 indicates no relation, and 1 indicates relation
        between feature and class. The feature corpus is unpickled from the
        file self.feature_corpus_f introduced by the config, and will be
        used only during user emulation. It can be updated using the
        function label_feature_corpus.
    recorded_precision: A list, created empty by the constructor.
    new_instances: An integer set to 0 by the constructor; presumably the
        number of instances added during the session -- TODO confirm.
    new_features: An integer set to 0 by the constructor; presumably the
        number of features added during the session -- TODO confirm.
    classes: A list, created empty by the constructor.
    user_features: An np.array of shape [n_class, n_feat] whose entries all
        start as the classifier's alpha (built by _build_feature_boost).
    user_corpus: A Corpus created empty by _get_corpus.
"""
def __init__(self, session_filename='', emulate=False, **kwargs):
    """Configure the pipe, load its data, restore the session and train.

    Args:
        session_filename: Optional. The name of a file storing a session
            that will be loaded using the method load_session.
        emulate: A boolean. Stored as the attribute emulate.
        **kwargs: The configuration for the pipe. Each parameter passed
            is converted to an attribute of the pipe. The minimum
            configuration possible is set in the defaults file, and each
            value not passed as a parameter is taken from there.
    """
    self.session_filename = session_filename
    self.emulate = emulate
    self._set_config(kwargs)

    self._get_corpus()
    self._get_feature_corpus()

    self.recorded_precision = list()
    self.load_session()

    # NOTE(review): the attributes below are assigned after load_session();
    # if that method restores any of them, the restored values are
    # overwritten here -- confirm this is intended.
    self.user_features = None
    self.new_instances = self.new_features = 0
    self.classes = list()

    self._train()
    self._build_feature_boost()
def _set_config(self, config):
    """Set the keys of default_config + config as attributes of self.

    Args:
        config: A dict of configuration overrides. Its values take
            precedence over the ones in the module-level default_config.

    Entries whose value is None are skipped, so no attribute is created
    for them.
    """
    # Merge into a copy: calling default_config.update(config) would
    # modify the shared defaults in place and leak this instance's
    # overrides into every pipe created afterwards.
    merged_config = dict(default_config)
    merged_config.update(config)
    for key, value in merged_config.items():
        if value is not None:
            setattr(self, key, value)
def _get_corpus(self):
    """Load the training, unlabeled and test corpora; create user_corpus.

    Each corpus is read from the file named by the matching configuration
    attribute (training_corpus_f, u_corpus_f, test_corpus_f). user_corpus
    is only instantiated, nothing is loaded into it.
    """
    corpus_sources = (
        ('training_corpus', 'training_corpus_f'),
        ('unlabeled_corpus', 'u_corpus_f'),
        ('test_corpus', 'test_corpus_f'),
    )
    for corpus_attr, filename_attr in corpus_sources:
        loaded = Corpus()
        setattr(self, corpus_attr, loaded)
        loaded.load_from_file(getattr(self, filename_attr))
    self.user_corpus = Corpus()
def _get_feature_corpus(self):
"""Loads the feature corpus from self.feature_corpus_f"""
f = open(self.feature_corpus_f, 'r')
self.feature_corpus = pickle.load(f)
f.close()
def _build_feature_boost(self):
"""Creates the user_features np.array with defaults values."""
self.alpha = self.classifier.alpha
self.n_class, self.n_feat = self.classifier.feature_log_prob_.shape
self.user_features = np.array([[self.alpha] * self.n_feat] * self.n_class)
if self.emulate:
self.asked_features = self.feature_corpus == 0
else:
self.asked_features = self.user_features != self.alpha # False
def _train(self):
"""Fit the classifier with the training set plus the new vectors and
features. Then performs a step of EM.
#.........这里部分代码省略.........
示例3: Corpus
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load_from_file [as 别名]
from corpus import Corpus
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix, classification_report
co = Corpus()
c2 = Corpus()
ct = Corpus()
co.load_from_file('corpus/experimental2/unlabeled_new_corpus.pickle')
ct.load_from_file('corpus/experimental2/test_new_corpus.pickle')
c2.load_from_file('corpus/experimental2/training_new_corpus.pickle')
co.concetenate_corpus(c2)
mnb = MultinomialNB()
mnb.fit(co.instances, co.primary_targets)
print mnb.score(ct.instances, ct.primary_targets)
predicted_targets = mnb.predict(ct.instances)
print classification_report(ct.primary_targets, predicted_targets)
cm = confusion_matrix(ct.primary_targets, predicted_targets)
for index, row in enumerate(cm):
print mnb.classes_[index], [(mnb.classes_[j], row[j])
for j in range(len(row))
if row[j]]
new_q = 0
for index, row in enumerate(cm):
if mnb.classes_[index] != 'other':
new_q += cm[index][index]
print new_q / float(cm.sum()-135)
示例4: Corpus
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load_from_file [as 别名]
from corpus import Corpus
from collections import defaultdict
import numpy as np
# Load the corpus whose instances are counted per class and pruned by the
# statements below.
co = Corpus()
co.load_from_file('experimental/unlabeled_new_corpus2.pickle')
def count_by_class(corpus):
    """Return a dictionary with the number of instances by class.

    Args:
        corpus: object exposing primary_targets, an iterable with the
            target (class) of each instance.

    Returns:
        A defaultdict mapping each target to the number of times it
        appears in corpus.primary_targets; missing targets read as 0.
    """
    result = defaultdict(int)
    # Count the targets of the corpus received as argument, not the ones
    # of the module-level corpus.
    for target in corpus.primary_targets:
        result[target] += 1
    return result
c_by_class = count_by_class(co)
for k, v in c_by_class.items():
print k, v
limit = sorted(c_by_class.values())[-2]
# limit = 10
to_remove = c_by_class['other'] - limit
to_remove = {k: c_by_class[k] - limit for k in c_by_class}
print to_remove, limit
for i in range(len(co)-1, 0, -1):
target = co.primary_targets[i]
if to_remove[target] > 0:
co.pop_instance(i)
to_remove[target] -= 1