当前位置: 首页>>代码示例>>Python>>正文


Python Corpus.load_from_file方法代码示例

本文整理汇总了Python中corpus.Corpus.load_from_file方法的典型用法代码示例。如果您正苦于以下问题:Python Corpus.load_from_file方法的具体用法?Python Corpus.load_from_file怎么用?Python Corpus.load_from_file使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在corpus.Corpus的用法示例。


在下文中一共展示了Corpus.load_from_file方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_load_and_save

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load_from_file [as 别名]
 def test_load_and_save(self):
     """A corpus saved to a file and loaded back must match the original.

     The round trip is checked on the instances matrix and, position by
     position, on full_targets and representations. The reloaded corpus
     must also expose primary_targets.
     """
     path = 'testing_file'
     self.co.save_to_file(path)
     reloaded = Corpus()
     reloaded.load_from_file(path)
     # The instances are compared with the module's matrix-equality helper.
     matrices_match = _eq_crs_matrix(reloaded.instances, self.co.instances)
     self.assertTrue(matrices_match)
     for position in range(len(self.co)):
         # Same per-position comparison for each list-like attribute.
         for attribute in ('full_targets', 'representations'):
             self.assertEqual(getattr(self.co, attribute)[position],
                              getattr(reloaded, attribute)[position])
     self.assertIsNotNone(reloaded.primary_targets)
开发者ID:lucianosilvi,项目名称:mit0110_tesis,代码行数:15,代码来源:test_corpus.py

示例2: ActivePipeline

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load_from_file [as 别名]
class ActivePipeline(object):
    """
    Attributes:
        session_filename:

        emulate: A boolean. If set to True, the pipe will search for labels
        in the unlabeled_corpus and in the feature_corpus and will only ask the
        user if there is no information available.

        training_corpus:

        unlabeled_corpus:

        test_corpus:

        feature_corpus: A matrix of shape [n_class, n_feat] with three possible
        values. -1 indicates that the feature was never asked to the user for
        that class, 0 indicates no relation, and 1 indicates relation between
        feature and class. The feature corpus will be loaded from the file
        self.feature_label_f introduced by the config, and will be used only
        during user emulation. It can be updated using the function
        label_feature_corpus.

        recorded_precision:

        new_instances:

        new_features:

        classes:

        user_features:

        user_corpus:
    """

    def __init__(self, session_filename='', emulate=False, **kwargs):
        """Configure the pipe, load its corpora and train the classifier.

        Args:
            session_filename: Optional. The name of a file storing a session
            that will be loaded using the method load_session.
            emulate: a boolean. Will set the attribute emulate accordingly.
            **kwargs: the configuration for the pipe. Each parameter passed
            will be converted to an attribute of the pipe. The minimum
            configuration possible is set in the defaults file, and each value
            not passed as a parameter will be taken from there.
        """
        self.session_filename = session_filename
        self.emulate = emulate
        # The configuration goes first: the corpus loaders below read the
        # file-name attributes (training_corpus_f, u_corpus_f, ...) from self.
        self._set_config(kwargs)
        self._get_corpus()
        self._get_feature_corpus()
        self.recorded_precision = []
        # NOTE(review): load_session is not visible in this excerpt. The
        # attributes assigned right after it are reset no matter what the
        # session restored -- confirm that this is intended.
        self.load_session()
        self.user_features = None
        self.new_instances = 0
        self.new_features = 0
        self.classes = []
        self._train()
        # Replaces the None placeholder stored in user_features above.
        self._build_feature_boost()

    def _set_config(self, config):
        """Sets the keys of config+default_config dict as an attribute of self.

        Values in config take precedence over the ones in default_config.
        Keys whose final value is None are skipped, so no attribute is
        created for them.

        Args:
            config: a dict with the configuration values overriding the
            defaults.
        """
        # Merge on a copy: updating default_config in place would change the
        # shared module-level dict, and the overrides of this pipe would leak
        # into every pipe created afterwards.
        merged_config = dict(default_config)
        merged_config.update(config)
        for key, value in merged_config.items():
            if value is not None:
                setattr(self, key, value)

    def _get_corpus(self):
        """Create the corpora of the pipe and load the file-backed ones.

        training_corpus, unlabeled_corpus and test_corpus are loaded from the
        files named by training_corpus_f, u_corpus_f and test_corpus_f.
        user_corpus is created empty.
        """
        file_backed = (
            ('training_corpus', 'training_corpus_f'),
            ('unlabeled_corpus', 'u_corpus_f'),
            ('test_corpus', 'test_corpus_f'),
        )
        for corpus_attr, filename_attr in file_backed:
            # Assign the empty corpus first and load it afterwards, one
            # corpus at a time.
            setattr(self, corpus_attr, Corpus())
            getattr(self, corpus_attr).load_from_file(
                getattr(self, filename_attr))

        self.user_corpus = Corpus()

    def _get_feature_corpus(self):
        """Loads the feature corpus from self.feature_corpus_f"""
        f = open(self.feature_corpus_f, 'r')
        self.feature_corpus = pickle.load(f)
        f.close()

    def _build_feature_boost(self):
        """Creates the user_features np.array with defaults values."""
        self.alpha = self.classifier.alpha
        self.n_class, self.n_feat = self.classifier.feature_log_prob_.shape
        self.user_features = np.array([[self.alpha] * self.n_feat] * self.n_class)
        if self.emulate:
            self.asked_features = self.feature_corpus == 0
        else:
            self.asked_features = self.user_features != self.alpha # False

    def _train(self):
        """Fit the classifier with the training set plus the new vectors and
        features. Then performs a step of EM.
#.........这里部分代码省略.........
开发者ID:mit0110,项目名称:tesis,代码行数:103,代码来源:activepipe.py

示例3: Corpus

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load_from_file [as 别名]
from corpus import Corpus
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix, classification_report

# Three corpora: unlabeled (co), training (c2) and test (ct).
co, c2, ct = Corpus(), Corpus(), Corpus()

co.load_from_file('corpus/experimental2/unlabeled_new_corpus.pickle')
ct.load_from_file('corpus/experimental2/test_new_corpus.pickle')
c2.load_from_file('corpus/experimental2/training_new_corpus.pickle')

# Fit on the unlabeled corpus joined with the training corpus.
co.concetenate_corpus(c2)

mnb = MultinomialNB()
mnb.fit(co.instances, co.primary_targets)
print(mnb.score(ct.instances, ct.primary_targets))
predicted_targets = mnb.predict(ct.instances)
print(classification_report(ct.primary_targets, predicted_targets))
cm = confusion_matrix(ct.primary_targets, predicted_targets)
# For each row of the confusion matrix, show its class and the non-zero
# cells as (class, count) pairs.
# NOTE(review): rows are labelled with mnb.classes_; this assumes the matrix
# uses the same label order as the classifier -- confirm.
for index, row in enumerate(cm):
    print('%s %s' % (mnb.classes_[index],
                     [(mnb.classes_[j], row[j])
                      for j in range(len(row))
                      if row[j]]))

# Sum of the diagonal cells whose class is not 'other'.
new_q = sum(cm[position][position]
            for position in range(len(cm))
            if mnb.classes_[position] != 'other')
# NOTE(review): the meaning of the constant 135 is not visible here -- confirm.
print(new_q / float(cm.sum() - 135))
开发者ID:mit0110,项目名称:tesis,代码行数:32,代码来源:prueba.py

示例4: Corpus

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load_from_file [as 别名]
from corpus import Corpus
from collections import defaultdict
import numpy as np

# Corpus to balance; the rest of the script reads and modifies `co`.
co = Corpus()
co.load_from_file('experimental/unlabeled_new_corpus2.pickle')

def count_by_class(corpus):
    """Returns a dictionary with the number of instances by class.

    Args:
        corpus: an object whose primary_targets attribute is an iterable
        with one target (class) per instance.

    Returns:
        A defaultdict mapping each target to the number of times it appears
        in corpus.primary_targets. Missing targets default to 0.
    """
    result = defaultdict(int)
    # Count over the corpus received as argument instead of the module-level
    # `co`, so the function works with any corpus.
    for target in corpus.primary_targets:
        result[target] += 1
    return result

c_by_class = count_by_class(co)

for label, amount in c_by_class.items():
    print('%s %s' % (label, amount))

# The second largest class size is the cap for every class.
limit = sorted(c_by_class.values(), reverse=True)[1]
# limit = 10
# NOTE(review): this value is overwritten on the next statement. The lookup
# is kept because it adds an 'other' entry to the defaultdict when missing.
to_remove = c_by_class['other'] - limit
to_remove = dict((label, amount - limit)
                 for label, amount in c_by_class.items())
print('%s %s' % (to_remove, limit))

# Walk the corpus backwards so popping an instance does not shift the
# positions still to be visited.
# NOTE(review): position 0 is never visited -- confirm that is intended.
for position in reversed(range(1, len(co))):
    target = co.primary_targets[position]
    if to_remove[target] <= 0:
        continue
    co.pop_instance(position)
    to_remove[target] -= 1
开发者ID:mit0110,项目名称:tesis,代码行数:32,代码来源:balance_tr_corpus.py


注:本文中的corpus.Corpus.load_from_file方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。