当前位置: 首页>>代码示例>>Python>>正文


Python Dictionary.load方法代码示例

本文整理汇总了Python中gensim.corpora.Dictionary.load方法的典型用法代码示例。如果您正苦于以下问题:Python Dictionary.load方法的具体用法?Python Dictionary.load怎么用?Python Dictionary.load使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在gensim.corpora.Dictionary的用法示例。


在下文中一共展示了Dictionary.load方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: loadmodel

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def loadmodel(self, nameprefix):
    """ Load a trained model from files.

    Given the prefix of the file paths, load the model through
    ``kerasio.load_model(nameprefix)``, the gensim dictionary from
    ``<nameprefix>_dictionary.dict``, the class labels from
    ``<nameprefix>_classlabels.txt`` (one label per line), and the
    label-to-index mapping from ``<nameprefix>_labelidx.pkl``.

    If this has not been run, or a model was not trained by :func:`~train`,
    a `ModelNotTrainedException` will be raised while performing prediction or saving the model.

    :param nameprefix: prefix of the file path
    :return: None
    :type nameprefix: str
    """
    self.model = kerasio.load_model(nameprefix)

    self.dictionary = Dictionary.load(nameprefix+'_dictionary.dict')

    # Context managers guarantee the handles are closed even if reading fails.
    with open(nameprefix+'_classlabels.txt', 'r') as labelfile:
        self.classlabels = [line.strip() for line in labelfile]

    # NOTE: unpickling can execute arbitrary code; only load trusted model files.
    with open(nameprefix+'_labelidx.pkl', 'rb') as labelidxfile:
        self.labels2idx = pickle.load(labelidxfile)

    self.trained = True
开发者ID:stephenhky,项目名称:PyShortTextCategorization,代码行数:26,代码来源:MaxEntClassification.py

示例2: load_maxent_classifier

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def load_maxent_classifier(name, compact=True):
    """ Build a `MaxEntClassifier` and populate it from saved model file(s).

    A fresh classifier is created, then either its ``load_compact_model``
    (when `compact` is true) or its ``loadmodel`` (otherwise) is called
    with `name`.

    :param name: name of the compact model file, or prefix of the model files if `compact` is False
    :param compact: whether the model file is compact (Default: True)
    :return: maximum entropy classifier
    :type name: str
    :type compact: bool
    :rtype: MaxEntClassifier
    """
    classifier = MaxEntClassifier()
    # Pick the loader once, then invoke it; only the selected attribute is looked up.
    loader = classifier.load_compact_model if compact else classifier.loadmodel
    loader(name)
    return classifier
开发者ID:stephenhky,项目名称:PyShortTextCategorization,代码行数:20,代码来源:MaxEntClassification.py

示例3: loadmodel

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def loadmodel(self, prefix):
    """ Load the model.

    Reads the gensim dictionary from ``<prefix>_vocabs.gensimdict`` and the
    configuration from ``<prefix>_config.json``, rebuilds the binarizer and
    encoders from those settings, and finally loads the model itself through
    ``kerasio.load_model(prefix)``.

    :param prefix: prefix of the model path
    :return: None
    :type prefix: str
    """
    self.dictionary = Dictionary.load(prefix+'_vocabs.gensimdict')

    # Close the configuration file deterministically instead of leaking the handle.
    with open(prefix+'_config.json', 'r') as configfile:
        parameters = json.load(configfile)

    self.operation = parameters['operation']
    self.alph = parameters['alph']
    self.specialsignals = parameters['special_signals']
    self.binarizer = SCRNNBinarizer(self.alph, self.specialsignals)
    self.concatcharvec_encoder = SpellingToConcatCharVecEncoder(self.alph)
    self.batchsize = parameters['batchsize']
    self.nb_hiddenunits = parameters['nb_hiddenunits']

    # Fit the one-hot encoder on a column vector of every dictionary index.
    nb_vocabs = len(self.dictionary)
    self.onehotencoder = OneHotEncoder()
    self.onehotencoder.fit(np.arange(nb_vocabs).reshape((nb_vocabs, 1)))

    self.model = kerasio.load_model(prefix)
    self.trained = True
开发者ID:stephenhky,项目名称:PyShortTextCategorization,代码行数:22,代码来源:sakaguchi.py

示例4: __init__

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def __init__(self, dirpath=".", tofull=False):
    """
    Set up the transformer from a directory of saved artifacts.

    The lexicon is expected at ``<dirpath>/corpus.dict`` and the TFIDF
    model at ``<dirpath>/tfidf.model`` (for now). Both attributes start
    as None and are then filled in by ``self.load()``.

    Set tofull = True if the next thing is a Scikit-Learn estimator
    otherwise keep False if the next thing is a Gensim model.
    """
    self.tofull = tofull

    # Placeholders until load() replaces them.
    self.lexicon = self.tfidf = None

    self._lexicon_path = os.path.join(dirpath, "corpus.dict")
    self._tfidf_path = os.path.join(dirpath, "tfidf.model")

    self.load()
开发者ID:foxbook,项目名称:atap,代码行数:18,代码来源:transformers.py

示例5: raw2ppmi

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def raw2ppmi(pathtoraw, corpusname, shifts=(1, 5, 10)):
    """
    Creates shifted PPMI matrices out of a raw co-occurrence matrix.

    The raw matrix is loaded with ``np.load`` and converted with
    ``SPPMIFactory.raw2pmi``. For every shift magnitude, a copy of the PMI
    matrix is passed through ``SPPMIFactory.shift_clip_pmi`` and the result
    is saved as ``<corpusname>-SPPMI-sparse-<k>-shift.npz``.

    :param pathtoraw: The path to the raw co-occurrence matrix.
    :param corpusname: The name of the corpus.
    :param shifts: A tuple containing shift magnitudes.
    :return: None
    """
    filename_template = "{0}-SPPMI-sparse-{1}-shift.npz"

    # Build the PMI matrix once; every shift works on its own copy.
    pmi_matrix = SPPMIFactory.raw2pmi(np.load(pathtoraw))

    for shift in shifts:
        shifted = SPPMIFactory.shift_clip_pmi(np.copy(pmi_matrix), k_shift=shift)
        SPPMIFactory._save_sparse_mtr(shifted, filename_template.format(corpusname, shift))
        # Drop the reference before the next iteration allocates another matrix.
        del shifted
开发者ID:clips,项目名称:dutchembeddings,代码行数:25,代码来源:create_sppmi.py

示例6: loadmodel

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def loadmodel(self, prefix):
    """ Load the model.

    Reads the document ids from ``<prefix>_docids.pkl``, rebuilds the
    id-to-position lookup from them, loads the gensim dictionary from
    ``<prefix>_dictionary.dict`` and the document-term matrix from
    ``<prefix>_dtm.pkl``.

    :param prefix: prefix of the files
    :return: None
    :type prefix: str
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted model files.
    # Context managers make sure both pickle files are closed after reading.
    with open(prefix+'_docids.pkl', 'rb') as docidsfile:
        self.docids = pickle.load(docidsfile)
    # Map each document id to its position in the loaded list.
    self.docid_dict = {docid: i for i, docid in enumerate(self.docids)}
    self.dictionary = Dictionary.load(prefix+'_dictionary.dict')
    with open(prefix+'_dtm.pkl', 'rb') as dtmfile:
        self.dtm = pickle.load(dtmfile)
开发者ID:stephenhky,项目名称:PyShortTextCategorization,代码行数:13,代码来源:dtm.py

示例7: loadmodel

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def loadmodel(self, nameprefix, load_incomplete=False):
    """ Load the model from files named according to the prefix.

    Given the prefix of the file paths, load the parameters from
    ``<nameprefix>.json``, the gensim dictionary from
    ``<nameprefix>.gensimdict``, the encoder through
    ``kerasio.load_model(nameprefix + '_encoder')`` and the class topic
    vectors from ``<nameprefix>_classtopicvecs.pkl``. Unless
    `load_incomplete` is set, the decoder (``_decoder``) and autoencoder
    (``_autoencoder``) are loaded the same way as the encoder.

    :param nameprefix: prefix of the paths of the file
    :param load_incomplete: load encoder only, not decoder and autoencoder file (Default: False; put True for model built in version <= 0.2.1)
    :return: None
    :type nameprefix: str
    :type load_incomplete: bool
    """
    # load the JSON file (parameters); the handle is closed as soon as it is parsed
    with open(nameprefix+'.json', 'r') as paramfile:
        parameters = json.load(paramfile)
    self.nb_topics = parameters['nb_topics']
    self.classlabels = parameters['classlabels']

    self.dictionary = Dictionary.load(nameprefix + '.gensimdict')
    self.encoder = kerasio.load_model(nameprefix+'_encoder')

    # NOTE: unpickling can execute arbitrary code; only load trusted model files.
    with open(nameprefix+'_classtopicvecs.pkl', 'rb') as vecsfile:
        self.classtopicvecs = pickle.load(vecsfile)

    if not load_incomplete:
        self.decoder = kerasio.load_model(nameprefix+'_decoder')
        self.autoencoder = kerasio.load_model(nameprefix+'_autoencoder')
    self.trained = True
开发者ID:stephenhky,项目名称:PyShortTextCategorization,代码行数:28,代码来源:AutoEncodingTopicModeling.py

示例8: savemodel

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def savemodel(self, nameprefix):
    """ Save the model with names according to the prefix.

    Given the prefix of the file paths, save the corresponding topic model. The files
    include a JSON (.json) file that specifies various parameters, a gensim dictionary (.gensimdict),
    a topic model (.gensimmodel) and a similarity matrix (.gensimmat).
    If weighing is applied, the tf-idf model (.gensimtfidf) is saved as well.

    If neither :func:`~train` nor :func:`~loadmodel` was run, it will raise `ModelNotTrainedException`.

    :param nameprefix: prefix of the file paths
    :return: None
    :raise: ModelNotTrainedException
    :type nameprefix: str
    """
    if not self.trained:
        raise e.ModelNotTrainedException()

    parameters = {
        'nb_topics': self.nb_topics,
        'toweigh': self.toweigh,
        'algorithm': self.algorithm,
        'classlabels': self.classlabels,
    }
    # Close the file explicitly so the JSON is flushed to disk before returning,
    # rather than whenever the file object happens to be garbage collected.
    with open(nameprefix+'.json', 'w') as paramfile:
        json.dump(parameters, paramfile)

    self.dictionary.save(nameprefix+'.gensimdict')
    self.topicmodel.save(nameprefix+'.gensimmodel')
    self.matsim.save(nameprefix+'.gensimmat')
    if self.toweigh:
        self.tfidf.save(nameprefix+'.gensimtfidf')
开发者ID:stephenhky,项目名称:PyShortTextCategorization,代码行数:30,代码来源:GensimTopicModeling.py

示例9: load

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def load(self):
    """
    Load the lexicon and the TFIDF model from disk when their files exist.

    A path that does not exist is skipped silently and the corresponding
    attribute (``self.lexicon`` / ``self.tfidf``) is left untouched.

    :return: None
    """
    if os.path.exists(self._lexicon_path):
        self.lexicon = Dictionary.load(self._lexicon_path)

    if os.path.exists(self._tfidf_path):
        # ``load`` is a classmethod; call it on the class instead of
        # constructing an empty throwaway TfidfModel first.
        self.tfidf = TfidfModel.load(self._tfidf_path)
开发者ID:foxbook,项目名称:atap,代码行数:9,代码来源:transformers.py

示例10: init

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def init(self,system,subclass):
    """
    Load the dictionary and TFIDF model for one (system, subclass) pair,
    train a KNN model on that pair's rows from ``monitor_cluster_dbscan``
    and, if a model was produced, register all three in ``self.models``
    under the key ``"<system>-<subclass>"``.

    :param system: business system code, used in file names and in the query
    :param subclass: rule type code, used in file names and in the query
    :return: None
    """
    db_conn = self.data_processor.connect_db(
        self.conf.db_host,
        self.conf.db_database,
        self.conf.db_user,
        self.conf.db_pass
    )
    # Load the vocabulary and the model; keep the start time for the duration report.
    started_at = time.time()

    logger.debug("正在初始化[%s-%s]的模型加载",system,subclass)

    name_suffix = system + "_" + subclass

    dict_file = "dictionary_" + name_suffix + ".dic"
    vocab = Dictionary.load(self.model_dir+"/" + dict_file)
    logger.debug("加载了字典:%s", dict_file)
    logger.debug("词袋一共%d个词", len(vocab.keys()))

    tfidf_file = "tfidf_" + name_suffix + ".model"
    tfidf_model = TfidfModel.load(self.model_dir+"/" + tfidf_file)
    logger.debug("加载了TFIDF模型:%s", tfidf_file)

    # NOTE(review): the query is assembled with str.format; confirm that
    # `system` and `subclass` never come from untrusted input.
    query = "select * from monitor_cluster_dbscan where business_system_code='{}' and rule_type_code='{}'".format(system,subclass)
    train_frame = pd.read_sql(query, db_conn)

    # Fit the KNN model that is later used for prediction.
    knn_model = self.get_KNN_model(train_frame,vocab,tfidf_model)
    duration(started_at,"根据字典和此分类数据,基于tfidf向量,训练出KNN模型")

    # Nothing to register when no KNN model could be built.
    if knn_model is None:
        return

    self.models[system+"-"+subclass] = {
        'model':tfidf_model, 'dictionary':vocab, 'knn':knn_model
    }
开发者ID:newsettle,项目名称:ns4_chatbot,代码行数:35,代码来源:predictor.py

示例11: main

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def main():
    """
    End-to-end smoke run: load the dictionary and TFIDF model, train a KNN
    model on rows read from ``cluster_dbscan``, predict classes for rows
    read from ``clean_cut_data`` and log, for every test row, its predicted
    class next to the title of the first training row of that class.
    """
    _init()

    # Load the vocabulary and the TFIDF model.
    tick = time()
    vocab = Dictionary.load("../out/dictionary.dic")
    logger.debug("词袋一共%d个词",len(vocab.keys()))
    tfidf_model = TfidfModel.load("../out/tfidf.model")
    tick = duration(tick,"加载词表和TFIDF模型")

    # Load the training data set.
    tick = time()
    train_frame = pd.read_sql("select * from cluster_dbscan limit  9900", connect_db())
    tick = duration(tick,"加载历史数据用于训练KNN")

    knn_model = get_KNN_model(train_frame,vocab,tfidf_model)
    tick = duration(tick,"训练出KNN模型")

    # Load the test rows and turn their documents into TFIDF vectors.
    test_frame = pd.read_sql("select * from clean_cut_data  limit 9900,100", connect_db())
    documents = test_frame['html_cut'].tolist()
    test_vectors = get_tfidf_vector(documents,vocab,tfidf_model)
    logger.debug("x_test's shape:%r",test_vectors.shape)
    tick = duration(tick,"加载测试数据")

    predicted = knn_model.predict(test_vectors)
    tick = duration(tick,"预测结果")
    logger.debug("预测结果:")
    logger.debug(predicted)

    test_frame['classes'] = predicted

    for _, test_row in test_frame.iterrows():
        predicted_class = test_row['classes']
        test_title = test_row['work_order_title']
        # Title of the first training row that carries the predicted class.
        same_class_rows = train_frame[train_frame['classes']==predicted_class]
        sample_title = same_class_rows.iloc[0,:]['work_order_title']
        logger.debug("类别(%d),测试标题(%s),样本标题(%s)",predicted_class,test_title,sample_title)
开发者ID:newsettle,项目名称:ns4_chatbot,代码行数:40,代码来源:tfidf_test.py

示例12: _save_sparse_mtr

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def _save_sparse_mtr(sparse_mtr, filename):
    """
    Write the components of a sparse matrix to a single ``.npz`` archive.

    The matrix's ``data``, ``indices``, ``indptr`` and ``shape`` attributes
    are stored under those same names with ``np.savez``.

    snippet from: http://stackoverflow.com/questions/8955448/save-load-scipy-sparse-csr-matrix-in-portable-data-format

    :param sparse_mtr: the matrix to save.
    :param filename: the filename to which to save the matrix.
    :return: None
    """
    # Collect the named arrays first, then hand them over as keyword arguments.
    components = {
        "data": sparse_mtr.data,
        "indices": sparse_mtr.indices,
        "indptr": sparse_mtr.indptr,
        "shape": sparse_mtr.shape,
    }
    np.savez(filename, **components)
开发者ID:clips,项目名称:dutchembeddings,代码行数:14,代码来源:create_sppmi.py

示例13: load

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def load(self, dir_path):
    """
    Load the vocabulary, the TFIDF model and the similarity index from
    the files named by ``VOCAB_FNAME``, ``TFIDF_FNAME`` and ``INDEX_FNAME``
    inside `dir_path`.

    :param dir_path: directory that holds the three saved files
    :return: None
    """
    base_dir = Path(dir_path)

    # Resolve all three paths up front, then load each artifact.
    file_names = (self.VOCAB_FNAME, self.TFIDF_FNAME, self.INDEX_FNAME)
    vocab_file, model_file, index_file = [str(base_dir / name) for name in file_names]

    self.vocab = Dictionary.load(vocab_file)
    self.model = TfidfModel.load(model_file)
    self.index = SparseMatrixSimilarity.load(index_file)
开发者ID:naver,项目名称:claf,代码行数:12,代码来源:tfidf.py

示例14: create

# 需要导入模块: from gensim.corpora import Dictionary [as 别名]
# 用法: Dictionary.load(fname) (load 是 Dictionary 的类方法, 无法通过 from ... import 单独导入)
def create(pathtomapping, pathtocorpus, corpusname, window, numtokeep=50000, save_raw=True, shifts=(1, 5, 10)):
    """
    Creates Shifted Positive Pointwise Mutual Information matrices.

    The id2word mapping is either loaded from `pathtomapping` or rebuilt
    from the corpus. A raw co-occurrence matrix is then computed, optionally
    saved, converted with ``SPPMIFactory.raw2pmi`` and, for every shift,
    passed through ``SPPMIFactory.shift_clip_pmi`` and saved separately.
    The word2id mapping and the frequencies are saved alongside.

    :param pathtomapping: The path to the id2word mapping. If this is left empty, the id2word mapping gets
    recreated. Warning: this takes a long time.
    :param pathtocorpus: The path to the corpus folder. The corpus can be spread out over multiple files or folders,
    and is read iteratively.
    :param corpusname: The name of the corpus. Used for saving the files.
    :param window: The window used to consider co-occurrences.
    :param numtokeep: The number of most frequent words to keep. Note that the matrix is non-sparse.
    Because of this, the memory requirements of the code are quadratic.
    :param save_raw: Whether to save the raw co-occurrence matrix as a numpy matrix.
    :param shifts: The shifts to apply to the co-occurrence matrix. Each shifted matrix
    gets saved as a separate model.
    """

    mapping_started = time.time()

    if pathtomapping:
        id2word = Dictionary.load(pathtomapping)
    else:
        # No saved mapping: rebuild it from the corpus and prune it.
        id2word = Dictionary(SentenceIter(pathtocorpus), prune_at=None)
        id2word.filter_extremes(no_below=5, keep_n=numtokeep)
        id2word.compactify()
        logger.info("Creating the word2id took {0} seconds".format(time.time() - mapping_started))

    cooccur_started = time.time()

    word2id = gensim.utils.revdict(id2word)

    matrix = get_cooccur(SentenceIter(pathtocorpus), word2id, window=window)

    logger.info("Creating raw co-occurrence matrix took {0} seconds".format(time.time() - cooccur_started))

    if save_raw:
        np.save('{0}-cooccur.npy'.format(corpusname), matrix)

    SPPMIFactory._save_word2id(word2id, "{0}mapping.json".format(corpusname))
    SPPMIFactory._save_freqs(id2word, "{0}freqs.json".format(corpusname))

    # Rebind the same name so the raw matrix is no longer referenced from here.
    matrix = SPPMIFactory.raw2pmi(matrix)

    for shift in shifts:
        shifted = SPPMIFactory.shift_clip_pmi(np.copy(matrix), k_shift=shift)
        SPPMIFactory._save_sparse_mtr(shifted, "{0}-SPPMI-sparse-{1}-shift.npz".format(corpusname, shift))
        # Drop the reference before the next iteration allocates another matrix.
        del shifted
开发者ID:clips,项目名称:dutchembeddings,代码行数:50,代码来源:create_sppmi.py


注:本文中的gensim.corpora.Dictionary.load方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。