當前位置: 首頁>>代碼示例>>Python>>正文


Python Dictionary.load方法代碼示例

本文整理匯總了Python中gensim.corpora.Dictionary.load方法的典型用法代碼示例。如果您正苦於以下問題:Python Dictionary.load方法的具體用法?Python Dictionary.load怎麼用?Python Dictionary.load使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在gensim.corpora.Dictionary的用法示例。


在下文中一共展示了Dictionary.load方法的14個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: loadmodel

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def loadmodel(self, nameprefix):
        """ Load a trained model from files.

        Given the prefix of the file paths, load the model from files with name given by the prefix.
        The Keras model is read by `kerasio.load_model(nameprefix)`; in addition the files
        "<prefix>_dictionary.dict" (gensim dictionary), "<prefix>_classlabels.txt" (one class
        label per line) and "<prefix>_labelidx.pkl" (pickled label-to-index mapping) are read.

        If this has not been run, or a model was not trained by :func:`~train`,
        a `ModelNotTrainedException` will be raised while performing prediction or saving the model.

        :param nameprefix: prefix of the file path
        :return: None
        :type nameprefix: str
        """
        self.model = kerasio.load_model(nameprefix)

        self.dictionary = Dictionary.load(nameprefix+'_dictionary.dict')

        # Context managers close the handles even when reading raises.
        with open(nameprefix+'_classlabels.txt', 'r') as labelfile:
            self.classlabels = [line.strip() for line in labelfile]

        # NOTE(review): unpickling executes arbitrary code; only load trusted model files.
        with open(nameprefix+'_labelidx.pkl', 'rb') as idxfile:
            self.labels2idx = pickle.load(idxfile)

        self.trained = True
開發者ID:stephenhky,項目名稱:PyShortTextCategorization,代碼行數:26,代碼來源:MaxEntClassification.py

示例2: load_maxent_classifier

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def load_maxent_classifier(name, compact=True):
    """ Create a maximum entropy classifier and fill it from saved model file(s).

    A fresh `MaxEntClassifier` is instantiated and then populated either through
    `load_compact_model` (when `compact` is true) or through `loadmodel` (otherwise).

    :param name: name of the compact model file if `compact` is True, else prefix of the model files
    :param compact: whether the model file is compact (Default: True)
    :return: maximum entropy classifier
    :type name: str
    :type compact: bool
    :rtype: MaxEntClassifier
    """
    classifier = MaxEntClassifier()
    # Pick the loader matching the storage format, then run it once.
    loader = classifier.load_compact_model if compact else classifier.loadmodel
    loader(name)
    return classifier
開發者ID:stephenhky,項目名稱:PyShortTextCategorization,代碼行數:20,代碼來源:MaxEntClassification.py

示例3: loadmodel

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def loadmodel(self, prefix):
        """ Load the model.

        Reads the gensim dictionary from "<prefix>_vocabs.gensimdict", the parameters from
        "<prefix>_config.json", and the Keras model through `kerasio.load_model(prefix)`.
        The binarizer, the character-vector encoder and the one-hot encoder are rebuilt
        from the loaded parameters and dictionary size.

        :param prefix: prefix of the model path
        :return: None
        :type prefix: str
        """
        self.dictionary = Dictionary.load(prefix+'_vocabs.gensimdict')

        # Use a context manager so the config file handle is always closed.
        with open(prefix+'_config.json', 'r') as configfile:
            parameters = json.load(configfile)

        self.operation = parameters['operation']
        self.alph = parameters['alph']
        self.specialsignals = parameters['special_signals']
        self.binarizer = SCRNNBinarizer(self.alph, self.specialsignals)
        self.concatcharvec_encoder = SpellingToConcatCharVecEncoder(self.alph)
        self.batchsize = parameters['batchsize']
        self.nb_hiddenunits = parameters['nb_hiddenunits']

        # Fit the one-hot encoder on a column vector of every dictionary index.
        nb_words = len(self.dictionary)
        self.onehotencoder = OneHotEncoder()
        self.onehotencoder.fit(np.arange(nb_words).reshape((nb_words, 1)))

        self.model = kerasio.load_model(prefix)
        self.trained = True
開發者ID:stephenhky,項目名稱:PyShortTextCategorization,代碼行數:22,代碼來源:sakaguchi.py

示例4: __init__

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def __init__(self, dirpath=".", tofull=False):
        """
        Set up the transformer from a directory of saved artifacts.

        The lexicon is expected at ``<dirpath>/corpus.dict`` and the TFIDF model
        at ``<dirpath>/tfidf.model`` (for now). Both attributes start as None and
        `self.load()` is called at the end of construction.

        Use tofull = True when the next step is a Scikit-Learn estimator;
        leave it False when the next step is a Gensim model.
        """
        lexicon_file = os.path.join(dirpath, "corpus.dict")
        tfidf_file = os.path.join(dirpath, "tfidf.model")
        self._lexicon_path, self._tfidf_path = lexicon_file, tfidf_file

        # Nothing is loaded yet; load() fills these in.
        self.lexicon = self.tfidf = None
        self.tofull = tofull

        self.load()
開發者ID:foxbook,項目名稱:atap,代碼行數:18,代碼來源:transformers.py

示例5: raw2ppmi

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def raw2ppmi(pathtoraw, corpusname, shifts=(1, 5, 10)):
        """
        Build shifted PPMI matrices from a saved raw co-occurrence matrix.

        The raw matrix is loaded with numpy and converted by `SPPMIFactory.raw2pmi`.
        For every shift magnitude a copy of the PMI matrix is passed to
        `SPPMIFactory.shift_clip_pmi` and the result is written to
        "<corpusname>-SPPMI-sparse-<shift>-shift.npz", one file per shift.

        :param pathtoraw: The path to the raw co-occurrence matrix.
        :param corpusname: The name of the corpus, used as prefix of the output files.
        :param shifts: A tuple containing shift magnitudes.
        :return: None
        """

        # PMI is computed once and reused for every shift.
        pmi = SPPMIFactory.raw2pmi(np.load(pathtoraw))

        out_template = "{0}-SPPMI-sparse-{1}-shift.npz"
        for shift in shifts:
            # Work on a copy so the PMI matrix stays intact for the next shift.
            shifted = SPPMIFactory.shift_clip_pmi(np.copy(pmi), k_shift=shift)
            SPPMIFactory._save_sparse_mtr(shifted, out_template.format(corpusname, shift))
            # Drop the reference before the next iteration allocates a new matrix.
            del shifted

示例6: loadmodel

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def loadmodel(self, prefix):
        """ Load the model.

        Reads the pickled document IDs from "<prefix>_docids.pkl", the gensim dictionary
        from "<prefix>_dictionary.dict" and the pickled document-term matrix from
        "<prefix>_dtm.pkl". A lookup from document ID to its position is rebuilt as well.

        :param prefix: prefix of the files
        :return: None
        :type prefix: str
        """
        # NOTE(review): unpickling executes arbitrary code; only load trusted model files.
        # Context managers make sure the pickle files are closed after reading.
        with open(prefix+'_docids.pkl', 'rb') as docidsfile:
            self.docids = pickle.load(docidsfile)
        self.docid_dict = {docid: i for i, docid in enumerate(self.docids)}
        self.dictionary = Dictionary.load(prefix+'_dictionary.dict')
        with open(prefix+'_dtm.pkl', 'rb') as dtmfile:
            self.dtm = pickle.load(dtmfile)
開發者ID:stephenhky,項目名稱:PyShortTextCategorization,代碼行數:13,代碼來源:dtm.py

示例7: loadmodel

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def loadmodel(self, nameprefix, load_incomplete=False):
        """ Load the model with names according to the prefix.

        Given the prefix of the file paths, load the model from files, with name given by the prefix.
        The parameters come from "<prefix>.json", the gensim dictionary from "<prefix>.gensimdict"
        and the class topic vectors from "<prefix>_classtopicvecs.pkl". The encoder is read by
        `kerasio.load_model` with the prefix "<prefix>_encoder"; unless `load_incomplete` is set,
        the decoder and autoencoder are read the same way with "<prefix>_decoder" and
        "<prefix>_autoencoder".

        :param nameprefix: prefix of the paths of the file
        :param load_incomplete: load encoder only, not decoder and autoencoder file (Default: False; put True for model built in version <= 0.2.1)
        :return: None
        :type nameprefix: str
        :type load_incomplete: bool
        """
        # load the JSON file (parameters); the context manager closes the handle
        with open(nameprefix+'.json', 'r') as paramfile:
            parameters = json.load(paramfile)
        self.nb_topics = parameters['nb_topics']
        self.classlabels = parameters['classlabels']

        self.dictionary = Dictionary.load(nameprefix + '.gensimdict')
        self.encoder = kerasio.load_model(nameprefix+'_encoder')
        # NOTE(review): unpickling executes arbitrary code; only load trusted model files.
        with open(nameprefix+'_classtopicvecs.pkl', 'rb') as vecfile:
            self.classtopicvecs = pickle.load(vecfile)
        if not load_incomplete:
            self.decoder = kerasio.load_model(nameprefix+'_decoder')
            self.autoencoder = kerasio.load_model(nameprefix+'_autoencoder')
        self.trained = True
開發者ID:stephenhky,項目名稱:PyShortTextCategorization,代碼行數:28,代碼來源:AutoEncodingTopicModeling.py

示例8: savemodel

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def savemodel(self, nameprefix):
        """ Save the model with names according to the prefix.

        Given the prefix of the file paths, save the corresponding topic model. The files
        include a JSON (.json) file that specifies various parameters, a gensim dictionary (.gensimdict),
        a topic model (.gensimmodel) and a similarity matrix (.gensimmat). If weighing is applied,
        save also the tf-idf model (.gensimtfidf).

        If neither :func:`~train` nor :func:`~loadmodel` was run, it will raise `ModelNotTrainedException`.

        :param nameprefix: prefix of the file paths
        :return: None
        :raise: ModelNotTrainedException
        :type nameprefix: str
        """
        if not self.trained:
            raise e.ModelNotTrainedException()

        parameters = {
            'nb_topics': self.nb_topics,
            'toweigh': self.toweigh,
            'algorithm': self.algorithm,
            'classlabels': self.classlabels,
        }
        # Close (and thereby flush) the JSON file deterministically instead of
        # relying on garbage collection of an anonymous file object.
        with open(nameprefix+'.json', 'w') as paramfile:
            json.dump(parameters, paramfile)

        self.dictionary.save(nameprefix+'.gensimdict')
        self.topicmodel.save(nameprefix+'.gensimmodel')
        self.matsim.save(nameprefix+'.gensimmat')
        if self.toweigh:
            self.tfidf.save(nameprefix+'.gensimtfidf')
開發者ID:stephenhky,項目名稱:PyShortTextCategorization,代碼行數:30,代碼來源:GensimTopicModeling.py

示例9: load

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def load(self):
        """
        Load the lexicon and the TFIDF model from disk when their files exist.

        An attribute is only overwritten when the corresponding path exists;
        otherwise it keeps whatever value it had before the call.
        """

        if os.path.exists(self._lexicon_path):
            self.lexicon = Dictionary.load(self._lexicon_path)

        if os.path.exists(self._tfidf_path):
            # Call load on the class directly; no throwaway TfidfModel
            # instance is needed just to reach the loader.
            self.tfidf = TfidfModel.load(self._tfidf_path)
開發者ID:foxbook,項目名稱:atap,代碼行數:9,代碼來源:transformers.py

示例10: init

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def init(self,system,subclass):
		"""Load the dictionary and TF-IDF model of one (system, subclass) pair and fit its KNN model.

		The dictionary and model file names are derived from `system` and `subclass`
		and looked up under `self.model_dir`. Training rows are read from the
		`monitor_cluster_dbscan` table. When `self.get_KNN_model` returns a model,
		the trio (model, dictionary, knn) is stored in `self.models` under the key
		"<system>-<subclass>"; otherwise nothing is stored.

		:param system: business system code
		:param subclass: rule type code
		:return: None
		"""
		db = self.data_processor
		conn = db.connect_db(self.conf.db_host, self.conf.db_database, self.conf.db_user, self.conf.db_pass)

		# Load the vocabulary and the model.
		started = time.time()

		logger.debug("正在初始化[%s-%s]的模型加載",system,subclass)

		# Both artifact file names share the "<system>_<subclass>" stem.
		stem = system + "_" + subclass

		dic_name = "dictionary_" + stem + ".dic"
		dictionary = Dictionary.load(self.model_dir+"/" + dic_name)
		logger.debug("加載了字典:%s", dic_name)
		logger.debug("詞袋一共%d個詞", len(dictionary.keys()))

		model_name = "tfidf_" + stem + ".model"
		model = TfidfModel.load(self.model_dir+"/" + model_name)
		logger.debug("加載了TFIDF模型:%s", model_name)

		# NOTE(review): the query is assembled by string formatting; confirm that
		# system/subclass never come from untrusted input.
		query = "select * from monitor_cluster_dbscan where business_system_code='{}' and rule_type_code='{}'".format(system,subclass)
		df_train = pd.read_sql(query, conn)

		# Fit the KNN model used for later prediction.
		knn = self.get_KNN_model(df_train,dictionary,model)
		duration(started,"根據字典和此分類數據,基於tfidf向量,訓練出KNN模型")

		if knn is None:
			return
		self.models[system+"-"+subclass] = {'model':model, 'dictionary':dictionary, 'knn':knn}
開發者ID:newsettle,項目名稱:ns4_chatbot,代碼行數:35,代碼來源:predictor.py

示例11: main

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def main():
	"""Train a KNN model on historical rows and log its predictions for a test batch.

	Steps: initialise via `_init()`, load the dictionary and TF-IDF model from
	"../out", read training rows from `cluster_dbscan`, fit the KNN model, read
	test rows from `clean_cut_data`, predict their classes, and log for every
	test row its predicted class, its title and the title of the first training
	row of that class.
	"""

	_init()

	# Load the vocabulary and the TF-IDF model.
	tick = time()
	vocab = Dictionary.load("../out/dictionary.dic")
	logger.debug("詞袋一共%d個詞",len(vocab.keys()))
	tfidf = TfidfModel.load("../out/tfidf.model")
	tick = duration(tick,"加載詞表和TFIDF模型")

	# Load the training data set.
	tick = time()
	# Alternative source kept from the original author:
	#train_df = pd.read_csv(open("../out/cluster_dbscan_9900.csv",'rU'), encoding='utf-8', engine='c')
	train_df = pd.read_sql("select * from cluster_dbscan limit  9900", connect_db())
	tick = duration(tick,"加載曆史數據用於訓練KNN")

	knn = get_KNN_model(train_df,vocab,tfidf)
	tick = duration(tick,"訓練出KNN模型")

	test_df = pd.read_sql("select * from clean_cut_data  limit 9900,100", connect_db())
	x_test = get_tfidf_vector(test_df['html_cut'].tolist(),vocab,tfidf)
	logger.debug("x_test's shape:%r",x_test.shape)
	tick = duration(tick,"加載測試數據")

	predictions = knn.predict(x_test)
	tick = duration(tick,"預測結果")
	logger.debug("預測結果:")
	logger.debug(predictions)

	test_df['classes'] = predictions

	for _, row in test_df.iterrows():
		predicted = row['classes']
		test_title = row['work_order_title']
		# Take the first training row of the predicted class as its representative.
		same_class = train_df[train_df['classes']==predicted]
		sample_title = same_class.iloc[0,:]['work_order_title']
		logger.debug("類別(%d),測試標題(%s),樣本標題(%s)",predicted,test_title,sample_title)
開發者ID:newsettle,項目名稱:ns4_chatbot,代碼行數:40,代碼來源:tfidf_test.py

示例12: _save_sparse_mtr

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def _save_sparse_mtr(sparse_mtr, filename):
        """
        Write the components of a sparse matrix to an ``.npz`` archive.

        The archive stores the matrix's ``data``, ``indices``, ``indptr`` and
        ``shape`` attributes under those same names via ``np.savez``.

        snippet from: http://stackoverflow.com/questions/8955448/save-load-scipy-sparse-csr-matrix-in-portable-data-format

        :param sparse_mtr: the matrix to save; must expose data, indices, indptr and shape.
        :param filename: the filename to which to save the matrix.
        :return: None
        """
        components = {
            "data": sparse_mtr.data,
            "indices": sparse_mtr.indices,
            "indptr": sparse_mtr.indptr,
            "shape": sparse_mtr.shape,
        }
        np.savez(filename, **components)
開發者ID:clips,項目名稱:dutchembeddings,代碼行數:14,代碼來源:create_sppmi.py

示例13: load

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def load(self, dir_path):
        """
        Load the vocabulary, TF-IDF model and similarity index from a directory.

        The file names are taken from the `VOCAB_FNAME`, `TFIDF_FNAME` and
        `INDEX_FNAME` attributes and resolved relative to `dir_path`.

        :param dir_path: directory that holds the three saved files.
        :return: None
        """
        base = Path(dir_path)

        # Resolve every path up front, then load in vocab -> model -> index order.
        vocab_path, model_path, index_path = (
            str(base / fname)
            for fname in (self.VOCAB_FNAME, self.TFIDF_FNAME, self.INDEX_FNAME)
        )

        self.vocab = Dictionary.load(vocab_path)
        self.model = TfidfModel.load(model_path)
        self.index = SparseMatrixSimilarity.load(index_path)
開發者ID:naver,項目名稱:claf,代碼行數:12,代碼來源:tfidf.py

示例14: create

# 需要導入模塊: from gensim.corpora import Dictionary [as 別名]
# 或者: from gensim.corpora.Dictionary import load [as 別名]
def create(pathtomapping, pathtocorpus, corpusname, window, numtokeep=50000, save_raw=True, shifts=(1, 5, 10)):
        """
        Build Shifted Positive Pointwise Mutual Information matrices for a corpus.

        The id2word mapping is loaded from `pathtomapping` when given; otherwise it is
        rebuilt from the corpus, filtered with `filter_extremes(no_below=5, keep_n=numtokeep)`
        and compactified. A raw co-occurrence matrix is then computed, optionally saved,
        converted by `SPPMIFactory.raw2pmi`, and one shifted matrix per entry of `shifts`
        is written to "<corpusname>-SPPMI-sparse-<shift>-shift.npz". The word2id mapping
        and the frequencies are saved to "<corpusname>mapping.json" and
        "<corpusname>freqs.json".

        :param pathtomapping: The path to the id2word mapping. If this is left empty, the id2word mapping gets
        recreated. Warning: this takes a long time.
        :param pathtocorpus: The path to the corpus folder. The corpus can be spread out over multiple files or folders,
        and is read iteratively.
        :param corpusname: The name of the corpus. Used for saving the files.
        :param window: The window used to consider co-occurrences.
        :param numtokeep: The number of most frequent words to keep. Note that the matrix is non-sparse.
        Because of this, the memory requirements of the code are quadratic.
        :param save_raw: Whether to save the raw co-occurrence matrix as a numpy matrix.
        :param shifts: The shifts to apply to the co-occurrence matrix. Each shifted matrix
        gets saved as a separate model.
        """

        start = time.time()

        if pathtomapping:
            id2word = Dictionary.load(pathtomapping)
        else:
            # No saved mapping: rebuild it from the corpus (slow).
            id2word = Dictionary(SentenceIter(pathtocorpus), prune_at=None)
            id2word.filter_extremes(no_below=5, keep_n=numtokeep)
            id2word.compactify()
            logger.info("Creating the word2id took {0} seconds".format(time.time() - start))

        inter = time.time()

        word2id = gensim.utils.revdict(id2word)
        raw = get_cooccur(SentenceIter(pathtocorpus), word2id, window=window)

        logger.info("Creating raw co-occurrence matrix took {0} seconds".format(time.time() - inter))

        if save_raw:
            np.save('{0}-cooccur.npy'.format(corpusname), raw)

        SPPMIFactory._save_word2id(word2id, "{0}mapping.json".format(corpusname))
        SPPMIFactory._save_freqs(id2word, "{0}freqs.json".format(corpusname))

        # Rebind the same name so the raw counts are not kept alive next to the PMI matrix.
        raw = SPPMIFactory.raw2pmi(raw)

        out_template = "{0}-SPPMI-sparse-{1}-shift.npz"
        for shift in shifts:
            # Each shift works on its own copy of the PMI matrix.
            shifted = SPPMIFactory.shift_clip_pmi(np.copy(raw), k_shift=shift)
            SPPMIFactory._save_sparse_mtr(shifted, out_template.format(corpusname, shift))
            del shifted
開發者ID:clips,項目名稱:dutchembeddings,代碼行數:50,代碼來源:create_sppmi.py


注:本文中的gensim.corpora.Dictionary.load方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。