This article collects and summarizes typical usage examples of the Dictionary.load method from Python's gensim.corpora. If you have been wondering what exactly Dictionary.load does, how to call it, or what it looks like in practice, the curated code examples below should help. You can also read more about the class this method belongs to, gensim.corpora.Dictionary.
The following presents 14 code examples of Dictionary.load, sorted by popularity by default.
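Before diving into the examples, here is a minimal, self-contained sketch of the save/load round trip (the file name example.dict and the toy corpus are illustrative assumptions, not taken from the examples below). Dictionary.load is a classmethod that restores a dictionary previously persisted with Dictionary.save:

from gensim.corpora import Dictionary

# A toy tokenized corpus (illustrative assumption).
texts = [["human", "interface", "computer"],
         ["survey", "user", "computer"]]

dictionary = Dictionary(texts)              # build the token -> id mapping
dictionary.save("example.dict")             # persist to disk
loaded = Dictionary.load("example.dict")    # classmethod: restore from disk
print(loaded.token2id)                      # same mapping as the original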
Example 1: loadmodel
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def loadmodel(self, nameprefix):
    """ Load a trained model from files.

    Given the prefix of the file paths, load the model from files with names given by the prefix
    followed by "_classlabels.txt", ".json", ".h5", "_labelidx.pkl", and "_dictionary.dict".

    If this has not been run, or a model was not trained by :func:`~train`,
    a `ModelNotTrainedException` will be raised while performing prediction or saving the model.

    :param nameprefix: prefix of the file paths
    :return: None
    :type nameprefix: str
    """
    self.model = kerasio.load_model(nameprefix)
    self.dictionary = Dictionary.load(nameprefix + '_dictionary.dict')
    labelfile = open(nameprefix + '_classlabels.txt', 'r')
    self.classlabels = [s.strip() for s in labelfile.readlines()]
    labelfile.close()
    self.labels2idx = pickle.load(open(nameprefix + '_labelidx.pkl', 'rb'))
    self.trained = True
Example 2: load_maxent_classifier
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def load_maxent_classifier(name, compact=True):
    """ Load the maximum entropy classifier from a saved model.

    Given the model file(s), load the maximum entropy classifier.

    :param name: name or prefix of the file, depending on whether compact is True or False
    :param compact: whether the model file is compact (Default: True)
    :return: maximum entropy classifier
    :type name: str
    :type compact: bool
    :rtype: MaxEntClassifier
    """
    classifier = MaxEntClassifier()
    if compact:
        classifier.load_compact_model(name)
    else:
        classifier.loadmodel(name)
    return classifier
Example 3: loadmodel
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def loadmodel(self, prefix):
    """ Load the model.

    :param prefix: prefix of the model path
    :return: None
    :type prefix: str
    """
    self.dictionary = Dictionary.load(prefix + '_vocabs.gensimdict')
    parameters = json.load(open(prefix + '_config.json', 'r'))
    self.operation = parameters['operation']
    self.alph = parameters['alph']
    self.specialsignals = parameters['special_signals']
    self.binarizer = SCRNNBinarizer(self.alph, self.specialsignals)
    self.concatcharvec_encoder = SpellingToConcatCharVecEncoder(self.alph)
    self.batchsize = parameters['batchsize']
    self.nb_hiddenunits = parameters['nb_hiddenunits']
    self.onehotencoder = OneHotEncoder()
    self.onehotencoder.fit(np.arange(len(self.dictionary)).reshape((len(self.dictionary), 1)))
    self.model = kerasio.load_model(prefix)
    self.trained = True
Example 4: __init__
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def __init__(self, dirpath=".", tofull=False):
    """
    Pass in a directory that holds the lexicon in corpus.dict and the
    TFIDF model in tfidf.model (for now).

    Set tofull=True if the next step in the pipeline is a Scikit-Learn
    estimator; keep it False if the next step is a Gensim model.
    """
    self._lexicon_path = os.path.join(dirpath, "corpus.dict")
    self._tfidf_path = os.path.join(dirpath, "tfidf.model")
    self.lexicon = None
    self.tfidf = None
    self.tofull = tofull
    self.load()
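To illustrate what the tofull flag is for, here is a hedged sketch of a transform step for such a wrapper; the transform method, its tokenized-document input, and the use of gensim.matutils.sparse2full are assumptions, not part of the original snippet:

from gensim.matutils import sparse2full

def transform(self, documents):
    # Hypothetical sketch: weight each tokenized document with the TF-IDF
    # model; if tofull is set, densify the sparse Gensim vector so that a
    # downstream Scikit-Learn estimator can consume it.
    for document in documents:
        vec = self.tfidf[self.lexicon.doc2bow(document)]
        yield sparse2full(vec, len(self.lexicon)) if self.tofull else vec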
Example 5: raw2ppmi
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def raw2ppmi(pathtoraw, corpusname, shifts=(1, 5, 10)):
    """
    Creates a PPMI matrix out of a raw co-occurrence matrix.

    First a PMI matrix is created (see raw2pmi, below). The matrix is then
    shifted by -log(k), and any remaining negative entries are clipped to 0.
    This function can take multiple shift magnitudes, each of which is performed and saved separately.

    :param pathtoraw: The path to the raw co-occurrence matrix.
    :param corpusname: The name of the corpus.
    :param shifts: A tuple containing shift magnitudes.
    :return: None
    """
    # Create the PMI matrix
    pmi = SPPMIFactory.raw2pmi(np.load(pathtoraw))
    for k in shifts:
        # Shift and clip a copy of the PMI matrix.
        sparse = SPPMIFactory.shift_clip_pmi(np.copy(pmi), k_shift=k)
        # Save the PPMI matrix.
        SPPMIFactory._save_sparse_mtr(sparse, "{0}-SPPMI-sparse-{1}-shift.npz".format(corpusname, k))
        del sparse
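The shift-and-clip step referenced above (SPPMIFactory.shift_clip_pmi) is not shown in this example. A minimal sketch of what such a function typically does, assuming the standard SPPMI definition max(PMI - log k, 0) and a CSR result (as expected by _save_sparse_mtr in Example 12), might look like this:

import numpy as np
from scipy.sparse import csr_matrix

def shift_clip_pmi(pmi, k_shift):
    # Hypothetical sketch: SPPMI(w, c) = max(PMI(w, c) - log(k), 0).
    shifted = pmi - np.log(k_shift)
    shifted[shifted < 0] = 0.0
    return csr_matrix(shifted)  # sparse, since most entries are now zero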
Example 6: loadmodel
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def loadmodel(self, prefix):
    """ Load the model.

    :param prefix: prefix of the files
    :return: None
    :type prefix: str
    """
    self.docids = pickle.load(open(prefix + '_docids.pkl', 'rb'))
    self.docid_dict = {docid: i for i, docid in enumerate(self.docids)}
    self.dictionary = Dictionary.load(prefix + '_dictionary.dict')
    self.dtm = pickle.load(open(prefix + '_dtm.pkl', 'rb'))
Example 7: loadmodel
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def loadmodel(self, nameprefix, load_incomplete=False):
    """ Load the model with names according to the prefix.

    Given the prefix of the file paths, load the model from files with names given by the prefix.
    There are files with names ending with "_encoder.json" and "_encoder.h5", which are
    the JSON and HDF5 files for the encoder respectively.
    They also include a gensim dictionary (.gensimdict).

    :param nameprefix: prefix of the paths of the files
    :param load_incomplete: load the encoder only, not the decoder and autoencoder files (Default: False; set True for a model built in version <= 0.2.1)
    :return: None
    :type nameprefix: str
    :type load_incomplete: bool
    """
    # load the JSON file (parameters)
    parameters = json.load(open(nameprefix + '.json', 'r'))
    self.nb_topics = parameters['nb_topics']
    self.classlabels = parameters['classlabels']
    self.dictionary = Dictionary.load(nameprefix + '.gensimdict')
    self.encoder = kerasio.load_model(nameprefix + '_encoder')
    self.classtopicvecs = pickle.load(open(nameprefix + '_classtopicvecs.pkl', 'rb'))
    if not load_incomplete:
        self.decoder = kerasio.load_model(nameprefix + '_decoder')
        self.autoencoder = kerasio.load_model(nameprefix + '_autoencoder')
    self.trained = True
Example 8: savemodel
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def savemodel(self, nameprefix):
    """ Save the model with names according to the prefix.

    Given the prefix of the file paths, save the corresponding topic model. The files
    include a JSON (.json) file that specifies various parameters, a gensim dictionary (.gensimdict),
    a topic model (.gensimmodel), and a similarity matrix (.gensimmat). If weighting is applied,
    the tf-idf model (.gensimtfidf) is also saved.

    If neither :func:`~train` nor :func:`~loadmodel` was run, it will raise `ModelNotTrainedException`.

    :param nameprefix: prefix of the file paths
    :return: None
    :raise: ModelNotTrainedException
    :type nameprefix: str
    """
    if not self.trained:
        raise e.ModelNotTrainedException()
    parameters = {}
    parameters['nb_topics'] = self.nb_topics
    parameters['toweigh'] = self.toweigh
    parameters['algorithm'] = self.algorithm
    parameters['classlabels'] = self.classlabels
    json.dump(parameters, open(nameprefix + '.json', 'w'))
    self.dictionary.save(nameprefix + '.gensimdict')
    self.topicmodel.save(nameprefix + '.gensimmodel')
    self.matsim.save(nameprefix + '.gensimmat')
    if self.toweigh:
        self.tfidf.save(nameprefix + '.gensimtfidf')
Example 9: load
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def load(self):
    if os.path.exists(self._lexicon_path):
        self.lexicon = Dictionary.load(self._lexicon_path)
    if os.path.exists(self._tfidf_path):
        self.tfidf = TfidfModel.load(self._tfidf_path)
Example 10: init
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def init(self, system, subclass):
    conn = self.data_processor.connect_db(
        self.conf.db_host,
        self.conf.db_database,
        self.conf.db_user,
        self.conf.db_pass
    )
    # Load the vocabulary and the model
    t = time.time()
    logger.debug("Initializing model loading for [%s-%s]", system, subclass)
    dic_name = "dictionary_" + system + "_" + subclass + ".dic"
    dictionary = Dictionary.load(self.model_dir + "/" + dic_name)
    logger.debug("Loaded dictionary: %s", dic_name)
    logger.debug("The bag of words contains %d terms", len(dictionary.keys()))
    model_name = "tfidf_" + system + "_" + subclass + ".model"
    model = TfidfModel.load(self.model_dir + "/" + model_name)
    logger.debug("Loaded TF-IDF model: %s", model_name)
    df_train = pd.read_sql(
        "select * from monitor_cluster_dbscan where business_system_code='{}' and rule_type_code='{}'".format(system, subclass),
        conn)
    # Fit a KNN model on the clustered data, then use it for prediction
    knn = self.get_KNN_model(df_train, dictionary, model)
    duration(t, "Trained a KNN model from the dictionary and this class's data, based on TF-IDF vectors")
    if knn is not None:
        key = system + "-" + subclass
        value = {'model': model, 'dictionary': dictionary, 'knn': knn}
        self.models[key] = value
Example 11: main
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def main():
    _init()
    # Load the vocabulary and the model
    t = time()
    dictionary = Dictionary.load("../out/dictionary.dic")
    logger.debug("The bag of words contains %d terms", len(dictionary.keys()))
    model = TfidfModel.load("../out/tfidf.model")
    t = duration(t, "Loaded the vocabulary and the TF-IDF model")
    # Load the training data set
    t = time()
    # df_train = pd.read_csv(open("../out/cluster_dbscan_9900.csv", 'rU'), encoding='utf-8', engine='c')
    df_train = pd.read_sql("select * from cluster_dbscan limit 9900", connect_db())
    t = duration(t, "Loaded historical data for training KNN")
    knn = get_KNN_model(df_train, dictionary, model)
    t = duration(t, "Trained the KNN model")
    df_test = pd.read_sql("select * from clean_cut_data limit 9900,100", connect_db())
    doc_list = df_test['html_cut'].tolist()
    x_test = get_tfidf_vector(doc_list, dictionary, model)
    logger.debug("x_test's shape: %r", x_test.shape)
    t = duration(t, "Loaded test data")
    pred = knn.predict(x_test)
    t = duration(t, "Predicted results")
    logger.debug("Predictions:")
    logger.debug(pred)
    df_test['classes'] = pred
    for index, row in df_test.iterrows():
        _class = row['classes']
        test_title = row['work_order_title']
        label_title = df_train[df_train['classes'] == _class].iloc[0, :]['work_order_title']
        logger.debug("Class (%d), test title (%s), sample title (%s)", _class, test_title, label_title)
Example 12: _save_sparse_mtr
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def _save_sparse_mtr(sparse_mtr, filename):
    """
    Save a sparse matrix to a specified filepath.

    snippet from: http://stackoverflow.com/questions/8955448/save-load-scipy-sparse-csr-matrix-in-portable-data-format

    :param sparse_mtr: the matrix to save.
    :param filename: the filename to which to save the matrix.
    :return: None
    """
    np.savez(filename, data=sparse_mtr.data, indices=sparse_mtr.indices,
             indptr=sparse_mtr.indptr, shape=sparse_mtr.shape)
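The matching load step from the same StackOverflow recipe rebuilds the CSR matrix from the saved arrays; this loader is a sketch and not part of the original class:

import numpy as np
from scipy.sparse import csr_matrix

def _load_sparse_mtr(filename):
    # Hypothetical counterpart to _save_sparse_mtr above.
    loader = np.load(filename)
    return csr_matrix((loader['data'], loader['indices'], loader['indptr']),
                      shape=loader['shape'])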
Example 13: load
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def load(self, dir_path):
    dir_path = Path(dir_path)
    vocab_path = str(dir_path / self.VOCAB_FNAME)
    model_path = str(dir_path / self.TFIDF_FNAME)
    index_path = str(dir_path / self.INDEX_FNAME)
    self.vocab = Dictionary.load(vocab_path)
    self.model = TfidfModel.load(model_path)
    self.index = SparseMatrixSimilarity.load(index_path)
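Once these three artifacts are loaded, querying them usually chains them together. A hedged sketch (the most_similar method and its tokenized input are assumptions, not part of the original class):

def most_similar(self, tokens, topn=5):
    # Hypothetical sketch: bag-of-words -> TF-IDF weights -> similarity
    # scores against every indexed document, highest scores first.
    query = self.model[self.vocab.doc2bow(tokens)]
    sims = self.index[query]
    return sorted(enumerate(sims), key=lambda pair: -pair[1])[:topn]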
Example 14: create
# Required import: from gensim.corpora import Dictionary [as alias]
# Or: from gensim.corpora.Dictionary import load [as alias]
def create(pathtomapping, pathtocorpus, corpusname, window, numtokeep=50000, save_raw=True, shifts=(1, 5, 10)):
    """
    Creates a Shifted Positive Pointwise Mutual Information matrix.

    :param pathtomapping: The path to the id2word mapping. If this is left empty, the id2word mapping gets
        recreated. Warning: this takes a long time.
    :param pathtocorpus: The path to the corpus folder. The corpus can be spread out over multiple files or folders,
        and is read iteratively.
    :param corpusname: The name of the corpus. Used for saving the files.
    :param window: The window used to consider co-occurrences.
    :param numtokeep: The number of most frequent words to keep. Note that the matrix is non-sparse.
        Because of this, the memory requirements of the code are quadratic.
    :param save_raw: Whether to save the raw co-occurrence matrix as a numpy matrix.
    :param shifts: The shifts to apply to the co-occurrence matrix. Each shifted matrix
        gets saved as a separate model.
    :return: None
    """
    start = time.time()
    if not pathtomapping:
        id2word = Dictionary(SentenceIter(pathtocorpus), prune_at=None)
        id2word.filter_extremes(no_below=5, keep_n=numtokeep)
        id2word.compactify()
        logger.info("Creating the word2id took {0} seconds".format(time.time() - start))
    else:
        id2word = Dictionary.load(pathtomapping)
    inter = time.time()
    word2id = gensim.utils.revdict(id2word)
    corpus = SentenceIter(pathtocorpus)
    raw = get_cooccur(corpus, word2id, window=window)
    logger.info("Creating the raw co-occurrence matrix took {0} seconds".format(time.time() - inter))
    if save_raw:
        np.save('{0}-cooccur.npy'.format(corpusname), raw)
    SPPMIFactory._save_word2id(word2id, "{0}mapping.json".format(corpusname))
    SPPMIFactory._save_freqs(id2word, "{0}freqs.json".format(corpusname))
    raw = SPPMIFactory.raw2pmi(raw)
    for k in shifts:
        sparse = SPPMIFactory.shift_clip_pmi(np.copy(raw), k_shift=k)
        SPPMIFactory._save_sparse_mtr(sparse, "{0}-SPPMI-sparse-{1}-shift.npz".format(corpusname, k))
        del sparse
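SPPMIFactory.raw2pmi, called near the end, is not shown in these examples. A minimal sketch under the standard PMI definition, PMI(w, c) = log(p(w, c) / (p(w) * p(c))), could be:

import numpy as np

def raw2pmi(raw):
    # Hypothetical sketch: convert raw co-occurrence counts to PMI.
    total = raw.sum()
    p_wc = raw / total                            # joint probabilities
    p_w = raw.sum(axis=1, keepdims=True) / total  # word marginals
    p_c = raw.sum(axis=0, keepdims=True) / total  # context marginals
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.log(p_wc / (p_w * p_c))         # zero counts give -inf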