本文整理汇总了Python中gensim.models.Doc2Vec.load方法的典型用法代码示例。如果您正苦于以下问题：Python Doc2Vec.load方法的具体用法？Python Doc2Vec.load怎么用？Python Doc2Vec.load使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.models.Doc2Vec的用法示例。
在下文中一共展示了Doc2Vec.load方法的14个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load_idf_dict
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def load_idf_dict(self, dict_name='idf_dict'):
    """Return the IDF weight table, building and caching it on first use.

    On a cache miss the table is read from
    ``config.EX_DICT_DIR + '/word-frequencies.txt'``. The first line of
    that file is parsed as the total frequency; every later line is split
    on whitespace into a word and its frequency. Words whose frequency is
    below 10 are skipped; every other word is mapped to
    ``log(total / frequency) / log(2)``.

    Args:
        dict_name: Key under which the table is cached in
            ``self.dict_manager``.

    Returns:
        The dictionary stored at ``self.dict_manager[dict_name]``.
    """
    if dict_name in self.dict_manager:
        return self.dict_manager[dict_name]

    source_path = config.EX_DICT_DIR + '/word-frequencies.txt'
    print('load dict from file %s \n' % source_path)
    idf_by_word = {}
    for line_no, raw_line in enumerate(utils.create_read_file(source_path)):
        if line_no == 0:
            # Header line: the total frequency used as the IDF numerator.
            total_freq = int(raw_line)
            continue
        word, count = raw_line.strip().split()
        count = float(count)
        if count < 10:
            # Rare words are left out of the table entirely.
            continue
        idf_by_word[word] = math.log(total_freq / count) / math.log(2)
    self.dict_manager[dict_name] = idf_by_word
    return self.dict_manager[dict_name]
示例2: load_from_pickle
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def load_from_pickle(self, filename):
    """Copy the attributes of a saved model onto this instance.

    The file is opened with ``Doc2Vec.load`` (the original note describes
    it as a pretrained Word2Vec file). Every name reported by ``dir()`` on
    the loaded object is then assigned onto ``self``, except ``__dict__``
    and callables that ``self`` already exposes under the same name.
    Attributes whose read or assignment raises ``AttributeError`` are
    skipped.

    Args:
        filename: Path handed to ``Doc2Vec.load``.
    """
    loaded = Doc2Vec.load(filename)
    for name in dir(loaded):
        if name == '__dict__':
            continue
        # Keep the methods this class already provides instead of
        # overwriting them with the loaded object's bound methods.
        already_provided = name in dir(self) and callable(getattr(loaded, name))
        if not already_provided:
            try:
                setattr(self, name, getattr(loaded, name))
            except AttributeError:
                # Read-only or otherwise unassignable attribute: leave it.
                pass
示例3: __init__
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def __init__(self, model_fname="data/doc2vec.vecs", use_notebook=False):
    """Load the model and index its document tags.

    Args:
        model_fname: Path handed to ``Doc2Vec.load``.
        use_notebook: Stored unchanged on ``self.use_notebook``.
    """
    self.model = Doc2Vec.load(model_fname)
    # Map each document tag to its position in the iteration order of
    # ``model.docvecs.doctags``.
    tag_positions = {}
    for position, tag in enumerate(self.model.docvecs.doctags.keys()):
        tag_positions[tag] = position
    self.doc2idx = tag_positions
    self.use_notebook = use_notebook
示例4: load_doc2vec
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def load_doc2vec(mod_file):
    """Return the model that ``Doc2Vec.load`` reads from ``mod_file``."""
    loaded_model = Doc2Vec.load(mod_file)
    return loaded_model
示例5: load_dict
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def load_dict(self, dict_name, path=config.DICT_DIR):
    """Return the named dictionary, reading and caching it on first use.

    On a cache miss the file ``dict_<dict_name>.txt`` is read from the
    ``../resources`` directory relative to this module. Each line is
    stripped and split on tabs:

    * one field: the field maps to its 1-based line number;
    * two fields: the first field maps to ``eval`` of the second;
    * anything else raises ``NotImplementedError``.

    Args:
        dict_name: Dictionary name; also the cache key in
            ``self.dict_manager``.
        path: NOTE(review): accepted but overwritten below before use, so
            the value passed by the caller has no effect. Confirm whether
            that is intended.

    Returns:
        The dictionary stored at ``self.dict_manager[dict_name]``.

    Raises:
        NotImplementedError: If a line has more than two tab-separated
            fields.
    """
    if dict_name in self.dict_manager:
        return self.dict_manager[dict_name]

    # The resources directory next to this module replaces whatever was
    # passed in as ``path``.
    path = os.path.join(os.path.dirname(__file__), '../resources')
    # load dict from file
    file_name = path + '/dict_%s.txt' % dict_name
    print('load dict from file %s \n' % file_name)
    entries = {}
    for line_no, raw_line in enumerate(utils.create_read_file(file_name)):
        fields = raw_line.strip().split('\t')
        field_count = len(fields)
        if field_count == 1:
            entries[fields[0]] = line_no + 1
        elif field_count == 2:
            # NOTE(review): eval() executes whatever the file contains;
            # this is only safe while the dictionary files are trusted.
            entries[fields[0]] = eval(fields[1])
        else:
            raise NotImplementedError
    self.dict_manager[dict_name] = entries
    return self.dict_manager[dict_name]
示例6: load_doc2vec
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def load_doc2vec(self):
    """Return the doc2vec model, loading and caching it on first use.

    The model is cached in ``self.dict_manager`` under the key
    ``'doc2vec'``. On a cache miss it is loaded from
    ``config.EX_DICT_DIR + '/doc2vec.model'``.

    Returns:
        The object stored at ``self.dict_manager['doc2vec']``.
    """
    cache_key = 'doc2vec'
    if cache_key in self.dict_manager:
        return self.dict_manager[cache_key]

    # Imported here so gensim is only needed when the model is first loaded.
    from gensim.models import Doc2Vec
    self.dict_manager[cache_key] = Doc2Vec.load(config.EX_DICT_DIR + '/doc2vec.model')
    return self.dict_manager[cache_key]
示例7: test_doc2vec_inference_saveload
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def test_doc2vec_inference_saveload():
    """Check that a saved and reloaded Doc2Vec model still drives retrieval.

    A small model is trained on ``documents``, saved to ``TEST_FILE``,
    discarded, loaded back, and the file removed. The reloaded model is
    wrapped in ``Doc2VecInference`` and used by a ``Retrieval`` pipeline;
    the first hit for the query "scientists" must be document 1.
    """
    corpus = []
    for tag, text in enumerate(documents):
        corpus.append(TaggedDocument(simple_preprocess(text), [tag]))

    trained = Doc2Vec(corpus, epochs=1, min_count=1, vector_size=10)
    trained.save(TEST_FILE)
    # Drop the in-memory model so only the copy on disk is used below.
    del trained

    reloaded = Doc2Vec.load(TEST_FILE)
    os.remove(TEST_FILE)

    embedder = Doc2VecInference(reloaded, DEFAULT_ANALYZER)
    retrieval = Retrieval(embedder, matching=Matching()).fit(documents)
    hits = retrieval.query("scientists")
    assert hits[0] == 1
示例8: retrainModel
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def retrainModel(vectorFile, dataFile, outputFile, iterations):
    """Continue training a saved model and save the result.

    Args:
        vectorFile: File name of the existing model inside ``Models\\``.
        dataFile: File name of the training data inside ``Data\\``.
        outputFile: File name inside ``Models\\`` to save the model under.
        iterations: Number of times ``model.train`` is called.

    NOTE(review): the directory prefixes use a literal backslash, so the
    paths only resolve to sub-directories on Windows.
    """
    data_path = "Data\\" + dataFile
    model_path = "Models\\" + vectorFile
    documents = LabeledLineSentence(data_path)
    model = Doc2Vec.load(model_path)
    for _ in range(iterations):
        model.train(documents)
    # NOTE(review): assumes the save happens once, after all passes.
    model.save("Models\\" + outputFile)
示例9: testModel
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def testModel(inputFile):
    """Run an interactive console loop for probing a saved model.

    The model is loaded from ``Models\\<inputFile>``. The menu is shown
    repeatedly until the user enters anything other than 1, 2 or 3:

    * 1: print ``model.docvecs.most_similar`` for an entered tag.
    * 2: print ``model.most_similar`` for an entered word (lower-cased).
    * 3: read tab-separated ``tag<TAB>body`` lines from ``testing.txt``,
      infer a vector for each body, and append one report per line to
      ``clusteredResults.txt``.

    Args:
        inputFile: File name of the model inside ``Models\\``.
    """
    model = Doc2Vec.load("Models\\" + inputFile)
    menu = ("Press 1 to compare documents within the model to each other.\n"
            "Press 2 to run similarity tests on individual words.\n"
            "Press 3 to get the top related subreddits for an inferred new vector (comment).\n"
            "Hit any key to exit.\n")
    while True:
        choice = input(menu)
        if choice == "1":
            subreddit = input("Enter the subreddit you want to test.\n")
            print(model.docvecs.most_similar(subreddit))
            continue
        if choice == "2":
            word = input("Enter the word you wish to analyze.\n").lower()
            print(model.most_similar(word))
            continue
        if choice != "3":
            # Any other input ends the session.
            break

        with open("testing.txt") as source:
            reports = []
            for line in source:
                fields = line.split("\t")
                tag = fields[0]
                body = fields[1]
                inferred = model.infer_vector(body.split())
                reports.append("The original category is {}: {}\n {}\n".
                               format(tag, body, model.docvecs.most_similar(positive=[inferred])))
            # Results are appended, so earlier runs are kept in the file.
            with open("clusteredResults.txt", "a") as sink:
                sink.writelines(reports)
示例10: newKMeansModel
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def newKMeansModel(vectorFile, outputFile, numClusters):
    """Fit a KMeans model on a saved model's document vectors and dump it.

    Approach taken from
    https://stackoverflow.com/questions/43476869/doc2vec-sentence-clustering

    Args:
        vectorFile: File name of the Doc2Vec model inside ``Models\\``.
        outputFile: Path handed to ``joblib.dump`` for the fitted KMeans.
        numClusters: Value passed as ``n_clusters`` to ``KMeans``.
    """
    source_model = Doc2Vec.load("Models\\" + vectorFile)
    doc_matrix = source_model.docvecs.doctag_syn0
    clusterer = KMeans(n_clusters=numClusters)
    print("Starting")
    clusterer.fit(doc_matrix)
    # Printed after fit() has returned, despite the wording.
    print("Fitting Data")
    joblib.dump(clusterer, outputFile)
示例11: loadKMeansModel
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def loadKMeansModel(vectorFile, clusterFile, csvFile):
    """Tabulate a saved KMeans clustering and write it to CSV.

    Approach taken from
    https://stackoverflow.com/questions/43476869/doc2vec-sentence-clustering

    The table has one row per entry of ``model.docvecs.offset2doctag``,
    with the tag, its ``model.docvecs.doctags`` entry and the cluster
    label. It is printed and then written with ``DataFrame.to_csv``.

    Args:
        vectorFile: File name of the Doc2Vec model inside ``Models\\``.
        clusterFile: Path handed to ``joblib.load`` for the KMeans model.
        csvFile: Destination path for the CSV output.
    """
    model = Doc2Vec.load("Models\\" + vectorFile)
    km = joblib.load(clusterFile)
    clusters = km.labels_.tolist()

    details_column = "index, wordcount and repeated words"
    tags = model.docvecs.offset2doctag
    tag_details = []
    for tag in model.docvecs.offset2doctag:
        tag_details.append(model.docvecs.doctags[tag])

    cluster_info = {'labels': tags,
                    details_column: tag_details,
                    'clusters': clusters}
    sentenceDF = pd.DataFrame(cluster_info, index=[clusters],
                              columns=['labels', details_column, 'clusters'])
    print(sentenceDF)
    sentenceDF.to_csv(csvFile)
示例12: newDBSCANModel
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def newDBSCANModel(vectorFile, outputFile):
    """Cluster a saved model's document vectors with DBSCAN and report them.

    The fitted DBSCAN object is dumped to ``outputFile`` with joblib. A
    table with one row per entry of ``model.docvecs.offset2doctag`` (tag,
    its ``model.docvecs.doctags`` entry, cluster label) is printed and
    written to ``DBSCAN.csv``, followed by the estimated cluster count.

    Args:
        vectorFile: File name of the Doc2Vec model inside ``Models\\``.
        outputFile: Path handed to ``joblib.dump`` for the fitted DBSCAN.
    """
    model = Doc2Vec.load("Models\\" + vectorFile)

    # reshape((1, -1)) lets NumPy infer the vector length, so models
    # trained with a vector size other than 300 work as well.
    vecs = [model.docvecs[doc].reshape((1, -1))
            for doc in range(len(model.docvecs))]
    doc_vecs = np.array(vecs, dtype='float')  # cast the vectors to float
    # Unwrap the single-row matrices back into one flat vector per document.
    docs = [row[0] for row in doc_vecs]

    db = DBSCAN(eps=0.03, algorithm="brute", metric='cosine').fit(docs)
    joblib.dump(db, outputFile)

    labels = db.labels_
    # The label -1 is excluded from the cluster count.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    clusters = labels.tolist()

    cluster_info = {'labels': model.docvecs.offset2doctag,
                    "index, wordcount and repeated words": [model.docvecs.doctags[x] for x in
                                                            model.docvecs.offset2doctag],
                    'clusters': clusters}
    sentenceDF = pd.DataFrame(cluster_info, index=[clusters],
                              columns=['labels', "index, wordcount and repeated words", 'clusters'])
    print(sentenceDF)
    sentenceDF.to_csv("DBSCAN.csv")
    print('Estimated number of clusters: %d' % n_clusters_)
示例13: plotModel3D
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def plotModel3D(vectorFile, numClusters):
    """Show a 3-D scatter plot of KMeans clusters over PCA-reduced vectors.

    Layout adapted from
    http://scikit-learn.org/stable/auto_examples/cluster/plot_cluster_iris.html

    The model's ``docvecs.doctag_syn0`` array is reduced to 10 PCA
    components, clustered with KMeans, and components 5, 2 and 3 are
    plotted with points coloured by cluster label.

    Args:
        vectorFile: File name of the Doc2Vec model inside ``Models\\``.
        numClusters: Value passed as ``n_clusters`` to ``KMeans``.
    """
    model = Doc2Vec.load("Models\\" + vectorFile)
    docVecs = model.docvecs.doctag_syn0
    reduced_data = PCA(n_components=10).fit_transform(docVecs)
    kmeans = KMeans(init='k-means++', n_clusters=numClusters, n_init=10)
    fig = plt.figure(1, figsize=(10, 10))
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    kmeans.fit(reduced_data)
    labels = kmeans.labels_
    # The builtin float replaces the np.float alias, which newer NumPy
    # releases no longer provide; the resulting dtype is the same.
    point_colors = labels.astype(float)
    ax.scatter(reduced_data[:, 5], reduced_data[:, 2], reduced_data[:, 3], c=point_colors)
    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])
    # Figure 1 is cleared and the same cluster-coloured scatter is drawn
    # again; this second pass is what plt.show() displays.
    fig = plt.figure(1, figsize=(10, 10))
    plt.clf()
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    plt.cla()
    ax.scatter(reduced_data[:, 5], reduced_data[:, 2], reduced_data[:, 3], c=point_colors)
    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])
    plt.show()
示例14: load_model
# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 或者: from gensim.models.Doc2Vec import load [as 别名]
def load_model(self):
    """Load the doc2vec model into ``self.model`` and return it.

    The model is read from ``self.model_fname`` inside ``self.model_dir``;
    a log message is emitted before and after loading.

    Returns:
        The loaded model, also stored on ``self.model``.
    """
    logger.info('loading doc2vec model name %s', self.model_fname)
    model_path = join(self.model_dir, self.model_fname)
    self.model = Doc2Vec.load(model_path)
    logger.info('doc2vec model %s loaded', self.model_fname)
    return self.model