当前位置: 首页>>代码示例>>Python>>正文


Python Doc2Vec.load方法代码示例

本文整理汇总了Python中gensim.models.Doc2Vec.load方法的典型用法代码示例。如果您正苦于以下问题:Python Doc2Vec.load方法的具体用法?Python Doc2Vec.load怎么用?Python Doc2Vec.load使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.models.Doc2Vec的用法示例。


在下文中一共展示了Doc2Vec.load方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: load_idf_dict

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def load_idf_dict(self, dict_name='idf_dict'):
    """Return the cached IDF table, building it from disk on first use.

    On a cache miss the table is read from ``word-frequencies.txt`` under
    ``config.EX_DICT_DIR``.  The first line of the file is parsed as the
    total frequency; each later line is split on whitespace into a word
    and its frequency.  Words with a frequency below 10 are skipped and
    every other word is mapped to ``log2(total / frequency)``.

    The finished table is stored in ``self.dict_manager[dict_name]`` so
    that later calls return it without touching the file again.
    """
    # Guard clause: a previously loaded table is returned as-is.
    if dict_name in self.dict_manager:
        return self.dict_manager[dict_name]

    file_name = config.EX_DICT_DIR + '/word-frequencies.txt'
    print('load dict from file %s \n' % file_name)

    # NOTE(review): the handle returned by utils.create_read_file is never
    # closed here -- confirm whether the helper manages that itself.
    handle = utils.create_read_file(file_name)

    log_of_two = math.log(2)
    idf_by_word = {}
    for line_no, raw_line in enumerate(handle):
        if line_no == 0:
            # Header line: the corpus-wide total used as the numerator.
            total = int(raw_line)
            continue
        word, count = raw_line.strip().split()
        count = float(count)
        if count < 10:
            continue
        idf_by_word[word] = math.log(total / count) / log_of_two

    self.dict_manager[dict_name] = idf_by_word
    return self.dict_manager[dict_name]
开发者ID:rgtjf,项目名称:Semantic-Texual-Similarity-Toolkits,代码行数:25,代码来源:dict_utils.py

示例2: load_from_pickle

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def load_from_pickle(self, filename):
    """Copy the state of a saved model onto this instance.

    The model stored at ``filename`` is loaded with ``Doc2Vec.load`` and
    each of its attributes is assigned onto ``self``.  ``__dict__`` is
    never copied, and a callable attribute is left alone when ``self``
    already has an attribute of the same name.  Attributes that raise
    ``AttributeError`` while being read or assigned are skipped.
    """
    loaded = Doc2Vec.load(filename)
    for name in dir(loaded):
        # Keep our own __dict__ and any method this class already defines.
        keep_ours = name == '__dict__' or (
            name in dir(self) and callable(getattr(loaded, name))
        )
        if keep_ours:
            continue
        try:
            setattr(self, name, getattr(loaded, name))
        except AttributeError:
            # Read-only or otherwise unassignable attribute: leave it.
            pass
开发者ID:cemoody,项目名称:Document2Vec,代码行数:17,代码来源:document2vec.py

示例3: __init__

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def __init__(self, model_fname="data/doc2vec.vecs", use_notebook=False):
    """Load a saved Doc2Vec model and index its document tags.

    ``self.doc2idx`` maps every key of ``model.docvecs.doctags`` to its
    position in that key sequence; ``use_notebook`` is stored unchanged.
    """
    self.model = Doc2Vec.load(model_fname)

    tag_positions = {}
    for position, tag in enumerate(self.model.docvecs.doctags.keys()):
        tag_positions[tag] = position
    self.doc2idx = tag_positions

    self.use_notebook = use_notebook
开发者ID:ratsgo,项目名称:embedding,代码行数:6,代码来源:sent_eval.py

示例4: load_doc2vec

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def load_doc2vec(mod_file):
    """Load the Doc2Vec model saved at ``mod_file`` and return it."""
    model = Doc2Vec.load(mod_file)
    return model
开发者ID:hugochan,项目名称:KATE,代码行数:4,代码来源:doc2vec.py

示例5: load_dict

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def load_dict(self, dict_name, path=config.DICT_DIR):
        """
        Return the dictionary named ``dict_name``, loading it on first use.

        On a cache miss the file ``dict_<dict_name>.txt`` is read line by
        line; each line is stripped and split on tab characters:

        * one field  -> the field maps to its 1-based line number
        * two fields -> the first field maps to ``eval`` of the second
        * otherwise  -> ``NotImplementedError`` is raised

        The result is cached in ``self.dict_manager[dict_name]``.

        path: originally documented as config.DICT_DIR or
              config.DICT_EX_DIR.  NOTE(review): the argument is currently
              ignored -- it is overwritten below with the ``../resources``
              directory relative to this file.  Confirm whether that is
              intended.
        """
        # Only hit the disk when the dictionary has not been cached yet.
        if dict_name not in self.dict_manager:

            dict_object = {}

            # The caller-supplied ``path`` is replaced unconditionally here.
            cur_dir = os.path.dirname(__file__)
            path = os.path.join(cur_dir, '../resources')

            ''' load dict from file '''
            file_name = path + '/dict_%s.txt' % dict_name
            print('load dict from file %s \n' % file_name)

            # NOTE(review): this handle is never closed in this function --
            # confirm whether utils.create_read_file takes care of that.
            f_dict = utils.create_read_file(file_name)

            for idx, line in enumerate(f_dict):
                line = line.strip().split('\t')
                if len(line) == 1:
                    # Bare key: use the 1-based line number as its value.
                    dict_object[line[0]] = idx + 1
                elif len(line) == 2:
                    # SECURITY: eval executes arbitrary expressions taken
                    # from the file, with this function's names in scope.
                    # Only safe while the dictionary files are trusted;
                    # consider ast.literal_eval if values are plain literals.
                    dict_object[line[0]] = eval(line[1])
                else:
                    # More than two tab-separated fields is unsupported.
                    raise NotImplementedError

            self.dict_manager[dict_name] = dict_object

        return self.dict_manager[dict_name]
开发者ID:rgtjf,项目名称:Semantic-Texual-Similarity-Toolkits,代码行数:32,代码来源:dict_utils.py

示例6: load_doc2vec

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def load_doc2vec(self):
    """Return the shared Doc2Vec model, loading it on first access.

    The model is cached in ``self.dict_manager`` under the key
    ``'doc2vec'``; on a miss it is loaded from ``doc2vec.model`` under
    ``config.EX_DICT_DIR``.
    """
    dict_name = 'doc2vec'
    if dict_name in self.dict_manager:
        return self.dict_manager[dict_name]

    # gensim is imported only when the model actually has to be loaded.
    from gensim.models import Doc2Vec
    self.dict_manager[dict_name] = Doc2Vec.load(config.EX_DICT_DIR + '/doc2vec.model')
    return self.dict_manager[dict_name]
开发者ID:rgtjf,项目名称:Semantic-Texual-Similarity-Toolkits,代码行数:9,代码来源:dict_utils.py

示例7: test_doc2vec_inference_saveload

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def test_doc2vec_inference_saveload():
    """Check that a Doc2Vec model still works for retrieval after save/load.

    A small model is trained on ``documents``, written to ``TEST_FILE``,
    dropped, reloaded and the file removed.  The reloaded model is then
    wrapped for retrieval and the query "scientists" is expected to return
    document 1 first.
    """
    tagged_docs = []
    for doc_id, text in enumerate(documents):
        tagged_docs.append(TaggedDocument(simple_preprocess(text), [doc_id]))

    model = Doc2Vec(tagged_docs, epochs=1, min_count=1, vector_size=10)
    model.save(TEST_FILE)
    # Drop the in-memory model so only the on-disk copy is used from here.
    del model
    model = Doc2Vec.load(TEST_FILE)
    os.remove(TEST_FILE)

    embedder = Doc2VecInference(model, DEFAULT_ANALYZER)
    matcher = Matching()
    retrieval = Retrieval(embedder, matching=matcher)
    retrieval = retrieval.fit(documents)
    hits = retrieval.query("scientists")
    assert hits[0] == 1
开发者ID:lgalke,项目名称:vec4ir,代码行数:15,代码来源:test_vec4ir.py

示例8: retrainModel

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def retrainModel(vectorFile, dataFile, outputFile, iterations):
    """Continue training a saved Doc2Vec model and save the result.

    The corpus is read from ``Data\\<dataFile>``, the model from
    ``Models\\<vectorFile>``; ``model.train`` is called ``iterations``
    times and the model is written to ``Models\\<outputFile>``.

    NOTE(review): the paths use a literal backslash separator, and
    ``train`` is called without ``total_examples``/``epochs`` -- confirm
    both against the target platform and gensim version.
    """
    corpus = LabeledLineSentence("Data\\" + dataFile)
    model = Doc2Vec.load("Models\\" + vectorFile)

    passes_left = iterations
    while passes_left > 0:
        model.train(corpus)
        passes_left -= 1

    model.save("Models\\" + outputFile)
开发者ID:TyJK,项目名称:EchoBurst,代码行数:8,代码来源:echoDoc0.1.py

示例9: testModel

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def testModel(inputFile):
    """Run an interactive console menu against a saved Doc2Vec model.

    The model is loaded from ``Models\\<inputFile>`` and the menu repeats
    until the user enters anything other than 1, 2 or 3:

    1. print the documents most similar to a tag typed by the user;
    2. print the words most similar to a (lower-cased) word;
    3. read ``testing.txt`` (``tag<TAB>body`` per line), infer a vector
       for every body and append the most similar documents to
       ``clusteredResults.txt``.
    """
    model = Doc2Vec.load("Models\\" + inputFile)
    menu = ("Press 1 to compare documents within the model to each other.\n"
            "Press 2 to run similarity tests on individual words.\n"
            "Press 3 to get the top related subreddits for an inferred new vector (comment).\n"
            "Hit any key to exit.\n")

    while True:
        choice = input(menu)

        if choice == "1":
            subreddit = input("Enter the subreddit you want to test.\n")
            print(model.docvecs.most_similar(subreddit))
            continue

        if choice == "2":
            word = input("Enter the word you wish to analyze.\n").lower()
            print(model.most_similar(word))
            continue

        if choice != "3":
            # Any other input leaves the menu.
            break

        with open("testing.txt") as source:
            report = []
            for row in source.readlines():
                fields = row.split("\t")
                tag = fields[0]
                body = fields[1]
                inferred = model.infer_vector(body.split())
                neighbours = model.docvecs.most_similar(positive=[inferred])
                report.append(
                    "The original category is {}: {}\n {}\n".format(tag, body, neighbours)
                )
            # Results accumulate across runs: the file is opened in append mode.
            with open("clusteredResults.txt", "a") as sink:
                sink.writelines(report)
开发者ID:TyJK,项目名称:EchoBurst,代码行数:31,代码来源:echoDoc0.1.py

示例10: newKMeansModel

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def newKMeansModel(vectorFile, outputFile, numClusters):
    """Fit k-means on a model's document vectors and dump the estimator.

    The model is loaded from ``Models\\<vectorFile>``, a ``KMeans`` with
    ``numClusters`` clusters is fitted on ``model.docvecs.doctag_syn0`` and
    the fitted estimator is written to ``outputFile`` with ``joblib``.
    """
    # https://stackoverflow.com/questions/43476869/doc2vec-sentence-clustering

    loaded = Doc2Vec.load("Models\\" + vectorFile)
    doc_matrix = loaded.docvecs.doctag_syn0
    clusterer = KMeans(n_clusters=numClusters)

    # "Fitting Data" is printed after the fit has already finished.
    print("Starting")
    clusterer.fit(doc_matrix)
    print("Fitting Data")

    joblib.dump(clusterer, outputFile)
开发者ID:TyJK,项目名称:EchoBurst,代码行数:12,代码来源:echoDoc0.1.py

示例11: loadKMeansModel

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def loadKMeansModel(vectorFile, clusterFile, csvFile):
    """Join saved k-means labels with document tags and export them as CSV.

    The Doc2Vec model comes from ``Models\\<vectorFile>`` and the fitted
    clusterer from ``clusterFile``.  A DataFrame with one row per entry of
    ``model.docvecs.offset2doctag`` (tag, its ``doctags`` entry, cluster
    label) is printed and written to ``csvFile``.
    """
    # https://stackoverflow.com/questions/43476869/doc2vec-sentence-clustering

    model = Doc2Vec.load("Models\\" + vectorFile)
    km = joblib.load(clusterFile)
    clusters = km.labels_.tolist()

    doc_vectors = model.docvecs
    tags = doc_vectors.offset2doctag
    details_column = "index, wordcount and repeated words"

    cluster_info = {}
    cluster_info['labels'] = tags
    cluster_info[details_column] = [doc_vectors.doctags[tag] for tag in tags]
    cluster_info['clusters'] = clusters

    sentenceDF = pd.DataFrame(
        cluster_info,
        index=[clusters],
        columns=['labels', details_column, 'clusters'],
    )
    print(sentenceDF)
    sentenceDF.to_csv(csvFile)
开发者ID:TyJK,项目名称:EchoBurst,代码行数:15,代码来源:echoDoc0.1.py

示例12: newDBSCANModel

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def newDBSCANModel(vectorFile, outputFile):
    """Cluster a model's document vectors with DBSCAN and export the result.

    The Doc2Vec model is loaded from ``Models\\<vectorFile>``.  All of its
    document vectors are clustered with cosine-distance DBSCAN
    (``eps=0.03``, brute-force neighbours); the fitted estimator is dumped
    to ``outputFile`` with ``joblib``.  A DataFrame with one row per entry
    of ``model.docvecs.offset2doctag`` (tag, its ``doctags`` entry, cluster
    label) is printed and written to ``DBSCAN.csv``, followed by the
    estimated number of clusters.

    Unlike the earlier version, the vector size is no longer assumed to
    be 300: vectors of any dimensionality are accepted.
    """
    model = Doc2Vec.load("Models\\" + vectorFile)

    # One row per document, converted to float64.  This replaces a
    # reshape((1, 300)) / unwrap round trip that produced the same rows but
    # raised for any model whose vector size was not exactly 300.
    docs = np.array(
        [model.docvecs[doc] for doc in range(len(model.docvecs))],
        dtype='float',
    )

    db = DBSCAN(eps=0.03, algorithm="brute", metric='cosine').fit(docs)
    joblib.dump(db, outputFile)

    labels = db.labels_
    # Label -1 is DBSCAN's "noise" marker and does not count as a cluster.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    clusters = labels.tolist()

    cluster_info = {'labels': model.docvecs.offset2doctag,
                    "index, wordcount and repeated words": [model.docvecs.doctags[x] for x in
                                                            model.docvecs.offset2doctag],
                    'clusters': clusters}
    sentenceDF = pd.DataFrame(cluster_info, index=[clusters],
                              columns=['labels', "index, wordcount and repeated words", 'clusters'])
    print(sentenceDF)
    sentenceDF.to_csv("DBSCAN.csv")

    print('Estimated number of clusters: %d' % n_clusters_)
开发者ID:TyJK,项目名称:EchoBurst,代码行数:35,代码来源:echoDoc0.1.py

示例13: plotModel3D

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def plotModel3D(vectorFile, numClusters):
    """Show a 3-D scatter plot of k-means clusters over PCA-reduced vectors.

    The Doc2Vec model is loaded from ``Models\\<vectorFile>``; its
    ``docvecs.doctag_syn0`` matrix is reduced to 10 PCA components and
    clustered with k-means (``numClusters`` clusters).  PCA components 5, 2
    and 3 are plotted against each other, coloured by cluster label, with
    tick labels hidden, and the figure is shown with ``plt.show()``.
    """
    # http://scikit-learn.org/stable/auto_examples/cluster/plot_cluster_iris.html

    def _scatter_clusters(axes, points, colours):
        """Scatter PCA components 5, 2, 3 on ``axes`` and blank the tick labels."""
        axes.scatter(points[:, 5], points[:, 2], points[:, 3], c=colours)
        # NOTE(review): w_xaxis/w_yaxis/w_zaxis are legacy Axes3D aliases --
        # confirm they exist in the Matplotlib version in use.
        axes.w_xaxis.set_ticklabels([])
        axes.w_yaxis.set_ticklabels([])
        axes.w_zaxis.set_ticklabels([])

    model = Doc2Vec.load("Models\\" + vectorFile)
    docVecs = model.docvecs.doctag_syn0
    reduced_data = PCA(n_components=10).fit_transform(docVecs)
    kmeans = KMeans(init='k-means++', n_clusters=numClusters, n_init=10)

    fig = plt.figure(1, figsize=(10, 10))
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    kmeans.fit(reduced_data)
    # The np.float alias was removed in NumPy 1.24; it was the builtin float.
    colours = kmeans.labels_.astype(float)
    _scatter_clusters(ax, reduced_data, colours)

    # Clear figure 1 and draw the same k-means colouring again on fresh
    # axes (call sequence kept from the original; this is not ground truth).
    fig = plt.figure(1, figsize=(10, 10))
    plt.clf()
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    plt.cla()
    _scatter_clusters(ax, reduced_data, colours)
    plt.show()
开发者ID:TyJK,项目名称:EchoBurst,代码行数:29,代码来源:echoDoc0.1.py

示例14: load_model

# 需要导入模块: from gensim.models import Doc2Vec [as 别名]
# 注意: load 是 Doc2Vec 的类方法,导入 Doc2Vec 后通过 Doc2Vec.load(...) 调用(无法用 from gensim.models.Doc2Vec import load 单独导入)
def load_model(self):
    """Load the Doc2Vec model named by ``self.model_fname`` and return it.

    The file is looked up inside ``self.model_dir``; the loaded model is
    also stored on ``self.model``.  A log line is emitted before and after
    loading.
    """
    fname = self.model_fname
    logger.info('loading doc2vec model name %s', fname)
    model_path = join(self.model_dir, fname)
    self.model = Doc2Vec.load(model_path)
    logger.info('doc2vec model %s loaded', fname)
    return self.model
开发者ID:THUDM,项目名称:OAG,代码行数:7,代码来源:title2vec.py


注:本文中的gensim.models.Doc2Vec.load方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。