当前位置: 首页>>代码示例>>Python>>正文


Python Doc2Vec.load方法代码示例

本文整理汇总了Python中gensim.models.doc2vec.Doc2Vec.load方法的典型用法代码示例。如果您正苦于以下问题:Python Doc2Vec.load方法的具体用法?Python Doc2Vec.load怎么用?Python Doc2Vec.load使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在gensim.models.doc2vec.Doc2Vec的用法示例。


在下文中一共展示了Doc2Vec.load方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: setUp

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
    def setUp(self):
        """Read the training documents and load the source and target Doc2Vec models."""
        self.train_docs = read_sentiment_docs(datapath("alldata-id-10.txt"))

        # Paths of the two stored models handed to Doc2Vec.load below.
        self.source_doc_vec_file = datapath("small_tag_doc_5_iter50")
        self.target_doc_vec_file = datapath("large_tag_doc_10_iter50")

        self.source_doc_vec = Doc2Vec.load(self.source_doc_vec_file)
        self.target_doc_vec = Doc2Vec.load(self.target_doc_vec_file)
开发者ID:RaRe-Technologies,项目名称:gensim,代码行数:11,代码来源:test_translation_matrix.py

示例2: do_command

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
def do_command(args):
    """Embed the input documents and write nearest-neighbour pairs as TSV.

    The ``(id, document)`` pairs returned by ``load_data(args.input)`` are
    tokenized. A model is trained with ``embed_documents`` and saved when
    ``args.modelfile`` does not exist; otherwise the stored model is loaded.
    Every ``(i, j, score)`` triple produced by ``find_nearest_neighbors`` is
    written to ``args.output`` as a tab-separated ``id1, id2, score`` row.

    Args:
        args: namespace providing ``input``, ``modelfile``, ``output`` (handed
            to ``csv.writer``) and ``count``. A missing, zero or negative
            ``count`` means "use every document vector in the model".
    """
    # Load data
    data = [(doc_id, tokenize(doc)) for doc_id, doc in load_data(args.input)]
    ids = [doc_id for doc_id, _ in data]

    if not os.path.exists(args.modelfile):
        model = embed_documents(data)
        # Save model
        model.save(args.modelfile)
    else:
        model = Doc2Vec.load(args.modelfile)
    print("Loaded model.")

    # Do k-nearest neighbors search.
    writer = csv.writer(args.output, delimiter='\t')
    writer.writerow(["id1", "id2", "score"])

    # Convert before comparing: comparing a raw string or None with 0 raises
    # TypeError on Python 3 and is always True for strings on Python 2.
    requested = int(args.count) if args.count else 0
    count = requested if requested > 0 else len(model.docvecs)
    vectors = np.array([model.docvecs[i] for i in range(count)])
    del model  # clear up memory

    for i, j, score in find_nearest_neighbors(vectors):
        writer.writerow([ids[i], ids[j], score])
开发者ID:arunchaganty,项目名称:aeschines,代码行数:28,代码来源:doc2vec.py

示例3: __init__

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
    def __init__(self, sentences, name, dataset_name, epochs=1, dimension=50, modelfile=None):
        """Load the stored Doc2Vec model for this configuration, or train and save one.

        The model file lives in ``data/models`` two directory levels above
        this source file. When ``modelfile`` is not given, the file name is
        derived from the dataset name, model name, epochs and dimension.

        Args:
            sentences: corpus passed to ``Doc2Vec`` when no stored model exists.
            name: model name, used in the default file name.
            dataset_name: dataset name, used in the default file name.
            epochs: stored on the instance and used in the default file name.
                NOTE(review): not passed to ``Doc2Vec`` here -- confirm intent.
            dimension: vector size passed to ``Doc2Vec`` as ``size``.
            modelfile: optional file name, joined onto the models folder.
        """
        self.inner_model = None

        # parameters
        self.dataset = dataset_name
        self.sentences = sentences
        self.name = name
        self.epochs = epochs
        self.dimension = dimension

        # data file path
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        models_folder = os.path.join(base_dir, 'data', 'models')
        if modelfile is not None:
            filename = modelfile
        else:
            filename = "DOC2VEC_%s_%s_%s_%s" % (self.dataset, self.name, self.epochs, self.dimension)
        self.filepath = os.path.join(models_folder, filename)

        # Reuse a stored model when present; otherwise train the initial model.
        if os.path.isfile(self.filepath):
            logging.info("found data file %s", self.filepath)
            self.inner_model = Doc2Vec.load(self.filepath)
        else:
            self.inner_model = Doc2Vec(sentences, size=self.dimension)
            # Single-argument call form prints identically on Python 2 and 3.
            print(self.inner_model.vocab.keys())
            self.inner_model.save(fname=self.filepath)
开发者ID:carriercomm,项目名称:medical-text,代码行数:29,代码来源:D2Vmodel.py

示例4: test_category

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
def test_category():
    """Print the 'happy'/'sad' word similarity from a ConceptRelatedness built on the cat2vec model."""
    from gensim.models.doc2vec import Doc2Vec
    from sematch.utility import FileIO
    from sematch.semantic.relatedness import ConceptRelatedness

    cat2vec_path = FileIO.filename('models/category/cat2vec')
    relatedness = ConceptRelatedness(Doc2Vec.load(cat2vec_path))
    similarity = relatedness.word_similarity('happy', 'sad')
    print(similarity)
开发者ID:gsi-upm,项目名称:sematch,代码行数:9,代码来源:test_relatedness.py

示例5: __init__

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
    def __init__(self, size=300, window=8, min_count=2, workers=8, path_to_model=None, stream_train=False):
        '''
        Initializes the Doc2Vec_Wrapper class.

        Args:
            size (int): Specifies the size of the feature-vector. Defaults to 300
            window (int): Specifies the size of the context window from which the feature vector is learned
            min_count (int): Specifies the minimum number of instances of each word that is saved in the model
            workers (int): number of parallel processes
            path_to_model (str): Specifies model on disk
            stream_train (bool): If true, update word vectors with new sentences. If false, just get doc vecs
        '''

        self.stream_train = stream_train

        self.is_trained = False
        self.model = None

        ## if a path is passed, try to load from disk. Otherwise, retrain anyway
        if path_to_model:
            try:
                self.model = Doc2Vec.load(path_to_model)
            except Exception:
                # Best effort: a failed load leaves the wrapper untrained so
                # that it is not reported as trained while model is None.
                self.model = None
            else:
                self.is_trained = True

        ## params for Doc2Vec
        self.size = size  ## size of the vector
        self.window = window  ## size of the context window
        self.min_count = min_count  ## minimum count of vocab to store in binary tree
        self.workers = workers  ## number of parallel processes == number of cores on the computer
开发者ID:redreamality,项目名称:broca,代码行数:34,代码来源:doc2vec_wrapper.py

示例6: load_external

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
 def load_external(self, model_file_name):
     """
     Load a Doc2Vec model from the given file and keep it on ``self.model``.

     :param model_file_name: name of the model file
     :return: None
     """
     loaded_model = Doc2Vec.load(model_file_name)
     self.model = loaded_model
开发者ID:subhadeepmaji,项目名称:ml_algorithms,代码行数:9,代码来源:DocumentEmbedding.py

示例7: varify

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
def varify():
    """Infer vectors for the first three pickled documents and print their nearest doc vectors.

    Loads the model from ``data/doc2vec.d2v`` and the corpus from
    ``data/fedcorpus.pick``. For each of the first three documents it prints
    the document's tags, then the model and the three most similar stored
    document vectors.
    """
    from gensim.models.doc2vec import Doc2Vec
    model = Doc2Vec.load('data/doc2vec.d2v')
    # Pickle data is binary, so open in 'rb' and close the file deterministically.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open('data/fedcorpus.pick', 'rb') as corpus_file:
        documents = pickle.load(corpus_file)
    for i in range(3):
        inferred_docvec = model.infer_vector(documents[i].words)
        print(documents[i].tags)
        print('%s:\n %s' % (model, model.docvecs.most_similar([inferred_docvec], topn=3)))
开发者ID:wtgme,项目名称:ohsn,代码行数:10,代码来源:doc2vec.py

示例8: get_model

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
def get_model(model_num, model_names):
    """Load the model stored at ``model_path + model_names``; the loader depends on ``model_num``.

    ``model_num`` below 10 loads a Word2Vec model, 10 to 98 loads a Doc2Vec
    model, and anything else is read with ``Word2Vec.load_word2vec_format``.
    """
    if model_num < 10:
        return Word2Vec.load(model_path + model_names)
    if model_num < 99:
        return Doc2Vec.load(model_path + model_names)
    # word2vec C format; read with binary=True
    return Word2Vec.load_word2vec_format(model_path + model_names, binary=True)
开发者ID:DirkBrand,项目名称:Comment-Classification,代码行数:12,代码来源:ModelTraining.py

示例9: create_and_train_models_d2vec

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
def create_and_train_models_d2vec(tag, cores=6):
    """
    Load the Doc2Vec models for ``tag`` from disk, or build, train and save them.

    :param tag: small or big
    :param cores: number of cores
    :return: OrderedDict mapping ``str(model)`` to each model
    """
    simple_models = get_models_d2vec(cores)
    model_files = get_models_filename_d2vec(tag)

    # Fast path: every expected model file exists, so just load them.
    if all([os.path.exists(model_file) for model_file in model_files]):
        print('Models exist, loading...')
        for position, saved_path in enumerate(model_files):
            simple_models[position] = Doc2Vec.load(saved_path)
        return OrderedDict((str(loaded), loaded) for loaded in simple_models)

    print('Building models...')
    shared_vocab = build_vocab_d2vec(tag, cores)
    # Share vocabulary between models
    for untrained in simple_models:
        untrained.reset_from(shared_vocab)

    models_by_name = OrderedDict((str(entry), entry) for entry in simple_models)
    print('Training models...')
    print("START %s" % datetime.datetime.now())
    # NOTE(review): best_error is never read in this function.
    best_error = defaultdict(lambda: 1.0)

    alpha = 0.025
    min_alpha = 0.001
    passes = 20
    alpha_delta = (alpha - min_alpha) / passes

    train_path = x_train_str.format(tag)
    train_list = pd.read_hdf(train_path).tolist()

    for epoch in range(passes):
        shuffle(train_list)  # shuffling gets best results

        for train_model in models_by_name.values():
            # NOTE(review): duration is measured but never reported.
            duration = 'na'
            # The learning rate is held fixed within a pass and lowered between passes.
            train_model.alpha = alpha
            train_model.min_alpha = alpha
            with elapsed_timer() as elapsed:
                train_model.train(
                    CorpusStream(train_list, 'train'),
                    total_examples=train_model.corpus_count,
                    epochs=train_model.iter,
                )
                duration = '%.1f' % elapsed()

        print('completed pass %i at alpha %f' % (epoch + 1, alpha))
        alpha -= alpha_delta

    print("END %s" % str(datetime.datetime.now()))
    for model_name, trained in models_by_name.items():
        # Remove '/' and turn ',' into '_' so the model name can be used in a file name.
        file_stem = model_name.replace('/', '').replace(',', '_')
        trained.save('models/{0}_{1}.m'.format(file_stem, tag))

    return models_by_name
开发者ID:papapana,项目名称:data_science,代码行数:56,代码来源:yelp_runner.py

示例10: get_WordVector_matrix

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
def get_WordVector_matrix(label):
    """Build a ``len(label)`` x ``depth`` matrix from the stored document vectors.

    Row ``i`` is filled from ``model.docvecs[str(i)]``. Rows whose tag is
    missing stay zero and a message is printed.

    Args:
        label: sized collection; only its length is used.

    Returns:
        A list of lists, one per row (a real list on Python 2 and 3 alike).
    """
    model = Doc2Vec.load('./WordVector_model.d2v')
    size = len(label)
    vectors = np.zeros((size, depth))
    for i in range(size):
        try:
            doc_vector = model.docvecs[str(i)]
            # NOTE(review): only element 0 of the lookup is assigned to the
            # row; if doc_vector is 1-D this broadcasts one scalar -- confirm.
            vectors[i] = doc_vector[0]
        except KeyError:
            print(str(i) + ' occurs KeyError')
    return [list(row) for row in vectors]
开发者ID:azhe825,项目名称:CSC510,代码行数:14,代码来源:get_model.py

示例11: test_models

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
def test_models(FULL_SIM, models_files):
    """Load every model file in ``models_files`` and pass it to ``test_model``."""
    test_papers = pd.read_csv(TEST_FILEPATH)

    # NOTE: only needed for testing with AII
    keywords_docsrels = populate_iks_dict()
    authorities = initialize_authorities()

    for model_file in models_files:
        print('Testing ' + model_file)
        loaded_model = Doc2Vec.load(model_file)
        print('Model loaded.')

        test_model(FULL_SIM, loaded_model, test_papers, keywords_docsrels, authorities)
开发者ID:cuptrail,项目名称:papertrail-backend,代码行数:15,代码来源:doc2vec_train.py

示例12: build_model

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
def build_model(x_train, x_test, iteration=5, save=True):
    """Train and save a Doc2Vec model, or load the previously saved one.

    Args:
        x_train: training documents; concatenated with ``x_test`` using ``+``.
        x_test: test documents.
        iteration: number of ``model.train`` passes over the combined corpus.
        save: when truthy, train on ``x_train + x_test`` and save the model to
            ``./sentim.d2v``; otherwise load the model from that file.

    Returns:
        The trained or loaded Doc2Vec model.
    """
    # Indentation uses spaces only; the original mixed tabs and spaces,
    # which Python 3 rejects with a TabError.
    if save:
        big_list = x_train + x_test
        model = Doc2Vec(min_count=2, window=10, size=100, sample=1e-4, negative=5, workers=8)
        model.build_vocab(big_list)
        for _ in range(iteration):
            model.train(big_list)
        print('saving model to file.....')
        model.save('./sentim.d2v')
    else:
        print('loading model from file.....')
        model = Doc2Vec.load('./sentim.d2v')
    return model
开发者ID:moliq1,项目名称:sentiment_analysis,代码行数:15,代码来源:doc2vec.py

示例13: datacluster

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
def datacluster(data):
    """Infer Doc2Vec vectors for ``data``, reduce them with PCA and cluster them with K-Means.

    The fitted K-Means model is dumped to ``cluster_path`` with joblib, and
    progress messages are printed along the way.

    Args:
        data: iterable of ``(text, label)`` pairs. Only ``text`` is used; it
            is passed to ``infer_vector`` of the model at ``model_path``.

    Returns:
        ``kmean_model.labels_``: the cluster label assigned to each sample.
    """
    print("load model...")
    model_dm = Doc2Vec.load(model_path)
    print("load train vectors...")
    infered_vectors_list = [model_dm.infer_vector(text) for text, _label in data]

    # Disabled experiment (previously an inert string literal): elbow curve
    # of the K-Means score against the number of PCA components.
    # print "Check the optimized parameter..."
    # Nc = range(1, 50)
    # pca_data = [PCA(n_components = i).fit(infered_vectors_list).transform(infered_vectors_list) for i in Nc]
    # kmeans = cluster.KMeans(init='k-means++',n_clusters=20,max_iter=300)
    # score = [kmeans.fit(pca_data[i]).score(pca_data[i]) for i in range(len(pca_data))]
    # print score
    # plt.plot(Nc,score)
    # plt.xlabel('PCA components')
    # plt.ylabel('Score')
    # plt.title('Elbow Curve')
    # plt.show()

    print("PCA decomposition...")
    pca = PCA(n_components=10).fit(infered_vectors_list)
    pca_data = pca.transform(infered_vectors_list)

    print("train K-Means model...")
    kmean_model = cluster.KMeans(init='k-means++', n_clusters=16, max_iter=300)
    # fit_predict fits the model itself, so a separate fit() beforehand only
    # repeated the clustering; one call both fits and yields the labels.
    result = kmean_model.fit_predict(pca_data)
    # %-formatting keeps the output identical on Python 2 and Python 3.
    print("Predicting result: %s" % (result,))

    # save the cluster result
    joblib.dump(kmean_model, cluster_path)
    # load the cluster result
    # new_km = joblib.load(cluster_path)
    numSamples = len(pca_data)
    print(numSamples)

    # labels_ holds the per-sample cluster index, not the centre coordinates.
    labels = kmean_model.labels_

    # print centroids,type(centroids)  # show the centre points
    # print kmean_model.inertia_  # show the clustering quality

    # Disabled plot (previously an inert string literal): scatter of the
    # first two PCA components, styled by cluster label.
    # marker = ['o', '.', ',', 'x', '*', 'd', 's', 'p']
    # color = ['r', 'g', 'b', 'c', 'm', 'k', 'y', 'w']
    # for i in xrange(numSamples):
    #     plt.scatter(pca_data[i][0], pca_data[i][1],
    #                 marker=marker[centroids[i]], color=color[centroids[i]])
    # plt.show()
    return labels
开发者ID:NeoCui,项目名称:Codebackup,代码行数:52,代码来源:cluster.py

示例14: get_vec

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
def get_vec(vector_file, id_file, w_file):
    """Write the document vector of every id listed in ``id_file`` to ``w_file``.

    Each output line is the stripped id, a tab, the tab-separated vector
    components, and a trailing tab before the newline.

    Args:
        vector_file: path of the stored Doc2Vec model.
        id_file: text file with one document tag per line.
        w_file: output path; opened for writing, replacing existing content.
    """
    p2v = Doc2Vec.load(vector_file)
    # Both files are context-managed so they are closed even when a lookup
    # fails part-way through (for example, a tag missing from docvecs).
    with open(w_file, "w") as fout, open(id_file) as f:
        for index, line in enumerate(f, 1):
            if index % 1000 == 0:
                # NOTE(review): `logging` is called directly, so it must be a
                # callable here; the stdlib logging module would raise
                # TypeError -- confirm against the file's imports.
                logging("%d cases" % index)
            line = line.strip()
            vec = p2v.docvecs[line]
            line_w = line + "\t" + "\t".join([str(x) for x in vec]) + "\t" + "\n"
            fout.write(line_w)
开发者ID:lienzhen,项目名称:review_rating,代码行数:16,代码来源:generate_vector.py

示例15: main

# 需要导入模块: from gensim.models.doc2vec import Doc2Vec [as 别名]
# 或者: from gensim.models.doc2vec.Doc2Vec import load [as 别名]
def main():
    """Train one binary SVM per category on doc2vec features and write the scores to a report file.

    The report holds micro-averaged precision, recall and F1, followed by the
    per-category accuracy sorted in ascending order.
    """
    # data sets
    training_reviews = load_dataset(TRAIN_FILE)
    testing_reviews = load_dataset(TEST_FILE)

    # doc2vec model used to build the feature vectors
    doc2vec_model = Doc2Vec.load(DOC2VEC_MODEL)

    cate_index = get_all_categories(training_reviews)
    cates = dict2list(cate_index)
    n_cates = len(cates)

    train_X = get_X(training_reviews, doc2vec_model)
    test_X = get_X(testing_reviews, doc2vec_model)

    train_labels = get_labels(training_reviews, cate_index)
    test_labels = get_labels(testing_reviews, cate_index)

    labelwise_acc = []
    labelwise_output = []

    for cate in range(n_cates):
        # train a binary model for this category
        problem = svm_problem(get_Y(train_labels, cate), train_X)
        parameters = svm_parameter("-s 0 -t 2 -b 1")
        svm_model = svm_train(problem, parameters)

        # test
        predicted = svm_predict(get_Y(test_labels, cate), test_X, svm_model, '-b 1')
        p_label, p_acc, p_val = predicted

        labelwise_acc.append(p_acc)
        labelwise_output.append(p_label)

    # evaluation
    p, r, f = microF1(labelwise_output, test_labels)

    # output: pair each category with the first element of its accuracy
    # result, lowest score first
    out_dir = "../data/use_doc2vec/"
    out_file = out_dir + "laptop.txt"
    scored = [(cates[i], labelwise_acc[i][0]) for i in range(n_cates)]
    scored.sort(key=lambda pair: pair[1])
    with open(out_file, 'w') as out:
        out.write("Precision:\t{}\nRecall:\t{}\nF1:\t{}\n".format(p, r, f))
        print("{}\n{}\n{}".format(p, r, f))
        for cate_name, accuracy in scored:
            out.write("{}:\t{}\n".format(cate_name, accuracy))
开发者ID:Friedmannn,项目名称:ABSA,代码行数:50,代码来源:use_sent2vec.py


注:本文中的gensim.models.doc2vec.Doc2Vec.load方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。