

Python models.Word2Vec Method Code Examples

This article collects and summarizes typical usage examples of the Python method gensim.models.Word2Vec. If you are unsure how to use models.Word2Vec in Python, or want to see what it looks like in practice, the curated code examples below may help. You can also explore further usage examples from its parent module, gensim.models.


The following presents 15 code examples of the models.Word2Vec method, sorted by popularity by default.
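Before the individual examples, a minimal self-contained training sketch may help orient readers. Note that every example on this page uses the pre-4.0 gensim API: gensim 4.0 renamed size to vector_size and iter to epochs, and word lookups such as model[word] moved to model.wv[word]. The toy corpus below is invented purely for illustration.

# Minimal Word2Vec training sketch (gensim 3.x API, as used throughout this page).
# The two-sentence corpus is a made-up toy example.
from gensim.models import Word2Vec

sentences = [
    ["graph", "node", "edge", "walk"],
    ["node", "embedding", "vector", "walk"],
]

model = Word2Vec(
    sentences,
    size=32,      # embedding dimensionality (vector_size in gensim >= 4.0)
    window=5,     # context window size
    min_count=1,  # keep every token in this tiny corpus
    sg=1,         # skip-gram; sg=0 would select CBOW
    iter=10,      # training epochs (epochs in gensim >= 4.0)
    workers=2,
)

print(model.wv["node"].shape)  # (32,)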

Example 1: skipgram_baseline

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def skipgram_baseline(graph, **kwargs):
    scale = kwargs.get('scale', -1)
    representation_size = kwargs.get('representation_size', 128)

    if scale == 1:
        edges, weights = graph.get_edges()
    else:
        path_length = kwargs.get('path_length', 40)
        num_paths = kwargs.get('num_paths', 80)
        output = kwargs.get('output', 'default')
        edges = graph_coarsening.build_deepwalk_corpus(graph, num_paths, path_length, output)

    if kwargs['hs'] == 0:
        print('Training the Negative Sampling Model...')
        model = Word2Vec(edges, size=representation_size, window=kwargs['window_size'], min_count=0, sg=1, hs=0, iter=kwargs['iter_count'], negative=5, workers=20)
    else:
        print('Training the Hierarchical Softmax Model...')
        model = Word2Vec(edges, size=representation_size, window=kwargs['window_size'], min_count=0, sg=1, hs=1, iter=kwargs['iter_count'], workers=20)

    print('Finished training the Skip-gram model.')
    return model 
Developer: GTmac, Project: HARP, Lines: 23, Source: baseline.py

Example 2: sum_trigram

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def sum_trigram(sent, model):
    # Score a sentence under a trigram model: `model` maps (w1, w2) context
    # tuples to a distribution over next words, with None padding the first
    # two positions.
    sent = sent.split()
    first = True
    second = True
    tot = 0
    for i in range(len(sent)):
        try:
            if first:
                tot += model[None, None][sent[i]]
                first = False
            elif second:
                tot += model[None, sent[i-1]][sent[i]]
                second = False
            else:
                tot += model[sent[i-2], sent[i-1]][sent[i]]
        except KeyError:
            # skip n-grams missing from the model
            continue
    return tot

# Word2Vec training (returns a vector): 
Developer: GauravBh1010tt, Project: DeepLearn, Lines: 22, Source: lex_sem_ft.py
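The trailing comment in Example 2 announces a Word2Vec training helper from the same source file that is not reproduced on this page. A minimal hypothetical reconstruction, using the same gensim 3.x API as the other examples, might look like this:

# Hypothetical reconstruction only; the actual function in lex_sem_ft.py
# is not shown on this page.
from gensim.models import Word2Vec

def train_word2vec(sentences, size=100):
    # sentences: list of token lists; returns the trained word vectors
    model = Word2Vec(sentences, size=size, min_count=1, sg=1, iter=10)
    return model.wv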

Example 3: train_save

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train_save(self, list_csv):
        sentences = MySentences(list_csv)
        num_features = 256
        min_word_count = 1
        num_workers = 20
        context = 5
        epoch = 20
        sample = 1e-5
        model = Word2Vec(
            sentences,
            size=num_features,
            min_count=min_word_count,
            workers=num_workers,
            sample=sample,
            window=context,
            iter=epoch,
        )
        # model.save(model_fn)
        return model 
Developer: chenyuntc, Project: PyTorchText, Lines: 21, Source: graph2vec.py

Example 4: train

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs):

        kwargs["sentences"] = self.sentences
        kwargs["min_count"] = kwargs.get("min_count", 0)
        kwargs["size"] = embed_size
        kwargs["sg"] = 1  # skip gram
        kwargs["hs"] = 1  # deepwalk use Hierarchical Softmax
        kwargs["workers"] = workers
        kwargs["window"] = window_size
        kwargs["iter"] = iter

        print("Learning embedding vectors...")
        model = Word2Vec(**kwargs)
        print("Learning embedding vectors done!")

        self.w2v_model = model
        return model 
Developer: shenweichen, Project: GraphEmbedding, Lines: 19, Source: deepwalk.py

Example 5: train

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs):

        kwargs["sentences"] = self.sentences
        kwargs["min_count"] = kwargs.get("min_count", 0)
        kwargs["size"] = embed_size
        kwargs["sg"] = 1
        kwargs["hs"] = 0  # node2vec not use Hierarchical Softmax
        kwargs["workers"] = workers
        kwargs["window"] = window_size
        kwargs["iter"] = iter

        print("Learning embedding vectors...")
        model = Word2Vec(**kwargs)
        print("Learning embedding vectors done!")

        self.w2v_model = model

        return model 
Developer: shenweichen, Project: GraphEmbedding, Lines: 20, Source: node2vec.py

Example 6: train_word2vec_by_word

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train_word2vec_by_word():
    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logging.info("running")

    inp = "cut_zhwiki_wiki_parse.txt"
    outp1 = "w2v_model_wiki.model"
    outp2 = "w2v_model_wiki_word.vec"

    print(multiprocessing.cpu_count())
    model = Word2Vec(LineSentence(inp), size=300, window=10,
                     # skip-gram with hierarchical softmax
                     min_count=1, sg=1, hs=1, iter=10, workers=multiprocessing.cpu_count())

    model.save(outp1)
    model.wv.save_word2vec_format(outp2, binary=False) 
Developer: yongzhuo, Project: nlg-yongzhuo, Lines: 18, Source: keyword_word2vec.py

Example 7: learn_base_embedding

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def learn_base_embedding(self):
        """
        Learning an embedding of nodes in the base graph.
        :return self.embedding: Embedding of nodes in the latent space.
        """
        self.paths = [[str(node) for node in walk] for walk in self.paths]

        model = Word2Vec(self.paths,
                         size=self.args.dimensions,
                         window=self.args.window_size,
                         min_count=1,
                         sg=1,
                         workers=self.args.workers,
                         iter=1)

        self.embedding = np.array([list(model[str(n)]) for n in self.graph.nodes()])
        return self.embedding 
Developer: benedekrozemberczki, Project: Splitter, Lines: 19, Source: walkers.py

Example 8: build

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def build(train_seg_path, test_seg_path, out_path=None, sentence_path='',
          w2v_bin_path="w2v.bin", min_count=1, col_sep='\t'):
    sentences = extract_sentence(train_seg_path, test_seg_path, col_sep=col_sep)
    save_sentence(sentences, sentence_path)
    print('train w2v model...')
    # train model
    w2v = Word2Vec(sg=1, sentences=LineSentence(sentence_path),
                   size=256, window=5, min_count=min_count, iter=40)
    w2v.wv.save_word2vec_format(w2v_bin_path, binary=True)
    print("save %s ok." % w2v_bin_path)
    # test
    # sim = w2v.wv.similarity('大', '小')
    # print('大 vs 小 similarity score:', sim)
    # load model
    model = KeyedVectors.load_word2vec_format(w2v_bin_path, binary=True)
    word_dict = {}
    for word in model.vocab:
        word_dict[word] = model[word]
    save_pkl(word_dict, out_path, overwrite=True) 
Developer: shibing624, Project: text-classifier, Lines: 21, Source: build_w2v.py
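As a follow-up usage sketch for Example 8 (the file name mirrors the default w2v_bin_path above and the query words mirror the commented-out test; both are illustrative):

# Hypothetical follow-up: reload the binary vectors saved by build() and query them.
from gensim.models import KeyedVectors

model = KeyedVectors.load_word2vec_format("w2v.bin", binary=True)
print(model.similarity('大', '小'))      # cosine similarity of two words
print(model.most_similar('大', topn=5))  # five nearest neighbours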

Example 9: train

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train(self, G):
        self.G = G
        walks = self._simulate_walks(self.walk_length, self.walk_num)
        walks = [[str(node) for node in walk] for walk in walks]
        model = Word2Vec(
            walks,
            size=self.dimension,
            window=self.window_size,
            min_count=0,
            sg=1,
            workers=self.worker,
            iter=self.iteration,
        )
        id2node = dict([(vid, node) for vid, node in enumerate(G.nodes())])
        embeddings = np.asarray([model[str(id2node[i])] for i in range(len(id2node))])
        return embeddings 
Developer: imsheridan, Project: CogDL-TensorFlow, Lines: 18, Source: deepwalk.py

Example 10: train

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train(self, G):
        self.G = G
        is_directed = nx.is_directed(self.G)
        for i, j in G.edges():
            G[i][j]["weight"] = G[i][j].get("weight", 1.0)
            if not is_directed:
                G[j][i]["weight"] = G[j][i].get("weight", 1.0)
        self._preprocess_transition_probs()
        walks = self._simulate_walks(self.walk_num, self.walk_length)
        walks = [[str(node) for node in walk] for walk in walks]
        model = Word2Vec(
            walks,
            size=self.dimension,
            window=self.window_size,
            min_count=0,
            sg=1,
            workers=self.worker,
            iter=self.iteration,
        )
        id2node = dict([(vid, node) for vid, node in enumerate(G.nodes())])
        self.embeddings = np.asarray(
            [model[str(id2node[i])] for i in range(len(id2node))]
        )
        return self.embeddings 
Developer: imsheridan, Project: CogDL-TensorFlow, Lines: 26, Source: node2vec.py

Example 11: learn_pooled_embeddings

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def learn_pooled_embeddings(walks, counts, args):
    """
    Method to learn an embedding given the sequences and arguments.
    :param walks: Linear vertex sequences.
    :param counts: Number of nodes.
    :param args: Arguments.
    """
    model = Word2Vec(walks,
                     size=args.dimensions,
                     window=args.window_size,
                     min_count=1,
                     sg=1,
                     workers=args.workers,
                     iter=args.iter,
                     alpha=args.alpha)

    save_embedding(args, model, counts) 
Developer: benedekrozemberczki, Project: diff2vec, Lines: 19, Source: diffusion_2_vec.py

Example 12: uptrain

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def uptrain(corpus,
            model_path=None,
            binary=True,
            lockf=0.0,
            min_count=1,
            size=300,
            **word2vec_params):
    wv = Word2Vec(min_count=min_count, size=size, **word2vec_params)
    print("Building vocabulary...")
    wv.build_vocab(corpus)
    print("Found %d distinct words." % len(wv.index2word))
    if model_path is not None:
        print("Intersecting with", model_path, "...")
        wv.intersect_word2vec_format(model_path, binary=binary, lockf=lockf)
        print("Intersected vectors locked with", lockf)

    total_examples = len(corpus)
    print("Training on %d documents..." % total_examples)
    wv.train(corpus, total_examples=total_examples, epochs=wv.epochs)  # epochs is required by gensim >= 1.0

    return wv 
Developer: lgalke, Project: vec4ir, Lines: 23, Source: postprocessing.py
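A hedged example of calling uptrain(); the toy corpus and the pretrained-vector file name below are placeholders rather than anything from the vec4ir project:

# Hypothetical invocation of uptrain(); "GoogleNews-vectors-negative300.bin"
# stands in for any word2vec-format pretrained model on disk.
corpus = [["retrieval", "with", "word", "embeddings"],
          ["more", "in", "domain", "sentences"]]
model = uptrain(corpus,
                model_path="GoogleNews-vectors-negative300.bin",
                binary=True,
                lockf=1.0,  # 1.0 lets the intersected vectors keep training; 0.0 freezes them
                size=300)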

Example 13: learn_embeddings

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def learn_embeddings(self, output):
        """
        Learn embeddings by optimizing the Skipgram objective using SGD.
        """

        walks = self._simulate_walks()  # simulate random walks

        model = Word2Vec(walks, size=self.dimensions, window=self.window_size, min_count=0,
                         workers=self.workers, iter=self.iter, negative=25, sg=1)

        print("defined model using w2v")

        model.wv.save_word2vec_format(output, binary=True)

        # free memory
        del walks
        self.alias_nodes = None
        self.alias_edges = None
        self.G = None

        print("saved model in word2vec binary format")

        return 
Developer: D2KLab, Project: entity2rec, Lines: 25, Source: node2vec.py

Example 14: train

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train(self, G):
        self.G = G
        walks = self._simulate_walks(self.walk_length, self.walk_num)
        walks = [[str(node) for node in walk] for walk in walks]
        model = Word2Vec(
            walks,
            size=self.dimension,
            window=self.window_size,
            min_count=0,
            sg=1,
            workers=self.worker,
            iter=self.iteration,
        )
        id2node = dict([(vid, node) for vid, node in enumerate(G.nodes())])
        embeddings = np.asarray([model.wv[str(id2node[i])] for i in range(len(id2node))])
        return embeddings 
Developer: THUDM, Project: cogdl, Lines: 18, Source: deepwalk.py

Example 15: train

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train(self, G):
        self.G = G
        is_directed = nx.is_directed(self.G)
        for i, j in G.edges():
            G[i][j]["weight"] = G[i][j].get("weight", 1.0)
            if not is_directed:
                G[j][i]["weight"] = G[j][i].get("weight", 1.0)
        self._preprocess_transition_probs()
        walks = self._simulate_walks(self.walk_num, self.walk_length)
        walks = [[str(node) for node in walk] for walk in walks]
        model = Word2Vec(
            walks,
            size=self.dimension,
            window=self.window_size,
            min_count=0,
            sg=1,
            workers=self.worker,
            iter=self.iteration,
        )
        id2node = dict([(vid, node) for vid, node in enumerate(G.nodes())])
        self.embeddings = np.asarray(
            [model.wv[str(id2node[i])] for i in range(len(id2node))]
        )
        return self.embeddings 
Developer: THUDM, Project: cogdl, Lines: 26, Source: node2vec.py


Note: The gensim.models.Word2Vec method examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license. Do not reproduce without permission.