当前位置: 首页>>代码示例>>Python>>正文


Python glove.Glove类代码示例

本文整理汇总了Python中glove.Glove的典型用法代码示例。如果您正苦于以下问题:Python Glove类的具体用法?Python Glove怎么用?Python Glove使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Glove类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_pythonObj

    def test_pythonObj(self):
        """
        Measure a small but richly-linked Python object graph.

        Builds an ``A`` instance whose attributes reference two shared ``B``
        instances through a list, a dict (with int/str/unicode/object keys)
        and nested containers, then runs Glove's measurement over it and
        prints the resulting report.

        :return: None
        """

        # Root fixture: holds two B instances and containers that reference
        # them, so the measurer must handle shared (aliased) objects.
        class A():
            def __init__(self):
                self.b1 = B()
                self.b2 = B()
                # Mixed-type list, including a unicode literal and an object.
                self.list = [1000, 23424.2, 'asdf0', u'unicode编码', self.b1]
                # Dict exercising long-int, str, unicode and object keys,
                # plus set / tuple / None values.
                self.dic = {
                    132323423412312311: 'utf8编码',
                    '232': self.b2,
                    self.b2: set([1,2]),
                    u'unicode编码': None,
                    123: (11,1,111),
                    11: (11,1,111),
                }

        # Leaf fixture: one attribute per basic Python type, including a
        # long integer and a large float.
        class B():
            def __init__(self):
                self.none = None
                self.str = '1111'
                self.int = 15151515151515155151
                self.float = 11231231231212342323.
                self.list = [1,2,3,4]
                self.dict = {1:2, 2:3}
                self.tuple = (1,2,3, 4)

                return

        glove = Glove(A())
        # NOTE(review): "meaure" is the library's (misspelled) method name in
        # the tails project -- confirm against glove.py before renaming.
        glove.meaure()
        print glove.report
开发者ID:Ron3,项目名称:tails,代码行数:34,代码来源:test_glove.py

示例2: word_embedding

def word_embedding(sentences,embedding_size,windows_len):
    """
    Build a GloVe model and vocabulary mappings from tokenised sentences.

    :param sentences: iterable of tokenised sentences (lists of words)
    :param embedding_size: dimensionality of the word vectors
    :param windows_len: co-occurrence context window size
    :return: tuple ``(glove_model, corpus_dict, corpus_inverse_dict)`` where
             ``corpus_dict`` maps word -> id and ``corpus_inverse_dict``
             maps id -> word
    """
    corpus_model = Corpus()
    corpus_model.fit(sentences, window=windows_len)

    glove_model = Glove(no_components=embedding_size, learning_rate=0.05)
    # NOTE(review): epochs=0 leaves the vectors at their random
    # initialisation (the original comment admitted the fit is a no-op);
    # confirm whether a positive epoch count was intended before relying
    # on these embeddings.
    glove_model.fit(corpus_model.matrix,
                    epochs=0,
                    no_threads=2)

    # Removed dead code: a log-transformed dense copy of the co-occurrence
    # matrix was computed here but never used or returned.

    corpus_dict = corpus_model.dictionary
    corpus_inverse_dict = {word_id: word for word, word_id in corpus_dict.items()}

    return glove_model, corpus_dict, corpus_inverse_dict
开发者ID:hessamoddin,项目名称:summarycode,代码行数:30,代码来源:HMDB_Torch.py

示例3: build_model_glove

def build_model_glove(args):
    """Build (or reuse) corpus statistics and train a GloVe model on them.

    The cached corpus file is rebuilt whenever it is missing or older than
    any of the input files; otherwise it is loaded from disk. Returns the
    trained Glove model with its dictionary attached.
    """

    from glove import Glove, Corpus

    # Rebuild the corpus when the cache is absent or stale. `or` short-
    # circuits, so getmtime on the cache is only called when it exists.
    corpus_stale = (not os.path.exists(args.corpus_model) or
                    max(map(os.path.getmtime, args.input)) >= os.path.getmtime(args.corpus_model))

    if corpus_stale:
        logging.info('Pre-processing corpus')
        corpus_model = Corpus()
        corpus_model.fit(get_sentences(args), window=CONFIG['glove']['window'])
        corpus_model.save(args.corpus_model)
    else:
        logging.info('Reading corpus statistics')
        corpus_model = Corpus.load(args.corpus_model)

    # Both branches report the same statistics, so log them once here.
    logging.info('Dict size: %s', len(corpus_model.dictionary))
    logging.info('Collocations: %s', corpus_model.matrix.nnz)

    logging.info('Training the GloVe model')

    glove_cfg = CONFIG['glove']
    glove = Glove(no_components=glove_cfg['size'], learning_rate=glove_cfg['learning_rate'])
    glove.fit(corpus_model.matrix, epochs=glove_cfg['epochs'],
              no_threads=args.workers, verbose=args.verbose)
    glove.add_dictionary(corpus_model.dictionary)
    return glove
开发者ID:escherba,项目名称:flaubert,代码行数:32,代码来源:pretrain.py

示例4: get_model

def get_model():
    ''' lazy initialization for glove model so it works in pool '''
    global model
    if model == None:
        print 'loading the glove model...'
        model = Glove.load('w2v/glove_lemma_stopwords')
    return model
开发者ID:alexeygrigorev,项目名称:avito-duplicates-kaggle,代码行数:7,代码来源:calculate_glove_features.py

示例5: __init__

 def __init__(self,data_src,num_features=100,window=10,learning_rate=0.05,epochs=10):
     """Store GloVe hyper-parameters, pre-train on *data_src* if no cached
     model exists, then load the trained model from "glove.model"."""
     self.num_features = num_features
     self.window = window
     self.epochs = epochs
     self.learning_rate = learning_rate
     # pretrain() is a no-op when "glove.model" already exists on disk.
     self.pretrain(data_src)
     self.model = Glove.load("glove.model")
开发者ID:saatvikshah1994,项目名称:SmartMM,代码行数:7,代码来源:gloveavgvec.py

示例6: train_glove

def train_glove(sentences):
    print 'training glove model...'
    t0 = time()
    
    num_features = 300    # Word vector dimensionality
    context = 5          # Context window size
    learning_rate = 0.05
    
    corpus = Corpus()
    corpus.fit(sentences, window=context)

    glove = Glove(no_components=num_features, learning_rate=learning_rate)
    glove.fit(corpus.matrix, epochs=30, no_threads=8, verbose=True)
    glove.add_dictionary(corpus.dictionary)

    print 'took %0.5fs.' % (time() - t0)
    return glove
开发者ID:alexeygrigorev,项目名称:avito-duplicates-kaggle,代码行数:17,代码来源:prepare_glove_model.py

示例7: run_glove

    def run_glove(self):
        """ run global vector """
        #sentences = [["hi","good","to"],["see","u"]]
        sentences = self.get_sentences()

        print '\n' + '-'*80
        print "Fitting words into corpus"
        corpus = Corpus()
        corpus.fit(sentences, window=10)

        print "Running Glove"
        glove = Glove(no_components=200, learning_rate=0.05)
        glove.fit(corpus.matrix, epochs=5, no_threads=10, verbose=True)
        glove.add_dictionary(corpus.dictionary)

        print "Fitting words and vectors into unique_words and vectors200"
        unique_words = []
        vectors200 = []

        cnt1 = 0
        length1 = len(glove.inverse_dictionary)
        for word_id in glove.inverse_dictionary:
            cnt1 += 1
            unique_words.append(glove.inverse_dictionary[word_id])
            vectors200.append(glove.word_vectors[word_id])

            sys.stdout.write("\rStatus: %s / %s"%(cnt1, length1))
            sys.stdout.flush()

        print '\n' + "Processing vectors200"
        processed_vectors200 = []
        processed_vector = []

        cnt2 = 0
        length2 = len(vectors200)
        for vector in vectors200:
            cnt2 += 1
            for float_num in vector:
                processed_vector.append(float_num)

            processed_vectors200.append(processed_vector)

            sys.stdout.write("\rStatus: %s / %s"%(cnt2, length2))
            sys.stdout.flush()

        return unique_words, processed_vectors200
开发者ID:Denffer,项目名称:yelp-re,代码行数:46,代码来源:Glove.py

示例8: main

def main():
    """Load a pre-built corpus, train a 100-dim GloVe model and save it."""
    # Removed dead code: a fresh `Corpus()` was instantiated here and then
    # immediately discarded by the load below.
    corpus_model = Corpus.load('bioc-corpus-AZ2.model')
    glove = Glove(no_components=100, learning_rate=0.05)
    glove.fit(corpus_model.matrix, epochs=10, no_threads=16, verbose=True)
    glove.add_dictionary(corpus_model.dictionary)
    glove.save('bioc-glove-AZ2.model')
开发者ID:jn7163,项目名称:pubmedcentral-glove,代码行数:7,代码来源:pubmedcentral-glove-py34.py

示例9: load_wv_model

def load_wv_model(word_vector_file, word_vector_type):
    """Load word vectors from disk and wrap them in the matching adapter.

    GloVe files (Stanford text format) get a GloveWrapper; anything else is
    treated as a word2vec file and wrapped in W2VWrapper.
    """
    if word_vector_type == WordVectorTypes.glove.name:
        # Import lazily so the glove package is only required for glove files.
        from glove import Glove
        return GloveWrapper(Glove.load_stanford(word_vector_file))
    import word2vec
    return W2VWrapper(word2vec.load(word_vector_file))
开发者ID:agude,项目名称:attalos,代码行数:10,代码来源:main.py

示例10: create_vectors_dataset

def create_vectors_dataset(input_files, vector_files, max_len=500):
    """Convert pickled story/answer datasets into GloVe word-vector HDF5 files.

    Loads the pickled train/test story sets, builds the vocabulary and an
    answer index, embeds every story with pretrained Stanford GloVe vectors
    and writes the resulting input/answer arrays to the given HDF5 files.

    :param input_files: (training_set_file, test_set_file) pickle paths
    :param vector_files: (train_word_file, test_word_file) HDF5 output paths
    :param max_len: maximum story length passed to get_word_vectors
    :return: ((inputs_train, answers_train), (inputs_test, answers_test))
    """
    print('Creating word vectors file')

    training_set_file, test_set_file = input_files
    train_word_file, test_word_file = vector_files

    # NOTE(review): text-mode 'r' for pickle is Python-2 only; Python 3
    # requires 'rb' -- confirm the target interpreter.
    train_stories = pickle.load(open(training_set_file,'r'))
    test_stories = pickle.load(open(test_set_file,'r'))

    # Flatten each story's facts into one token list, keeping the answer.
    train_stories = [(reduce(lambda x,y: x + y, map(list,fact)),q) for fact,q in train_stories]
    test_stories = [(reduce(lambda x,y: x + y, map(list,fact)),q) for fact,q in test_stories]

    # Union of all story tokens plus answers, sorted for a stable word index.
    vocab = sorted(reduce(lambda x, y: x | y, (set(story + [answer]) for story, answer in train_stories + test_stories)))

    # Reserve 0 for masking via pad_sequences
    vocab_size = len(vocab) + 1
    story_maxlen = max(map(len, (x for x, _ in train_stories + test_stories)))


    print('-')
    print('Vocab size:', vocab_size, 'unique words')
    print('Story max length:', story_maxlen, 'words')
    print('Number of training stories:', len(train_stories))
    print('Number of test stories:', len(test_stories))
    print('-')
    print('Here\'s what a "story" tuple looks like (input, query, answer):')
    print(train_stories[0])
    print('-')
    print('Vectorizing the word sequences...')

    # word -> 1-based id (0 is the padding/mask value).
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

    # Separate 0-based index over the answer vocabulary only.
    answer_vocab = sorted(reduce(lambda x, y: x | y, (set([answer]) for _, answer in train_stories + test_stories)))
    # Reserve 0 for masking via pad_sequences
    answer_dict = dict((word, i) for i, word in enumerate(answer_vocab))
    print('Answers dict len: {0}'.format(len(answer_dict)))

    # I need to check also if this exist
    word_vectors_dir = 'word_vectors/glove.42B.300d.txt'
    word_vectors_model = Glove.load_stanford(word_vectors_dir)

    # Embed both splits with the pretrained vectors.
    inputs_train, answers_train = get_word_vectors(train_stories, answer_dict, 
                                                   max_len, word_vectors_model)
    inputs_test, answers_test = get_word_vectors(test_stories, answer_dict, max_len,
                                                 word_vectors_model)

    # Persist inputs/answers per split as HDF5 datasets.
    with h5py.File(train_word_file,'w') as train_f:
        _ = train_f.create_dataset('inputs',data=inputs_train)
        _ = train_f.create_dataset('answers',data=answers_train)
    with h5py.File(test_word_file,'w') as test_f:
        _ = test_f.create_dataset('inputs',data=inputs_test)
        _ = test_f.create_dataset('answers',data=answers_test)
        
    return (inputs_train, answers_train),(inputs_test, answers_test)
开发者ID:alejandrorivas,项目名称:MedicalDiagnosis,代码行数:54,代码来源:utils.py

示例11: test_measure

    def test_measure(self):
        """

        :return:
        """
        class A():
            pass

        a = A()

        for i in xrange(100):
            a1 = A()
            for j in xrange(100):
                a2 = A()
                setattr(a1, 'a%s' % j, a2)
            setattr(a, 'a%s' % i, a1)

        glove = Glove(a)
        glove.meaure()
        print glove.report
开发者ID:Ron3,项目名称:tails,代码行数:20,代码来源:test_glove.py

示例12: __init__

	def __init__(self, data):
		"""Build the full feature-extraction pipeline over *data*.

		Loads the Liu and MPQA subjectivity lexicons, constructs the tweet
		corpus, trains word2vec and GloVe representations plus word
		clusters, fits the label encoders, precomputes per-topic vectors
		and collects the top uni-/bigrams.
		"""
		self.data = data
		# Corpus is populated by buildTweetCorpus() below.
		self.corpus = None
		self.liu = LiuLexicon()
		self.subj = SubjLexicon()
		self.buildTweetCorpus()
		# Two parallel embedding models over the same corpus.
		self.word_vec_model = Word2Vec(self.corpus)
		self.glove_vec_model = Glove(100, self.corpus)
		self.clusters = Cluster(100)
		# initEncoders() defines self.topicenc used just below.
		self.initEncoders()
		self.topicVecs = self.word_vec_model.getVectorsForTopics(self.topicenc.classes_)
		self.collectTopUnigrams()
		self.collectTopBigrams()
开发者ID:rahul003,项目名称:stance-detection,代码行数:13,代码来源:featureExtractor.py

示例13: pretrain

 def pretrain(self,data_src):
     """Train a GloVe model on *data_src* and cache it as "glove.model".

     Does nothing when the cache file already exists on disk.
     """
     if os.path.isfile("glove.model"):
         return
     # Normalise the raw text: letters only, collapsed whitespace,
     # HTML stripped, then split into word lists.
     cleaned = DataClean([
                         ["[^a-z]"," "],  # only letters
                         [" [ ]+", " "],  # remove extra spaces
                         ],html_clean=True,split_words=True).fit(data_src).transform(data_src)
     corpus_model = Corpus()
     corpus_model.fit(cleaned,window=self.window)
     glove = Glove(no_components=self.num_features,learning_rate=self.learning_rate)
     glove.fit(corpus_model.matrix,epochs=self.epochs,verbose=True)
     glove.add_dictionary(corpus_model.dictionary)
     glove.save("glove.model")
开发者ID:saatvikshah1994,项目名称:SmartMM,代码行数:12,代码来源:gloveavgvec.py

示例14: glove_vector_download_and_save

def glove_vector_download_and_save(url, outdir, maxmegabytes):
    """Download a GloVe vector zip, extract it and convert each text file
    to a saved model object.

    :param url: URL of a .zip of Stanford-format GloVe .txt files
    :param outdir: parent directory; a per-archive subdirectory is created
    :param maxmegabytes: skip extracted files larger than this many MB
    """

    # construct filenames
    filename_full = os.path.basename(url)
    filename_name = os.path.splitext(filename_full)[0]

    # create file-specific output directory
    dirname_file = "{}/{}".format(outdir, filename_name)
    if not os.path.isdir(dirname_file):
        os.mkdir(dirname_file)

    # download file (skipped when already present on disk)
    filename_save = "{}/{}".format(dirname_file, filename_full)
    if not os.path.isfile(filename_save):
        print("downloading {}...".format(filename_save))
        urllib.urlretrieve(url, filename_save)

    # extract zip
    print("extracting {}...".format(filename_save))
    with zipfile.ZipFile(filename_save, "r") as z:
        z.extractall(dirname_file)

    # build model for each file
    file_pattern = "{}/*.txt".format(dirname_file)
    for file_glove_in in glob.glob(file_pattern):

        try:
            # ensure file isn't too big (size in MB; integer division on Py2)
            filesize = os.path.getsize(file_glove_in) / 1024 / 1024
            if filesize > maxmegabytes:
                print("skipping {}M file {}...".format(filesize, file_glove_in))

            else:

                # load vectors
                print("importing glove vectors from {}".format(file_glove_in))
                model = Glove.load_stanford(file_glove_in)

                # save model object
                file_glove_out = "{}.obj".format(os.path.splitext(file_glove_in)[0])
                print("saving glove model to {}...".format(file_glove_out))
                model.save_obj(file_glove_out)

                # delete extracted file to reclaim disk space
                os.remove(file_glove_in)

        except MemoryError as e:
            # NOTE(review): MemoryError has no .strerror attribute, so this
            # handler would itself raise AttributeError -- confirm whether
            # printing str(e) was intended.
            print e.strerror
开发者ID:AimVoma,项目名称:sunny-side-up,代码行数:48,代码来源:models-load-stanford.py

示例15: test_stanford_loading

def test_stanford_loading():
    """Loading a Stanford-format file yields 100 words of 25 dimensions."""
    model = Glove.load_stanford('glove/tests/stanford_test.txt')

    assert model.word_vectors is not None
    assert model.word_vectors.shape == (100, 25)
    assert len(model.dictionary) == 100

    # The fixture contains a horizontal-ellipsis token; check it survived
    # the Unicode round-trip on both interpreter versions.
    try:
        assert unichr(8230) in model.dictionary  # Python 2
    except NameError:
        assert '…' in model.dictionary  # Python 3 (unichr does not exist)
开发者ID:AimVoma,项目名称:sunny-side-up,代码行数:16,代码来源:test_glove.py


注:本文中的glove.Glove类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。