当前位置: 首页>>代码示例>>Python>>正文


Python Glove.add_dictionary方法代码示例

本文整理汇总了Python中glove.Glove.add_dictionary方法的典型用法代码示例。如果您正苦于以下问题:Python Glove.add_dictionary方法的具体用法?Python Glove.add_dictionary怎么用?Python Glove.add_dictionary使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在glove.Glove的用法示例。


在下文中一共展示了Glove.add_dictionary方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
def main():
    corpus_model = Corpus()
    corpus_model = Corpus.load('bioc-corpus-AZ2.model')
    glove = Glove(no_components=100, learning_rate=0.05)
    glove.fit(corpus_model.matrix, epochs=10, no_threads=16, verbose=True)
    glove.add_dictionary(corpus_model.dictionary)
    glove.save('bioc-glove-AZ2.model')
开发者ID:jn7163,项目名称:pubmedcentral-glove,代码行数:9,代码来源:pubmedcentral-glove-py34.py

示例2: build_model_glove

# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
def build_model_glove(args):

    from glove import Glove, Corpus

    if not os.path.exists(args.corpus_model) or \
            max(map(os.path.getmtime, args.input)) >= os.path.getmtime(args.corpus_model):

        # Build the corpus dictionary and the cooccurrence matrix.
        logging.info('Pre-processing corpus')

        corpus_model = Corpus()
        corpus_model.fit(get_sentences(args), window=CONFIG['glove']['window'])
        corpus_model.save(args.corpus_model)

        logging.info('Dict size: %s' % len(corpus_model.dictionary))
        logging.info('Collocations: %s' % corpus_model.matrix.nnz)
    else:
        # Try to load a corpus from disk.
        logging.info('Reading corpus statistics')
        corpus_model = Corpus.load(args.corpus_model)

        logging.info('Dict size: %s' % len(corpus_model.dictionary))
        logging.info('Collocations: %s' % corpus_model.matrix.nnz)

    # Train the GloVe model and save it to disk.
    logging.info('Training the GloVe model')

    glove = Glove(no_components=CONFIG['glove']['size'], learning_rate=CONFIG['glove']['learning_rate'])
    glove.fit(corpus_model.matrix, epochs=CONFIG['glove']['epochs'],
              no_threads=args.workers, verbose=args.verbose)
    glove.add_dictionary(corpus_model.dictionary)
    return glove
开发者ID:escherba,项目名称:flaubert,代码行数:34,代码来源:pretrain.py

示例3: pretrain

# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
 def pretrain(self,data_src):
     if not os.path.isfile("glove.model"):
         data_src = DataClean([
                             ["[^a-z]"," "],  # only letters
                             [" [ ]+", " "],  # remove extra spaces
                             ],html_clean=True,split_words=True).fit(data_src).transform(data_src)
         corpus_model = Corpus()
         corpus_model.fit(data_src,window=self.window)
         glove = Glove(no_components=self.num_features,learning_rate=self.learning_rate)
         glove.fit(corpus_model.matrix,epochs=self.epochs,verbose=True)
         glove.add_dictionary(corpus_model.dictionary)
         glove.save("glove.model")
开发者ID:saatvikshah1994,项目名称:SmartMM,代码行数:14,代码来源:gloveavgvec.py

示例4: train_glove

# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
def train_glove(sentences):
    print 'training glove model...'
    t0 = time()
    
    num_features = 300    # Word vector dimensionality
    context = 5          # Context window size
    learning_rate = 0.05
    
    corpus = Corpus()
    corpus.fit(sentences, window=context)

    glove = Glove(no_components=num_features, learning_rate=learning_rate)
    glove.fit(corpus.matrix, epochs=30, no_threads=8, verbose=True)
    glove.add_dictionary(corpus.dictionary)

    print 'took %0.5fs.' % (time() - t0)
    return glove
开发者ID:alexeygrigorev,项目名称:avito-duplicates-kaggle,代码行数:19,代码来源:prepare_glove_model.py

示例5: run_glove

# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
    def run_glove(self):
        """ run global vector """
        #sentences = [["hi","good","to"],["see","u"]]
        sentences = self.get_sentences()

        print '\n' + '-'*80
        print "Fitting words into corpus"
        corpus = Corpus()
        corpus.fit(sentences, window=10)

        print "Running Glove"
        glove = Glove(no_components=200, learning_rate=0.05)
        glove.fit(corpus.matrix, epochs=5, no_threads=10, verbose=True)
        glove.add_dictionary(corpus.dictionary)

        print "Fitting words and vectors into unique_words and vectors200"
        unique_words = []
        vectors200 = []

        cnt1 = 0
        length1 = len(glove.inverse_dictionary)
        for word_id in glove.inverse_dictionary:
            cnt1 += 1
            unique_words.append(glove.inverse_dictionary[word_id])
            vectors200.append(glove.word_vectors[word_id])

            sys.stdout.write("\rStatus: %s / %s"%(cnt1, length1))
            sys.stdout.flush()

        print '\n' + "Processing vectors200"
        processed_vectors200 = []
        processed_vector = []

        cnt2 = 0
        length2 = len(vectors200)
        for vector in vectors200:
            cnt2 += 1
            for float_num in vector:
                processed_vector.append(float_num)

            processed_vectors200.append(processed_vector)

            sys.stdout.write("\rStatus: %s / %s"%(cnt2, length2))
            sys.stdout.flush()

        return unique_words, processed_vectors200
开发者ID:Denffer,项目名称:yelp-re,代码行数:48,代码来源:Glove.py

示例6: build_glove_embeddings

# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
def build_glove_embeddings(training, testing, args):
    
    ''' Trains the model on the sentiment140 dataset

    @Arguments:
        data:  the loaded sentiment140 dataset from module
        num_epochs: the number of epochs to train on
        num_threads: the number of threads to use
        num_components: the number of components the glove model should use
        learning_rate: the model's learning rate
        window_size: the size of the window to use when looking for word co-occurence
        verbose: boolean for whether or not extensive output should be printed to screen

    @Return:
        A trained glove model
    '''
        
    # initialize model
    glove = Glove(no_components = args.vecsize, learning_rate = args.learningRate)
    
    txtSource = chain( imap(lambda (txt,lbl): txt, training), imap(lambda (txt,lbl): txt, testing))
    
    # read in the data to train on
    corpus_model = Corpus()
    corpus_model.fit( imap(preprocess.tokenize, txtSource), window = args.window)
        
    # fit the model using the given parameters
    logging.info("Training GloVe")
    glove.fit(corpus_model.matrix, epochs = args.epochs, no_threads = args.parallelism, verbose = args.verbose)
    
    # add a dictionary just to make it easier for similarity queries
    glove.add_dictionary(corpus_model.dictionary)
    
    transformer = lambda words: glove.transform_paragraph(words, use_pca = args.pca)

    fromTraining = to_sklearn_format(transformer, training, args.vecsize)
    fromTesting = to_sklearn_format(transformer, testing, args.vecsize)
    
    return fromTraining, fromTesting
开发者ID:danforth36phd,项目名称:sunny-side-up,代码行数:41,代码来源:sklearn_embeddings.py

示例7: print

# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
        print('Collocations: %s' % corpus_model.matrix.nnz)

    if args.train:
        # Train the GloVe model and save it to disk.

        if not args.create:
            # Try to load a corpus from disk.
            print('Reading corpus statistics')
            corpus_model = Corpus.load('corpus.model')

            print('Dict size: %s' % len(corpus_model.dictionary))
            print('Collocations: %s' % corpus_model.matrix.nnz)

        print('Training the GloVe model')

        glove = Glove(no_components=100, learning_rate=0.05)
        glove.fit(corpus_model.matrix, epochs=int(args.train),
                  no_threads=args.parallelism, verbose=True)
        glove.add_dictionary(corpus_model.dictionary)

        glove.save('glove.model')

    if args.query:
        # Finally, query the model for most similar words.
        if not args.train:
            print('Loading pre-trained GloVe model')
            glove = Glove.load('glove.model')

        print('Querying for %s' % args.query)
        pprint.pprint(glove.most_similar(args.query, number=10))
开发者ID:mouhidine,项目名称:glove-python,代码行数:32,代码来源:example.py

示例8: Glove

# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
@author: dannl
'''
from glove import Glove
from glove import Corpus
import time

cooc_file='/home/dannl/tmp/newstech/glove/word.cooc'
model_file='/home/dannl/tmp/newstech/glove/glove.model'

oldtime=time.time()
# get a cooccurrence matrix
corpus_cooc = Corpus.load(cooc_file)

# get a model
glove = Glove(no_components=100, learning_rate=0.05)
glove.fit(corpus_cooc.matrix, epochs=5,no_threads=4, verbose=True)
glove.add_dictionary(corpus_cooc.dictionary)
glove.save(model_file)

# count=0
# for word,wid in corpus_cooc.dictionary.items():
#     count+=1
#     if count>100:
#         break
#     print word,wid
    
print('Dict size: %s' % len(corpus_cooc.dictionary))
print('Collocations: %s' % corpus_cooc.matrix.nnz)

print 'time cost:%.2f'%(time.time()-oldtime)
开发者ID:JohnDannl,项目名称:NewsTechNLP,代码行数:32,代码来源:cooc2mod.py

示例9: mlp_model

# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
mlp1000 = mlp_model(1000)
mlp1000_accuracy = train_test(mlp1000, x, y, folds)

print((mlp1_accuracy, mlp10_accuracy, mlp100_accuracy, mlp1000_accuracy))

#3CNN
#Glove Vectors from reviews
c = [review.split() for review in data.data]

corpus = Corpus()
corpus.fit(c, window=10)

glv = Glove(no_components=100, learning_rate=0.05)
glv.fit(corpus.matrix, epochs=30, no_threads=4, verbose=True)

glv.add_dictionary(corpus.dictionary)

embeddings_index = glv.dictionary

BASE_DIR = ''
GLOVE_DIR = BASE_DIR + '/glove.6B/'
TEXT_DATA_DIR = 'txt_sentoken/'
MAX_SEQUENCE_LENGTH = 1000
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2
texts = []  # list of text samples
labels_index = {}  # dictionary mapping label name to numeric id
labels = []  # list of label ids
for name in sorted(os.listdir(TEXT_DATA_DIR)):
    path = os.path.join(TEXT_DATA_DIR, name)
开发者ID:flashbob,项目名称:MLassignment3,代码行数:33,代码来源:hw3.py

示例10: print

# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
    print("Loading pretrained corpus...")
    corpus = Corpus.load("cache/corpus.p")
except:
    print("Training corpus...")
    corpus.fit(texts, window=max_sentence_length)
    corpus.save("cache/corpus.p")

glove = Glove(no_components=number_components, learning_rate=0.05)
try:
    print("Loading pretrained GloVe vectors...")
    glove = Glove.load("cache/glove.p")
except:
    print("Training GloVe vectors...")
    # More epochs seems to make it worse
    glove.fit(corpus.matrix, epochs=30, no_threads=4, verbose=True)
    glove.add_dictionary(corpus.dictionary)
    glove.save("cache/glove.p")

# Convert input text
print("Vectorizing input sentences...")
X = vectify(texts, previous_message, glove.dictionary, max_sentence_length, contextual)
y = np.array([x == u'1' for x in classes]).astype(np.int32)

X, y, texts = X[:207458], y[:207458], texts[:207458]

def print_accurate_forwards(net, history):
    X_train, X_valid, y_train, y_valid = net.train_split(X, y, net)
    y_classified = net.predict(X_valid)
    acc_fwd = np.mean([x == y_ and y_ == 1 for x, y_ in zip(y_valid, y_classified)])/np.mean(y_valid)
    fls_pos = np.mean([x != y_ and y_ == 0 for x, y_ in zip(y_classified, y_valid)])/(np.mean(y_valid))
    print('Accurately forwarded: {:.4f}'.format(acc_fwd) + ', False Positives: {:.4f}'.format(fls_pos) + ', Valid forwards: {:.4f}'.format((acc_fwd / (acc_fwd + fls_pos))) )
开发者ID:feilen,项目名称:morewell,代码行数:33,代码来源:chat_optimize.py


注:本文中的glove.Glove.add_dictionary方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。