本文整理汇总了 Python 中 glove.Glove.load 方法的典型用法代码示例。如果您正苦于以下问题:Python Glove.load 方法的具体用法?Python Glove.load 怎么用?Python Glove.load 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 glove.Glove 的用法示例。
在下文中一共展示了 Glove.load 方法的 10 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: get_model
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load [as 别名]
def get_model():
    """Return the shared GloVe model, loading it on first use.

    The model lives in the module-level ``model`` global. It is loaded from
    ``'w2v/glove_lemma_stopwords'`` only while that global is still ``None``;
    the original note says this lazy initialisation is what makes the
    function usable from a pool.

    Returns:
        The object stored in the ``model`` global.
    """
    global model
    # Identity test: ``== None`` would dispatch to the model's own __eq__.
    if model is None:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print('loading the glove model...')
        model = Glove.load('w2v/glove_lemma_stopwords')
    return model
示例2: __init__
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load [as 别名]
def __init__(self, data_src, num_features=100, window=10, learning_rate=0.05, epochs=10):
    """Record the hyper-parameters, run pre-training and load the model.

    Args:
        data_src: Passed unchanged to ``self.pretrain``.
        num_features: Stored as ``self.num_features``.
        window: Stored as ``self.window``.
        learning_rate: Stored as ``self.learning_rate``.
        epochs: Stored as ``self.epochs``.
    """
    # All settings are stored before pretrain() is invoked.
    self.learning_rate, self.num_features = learning_rate, num_features
    self.window, self.epochs = window, epochs

    self.pretrain(data_src)

    # NOTE(review): presumably pretrain() writes "glove.model" -- confirm.
    loaded_model = Glove.load("glove.model")
    self.model = loaded_model
示例3: get_data
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load [as 别名]
def get_data(args):
    """Build the features and labels selected by ``CONFIG['train']['features']``.

    Args:
        args: Object with ``train``, ``sentences`` and ``embedding``
            attributes (assumed to be parsed command-line arguments --
            confirm against the caller).

    Returns:
        A ``(flag, data)`` pair. ``flag`` is ``True`` only when bag-of-words
        features are combined with embedding features; in that case ``data``
        is whatever ``get_mixed_features`` returns. Otherwise ``flag`` is
        ``False`` and ``data`` is either ``(bow_features, y_labels)`` or the
        result of ``drop_nans``.

    Raises:
        RuntimeError: If an embedding-based feature set is configured without
            ``args.embedding``, if ``CONFIG['pretrain']['algorithm']`` is not
            ``'word2vec'`` or ``'glove'``, or if no embedding feature set is
            configured although an embedding was supplied.
    """
    feature_set_names = CONFIG['train']['features']
    if set(feature_set_names).intersection(['word2vec', 'doc2vec']) and not args.embedding:
        raise RuntimeError("--embedding argument must be supplied")

    # get Y labels
    training_set = read_tsv(args.train)
    y_labels = training_set["sentiment"]

    sentences = [obj['review'] for obj in read_json_lines(args.sentences)]

    if not args.embedding or feature_set_names == ['bow']:
        # don't drop NaNs -- have a sparse matrix here
        return False, (get_bow_features(sentences), y_labels)

    # load embedding
    algorithm = CONFIG['pretrain']['algorithm']
    if algorithm == 'word2vec':
        embedding = word2vec.Word2Vec.load(args.embedding)
    elif algorithm == 'glove':
        embedding = Glove.load(args.embedding)
        # dynamically add GloveWrapper mixin
        embedding.__class__ = type('MyGlove', (Glove, GloveWrapper), {})
    else:
        # Fail here instead of hitting an unbound ``embedding`` further down.
        raise RuntimeError("Invalid config setting pretrain:algorithm=%s" % algorithm)

    # get feature vectors
    if 'doc2vec' in feature_set_names:
        embedding_vectors = get_doc2vec_features(sentences, embedding)
    elif 'word2vec' in feature_set_names:
        embedding_vectors = get_word2vec_features(sentences, embedding)
    else:
        raise RuntimeError("Invalid config setting train:features=%s" % feature_set_names)

    if 'bow' in feature_set_names:
        return True, get_mixed_features(sentences, embedding_vectors, y_labels)

    # matrix is dense -- drop NaNs
    return False, drop_nans(embedding_vectors, y_labels)
示例4: get_data
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load [as 别名]
def get_data(args):
    """Build the feature matrix and labels selected by ``CONFIG['train']['features']``.

    Args:
        args: Object with an ``embedding`` attribute; it is also handed to
            ``sample_by_y`` (assumed to be parsed command-line arguments --
            confirm against the caller).

    Returns:
        A ``(flag, (X, y_labels))`` pair. ``flag`` is ``True`` only when
        bag-of-words features are combined with embedding features via
        ``get_mixed_features``; otherwise it is ``False``.

    Raises:
        RuntimeError: If the ``'embedding'`` feature set is configured without
            ``args.embedding``, if ``CONFIG['pretrain']['algorithm']`` is not
            ``'word2vec'`` or ``'glove'``, or if ``'embedding'`` is not
            configured although an embedding was supplied.
    """
    feature_set_names = CONFIG['train']['features']
    if set(feature_set_names).intersection(['embedding']) and not args.embedding:
        raise RuntimeError("--embedding argument must be supplied")

    # get input data
    sentences, y_labels = sample_by_y(args)

    if not args.embedding or feature_set_names == ['bow']:
        # don't drop NaNs -- have a sparse matrix here
        X = get_bow_features(sentences)
        return False, (X, y_labels)

    # load embedding; the backend is imported only when it is selected
    algorithm = CONFIG['pretrain']['algorithm']
    if algorithm == 'word2vec':
        from gensim.models import word2vec
        embedding = word2vec.Word2Vec.load(args.embedding)
    elif algorithm == 'glove':
        from glove import Glove
        embedding = Glove.load(args.embedding)
        # dynamically add GloveWrapper mixin
        embedding.__class__ = type('MyGlove', (Glove, GloveWrapper), {})
    else:
        # Fail here instead of hitting an unbound ``embedding`` further down.
        raise RuntimeError("Invalid config setting pretrain:algorithm=%s" % algorithm)

    # get feature vectors
    if 'embedding' in feature_set_names:
        embedding_vectors = get_word2vec_features(sentences, embedding)
    else:
        raise RuntimeError("Invalid config setting train:features=%s" % feature_set_names)

    if 'bow' in feature_set_names:
        X, y_labels = get_mixed_features(sentences, embedding_vectors, y_labels)
        return True, (X, y_labels)

    # matrix is dense -- drop NaNs
    X, y_labels = drop_nans(embedding_vectors, y_labels)
    return False, (X, y_labels)
示例5: loadGloveModel
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load [as 别名]
def loadGloveModel(self, modelFile = MODEL_FILE):
    """Load a saved GloVe model into ``self.glove``, reporting progress.

    Args:
        modelFile: Path handed to ``Glove.load``; defaults to ``MODEL_FILE``.
    """
    # Format the message before printing: with print as a function,
    # ``print(...).format(...)`` would call ``.format`` on ``None``.
    print("Loading pre-trained GloVe model \"{}\"...".format(modelFile))
    self.glove = Glove.load(modelFile)
    print("Done loading.")
    print("")
示例6: print
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load [as 别名]
# Closing statistic of the step that precedes this excerpt (its start is
# not visible here).
print('Collocations: %s' % corpus_model.matrix.nnz)

if args.train:
    # Training phase: fit a GloVe model and persist it to 'glove.model'.
    if not args.create:
        # No corpus was built in this run, so reuse the one saved on disk.
        print('Reading corpus statistics')
        corpus_model = Corpus.load('corpus.model')

        dictionary_size = len(corpus_model.dictionary)
        print('Dict size: %s' % dictionary_size)
        collocation_count = corpus_model.matrix.nnz
        print('Collocations: %s' % collocation_count)

    print('Training the GloVe model')

    glove = Glove(no_components=100, learning_rate=0.05)
    # args.train doubles as the epoch count.
    glove.fit(
        corpus_model.matrix,
        epochs=int(args.train),
        no_threads=args.parallelism,
        verbose=True,
    )
    glove.add_dictionary(corpus_model.dictionary)
    glove.save('glove.model')

if args.query:
    # Query phase: list the ten nearest neighbours of the requested word.
    if not args.train:
        # Nothing was trained in this run; fall back to the saved model.
        print('Loading pre-trained GloVe model')
        glove = Glove.load('glove.model')

    print('Querying for %s' % args.query)
    nearest = glove.most_similar(args.query, number=10)
    pprint.pprint(nearest)
示例7: defaultdict
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load [as 别名]
required=True,
help='The filename of the stored GloVe model.')
parser.add_argument('--encode', '-e', action='store_true',
                    default=False,
                    help=('If True, words from the '
                          'evaluation set will be utf-8 encoded '
                          'before looking them up in the '
                          'model dictionary'))
# type=int makes a value given on the command line an int, like the default;
# without it argparse would hand back a string such as '4'.
parser.add_argument('--parallelism', '-p', action='store',
                    default=1, type=int,
                    help=('Number of parallel threads to use'))
args = parser.parse_args()

# Load the GloVe model
glove = Glove.load(args.model)

# Pick how evaluation words are normalised before dictionary lookup.
if args.encode:
    def encode(words):
        """Lower-case each word and encode it as utf-8 bytes."""
        return [x.lower().encode('utf-8') for x in words]
else:
    def encode(words):
        """Lower-case each word and convert it with ``unicode``."""
        return [unicode(x.lower()) for x in words]

# Load the analogy task dataset. One example can be obtained at
# https://word2vec.googlecode.com/svn/trunk/questions-words.txt
sections = defaultdict(list)

# NOTE: this comprehension is evaluated for its side effect of filling
# ``sections``; ``evaluation_words`` itself ends up as a list of None.
# The name is kept because code after this excerpt may refer to it.
evaluation_words = [sections[section].append(encode(words)) for section, words in
                    metrics.read_analogy_file(args.test)]

section_ranks = []
示例8: fit
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load [as 别名]
def fit(self, X, y=None):
    """Load the GloVe model saved as "glove.model" and return ``self``.

    ``X`` and ``y`` are accepted but not read by this method.
    """
    pretrained = Glove.load("glove.model")
    self.model = pretrained
    return self
示例9: print
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load [as 别名]
print("Max sentence length: {}, put that in settings.json.".format(max_sentence_length))

# Start from an empty corpus so the fallback branch has an object to fit.
corpus = Corpus()
try:
    print("Loading pretrained corpus...")
    corpus = Corpus.load("cache/corpus.p")
except Exception:
    # Any load failure falls back to training from scratch. Unlike a bare
    # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
    print("Training corpus...")
    corpus.fit(texts, window=max_sentence_length)
    corpus.save("cache/corpus.p")

# Same load-or-train pattern for the word vectors.
glove = Glove(no_components=number_components, learning_rate=0.05)
try:
    print("Loading pretrained GloVe vectors...")
    glove = Glove.load("cache/glove.p")
except Exception:
    print("Training GloVe vectors...")
    # More epochs seems to make it worse
    glove.fit(corpus.matrix, epochs=30, no_threads=4, verbose=True)
    glove.add_dictionary(corpus.dictionary)
    glove.save("cache/glove.p")

# Convert input text
print("Vectorizing input sentences...")
X = vectify(texts, previous_message, glove.dictionary, max_sentence_length, contextual)
# Labels become int32 0/1: 1 where the class string equals u'1'.
y = np.array([x == u'1' for x in classes]).astype(np.int32)

# NOTE(review): 207458 is an unexplained hard-coded row limit -- confirm
# where it comes from before changing the dataset.
X, y, texts = X[:207458], y[:207458], texts[:207458]
def print_accurate_forwards(net, history):
示例10: _get_wl_vec
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load [as 别名]
#-*- coding:utf-8 -*-
'''
Created on 2016-3-12
@author: dannl
'''
from glove import Glove
from glove import Corpus
import scipy
import numpy as np
# Hard-coded locations of the saved GloVe model and the co-occurrence file.
model_file='/home/dannl/tmp/newstech/glove/glove.model'
cooc_file='/home/dannl/tmp/newstech/glove/word.cooc'
# Corpus loading is disabled; in the visible code cooc_file is referenced
# only by this commented-out line.
# corpus_coocc=Corpus.load(cooc_file)
# Loaded once at import time; read by _get_wl_vec as a module global.
model = Glove.load(model_file)
def _get_wl_vec(wordList):
# wordList is a list of word:[word1,word2,...,wordn]
total_vec=scipy.zeros(model.no_components)
wordStr=' '.join(wordList)
if isinstance(wordStr,unicode): # make sure word is utf-8 str type
wordList=wordStr.encode('utf-8').split()
for word in wordList:
# make sure the word2vec model contain key 'word'
if model.dictionary.has_key(word):
total_vec+=model.word_vectors[model.dictionary[word]]
return total_vec
def getSimofNews(wordList1,wordList2):
vec1=_get_wl_vec(wordList1)