本文整理汇总了Python中glove.Glove类的典型用法代码示例。如果您正苦于以下问题:Python Glove类的具体用法?Python Glove怎么用?Python Glove使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Glove类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_pythonObj
def test_pythonObj(self):
    """Measure an object graph made of assorted built-in Python values.

    Two local helper classes are defined: ``B`` holds one attribute per
    basic value kind (None, str, int, float, list, dict, tuple) and ``A``
    holds two ``B`` instances plus a list and a dict that reference them.
    An ``A`` instance is handed to ``Glove``, the measurement is run and the
    resulting report is printed.
    """
    class B():
        def __init__(self):
            self.none = None
            self.str = '1111'
            self.int = 15151515151515155151
            self.float = 11231231231212342323.
            self.list = [1, 2, 3, 4]
            self.dict = {1: 2, 2: 3}
            self.tuple = (1, 2, 3, 4)

    class A():
        def __init__(self):
            # B is resolved when A() is called, so the definition order of
            # the two helper classes does not matter.
            self.b1 = B()
            self.b2 = B()
            self.list = [1000, 23424.2, 'asdf0', u'unicode编码', self.b1]
            self.dic = {
                132323423412312311: 'utf8编码',
                '232': self.b2,
                self.b2: set([1, 2]),
                u'unicode编码': None,
                123: (11, 1, 111),
                11: (11, 1, 111),
            }

    root = A()
    glove = Glove(root)
    glove.meaure()
    print(glove.report)
示例2: word_embedding
def word_embedding(sentences, embedding_size, windows_len):
    """Build a co-occurrence corpus and a GloVe model for ``sentences``.

    Args:
        sentences: corpus passed straight to ``Corpus.fit``.
        embedding_size: value used for ``Glove(no_components=...)``.
        windows_len: value used for ``Corpus.fit(window=...)``.

    Returns:
        A ``(glove_model, corpus_dict, corpus_inverse_dict)`` tuple, where
        ``corpus_dict`` is ``corpus_model.dictionary`` and
        ``corpus_inverse_dict`` is the same mapping with keys and values
        swapped.

    NOTE(review): ``fit`` is called with ``epochs=0``, which looks like a
    leftover from a "performance is poor without fitting" test -- confirm
    whether callers really expect a model fitted for zero epochs.
    """
    corpus_model = Corpus()
    corpus_model.fit(sentences, window=windows_len)

    glove_model = Glove(no_components=embedding_size, learning_rate=0.05)
    glove_model.fit(corpus_model.matrix, epochs=0, no_threads=2)

    # The previous version also built a dense log co-occurrence matrix here
    # and never used it; that dead code has been removed.
    corpus_dict = corpus_model.dictionary
    corpus_inverse_dict = {index: word for word, index in corpus_dict.items()}
    return glove_model, corpus_dict, corpus_inverse_dict
示例3: build_model_glove
def build_model_glove(args):
    """Prepare the co-occurrence corpus and train a GloVe model on it.

    The corpus is rebuilt from ``get_sentences(args)`` and saved to
    ``args.corpus_model`` when that file is missing or is not newer than the
    most recently modified path in ``args.input``; otherwise it is loaded
    from disk. The model is then fitted with the settings found under
    ``CONFIG['glove']`` and returned with the corpus dictionary attached.

    Args:
        args: object providing ``corpus_model``, ``input``, ``workers`` and
            ``verbose`` attributes.

    Returns:
        The fitted ``Glove`` instance.
    """
    from glove import Glove, Corpus

    corpus_path = args.corpus_model
    rebuild = True
    if os.path.exists(corpus_path):
        newest_input = max(os.path.getmtime(path) for path in args.input)
        rebuild = newest_input >= os.path.getmtime(corpus_path)

    if rebuild:
        # Build the corpus dictionary and the co-occurrence matrix.
        logging.info('Pre-processing corpus')
        corpus_model = Corpus()
        corpus_model.fit(get_sentences(args), window=CONFIG['glove']['window'])
        corpus_model.save(corpus_path)
    else:
        # A saved corpus that is newer than every input can be reused.
        logging.info('Reading corpus statistics')
        corpus_model = Corpus.load(corpus_path)

    logging.info('Dict size: %s' % len(corpus_model.dictionary))
    logging.info('Collocations: %s' % corpus_model.matrix.nnz)

    # Train the GloVe model.
    logging.info('Training the GloVe model')
    glove = Glove(
        no_components=CONFIG['glove']['size'],
        learning_rate=CONFIG['glove']['learning_rate'],
    )
    glove.fit(
        corpus_model.matrix,
        epochs=CONFIG['glove']['epochs'],
        no_threads=args.workers,
        verbose=args.verbose,
    )
    glove.add_dictionary(corpus_model.dictionary)
    return glove
示例4: get_model
def get_model():
    """Return the shared GloVe model, loading it from disk on first use.

    The model is cached in the module-level ``model`` global, so only the
    first call in a process pays the loading cost. The original author notes
    this lazy initialisation is what makes the model usable from a pool.

    Returns:
        The object produced by ``Glove.load('w2v/glove_lemma_stopwords')``.
    """
    global model
    # Identity check: ``== None`` would invoke a custom ``__eq__`` if the
    # cached object defined one.
    if model is None:
        print('loading the glove model...')
        model = Glove.load('w2v/glove_lemma_stopwords')
    return model
示例5: __init__
def __init__(self, data_src, num_features=100, window=10, learning_rate=0.05, epochs=10):
    """Record the training settings, run pretraining, then load the model.

    Args:
        data_src: raw data forwarded to ``self.pretrain``.
        num_features: stored as ``self.num_features``.
        window: stored as ``self.window``.
        learning_rate: stored as ``self.learning_rate``.
        epochs: stored as ``self.epochs``.
    """
    # Settings are stored before pretrain() runs; presumably pretrain()
    # reads them -- verify against its implementation.
    self.num_features = num_features
    self.window = window
    self.learning_rate = learning_rate
    self.epochs = epochs

    self.pretrain(data_src)
    self.model = Glove.load("glove.model")
示例6: train_glove
def train_glove(sentences):
    """Fit a 300-dimensional GloVe model on ``sentences`` and time the run.

    Args:
        sentences: corpus passed to ``Corpus.fit``.

    Returns:
        The fitted ``Glove`` instance with the corpus dictionary attached.
    """
    print('training glove model...')
    started = time()

    corpus = Corpus()
    corpus.fit(sentences, window=5)  # context window size

    # 300 = word vector dimensionality
    glove = Glove(no_components=300, learning_rate=0.05)
    glove.fit(corpus.matrix, epochs=30, no_threads=8, verbose=True)
    glove.add_dictionary(corpus.dictionary)

    print('took %0.5fs.' % (time() - started))
    return glove
示例7: run_glove
def run_glove(self):
    """Train a GloVe model on this object's sentences and export its vectors.

    Sentences come from ``self.get_sentences()``. A corpus is fitted with a
    window of 10, then a 200-component model is trained for 5 epochs.

    Returns:
        A ``(unique_words, processed_vectors200)`` tuple of parallel lists:
        ``unique_words[i]`` is a word from the model's inverse dictionary and
        ``processed_vectors200[i]`` is a plain list holding the components of
        that word's vector.
    """
    sentences = self.get_sentences()

    print('\n' + '-'*80)
    print("Fitting words into corpus")
    corpus = Corpus()
    corpus.fit(sentences, window=10)

    print("Running Glove")
    glove = Glove(no_components=200, learning_rate=0.05)
    glove.fit(corpus.matrix, epochs=5, no_threads=10, verbose=True)
    glove.add_dictionary(corpus.dictionary)

    print("Fitting words and vectors into unique_words and vectors200")
    unique_words = []
    vectors200 = []
    total_words = len(glove.inverse_dictionary)
    for done, word_id in enumerate(glove.inverse_dictionary, 1):
        unique_words.append(glove.inverse_dictionary[word_id])
        vectors200.append(glove.word_vectors[word_id])
        sys.stdout.write("\rStatus: %s / %s"%(done, total_words))
        sys.stdout.flush()

    print('\n' + "Processing vectors200")
    processed_vectors200 = []
    total_vectors = len(vectors200)
    for done, vector in enumerate(vectors200, 1):
        # Build a fresh list per vector. The previous version reused one
        # accumulator list across all iterations, so every row of the result
        # was the same object containing the components of all vectors.
        processed_vectors200.append(list(vector))
        sys.stdout.write("\rStatus: %s / %s"%(done, total_vectors))
        sys.stdout.flush()

    return unique_words, processed_vectors200
示例8: main
def main():
    """Train a GloVe model on a previously saved corpus and save it.

    Loads the corpus from ``bioc-corpus-AZ2.model``, fits a 100-component
    model for 10 epochs on 16 threads, attaches the corpus dictionary and
    writes the result to ``bioc-glove-AZ2.model``.
    """
    # The corpus is loaded directly; the throwaway ``Corpus()`` instance that
    # used to be created first was immediately overwritten.
    corpus_model = Corpus.load('bioc-corpus-AZ2.model')

    glove = Glove(no_components=100, learning_rate=0.05)
    glove.fit(corpus_model.matrix, epochs=10, no_threads=16, verbose=True)
    glove.add_dictionary(corpus_model.dictionary)
    glove.save('bioc-glove-AZ2.model')
示例9: load_wv_model
def load_wv_model(word_vector_file, word_vector_type):
    """Load a word-vector file and return it behind a wrapper object.

    Args:
        word_vector_file: path handed to the underlying loader.
        word_vector_type: compared against ``WordVectorTypes.glove.name``;
            any other value selects the word2vec loader.

    Returns:
        A ``GloveWrapper`` around ``Glove.load_stanford(...)`` for the glove
        type, otherwise a ``W2VWrapper`` around ``word2vec.load(...)``.
    """
    # Backend imports stay local so only the selected library is imported.
    if word_vector_type != WordVectorTypes.glove.name:
        import word2vec
        return W2VWrapper(word2vec.load(word_vector_file))

    from glove import Glove
    return GloveWrapper(Glove.load_stanford(word_vector_file))
示例10: create_vectors_dataset
def create_vectors_dataset(input_files, vector_files, max_len=500):
    """Convert pickled stories to word-vector arrays and store them as HDF5.

    Args:
        input_files: ``(training_set_file, test_set_file)`` paths of the
            pickled story lists.
        vector_files: ``(train_word_file, test_word_file)`` paths of the HDF5
            files to write.
        max_len: forwarded to ``get_word_vectors``.

    Returns:
        ``((inputs_train, answers_train), (inputs_test, answers_test))`` as
        produced by ``get_word_vectors``.
    """
    print('Creating word vectors file')

    training_set_file, test_set_file = input_files
    train_word_file, test_word_file = vector_files

    # SECURITY: pickle.load can execute arbitrary code; only use trusted files.
    # NOTE(review): the files are still opened in text mode ('r') as before;
    # binary pickles (and Python 3) need 'rb' -- confirm how they are written.
    # Context managers make sure the handles are closed after loading.
    with open(training_set_file, 'r') as train_in:
        train_stories = pickle.load(train_in)
    with open(test_set_file, 'r') as test_in:
        test_stories = pickle.load(test_in)

    # Flatten each story's facts into a single token list.
    train_stories = [(reduce(lambda x, y: x + y, map(list, fact)), q) for fact, q in train_stories]
    test_stories = [(reduce(lambda x, y: x + y, map(list, fact)), q) for fact, q in test_stories]

    # Concatenate once instead of rebuilding the combined list per statistic.
    all_stories = train_stories + test_stories

    vocab = sorted(reduce(lambda x, y: x | y, (set(story + [answer]) for story, answer in all_stories)))
    # One extra slot: index 0 is reserved for masking via pad_sequences.
    vocab_size = len(vocab) + 1
    story_maxlen = max(map(len, (x for x, _ in all_stories)))

    print('-')
    print('Vocab size:', vocab_size, 'unique words')
    print('Story max length:', story_maxlen, 'words')
    print('Number of training stories:', len(train_stories))
    print('Number of test stories:', len(test_stories))
    print('-')
    print('Here\'s what a "story" tuple looks like (input, query, answer):')
    print(train_stories[0])
    print('-')
    print('Vectorizing the word sequences...')

    # Answer labels are numbered from 0 (no slot is reserved here).
    answer_vocab = sorted(set(answer for _, answer in all_stories))
    answer_dict = dict((word, i) for i, word in enumerate(answer_vocab))
    print('Answers dict len: {0}'.format(len(answer_dict)))

    # TODO: verify that the word-vector file exists before loading it.
    word_vectors_path = 'word_vectors/glove.42B.300d.txt'
    word_vectors_model = Glove.load_stanford(word_vectors_path)

    inputs_train, answers_train = get_word_vectors(train_stories, answer_dict,
                                                   max_len, word_vectors_model)
    inputs_test, answers_test = get_word_vectors(test_stories, answer_dict, max_len,
                                                 word_vectors_model)

    with h5py.File(train_word_file, 'w') as train_f:
        train_f.create_dataset('inputs', data=inputs_train)
        train_f.create_dataset('answers', data=answers_train)
    with h5py.File(test_word_file, 'w') as test_f:
        test_f.create_dataset('inputs', data=inputs_test)
        test_f.create_dataset('answers', data=answers_test)

    return (inputs_train, answers_train), (inputs_test, answers_test)
示例11: test_measure
def test_measure(self):
    """Measure a two-level tree of 100 x 100 empty objects.

    A root ``A`` instance receives 100 child attributes (``a0`` .. ``a99``);
    each child in turn receives 100 attributes of its own. The root is
    wrapped in ``Glove``, measured, and the report is printed.
    """
    class A():
        pass

    root = A()
    for i in xrange(100):
        child = A()
        for j in xrange(100):
            setattr(child, 'a%s' % j, A())
        setattr(root, 'a%s' % i, child)

    glove = Glove(root)
    glove.meaure()
    print(glove.report)
示例12: __init__
def __init__(self, data):
    """Set up lexicons, corpus, embedding models, clusters and encoders.

    Args:
        data: stored as ``self.data``.

    The order of the calls below is kept exactly as written, because later
    steps may rely on state set by earlier ones.
    """
    self.data = data
    self.corpus = None
    self.liu, self.subj = LiuLexicon(), SubjLexicon()

    # presumably populates self.corpus -- verify in buildTweetCorpus
    self.buildTweetCorpus()

    self.word_vec_model = Word2Vec(self.corpus)
    self.glove_vec_model = Glove(100, self.corpus)
    self.clusters = Cluster(100)

    # presumably creates self.topicenc, which is read just below -- verify
    self.initEncoders()
    self.topicVecs = self.word_vec_model.getVectorsForTopics(self.topicenc.classes_)

    self.collectTopUnigrams()
    self.collectTopBigrams()
示例13: pretrain
def pretrain(self, data_src):
    """Train a GloVe model and save it as ``glove.model`` if not yet present.

    Nothing happens when ``glove.model`` already exists. Otherwise
    ``data_src`` is cleaned with ``DataClean``, a corpus is fitted with
    ``self.window``, and a model is trained using ``self.num_features``,
    ``self.learning_rate`` and ``self.epochs``.

    Args:
        data_src: raw data to clean and train on.
    """
    if os.path.isfile("glove.model"):
        return

    substitutions = [
        ["[^a-z]", " "],  # only letters
        [" [ ]+", " "],  # remove extra spaces
    ]
    cleaner = DataClean(substitutions, html_clean=True, split_words=True)
    cleaned = cleaner.fit(data_src).transform(data_src)

    corpus_model = Corpus()
    corpus_model.fit(cleaned, window=self.window)

    glove = Glove(no_components=self.num_features, learning_rate=self.learning_rate)
    glove.fit(corpus_model.matrix, epochs=self.epochs, verbose=True)
    glove.add_dictionary(corpus_model.dictionary)
    glove.save("glove.model")
示例14: glove_vector_download_and_save
def glove_vector_download_and_save(url, outdir, maxmegabytes):
    """Download a zipped set of GloVe vectors and convert each file to a model.

    The archive is saved under ``<outdir>/<archive name without extension>/``
    (skipped if already downloaded) and extracted there. Every extracted
    ``*.txt`` file whose size does not exceed ``maxmegabytes`` is loaded with
    ``Glove.load_stanford``, saved next to it as ``<name>.obj`` and the text
    file is then deleted.

    Args:
        url: location of the zip archive; its basename is the local filename.
        outdir: existing directory that receives the per-archive directory.
        maxmegabytes: size limit (in MiB) above which a text file is skipped.
    """
    # construct filenames
    filename_full = os.path.basename(url)
    filename_name = os.path.splitext(filename_full)[0]

    # create file-specific output directory
    dirname_file = "{}/{}".format(outdir, filename_name)
    if not os.path.isdir(dirname_file):
        os.mkdir(dirname_file)

    # download file
    filename_save = "{}/{}".format(dirname_file, filename_full)
    if not os.path.isfile(filename_save):
        print("downloading {}...".format(filename_save))
        urllib.urlretrieve(url, filename_save)

    # extract zip
    print("extracting {}...".format(filename_save))
    with zipfile.ZipFile(filename_save, "r") as z:
        z.extractall(dirname_file)

    # build model for each file
    file_pattern = "{}/*.txt".format(dirname_file)
    for file_glove_in in glob.glob(file_pattern):
        try:
            # ensure file isn't too big
            filesize = os.path.getsize(file_glove_in) / 1024 / 1024
            if filesize > maxmegabytes:
                print("skipping {}M file {}...".format(filesize, file_glove_in))
            else:
                # load vectors
                print("importing glove vectors from {}".format(file_glove_in))
                model = Glove.load_stanford(file_glove_in)

                # save model object
                file_glove_out = "{}.obj".format(os.path.splitext(file_glove_in)[0])
                print("saving glove model to {}...".format(file_glove_out))
                model.save_obj(file_glove_out)

                # delete extracted file
                os.remove(file_glove_in)
        except MemoryError as e:
            # MemoryError has no ``strerror`` attribute (that belongs to
            # OSError), so the old handler itself raised AttributeError.
            # Report the failure and continue with the next file.
            print("out of memory while processing {}: {}".format(file_glove_in, e))
示例15: test_stanford_loading
def test_stanford_loading():
    """Check that a Stanford-format vector file loads with the expected shape.

    The fixture is expected to yield 100 words with 25-dimensional vectors
    and to contain the horizontal-ellipsis character as one of its words.
    """
    model = Glove.load_stanford('glove/tests/stanford_test.txt')

    vectors = model.word_vectors
    assert vectors is not None
    assert vectors.shape == (100, 25)
    assert len(model.dictionary) == 100

    # Python 2/3 compatibility: build the ellipsis character with unichr
    # where it exists (Python 2), otherwise use the literal (Python 3).
    try:
        ellipsis_char = unichr(8230)
    except NameError:
        ellipsis_char = '…'
    assert ellipsis_char in model.dictionary