本文整理汇总了Python中glove.Glove.fit方法的典型用法代码示例。如果您正苦于以下问题:Python Glove.fit方法的具体用法?Python Glove.fit怎么用?Python Glove.fit使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类glove.Glove的用法示例。
在下文中一共展示了Glove.fit方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
def main():
    """Train a GloVe model from a saved corpus and write it to disk.

    Loads the corpus stored in ``bioc-corpus-AZ2.model``, fits a model with
    ``no_components=100`` on its co-occurrence matrix, attaches the corpus
    dictionary and saves the result to ``bioc-glove-AZ2.model``.
    """
    # Corpus.load already produces the corpus object; the previous version
    # built an empty Corpus() first and overwrote it on the next line.
    corpus_model = Corpus.load('bioc-corpus-AZ2.model')

    glove = Glove(no_components=100, learning_rate=0.05)
    glove.fit(corpus_model.matrix, epochs=10, no_threads=16, verbose=True)
    glove.add_dictionary(corpus_model.dictionary)
    glove.save('bioc-glove-AZ2.model')
示例2: word_embedding
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
def word_embedding(sentences, embedding_size, windows_len, epochs=0,
                   no_threads=2):
    """Build a co-occurrence corpus from ``sentences`` and fit a GloVe model.

    Args:
        sentences: handed unchanged to ``Corpus.fit``.
        embedding_size: value used for the model's ``no_components``.
        windows_len: value used for the corpus ``window``.
        epochs: number of epochs passed to ``Glove.fit``. The default of 0 is
            the value this function has always used.
            NOTE(review): with zero epochs the returned model has presumably
            not been trained at all -- confirm whether callers rely on that
            or should pass a positive value.
        no_threads: number of threads passed to ``Glove.fit``.

    Returns:
        A ``(glove_model, corpus_dict, corpus_inverse_dict)`` tuple, where
        ``corpus_inverse_dict`` is ``corpus_dict`` with keys and values
        swapped.
    """
    corpus_model = Corpus()
    corpus_model.fit(sentences, window=windows_len)

    glove_model = Glove(no_components=embedding_size, learning_rate=0.05)
    glove_model.fit(corpus_model.matrix,
                    epochs=epochs,
                    no_threads=no_threads)

    # The previous version also densified a log co-occurrence matrix at this
    # point and never used it; that vocabulary-squared allocation is dropped.
    corpus_dict = corpus_model.dictionary
    corpus_inverse_dict = {index: word for word, index in corpus_dict.items()}
    return glove_model, corpus_dict, corpus_inverse_dict
示例3: build_model_glove
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
def build_model_glove(args):
    """Return a GloVe model fitted on a cached or freshly built corpus.

    The corpus at ``args.corpus_model`` is rebuilt from ``get_sentences(args)``
    when the file is missing or when any path in ``args.input`` has a
    modification time at least as recent as the cached corpus; otherwise the
    cached corpus is loaded. The fitted model is returned with the corpus
    dictionary attached; it is not saved here.
    """
    from glove import Glove, Corpus

    corpus_path = args.corpus_model

    # Only look at modification times once we know the cache file exists.
    stale = not os.path.exists(corpus_path)
    if not stale:
        newest_input = max(map(os.path.getmtime, args.input))
        stale = newest_input >= os.path.getmtime(corpus_path)

    if stale:
        # Build the corpus dictionary and the cooccurrence matrix.
        logging.info('Pre-processing corpus')
        corpus_model = Corpus()
        corpus_model.fit(get_sentences(args), window=CONFIG['glove']['window'])
        corpus_model.save(corpus_path)
    else:
        # Reuse the corpus already stored on disk.
        logging.info('Reading corpus statistics')
        corpus_model = Corpus.load(corpus_path)

    # Both paths report the same corpus statistics.
    logging.info('Dict size: %s' % len(corpus_model.dictionary))
    logging.info('Collocations: %s' % corpus_model.matrix.nnz)

    # Train the GloVe model on the co-occurrence matrix.
    logging.info('Training the GloVe model')
    glove = Glove(
        no_components=CONFIG['glove']['size'],
        learning_rate=CONFIG['glove']['learning_rate'],
    )
    glove.fit(
        corpus_model.matrix,
        epochs=CONFIG['glove']['epochs'],
        no_threads=args.workers,
        verbose=args.verbose,
    )
    glove.add_dictionary(corpus_model.dictionary)
    return glove
示例4: pretrain
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
def pretrain(self, data_src):
    """Train and save a GloVe model unless ``glove.model`` already exists.

    ``data_src`` is cleaned with ``DataClean`` (non-letters replaced by
    spaces, runs of spaces collapsed, HTML cleaning and word splitting
    enabled), turned into a co-occurrence corpus with ``self.window`` and
    used to fit a model configured from ``self.num_features``,
    ``self.learning_rate`` and ``self.epochs``. Nothing is returned.
    """
    # A model file left by an earlier run means there is nothing to do.
    if os.path.isfile("glove.model"):
        return

    cleaning_rules = [
        ["[^a-z]", " "],   # only letters
        [" [ ]+", " "],    # remove extra spaces
    ]
    cleaner = DataClean(cleaning_rules, html_clean=True, split_words=True)
    data_src = cleaner.fit(data_src).transform(data_src)

    corpus_model = Corpus()
    corpus_model.fit(data_src, window=self.window)

    glove = Glove(
        no_components=self.num_features,
        learning_rate=self.learning_rate,
    )
    glove.fit(corpus_model.matrix, epochs=self.epochs, verbose=True)
    glove.add_dictionary(corpus_model.dictionary)
    glove.save("glove.model")
示例5: train_glove
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
def train_glove(sentences, num_features=300, context=5, learning_rate=0.05,
                epochs=30, no_threads=8):
    """Fit a GloVe model on ``sentences`` and return it.

    Args:
        sentences: handed unchanged to ``Corpus.fit``.
        num_features: word vector dimensionality (``no_components``).
        context: context window size used when building the corpus.
        learning_rate: learning rate given to the model.
        epochs: number of training epochs.
        no_threads: number of threads used by ``Glove.fit``.

    Returns:
        The fitted ``Glove`` model with the corpus dictionary attached.

    The defaults reproduce the values that used to be hard-coded here.
    Progress and elapsed time are printed to standard output.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3; the
    # former print statements were a SyntaxError on Python 3.
    print('training glove model...')
    t0 = time()

    corpus = Corpus()
    corpus.fit(sentences, window=context)

    glove = Glove(no_components=num_features, learning_rate=learning_rate)
    glove.fit(corpus.matrix, epochs=epochs, no_threads=no_threads,
              verbose=True)
    glove.add_dictionary(corpus.dictionary)

    print('took %0.5fs.' % (time() - t0))
    return glove
示例6: run_glove
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
def run_glove(self):
    """Train GloVe on ``self.get_sentences()`` and return words and vectors.

    Builds a corpus with ``window=10``, fits a model with
    ``no_components=200`` for 5 epochs, then walks the model's
    ``inverse_dictionary`` to collect each word with its vector. Progress is
    written to standard output.

    Returns:
        A ``(unique_words, processed_vectors200)`` tuple of parallel lists:
        ``unique_words[i]`` is a word and ``processed_vectors200[i]`` is a
        list holding the components of that word's vector.
    """
    sentences = self.get_sentences()

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print('\n' + '-'*80)
    print("Fitting words into corpus")
    corpus = Corpus()
    corpus.fit(sentences, window=10)

    print("Running Glove")
    glove = Glove(no_components=200, learning_rate=0.05)
    glove.fit(corpus.matrix, epochs=5, no_threads=10, verbose=True)
    glove.add_dictionary(corpus.dictionary)

    print("Fitting words and vectors into unique_words and vectors200")
    unique_words = []
    vectors200 = []
    length1 = len(glove.inverse_dictionary)
    for cnt1, word_id in enumerate(glove.inverse_dictionary, 1):
        unique_words.append(glove.inverse_dictionary[word_id])
        vectors200.append(glove.word_vectors[word_id])
        sys.stdout.write("\rStatus: %s / %s"%(cnt1, length1))
        sys.stdout.flush()

    print('\n' + "Processing vectors200")
    processed_vectors200 = []
    length2 = len(vectors200)
    for cnt2, vector in enumerate(vectors200, 1):
        # Build a fresh list per word. The earlier code appended to a single
        # shared list created before the loop, so every entry of the result
        # referenced the same list containing the components of all words.
        processed_vectors200.append(list(vector))
        sys.stdout.write("\rStatus: %s / %s"%(cnt2, length2))
        sys.stdout.flush()

    return unique_words, processed_vectors200
示例7: build_glove_embeddings
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
def build_glove_embeddings(training, testing, args):
    ''' Fits GloVe on the training and testing texts and vectorises both sets
    @Arguments:
        training: iterable of (text, label) pairs. It is read once here to
            build the corpus and is handed to to_sklearn_format afterwards,
            so it presumably has to be re-iterable (a list, not a generator)
        testing: iterable of (text, label) pairs, used the same way
        args: options object; the attributes read here are vecsize,
            learningRate, window, epochs, parallelism, verbose and pca
    @Return:
        A (fromTraining, fromTesting) pair: the results of to_sklearn_format
        for the training and the testing data respectively
    '''
    # initialize model
    glove = Glove(no_components=args.vecsize, learning_rate=args.learningRate)

    # Lazily pull the text out of every (text, label) pair. Generator
    # expressions replace the tuple-parameter lambdas and imap, which only
    # exist on Python 2; iteration stays lazy exactly as before.
    txtSource = chain((txt for txt, _lbl in training),
                      (txt for txt, _lbl in testing))

    # read in the data to train on
    corpus_model = Corpus()
    corpus_model.fit((preprocess.tokenize(txt) for txt in txtSource),
                     window=args.window)

    # fit the model using the given parameters
    logging.info("Training GloVe")
    glove.fit(corpus_model.matrix, epochs=args.epochs,
              no_threads=args.parallelism, verbose=args.verbose)

    # add a dictionary just to make it easier for similarity queries
    glove.add_dictionary(corpus_model.dictionary)

    transformer = lambda words: glove.transform_paragraph(words, use_pca=args.pca)

    fromTraining = to_sklearn_format(transformer, training, args.vecsize)
    fromTesting = to_sklearn_format(transformer, testing, args.vecsize)
    return fromTraining, fromTesting
示例8: test_fitting
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
def test_fitting():
    """
    Check that training reduces the squared reconstruction error.

    A model fitted for zero epochs must reproduce the log co-occurrence
    matrix badly (error above 30000), while one fitted for 500 epochs must
    reproduce it well (error below 1500).
    """
    corpus = Corpus()
    corpus.fit(generate_training_corpus(5000, vocabulary_size=50, seed=10))

    def fit_model(n_epochs):
        # Same hyper-parameters for both models; only the epoch count varies.
        model = Glove(no_components=100, learning_rate=0.05)
        model.fit(corpus.matrix, epochs=n_epochs, no_threads=2)
        return model

    untrained = fit_model(0)

    # Dense log of the co-occurrence counts: the reconstruction target.
    target = corpus.matrix.copy()
    target.data = np.log(target.data)
    target = np.asarray(target.todense())

    def squared_error(model):
        residual = _reproduce_input_matrix(model) - target
        return (residual ** 2).sum()

    # Poor reconstruction without any training...
    assert squared_error(untrained) > 30000.0

    # ...and a good one after fitting.
    trained = fit_model(500)
    assert squared_error(trained) < 1500.0
示例9: print
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
print('Collocations: %s' % corpus_model.matrix.nnz)
# Training and querying stages of a command-line script driven by `args`.
if args.train:
    # Train the GloVe model and save it to disk.
    if not args.create:
        # The corpus was not created in this run, so load it from disk.
        # NOTE(review): when args.create is set, corpus_model is presumably
        # bound by code above this excerpt -- confirm.
        print('Reading corpus statistics')
        corpus_model = Corpus.load('corpus.model')
        print('Dict size: %s' % len(corpus_model.dictionary))
        print('Collocations: %s' % corpus_model.matrix.nnz)
    print('Training the GloVe model')
    glove = Glove(no_components=100, learning_rate=0.05)
    # args.train doubles as the epoch count once converted with int().
    glove.fit(corpus_model.matrix, epochs=int(args.train),
              no_threads=args.parallelism, verbose=True)
    glove.add_dictionary(corpus_model.dictionary)
    glove.save('glove.model')
if args.query:
    # Finally, query the model for most similar words.
    if not args.train:
        # Nothing was trained above, so use the model saved on disk.
        print('Loading pre-trained GloVe model')
        glove = Glove.load('glove.model')
    print('Querying for %s' % args.query)
    pprint.pprint(glove.most_similar(args.query, number=10))
示例10: Glove
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
@author: dannl
'''
from glove import Glove
from glove import Corpus
import time
# Train a GloVe model from a saved co-occurrence corpus, save it, and report
# the corpus statistics and the elapsed wall-clock time.
cooc_file = '/home/dannl/tmp/newstech/glove/word.cooc'
model_file = '/home/dannl/tmp/newstech/glove/glove.model'
oldtime = time.time()

# get a cooccurrence matrix
corpus_cooc = Corpus.load(cooc_file)

# get a model
glove = Glove(no_components=100, learning_rate=0.05)
glove.fit(corpus_cooc.matrix, epochs=5, no_threads=4, verbose=True)
glove.add_dictionary(corpus_cooc.dictionary)
glove.save(model_file)

print('Dict size: %s' % len(corpus_cooc.dictionary))
print('Collocations: %s' % corpus_cooc.matrix.nnz)
# Call form, consistent with the two lines above; the former print statement
# was a SyntaxError on Python 3.
print('time cost:%.2f' % (time.time() - oldtime))
示例11: embedding_func
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
def embedding_func(gridded_words_overall,embedding_size):
    """Learn GloVe vectors from per-frame word co-occurrences.

    Each row ("frame") of ``gridded_words_overall`` is scanned for every
    ordered pair of positions. Pairs that map to two different words add a
    weight to the cell (smaller index, larger index) of an upper-triangular
    co-occurrence matrix, so the (i, j) and (j, i) visits land on the same
    cell. The matrix is divided by its maximum and used to fit a ``Glove``
    model with ``embedding_size`` components for 500 epochs.

    Returns:
        A ``(word_vectors, dictionary)`` pair: the fitted model's
        ``word_vectors`` and the mapping from each word to its index.
    """
    glove_bins = np.asarray(gridded_words_overall)
    print(glove_bins)
    # One weight per grid cell; currently all ones.
    glove_weights = np.ones((glove_bins.shape))

    dictionary = {}
    rows = []
    cols = []
    data = array.array('f')
    distance = 1

    for frame_no, frame in enumerate(glove_bins):
        for i, first_word in enumerate(frame):
            # New words receive the next free index on first sight.
            first_idx = dictionary.setdefault(first_word, len(dictionary))
            first_weight = glove_weights[frame_no, i]
            for j, second_word in enumerate(frame):
                second_idx = dictionary.setdefault(second_word,
                                                   len(dictionary))
                second_weight = glove_weights[frame_no, j]
                if first_idx == second_idx:
                    # A word does not co-occur with itself.
                    continue
                pair_weight = first_weight * second_weight
                # Always store in the upper triangle.
                rows.append(min(first_idx, second_idx))
                cols.append(max(first_idx, second_idx))
                data.append(np.double(pair_weight * np.double(1.0) / distance))

    vocab_size = len(dictionary)
    # The round trip through CSR merges repeated (row, col) entries.
    x = sp.coo_matrix((data, (rows, cols)),
                      shape=(vocab_size, vocab_size),
                      dtype=np.double).tocsr().tocoo()
    print(dictionary)

    # Scale so that the largest entry becomes 1.
    xarr = x.toarray()
    xarr /= np.amax(xarr)
    print("coocurance matrix")
    print(xarr)
    xsparse = sp.coo_matrix(xarr)

    glove_model = Glove(no_components=embedding_size, learning_rate=0.05)
    glove_model.fit(xsparse, epochs=500, no_threads=2)
    return glove_model.word_vectors, dictionary
示例12: train_test
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
# Evaluate two more MLP variants with train_test on the same x, y and folds,
# then print the accuracies of all four variants together.
# NOTE(review): mlp100, mlp1_accuracy and mlp10_accuracy are defined above
# this excerpt; the argument of mlp_model is presumably a layer size -- confirm.
mlp100_accuracy = train_test(mlp100, x, y, folds)
mlp1000 = mlp_model(1000)
mlp1000_accuracy = train_test(mlp1000, x, y, folds)
print((mlp1_accuracy, mlp10_accuracy, mlp100_accuracy, mlp1000_accuracy))
# Part 3: CNN
# Train GloVe vectors on the reviews, each split on whitespace into tokens.
c = [review.split() for review in data.data]
corpus = Corpus()
corpus.fit(c, window=10)
glv = Glove(no_components=100, learning_rate=0.05)
glv.fit(corpus.matrix, epochs=30, no_threads=4, verbose=True)
glv.add_dictionary(corpus.dictionary)
# NOTE(review): this is the model's dictionary attribute, not its word
# vectors, despite the name -- confirm that is what later code expects.
embeddings_index = glv.dictionary
# Paths and hyper-parameters for the text pipeline. With BASE_DIR empty,
# GLOVE_DIR evaluates to '/glove.6B/'.
BASE_DIR = ''
GLOVE_DIR = BASE_DIR + '/glove.6B/'
TEXT_DATA_DIR = 'txt_sentoken/'
MAX_SEQUENCE_LENGTH = 1000
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2
texts = [] # list of text samples
labels_index = {} # dictionary mapping label name to numeric id
labels = [] # list of label ids
示例13: len
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
len(dictionary)),
dtype=np.double).tocsr().tocoo()
# Normalise the co-occurrence matrix, fit a small GloVe model on it and
# print the dot-product similarity between two words.
print(dictionary)
# Kept as real lists so positional access keeps working on Python 3, where
# keys() and values() return views.
dic_keys = list(dictionary.keys())
dic_values = list(dictionary.values())

# Scale so that the largest entry becomes 1.
xarr = x.toarray()
xarr /= np.amax(xarr)
print("coocurancem matrix")
print(xarr)
xsparse = sp.coo_matrix(xarr)

glove_model = Glove(no_components=5, learning_rate=0.05)
glove_model.fit(xsparse,
                epochs=500,
                no_threads=2)
new_word_representation = glove_model.word_vectors
print("New word representation")
print(new_word_representation)

print("*** Query ***")
# Look the row indices up directly. The former
# dic_values[dic_keys.index(...)] form was a linear scan and fails on
# Python 3 dict views.
query = 10
query_pos = dictionary[query]
target = 12
target_pos = dictionary[target]
sim = np.dot(glove_model.word_vectors[query_pos],
             glove_model.word_vectors[target_pos])
print(sim)
示例14: Glove
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import fit [as 别名]
for line in datafile:
# list of tokenized words
yield line.lower().translate(None, delchars).split(' ')
if __name__ == '__main__':
    # Script entry point: train on the input text, save the model, then
    # query it for a few probe words.

    # model to be trained
    glove = Glove(no_components=100, learning_rate=0.05)

    # co-occurrence statistics of the training text (shakespeare)
    cooccurrence = Corpus()
    cooccurrence.fit(read_corpus("data/input.txt"), window=10)

    # train with the given parameters
    glove.fit(cooccurrence.matrix, epochs=10, no_threads=1, verbose=True)

    # attach the dictionary so that similarity queries can use words
    glove.add_dictionary(cooccurrence.dictionary)

    # persist the trained object
    glove.save_obj('glove.model.obj')

    # ask for the words most similar to each probe word, with
    # show_hist enabled
    for probe in ('sky', 'queen', 'car'):
        glove.most_similar(probe, show_hist=True)