本文整理汇总了 Python 中 glove.Glove.add_dictionary 方法的典型用法代码示例。如果您正苦于以下问题:Python Glove.add_dictionary 方法的具体用法?Python Glove.add_dictionary 怎么用?Python Glove.add_dictionary 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 glove.Glove 的用法示例。
在下文中一共展示了Glove.add_dictionary方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
def main():
    """Train a GloVe model from a saved corpus and write it to disk.

    Loads the corpus stored in ``bioc-corpus-AZ2.model``, fits a
    100-component model on its co-occurrence matrix (10 epochs, 16 threads),
    attaches the corpus dictionary and saves the result as
    ``bioc-glove-AZ2.model``.
    """
    # Corpus.load is called on the class and its result is the corpus used
    # below, so no empty Corpus() has to be created beforehand.
    corpus_model = Corpus.load('bioc-corpus-AZ2.model')

    glove = Glove(no_components=100, learning_rate=0.05)
    glove.fit(corpus_model.matrix, epochs=10, no_threads=16, verbose=True)
    glove.add_dictionary(corpus_model.dictionary)
    glove.save('bioc-glove-AZ2.model')
示例2: build_model_glove
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
def build_model_glove(args):
    """Build or reload the co-occurrence corpus, then train a GloVe model.

    The corpus is rebuilt (and saved to ``args.corpus_model``) when that file
    is missing or when any path in ``args.input`` has a modification time at
    least as recent as the saved corpus; otherwise it is loaded from disk.

    Reads ``args.corpus_model``, ``args.input``, ``args.workers`` and
    ``args.verbose``, and the ``window``, ``size``, ``learning_rate`` and
    ``epochs`` entries of ``CONFIG['glove']``.

    Returns the trained ``Glove`` object with the corpus dictionary attached.
    """
    from glove import Glove, Corpus

    def corpus_is_stale():
        # Checked first so getmtime is never asked about a missing corpus file.
        if not os.path.exists(args.corpus_model):
            return True
        newest_input = max(map(os.path.getmtime, args.input))
        return newest_input >= os.path.getmtime(args.corpus_model)

    if corpus_is_stale():
        # Build the corpus dictionary and the cooccurrence matrix.
        logging.info('Pre-processing corpus')
        corpus_model = Corpus()
        corpus_model.fit(get_sentences(args), window=CONFIG['glove']['window'])
        corpus_model.save(args.corpus_model)
    else:
        # Try to load a corpus from disk.
        logging.info('Reading corpus statistics')
        corpus_model = Corpus.load(args.corpus_model)

    # Both paths report the same statistics once the corpus is available.
    logging.info('Dict size: %s' % len(corpus_model.dictionary))
    logging.info('Collocations: %s' % corpus_model.matrix.nnz)

    # Train the GloVe model; the caller decides what to do with it.
    logging.info('Training the GloVe model')
    glove = Glove(
        no_components=CONFIG['glove']['size'],
        learning_rate=CONFIG['glove']['learning_rate'],
    )
    glove.fit(
        corpus_model.matrix,
        epochs=CONFIG['glove']['epochs'],
        no_threads=args.workers,
        verbose=args.verbose,
    )
    glove.add_dictionary(corpus_model.dictionary)
    return glove
示例3: pretrain
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
def pretrain(self, data_src):
    """Train a GloVe model on ``data_src`` and save it as ``glove.model``.

    Does nothing when ``glove.model`` already exists. Otherwise the input is
    passed through ``DataClean`` (fit, then transform), a corpus is built
    with ``self.window``, and a model with ``self.num_features`` components
    is trained for ``self.epochs`` epochs at ``self.learning_rate``.
    """
    if os.path.isfile("glove.model"):
        # A model file is already on disk; skip training.
        return

    cleaning_rules = [
        ["[^a-z]", " "],  # only letters
        [" [ ]+", " "],  # remove extra spaces
    ]
    cleaner = DataClean(cleaning_rules, html_clean=True, split_words=True)
    cleaned = cleaner.fit(data_src).transform(data_src)

    corpus_model = Corpus()
    corpus_model.fit(cleaned, window=self.window)

    model = Glove(no_components=self.num_features,
                  learning_rate=self.learning_rate)
    model.fit(corpus_model.matrix, epochs=self.epochs, verbose=True)
    model.add_dictionary(corpus_model.dictionary)
    model.save("glove.model")
示例4: train_glove
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
def train_glove(sentences):
    """Fit a GloVe model on ``sentences`` and return it.

    ``sentences`` is handed unchanged to ``Corpus.fit`` (presumably an
    iterable of token lists -- confirm against the caller). The model uses
    300 components, a context window of 5, a learning rate of 0.05, 30
    epochs and 8 threads. Progress and elapsed time are printed.

    Returns the trained ``Glove`` object with the corpus dictionary attached.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3,
    # unlike the print statement form.
    print('training glove model...')
    t0 = time()

    num_features = 300  # Word vector dimensionality
    context = 5  # Context window size
    learning_rate = 0.05

    corpus = Corpus()
    corpus.fit(sentences, window=context)

    glove = Glove(no_components=num_features, learning_rate=learning_rate)
    glove.fit(corpus.matrix, epochs=30, no_threads=8, verbose=True)
    glove.add_dictionary(corpus.dictionary)

    print('took %0.5fs.' % (time() - t0))
    return glove
示例5: run_glove
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
def run_glove(self):
    """Train GloVe on ``self.get_sentences()`` and return words with vectors.

    A corpus is built with a window of 10 and a 200-component model is
    trained for 5 epochs on 10 threads. Progress is written to stdout.

    Returns:
        (unique_words, processed_vectors200): two lists in the same order;
        entry i of ``processed_vectors200`` is a separate plain list holding
        the components of the vector for ``unique_words[i]``.
    """
    sentences = self.get_sentences()

    print('\n' + '-' * 80)
    print("Fitting words into corpus")
    corpus = Corpus()
    corpus.fit(sentences, window=10)

    print("Running Glove")
    glove = Glove(no_components=200, learning_rate=0.05)
    glove.fit(corpus.matrix, epochs=5, no_threads=10, verbose=True)
    glove.add_dictionary(corpus.dictionary)

    print("Fitting words and vectors into unique_words and vectors200")
    unique_words = []
    vectors200 = []
    length1 = len(glove.inverse_dictionary)
    for cnt1, word_id in enumerate(glove.inverse_dictionary, 1):
        unique_words.append(glove.inverse_dictionary[word_id])
        vectors200.append(glove.word_vectors[word_id])
        sys.stdout.write("\rStatus: %s / %s" % (cnt1, length1))
        sys.stdout.flush()

    print('\n' + "Processing vectors200")
    processed_vectors200 = []
    length2 = len(vectors200)
    for cnt2, vector in enumerate(vectors200, 1):
        # A fresh list per vector: reusing one list across iterations would
        # make every entry alias the same object containing the components
        # of all vectors concatenated together.
        processed_vectors200.append(list(vector))
        sys.stdout.write("\rStatus: %s / %s" % (cnt2, length2))
        sys.stdout.flush()

    return unique_words, processed_vectors200
示例6: build_glove_embeddings
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
def build_glove_embeddings(training, testing, args):
    ''' Trains GloVe on the training and testing texts and vectorises both sets
        @Arguments:
            training: iterable of (text, label) pairs; only the text is used
                for training the embeddings. It is iterated here and then
                passed again to to_sklearn_format, so it presumably has to
                be re-iterable (not a one-shot generator) -- confirm.
            testing: same structure and caveat as training
            args: object providing vecsize, learningRate, window, epochs,
                parallelism, verbose and pca

        @Return:
            (fromTraining, fromTesting): the results of to_sklearn_format
            applied to training and testing with the GloVe paragraph
            transformer
    '''
    # initialize model
    glove = Glove(no_components=args.vecsize, learning_rate=args.learningRate)

    # Generator expressions stay lazy like imap did, but also run on Python 3,
    # where imap and tuple-unpacking lambda parameters no longer exist.
    txtSource = chain(
        (txt for txt, _ in training),
        (txt for txt, _ in testing),
    )

    # read in the data to train on
    corpus_model = Corpus()
    corpus_model.fit(
        (preprocess.tokenize(txt) for txt in txtSource),
        window=args.window,
    )

    # fit the model using the given parameters
    logging.info("Training GloVe")
    glove.fit(corpus_model.matrix, epochs=args.epochs,
              no_threads=args.parallelism, verbose=args.verbose)

    # add a dictionary just to make it easier for similarity queries
    glove.add_dictionary(corpus_model.dictionary)

    def transformer(words):
        return glove.transform_paragraph(words, use_pca=args.pca)

    fromTraining = to_sklearn_format(transformer, training, args.vecsize)
    fromTesting = to_sklearn_format(transformer, testing, args.vecsize)
    return fromTraining, fromTesting
示例7: print
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
print('Collocations: %s' % corpus_model.matrix.nnz)
if args.train:
    # Training requested: fit a model on the corpus and persist it.
    if not args.create:
        # No corpus was built in this run, so read the saved one instead.
        print('Reading corpus statistics')
        corpus_model = Corpus.load('corpus.model')

        print('Dict size: %s' % len(corpus_model.dictionary))
        print('Collocations: %s' % corpus_model.matrix.nnz)

    print('Training the GloVe model')

    glove = Glove(no_components=100, learning_rate=0.05)
    # args.train carries the epoch count and is converted to int here.
    glove.fit(corpus_model.matrix, epochs=int(args.train),
              no_threads=args.parallelism, verbose=True)
    glove.add_dictionary(corpus_model.dictionary)

    glove.save('glove.model')

if args.query:
    # Query requested: print the 10 most similar entries for args.query.
    if not args.train:
        # Nothing was trained in this run, so use the model saved on disk.
        print('Loading pre-trained GloVe model')
        glove = Glove.load('glove.model')

    print('Querying for %s' % args.query)
    pprint.pprint(glove.most_similar(args.query, number=10))
示例8: Glove
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
@author: dannl
'''
from glove import Glove
from glove import Corpus
import time
# Saved co-occurrence corpus to read and model file to write.
cooc_file = '/home/dannl/tmp/newstech/glove/word.cooc'
model_file = '/home/dannl/tmp/newstech/glove/glove.model'

# Start of the timed section reported at the end of the script.
oldtime = time.time()

# get a cooccurrence matrix
corpus_cooc = Corpus.load(cooc_file)

# get a model
glove = Glove(no_components=100, learning_rate=0.05)
glove.fit(corpus_cooc.matrix, epochs=5, no_threads=4, verbose=True)
glove.add_dictionary(corpus_cooc.dictionary)
glove.save(model_file)

print('Dict size: %s' % len(corpus_cooc.dictionary))
print('Collocations: %s' % corpus_cooc.matrix.nnz)
# Call form like the two prints above; the statement form is a SyntaxError
# on Python 3.
print('time cost:%.2f' % (time.time() - oldtime))
示例9: mlp_model
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
mlp1000 = mlp_model(1000)
mlp1000_accuracy = train_test(mlp1000, x, y, folds)
print((mlp1_accuracy, mlp10_accuracy, mlp100_accuracy, mlp1000_accuracy))
#3CNN
#Glove Vectors from reviews
c = [review.split() for review in data.data]
corpus = Corpus()
corpus.fit(c, window=10)
glv = Glove(no_components=100, learning_rate=0.05)
glv.fit(corpus.matrix, epochs=30, no_threads=4, verbose=True)
glv.add_dictionary(corpus.dictionary)
embeddings_index = glv.dictionary
BASE_DIR = ''
GLOVE_DIR = BASE_DIR + '/glove.6B/'
TEXT_DATA_DIR = 'txt_sentoken/'
MAX_SEQUENCE_LENGTH = 1000
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2
texts = [] # list of text samples
labels_index = {} # dictionary mapping label name to numeric id
labels = [] # list of label ids
for name in sorted(os.listdir(TEXT_DATA_DIR)):
path = os.path.join(TEXT_DATA_DIR, name)
示例10: print
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import add_dictionary [as 别名]
print("Loading pretrained corpus...")
corpus = Corpus.load("cache/corpus.p")
except:
print("Training corpus...")
corpus.fit(texts, window=max_sentence_length)
corpus.save("cache/corpus.p")
# Untrained model; it is only fitted if no cached vectors can be loaded.
glove = Glove(no_components=number_components, learning_rate=0.05)
try:
    print("Loading pretrained GloVe vectors...")
    glove = Glove.load("cache/glove.p")
except Exception:
    # Any load failure falls back to training. Catching Exception rather
    # than using a bare except lets KeyboardInterrupt and SystemExit
    # propagate instead of silently starting a training run.
    print("Training GloVe vectors...")
    # More epochs seems to make it worse
    glove.fit(corpus.matrix, epochs=30, no_threads=4, verbose=True)
    glove.add_dictionary(corpus.dictionary)
    glove.save("cache/glove.p")
# Convert input text
print("Vectorizing input sentences...")
X = vectify(texts, previous_message, glove.dictionary, max_sentence_length, contextual)
y = np.array([x == u'1' for x in classes]).astype(np.int32)
X, y, texts = X[:207458], y[:207458], texts[:207458]
def print_accurate_forwards(net, history):
    """Print three ratios comparing predictions with the validation labels.

    The validation split comes from ``net.train_split(X, y, net)`` using the
    module-level ``X`` and ``y``; ``history`` is not used. Both counts are
    divided by ``np.mean(y_valid)``:

    * "Accurately forwarded": share of samples where the prediction equals
      the label and the prediction is 1.
    * "False Positives": share of samples where the prediction differs from
      the label and the label is 0.
    * "Valid forwards": the first ratio divided by the sum of both.
    """
    _, X_valid, _, y_valid = net.train_split(X, y, net)
    y_classified = net.predict(X_valid)
    positive_rate = np.mean(y_valid)

    hits = [
        actual == predicted and predicted == 1
        for actual, predicted in zip(y_valid, y_classified)
    ]
    acc_fwd = np.mean(hits) / positive_rate

    false_alarms = [
        predicted != actual and actual == 0
        for predicted, actual in zip(y_classified, y_valid)
    ]
    fls_pos = np.mean(false_alarms) / positive_rate

    valid_share = acc_fwd / (acc_fwd + fls_pos)
    message_parts = [
        'Accurately forwarded: {:.4f}'.format(acc_fwd),
        ', False Positives: {:.4f}'.format(fls_pos),
        ', Valid forwards: {:.4f}'.format(valid_share),
    ]
    print(''.join(message_parts))