本文整理汇总了 Python 中 glove.Glove.load_stanford 方法的典型用法代码示例。如果您正苦于以下问题：Python Glove.load_stanford 方法的具体用法？Python Glove.load_stanford 怎么用？Python Glove.load_stanford 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 glove.Glove 的用法示例。
在下文中一共展示了Glove.load_stanford方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load_wv_model
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load_stanford [as 别名]
def load_wv_model(word_vector_file, word_vector_type):
    """Load a word-vector model from disk and wrap it in the matching adapter.

    :param word_vector_file: path of the word-vector file to load.
    :param word_vector_type: name of the vector format; the value
        ``WordVectorTypes.glove.name`` selects GloVe, any other value is
        handled as word2vec.
    :return: a ``GloveWrapper`` or ``W2VWrapper`` around the loaded model.
    """
    # Backends are imported lazily so only the one actually used is required.
    if word_vector_type != WordVectorTypes.glove.name:
        import word2vec
        return W2VWrapper(word2vec.load(word_vector_file))

    from glove import Glove
    return GloveWrapper(Glove.load_stanford(word_vector_file))
示例2: create_vectors_dataset
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load_stanford [as 别名]
def create_vectors_dataset(input_files, vector_files, max_len=500):
    """Turn pickled stories into word-vector arrays and store them as HDF5.

    :param input_files: pair ``(training_set_file, test_set_file)`` of paths
        to pickled story lists. Each story is a ``(fact, answer)`` pair where
        ``fact`` is an iterable of token sequences.
    :param vector_files: pair ``(train_word_file, test_word_file)`` of HDF5
        paths that are (over)written with ``inputs`` and ``answers`` datasets.
    :param max_len: length limit forwarded to ``get_word_vectors``.
    :return: ``((inputs_train, answers_train), (inputs_test, answers_test))``.
    """
    print('Creating word vectors file')

    training_set_file, test_set_file = input_files
    train_word_file, test_word_file = vector_files

    def load_stories(path):
        # Pickle data is binary: open in 'rb' and close the handle promptly.
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(path, 'rb') as handle:
            raw_stories = pickle.load(handle)
        # Flatten every fact (a sequence of token sequences) into one token list.
        return [([token for sentence in fact for token in sentence], answer)
                for fact, answer in raw_stories]

    train_stories = load_stories(training_set_file)
    test_stories = load_stories(test_set_file)
    all_stories = train_stories + test_stories

    vocab = set()
    for story, answer in all_stories:
        vocab.update(story)
        vocab.add(answer)
    vocab = sorted(vocab)

    # Reserve 0 for masking via pad_sequences
    vocab_size = len(vocab) + 1
    story_maxlen = max(len(story) for story, _ in all_stories)

    print('-')
    print('Vocab size:', vocab_size, 'unique words')
    print('Story max length:', story_maxlen, 'words')
    print('Number of training stories:', len(train_stories))
    print('Number of test stories:', len(test_stories))
    print('-')
    print('Here\'s what a "story" tuple looks like (input, query, answer):')
    print(train_stories[0])
    print('-')
    print('Vectorizing the word sequences...')

    # Answers are indexed from 0 in sorted order.
    answer_vocab = sorted(set(answer for _, answer in all_stories))
    answer_dict = dict((word, i) for i, word in enumerate(answer_vocab))
    print('Answers dict len: {0}'.format(len(answer_dict)))

    # TODO: verify that the pre-trained vector file exists before loading it.
    word_vectors_dir = 'word_vectors/glove.42B.300d.txt'
    word_vectors_model = Glove.load_stanford(word_vectors_dir)

    inputs_train, answers_train = get_word_vectors(train_stories, answer_dict,
                                                   max_len, word_vectors_model)
    inputs_test, answers_test = get_word_vectors(test_stories, answer_dict,
                                                 max_len, word_vectors_model)

    with h5py.File(train_word_file, 'w') as train_f:
        train_f.create_dataset('inputs', data=inputs_train)
        train_f.create_dataset('answers', data=answers_train)
    with h5py.File(test_word_file, 'w') as test_f:
        test_f.create_dataset('inputs', data=inputs_test)
        test_f.create_dataset('answers', data=answers_test)

    return (inputs_train, answers_train), (inputs_test, answers_test)
示例3: glove_vector_download_and_save
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load_stanford [as 别名]
def glove_vector_download_and_save(url, outdir, maxmegabytes):
    """Download a zipped GloVe archive and convert its vector files to models.

    The archive is stored under ``<outdir>/<archive name without extension>/``,
    extracted there, and every extracted ``*.txt`` file that is not larger
    than the limit is loaded with ``Glove.load_stanford`` and saved next to
    it as ``<name>.obj``; the converted text file is then deleted.

    :param url: location of the zip archive; its basename names the download.
    :param outdir: base output directory (created if missing).
    :param maxmegabytes: text files larger than this many megabytes are skipped.
    """
    # urlretrieve lives in urllib.request on Python 3 and in urllib on Python 2.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    # construct filenames
    filename_full = os.path.basename(url)
    filename_name = os.path.splitext(filename_full)[0]

    # create file-specific output directory, including missing parents
    dirname_file = "{}/{}".format(outdir, filename_name)
    if not os.path.isdir(dirname_file):
        os.makedirs(dirname_file)

    # download file unless an earlier run already fetched it
    filename_save = "{}/{}".format(dirname_file, filename_full)
    if not os.path.isfile(filename_save):
        print("downloading {}...".format(filename_save))
        urlretrieve(url, filename_save)

    # extract zip
    print("extracting {}...".format(filename_save))
    with zipfile.ZipFile(filename_save, "r") as z:
        z.extractall(dirname_file)

    # build model for each file
    file_pattern = "{}/*.txt".format(dirname_file)
    for file_glove_in in glob.glob(file_pattern):
        try:
            # ensure file isn't too big
            filesize = os.path.getsize(file_glove_in) / 1024 / 1024
            if filesize > maxmegabytes:
                print("skipping {}M file {}...".format(filesize, file_glove_in))
                continue

            # load vectors
            print("importing glove vectors from {}".format(file_glove_in))
            model = Glove.load_stanford(file_glove_in)

            # save model object
            file_glove_out = "{}.obj".format(os.path.splitext(file_glove_in)[0])
            print("saving glove model to {}...".format(file_glove_out))
            model.save_obj(file_glove_out)

            # delete extracted file
            os.remove(file_glove_in)
        except MemoryError as e:
            # MemoryError has no `strerror`; report the exception itself and
            # carry on with the next file.
            print("out of memory while importing {}: {}".format(file_glove_in, e))
示例4: test_stanford_loading
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load_stanford [as 别名]
def test_stanford_loading():
    """Check that the Stanford-format test file loads with the expected contents."""
    model = Glove.load_stanford('glove/tests/stanford_test.txt')

    vectors = model.word_vectors
    assert vectors is not None
    assert vectors.shape == (100, 25)
    assert len(model.dictionary) == 100

    # Python 2/3 compatibility: build the ellipsis character (code point 8230)
    # with whichever spelling the running interpreter supports.
    try:
        # Python 2
        ellipsis_char = unichr(8230)
    except NameError:
        # Python 3
        ellipsis_char = '…'
    assert ellipsis_char in model.dictionary
示例5: from_glove_model
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load_stanford [as 别名]
def from_glove_model(cls, vector_file):
    """
    WARNING: `glove_python` is required to load a model from a file path!

    Load a GloVe vector model.

    :param vector_file: path to a Stanford-format GloVe file, or an
        already-loaded model exposing `dictionary`, `word_vectors` and
        `no_components`
    :return: a `Vectors` object mapping each word to a list of
        `(dimension_name, value)` pairs
    """
    if isinstance(vector_file, str):
        # Import only when a file actually has to be parsed.
        from glove import Glove
        model = Glove.load_stanford(vector_file)
    else:
        model = vector_file

    dims = model.no_components  # vector dimensionality
    dimension_names = ['f%02d' % i for i in range(dims)]

    vectors = {}
    for word, row in model.dictionary.items():
        # Materialise the pairs: on Python 3 a bare zip() is a one-shot
        # iterator that would be empty after its first traversal.
        vectors[word] = list(zip(dimension_names, model.word_vectors[row]))
    return Vectors(vectors)
示例6: download_and_save_vectors_glove
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load_stanford [as 别名]
def download_and_save_vectors_glove(self, url, outdir, datafile=None, maxmegabytes=None):
    '''
    download and save pre-trained glove model

    :param url: archive location, passed to `self.download_and_extract_file`
    :param outdir: output directory, passed to the download helpers
    :param datafile: base name (without ".txt") of the vector file inside
        the extracted directory
    :param maxmegabytes: optional size limit; when set, a file larger than
        `int(maxmegabytes)` megabytes is skipped

    The extracted directory is always removed before returning, including
    when an error is raised while processing the file.
    '''
    # download file
    dirname_file = self.download_and_extract_file(url, outdir)
    try:
        # extracted file to import
        file_in = "{}/{}.txt".format(dirname_file, datafile)
        # build output filename
        fullpath_out = self.download_fullpath(outdir, datafile)

        # catch memory exceptions
        try:
            # ensure file isn't too big
            filesize = os.path.getsize(file_in) / 1024 / 1024
            filesize_ok = (not maxmegabytes or filesize <= int(maxmegabytes))

            # download specific file and/or files under specific limit
            if filesize_ok:
                print("importing glove vectors from {}".format(file_in))
                model = Glove.load_stanford(file_in)

                # save model object to specified output directory
                print("saving glove model to {}...".format(fullpath_out))
                model.save_obj(fullpath_out)
            else:
                print("skipping file {}...".format(file_in))
        except MemoryError as e:
            # MemoryError has no `strerror`; report the exception itself.
            print("out of memory while importing {}: {}".format(file_in, e))
    finally:
        # remove extracted directory, even if processing failed
        shutil.rmtree(dirname_file)
示例7: get_lines
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load_stanford [as 别名]
# `file`, `uris`, `questions`, `answers` and `cats` are created earlier in the
# script (outside this excerpt). Each input line is parsed as JSON and its
# first four items are appended to those lists.
for line in file:
    d = json.loads(line)
    uris.append(d[0])
    questions.append(d[1])
    answers.append(d[2])
    cats.append(d[3])


def get_lines():
    """Yield every answer as a list of whitespace-separated tokens."""
    for a in answers:
        yield a.split()


# Build the corpus dictionary and cooccurrence matrix
corpus_model = Corpus()
corpus_model.fit(get_lines(), window=8)
print('Dict size: %s' % len(corpus_model.dictionary))
print('Collocations: %s' % corpus_model.matrix.nnz)

# Train GloVe model
# NOTE(review): fit() is called on a model loaded from pre-trained vectors;
# confirm whether fit() keeps those vectors or re-initialises them.
glove = Glove.load_stanford('vectors.6B.100d.txt')
glove.fit(corpus_model.matrix, epochs=10, no_threads=4, verbose=True)
glove.add_dictionary(corpus_model.dictionary)

# Save: a "<word count> <dimension>" header, then one "<word> <values...>"
# line per word. The dimension is read from the model itself so the header
# always matches the vectors that are written.
with open('model.glove', 'w+') as out_file:
    out_file.write('%i %i \n' % (len(glove.dictionary), glove.no_components))
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for word, idx in glove.dictionary.items():
        out_file.write('%s %s \n' % (word, ' '.join(str(n) for n in glove.word_vectors[idx])))
示例8: main
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load_stanford [as 别名]
def main():
    """Command-line entry point: parse arguments, load data and train.

    Reads the train/test feature files and a word-vector file named on the
    command line, builds the one-hot tag mapper and the word-vector
    transformer, then calls ``train`` on the prepared train and test data.
    """
    parser = argparse.ArgumentParser(description='Linear regression')

    # Required args, registered in the order they must appear on the command line.
    positional_args = (
        ("image_feature_file_train", "Image Feature file for the training set"),
        ("text_feature_file_train", "Text Feature file for the training set"),
        ("image_feature_file_test", "Image Feature file for the test set"),
        ("text_feature_file_test", "Text Feature file for the test set"),
        ("word_vector_file", "Text file containing the word vectors"),
    )
    for arg_name, arg_help in positional_args:
        parser.add_argument(arg_name, type=str, help=arg_help)

    # Optional Args
    parser.add_argument("--word_vector_type",
                        choices=("word2vec", "glove"),
                        default="word2vec",
                        help="Word vector type")
    parser.add_argument("--logging_level",
                        choices=("debug", "info", "warning", "error"),
                        default="warning",
                        help="Python logging level")

    args = parser.parse_args()

    # Map the textual level ("debug", ...) onto the logging module constant.
    level = getattr(logging, args.logging_level.upper())
    logger.setLevel(level)

    logger.info("Parsing train and test datasets.")
    train_dataset = Dataset(args.image_feature_file_train, args.text_feature_file_train)
    test_dataset = Dataset(args.image_feature_file_test, args.text_feature_file_test)

    logger.info("Reading word vectors from file.")
    if args.word_vector_type != "glove":
        # args.word_vector_type == "word2vec" (default)
        import word2vec
        w2v_model = W2VWrapper(word2vec.load(args.word_vector_file))
    else:
        from glove import Glove
        w2v_model = GloveWrapper(Glove.load_stanford(args.word_vector_file))

    logger.info("Creating one hot tag mapper.")
    one_hot = OneHot([train_dataset, test_dataset], valid_vocab=w2v_model.vocab)

    logger.info("Creating w2v transformer.")
    w2v_transformer = NaiveW2V(one_hot, w2v_model, vocab=one_hot.keys())

    logger.info("Preparing train data from train datasets.")
    train_x, train_y = get_xy(train_dataset, tag_transformer=one_hot)

    logger.info("Transforming y using w2v transformer.")
    train_data = (train_x, w2v_transformer.transform(train_y))

    logger.info("Preparing test data from test dataset.")
    test_data = get_xy(test_dataset, tag_transformer=one_hot)

    logger.info("Training model.")
    model = train(train_data, test_data, interpreter=w2v_transformer)
    logger.info("Done.")
示例9: open
# 需要导入模块: from glove import Glove [as 别名]
# 或者: from glove.Glove import load_stanford [as 别名]
from glove import Glove,metrics
# Input locations: the GloVe-format vector file, the two vocabulary lists
# and the evaluation file.
vecf = "/home/naomi/data/mittens/vectors/twitter_win10_d100.txt"
formal_vocab_fname = "/home/naomi/data/mittens/wikipedia_en.txt.vocab"
informal_vocab_fname = "/home/naomi/data/mittens/twitter_en.txt.vocab"
eval_fname = "/home/naomi/embeddings/urbandic-scraper/spelling_variants_valid.txt"

# Each vocabulary is the set of first whitespace-separated tokens, one per line.
with open(formal_vocab_fname) as formal_vocab_fh:
    formal_vocab = {line.split()[0] for line in formal_vocab_fh}

with open(informal_vocab_fname) as informal_vocab_fh:
    informal_vocab = {line.split()[0] for line in informal_vocab_fh}

vectors = Glove.load_stanford(vecf)
def find_rank(similarity_list, target):
    """Return the zero-based position of ``target`` in a similarity list.

    :param similarity_list: iterable of ``(word, score)`` pairs.
    :param target: word to look for.
    :return: index of the first pair whose word equals ``target``.
    :raises LookupError: if no pair contains ``target``.
    """
    for rank, (word, _score) in enumerate(similarity_list):
        if word == target:
            return rank
    # Name the missing word so a failed lookup can be diagnosed.
    raise LookupError("{!r} not found in similarity list".format(target))
def filter_informal(similarity_list):
    """Keep only the pairs whose word belongs to the formal vocabulary.

    :param similarity_list: iterable of ``(word, score)`` pairs.
    :return: list of the pairs whose word is in the module-level
        ``formal_vocab``, in their original order.
    """
    # A comprehension replaces the tuple-unpacking lambda, which is a syntax
    # error on Python 3, and returns a list on every Python version.
    return [pair for pair in similarity_list if pair[0] in formal_vocab]
def eval_data_from_file(eval_fh):
variants = []
excluded_formal = 0
excluded_informal = 0
for line in eval_fh:
(informal, formal) = line.split()
if formal not in formal_vocab:
excluded_formal += 1