当前位置: 首页>>代码示例>>Python>>正文


Python Glove.load_stanford方法代码示例

本文整理汇总了Python中glove.Glove.load_stanford方法的典型用法代码示例。如果您正苦于以下问题:Python Glove.load_stanford方法的具体用法?Python Glove.load_stanford怎么用?Python Glove.load_stanford使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在glove.Glove的用法示例。


在下文中一共展示了Glove.load_stanford方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: load_wv_model

# 需要导入模块: from glove import Glove [as 别名]
# 注意: load_stanford 是 Glove 类上的方法, 无法单独导入, 请通过 Glove.load_stanford(...) 调用
def load_wv_model(word_vector_file, word_vector_type):
    """Load a word-vector file and wrap the model in the matching wrapper.

    When ``word_vector_type`` equals ``WordVectorTypes.glove.name`` the file
    is read with ``Glove.load_stanford`` and a ``GloveWrapper`` is returned.
    For every other value the file is read with ``word2vec.load`` and a
    ``W2VWrapper`` is returned.
    """
    # Each backend is imported only on the branch that uses it.
    if word_vector_type != WordVectorTypes.glove.name:
        import word2vec
        return W2VWrapper(word2vec.load(word_vector_file))

    from glove import Glove
    return GloveWrapper(Glove.load_stanford(word_vector_file))
开发者ID:agude,项目名称:attalos,代码行数:12,代码来源:main.py

示例2: create_vectors_dataset

# 需要导入模块: from glove import Glove [as 别名]
# 注意: load_stanford 是 Glove 类上的方法, 无法单独导入, 请通过 Glove.load_stanford(...) 调用
def create_vectors_dataset(input_files, vector_files, max_len=500,
                           word_vectors_path='word_vectors/glove.42B.300d.txt'):
    """Turn pickled train/test stories into word-vector arrays stored as HDF5.

    Each pickled story is a ``(facts, answer)`` pair. The facts of a story are
    concatenated into one flat token list before the stories are handed to
    ``get_word_vectors``.

    :param input_files: pair ``(training_set_file, test_set_file)`` of paths
        to the pickled story lists.
    :param vector_files: pair ``(train_word_file, test_word_file)`` of HDF5
        output paths; each receives an ``inputs`` and an ``answers`` dataset.
    :param max_len: passed through to ``get_word_vectors``.
    :param word_vectors_path: file loaded with ``Glove.load_stanford``. The
        default is the path that used to be hard-coded in this function.
    :return: ``((inputs_train, answers_train), (inputs_test, answers_test))``.
    """
    print('Creating word vectors file')

    training_set_file, test_set_file = input_files
    train_word_file, test_word_file = vector_files

    # Pickle data is binary: open in 'rb' and close the handles deterministically.
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    with open(training_set_file, 'rb') as train_fh:
        train_stories = pickle.load(train_fh)
    with open(test_set_file, 'rb') as test_fh:
        test_stories = pickle.load(test_fh)

    # Flatten every story's list of facts into a single token list.
    train_stories = [(reduce(lambda x,y: x + y, map(list,fact)),q) for fact,q in train_stories]
    test_stories = [(reduce(lambda x,y: x + y, map(list,fact)),q) for fact,q in test_stories]

    vocab = sorted(reduce(lambda x, y: x | y, (set(story + [answer]) for story, answer in train_stories + test_stories)))

    # One extra slot is counted so that index 0 stays free for padding/masking.
    vocab_size = len(vocab) + 1
    story_maxlen = max(map(len, (x for x, _ in train_stories + test_stories)))

    print('-')
    print('Vocab size:', vocab_size, 'unique words')
    print('Story max length:', story_maxlen, 'words')
    print('Number of training stories:', len(train_stories))
    print('Number of test stories:', len(test_stories))
    print('-')
    print('Here\'s what a "story" tuple looks like (input, query, answer):')
    print(train_stories[0])
    print('-')
    print('Vectorizing the word sequences...')

    answer_vocab = sorted(reduce(lambda x, y: x | y, (set([answer]) for _, answer in train_stories + test_stories)))
    # Answer indices start at 0; no padding slot is reserved for them.
    answer_dict = dict((word, i) for i, word in enumerate(answer_vocab))
    print('Answers dict len: {0}'.format(len(answer_dict)))

    # NOTE(review): the existence of word_vectors_path is not checked here.
    word_vectors_model = Glove.load_stanford(word_vectors_path)

    inputs_train, answers_train = get_word_vectors(train_stories, answer_dict,
                                                   max_len, word_vectors_model)
    inputs_test, answers_test = get_word_vectors(test_stories, answer_dict, max_len,
                                                 word_vectors_model)

    with h5py.File(train_word_file,'w') as train_f:
        _ = train_f.create_dataset('inputs',data=inputs_train)
        _ = train_f.create_dataset('answers',data=answers_train)
    with h5py.File(test_word_file,'w') as test_f:
        _ = test_f.create_dataset('inputs',data=inputs_test)
        _ = test_f.create_dataset('answers',data=answers_test)

    return (inputs_train, answers_train),(inputs_test, answers_test)
开发者ID:alejandrorivas,项目名称:MedicalDiagnosis,代码行数:56,代码来源:utils.py

示例3: glove_vector_download_and_save

# 需要导入模块: from glove import Glove [as 别名]
# 注意: load_stanford 是 Glove 类上的方法, 无法单独导入, 请通过 Glove.load_stanford(...) 调用
def glove_vector_download_and_save(url, outdir, maxmegabytes):
    """Download a zipped GloVe archive and convert its vector files to models.

    The archive is stored under ``<outdir>/<archive name without extension>/``
    (the download is skipped when that file already exists) and extracted
    there. Every extracted ``*.txt`` file that is not larger than
    ``maxmegabytes`` is loaded with ``Glove.load_stanford``, saved next to it
    with an ``.obj`` extension, and the text file is then deleted.

    :param url: location of the zip archive; its basename names the saved file.
    :param outdir: directory under which the archive-specific directory is
        created with ``os.mkdir``.
    :param maxmegabytes: size limit in megabytes; larger files are skipped.
    """

    # construct filenames
    filename_full = os.path.basename(url)
    filename_name = os.path.splitext(filename_full)[0]

    # create file-specific output directory
    dirname_file = "{}/{}".format(outdir, filename_name)
    if not os.path.isdir(dirname_file):
        os.mkdir(dirname_file)

    # download file
    filename_save = "{}/{}".format(dirname_file, filename_full)
    if not os.path.isfile(filename_save):
        print("downloading {}...".format(filename_save))
        urllib.urlretrieve(url, filename_save)

    # extract zip
    print("extracting {}...".format(filename_save))
    with zipfile.ZipFile(filename_save, "r") as z:
        z.extractall(dirname_file)

    # build model for each file
    file_pattern = "{}/*.txt".format(dirname_file)
    for file_glove_in in glob.glob(file_pattern):

        try:
            # ensure file isn't too big
            filesize = os.path.getsize(file_glove_in) / 1024 / 1024
            if filesize > maxmegabytes:
                print("skipping {}M file {}...".format(filesize, file_glove_in))

            else:

                # load vectors
                print("importing glove vectors from {}".format(file_glove_in))
                model = Glove.load_stanford(file_glove_in)

                # save model object
                file_glove_out = "{}.obj".format(os.path.splitext(file_glove_in)[0])
                print("saving glove model to {}...".format(file_glove_out))
                model.save_obj(file_glove_out)

                # delete extracted file
                os.remove(file_glove_in)

        except MemoryError as e:
            # MemoryError has no ``strerror`` attribute, so report the
            # exception itself and move on to the next file.
            print("out of memory while importing {}: {}".format(file_glove_in, e))
开发者ID:AimVoma,项目名称:sunny-side-up,代码行数:50,代码来源:models-load-stanford.py

示例4: test_stanford_loading

# 需要导入模块: from glove import Glove [as 别名]
# 注意: load_stanford 是 Glove 类上的方法, 无法单独导入, 请通过 Glove.load_stanford(...) 调用
def test_stanford_loading():
    """Check that the Stanford-format fixture loads with the expected contents."""
    loaded = Glove.load_stanford('glove/tests/stanford_test.txt')

    vectors = loaded.word_vectors
    assert vectors is not None
    assert vectors.shape == (100, 25)
    assert len(loaded.dictionary) == 100

    # The ellipsis character (code point 8230) must be a dictionary key.
    # Build it with whichever spelling the running interpreter supports.
    try:
        ellipsis_char = unichr(8230)  # Python 2
    except NameError:
        ellipsis_char = '…'  # Python 3
    assert ellipsis_char in loaded.dictionary
开发者ID:AimVoma,项目名称:sunny-side-up,代码行数:18,代码来源:test_glove.py

示例5: from_glove_model

# 需要导入模块: from glove import Glove [as 别名]
# 注意: load_stanford 是 Glove 类上的方法, 无法单独导入, 请通过 Glove.load_stanford(...) 调用
    def from_glove_model(cls, vector_file):
        """
        WARNING: `glove_python` is required to use this function!

        Load a GloVe vector model.
        :param vector_file: path to a glove model (a `str`), which is loaded
         with `Glove.load_stanford`; any other object is used as the model
         directly and must expose `dictionary`, `no_components` and
         `word_vectors`
        :return: a `Vectors` object
        """
        from glove import Glove

        model = Glove.load_stanford(vector_file) if isinstance(vector_file, str) else vector_file

        # One feature name per vector dimension: f00, f01, ...
        dimension_names = ['f%02d' % i for i in range(model.no_components)]

        # Materialise the (feature, value) pairs: on Python 3 a bare zip() is
        # a single-use iterator and would be empty after its first traversal.
        vectors = {
            word: list(zip(dimension_names, model.word_vectors[idx]))
            for word, idx in model.dictionary.items()
        }

        return Vectors(vectors)
开发者ID:tttthomasssss,项目名称:DiscoUtils,代码行数:24,代码来源:thesaurus_loader.py

示例6: download_and_save_vectors_glove

# 需要导入模块: from glove import Glove [as 别名]
# 注意: load_stanford 是 Glove 类上的方法, 无法单独导入, 请通过 Glove.load_stanford(...) 调用
    def download_and_save_vectors_glove(self, url, outdir, datafile=None, maxmegabytes=None):
        '''
            download and save pre-trained glove model

            The archive is fetched with ``self.download_and_extract_file``;
            ``<extracted dir>/<datafile>.txt`` is then loaded with
            ``Glove.load_stanford`` and saved to the path returned by
            ``self.download_fullpath(outdir, datafile)``. The extracted
            directory is always removed afterwards.

            url          -- passed to ``self.download_and_extract_file``
            outdir       -- passed to both helper methods
            datafile     -- name (without ``.txt``) of the vector file to import
            maxmegabytes -- optional size limit in megabytes; a falsy value
                            disables the check
        '''

        # download file
        dirname_file = self.download_and_extract_file(url, outdir)

        try:
            # extract file
            file_in = "{}/{}.txt".format(dirname_file, datafile)

            # build output filename
            fullpath_out = self.download_fullpath(outdir, datafile)

            # catch memory exceptions
            try:

                # ensure file isn't too big
                filesize = os.path.getsize(file_in) / 1024 / 1024
                filesize_ok = (not maxmegabytes or filesize <= int(maxmegabytes))

                # download specific file and/or files under specific limit
                if filesize_ok:
                    print("importing glove vectors from {}".format(file_in))
                    model = Glove.load_stanford(file_in)

                    # save model object to specified output directory
                    print("saving glove model to {}...".format(fullpath_out))
                    model.save_obj(fullpath_out)
                else:
                    print("skipping file {}...".format(file_in))

            except MemoryError as e:
                # MemoryError has no ``strerror`` attribute; report the
                # exception itself instead of failing inside the handler.
                print("out of memory while importing {}: {}".format(file_in, e))

        finally:
            # remove extracted directory, even when an unexpected error escapes
            shutil.rmtree(dirname_file)
开发者ID:AimVoma,项目名称:sunny-side-up,代码行数:40,代码来源:model_downloader.py

示例7: get_lines

# 需要导入模块: from glove import Glove [as 别名]
# 注意: load_stanford 是 Glove 类上的方法, 无法单独导入, 请通过 Glove.load_stanford(...) 调用
    # Each line is parsed as JSON; elements 0-3 of the parsed value are
    # appended to the uris, questions, answers and cats lists respectively.
    # NOTE(review): the statement that opens `file` and the definitions of the
    # four lists are outside this excerpt.
    for line in file:
        d = json.loads(line)

        uris.append(d[0])
        questions.append(d[1])
        answers.append(d[2])
        cats.append(d[3])

def get_lines():
    """Yield every entry of the module-level ``answers`` split on whitespace."""
    for answer_text in answers:
        tokens = answer_text.split()
        yield tokens

# Build the corpus dictionary and co-occurrence matrix
corpus_model = Corpus()
corpus_model.fit(get_lines(), window=8)

print('Dict size: %s' % len(corpus_model.dictionary))
print('Collocations: %s' % corpus_model.matrix.nnz)

# Train GloVe model, starting from a model loaded from a Stanford-format file
#glove = Glove(no_components = no_comp, learning_rate=0.05)
glove = Glove.load_stanford('vectors.6B.100d.txt')
glove.fit(corpus_model.matrix, epochs=10, no_threads=4, verbose=True)
glove.add_dictionary(corpus_model.dictionary)

# Save as text: a "<vocabulary size> <dimensions>" header line followed by
# one "<word> <v1> <v2> ..." line per dictionary entry.
with open('model.glove', 'w+') as out_file:
    # Take the dimensionality from the vectors that are actually written, so
    # the header cannot disagree with the rows below it.
    out_file.write('%i %i \n' % (len(glove.dictionary), glove.word_vectors.shape[1]))
    # items() works on both Python 2 and 3; iteritems() is Python 2 only.
    for word, idx in glove.dictionary.items():
        out_file.write('%s %s \n' % (word, ' '.join(str(n) for n in glove.word_vectors[idx])))
开发者ID:rlrs,项目名称:AMLstuff,代码行数:32,代码来源:train_glove.py

示例8: main

# 需要导入模块: from glove import Glove [as 别名]
# 注意: load_stanford 是 Glove 类上的方法, 无法单独导入, 请通过 Glove.load_stanford(...) 调用
def main():
    """Command-line entry point: parse arguments, load the data, train a model.

    Reads four feature-file paths and a word-vector file path from the command
    line, builds the train and test ``Dataset`` objects, wraps the word
    vectors (GloVe or word2vec, chosen by ``--word_vector_type``), and calls
    ``train`` on the prepared data.
    """
    arg_parser = argparse.ArgumentParser(description='Linear regression')

    # Required args, registered in command-line order
    positional_args = (
        ("image_feature_file_train", "Image Feature file for the training set"),
        ("text_feature_file_train", "Text Feature file for the training set"),
        ("image_feature_file_test", "Image Feature file for the test set"),
        ("text_feature_file_test", "Text Feature file for the test set"),
        ("word_vector_file", "Text file containing the word vectors"),
    )
    for arg_name, arg_help in positional_args:
        arg_parser.add_argument(arg_name, type=str, help=arg_help)

    # Optional Args
    arg_parser.add_argument("--word_vector_type",
                            default="word2vec",
                            choices=("word2vec", "glove"),
                            help="Word vector type")
    arg_parser.add_argument("--logging_level",
                            default="warning",
                            choices=("debug", "info", "warning", "error"),
                            help="Python logging level")

    options = arg_parser.parse_args()

    # Map the lower-case choice onto the matching constant of the logging module.
    level_name = options.logging_level.upper()
    logger.setLevel(getattr(logging, level_name))

    logger.info("Parsing train and test datasets.")
    train_dataset = Dataset(options.image_feature_file_train,
                            options.text_feature_file_train)
    test_dataset = Dataset(options.image_feature_file_test,
                           options.text_feature_file_test)

    logger.info("Reading word vectors from file.")
    if options.word_vector_type != "glove":  # "word2vec" (default)
        import word2vec
        w2v_model = W2VWrapper(word2vec.load(options.word_vector_file))
    else:
        from glove import Glove
        w2v_model = GloveWrapper(Glove.load_stanford(options.word_vector_file))

    logger.info("Creating one hot tag mapper.")
    one_hot = OneHot([train_dataset, test_dataset], valid_vocab=w2v_model.vocab)

    logger.info("Creating w2v transformer.")
    w2v_transformer = NaiveW2V(one_hot, w2v_model, vocab=one_hot.keys())

    logger.info("Preparing train data from train datasets.")
    train_x, train_y = get_xy(train_dataset, tag_transformer=one_hot)

    logger.info("Transforming y using w2v transformer.")
    train_data = (train_x, w2v_transformer.transform(train_y))

    logger.info("Preparing test data from test dataset.")
    test_data = get_xy(test_dataset, tag_transformer=one_hot)

    logger.info("Training model.")
    model = train(train_data, test_data, interpreter=w2v_transformer)
    logger.info("Done.")
开发者ID:Andrew62,项目名称:attalos,代码行数:68,代码来源:lr_naivew2v_correlation_main.py

示例9: open

# 需要导入模块: from glove import Glove [as 别名]
# 注意: load_stanford 是 Glove 类上的方法, 无法单独导入, 请通过 Glove.load_stanford(...) 调用
from glove import Glove,metrics

# Input locations: the vector file, the two vocabulary files and the
# evaluation pairs.
vecf = "/home/naomi/data/mittens/vectors/twitter_win10_d100.txt"
formal_vocab_fname = "/home/naomi/data/mittens/wikipedia_en.txt.vocab"
informal_vocab_fname = "/home/naomi/data/mittens/twitter_en.txt.vocab"
eval_fname = "/home/naomi/embeddings/urbandic-scraper/spelling_variants_valid.txt"

# Each vocabulary is the set of first whitespace-separated fields, one per line.
with open(formal_vocab_fname) as formal_vocab_fh:
    formal_vocab = {entry.split()[0] for entry in formal_vocab_fh}
with open(informal_vocab_fname) as informal_vocab_fh:
    informal_vocab = {entry.split()[0] for entry in informal_vocab_fh}

vectors = Glove.load_stanford(vecf)

def find_rank(similarity_list, target):
    """Return the zero-based position of ``target`` in ``similarity_list``.

    ``similarity_list`` is iterated as ``(word, score)`` pairs, and the index
    of the first pair whose word equals ``target`` is returned.

    :raises LookupError: if no pair carries ``target``.
    """
    rank = 0
    for candidate, _score in similarity_list:
        if candidate == target:
            return rank
        rank += 1
    raise LookupError

def filter_informal(similarity_list):
    """Keep only the entries whose word is in the module-level ``formal_vocab``.

    :param similarity_list: iterable of ``(word, score)`` pairs.
    :return: list of the pairs whose word is in ``formal_vocab``, in their
        original order.
    """
    # A comprehension replaces ``filter`` with a tuple-unpacking lambda: that
    # lambda form is a syntax error on Python 3 (PEP 3113), and ``filter``
    # would return a lazy object there instead of a list.
    return [entry for entry in similarity_list if entry[0] in formal_vocab]

def eval_data_from_file(eval_fh):
    variants = []
    excluded_formal = 0
    excluded_informal = 0

    for line in eval_fh:
        (informal, formal) = line.split()
        if formal not in formal_vocab:
            excluded_formal += 1
开发者ID:nsaphra,项目名称:urbandic-scraper,代码行数:33,代码来源:eval_spelling_variants.py


注:本文中的glove.Glove.load_stanford方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。