当前位置: 首页>>代码示例>>Python>>正文


Python DataLoader.get_data_separately方法代码示例

本文整理汇总了Python中DataLoader.get_data_separately方法的典型用法代码示例。如果您正苦于以下问题:Python DataLoader.get_data_separately方法的具体用法?Python DataLoader.get_data_separately怎么用?Python DataLoader.get_data_separately使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在DataLoader的用法示例。


在下文中一共展示了DataLoader.get_data_separately方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import get_data_separately [as 别名]
def main():
    """Parse command-line options, load word2vec vectors, and export each
    dataset's abstract/title/mesh matrices plus labels to HDF5 files.

    Recognized options:
        --max_words_abstract  max token count for abstracts (default 270)
        --max_words_title     max token count for titles (default 17)
        --max_words_mesh      max token count for MeSH terms (default 50)
        --path                input data directory (default 'Data/')
        --w2v_path            path to a binary word2vec model
        --w2v_length          accepted but currently unused
        --filter_small_data   1 to drop small datasets, anything else keeps them

    Exits with status 2 on an unparsable command line. Writes one
    ``DataProcessed/<name>.hdf5`` file per dataset returned by
    ``DataLoader.get_data_separately``.
    """
    try:
        # Bug fix: the original spec contained '=filter_small_data' (the '='
        # belongs at the end), so '--filter_small_data <value>' could never
        # take an argument and would raise GetoptError.
        opts, args = getopt.getopt(sys.argv[1:], '',
                                   ['max_words_abstract=', 'max_words_title=',
                                    'max_words_mesh=', 'path=', 'w2v_path=',
                                    'w2v_length=', 'filter_small_data='])
    except getopt.GetoptError as error:
        print(error)
        sys.exit(2)

    max_words = {'text': 270, 'mesh': 50, 'title': 17}
    path = 'Data/'
    w2v_path = '/Users/ericrincon/PycharmProjects/Deep-PICO/wikipedia-pubmed-and-PMC-w2v.bin'
    word_vector_size = 200
    filter_small_data = False

    for opt, arg in opts:
        if opt == '--max_words_abstract':
            max_words['text'] = int(arg)
        elif opt == '--max_words_title':
            # Bug fix: this branch originally wrote to max_words['mesh'],
            # silently ignoring the title limit and clobbering the mesh one.
            max_words['title'] = int(arg)
        elif opt == '--max_words_mesh':
            max_words['mesh'] = int(arg)
        elif opt == '--path':
            path = arg
        elif opt == '--w2v_path':
            w2v_path = arg
        elif opt == '--filter_small_data':
            # Bug fix: the original 'elif int(arg): filter_small_data = False'
            # reset the flag for any truthy value other than 1 and never
            # handled 0 explicitly; 1 now enables, everything else disables.
            filter_small_data = int(arg) == 1

    print('Loading word2vec...')
    w2v = Word2Vec.load_word2vec_format(w2v_path, binary=True)
    print('Loaded word2vec...')

    X_list, y_list, data_names = DataLoader.get_data_separately(
        max_words, word_vector_size, w2v, use_abstract_cnn=True,
        preprocess_text=False, filter_small_data=filter_small_data)

    for X, y, name in zip(X_list, y_list, data_names):
        X_abstract, X_title, X_mesh = X

        # Bug fix: the HDF5 handle was never closed; 'with' guarantees the
        # file is flushed and closed even if a create_dataset call raises.
        with h5py.File("DataProcessed/" + name + ".hdf5", "w") as f:
            f.create_dataset('X_abstract', data=X_abstract, shape=X_abstract.shape)
            f.create_dataset('X_title', data=X_title, shape=X_title.shape)
            f.create_dataset('X_mesh', data=X_mesh, shape=X_mesh.shape)
            f.create_dataset('y', data=y, shape=y.shape)
开发者ID:ericrincon,项目名称:DeepAbstractRelevance,代码行数:50,代码来源:ProcessData.py

示例2: main

# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import get_data_separately [as 别名]

#.........这里部分代码省略.........
        elif opt == '--patience':
            patience = int(arg)

        elif opt == '--undersample':
            if int(arg) == 0:
                undersample = False
            elif int(arg) == 1:
                undersample = True
        elif opt == '--tacc':
            if int(arg) == 1:
                using_tacc = True

        else:
            print("Option {} is not valid!".format(opt))


    if using_tacc:
        nltk.data.path.append('/work/03186/ericr/nltk_data/')
    print('Loading data...')

    if load_data_from_scratch:

        print('Loading Word2Vec...')
        w2v = Word2Vec.load_word2vec_format(w2v_path, binary=True)
        print('Loaded Word2Vec...')
        X_list = []
        y_list = []

        if use_embedding:

            X_list, y_list, embedding_list = DataLoader.get_data_as_seq(w2v, w2v_size, max_words)

        else:
            X_list, y_list = DataLoader.get_data_separately(max_words, word_vector_size,
                                                            w2v, use_abstract_cnn=True,
                                                            preprocess_text=False,
                                                            filter_small_data=filter_small_data)
    else:
        X_list, y_list = DataLoader.load_datasets_from_h5py('DataProcessed', True)


    print('Loaded data...')
    dataset_names = DataLoader.get_all_files('DataProcessed')
    dataset_names = [x.split('/')[-1].split('.')[0] for x in dataset_names]

    results_file = open(experiment_name + "_results.txt", "w+")

    for dataset_i, (X, y) in enumerate(zip(X_list, y_list)):
        if use_embedding:
            embedding = embedding_list[dataset_i]

        model_name = dataset_names[dataset_i]

        print("Dataset: {}".format(model_name))

        results_file.write(model_name)
        results_file.write("Dataset: {}".format(model_name))

        X_abstract, X_title, X_mesh = X['text'], X['title'], X['mesh']
        n = X_abstract.shape[0]
        kf = KFold(n, random_state=1337, shuffle=True, n_folds=5)

        if pretrain:
            pretrain_fold_accuracies = []
            pretrain_fold_recalls = []
            pretrain_fold_precisions =[]
开发者ID:ericrincon,项目名称:DeepAbstractRelevance,代码行数:70,代码来源:AbstractCNNExperiment.py


注:本文中的DataLoader.get_data_separately方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。