本文整理汇总了Python中DataLoader.get_data_separately方法的典型用法代码示例。如果您正苦于以下问题:Python DataLoader.get_data_separately方法的具体用法?Python DataLoader.get_data_separately怎么用?Python DataLoader.get_data_separately使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类DataLoader的用法示例。
在下文中一共展示了DataLoader.get_data_separately方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import get_data_separately [as 别名]
def main():
    """Parse CLI options, load word2vec vectors, fetch each dataset via
    DataLoader.get_data_separately, and write the abstract/title/MeSH
    matrices plus labels to per-dataset HDF5 files under DataProcessed/.

    Exits with status 2 on an unrecognized command-line option.
    """
    try:
        # Bug fix: the last long option was '=filter_small_data' — the '='
        # must trail the name for getopt to accept '--filter_small_data <arg>'.
        opts, args = getopt.getopt(
            sys.argv[1:], '',
            ['max_words_abstract=', 'max_words_title=', 'max_words_mesh=',
             'path=', 'w2v_path=', 'w2v_length=', 'filter_small_data='])
    except getopt.GetoptError as error:
        print(error)
        sys.exit(2)

    # Per-section caps on sequence length (in words): abstract text, MeSH terms, title.
    max_words = {'text': 270, 'mesh': 50, 'title': 17}
    path = 'Data/'
    w2v_path = '/Users/ericrincon/PycharmProjects/Deep-PICO/wikipedia-pubmed-and-PMC-w2v.bin'
    word_vector_size = 200
    filter_small_data = False

    for opt, arg in opts:
        if opt == '--max_words_abstract':
            max_words['text'] = int(arg)
        elif opt == '--max_words_title':
            # Bug fix: this branch previously overwrote max_words['mesh'],
            # leaving the title cap unconfigurable.
            max_words['title'] = int(arg)
        elif opt == '--max_words_mesh':
            max_words['mesh'] = int(arg)
        elif opt == '--path':
            path = arg
        elif opt == '--w2v_path':
            w2v_path = arg
        elif opt == '--filter_small_data':
            # Bug fix: original chain ('== 1' -> True, any other nonzero ->
            # False, 0 -> no-op) collapsed to the evident intent: the flag is
            # on exactly when the argument equals 1.
            filter_small_data = int(arg) == 1

    print('Loading word2vec...')
    w2v = Word2Vec.load_word2vec_format(w2v_path, binary=True)
    print('Loaded word2vec...')

    X_list, y_list, data_names = DataLoader.get_data_separately(
        max_words, word_vector_size, w2v, use_abstract_cnn=True,
        preprocess_text=False, filter_small_data=filter_small_data)

    for X, y, name in zip(X_list, y_list, data_names):
        X_abstract, X_title, X_mesh = X
        # Context manager guarantees the HDF5 handle is closed/flushed even on
        # error; the original leaked one open file per dataset.
        with h5py.File("DataProcessed/" + name + ".hdf5", "w") as f:
            f.create_dataset('X_abstract', data=X_abstract, shape=X_abstract.shape)
            f.create_dataset('X_title', data=X_title, shape=X_title.shape)
            f.create_dataset('X_mesh', data=X_mesh, shape=X_mesh.shape)
            f.create_dataset('y', data=y, shape=y.shape)
示例2: main
# 需要导入模块: import DataLoader [as 别名]
# 或者: from DataLoader import get_data_separately [as 别名]
#.........这里部分代码省略.........
elif opt == '--patience':
patience = int(arg)
elif opt == '--undersample':
if int(arg) == 0:
undersample = False
elif int(arg) == 1:
undersample = True
elif opt == '--tacc':
if int(arg) == 1:
using_tacc = True
else:
print("Option {} is not valid!".format(opt))
if using_tacc:
nltk.data.path.append('/work/03186/ericr/nltk_data/')
print('Loading data...')
if load_data_from_scratch:
print('Loading Word2Vec...')
w2v = Word2Vec.load_word2vec_format(w2v_path, binary=True)
print('Loaded Word2Vec...')
X_list = []
y_list = []
if use_embedding:
X_list, y_list, embedding_list = DataLoader.get_data_as_seq(w2v, w2v_size, max_words)
else:
X_list, y_list = DataLoader.get_data_separately(max_words, word_vector_size,
w2v, use_abstract_cnn=True,
preprocess_text=False,
filter_small_data=filter_small_data)
else:
X_list, y_list = DataLoader.load_datasets_from_h5py('DataProcessed', True)
print('Loaded data...')
dataset_names = DataLoader.get_all_files('DataProcessed')
dataset_names = [x.split('/')[-1].split('.')[0] for x in dataset_names]
results_file = open(experiment_name + "_results.txt", "w+")
for dataset_i, (X, y) in enumerate(zip(X_list, y_list)):
if use_embedding:
embedding = embedding_list[dataset_i]
model_name = dataset_names[dataset_i]
print("Dataset: {}".format(model_name))
results_file.write(model_name)
results_file.write("Dataset: {}".format(model_name))
X_abstract, X_title, X_mesh = X['text'], X['title'], X['mesh']
n = X_abstract.shape[0]
kf = KFold(n, random_state=1337, shuffle=True, n_folds=5)
if pretrain:
pretrain_fold_accuracies = []
pretrain_fold_recalls = []
pretrain_fold_precisions =[]