This article collects typical code examples of the config.TRAIN_FILE attribute in Python. If you are wondering how config.TRAIN_FILE is used in practice, how to call it, or what real-world examples look like, the curated examples below should help. You can also explore further usage examples from the config module in which the attribute is defined.
The following shows 10 code examples of the config.TRAIN_FILE attribute, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
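All of the examples below assume a project-level config module that exposes dataset paths and column lists as module attributes. The exact contents differ per project; the following is only a minimal hypothetical sketch of such a config.py, in which every path, filename, and column name is an illustrative assumption rather than something taken from the repositories quoted below:

# config.py -- hypothetical sketch; all paths and column lists are placeholders
from pathlib import Path

PRO_ROOT = Path(__file__).resolve().parent      # assumed project root

TRAIN_FILE = PRO_ROOT / "data" / "train.csv"    # placeholder location
TEST_FILE = PRO_ROOT / "data" / "test.csv"      # placeholder location

IGNORE_COLS = ["id", "target"]                  # assumed column groups
NUMERIC_COLS = ["ps_reg_03", "ps_car_13"]
CATEGORICAL_COLS = []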
Example 1: _load_data
# Required import: import config [as alias]
# Or: from config import TRAIN_FILE [as alias]
import numpy as np
import pandas as pd

def _load_data():
    # Load the raw train/test CSVs from the paths configured in config
    dfTrain = pd.read_csv(config.TRAIN_FILE)
    dfTest = pd.read_csv(config.TEST_FILE)

    def preprocess(df):
        cols = [c for c in df.columns if c not in ["id", "target"]]
        # Count missing values (encoded as -1) per row as an extra feature
        df["missing_feat"] = np.sum((df[cols] == -1).values, axis=1)
        df["ps_car_13_x_ps_reg_03"] = df["ps_car_13"] * df["ps_reg_03"]
        return df

    dfTrain = preprocess(dfTrain)
    dfTest = preprocess(dfTest)

    cols = [c for c in dfTrain.columns if c not in ["id", "target"]]
    cols = [c for c in cols if c not in config.IGNORE_COLS]

    X_train = dfTrain[cols].values
    y_train = dfTrain["target"].values
    X_test = dfTest[cols].values
    ids_test = dfTest["id"].values
    cat_features_indices = [i for i, c in enumerate(cols) if c in config.CATEGORICAL_COLS]

    return dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices
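A hedged usage sketch: assuming config.TRAIN_FILE and config.TEST_FILE point at CSVs with id and target columns plus the ps_* feature columns above, a caller would unpack the returned tuple like this (the variable names are illustrative):

dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_idx = _load_data()
print(X_train.shape, y_train.shape)   # (n_train, n_features), (n_train,)
print("categorical feature indices:", cat_idx)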
Example 2: precompute_forward_items_and_cache
# Required import: import config [as alias]
# Or: from config import TRAIN_FILE [as alias]
def precompute_forward_items_and_cache():
    # 3 places need to switch from dev to train !!!
    is_training = False
    doc_results = common.load_json(
        # config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8/hotpot_train_doc_retrieval_v8_before_multihop_filtering.json")
        # config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8/hotpot_dev_doc_retrieval_v8_before_multihop_filtering.json")
        config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8/hotpot_test_doc_retrieval_v8_before_multihop_filtering.json")
    doc_results = results_multihop_filtering(doc_results, multihop_retrieval_top_k=3, strict_mode=True)

    # db_cursor = wiki_db_tool.get_cursor(config.WHOLE_WIKI_DB)
    t_db_cursor = wiki_db_tool.get_cursor(config.WHOLE_PROCESS_FOR_RINDEX_DB)

    # data_list = common.load_json(config.DEV_FULLWIKI_FILE)
    data_list = common.load_json(config.TEST_FULLWIKI_FILE)
    # data_list = common.load_json(config.TRAIN_FILE)

    append_baseline_context(doc_results, data_list)
    fitem_list = build_full_wiki_document_forward_item(doc_results, data_list, is_training, t_db_cursor, True)
    print(len(fitem_list))

    common.save_jsonl(fitem_list, config.PDATA_ROOT / "content_selection_forward" / "hotpot_test_p_level_unlabeled.jsonl")
    # common.save_jsonl(fitem_list, config.PDATA_ROOT / "content_selection_forward" / "hotpot_dev_p_level_unlabeled.jsonl")
    # common.save_jsonl(fitem_list, config.PDATA_ROOT / "content_selection_forward" / "hotpot_train_p_level.jsonl")
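The commented-out lines above are toggled by hand in three places to switch between the train, dev, and test splits. As a hedged alternative (not part of the original repository; the helper name, the split-to-file mapping, and the filename pattern are assumptions inferred from the comments above), the same logic can take the split as a parameter:

def precompute_forward_items_for_split(split):
    # Hypothetical parameterized variant; split is "train", "dev", or "test".
    is_training = (split == "train")
    doc_results = common.load_json(
        config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8" /
        f"hotpot_{split}_doc_retrieval_v8_before_multihop_filtering.json")
    doc_results = results_multihop_filtering(doc_results, multihop_retrieval_top_k=3, strict_mode=True)
    t_db_cursor = wiki_db_tool.get_cursor(config.WHOLE_PROCESS_FOR_RINDEX_DB)
    data_file = {"train": config.TRAIN_FILE,
                 "dev": config.DEV_FULLWIKI_FILE,
                 "test": config.TEST_FULLWIKI_FILE}[split]
    data_list = common.load_json(data_file)
    append_baseline_context(doc_results, data_list)
    fitem_list = build_full_wiki_document_forward_item(doc_results, data_list, is_training, t_db_cursor, True)
    suffix = "p_level.jsonl" if is_training else "p_level_unlabeled.jsonl"
    common.save_jsonl(fitem_list, config.PDATA_ROOT / "content_selection_forward" / f"hotpot_{split}_{suffix}")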
Example 3: inputs
# Required import: import config [as alias]
# Or: from config import TRAIN_FILE [as alias]
import os
import tensorflow as tf

def inputs(train, batch_size):
    filename = os.path.join(RECORD_DIR,
                            TRAIN_FILE if train else VALID_FILE)
    with tf.name_scope('input'):
        filename_queue = tf.train.string_input_producer([filename])
        image, label = read_and_decode(filename_queue)
        if train:
            # Shuffled batches for training
            images, sparse_labels = tf.train.shuffle_batch([image, label],
                                                           batch_size=batch_size,
                                                           num_threads=6,
                                                           capacity=2000 + 3 * batch_size,
                                                           min_after_dequeue=2000)
        else:
            # Deterministic batches for validation
            images, sparse_labels = tf.train.batch([image, label],
                                                   batch_size=batch_size,
                                                   num_threads=6,
                                                   capacity=2000 + 3 * batch_size)
        return images, sparse_labels
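This is the TensorFlow 1.x queue-based input pipeline; a hedged sketch of the standard coordinator boilerplate that drives it (assuming TF 1.x and that inputs/read_and_decode are defined as above):

images, labels = inputs(train=True, batch_size=64)
with tf.Session() as sess:
    sess.run(tf.group(tf.global_variables_initializer(),
                      tf.local_variables_initializer()))
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        batch_images, batch_labels = sess.run([images, labels])
    finally:
        coord.request_stop()
        coord.join(threads)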
Example 4: load_train
# Required import: import config [as alias]
# Or: from config import TRAIN_FILE [as alias]
import pandas as pd

def load_train():
    df = pd.read_csv(config.TRAIN_FILE)
    # Convert both question columns with the module-level _to_ind helper
    df["q1"] = df.q1.apply(_to_ind)
    df["q2"] = df.q2.apply(_to_ind)
    return df
Example 5: load_data
# Required import: import config [as alias]
# Or: from config import TRAIN_FILE [as alias]
import numpy as np
import pandas as pd

def load_data():
    dfTrain = pd.read_csv(config.TRAIN_FILE)
    dfTest = pd.read_csv(config.TEST_FILE)
    df = pd.concat([dfTrain, dfTest])

    # Build a global feature index: each numeric column takes one slot,
    # each categorical column takes one slot per unique value.
    feature_dict = {}
    total_feature = 0
    for col in df.columns:
        if col in IGNORE_COLS:
            continue
        elif col in NUMERIC_COLS:
            feature_dict[col] = total_feature
            total_feature += 1
        else:
            unique_val = df[col].unique()
            feature_dict[col] = dict(zip(unique_val, range(total_feature, len(unique_val) + total_feature)))
            total_feature += len(unique_val)
    dfm_params['feature_size'] = total_feature

    # Convert the training set into (index, value) pairs
    train_y = dfTrain[['target']].values.tolist()
    dfTrain.drop(['target', 'id'], axis=1, inplace=True)
    train_feature_index = dfTrain.copy()
    train_feature_value = dfTrain.copy()
    for col in train_feature_index.columns:
        if col in IGNORE_COLS:
            train_feature_index.drop(col, axis=1, inplace=True)
            train_feature_value.drop(col, axis=1, inplace=True)
            continue
        elif col in NUMERIC_COLS:
            # Numeric: fixed index, keep the raw value
            train_feature_index[col] = feature_dict[col]
        else:
            # Categorical: map value to its index, set the value to 1 (one-hot)
            train_feature_index[col] = train_feature_index[col].map(feature_dict[col])
            train_feature_value[col] = 1
    dfm_params['field_size'] = len(train_feature_index.columns)
    train_y = np.reshape(np.array(train_y), (-1, 1))
    return train_feature_index, train_feature_value, train_y
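A hedged worked example of the (index, value) encoding this function builds, i.e. the sparse input format DeepFM-style models consume (the toy columns below are invented for illustration):

# Toy illustration (invented columns): NUMERIC_COLS = ["age"], categorical "city"
# feature_dict -> {"age": 0, "city": {"sh": 1, "bj": 2}}    (feature_size = 3)
# A row (age=30, city="bj") is then encoded as:
#   feature_index = [0, 2]    # slot for "age", slot for city == "bj"
#   feature_value = [30, 1]   # raw numeric value; 1 marks the active category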
Example 6: results_analysis
# Required import: import config [as alias]
# Or: from config import TRAIN_FILE [as alias]
import collections
import numpy as np

def results_analysis():
    doc_results = common.load_json(
        # config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8/hotpot_train_doc_retrieval_v8_before_multihop_filtering.json")
        config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8/hotpot_dev_doc_retrieval_v8_before_multihop_filtering.json")
    doc_results = results_multihop_filtering(doc_results, multihop_retrieval_top_k=3, strict_mode=True)

    # terms_based_results_list = common.load_jsonl(
    #     config.RESULT_PATH / "doc_retri_results/term_based_methods_results/hotpot_tf_idf_dev.jsonl")
    data_list = common.load_json(config.DEV_FULLWIKI_FILE)
    # data_list = common.load_json(config.TRAIN_FILE)
    append_baseline_context(doc_results, data_list)

    # Summarize how many supporting documents are retrieved per question
    len_list = []
    for rset in doc_results['sp_doc'].values():
        len_list.append(len(rset))

    print("Results with filtering:")
    print(collections.Counter(len_list).most_common(10000))
    print(len(len_list))
    print("Mean:\t", np.mean(len_list))
    print("Std:\t", np.std(len_list))
    print("Max:\t", np.max(len_list))
    print("Min:\t", np.min(len_list))

    ext_hotpot_eval.eval(doc_results, data_list)
Example 7: get_train_sentence_pair
# Required import: import config [as alias]
# Or: from config import TRAIN_FILE [as alias]
def get_train_sentence_pair(top_k, is_training, debug=False, cur_train_eval_results_list=None):
    train_list = common.load_json(config.TRAIN_FILE)
    if cur_train_eval_results_list is None:
        cur_train_eval_results_list = common.load_jsonl(
            config.PRO_ROOT / "data/p_hotpotqa/hotpotqa_paragraph_level/04-10-17:44:54_hotpot_v0_cs/"
            "i(40000)|e(4)|t5_doc_recall(0.8793382849426064)|t5_sp_recall(0.879496479212887)|t10_doc_recall(0.888656313301823)|t5_sp_recall(0.8888325134240054)|seed(12)/train_p_level_bert_v1_results.jsonl")

    if debug:
        train_list = train_list[:100]
        id_set = set([item['_id'] for item in train_list])
        cur_train_eval_results_list = [item for item in cur_train_eval_results_list if item['qid'] in id_set]

    return get_sentence_pair(top_k, train_list, cur_train_eval_results_list, is_training)
Example 8: get_sp_position_count
# Required import: import config [as alias]
# Or: from config import TRAIN_FILE [as alias]
from collections import Counter

def get_sp_position_count():
    train_list = common.load_json(config.TRAIN_FILE)
    c = Counter()
    for item in train_list:
        sp_position_analysis(item, c)
    print(c)
Example 9: main
# Required import: import config [as alias]
# Or: from config import TRAIN_FILE [as alias]
def main(_):
    training_data = create_data_list(FLAGS.train_dir)
    conver_to_tfrecords(training_data, TRAIN_FILE)
    validation_data = create_data_list(FLAGS.valid_dir)
    conver_to_tfrecords(validation_data, VALID_FILE)
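The main(_) signature follows the TF 1.x tf.app.run() convention; a hedged sketch of the entry-point boilerplate that usually accompanies it (assuming TensorFlow 1.x and that FLAGS is defined via tf.app.flags elsewhere in the file):

if __name__ == '__main__':
    # tf.app.run() parses command-line flags into FLAGS, then calls main(argv)
    tf.app.run()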
Example 10: experiment_train_full_wiki
# Required import: import config [as alias]
# Or: from config import TRAIN_FILE [as alias]
import collections
import numpy as np

def experiment_train_full_wiki():
    multihop_retrieval_top_k = 3
    match_filtering_k = 3
    term_retrieval_top_k = 5
    multihop_strict_mode = True
    debug_mode = None

    # data_list = common.load_json(config.DEV_FULLWIKI_FILE)
    data_list = common.load_json(config.TRAIN_FILE)
    if debug_mode is not None:
        data_list = data_list[:debug_mode]

    terms_based_results_list = common.load_jsonl(config.RESULT_PATH / "doc_retri_results/term_based_methods_results/hotpot_tf_idf_train.jsonl")
    g_score_dict = dict()
    load_from_file(g_score_dict,
                   config.PDATA_ROOT / "reverse_indexing/abs_rindexdb/scored_db/default-tf-idf.score.txt")

    doc_retri_pred_dict = init_results_v8(data_list, data_list, terms_based_results_list, g_score_dict,
                                          match_filtering_k=match_filtering_k, term_retrieval_top_k=term_retrieval_top_k)

    len_list = []
    for rset in doc_retri_pred_dict['sp_doc'].values():
        len_list.append(len(rset))

    print("Results without filtering:")
    print(collections.Counter(len_list).most_common(10000))
    print(len(len_list))
    print("Mean:\t", np.mean(len_list))
    print("Std:\t", np.std(len_list))
    print("Max:\t", np.max(len_list))
    print("Min:\t", np.min(len_list))

    # common.save_json(doc_retri_pred_dict, f"hotpot_doc_retrieval_v8_before_multihop_filtering_{debug_mode}.json")
    common.save_json(doc_retri_pred_dict, "hotpot_train_doc_retrieval_v8_before_multihop_filtering.json")

    # Filtering
    new_doc_retri_pred_dict = results_multihop_filtering(doc_retri_pred_dict,
                                                         multihop_retrieval_top_k=multihop_retrieval_top_k,
                                                         strict_mode=multihop_strict_mode)

    len_list = []
    for rset in new_doc_retri_pred_dict['sp_doc'].values():
        len_list.append(len(rset))

    print("Results with filtering:")
    print(collections.Counter(len_list).most_common(10000))
    print(len(len_list))
    print("Mean:\t", np.mean(len_list))
    print("Std:\t", np.std(len_list))
    print("Max:\t", np.max(len_list))
    print("Min:\t", np.min(len_list))

    ext_hotpot_eval.eval(new_doc_retri_pred_dict, data_list)
    # common.save_json(new_doc_retri_pred_dict, f"hotpot_doc_retrieval_v8_{debug_mode}.json")
    common.save_json(new_doc_retri_pred_dict, "hotpot_train_doc_retrieval_v8.json")