当前位置: 首页>>代码示例>>Python>>正文


Python config.TRAIN_FILE属性代码示例

本文整理汇总了Python中config.TRAIN_FILE属性的典型用法代码示例。如果您正苦于以下问题:Python config.TRAIN_FILE属性的具体用法?Python config.TRAIN_FILE怎么用?Python config.TRAIN_FILE使用的例子?那么,这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在config模块的用法示例。


在下文中一共展示了config.TRAIN_FILE属性的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _load_data

# 需要导入模块: import config [as 别名]
# 或者: from config import TRAIN_FILE [as 别名]
def _load_data():
    """Load train/test CSVs, add engineered features, and split into model arrays.

    Returns:
        Tuple of (dfTrain, dfTest, X_train, y_train, X_test, ids_test,
        cat_features_indices): the preprocessed frames, the feature/label
        arrays, the test ids, and the positions of categorical columns.
    """
    df_train = pd.read_csv(config.TRAIN_FILE)
    df_test = pd.read_csv(config.TEST_FILE)

    def _add_engineered(frame):
        # Per-row count of -1 "missing" markers, plus one interaction feature.
        feat_cols = [c for c in frame.columns if c not in ["id", "target"]]
        frame["missing_feat"] = np.sum((frame[feat_cols] == -1).values, axis=1)
        frame["ps_car_13_x_ps_reg_03"] = frame["ps_car_13"] * frame["ps_reg_03"]
        return frame

    df_train = _add_engineered(df_train)
    df_test = _add_engineered(df_test)

    # Keep every column except ids, labels, and the configured ignore list.
    feat_cols = [
        c for c in df_train.columns
        if c not in ["id", "target"] and c not in config.IGNORE_COLS
    ]

    X_train = df_train[feat_cols].values
    y_train = df_train["target"].values
    X_test = df_test[feat_cols].values
    ids_test = df_test["id"].values
    cat_features_indices = [
        i for i, c in enumerate(feat_cols) if c in config.CATEGORICAL_COLS
    ]

    return df_train, df_test, X_train, y_train, X_test, ids_test, cat_features_indices
开发者ID:ChenglongChen,项目名称:tensorflow-DeepFM,代码行数:26,代码来源:main.py

示例2: precompute_forward_items_and_cache

# 需要导入模块: import config [as 别名]
# 或者: from config import TRAIN_FILE [as 别名]
def precompute_forward_items_and_cache():
    """Build paragraph-level forward items for a hotpot split and cache them as JSONL.

    Loads the v8 doc-retrieval results (test split, as currently wired), applies
    multi-hop filtering, appends the baseline context, builds forward items via
    the wiki rindex DB cursor, and writes them under
    ``config.PDATA_ROOT / "content_selection_forward"``.
    """
    # 3 places need to switch from dev to train !!!
    # (the load_json of doc results, the load_json of data_list, and the
    #  save_jsonl target below must all be switched together)

    # NOTE(review): is_training=False matches the unlabeled test-split output;
    # presumably it must be True when the train alternates are enabled — confirm.
    is_training = False
    doc_results = common.load_json(
        # config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8/hotpot_train_doc_retrieval_v8_before_multihop_filtering.json")
        # config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8/hotpot_dev_doc_retrieval_v8_before_multihop_filtering.json")
        config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8/hotpot_test_doc_retrieval_v8_before_multihop_filtering.json")
    doc_results = results_multihop_filtering(doc_results, multihop_retrieval_top_k=3, strict_mode=True)

    # db_cursor = wiki_db_tool.get_cursor(config.WHOLE_WIKI_DB)

    t_db_cursor = wiki_db_tool.get_cursor(config.WHOLE_PROCESS_FOR_RINDEX_DB)

    # data_list = common.load_json(config.DEV_FULLWIKI_FILE)
    data_list = common.load_json(config.TEST_FULLWIKI_FILE)
    # data_list = common.load_json(config.TRAIN_FILE)
    append_baseline_context(doc_results, data_list)

    fitem_list = build_full_wiki_document_forward_item(doc_results, data_list, is_training, t_db_cursor, True)

    # Size of the produced forward-item list, for a quick sanity check.
    print(len(fitem_list))
    common.save_jsonl(fitem_list, config.PDATA_ROOT / "content_selection_forward" / "hotpot_test_p_level_unlabeled.jsonl")
    # common.save_jsonl(fitem_list, config.PDATA_ROOT / "content_selection_forward" / "hotpot_dev_p_level_unlabeled.jsonl")
    # common.save_jsonl(fitem_list, config.PDATA_ROOT / "content_selection_forward" / "hotpot_train_p_level.jsonl") 
开发者ID:easonnie,项目名称:semanticRetrievalMRS,代码行数:27,代码来源:sampler_full_wiki.py

示例3: inputs

# 需要导入模块: import config [as 别名]
# 或者: from config import TRAIN_FILE [as 别名]
def inputs(train, batch_size):
    """Create a batched input pipeline from the train or validation TFRecord file.

    Args:
        train: if truthy, read TRAIN_FILE and return shuffled batches;
            otherwise read VALID_FILE and batch in file order.
        batch_size: number of examples per returned batch.

    Returns:
        (images, sparse_labels) batch tensors.
    """
    record_name = TRAIN_FILE if train else VALID_FILE
    record_path = os.path.join(RECORD_DIR, record_name)

    with tf.name_scope('input'):
        filename_queue = tf.train.string_input_producer([record_path])
        image, label = read_and_decode(filename_queue)
        queue_capacity = 2000 + 3 * batch_size
        if train:
            # Shuffled batching for training; min_after_dequeue keeps the
            # shuffle buffer well mixed before examples are drawn.
            images, sparse_labels = tf.train.shuffle_batch(
                [image, label],
                batch_size=batch_size,
                num_threads=6,
                capacity=queue_capacity,
                min_after_dequeue=2000)
        else:
            # Deterministic order for evaluation.
            images, sparse_labels = tf.train.batch(
                [image, label],
                batch_size=batch_size,
                num_threads=6,
                capacity=queue_capacity)

        return images, sparse_labels
开发者ID:PatrickLib,项目名称:captcha_recognize,代码行数:22,代码来源:captcha_input.py

示例4: load_train

# 需要导入模块: import config [as 别名]
# 或者: from config import TRAIN_FILE [as 别名]
def load_train():
    """Read the training CSV and map both question columns to index sequences."""
    frame = pd.read_csv(config.TRAIN_FILE)
    for col in ("q1", "q2"):
        frame[col] = frame[col].apply(_to_ind)
    return frame
开发者ID:yyht,项目名称:BERT,代码行数:7,代码来源:data.py

示例5: load_data

# 需要导入模块: import config [as 别名]
# 或者: from config import TRAIN_FILE [as 别名]
def load_data():
    """Load train/test CSVs and encode the training split into DeepFM inputs.

    Builds a global feature index over the concatenated train+test frames
    (numeric columns take one slot; categorical columns take one slot per
    distinct value), writes ``feature_size`` and ``field_size`` into the
    module-level ``dfm_params``, and returns
    (train_feature_index, train_feature_value, train_y).
    """
    dfTrain = pd.read_csv(config.TRAIN_FILE)
    dfTest = pd.read_csv(config.TEST_FILE)
    combined = pd.concat([dfTrain, dfTest])

    # Assign each usable feature (or feature value) a global integer index.
    feature_dict = {}
    total_feature = 0
    for col in combined.columns:
        if col in IGNORE_COLS:
            continue
        if col in NUMERIC_COLS:
            # Numeric column: a single slot.
            feature_dict[col] = total_feature
            total_feature += 1
        else:
            # Categorical column: one slot per distinct value.
            unique_val = combined[col].unique()
            feature_dict[col] = dict(
                zip(unique_val, range(total_feature, total_feature + len(unique_val))))
            total_feature += len(unique_val)
    dfm_params['feature_size'] = total_feature

    # Encode the training split: labels first, then index/value frames.
    train_y = dfTrain[['target']].values.tolist()
    dfTrain.drop(['target', 'id'], axis=1, inplace=True)
    train_feature_index = dfTrain.copy()
    train_feature_value = dfTrain.copy()

    for col in train_feature_index.columns:
        if col in IGNORE_COLS:
            train_feature_index.drop(col, axis=1, inplace=True)
            train_feature_value.drop(col, axis=1, inplace=True)
        elif col in NUMERIC_COLS:
            # Numeric: constant index, raw value kept as-is.
            train_feature_index[col] = feature_dict[col]
        else:
            # Categorical: per-value index, value becomes constant 1.
            train_feature_index[col] = train_feature_index[col].map(feature_dict[col])
            train_feature_value[col] = 1
    dfm_params['field_size'] = len(train_feature_index.columns)

    train_y = np.reshape(np.array(train_y), (-1, 1))
    return train_feature_index, train_feature_value, train_y
开发者ID:wyl6,项目名称:Recommender-Systems-Samples,代码行数:41,代码来源:main.py

示例6: results_analysis

# 需要导入模块: import config [as 别名]
# 或者: from config import TRAIN_FILE [as 别名]
def results_analysis():
    """Report sp_doc retrieval-set size statistics for the dev split after multi-hop filtering."""
    doc_results = common.load_json(
        # Switch to the train-split results file to analyse training retrieval:
        # config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8/hotpot_train_doc_retrieval_v8_before_multihop_filtering.json")
        config.PRO_ROOT / "results/doc_retri_results/doc_retrieval_final_v8/hotpot_dev_doc_retrieval_v8_before_multihop_filtering.json")
    doc_results = results_multihop_filtering(doc_results, multihop_retrieval_top_k=3, strict_mode=True)

    # terms_based_results_list = common.load_jsonl(
    #     config.RESULT_PATH / "doc_retri_results/term_based_methods_results/hotpot_tf_idf_dev.jsonl")

    data_list = common.load_json(config.DEV_FULLWIKI_FILE)
    # data_list = common.load_json(config.TRAIN_FILE)

    append_baseline_context(doc_results, data_list)

    # Size of the retrieved document set per question.
    sizes = [len(rset) for rset in doc_results['sp_doc'].values()]

    print("Results with filtering:")

    print(collections.Counter(sizes).most_common(10000))
    print(len(sizes))
    print("Mean:\t", np.mean(sizes))
    print("Std:\t", np.std(sizes))
    print("Max:\t", np.max(sizes))
    print("Min:\t", np.min(sizes))

    ext_hotpot_eval.eval(doc_results, data_list)
开发者ID:easonnie,项目名称:semanticRetrievalMRS,代码行数:30,代码来源:sampler_full_wiki.py

示例7: get_train_sentence_pair

# 需要导入模块: import config [as 别名]
# 或者: from config import TRAIN_FILE [as 别名]
def get_train_sentence_pair(top_k, is_training, debug=False, cur_train_eval_results_list=None):
    """Build sentence pairs for the hotpot training split.

    Args:
        top_k: number of top paragraphs kept per question.
        is_training: forwarded to get_sentence_pair.
        debug: if True, restrict to the first 100 training items (and the
            matching paragraph-level results).
        cur_train_eval_results_list: precomputed paragraph-level results;
            loaded from the default v0 checkpoint output when None.
    """
    train_list = common.load_json(config.TRAIN_FILE)

    if cur_train_eval_results_list is None:
        default_results_path = (
            config.PRO_ROOT / "data/p_hotpotqa/hotpotqa_paragraph_level/04-10-17:44:54_hotpot_v0_cs/"
                              "i(40000)|e(4)|t5_doc_recall(0.8793382849426064)|t5_sp_recall(0.879496479212887)|t10_doc_recall(0.888656313301823)|t5_sp_recall(0.8888325134240054)|seed(12)/train_p_level_bert_v1_results.jsonl")
        cur_train_eval_results_list = common.load_jsonl(default_results_path)

    if debug:
        train_list = train_list[:100]
        debug_ids = {item['_id'] for item in train_list}
        cur_train_eval_results_list = [
            item for item in cur_train_eval_results_list if item['qid'] in debug_ids
        ]

    return get_sentence_pair(top_k, train_list, cur_train_eval_results_list, is_training)
开发者ID:easonnie,项目名称:semanticRetrievalMRS,代码行数:16,代码来源:sentence_level_sampler.py

示例8: get_sp_position_count

# 需要导入模块: import config [as 别名]
# 或者: from config import TRAIN_FILE [as 别名]
def get_sp_position_count():
    """Tally supporting-fact position statistics over the training set and print them."""
    position_counter = Counter()
    for example in common.load_json(config.TRAIN_FILE):
        sp_position_analysis(example, position_counter)

    print(position_counter)
开发者ID:easonnie,项目名称:semanticRetrievalMRS,代码行数:9,代码来源:hotpot_doc_retri_analysis.py

示例9: main

# 需要导入模块: import config [as 别名]
# 或者: from config import TRAIN_FILE [as 别名]
def main(_):
    """Convert the train and validation image directories to TFRecord files."""
    for data_dir, record_name in ((FLAGS.train_dir, TRAIN_FILE),
                                  (FLAGS.valid_dir, VALID_FILE)):
        conver_to_tfrecords(create_data_list(data_dir), record_name)
开发者ID:PatrickLib,项目名称:captcha_recognize,代码行数:8,代码来源:captcha_records.py

示例10: experiment_train_full_wiki

# 需要导入模块: import config [as 别名]
# 或者: from config import TRAIN_FILE [as 别名]
def _print_sp_doc_len_stats(header, doc_pred_dict):
    """Print the size distribution of the retrieved document sets in doc_pred_dict['sp_doc']."""
    len_list = [len(rset) for rset in doc_pred_dict['sp_doc'].values()]
    print(header)
    print(collections.Counter(len_list).most_common(10000))
    print(len(len_list))
    print("Mean:\t", np.mean(len_list))
    print("Std:\t", np.std(len_list))
    print("Max:\t", np.max(len_list))
    print("Min:\t", np.min(len_list))


def experiment_train_full_wiki():
    """Run v8 document retrieval over the hotpot training split.

    Loads the train full-wiki data and term-based tf-idf results, runs
    init_results_v8, prints set-size statistics before multi-hop filtering,
    saves the raw predictions, then applies multi-hop filtering, prints the
    filtered statistics, evaluates, and saves the filtered predictions.
    """
    multihop_retrieval_top_k = 3
    match_filtering_k = 3
    term_retrieval_top_k = 5
    multihop_strict_mode = True
    debug_mode = None  # set to an int to truncate the data list for quick debugging

    # data_list = common.load_json(config.DEV_FULLWIKI_FILE)
    data_list = common.load_json(config.TRAIN_FILE)

    if debug_mode is not None:
        data_list = data_list[:debug_mode]

    terms_based_results_list = common.load_jsonl(config.RESULT_PATH / "doc_retri_results/term_based_methods_results/hotpot_tf_idf_train.jsonl")

    g_score_dict = dict()
    load_from_file(g_score_dict,
                   config.PDATA_ROOT / "reverse_indexing/abs_rindexdb/scored_db/default-tf-idf.score.txt")
    doc_retri_pred_dict = init_results_v8(data_list, data_list, terms_based_results_list, g_score_dict,
                                          match_filtering_k=match_filtering_k, term_retrieval_top_k=term_retrieval_top_k)

    _print_sp_doc_len_stats("Results without filtering:", doc_retri_pred_dict)

    # common.save_json(doc_retri_pred_dict, f"hotpot_doc_retrieval_v8_before_multihop_filtering_{debug_mode}.json")
    common.save_json(doc_retri_pred_dict, "hotpot_train_doc_retrieval_v8_before_multihop_filtering.json")

    # Multi-hop filtering pass.
    new_doc_retri_pred_dict = results_multihop_filtering(doc_retri_pred_dict,
                                                         multihop_retrieval_top_k=multihop_retrieval_top_k,
                                                         strict_mode=multihop_strict_mode)
    # BUGFIX: the "Results with filtering:" header was accidentally printed
    # twice in the original; the helper prints it exactly once.
    _print_sp_doc_len_stats("Results with filtering:", new_doc_retri_pred_dict)

    ext_hotpot_eval.eval(new_doc_retri_pred_dict, data_list)
    # common.save_json(new_doc_retri_pred_dict, f"hotpot_doc_retrieval_v8_{debug_mode}.json")
    common.save_json(new_doc_retri_pred_dict, "hotpot_train_doc_retrieval_v8.json")
开发者ID:easonnie,项目名称:semanticRetrievalMRS,代码行数:59,代码来源:hotpot_doc_retri_v0.py


注:本文中的config.TRAIN_FILE属性示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。