Python datasets.fetch_kddcup99方法代码示例

本文整理汇总了Python中sklearn.datasets.fetch_kddcup99方法的典型用法代码示例。如果您正苦于以下问题：Python datasets.fetch_kddcup99方法的具体用法？Python datasets.fetch_kddcup99怎么用？Python datasets.fetch_kddcup99使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.datasets的用法示例。

在下文中一共展示了datasets.fetch_kddcup99方法的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_percent10

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import fetch_kddcup99 [as 别名]
def test_percent10():
    """Check the array shapes returned by ``fetch_kddcup99``.

    The test is skipped when the initial fetch (with downloading disabled)
    raises ``IOError``. It then verifies the shapes of the default fetch, a
    shuffled fetch, and each named subset, and finally runs the
    ``return_X_y`` consistency helper on the 'smtp' subset.
    """
    try:
        data = fetch_kddcup99(download_if_missing=False)
    except IOError:
        raise SkipTest("kddcup99 dataset can not be loaded.")

    # Default fetch: expected row count and feature count.
    n_rows_default = 494021
    assert_equal(data.data.shape, (n_rows_default, 41))
    assert_equal(data.target.shape, (n_rows_default,))

    # Shuffling must not change the shapes.
    shuffled = fetch_kddcup99(shuffle=True, random_state=0)
    assert_equal(data.data.shape, shuffled.data.shape)
    assert_equal(data.target.shape, shuffled.target.shape)

    # (subset name, expected rows, expected columns), checked in this order.
    expected_by_subset = (
        ('SA', 100655, 41),
        ('SF', 73237, 4),
        ('http', 58725, 3),
        ('smtp', 9571, 3),
    )
    for subset_name, n_rows, n_cols in expected_by_subset:
        data = fetch_kddcup99(subset_name)
        assert_equal(data.data.shape, (n_rows, n_cols))
        assert_equal(data.target.shape, (n_rows,))

    # ``data`` is still the 'smtp' result from the last loop iteration.
    fetch_func = partial(fetch_kddcup99, 'smtp')
    check_return_X_y(data, fetch_func)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:33，代码来源:test_kddcup99.py

示例2: test_shuffle

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import fetch_kddcup99 [as 别名]
def test_shuffle():
    """Check that a shuffled 'SA' fetch has a b'normal.' label in its last 100 targets.

    The test is skipped when the fetch (with downloading disabled) raises
    ``IOError``.
    """
    fetch_options = dict(
        subset='SA',
        percent10=True,
        shuffle=True,
        random_state=0,
        download_if_missing=False,
    )
    try:
        dataset = fetch_kddcup99(**fetch_options)
    except IOError:
        raise SkipTest("kddcup99 dataset can not be loaded.")

    tail_is_normal = dataset.target[-100:] == b'normal.'
    assert any(tail_is_normal)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:10，代码来源:test_kddcup99.py

示例3: load_train_test_data

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import fetch_kddcup99 [as 别名]
def load_train_test_data(small: bool, train_normal_only: bool) -> Tuple[Tuple[pd.DataFrame, np.ndarray], Tuple[pd.DataFrame, np.ndarray]]:
    """Fetch the KDD Cup 99 'SA' subset and split it into train and test parts.

    Labels are binarized: 1 where the raw label equals ``b'normal.'`` and 0
    otherwise. The test part always holds twice as many rows as there are
    rows labelled 0.

    Args:
        small: Forwarded to ``fetch_kddcup99`` as ``percent10``.
        train_normal_only: If true, the trailing ``2 * number_anomalies`` rows
            become the test part and all rows before them the training part.
            If false, the test rows are picked with ``np.random.choice``
            (without replacement) and the remaining rows form the training
            part.

    Returns:
        ``((features_train, target_train), (features_test, target_test))``
        where the features are DataFrames and the targets are 0/1 arrays.
    """
    X, y = fetch_kddcup99(subset='SA', percent10=small, return_X_y=True)
    columns = ["duration", "protocol_type", "service", "flag", "src_bytes", "dst_bytes", "land", "wrong_fragment",
               "urgent", "hot", "num_failed_logins", "logged_in", "num_compromised", "root_shell", "su_attempted",
               "num_root", "num_file_creations", "num_shells", "num_access_files", "num_outbound_cmds", "is_host_login",
               "is_guest_login", "count", "srv_count", "serror_rate", "srv_serror_rate", "rerror_rate", "srv_rerror_rate",
               "same_srv_rate", "diff_srv_rate", "srv_diff_host_rate", "dst_host_count", "dst_host_srv_count", "dst_host_same_srv_rate",
               "dst_host_diff_srv_rate", "dst_host_same_src_port_rate", "dst_host_srv_diff_host_rate", "dst_host_serror_rate",
               "dst_host_srv_serror_rate", "dst_host_rerror_rate", "dst_host_srv_rerror_rate"]
    categorical_columns = ["protocol_type", "flag", "service"]
    features = pd.DataFrame(X, columns=columns)
    target = (y == b'normal.') * 1
    for categorical_column in categorical_columns:
        features[categorical_column] = features[categorical_column].astype('category')
    number_anomalies = np.sum(1 - target)
    number_test_samples = int(2 * number_anomalies)
    if train_normal_only:
        # Split on an explicit non-negative position. Negative slicing breaks
        # when the count is 0 (``[:-0]`` is empty and ``[-0:]`` is everything),
        # which would leave the training part empty. Clamping at 0 keeps the
        # original result when the count exceeds the number of rows.
        # NOTE(review): the training part is only anomaly-free if the fetched
        # rows are ordered with the anomalies last - confirm against
        # fetch_kddcup99 (shuffle is not requested here).
        split_at = max(len(features) - number_test_samples, 0)
        features_train, features_test = features.iloc[:split_at], features.iloc[split_at:]
        target_train, target_test = target[:split_at], target[split_at:]
    else:
        # Positions double as index labels because the frame was built with
        # the default index, so drop/loc by these values is safe.
        test_indices = np.random.choice(a=range(len(features)), size=number_test_samples, replace=False)
        features_train, features_test = features.drop(test_indices), features.loc[test_indices]
        target_train, target_test = np.delete(target, test_indices), target[test_indices]
    return (features_train, target_train), (features_test, target_test)


# (features_train, target_train), (features_test, target_test) = load_train_test_data(small=True, train_normal_only=True)

# print(features_train.columns)

开发者ID:cubonacci，项目名称:mixed-anomaly，代码行数:31，代码来源:kdd.py

示例4: test_percent10

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import fetch_kddcup99 [as 别名]
def test_percent10():
    """Check the array shapes returned by ``fetch_kddcup99``.

    The test is skipped when the initial fetch (with downloading disabled)
    raises ``IOError``. It then verifies the shapes of the default fetch, a
    shuffled fetch, and each named subset.
    """
    try:
        data = fetch_kddcup99(download_if_missing=False)
    except IOError:
        raise SkipTest("kddcup99 dataset can not be loaded.")

    # Default fetch: expected row count and feature count.
    n_rows_default = 494021
    assert_equal(data.data.shape, (n_rows_default, 41))
    assert_equal(data.target.shape, (n_rows_default,))

    # Shuffling must not change the shapes.
    shuffled = fetch_kddcup99(shuffle=True, random_state=0)
    assert_equal(data.data.shape, shuffled.data.shape)
    assert_equal(data.target.shape, shuffled.target.shape)

    # (subset name, expected rows, expected columns), checked in this order.
    expected_by_subset = (
        ('SA', 100655, 41),
        ('SF', 73237, 4),
        ('http', 58725, 3),
        ('smtp', 9571, 3),
    )
    for subset_name, n_rows, n_cols in expected_by_subset:
        data = fetch_kddcup99(subset_name)
        assert_equal(data.data.shape, (n_rows, n_cols))
        assert_equal(data.target.shape, (n_rows,))

开发者ID:alvarobartt，项目名称:twitter-stock-recommendation，代码行数:30，代码来源:test_kddcup99.py

注：本文中的sklearn.datasets.fetch_kddcup99方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。