本文整理汇总了Python中pml.data.model.DataSet类的典型用法代码示例。如果您正苦于以下问题:Python DataSet类的具体用法?Python DataSet怎么用?Python DataSet使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了DataSet类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_get_row
def test_get_row(self):
    # Fetch a row by position, then confirm that writing through the
    # returned row is visible when the row is fetched again.
    data = DataSet([[1, 2], [3, 4], [5, 6], [7, 8]])
    second_row = data.get_row(1)
    assert_that(second_row.values, contains(3, 4))

    # Overwrite every value of the selected row; the dataset itself must
    # reflect the change.
    second_row[:] = 1
    assert_that(data.get_row(1), contains(1, 1))
示例2: test_has_missing_values
def test_has_missing_values(self):
    # has_missing_values() should report True when the data contains NaN
    # cells and False when every cell holds a number.
    #
    # np.nan is used rather than np.NaN: the capitalised alias was removed
    # in NumPy 2.0, whereas np.nan is available on every NumPy release.
    dataset1 = DataSet([[4.2, np.nan, 3.1], [2.5, 1.9, np.nan],
                        [1.1, 1.2, 1.7]])
    self.assertTrue(dataset1.has_missing_values())

    dataset2 = DataSet([[4.2, 3.9, 3.1], [2.5, 1.9, 2.2], [1.1, 1.2, 1.7]])
    self.assertFalse(dataset2.has_missing_values())
示例3: test_split_random
def test_split_random(self):
    # A randomised 0.5 split of four rows should give two rows per part.
    data = DataSet([[1, 2], [3, 4], [5, 6], [7, 8]])
    parts = data.split(0.5, random=True)
    left, right = parts

    # Which rows land in which part is nondeterministic, so only the
    # sizes of the two parts are verified.
    for part in (left, right):
        self.assertEqual(part.num_samples(), 2)
示例4: test_unequal_split
def test_unequal_split(self):
    # Splitting four rows at 0.3 is expected to put the first row in the
    # leading part and the remaining three rows in the trailing part.
    data = DataSet([[1, 2], [3, 4], [5, 6], [7, 8]])
    head, tail = data.split(0.3)

    self.assertEqual(head.num_samples(), 1)
    assert_that(head, equals_dataset([[1, 2]]))

    self.assertEqual(tail.num_samples(), 3)
    assert_that(tail, equals_dataset([[3, 4], [5, 6], [7, 8]]))
示例5: test_split_0
def test_split_0(self):
    # A split fraction of 0 should leave the leading part empty and put
    # every row in the trailing part.
    data = DataSet([[1, 2], [3, 4], [5, 6], [7, 8]])
    head, tail = data.split(0)

    self.assertEqual(head.num_samples(), 0)
    assert_that(head, equals_dataset([]))

    self.assertEqual(tail.num_samples(), 4)
    assert_that(tail, equals_dataset([[1, 2], [3, 4], [5, 6], [7, 8]]))
示例6: test_get_label_value_counts
def test_get_label_value_counts(self):
    # Six samples labelled a/b/b/c/a/b: the returned counts must match the
    # per-label tallies.
    samples = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]]
    data = DataSet(samples, labels=["a", "b", "b", "c", "a", "b"])
    tallies = {"a": 2, "b": 3, "c": 1}

    counts = data.get_label_value_counts()
    assert_that(counts, equals_series(tallies))
    # The index order is checked too: "b" (3), then "a" (2), then "c" (1).
    assert_that(counts.index, contains("b", "a", "c"))
示例7: test_slice_features_list_indices
def test_slice_features_list_indices(self):
    # Slice by a list of integer column ids: the result should keep only
    # those columns, and keep the labels.
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    row_labels = ["m", "f", "m"]
    data = DataSet(frame, labels=row_labels)

    subset = data.slice_features([1, 2])

    assert_that(subset, equals_dataset([[2, 3], [5, 6], [8, 9]]))
    assert_that(subset.feature_list(), contains(1, 2))
    assert_that(subset.get_labels(), contains(*row_labels))
示例8: test_split_labelled
def test_split_labelled(self):
    # Both halves of a labelled dataset should stay labelled, and each
    # half's labels should keep their original row indices.
    rows = [[1, 2], [3, 4], [5, 6], [7, 8]]
    data = DataSet(rows, labels=["b", "b", "b", "a"])
    head, tail = data.split(0.5)

    self.assertTrue(head.is_labelled())
    assert_that(head.get_labels(), equals_series({0: "b", 1: "b"}))

    self.assertTrue(tail.is_labelled())
    assert_that(tail.get_labels(), equals_series({2: "b", 3: "a"}))
示例9: test_filter_by_feature_value_with_labels
def test_filter_by_feature_value_with_labels(self):
    # Filtering on "hair colour" == "brown" should keep rows 0 and 2,
    # together with their labels.
    column_names = ["name", "hair colour"]
    records = [["Bill", "brown"], ["Bob", "black"], ["Jim", "brown"]]
    frame = pd.DataFrame(records, columns=column_names)
    data = DataSet(frame, labels=["SENG", "SENG", "CENG"])

    brown_haired = data.value_filter("hair colour", "brown")

    expected_labels = equals_series({0: "SENG", 2: "CENG"})
    assert_that(brown_haired.get_labels(), expected_labels)
示例10: test_bin_feature
def test_bin_feature(self):
    # bin() is applied to the dataset itself (its return value is unused)
    # and only the "MATH100" column is expected to change.
    # NOTE(review): with boundaries [4, 7] the values 0, 7, 6 become
    # 0, 2, 1 -- presumably bin indices; confirm against DataSet.bin.
    frame = pd.DataFrame([[0, 1], [7, 2], [6, 3]],
                         columns=["MATH100", "PHYS125"])
    data = DataSet(frame)

    data.bin("MATH100", [4, 7])

    assert_that(data, equals_dataset([[0, 1], [2, 2], [1, 3]]))
示例11: test_get_labelled_data_frame
def test_get_labelled_data_frame(self):
    # Each row of the labelled frame is expected to hold a sample's
    # feature values followed by its label.
    dataset = DataSet([[1, 2], [3, 4], [5, 6], [7, 8]],
                      labels=pd.Series(["b", "b", "b", "a"]))
    df = dataset.get_labelled_data_frame()
    # TODO: non-numeric values in DataFrame matcher
    expected = [[1, 2, "b"], [3, 4, "b"], [5, 6, "b"], [7, 8, "a"]]
    # assertEqual, not assertTrue: assertTrue(a, b) treats b as the failure
    # message and passes for any non-empty row, so nothing was compared.
    # Rows are read positionally with .iloc because .ix was removed in
    # pandas 1.0 (assumes the frame keeps the default 0..n-1 index, where
    # label and position coincide -- TODO confirm).
    for position, expected_row in enumerate(expected):
        self.assertEqual(df.iloc[position].tolist(), expected_row)
示例12: test_get_row_by_id
def test_get_row_by_id(self):
    # With a string index on the frame, get_row() should accept the row's
    # id and still accept its integer position.
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                         index=["V01", "V02", "V03"])
    data = DataSet(frame)

    # First look up by id, then by position; both address the middle row.
    for key in ("V02", 1):
        found = data.get_row(key)
        assert_that(found, contains(4, 5, 6))
示例13: test_copy
def test_copy(self):
    # Changing a column on a copy must update the copy only; the source
    # dataset's data and labels must stay as they were.
    dataset1 = DataSet([[1, 2], [3, 4]], labels=pd.Series(["a", "b"]))
    dataset2 = dataset1.copy()
    dataset2.set_column(1, pd.Series([4, 5]))

    # The copy carries the new column and the original labels.
    assert_that(dataset2, equals_dataset([[1, 4], [3, 5]]))
    assert_that(dataset2.get_labels(), equals_series({0: "a", 1: "b"}))

    # The source is untouched. The labels assertion targets dataset1: it
    # previously re-checked dataset2, so the source's labels were never
    # actually verified.
    assert_that(dataset1, equals_dataset([[1, 2], [3, 4]]))
    assert_that(dataset1.get_labels(), equals_series({0: "a", 1: "b"}))
示例14: test_copy_no_labels
def test_copy_no_labels(self):
    # Copying an unlabelled dataset: editing the copy must not change the
    # source, and neither dataset should report labels.
    source = DataSet([[1, 2], [3, 4]])
    duplicate = source.copy()
    replacement_column = pd.Series([4, 5])
    duplicate.set_column(1, replacement_column)

    assert_that(duplicate, equals_dataset([[1, 4], [3, 5]]))
    self.assertFalse(duplicate.is_labelled())

    assert_that(source, equals_dataset([[1, 2], [3, 4]]))
    self.assertFalse(source.is_labelled())
示例15: test_slice_features_list_string
def test_slice_features_list_string(self):
    # Slice by a list of column names: the result should keep only those
    # columns, and keep the labels.
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                         columns=["weight", "height", "age"])
    row_labels = ["m", "f", "m"]
    data = DataSet(frame, labels=row_labels)

    subset = data.slice_features(["weight", "height"])

    assert_that(subset, equals_dataset([[1, 2], [4, 5], [7, 8]]))
    assert_that(subset.feature_list(), contains("weight", "height"))
    assert_that(subset.get_labels(), contains(*row_labels))