本文整理汇总了 Python 中 pgmpy.models.BayesianModel.predict 方法的典型用法代码示例。如果您正苦于以下问题：Python BayesianModel.predict 方法的具体用法？Python BayesianModel.predict 怎么用？Python BayesianModel.predict 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pgmpy.models.BayesianModel 的用法示例。
在下文中一共展示了BayesianModel.predict方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: bayesnet_examples
# Required import: from pgmpy.models import BayesianModel [as alias]
# Note: predict is a method of BayesianModel, so it cannot be imported on its own; call it on a model instance, e.g. model.predict(data).
def bayesnet_examples():
    """Demonstrate fitting, predicting with, and hand-specifying a Bayesian network.

    The function builds the four-edge "student" network
    (D -> G, I -> G, G -> L, I -> S), fits it on the first 75% of a
    randomly generated binary data set, predicts on the remaining 25%
    with column 'D' removed, and finally attaches manually written
    CPDs to the model.

    Returns:
        None. The calls are made for demonstration; their results are
        not returned.
    """
    # numpy is imported next to the other function-local imports so the
    # example does not depend on a module-level ``np`` being present.
    import numpy as np
    import pandas as pd
    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel

    student_model = BayesianModel([('D', 'G'),
                                   ('I', 'G'),
                                   ('G', 'L'),
                                   ('I', 'S')])

    # we can generate some random data.
    raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
    data = pd.DataFrame(raw_data, columns=['D', 'I', 'G', 'L', 'S'])

    # Single split point shared by the train and test slices.
    split_at = int(data.shape[0] * 0.75)
    data_train = data[:split_at]
    student_model.fit(data_train)
    student_model.get_cpds()

    # ``drop`` without ``inplace`` returns a new frame, so the slice of
    # ``data`` is never mutated (an in-place drop on a slice triggers
    # pandas' chained-assignment warning).
    data_test = data[split_at:data.shape[0]].drop('D', axis=1)
    student_model.predict(data_test)

    # Hand-written CPDs; each column of ``values`` sums to 1.
    grade_cpd = TabularCPD(
        variable='G',
        variable_card=3,
        values=[[0.3, 0.05, 0.9, 0.5],
                [0.4, 0.25, 0.08, 0.3],
                [0.3, 0.7, 0.02, 0.2]],
        evidence=['I', 'D'],
        evidence_card=[2, 2])
    difficulty_cpd = TabularCPD(
        variable='D',
        variable_card=2,
        values=[[0.6, 0.4]])
    intel_cpd = TabularCPD(
        variable='I',
        variable_card=2,
        values=[[0.7, 0.3]])
    letter_cpd = TabularCPD(
        variable='L',
        variable_card=2,
        values=[[0.1, 0.4, 0.99],
                [0.9, 0.6, 0.01]],
        evidence=['G'],
        evidence_card=[3])
    sat_cpd = TabularCPD(
        variable='S',
        variable_card=2,
        values=[[0.95, 0.2],
                [0.05, 0.8]],
        evidence=['I'],
        evidence_card=[2])
    student_model.add_cpds(grade_cpd, difficulty_cpd,
                           intel_cpd, letter_cpd,
                           sat_cpd)
示例2: TestBayesianModelFitPredict
# Required import: from pgmpy.models import BayesianModel [as alias]
# Note: predict is a method of BayesianModel, so it cannot be imported on its own; call it on a model instance, e.g. model.predict(data).
class TestBayesianModelFitPredict(unittest.TestCase):
    """Tests for ``BayesianModel.fit`` and ``BayesianModel.predict`` on random binary data."""

    def setUp(self):
        """Create one model with five unconnected nodes and one connected model."""
        self.model_disconnected = BayesianModel()
        self.model_disconnected.add_nodes_from(['A', 'B', 'C', 'D', 'E'])
        self.model_connected = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])

    def test_disconnected_fit(self):
        """Every CPD fitted on the edgeless model must equal that column's relative frequencies."""
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        self.model_disconnected.fit(values)
        for node in ['A', 'B', 'C', 'D', 'E']:
            cpd = self.model_disconnected.get_cpds(node)
            self.assertEqual(cpd.variable, node)
            np_test.assert_array_equal(cpd.cardinality, np.array([2]))
            # ``DataFrame.ix`` no longer exists in current pandas;
            # ``.loc`` performs the same label-based column selection.
            counts = values.loc[:, node].value_counts()
            value = counts / counts.sum()
            # Order the frequencies by state label before comparing.
            value = value.reindex(sorted(value.index)).values
            np_test.assert_array_equal(cpd.values, value)

    def test_connected_predict(self):
        """Fit on 800 seeded rows, then predict column 'E' for the remaining 200."""
        np.random.seed(42)
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        fit_data = values[:800]
        predict_data = values[800:].copy()
        self.model_connected.fit(fit_data)
        # With every column still present the call is expected to raise ValueError.
        self.assertRaises(ValueError, self.model_connected.predict, predict_data)
        predict_data.drop('E', axis=1, inplace=True)
        e_predict = self.model_connected.predict(predict_data)
        np_test.assert_array_equal(e_predict.values.ravel(),
                                   np.array([1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
                                             1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0,
                                             0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
                                             0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
                                             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
                                             1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                                             1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,
                                             1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
                                             0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
                                             1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
                                             1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
                                             0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
                                             1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                                             1, 1, 1, 0]))

    def tearDown(self):
        """Drop the models created in ``setUp``."""
        del self.model_connected
        del self.model_disconnected
示例3: test_predict
# Required import: from pgmpy.models import BayesianModel [as alias]
# Note: predict is a method of BayesianModel, so it cannot be imported on its own; call it on a model instance, e.g. model.predict(data).
def test_predict(self):
    """Predict each Titanic column from the other two and compare with reference labels.

    The model (Sex -> Survived <- Pclass) is fitted on the rows from
    index 500 onward; predictions are made for the first 30 rows.
    """
    titanic = BayesianModel()
    titanic.add_edges_from([("Sex", "Survived"), ("Pclass", "Survived")])
    titanic.fit(self.titanic_data2[500:])

    expected_survived = np.array(['0', '1', '0', '1', '0', '0', '0', '0', '0', '1', '0', '1', '0',
                                  '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
                                  '0', '0', '0', '0'])
    expected_sex = np.array(['male', 'female', 'female', 'female', 'male', 'male', 'male',
                             'male', 'female', 'female', 'female', 'female', 'male', 'male',
                             'male', 'female', 'male', 'female', 'male', 'female', 'male',
                             'female', 'female', 'female', 'male', 'female', 'male', 'male',
                             'female', 'male'])
    expected_pclass = np.array(['3', '1', '1', '1', '3', '3', '3', '3', '1', '1', '1', '1', '3',
                                '3', '3', '1', '3', '1', '3', '1', '3', '1', '1', '1', '3', '1',
                                '3', '3', '1', '3'])

    # (columns given to predict, expected values for the one left out)
    cases = [
        (["Sex", "Pclass"], expected_survived),
        (["Survived", "Pclass"], expected_sex),
        (["Survived", "Sex"], expected_pclass),
    ]

    # Run all three predictions first, then check them in the same order.
    predictions = [titanic.predict(self.titanic_data2[columns][:30])
                   for columns, _ in cases]
    for predicted, (_, expected) in zip(predictions, cases):
        np_test.assert_array_equal(predicted.values.ravel(), expected)
示例4: TestBayesianModelFitPredict
# Required import: from pgmpy.models import BayesianModel [as alias]
# Note: predict is a method of BayesianModel, so it cannot be imported on its own; call it on a model instance, e.g. model.predict(data).
class TestBayesianModelFitPredict(unittest.TestCase):
def setUp(self):
    """Build the models and data frames shared by the tests in this class.

    Creates an edgeless five-node model, a connected five-node model,
    a small collider model (A -> C <- B), one complete and one
    incomplete toy data frame, and the Titanic training data read from
    a CSV file with every column loaded as ``str``.
    """
    self.model_disconnected = BayesianModel()
    self.model_disconnected.add_nodes_from(['A', 'B', 'C', 'D', 'E'])
    self.model_connected = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    self.model2 = BayesianModel([('A', 'C'), ('B', 'C')])
    self.data1 = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
    # ``np.nan`` is the canonical name; the ``np.NaN`` alias was removed
    # in NumPy 2.0 and raises AttributeError there.
    self.data2 = pd.DataFrame(data={'A': [0, np.nan, 1],
                                    'B': [0, 1, 0],
                                    'C': [1, 1, np.nan],
                                    'D': [np.nan, 'Y', np.nan]})

    # data_link - "https://www.kaggle.com/c/titanic/download/train.csv"
    # NOTE: the path is relative, so it is resolved against the current
    # working directory of the test run.
    self.titanic_data = pd.read_csv('pgmpy/tests/test_estimators/testdata/titanic_train.csv', dtype=str)
    self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]
def test_bayesian_fit(self):
    """Fit ``model2`` with a Dirichlet prior and check the resulting CPD of 'B'."""
    # The estimators are passed around as classes, so the meaningful
    # relationship is ``issubclass``; the previous ``isinstance`` check
    # on the class objects was only printed, never asserted.
    self.assertTrue(issubclass(BayesianEstimator, BaseEstimator))
    self.assertTrue(issubclass(MaximumLikelihoodEstimator, BaseEstimator))

    self.model2.fit(self.data1, estimator=BayesianEstimator, prior_type="dirichlet", pseudo_counts=[9, 3])
    # 'B' is observed as [0, 1, 0] in data1: counts (2, 1) plus the
    # pseudo-counts (9, 3) give 11/15 and 4/15.
    self.assertEqual(self.model2.get_cpds('B'), TabularCPD('B', 2, [[11.0 / 15], [4.0 / 15]]))
def test_fit_missing_data(self):
    """Fit ``model2`` on the frame containing missing entries and compare every resulting CPD."""
    self.model2.fit(self.data2, state_names={'C': [0, 1]}, complete_samples_only=False)

    cpd_a = TabularCPD('A', 2, [[0.5], [0.5]])
    cpd_b = TabularCPD('B', 2, [[2. / 3], [1. / 3]])
    cpd_c = TabularCPD('C', 2, [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                       evidence=['A', 'B'], evidence_card=[2, 2])
    expected_cpds = {cpd_a, cpd_b, cpd_c}

    fitted_cpds = set(self.model2.get_cpds())
    self.assertSetEqual(expected_cpds, fitted_cpds)
def test_disconnected_fit(self):
    """Every CPD fitted on the edgeless model must equal that column's relative frequencies."""
    values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                          columns=['A', 'B', 'C', 'D', 'E'])
    self.model_disconnected.fit(values)
    for node in ['A', 'B', 'C', 'D', 'E']:
        cpd = self.model_disconnected.get_cpds(node)
        self.assertEqual(cpd.variable, node)
        np_test.assert_array_equal(cpd.cardinality, np.array([2]))
        # ``DataFrame.ix`` no longer exists in current pandas; ``.loc``
        # performs the same label-based column selection.
        counts = values.loc[:, node].value_counts()
        value = counts / counts.sum()
        # Order the frequencies by state label before comparing.
        value = value.reindex(sorted(value.index)).values
        np_test.assert_array_equal(cpd.values, value)
def test_predict(self):
    """Predict each Titanic column from the other two and compare with reference labels.

    The model (Sex -> Survived <- Pclass) is fitted on the rows from
    index 500 onward; predictions are made for the first 30 rows.
    """
    titanic = BayesianModel()
    titanic.add_edges_from([("Sex", "Survived"), ("Pclass", "Survived")])
    titanic.fit(self.titanic_data2[500:])

    expected_survived = np.array(['0', '1', '0', '1', '0', '0', '0', '0', '0', '1', '0', '1', '0',
                                  '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
                                  '0', '0', '0', '0'])
    expected_sex = np.array(['male', 'female', 'female', 'female', 'male', 'male', 'male',
                             'male', 'female', 'female', 'female', 'female', 'male', 'male',
                             'male', 'female', 'male', 'female', 'male', 'female', 'male',
                             'female', 'female', 'female', 'male', 'female', 'male', 'male',
                             'female', 'male'])
    expected_pclass = np.array(['3', '1', '1', '1', '3', '3', '3', '3', '1', '1', '1', '1', '3',
                                '3', '3', '1', '3', '1', '3', '1', '3', '1', '1', '1', '3', '1',
                                '3', '3', '1', '3'])

    # (columns given to predict, expected values for the one left out)
    cases = [
        (["Sex", "Pclass"], expected_survived),
        (["Survived", "Pclass"], expected_sex),
        (["Survived", "Sex"], expected_pclass),
    ]

    # Run all three predictions first, then check them in the same order.
    predictions = [titanic.predict(self.titanic_data2[columns][:30])
                   for columns, _ in cases]
    for predicted, (_, expected) in zip(predictions, cases):
        np_test.assert_array_equal(predicted.values.ravel(), expected)
def test_connected_predict(self):
    """Fit on 800 seeded string-valued rows, then predict column 'E' for the remaining 200."""
    expected_e = np.array([1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
                           1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0,
                           0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
                           0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
                           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
                           1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                           1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,
                           1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
                           0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
                           1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
                           1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
                           0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
                           1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
                           1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                           1, 1, 1, 0], dtype=str)

    # Seed first so the random sample, and therefore the fit, is reproducible.
    np.random.seed(42)
    raw_samples = np.random.randint(low=0, high=2, size=(1000, 5))
    values = pd.DataFrame(np.array(raw_samples, dtype=str),
                          columns=['A', 'B', 'C', 'D', 'E'])
    training_rows = values[:800]
    held_out_rows = values[800:].copy()

    self.model_connected.fit(training_rows)

    # With every column still present the call is expected to raise ValueError.
    with self.assertRaises(ValueError):
        self.model_connected.predict(held_out_rows)

    held_out_rows = held_out_rows.drop('E', axis=1)
    predicted_e = self.model_connected.predict(held_out_rows)
    np_test.assert_array_equal(predicted_e.values.ravel(), expected_e)
def test_connected_predict_probability(self):
#.........这里部分代码省略.........
示例5: variable
# Required import: from pgmpy.models import BayesianModel [as alias]
# Note: predict is a method of BayesianModel, so it cannot be imported on its own; call it on a model instance, e.g. model.predict(data).
# In many machine-learning workflows it does not matter which column of
# the array holds which variable, as long as the same order is used for
# both training and prediction. In a graphical model, however, each
# variable is different (for example in how it is connected to the other
# variables), so every column of the data has to be tied to a variable
# by name. pandas column labels are used for that.
import pandas as pd
# NOTE(review): `data` is read here before any assignment visible in
# this excerpt, so it must already be defined earlier; presumably a
# four-column array — confirm against the preceding code.
data = pd.DataFrame(data, columns=['cost', 'quality',
'location', 'no_of_people'])
# Bare expression: shows the frame in an interactive session and has no
# effect when run as a plain script.
data
# The first 750 rows are the training set.
train = data[:750]
# The goal is to predict no_of_people with the model, so that column is
# removed from the remaining rows here and predicted later on.
test = data[750:].drop('no_of_people', axis=1)
# Bare expression, as above: display only.
test
# Create the base network structure for the model; each tuple is one
# directed edge.
restaurant_model = BayesianModel([('location', 'cost'),
('quality', 'cost'),
('location', 'no_of_people'),
('cost', 'no_of_people')])
restaurant_model.fit(train)
# fit computes the CPDs of all the variables from the training data that
# was provided; get_cpds() is called here to inspect them.
restaurant_model.get_cpds()
# To predict the values of no_of_people, call predict on the test data;
# .values.ravel() flattens the result into a one-dimensional array.
restaurant_model.predict(test).values.ravel()