当前位置: 首页>>代码示例>>Python>>正文


Python Evaluator.compute_model_performance方法代码示例

本文整理汇总了Python中deepchem.utils.evaluate.Evaluator.compute_model_performance方法的典型用法代码示例。如果您正苦于以下问题:Python Evaluator.compute_model_performance方法的具体用法?Python Evaluator.compute_model_performance怎么用?Python Evaluator.compute_model_performance使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在deepchem.utils.evaluate.Evaluator的用法示例。


在下文中一共展示了Evaluator.compute_model_performance方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: create_and_eval_model

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
def create_and_eval_model(train_dataset, test_dataset, task_type,
                          model_params, model_name, model_dir, tasks):
  """Fit a model on the train set and evaluate it on train and test.

  The model is built with ``Model.model_builder``, fitted on
  ``train_dataset`` and saved to ``model_dir``. Performance dataframes for
  both datasets are computed with ``Evaluator`` and printed.

  Note that ``model_params`` is modified in place: its ``"data_shape"``
  entry is set from ``train_dataset.get_data_shape()``.

  Returns
  -------
  The ``"r2_score"`` entry of the first row of the test performance
  dataframe.
  """
  # Fit model
  task_types = {task: task_type for task in tasks}
  model_params["data_shape"] = train_dataset.get_data_shape()
  print("Creating Model object.")
  # NOTE(review): this import is not referenced by name below; presumably it
  # is needed for its side effects before calling Model.model_builder --
  # confirm before removing.
  import deepchem.models.deep
  model = Model.model_builder(model_name, task_types, model_params)
  print("About to fit model")
  model.fit(train_dataset)
  print("Done fitting, about to save...")
  model.save(model_dir)

  # Eval model on train. The temporary files only serve as output targets for
  # the evaluator and are deleted when the with-blocks exit.
  evaluator = Evaluator(model, train_dataset, verbose=True)
  with tempfile.NamedTemporaryFile() as train_csv_out:
    with tempfile.NamedTemporaryFile() as train_stats_out:
      _, performance_df = evaluator.compute_model_performance(
          train_csv_out, train_stats_out)
  print("train_performance_df")
  print(performance_df)

  # Eval model on test
  evaluator = Evaluator(model, test_dataset, verbose=True)
  with tempfile.NamedTemporaryFile() as test_csv_out:
    with tempfile.NamedTemporaryFile() as test_stats_out:
      _, performance_df = evaluator.compute_model_performance(
          test_csv_out, test_stats_out)
  print("test_performance_df")
  print(performance_df)

  # iterrows() yields (index, row) pairs. The builtin next() works on both
  # Python 2 and Python 3, whereas the iterator's .next() method exists only
  # on Python 2.
  _, first_row = next(performance_df.iterrows())
  return first_row["r2_score"]
开发者ID:rbharath,项目名称:deepchem,代码行数:34,代码来源:pdbbind_nnscore.py

示例2: evaluate

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
  def evaluate(self, dataset, metrics, transformers=None, per_task_metrics=False):
    """
    Evaluates the performance of this model on specified dataset.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset object.
    metrics: deepchem.metrics.Metric
      Evaluation metric(s). Passed unchanged to
      Evaluator.compute_model_performance.
    transformers: list, optional
      List of deepchem.transformers.Transformer. Defaults to an empty list.
    per_task_metrics: bool
      If True, return per-task scores.

    Returns
    -------
    scores
      Result of Evaluator.compute_model_performance(metrics) when
      per_task_metrics is falsy (a dict mapping tasks to scores under
      metric, per the original documentation).
    (scores, per_task_scores)
      Tuple returned instead when per_task_metrics is truthy.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if transformers is None:
      transformers = []
    evaluator = Evaluator(self, dataset, transformers)
    if not per_task_metrics:
      scores = evaluator.compute_model_performance(metrics)
      return scores
    else:
      scores, per_task_scores = evaluator.compute_model_performance(
          metrics, per_task_metrics=per_task_metrics)
      return scores, per_task_scores
开发者ID:AhlamMD,项目名称:deepchem,代码行数:30,代码来源:models.py

示例3: test_singletask_to_multitask_classification

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
  def test_singletask_to_multitask_classification(self):
    """Fit and score a SingletaskToMultitask model on random binary labels.

    Random train and test datasets over 17 tasks are created, a
    SingletaskToMultitask model is built whose builder returns
    logistic-regression SklearnModels, and the fitted model is scored with
    ROC-AUC on both datasets. No score is asserted; the test passes as long
    as nothing raises.
    """
    # Settings that are assigned but not read anywhere below.
    splittype = "scaffold"
    compound_featurizers = [CircularFingerprint(size=1024)]
    complex_featurizers = []
    input_file = "multitask_example.csv"

    output_transformers = []
    tasks = ["task%d" % index for index in range(17)]
    task_types = dict((task, "classification") for task in tasks)
    n_tasks = len(tasks)
    n_features = 10

    # Define train dataset
    n_train = 100
    train_features = np.random.rand(n_train, n_features)
    train_labels = np.random.randint(2, size=(n_train, n_tasks))
    train_weights = np.ones_like(train_labels)
    train_ids = ["C"] * n_train
    train_dataset = Dataset.from_numpy(
        self.train_dir, train_features, train_labels, train_weights,
        train_ids, tasks)

    # Define test dataset
    n_test = 10
    test_features = np.random.rand(n_test, n_features)
    test_labels = np.random.randint(2, size=(n_test, n_tasks))
    test_weights = np.ones_like(test_labels)
    test_ids = ["C"] * n_test
    test_dataset = Dataset.from_numpy(
        self.test_dir, test_features, test_labels, test_weights,
        test_ids, tasks)

    params_dict = {}
    params_dict["batch_size"] = 32
    params_dict["data_shape"] = train_dataset.get_data_shape()

    classification_metrics = [Metric(metrics.roc_auc_score)]

    # The fourth parameter is named model_builder but is simply forwarded as
    # the fourth positional argument of SklearnModel.
    def model_builder(tasks, task_types, model_params, model_builder, verbosity=None):
      return SklearnModel(
          tasks, task_types, model_params, model_builder,
          model_instance=LogisticRegression())

    multitask_model = SingletaskToMultitask(
        tasks, task_types, params_dict, self.model_dir, model_builder)

    # Fit trained model
    multitask_model.fit(train_dataset)
    multitask_model.save()

    # Eval multitask_model on train, then on test
    for split_dataset in (train_dataset, test_dataset):
      evaluator = Evaluator(multitask_model, split_dataset, output_transformers,
                            verbosity=True)
      evaluator.compute_model_performance(classification_metrics)

示例4: test_sklearn_multitask_classification

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
  def test_sklearn_multitask_classification(self):
    """Test that sklearn models can learn on simple multitask classification.

    The two-class digits labels are replicated into n_tasks identical label
    columns, a SingletaskToMultitask model built from logistic-regression
    SklearnModels is fitted on the first 70% of the samples, and every score
    reported under the ROC-AUC metric on the remaining samples must exceed
    0.5.
    """
    np.random.seed(123)
    n_tasks = 4
    dataset = sklearn.datasets.load_digits(n_class=2)
    X, y = dataset.data, dataset.target
    # Turn the 1-D label vector into a column and repeat it once per task.
    y = np.reshape(y, (len(y), 1))
    y = np.hstack([y] * n_tasks)

    frac_train = .7
    n_samples = len(X)
    # Slice bounds must be integers: a float bound raises TypeError on
    # Python 3 / current NumPy. int() truncates toward zero.
    n_train = int(frac_train * n_samples)

    X_train, y_train = X[:n_train], y[:n_train]
    X_test, y_test = X[n_train:], y[n_train:]

    train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train)
    test_dataset = Dataset.from_numpy(self.test_dir, X_test, y_test)

    tasks = train_dataset.get_task_names()
    task_types = {task: "classification" for task in tasks}

    model_params = {
      "batch_size": None,
      "data_shape": train_dataset.get_data_shape()
    }

    verbosity = "high"
    classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
    # Inside the builder, ``verbosity`` refers to the builder's own parameter,
    # not to the local variable defined above.
    def model_builder(tasks, task_types, model_params, model_dir, verbosity=None):
      return SklearnModel(tasks, task_types, model_params, model_dir,
                          mode="classification",
                          model_instance=LogisticRegression(),
                          verbosity=verbosity)
    model = SingletaskToMultitask(tasks, task_types, model_params, self.model_dir,
                                  model_builder, verbosity=verbosity)

    # Fit trained model
    model.fit(train_dataset)
    model.save()

    # Eval model on train
    transformers = []
    train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
    train_scores = train_evaluator.compute_model_performance([classification_metric])
    print("train_scores")
    print(train_scores)

    # Eval model on test
    transformers = []
    evaluator = Evaluator(model, test_dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([classification_metric])
    print("scores")
    print(scores)

    # The scores stored under the metric's name are iterated one by one.
    for score in scores[classification_metric.name]:
      assert score > .5

示例5: test_multitask_keras_mlp_ECFP_classification_API

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
  def test_multitask_keras_mlp_ECFP_classification_API(self):
    """Run the Keras multitask classification pipeline end to end.

    multitask_example.csv is featurized with CircularFingerprint, split with
    ScaffoldSplitter, and used to fit a MultiTaskDNN. Four classification
    metrics are then computed on both splits. No score is asserted; the test
    passes as long as nothing raises.
    """
    graph = tf.Graph()
    session = tf.Session(graph=graph)
    K.set_session(session)
    with graph.as_default():
      task_type = "classification"
      # TODO(rbharath): There should be some automatic check to ensure that all
      # required model_params are specified.
      # TODO(rbharath): Turning off dropout to make tests behave.
      model_params = {
          "nb_hidden": 10,
          "activation": "relu",
          "dropout": .0,
          "learning_rate": .01,
          "momentum": .9,
          "nesterov": False,
          "decay": 1e-4,
          "batch_size": 5,
          "nb_epoch": 2,
          "init": "glorot_uniform",
          "nb_layers": 1,
          "batchnorm": False,
      }

      input_file = os.path.join(self.current_dir, "multitask_example.csv")
      tasks = ["task%d" % index for index in range(17)]
      task_types = dict((task, task_type) for task in tasks)

      # Featurize the input file, then split it into train and test.
      loader = DataLoader(tasks=tasks,
                          smiles_field=self.smiles_field,
                          featurizer=CircularFingerprint(size=1024),
                          verbosity="low")
      dataset = loader.featurize(input_file, self.data_dir)
      train_dataset, test_dataset = ScaffoldSplitter().train_test_split(
          dataset, self.train_dir, self.test_dir)

      transformers = []
      model_params["data_shape"] = train_dataset.get_data_shape()
      classification_metrics = [
          Metric(metric_fn)
          for metric_fn in (metrics.roc_auc_score,
                            metrics.matthews_corrcoef,
                            metrics.recall_score,
                            metrics.accuracy_score)]

      model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir)

      # Fit trained model
      model.fit(train_dataset)
      model.save()

      # Eval model on train, then on test
      for split_dataset in (train_dataset, test_dataset):
        evaluator = Evaluator(model, split_dataset, transformers, verbosity=True)
        evaluator.compute_model_performance(classification_metrics)

示例6: test_sklearn_classification

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
  def test_sklearn_classification(self):
    """Test that sklearn models can learn on simple classification datasets.

    A logistic-regression SklearnModel is fitted on the first 70% of the
    two-class digits samples, and the score reported under the ROC-AUC
    metric on the remaining samples must exceed 0.5.
    """
    np.random.seed(123)
    dataset = sklearn.datasets.load_digits(n_class=2)
    X, y = dataset.data, dataset.target

    frac_train = .7
    n_samples = len(X)
    # Slice bounds must be integers: a float bound raises TypeError on
    # Python 3 / current NumPy. int() truncates toward zero.
    n_train = int(frac_train * n_samples)

    X_train, y_train = X[:n_train], y[:n_train]
    X_test, y_test = X[n_train:], y[n_train:]

    print("X_train.shape, y_train.shape, X_test.shape, y_test.shape")
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

    train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train)
    test_dataset = Dataset.from_numpy(self.test_dir, X_test, y_test)

    tasks = train_dataset.get_task_names()
    task_types = {task: "classification" for task in tasks}

    model_params = {
      "batch_size": None,
      "data_shape": train_dataset.get_data_shape()
    }

    verbosity = "high"
    classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
    model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                         mode="classification",
                         model_instance=LogisticRegression())

    # Fit trained model
    model.fit(train_dataset)
    model.save()

    # Eval model on train
    transformers = []
    train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
    train_scores = train_evaluator.compute_model_performance([classification_metric])
    print("train_scores")
    print(train_scores)

    # Eval model on test
    transformers = []
    evaluator = Evaluator(model, test_dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([classification_metric])
    print("scores")
    print(scores)

    assert scores[classification_metric.name] > .5

示例7: test_singletask_sklearn_rf_RDKIT_descriptor_regression_API

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
  def test_singletask_sklearn_rf_RDKIT_descriptor_regression_API(self):
    """Run the singletask random-forest regression pipeline on RDKit descriptors.

    example.csv is featurized with RDKitDescriptors, split with
    ScaffoldSplitter, transformed, and used to fit a RandomForestRegressor
    wrapped in SklearnModel. Three regression metrics are computed on both
    splits. No score is asserted; the test passes as long as nothing raises.
    """
    splittype = "scaffold"  # assigned but not read below
    tasks = ["log-solubility"]
    task_type = "regression"
    task_types = dict((task, task_type) for task in tasks)
    input_file = os.path.join(self.current_dir, "example.csv")

    loader = DataLoader(tasks=tasks,
                        smiles_field=self.smiles_field,
                        featurizer=RDKitDescriptors(),
                        verbosity="low")
    dataset = loader.featurize(input_file, self.data_dir)

    train_dataset, test_dataset = ScaffoldSplitter().train_test_split(
        dataset, self.train_dir, self.test_dir)

    # All transformers are constructed from the training split and then
    # applied to both splits, inputs first and outputs last.
    transformers = [
        NormalizationTransformer(transform_X=True, dataset=train_dataset),
        ClippingTransformer(transform_X=True, dataset=train_dataset),
        NormalizationTransformer(transform_y=True, dataset=train_dataset),
    ]
    for split_dataset in (train_dataset, test_dataset):
      for transformer in transformers:
        transformer.transform(split_dataset)

    model_params = {"data_shape": train_dataset.get_data_shape()}
    regression_metrics = [
        Metric(metric_fn)
        for metric_fn in (metrics.r2_score,
                          metrics.mean_squared_error,
                          metrics.mean_absolute_error)]

    model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                         mode="regression",
                         model_instance=RandomForestRegressor())

    # Fit trained model
    model.fit(train_dataset)
    model.save()

    # Eval model on train, then on test
    for split_dataset in (train_dataset, test_dataset):
      evaluator = Evaluator(model, split_dataset, transformers, verbosity=True)
      evaluator.compute_model_performance(regression_metrics)

示例8: test_singletask_sklearn_rf_ECFP_regression_sharded_API

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
  def test_singletask_sklearn_rf_ECFP_regression_sharded_API(self):
    """Run the singletask random-forest ECFP regression pipeline (sharded edition).

    pdbbind_core_df.pkl.gz is featurized with CircularFingerprint, split with
    ScaffoldSplitter, output-normalized, and used to fit a
    RandomForestRegressor wrapped in SklearnModel. Three regression metrics
    are computed on both splits. No score is asserted; the test passes as
    long as nothing raises.
    """
    splittype = "scaffold"  # assigned but not read below
    tasks = ["label"]
    task_type = "regression"
    task_types = dict((task, task_type) for task in tasks)
    input_file = os.path.join(
        self.current_dir, "../../../datasets/pdbbind_core_df.pkl.gz")

    loader = DataLoader(tasks=tasks,
                        smiles_field=self.smiles_field,
                        featurizer=CircularFingerprint(size=1024),
                        verbosity="low")
    dataset = loader.featurize(input_file, self.data_dir)

    train_dataset, test_dataset = ScaffoldSplitter().train_test_split(
        dataset, self.train_dir, self.test_dir)

    # Only the outputs are normalized; there are no input transformers.
    transformers = [
        NormalizationTransformer(transform_y=True, dataset=train_dataset)]
    for split_dataset in (train_dataset, test_dataset):
      for transformer in transformers:
        transformer.transform(split_dataset)

    # NOTE(review): the original comment here said a shard size was set above
    # to force multiple shards (pdbbind_core has ~200 examples), but no shard
    # size is set anywhere in this test -- confirm whether that is intended.
    model_params = {"data_shape": train_dataset.get_data_shape()}
    regression_metrics = [
        Metric(metric_fn)
        for metric_fn in (metrics.r2_score,
                          metrics.mean_squared_error,
                          metrics.mean_absolute_error)]

    model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                         mode="regression",
                         model_instance=RandomForestRegressor())

    # Fit trained model
    model.fit(train_dataset)
    model.save()

    # Eval model on train, then on test
    for split_dataset in (train_dataset, test_dataset):
      evaluator = Evaluator(model, split_dataset, transformers, verbosity=True)
      evaluator.compute_model_performance(regression_metrics)

示例9: test_API

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
  def test_API(self):
    """Run the multitask deepchem classification API end to end.

    The input file is featurized, split into train and test, transformed,
    and used to fit a "multitask_deep_classifier" model, which is then
    evaluated on the test split. No score is asserted; the test passes as
    long as nothing raises.
    """
    splittype = "scaffold"
    feature_types = ["ECFP"]
    input_transforms = []
    output_transforms = []
    task_type = "classification"
    model_name = "multitask_deep_classifier"
    # TODO(rbharath): There should be some automatic check to ensure that all
    # required model_params are specified.
    model_params = {
        "nb_hidden": 10,
        "activation": "relu",
        "dropout": .5,
        "learning_rate": .01,
        "momentum": .9,
        "nesterov": False,
        "decay": 1e-4,
        "batch_size": 5,
        "nb_epoch": 2,
    }

    # Featurize input
    featurizer = DataFeaturizer(tasks=self.tasks,
                                smiles_field=self.smiles_field,
                                verbose=True)
    feature_files = featurizer.featurize(
        self.input_file, feature_types, self.feature_dir)

    # Transform data into arrays for ML
    samples = FeaturizedSamples(
        self.samplesdir, feature_files, reload_data=False)

    # Split into train/test
    train_samples, test_samples = samples.train_test_split(
        splittype, self.train_dir, self.test_dir)
    train_dataset = Dataset(self.train_dir, train_samples, feature_types)
    test_dataset = Dataset(self.test_dir, test_samples, feature_types)

    # Transforming train/test data
    for split_dataset in (train_dataset, test_dataset):
      split_dataset.transform(input_transforms, output_transforms)

    # Fit model
    task_types = dict((task, task_type) for task in self.tasks)
    model_params["data_shape"] = train_dataset.get_data_shape()
    model = Model.model_builder(model_name, task_types, model_params)
    model.fit(train_dataset)
    model.save(self.model_dir)

    # Eval model on test; the temporary files only serve as output targets.
    evaluator = Evaluator(model, test_dataset, verbose=True)
    with tempfile.NamedTemporaryFile() as csv_out, \
         tempfile.NamedTemporaryFile() as stats_out:
      evaluator.compute_model_performance(csv_out, stats_out)

示例10: test_tf_reload

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
    def test_tf_reload(self):
        """Check that a saved tensorflow classifier can be reloaded and scored.

        A TensorflowModel is fitted on a small random binary dataset and
        saved. A second TensorflowModel built with the same arguments is
        reloaded from the same model directory, and its accuracy on the
        training data must exceed 0.9.
        """
        tasks = ["task0"]
        task_types = dict((task, "classification") for task in tasks)
        n_samples, n_features, n_classes = 10, 3, 2
        n_tasks = len(tasks)

        # Generate dummy dataset (seeded before any random draw)
        np.random.seed(123)
        ids = np.arange(n_samples)
        features = np.random.rand(n_samples, n_features)
        labels = np.random.randint(n_classes, size=(n_samples, n_tasks))
        weights = np.ones((n_samples, n_tasks))

        dataset = Dataset.from_numpy(
            self.train_dir, features, labels, weights, ids, tasks)

        model_params = dict(
            layer_sizes=[1000],
            dropouts=[0.0],
            learning_rate=0.003,
            momentum=0.9,
            batch_size=n_samples,
            num_classification_tasks=1,
            num_classes=n_classes,
            num_features=n_features,
            weight_init_stddevs=[1.0],
            bias_init_consts=[1.0],
            nb_epoch=100,
            penalty=0.0,
            optimizer="adam",
            data_shape=dataset.get_data_shape(),
        )

        verbosity = "high"
        classification_metric = Metric(metrics.accuracy_score, verbosity=verbosity)

        def build_model():
            # Both the trained and the reloaded model use identical arguments.
            return TensorflowModel(
                tasks, task_types, model_params, self.model_dir,
                tf_class=TensorflowMultiTaskClassifier, verbosity=verbosity)

        # Fit trained model
        model = build_model()
        model.fit(dataset)
        model.save()

        # Load trained model
        reloaded_model = build_model()
        reloaded_model.reload()
        assert reloaded_model.eval_model._restored_model

        # Eval reloaded model on train
        transformers = []
        evaluator = Evaluator(
            reloaded_model, dataset, transformers, verbosity=verbosity)
        scores = evaluator.compute_model_performance([classification_metric])

        assert scores[classification_metric.name] > 0.9

示例11: eval_trained_model

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
def eval_trained_model(model_type, model_dir, data_dir, csv_out, stats_out):
    """Load a saved model, evaluate it on the data in data_dir, and print the result.

    csv_out and stats_out are passed unchanged to
    Evaluator.compute_model_performance. Nothing is returned.
    """
    evaluator = Evaluator(
        Model.load(model_type, model_dir), Dataset(data_dir), verbose=True)
    # Only the second element of the returned pair is reported.
    _, performance = evaluator.compute_model_performance(csv_out, stats_out)
    print("Model Performance.")
    print(performance)

示例12: test_keras_reload

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
    def test_keras_reload(self):
        """Check that a saved keras MultiTaskDNN can be reloaded and scored.

        A MultiTaskDNN is fitted on a small random binary dataset and saved.
        A second MultiTaskDNN built with the same arguments is reloaded from
        the same model directory, and its ROC-AUC on the training data must
        exceed 0.9.
        """
        graph = tf.Graph()
        session = tf.Session(graph=graph)
        K.set_session(session)
        with graph.as_default():
            tasks = ["task0"]
            task_types = dict((task, "classification") for task in tasks)
            n_samples, n_features = 10, 3
            n_tasks = len(tasks)

            # Generate dummy dataset (seeded before any random draw)
            np.random.seed(123)
            ids = np.arange(n_samples)
            features = np.random.rand(n_samples, n_features)
            labels = np.random.randint(2, size=(n_samples, n_tasks))
            weights = np.ones((n_samples, n_tasks))

            dataset = Dataset.from_numpy(
                self.train_dir, features, labels, weights, ids, tasks)

            model_params = dict(
                nb_hidden=1000,
                activation="relu",
                dropout=0.0,
                learning_rate=0.15,
                momentum=0.9,
                nesterov=False,
                decay=1e-4,
                batch_size=n_samples,
                nb_epoch=200,
                init="glorot_uniform",
                nb_layers=1,
                batchnorm=False,
                data_shape=dataset.get_data_shape(),
            )

            verbosity = "high"
            classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)

            def build_model():
                # Both the trained and the reloaded model use identical arguments.
                return MultiTaskDNN(
                    tasks, task_types, model_params, self.model_dir,
                    verbosity=verbosity)

            # Fit trained model
            model = build_model()
            model.fit(dataset)
            model.save()

            # Load trained model
            reloaded_model = build_model()
            reloaded_model.reload()

            # Eval reloaded model on train
            transformers = []
            evaluator = Evaluator(
                reloaded_model, dataset, transformers, verbosity=verbosity)
            scores = evaluator.compute_model_performance([classification_metric])

            assert scores[classification_metric.name] > 0.9

示例13: test_tf_skewed_classification_overfit

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
  def test_tf_skewed_classification_overfit(self):
    """Check that a tensorflow classifier can overfit a 0/1 dataset with few actives.

    Labels are drawn from a binomial distribution with success probability
    0.05. The fitted model's ROC-AUC on the training data must exceed 0.8.
    """
    tasks = ["task0"]
    task_types = dict((task, "classification") for task in tasks)
    n_samples, n_features, n_classes = 100, 3, 2
    n_tasks = len(tasks)

    # Generate dummy dataset (seeded before any random draw)
    np.random.seed(123)
    active_fraction = .05
    ids = np.arange(n_samples)
    features = np.random.rand(n_samples, n_features)
    labels = np.random.binomial(1, active_fraction, size=(n_samples, n_tasks))
    weights = np.ones((n_samples, n_tasks))

    dataset = Dataset.from_numpy(
        self.train_dir, features, labels, weights, ids, tasks)

    model_params = dict(
        layer_sizes=[1500],
        dropouts=[.0],
        learning_rate=0.003,
        momentum=.9,
        batch_size=n_samples,
        num_classification_tasks=1,
        num_classes=n_classes,
        num_features=n_features,
        weight_init_stddevs=[1.],
        bias_init_consts=[1.],
        nb_epoch=200,
        penalty=0.0,
        optimizer="adam",
        data_shape=dataset.get_data_shape(),
    )

    verbosity = "high"
    classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
    model = TensorflowModel(
        tasks, task_types, model_params, self.model_dir,
        tf_class=TensorflowMultiTaskClassifier,
        verbosity=verbosity)

    # Fit trained model
    model.fit(dataset)
    model.save()

    # Eval model on train
    transformers = []
    evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([classification_metric])

    assert scores[classification_metric.name] > .8

示例14: test_tf_multitask_regression_overfit

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
  def test_tf_multitask_regression_overfit(self):
    """Check that a tensorflow multitask regressor overfits tiny data.

    Ten regression tasks share ten samples whose targets are all zero. The
    score stored under the r2 metric's name on the training data must
    exceed 0.9.
    """
    n_tasks = 10
    tasks = ["task%d" % task for task in range(n_tasks)]
    task_types = dict((task, "regression") for task in tasks)
    n_samples, n_features, n_classes = 10, 3, 2

    # Generate dummy dataset: random features, all-zero targets
    np.random.seed(123)
    ids = np.arange(n_samples)
    features = np.random.rand(n_samples, n_features)
    targets = np.zeros((n_samples, n_tasks))
    weights = np.ones((n_samples, n_tasks))

    dataset = Dataset.from_numpy(
        self.train_dir, features, targets, weights, ids, tasks)

    model_params = dict(
        layer_sizes=[1000],
        dropouts=[.0],
        learning_rate=0.0003,
        momentum=.9,
        batch_size=n_samples,
        num_regression_tasks=n_tasks,
        num_classes=n_classes,
        num_features=n_features,
        weight_init_stddevs=[.1],
        bias_init_consts=[1.],
        nb_epoch=100,
        penalty=0.0,
        optimizer="adam",
        data_shape=dataset.get_data_shape(),
    )

    verbosity = "high"
    regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
    model = TensorflowModel(
        tasks, task_types, model_params, self.model_dir,
        tf_class=TensorflowMultiTaskRegressor,
        verbosity=verbosity)

    # Fit trained model
    model.fit(dataset)
    model.save()

    # Eval model on train
    transformers = []
    evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([regression_metric])

    assert scores[regression_metric.name] > .9

示例15: _create_model

# 需要导入模块: from deepchem.utils.evaluate import Evaluator [as 别名]
# 或者: from deepchem.utils.evaluate.Evaluator import compute_model_performance [as 别名]
  def _create_model(self, train_dataset, test_dataset, model, transformers,
                    metrics):
    """Fit and save ``model``, then evaluate it on the train and test datasets.

    The model is fitted on ``train_dataset`` and saved to ``self.model_dir``.
    It is then evaluated first on ``train_dataset`` and afterwards on
    ``test_dataset``; the evaluation results are discarded and nothing is
    returned.
    """
    # Fit trained model
    model.fit(train_dataset)
    model.save(self.model_dir)

    # Eval model on train, then on test. Each evaluation gets its own pair of
    # temporary output files, which are deleted when the with-block exits.
    for split_dataset in (train_dataset, test_dataset):
      evaluator = Evaluator(model, split_dataset, transformers, verbose=True)
      with tempfile.NamedTemporaryFile() as csv_out, \
           tempfile.NamedTemporaryFile() as stats_out:
        # The three-way unpacking is kept so that a result of any other
        # length still raises, as before.
        _, _, _ = evaluator.compute_model_performance(
            metrics, csv_out, stats_out)


注:本文中的deepchem.utils.evaluate.Evaluator.compute_model_performance方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。