Python under_sampling.RandomUnderSampler方法代码示例

本文整理汇总了Python中imblearn.under_sampling.RandomUnderSampler方法的典型用法代码示例。如果您正苦于以下问题：Python under_sampling.RandomUnderSampler方法的具体用法？Python under_sampling.RandomUnderSampler怎么用？Python under_sampling.RandomUnderSampler使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块imblearn.under_sampling的用法示例。

在下文中一共展示了under_sampling.RandomUnderSampler方法的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: transform

# 需要导入模块: from imblearn import under_sampling [as 别名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 别名]
def transform(self, X, y=None):
        """Return a copy of ``X`` balanced by random under-sampling.

        The label column named by ``self.predicted_column`` is split off from
        the feature columns, both are handed to a ``RandomUnderSampler``
        seeded with ``self.random_seed``, and the resampled features and
        labels are stitched back together into one dataframe.

        Args:
            X: Dataframe holding the feature columns plus the column named by
                ``self.predicted_column``.
            y: Ignored. The labels are always read from ``X``; the parameter
                is overwritten before it is used.

        Returns:
            pandas.DataFrame: The resampled rows. The feature columns keep
            their original names and order; the predicted column is appended
            as the last column.
        """
        # TODO how do we validate this happens before train/test split? Or do we need to? Can we implement it in the
        # TODO      simple trainer in the correct order and leave this to advanced users?

        # Extract predicted column
        y = np.squeeze(X[[self.predicted_column]])

        # Copy the dataframe without the predicted column
        temp_dataframe = X.drop([self.predicted_column], axis=1)

        # Initialize the under sampler
        under_sampler = RandomUnderSampler(random_state=self.random_seed)

        # Newer imbalanced-learn releases expose ``fit_resample`` and no longer
        # ship ``fit_sample``; older ones only have ``fit_sample``. Prefer the
        # new name and fall back so both generations of the library work.
        resample = getattr(under_sampler, 'fit_resample', None)
        if resample is None:
            resample = under_sampler.fit_sample
        x_under_sampled, y_under_sampled = resample(temp_dataframe, y)

        # Build the resulting under sampled dataframe
        result = pd.DataFrame(x_under_sampled)

        # Restore the column names
        result.columns = temp_dataframe.columns

        # Restore the y values
        y_under_sampled = pd.Series(y_under_sampled)
        result[self.predicted_column] = y_under_sampled

        return result

开发者ID:HealthCatalyst，项目名称:healthcareai-py，代码行数:27，代码来源:transformers.py

示例2: create_sampler

# 需要导入模块: from imblearn import under_sampling [as 别名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 别名]
def create_sampler(sampler_name, random_state=None):
    """Build the resampler identified by ``sampler_name``.

    The name is matched case-insensitively. ``None`` and the literal string
    ``'None'`` both mean "no sampler" and yield ``None``.

    :param sampler_name: name of the sampler to create, or ``None``/``'None'``
    :param random_state: forwarded as ``random_state`` to the chosen sampler
    :return: a new sampler instance, or ``None`` when no sampler is requested
    :raises ValueError: if ``sampler_name`` matches no supported sampler
    """
    if sampler_name is None or sampler_name == 'None':
        return None

    # Each entry is a small factory rather than the class itself, so a sampler
    # class is only looked up when its name is actually requested.
    # NOTE(review): some imbalanced-learn releases may not accept
    # ``random_state`` for the cleaning samplers (TomekLinks, ENN, NCL) --
    # confirm against the pinned version.
    factories = {
        'randomundersampler': lambda seed: RandomUnderSampler(random_state=seed),
        'tomeklinks': lambda seed: TomekLinks(random_state=seed),
        'enn': lambda seed: EditedNearestNeighbours(random_state=seed),
        'ncl': lambda seed: NeighbourhoodCleaningRule(random_state=seed),
        'randomoversampler': lambda seed: RandomOverSampler(random_state=seed),
        'smote': lambda seed: SMOTE(random_state=seed),
        'smotetomek': lambda seed: SMOTETomek(random_state=seed),
        'smoteenn': lambda seed: SMOTEENN(random_state=seed),
    }

    factory = factories.get(sampler_name.lower())
    if factory is None:
        raise ValueError('Unsupported value \'%s\' for sampler' % sampler_name)
    return factory(random_state)

开发者ID:melqkiades，项目名称:yelp，代码行数:24，代码来源:sampler_factory.py

示例3: test_objectmapper_undersampling

# 需要导入模块: from imblearn import under_sampling [as 别名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 别名]
def test_objectmapper_undersampling(self):
        """Check that the ``imbalance.under_sampling`` accessor of a ModelFrame
        exposes the very same class objects as ``imblearn.under_sampling``."""
        import imblearn.under_sampling as us
        df = pdml.ModelFrame([])

        # Attribute names that must resolve to identical objects on both sides.
        sampler_names = (
            'ClusterCentroids',
            'CondensedNearestNeighbour',
            'EditedNearestNeighbours',
            'RepeatedEditedNearestNeighbours',
            'InstanceHardnessThreshold',
            'NearMiss',
            'NeighbourhoodCleaningRule',
            'OneSidedSelection',
            'RandomUnderSampler',
            'TomekLinks',
        )
        for sampler_name in sampler_names:
            exposed = getattr(df.imbalance.under_sampling, sampler_name)
            self.assertIs(exposed, getattr(us, sampler_name))

开发者ID:pandas-ml，项目名称:pandas-ml，代码行数:25，代码来源:test_imbalance.py

示例4: __init__

# 需要导入模块: from imblearn import under_sampling [as 别名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 别名]
def __init__(self):
        """Initialise the base class with a seeded ``RandomUnderSampler`` and
        the ``BALANCE_DOWN_SAMPLING`` identifier."""
        # The seed is looked up under the same key that identifies this balancer.
        seed = RANDOM_SEED[BALANCE_DOWN_SAMPLING]
        under_sampler = RandomUnderSampler(random_state=seed)
        super(DownSampling, self).__init__(under_sampler, BALANCE_DOWN_SAMPLING)

开发者ID:salan668，项目名称:FAE，代码行数:5，代码来源:DataBalance.py

示例5: __undersample

# 需要导入模块: from imblearn import under_sampling [as 别名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 别名]
def __undersample(self, feature_names, X, y):
        """Randomly under-sample ``X``/``y`` using ``QuincyConfig.RATIO``.

        Args:
            feature_names: Column names for the returned feature dataframe.
            X: Feature matrix to resample.
            y: Labels belonging to the rows of ``X``.

        Returns:
            tuple: ``(X, y)`` where ``X`` is a ``pandas.DataFrame`` with
            ``feature_names`` as columns and ``y`` is whatever the sampler
            returned for the labels.
        """
        logging.info("Random undersampling")

        # Newer imbalanced-learn releases take ``sampling_strategy`` and no
        # longer accept ``ratio``; older ones only know ``ratio``. Try the
        # current keyword first and fall back so both generations work.
        try:
            undersampler = RandomUnderSampler(sampling_strategy=QuincyConfig.RATIO)
        except TypeError:
            undersampler = RandomUnderSampler(ratio=QuincyConfig.RATIO)

        # Same story for the method name: ``fit_resample`` replaced ``fit_sample``.
        resample = getattr(undersampler, "fit_resample", None)
        if resample is None:
            resample = undersampler.fit_sample
        X, y = resample(X, y)

        X = pandas.DataFrame(X, columns=feature_names)
        return X, y

开发者ID:tbarabosch，项目名称:quincy，代码行数:8，代码来源:QuincyLearn.py

示例6: resample

# 需要导入模块: from imblearn import under_sampling [as 别名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 别名]
def resample(self, X, y, target_size_strategy, seed):
        """Under-sample ``X`` and ``y`` with imblearn's ``RandomUnderSampler``.

        Args:
            X: Feature data to resample.
            y: Targets belonging to ``X``.
            target_size_strategy: Passed as ``sampling_strategy`` to the sampler.
            seed: Passed as ``random_state`` to the sampler.

        Returns:
            The result of ``fit_resample(X, y)``.
        """
        # Imported locally under an alias, as in the original code.
        from imblearn.under_sampling import RandomUnderSampler as imblearn_RandomUnderSampler
        return imblearn_RandomUnderSampler(
            sampling_strategy=target_size_strategy,
            random_state=seed,
        ).fit_resample(X, y)

开发者ID:automl，项目名称:Auto-PyTorch，代码行数:6，代码来源:random.py

示例7: __init__

# 需要导入模块: from imblearn import under_sampling [as 别名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 别名]
def __init__(self, lemmatization=False):
        """Configure the model's training DBs, sampler, extraction pipeline
        and classifier.

        Args:
            lemmatization: Forwarded unchanged to ``CommitModel.__init__``.
        """
        CommitModel.__init__(self, lemmatization)

        self.training_dbs.append(test_scheduling.TEST_LABEL_SCHEDULING_DB)

        # Under-sampler seeded with a constant (0).
        self.sampler = RandomUnderSampler(random_state=0)

        # Commit-level feature extractors; commented-out entries are disabled.
        extractors = [
            commit_features.source_code_file_size(),
            commit_features.other_file_size(),
            commit_features.test_file_size(),
            commit_features.source_code_added(),
            commit_features.other_added(),
            commit_features.test_added(),
            commit_features.source_code_deleted(),
            commit_features.other_deleted(),
            commit_features.test_deleted(),
            # commit_features.author_experience(),
            # commit_features.reviewer_experience(),
            commit_features.reviewers_num(),
            # commit_features.component_touched_prev(),
            # commit_features.directory_touched_prev(),
            # commit_features.file_touched_prev(),
            commit_features.types(),
            commit_features.files(),
            commit_features.components(),
            commit_features.components_modified_num(),
            commit_features.directories(),
            commit_features.directories_modified_num(),
            commit_features.source_code_files_modified_num(),
            commit_features.other_files_modified_num(),
            commit_features.test_files_modified_num(),
        ]

        # Step 1 runs the extractors (with an empty second argument); step 2
        # vectorizes the "data" column.
        commit_step = (
            "commit_extractor",
            commit_features.CommitExtractor(extractors, []),
        )
        union_step = (
            "union",
            ColumnTransformer([("data", DictVectorizer(), "data")]),
        )
        self.extraction_pipeline = Pipeline([commit_step, union_step])

        self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
        self.clf.set_params(predictor="cpu_predictor")

开发者ID:mozilla，项目名称:bugbug，代码行数:48，代码来源:testfailure.py

示例8: __init__

# 需要导入模块: from imblearn import under_sampling [as 别名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 别名]
def __init__(self, lemmatization=False):
        """Configure the model's sampler, extraction pipeline and classifier.

        Args:
            lemmatization: Forwarded unchanged to ``BugModel.__init__``.
        """
        BugModel.__init__(self, lemmatization)

        # Under-sampler seeded with a constant (0).
        self.sampler = RandomUnderSampler(random_state=0)

        # Bug-level feature extractors handed to the BugExtractor.
        extractors = [
            bug_features.has_str(),
            bug_features.has_regression_range(),
            bug_features.severity(),
            bug_features.keywords(),
            bug_features.is_coverity_issue(),
            bug_features.has_crash_signature(),
            bug_features.has_url(),
            bug_features.has_w3c_url(),
            bug_features.has_github_url(),
            bug_features.whiteboard(),
            bug_features.patches(),
            bug_features.landings(),
        ]

        # Cleanup callables handed to the BugExtractor.
        cleaners = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # Step 1: extraction with rollback enabled, using self.rollback.
        extractor_step = (
            "bug_extractor",
            bug_features.BugExtractor(
                extractors,
                cleaners,
                rollback=True,
                rollback_when=self.rollback,
            ),
        )

        # Step 2: one vectorizer per column ("data", "title", "comments").
        union_step = (
            "union",
            ColumnTransformer(
                [
                    ("data", DictVectorizer(), "data"),
                    ("title", self.text_vectorizer(), "title"),
                    ("comments", self.text_vectorizer(), "comments"),
                ]
            ),
        )

        self.extraction_pipeline = Pipeline([extractor_step, union_step])

        self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
        self.clf.set_params(predictor="cpu_predictor")

开发者ID:mozilla，项目名称:bugbug，代码行数:54，代码来源:uplift.py

示例9: __init__

# 需要导入模块: from imblearn import under_sampling [as 别名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 别名]
def __init__(self, lemmatization=False):
        """Configure the model's sampler, extraction pipeline and classifier.

        Args:
            lemmatization: Forwarded unchanged to ``BugModel.__init__``.
        """
        BugModel.__init__(self, lemmatization)

        # Under-sampler seeded with a constant (0).
        self.sampler = RandomUnderSampler(random_state=0)

        # Bug-level feature extractors; the keywords extractor is given the
        # set {"qawanted"}.
        extractors = [
            bug_features.has_str(),
            bug_features.has_regression_range(),
            bug_features.severity(),
            bug_features.keywords({"qawanted"}),
            bug_features.is_coverity_issue(),
            bug_features.has_crash_signature(),
            bug_features.has_url(),
            bug_features.has_w3c_url(),
            bug_features.has_github_url(),
            bug_features.whiteboard(),
            bug_features.patches(),
            bug_features.landings(),
        ]

        # Cleanup callables handed to the BugExtractor.
        cleaners = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # Step 1: extraction with rollback enabled, using self.rollback.
        extractor_step = (
            "bug_extractor",
            bug_features.BugExtractor(
                extractors,
                cleaners,
                rollback=True,
                rollback_when=self.rollback,
            ),
        )

        # Step 2: one vectorizer per column ("data", "title", "comments").
        union_step = (
            "union",
            ColumnTransformer(
                [
                    ("data", DictVectorizer(), "data"),
                    ("title", self.text_vectorizer(), "title"),
                    ("comments", self.text_vectorizer(), "comments"),
                ]
            ),
        )

        self.extraction_pipeline = Pipeline([extractor_step, union_step])

        self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
        self.clf.set_params(predictor="cpu_predictor")

开发者ID:mozilla，项目名称:bugbug，代码行数:54，代码来源:qaneeded.py

示例10: __init__

# 需要导入模块: from imblearn import under_sampling [as 别名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 别名]
def __init__(self, lemmatization=False):
        """Configure the model's sampler, extraction pipeline and classifier,
        with commit data enabled and cross-validation switched off.

        Args:
            lemmatization: Forwarded unchanged to ``BugModel.__init__``.
        """
        BugModel.__init__(self, lemmatization, commit_data=True)

        self.cross_validation_enabled = False

        # Under-sampler seeded with a constant (0).
        self.sampler = RandomUnderSampler(random_state=0)

        # Bug-level feature extractors, including the commit_* ones; the
        # keywords extractor is given the two dev-doc keywords.
        extractors = [
            bug_features.has_str(),
            bug_features.has_regression_range(),
            bug_features.severity(),
            bug_features.keywords({"dev-doc-needed", "dev-doc-complete"}),
            bug_features.is_coverity_issue(),
            bug_features.has_crash_signature(),
            bug_features.has_url(),
            bug_features.has_w3c_url(),
            bug_features.has_github_url(),
            bug_features.whiteboard(),
            bug_features.patches(),
            bug_features.landings(),
            bug_features.product(),
            bug_features.component(),
            bug_features.commit_added(),
            bug_features.commit_deleted(),
            bug_features.commit_types(),
        ]

        # Cleanup callables handed to the BugExtractor.
        cleaners = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # Step 1: extraction with rollback and commit data enabled.
        extractor_step = (
            "bug_extractor",
            bug_features.BugExtractor(
                extractors,
                cleaners,
                rollback=True,
                rollback_when=self.rollback,
                commit_data=True,
            ),
        )

        # Step 2: one vectorizer per column ("data", "title", "comments").
        union_step = (
            "union",
            ColumnTransformer(
                [
                    ("data", DictVectorizer(), "data"),
                    ("title", self.text_vectorizer(), "title"),
                    ("comments", self.text_vectorizer(), "comments"),
                ]
            ),
        )

        self.extraction_pipeline = Pipeline([extractor_step, union_step])

        self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
        self.clf.set_params(predictor="cpu_predictor")

开发者ID:mozilla，项目名称:bugbug，代码行数:62，代码来源:devdocneeded.py

示例11: __init__

# 需要导入模块: from imblearn import under_sampling [as 别名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 别名]
def __init__(self, lemmatization=False):
        """Configure the model's sampler, extraction pipeline and classifier.

        Args:
            lemmatization: Forwarded unchanged to ``BugModel.__init__``.
        """
        BugModel.__init__(self, lemmatization)

        # Under-sampler seeded with a constant (0).
        self.sampler = RandomUnderSampler(random_state=0)

        # Bug-level feature extractors; the keywords extractor is given the
        # two regression keywords.
        extractors = [
            bug_features.has_str(),
            bug_features.severity(),
            bug_features.keywords({"regression", "regressionwindow-wanted"}),
            bug_features.is_coverity_issue(),
            bug_features.has_crash_signature(),
            bug_features.has_url(),
            bug_features.has_w3c_url(),
            bug_features.has_github_url(),
            bug_features.whiteboard(),
            bug_features.patches(),
            bug_features.landings(),
        ]

        # Cleanup callables handed to the BugExtractor.
        cleaners = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # Step 1: extraction, with no rollback arguments passed here.
        extractor_step = (
            "bug_extractor",
            bug_features.BugExtractor(extractors, cleaners),
        )

        # Step 2: one vectorizer per column ("data", "title", "comments").
        union_step = (
            "union",
            ColumnTransformer(
                [
                    ("data", DictVectorizer(), "data"),
                    ("title", self.text_vectorizer(), "title"),
                    ("comments", self.text_vectorizer(), "comments"),
                ]
            ),
        )

        self.extraction_pipeline = Pipeline([extractor_step, union_step])

        self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
        self.clf.set_params(predictor="cpu_predictor")

开发者ID:mozilla，项目名称:bugbug，代码行数:48，代码来源:regressionrange.py

注：本文中的imblearn.under_sampling.RandomUnderSampler方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。

示例1: transform

示例2: create_sampler

示例3: test_objectmapper_undersampling

示例4: __init__

示例5: __undersample

示例6: resample

示例7: __init__

示例8: __init__

示例9: __init__