當前位置: 首頁>>代碼示例>>Python>>正文


Python under_sampling.RandomUnderSampler方法代碼示例

本文整理匯總了Python中imblearn.under_sampling.RandomUnderSampler方法的典型用法代碼示例。如果您正苦於以下問題:Python under_sampling.RandomUnderSampler方法的具體用法?Python under_sampling.RandomUnderSampler怎麼用?Python under_sampling.RandomUnderSampler使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在imblearn.under_sampling的用法示例。


在下文中一共展示了under_sampling.RandomUnderSampler方法的11個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: transform

# 需要導入模塊: from imblearn import under_sampling [as 別名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 別名]
def transform(self, X, y=None):
        """Return a randomly under-sampled copy of ``X``.

        The target is taken from the ``self.predicted_column`` column of ``X``,
        the remaining columns are resampled together with it by a
        ``RandomUnderSampler`` seeded with ``self.random_seed``, and the two
        are recombined into one dataframe.

        Args:
            X: pandas DataFrame containing the feature columns and the column
                named by ``self.predicted_column``.
            y: Ignored; the target is always read from ``X``.

        Returns:
            A new DataFrame with the resampled feature columns (original column
            names) followed by the resampled predicted column.
        """
        # TODO how do we validate this happens before train/test split? Or do we need to? Can we implement it in the
        # TODO      simple trainer in the correct order and leave this to advanced users?

        # Extract the predicted column as a one-dimensional target
        y = np.squeeze(X[[self.predicted_column]])

        # Feature columns only (the predicted column is re-attached below)
        features = X.drop([self.predicted_column], axis=1)

        under_sampler = RandomUnderSampler(random_state=self.random_seed)

        # imbalanced-learn renamed ``fit_sample`` to ``fit_resample`` and later
        # removed the old name; prefer the new one and fall back for old releases.
        resample = getattr(under_sampler, 'fit_resample', None)
        if resample is None:
            resample = under_sampler.fit_sample
        x_under_sampled, y_under_sampled = resample(features, y)

        # Rebuild a dataframe and restore the feature column names
        result = pd.DataFrame(x_under_sampled)
        result.columns = features.columns

        # Re-attach the target positionally: assigning a plain array avoids
        # index alignment (and the NaNs it could introduce) should the sampler
        # hand back pandas objects with a non-default index.
        result[self.predicted_column] = np.asarray(y_under_sampled)

        return result
開發者ID:HealthCatalyst,項目名稱:healthcareai-py,代碼行數:27,代碼來源:transformers.py

示例2: create_sampler

# 需要導入模塊: from imblearn import under_sampling [as 別名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 別名]
def create_sampler(sampler_name, random_state=None):
    """Build the imbalanced-learn sampler identified by ``sampler_name``.

    Args:
        sampler_name: Case-insensitive sampler identifier: one of
            'RandomUnderSampler', 'TomekLinks', 'ENN', 'NCL',
            'RandomOverSampler', 'SMOTE', 'SMOTETomek' or 'SMOTEENN'.
            ``None`` or the exact string 'None' means "no sampler".
        random_state: Seed forwarded to the samplers that take one.

    Returns:
        A new sampler instance, or ``None`` when no sampler was requested.

    Raises:
        ValueError: If ``sampler_name`` is not a supported identifier.
    """
    if sampler_name is None or sampler_name == 'None':
        return None

    # Lambdas defer both the class lookup and the construction until the
    # requested sampler is known.
    factories = {
        'randomundersampler':
            lambda: RandomUnderSampler(random_state=random_state),
        # TomekLinks, ENN and NCL are deterministic cleaning methods; current
        # imbalanced-learn releases reject a ``random_state`` argument for
        # them, so none is passed.
        'tomeklinks': lambda: TomekLinks(),
        'enn': lambda: EditedNearestNeighbours(),
        'ncl': lambda: NeighbourhoodCleaningRule(),
        'randomoversampler':
            lambda: RandomOverSampler(random_state=random_state),
        'smote': lambda: SMOTE(random_state=random_state),
        'smotetomek': lambda: SMOTETomek(random_state=random_state),
        'smoteenn': lambda: SMOTEENN(random_state=random_state),
    }

    factory = factories.get(sampler_name.lower())
    if factory is None:
        raise ValueError('Unsupported value \'%s\' for sampler' % sampler_name)
    return factory()
開發者ID:melqkiades,項目名稱:yelp,代碼行數:24,代碼來源:sampler_factory.py

示例3: test_objectmapper_undersampling

# 需要導入模塊: from imblearn import under_sampling [as 別名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 別名]
def test_objectmapper_undersampling(self):
        """Check that ``ModelFrame.imbalance.under_sampling`` exposes the very
        same class objects as ``imblearn.under_sampling``."""
        import imblearn.under_sampling as us

        df = pdml.ModelFrame([])

        # Checked in this order; the first mismatch fails the test.
        sampler_names = (
            'ClusterCentroids',
            'CondensedNearestNeighbour',
            'EditedNearestNeighbours',
            'RepeatedEditedNearestNeighbours',
            'InstanceHardnessThreshold',
            'NearMiss',
            'NeighbourhoodCleaningRule',
            'OneSidedSelection',
            'RandomUnderSampler',
            'TomekLinks',
        )
        for sampler_name in sampler_names:
            exposed = getattr(df.imbalance.under_sampling, sampler_name)
            self.assertIs(exposed, getattr(us, sampler_name))
開發者ID:pandas-ml,項目名稱:pandas-ml,代碼行數:25,代碼來源:test_imbalance.py

示例4: __init__

# 需要導入模塊: from imblearn import under_sampling [as 別名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 別名]
def __init__(self):
        """Initialise the parent with a ``RandomUnderSampler`` seeded from
        ``RANDOM_SEED[BALANCE_DOWN_SAMPLING]`` and the ``BALANCE_DOWN_SAMPLING``
        identifier."""
        parent_init = super(DownSampling, self).__init__
        seed = RANDOM_SEED[BALANCE_DOWN_SAMPLING]
        parent_init(RandomUnderSampler(random_state=seed), BALANCE_DOWN_SAMPLING)
開發者ID:salan668,項目名稱:FAE,代碼行數:5,代碼來源:DataBalance.py

示例5: __undersample

# 需要導入模塊: from imblearn import under_sampling [as 別名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 別名]
def __undersample(self, feature_names, X, y):
        """Randomly under-sample ``(X, y)`` using ``QuincyConfig.RATIO``.

        Args:
            feature_names: Column labels applied to the resampled features.
            X: Feature data passed to the sampler.
            y: Target labels passed to the sampler.

        Returns:
            Tuple ``(X, y)``: the resampled features wrapped in a pandas
            DataFrame with ``feature_names`` as columns, and the resampled
            labels as returned by the sampler.
        """
        logging.info("Random undersampling")

        # NOTE(review): ``ratio`` and ``fit_sample`` look like the older
        # imbalanced-learn spellings (newer releases use ``sampling_strategy``
        # and ``fit_resample``) - confirm against the pinned version.
        sampler = RandomUnderSampler(ratio=QuincyConfig.RATIO)
        resampled_features, resampled_labels = sampler.fit_sample(X, y)

        feature_frame = pandas.DataFrame(resampled_features, columns=feature_names)
        return feature_frame, resampled_labels
開發者ID:tbarabosch,項目名稱:quincy,代碼行數:8,代碼來源:QuincyLearn.py

示例6: resample

# 需要導入模塊: from imblearn import under_sampling [as 別名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 別名]
def resample(self, X, y, target_size_strategy, seed):
        """Under-sample ``(X, y)`` with imblearn's ``RandomUnderSampler``.

        Args:
            X: Feature data handed to ``fit_resample``.
            y: Target labels handed to ``fit_resample``.
            target_size_strategy: Passed as the sampler's ``sampling_strategy``.
            seed: Passed as the sampler's ``random_state``.

        Returns:
            Whatever ``fit_resample(X, y)`` returns.
        """
        # Imported at call time under an alias, as in the original code.
        from imblearn.under_sampling import RandomUnderSampler as _ImblearnUnderSampler

        sampler = _ImblearnUnderSampler(
            sampling_strategy=target_size_strategy,
            random_state=seed,
        )
        resampled = sampler.fit_resample(X, y)
        return resampled
開發者ID:automl,項目名稱:Auto-PyTorch,代碼行數:6,代碼來源:random.py

示例7: __init__

# 需要導入模塊: from imblearn import under_sampling [as 別名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 別名]
def __init__(self, lemmatization=False):
        """Configure training DBs, sampler, extraction pipeline and classifier.

        Args:
            lemmatization: Forwarded unchanged to ``CommitModel.__init__``.
        """
        CommitModel.__init__(self, lemmatization)

        self.training_dbs.append(test_scheduling.TEST_LABEL_SCHEDULING_DB)

        # Under-sampler with a fixed seed.
        self.sampler = RandomUnderSampler(random_state=0)

        # Extractors handed to CommitExtractor; commented-out entries are
        # currently disabled.
        feature_extractors = [
            commit_features.source_code_file_size(),
            commit_features.other_file_size(),
            commit_features.test_file_size(),
            commit_features.source_code_added(),
            commit_features.other_added(),
            commit_features.test_added(),
            commit_features.source_code_deleted(),
            commit_features.other_deleted(),
            commit_features.test_deleted(),
            # commit_features.author_experience(),
            # commit_features.reviewer_experience(),
            commit_features.reviewers_num(),
            # commit_features.component_touched_prev(),
            # commit_features.directory_touched_prev(),
            # commit_features.file_touched_prev(),
            commit_features.types(),
            commit_features.files(),
            commit_features.components(),
            commit_features.components_modified_num(),
            commit_features.directories(),
            commit_features.directories_modified_num(),
            commit_features.source_code_files_modified_num(),
            commit_features.other_files_modified_num(),
            commit_features.test_files_modified_num(),
        ]

        # Step 1: feature extraction (no cleanup functions are supplied).
        commit_extractor = commit_features.CommitExtractor(feature_extractors, [])
        # Step 2: DictVectorizer applied to the "data" column.
        union = ColumnTransformer([("data", DictVectorizer(), "data")])
        self.extraction_pipeline = Pipeline(
            [("commit_extractor", commit_extractor), ("union", union)]
        )

        cpu_count = utils.get_physical_cpu_count()
        self.clf = xgboost.XGBClassifier(n_jobs=cpu_count)
        self.clf.set_params(predictor="cpu_predictor")
開發者ID:mozilla,項目名稱:bugbug,代碼行數:48,代碼來源:testfailure.py

示例8: __init__

# 需要導入模塊: from imblearn import under_sampling [as 別名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 別名]
def __init__(self, lemmatization=False):
        """Configure sampler, extraction pipeline and classifier.

        Args:
            lemmatization: Forwarded unchanged to ``BugModel.__init__``.
        """
        BugModel.__init__(self, lemmatization)

        # Under-sampler with a fixed seed.
        self.sampler = RandomUnderSampler(random_state=0)

        # Extractors handed to BugExtractor.
        feature_extractors = [
            bug_features.has_str(),
            bug_features.has_regression_range(),
            bug_features.severity(),
            bug_features.keywords(),
            bug_features.is_coverity_issue(),
            bug_features.has_crash_signature(),
            bug_features.has_url(),
            bug_features.has_w3c_url(),
            bug_features.has_github_url(),
            bug_features.whiteboard(),
            bug_features.patches(),
            bug_features.landings(),
        ]

        # Cleanup callables handed to BugExtractor.
        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # Step 1: feature extraction with rollback driven by self.rollback.
        bug_extractor = bug_features.BugExtractor(
            feature_extractors,
            cleanup_functions,
            rollback=True,
            rollback_when=self.rollback,
        )
        # Step 2: one vectorizer per column ("data", "title", "comments").
        union = ColumnTransformer(
            [
                ("data", DictVectorizer(), "data"),
                ("title", self.text_vectorizer(), "title"),
                ("comments", self.text_vectorizer(), "comments"),
            ]
        )
        self.extraction_pipeline = Pipeline(
            [("bug_extractor", bug_extractor), ("union", union)]
        )

        cpu_count = utils.get_physical_cpu_count()
        self.clf = xgboost.XGBClassifier(n_jobs=cpu_count)
        self.clf.set_params(predictor="cpu_predictor")
開發者ID:mozilla,項目名稱:bugbug,代碼行數:54,代碼來源:uplift.py

示例9: __init__

# 需要導入模塊: from imblearn import under_sampling [as 別名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 別名]
def __init__(self, lemmatization=False):
        """Configure sampler, extraction pipeline and classifier.

        Args:
            lemmatization: Forwarded unchanged to ``BugModel.__init__``.
        """
        BugModel.__init__(self, lemmatization)

        # Under-sampler with a fixed seed.
        self.sampler = RandomUnderSampler(random_state=0)

        # Extractors handed to BugExtractor; keywords() receives {"qawanted"}.
        feature_extractors = [
            bug_features.has_str(),
            bug_features.has_regression_range(),
            bug_features.severity(),
            bug_features.keywords({"qawanted"}),
            bug_features.is_coverity_issue(),
            bug_features.has_crash_signature(),
            bug_features.has_url(),
            bug_features.has_w3c_url(),
            bug_features.has_github_url(),
            bug_features.whiteboard(),
            bug_features.patches(),
            bug_features.landings(),
        ]

        # Cleanup callables handed to BugExtractor.
        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # Step 1: feature extraction with rollback driven by self.rollback.
        bug_extractor = bug_features.BugExtractor(
            feature_extractors,
            cleanup_functions,
            rollback=True,
            rollback_when=self.rollback,
        )
        # Step 2: one vectorizer per column ("data", "title", "comments").
        union = ColumnTransformer(
            [
                ("data", DictVectorizer(), "data"),
                ("title", self.text_vectorizer(), "title"),
                ("comments", self.text_vectorizer(), "comments"),
            ]
        )
        self.extraction_pipeline = Pipeline(
            [("bug_extractor", bug_extractor), ("union", union)]
        )

        cpu_count = utils.get_physical_cpu_count()
        self.clf = xgboost.XGBClassifier(n_jobs=cpu_count)
        self.clf.set_params(predictor="cpu_predictor")
開發者ID:mozilla,項目名稱:bugbug,代碼行數:54,代碼來源:qaneeded.py

示例10: __init__

# 需要導入模塊: from imblearn import under_sampling [as 別名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 別名]
def __init__(self, lemmatization=False):
        """Configure sampler, extraction pipeline and classifier, with commit
        data enabled and cross-validation switched off.

        Args:
            lemmatization: Forwarded unchanged to ``BugModel.__init__``.
        """
        BugModel.__init__(self, lemmatization, commit_data=True)

        self.cross_validation_enabled = False

        # Under-sampler with a fixed seed.
        self.sampler = RandomUnderSampler(random_state=0)

        # Extractors handed to BugExtractor, including the commit_* ones.
        feature_extractors = [
            bug_features.has_str(),
            bug_features.has_regression_range(),
            bug_features.severity(),
            bug_features.keywords({"dev-doc-needed", "dev-doc-complete"}),
            bug_features.is_coverity_issue(),
            bug_features.has_crash_signature(),
            bug_features.has_url(),
            bug_features.has_w3c_url(),
            bug_features.has_github_url(),
            bug_features.whiteboard(),
            bug_features.patches(),
            bug_features.landings(),
            bug_features.product(),
            bug_features.component(),
            bug_features.commit_added(),
            bug_features.commit_deleted(),
            bug_features.commit_types(),
        ]

        # Cleanup callables handed to BugExtractor.
        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # Step 1: feature extraction with rollback and commit data enabled.
        bug_extractor = bug_features.BugExtractor(
            feature_extractors,
            cleanup_functions,
            rollback=True,
            rollback_when=self.rollback,
            commit_data=True,
        )
        # Step 2: one vectorizer per column ("data", "title", "comments").
        union = ColumnTransformer(
            [
                ("data", DictVectorizer(), "data"),
                ("title", self.text_vectorizer(), "title"),
                ("comments", self.text_vectorizer(), "comments"),
            ]
        )
        self.extraction_pipeline = Pipeline(
            [("bug_extractor", bug_extractor), ("union", union)]
        )

        cpu_count = utils.get_physical_cpu_count()
        self.clf = xgboost.XGBClassifier(n_jobs=cpu_count)
        self.clf.set_params(predictor="cpu_predictor")
開發者ID:mozilla,項目名稱:bugbug,代碼行數:62,代碼來源:devdocneeded.py

示例11: __init__

# 需要導入模塊: from imblearn import under_sampling [as 別名]
# 或者: from imblearn.under_sampling import RandomUnderSampler [as 別名]
def __init__(self, lemmatization=False):
        """Configure sampler, extraction pipeline and classifier.

        Args:
            lemmatization: Forwarded unchanged to ``BugModel.__init__``.
        """
        BugModel.__init__(self, lemmatization)

        # Under-sampler with a fixed seed.
        self.sampler = RandomUnderSampler(random_state=0)

        # Extractors handed to BugExtractor; keywords() receives the two
        # regression-related keywords.
        feature_extractors = [
            bug_features.has_str(),
            bug_features.severity(),
            bug_features.keywords({"regression", "regressionwindow-wanted"}),
            bug_features.is_coverity_issue(),
            bug_features.has_crash_signature(),
            bug_features.has_url(),
            bug_features.has_w3c_url(),
            bug_features.has_github_url(),
            bug_features.whiteboard(),
            bug_features.patches(),
            bug_features.landings(),
        ]

        # Cleanup callables handed to BugExtractor.
        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # Step 1: feature extraction (no rollback arguments are passed here).
        bug_extractor = bug_features.BugExtractor(feature_extractors, cleanup_functions)
        # Step 2: one vectorizer per column ("data", "title", "comments").
        union = ColumnTransformer(
            [
                ("data", DictVectorizer(), "data"),
                ("title", self.text_vectorizer(), "title"),
                ("comments", self.text_vectorizer(), "comments"),
            ]
        )
        self.extraction_pipeline = Pipeline(
            [("bug_extractor", bug_extractor), ("union", union)]
        )

        cpu_count = utils.get_physical_cpu_count()
        self.clf = xgboost.XGBClassifier(n_jobs=cpu_count)
        self.clf.set_params(predictor="cpu_predictor")
開發者ID:mozilla,項目名稱:bugbug,代碼行數:48,代碼來源:regressionrange.py


注:本文中的imblearn.under_sampling.RandomUnderSampler方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。