This article collects typical usage examples of imblearn.under_sampling.RandomUnderSampler in Python. If you are wondering what under_sampling.RandomUnderSampler does, how it is used, or what it looks like in real code, the curated examples below may help. You can also explore the wider imblearn.under_sampling module that provides this class.
The following sections present 11 code examples of under_sampling.RandomUnderSampler, sorted by popularity by default.
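Before the collected examples, here is a minimal, self-contained sketch of the basic API (written against imbalanced-learn 0.8+, where fit_resample is the resampling entry point; older releases exposed the same behaviour under the name fit_sample):

from collections import Counter

from sklearn.datasets import make_classification
from imblearn.under_sampling import RandomUnderSampler

# Build a toy imbalanced dataset: roughly 90% majority class, 10% minority class
X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=0)
print(Counter(y))            # roughly 900 majority vs. 100 minority samples

# Randomly drop majority-class rows until both classes have the same size
rus = RandomUnderSampler(random_state=0)
X_resampled, y_resampled = rus.fit_resample(X, y)
print(Counter(y_resampled))  # both classes now have the minority-class count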
Example 1: transform
# Required import: from imblearn import under_sampling [as alias]
# Or: from imblearn.under_sampling import RandomUnderSampler [as alias]
def transform(self, X, y=None):
    # TODO how do we validate this happens before train/test split? Or do we need to? Can we implement it in the
    # TODO simple trainer in the correct order and leave this to advanced users?
    # Extract predicted column
    y = np.squeeze(X[[self.predicted_column]])
    # Copy the dataframe without the predicted column
    temp_dataframe = X.drop([self.predicted_column], axis=1)
    # Initialize and fit the under sampler
    under_sampler = RandomUnderSampler(random_state=self.random_seed)
    x_under_sampled, y_under_sampled = under_sampler.fit_sample(temp_dataframe, y)
    # Build the resulting under sampled dataframe
    result = pd.DataFrame(x_under_sampled)
    # Restore the column names
    result.columns = temp_dataframe.columns
    # Restore the y values
    y_under_sampled = pd.Series(y_under_sampled)
    result[self.predicted_column] = y_under_sampled
    return result
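Note that fit_sample is the legacy name of the resampling method; it was superseded by fit_resample in imbalanced-learn 0.4 and removed in later releases. A standalone re-sketch of the same idea against the current API (undersample_dataframe is a hypothetical helper name, not part of the original project):

import pandas as pd
from imblearn.under_sampling import RandomUnderSampler

def undersample_dataframe(df, predicted_column, random_seed=0):
    """Return a randomly under-sampled copy of df, balanced on predicted_column."""
    y = df[predicted_column].to_numpy()
    features = df.drop(columns=[predicted_column])

    under_sampler = RandomUnderSampler(random_state=random_seed)
    # fit_resample is the 0.4+ replacement for the fit_sample call used above
    x_res, y_res = under_sampler.fit_resample(features.to_numpy(), y)

    # Rebuild a DataFrame with the original column names and the target column restored
    result = pd.DataFrame(x_res, columns=features.columns)
    result[predicted_column] = y_res
    return result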
Example 2: create_sampler
# Required import: from imblearn import under_sampling [as alias]
# Or: from imblearn.under_sampling import RandomUnderSampler [as alias]
def create_sampler(sampler_name, random_state=None):
    if sampler_name is None or sampler_name == 'None':
        return None
    if sampler_name.lower() == 'randomundersampler':
        return RandomUnderSampler(random_state=random_state)
    if sampler_name.lower() == 'tomeklinks':
        return TomekLinks(random_state=random_state)
    if sampler_name.lower() == 'enn':
        return EditedNearestNeighbours(random_state=random_state)
    if sampler_name.lower() == 'ncl':
        return NeighbourhoodCleaningRule(random_state=random_state)
    if sampler_name.lower() == 'randomoversampler':
        return RandomOverSampler(random_state=random_state)
    if sampler_name.lower() == 'smote':
        return SMOTE(random_state=random_state)
    if sampler_name.lower() == 'smotetomek':
        return SMOTETomek(random_state=random_state)
    if sampler_name.lower() == 'smoteenn':
        return SMOTEENN(random_state=random_state)
    else:
        raise ValueError('Unsupported value \'%s\' for sampler' % sampler_name)
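A factory like this is typically dropped straight into a resampling call, as in the hedged sketch below (X_train and y_train are assumed to exist; note also that recent imbalanced-learn releases removed the random_state argument from the deterministic cleaning samplers TomekLinks, EditedNearestNeighbours and NeighbourhoodCleaningRule, so those branches would need adjusting there):

sampler = create_sampler('randomundersampler', random_state=42)
if sampler is not None:
    # fit_resample is the current method name; older releases exposed fit_sample
    X_balanced, y_balanced = sampler.fit_resample(X_train, y_train)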
Example 3: test_objectmapper_undersampling
# Required import: from imblearn import under_sampling [as alias]
# Or: from imblearn.under_sampling import RandomUnderSampler [as alias]
def test_objectmapper_undersampling(self):
    import imblearn.under_sampling as us
    df = pdml.ModelFrame([])
    self.assertIs(df.imbalance.under_sampling.ClusterCentroids,
                  us.ClusterCentroids)
    self.assertIs(df.imbalance.under_sampling.CondensedNearestNeighbour,
                  us.CondensedNearestNeighbour)
    self.assertIs(df.imbalance.under_sampling.EditedNearestNeighbours,
                  us.EditedNearestNeighbours)
    self.assertIs(df.imbalance.under_sampling.RepeatedEditedNearestNeighbours,
                  us.RepeatedEditedNearestNeighbours)
    self.assertIs(df.imbalance.under_sampling.InstanceHardnessThreshold,
                  us.InstanceHardnessThreshold)
    self.assertIs(df.imbalance.under_sampling.NearMiss,
                  us.NearMiss)
    self.assertIs(df.imbalance.under_sampling.NeighbourhoodCleaningRule,
                  us.NeighbourhoodCleaningRule)
    self.assertIs(df.imbalance.under_sampling.OneSidedSelection,
                  us.OneSidedSelection)
    self.assertIs(df.imbalance.under_sampling.RandomUnderSampler,
                  us.RandomUnderSampler)
    self.assertIs(df.imbalance.under_sampling.TomekLinks,
                  us.TomekLinks)
Example 4: __init__
# Required import: from imblearn import under_sampling [as alias]
# Or: from imblearn.under_sampling import RandomUnderSampler [as alias]
def __init__(self):
    super(DownSampling, self).__init__(RandomUnderSampler(random_state=RANDOM_SEED[BALANCE_DOWN_SAMPLING]),
                                       BALANCE_DOWN_SAMPLING)
Example 5: __undersample
# Required import: from imblearn import under_sampling [as alias]
# Or: from imblearn.under_sampling import RandomUnderSampler [as alias]
def __undersample(self, feature_names, X, y):
    logging.info("Random undersampling")
    undersampler = RandomUnderSampler(ratio=QuincyConfig.RATIO)
    X, y = undersampler.fit_sample(X, y)
    X = pandas.DataFrame(X, columns=feature_names)
    return X, y
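The ratio keyword used here was deprecated in imbalanced-learn 0.4 and later removed in favour of sampling_strategy, and fit_sample likewise became fit_resample. An equivalent sketch against the current API (QuincyConfig.RATIO stands in for whatever strategy value the original project configures):

undersampler = RandomUnderSampler(sampling_strategy=QuincyConfig.RATIO)
X_res, y_res = undersampler.fit_resample(X, y)
X_res = pandas.DataFrame(X_res, columns=feature_names)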
Example 6: resample
# Required import: from imblearn import under_sampling [as alias]
# Or: from imblearn.under_sampling import RandomUnderSampler [as alias]
def resample(self, X, y, target_size_strategy, seed):
    from imblearn.under_sampling import RandomUnderSampler as imblearn_RandomUnderSampler
    resampler = imblearn_RandomUnderSampler(sampling_strategy=target_size_strategy, random_state=seed)
    return resampler.fit_resample(X, y)
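sampling_strategy accepts, among other forms, a dict mapping each class label to the number of samples to keep after resampling, which is presumably what target_size_strategy carries here. A small illustrative call (the labels and counts are made up, and each count must not exceed that class's original size):

# Keep 500 samples of class 0 and all 120 samples of class 1
resampler = RandomUnderSampler(sampling_strategy={0: 500, 1: 120}, random_state=0)
X_res, y_res = resampler.fit_resample(X, y)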
Example 7: __init__
# Required import: from imblearn import under_sampling [as alias]
# Or: from imblearn.under_sampling import RandomUnderSampler [as alias]
def __init__(self, lemmatization=False):
    CommitModel.__init__(self, lemmatization)

    self.training_dbs.append(test_scheduling.TEST_LABEL_SCHEDULING_DB)

    self.sampler = RandomUnderSampler(random_state=0)

    feature_extractors = [
        commit_features.source_code_file_size(),
        commit_features.other_file_size(),
        commit_features.test_file_size(),
        commit_features.source_code_added(),
        commit_features.other_added(),
        commit_features.test_added(),
        commit_features.source_code_deleted(),
        commit_features.other_deleted(),
        commit_features.test_deleted(),
        # commit_features.author_experience(),
        # commit_features.reviewer_experience(),
        commit_features.reviewers_num(),
        # commit_features.component_touched_prev(),
        # commit_features.directory_touched_prev(),
        # commit_features.file_touched_prev(),
        commit_features.types(),
        commit_features.files(),
        commit_features.components(),
        commit_features.components_modified_num(),
        commit_features.directories(),
        commit_features.directories_modified_num(),
        commit_features.source_code_files_modified_num(),
        commit_features.other_files_modified_num(),
        commit_features.test_files_modified_num(),
    ]

    self.extraction_pipeline = Pipeline(
        [
            (
                "commit_extractor",
                commit_features.CommitExtractor(feature_extractors, []),
            ),
            ("union", ColumnTransformer([("data", DictVectorizer(), "data")])),
        ]
    )

    self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
    self.clf.set_params(predictor="cpu_predictor")
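In these bugbug-style models the sampler is only constructed in __init__; presumably the shared training code applies it to the vectorised features before fitting the classifier, roughly along these lines (train is a hypothetical sketch, not the project's actual base-class method):

def train(self, items, labels):
    # Vectorise the raw commit/bug dictionaries into a feature matrix
    X = self.extraction_pipeline.fit_transform(items)
    # Balance the classes by randomly dropping majority-class rows
    X_res, y_res = self.sampler.fit_resample(X, labels)
    self.clf.fit(X_res, y_res)
    return self.clf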
Example 8: __init__
# Required import: from imblearn import under_sampling [as alias]
# Or: from imblearn.under_sampling import RandomUnderSampler [as alias]
def __init__(self, lemmatization=False):
    BugModel.__init__(self, lemmatization)

    self.sampler = RandomUnderSampler(random_state=0)

    feature_extractors = [
        bug_features.has_str(),
        bug_features.has_regression_range(),
        bug_features.severity(),
        bug_features.keywords(),
        bug_features.is_coverity_issue(),
        bug_features.has_crash_signature(),
        bug_features.has_url(),
        bug_features.has_w3c_url(),
        bug_features.has_github_url(),
        bug_features.whiteboard(),
        bug_features.patches(),
        bug_features.landings(),
    ]

    cleanup_functions = [
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
    ]

    self.extraction_pipeline = Pipeline(
        [
            (
                "bug_extractor",
                bug_features.BugExtractor(
                    feature_extractors,
                    cleanup_functions,
                    rollback=True,
                    rollback_when=self.rollback,
                ),
            ),
            (
                "union",
                ColumnTransformer(
                    [
                        ("data", DictVectorizer(), "data"),
                        ("title", self.text_vectorizer(), "title"),
                        ("comments", self.text_vectorizer(), "comments"),
                    ]
                ),
            ),
        ]
    )

    self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
    self.clf.set_params(predictor="cpu_predictor")
Example 9: __init__
# Required import: from imblearn import under_sampling [as alias]
# Or: from imblearn.under_sampling import RandomUnderSampler [as alias]
def __init__(self, lemmatization=False):
    BugModel.__init__(self, lemmatization)

    self.sampler = RandomUnderSampler(random_state=0)

    feature_extractors = [
        bug_features.has_str(),
        bug_features.has_regression_range(),
        bug_features.severity(),
        bug_features.keywords({"qawanted"}),
        bug_features.is_coverity_issue(),
        bug_features.has_crash_signature(),
        bug_features.has_url(),
        bug_features.has_w3c_url(),
        bug_features.has_github_url(),
        bug_features.whiteboard(),
        bug_features.patches(),
        bug_features.landings(),
    ]

    cleanup_functions = [
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
    ]

    self.extraction_pipeline = Pipeline(
        [
            (
                "bug_extractor",
                bug_features.BugExtractor(
                    feature_extractors,
                    cleanup_functions,
                    rollback=True,
                    rollback_when=self.rollback,
                ),
            ),
            (
                "union",
                ColumnTransformer(
                    [
                        ("data", DictVectorizer(), "data"),
                        ("title", self.text_vectorizer(), "title"),
                        ("comments", self.text_vectorizer(), "comments"),
                    ]
                ),
            ),
        ]
    )

    self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
    self.clf.set_params(predictor="cpu_predictor")
Example 10: __init__
# Required import: from imblearn import under_sampling [as alias]
# Or: from imblearn.under_sampling import RandomUnderSampler [as alias]
def __init__(self, lemmatization=False):
    BugModel.__init__(self, lemmatization, commit_data=True)

    self.cross_validation_enabled = False

    self.sampler = RandomUnderSampler(random_state=0)

    feature_extractors = [
        bug_features.has_str(),
        bug_features.has_regression_range(),
        bug_features.severity(),
        bug_features.keywords({"dev-doc-needed", "dev-doc-complete"}),
        bug_features.is_coverity_issue(),
        bug_features.has_crash_signature(),
        bug_features.has_url(),
        bug_features.has_w3c_url(),
        bug_features.has_github_url(),
        bug_features.whiteboard(),
        bug_features.patches(),
        bug_features.landings(),
        bug_features.product(),
        bug_features.component(),
        bug_features.commit_added(),
        bug_features.commit_deleted(),
        bug_features.commit_types(),
    ]

    cleanup_functions = [
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
    ]

    self.extraction_pipeline = Pipeline(
        [
            (
                "bug_extractor",
                bug_features.BugExtractor(
                    feature_extractors,
                    cleanup_functions,
                    rollback=True,
                    rollback_when=self.rollback,
                    commit_data=True,
                ),
            ),
            (
                "union",
                ColumnTransformer(
                    [
                        ("data", DictVectorizer(), "data"),
                        ("title", self.text_vectorizer(), "title"),
                        ("comments", self.text_vectorizer(), "comments"),
                    ]
                ),
            ),
        ]
    )

    self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
    self.clf.set_params(predictor="cpu_predictor")
Example 11: __init__
# Required import: from imblearn import under_sampling [as alias]
# Or: from imblearn.under_sampling import RandomUnderSampler [as alias]
def __init__(self, lemmatization=False):
    BugModel.__init__(self, lemmatization)

    self.sampler = RandomUnderSampler(random_state=0)

    feature_extractors = [
        bug_features.has_str(),
        bug_features.severity(),
        bug_features.keywords({"regression", "regressionwindow-wanted"}),
        bug_features.is_coverity_issue(),
        bug_features.has_crash_signature(),
        bug_features.has_url(),
        bug_features.has_w3c_url(),
        bug_features.has_github_url(),
        bug_features.whiteboard(),
        bug_features.patches(),
        bug_features.landings(),
    ]

    cleanup_functions = [
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
    ]

    self.extraction_pipeline = Pipeline(
        [
            (
                "bug_extractor",
                bug_features.BugExtractor(feature_extractors, cleanup_functions),
            ),
            (
                "union",
                ColumnTransformer(
                    [
                        ("data", DictVectorizer(), "data"),
                        ("title", self.text_vectorizer(), "title"),
                        ("comments", self.text_vectorizer(), "comments"),
                    ]
                ),
            ),
        ]
    )

    self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
    self.clf.set_params(predictor="cpu_predictor")