当前位置: 首页>>代码示例>>Python>>正文


Python preprocessing.MultiLabelBinarizer方法代码示例

本文整理汇总了Python中sklearn.preprocessing.MultiLabelBinarizer方法的典型用法代码示例。如果您正苦于以下问题:Python preprocessing.MultiLabelBinarizer方法的具体用法?Python preprocessing.MultiLabelBinarizer怎么用?Python preprocessing.MultiLabelBinarizer使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.preprocessing的用法示例。

在下文中一共展示了preprocessing.MultiLabelBinarizer方法的29个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def __init__(self, model_module, weights_path, evaluation_strategy="s2"):
        """
        Test metadata format
        ---------------------
        filename : string
        class_ids: string of ints with space as a delimiter
        """
        test_dataset = pd.read_csv(IRMAS_TESTING_META_PATH, names=["filename", "class_ids"])
        self.X = list(test_dataset.filename)
        targets = [[int(category) for category in target.split()] for target in test_dataset.class_ids]
        self.ml_binarizer = MultiLabelBinarizer().fit(targets)
        self.y_true = self.ml_binarizer.transform(targets)

        self.y_pred = np.zeros(shape=self.y_true.shape)
        self.y_pred_raw = np.zeros(shape=self.y_true.shape)
        self.y_pred_raw_average = np.zeros(shape=self.y_true.shape)
        self.model_module = model_module
        self.weights_path = weights_path
        self.feature_filenames = os.listdir(os.path.join(IRMAS_TEST_FEATURE_BASEPATH, model_module.BASE_NAME))
        self.dataset_mean = np.load(os.path.join(MODEL_MEANS_BASEPATH, "{}_mean.npy".format(model_module.BASE_NAME)))
        self.evaluation_strategy = evaluation_strategy
        self.thresholds_s1 = [0.10, 0.12, 0.14, 0.16, 0.18, 0.20, 0.22, 0.24]
        self.thresholds_s2 = [0.25, 0.30, 0.35, 0.40, 0.45, 0.50, 0.55, 0.60] 
开发者ID:Veleslavia,项目名称:EUSIPCO2017,代码行数:25,代码来源:evaluation.py


示例2: cat_onehot_encoder_m

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def cat_onehot_encoder_m(df,y,col,selection=True):
    ## ZJN: test raise memory error
    # raise MemoryError


    mlbs = MultiLabelBinarizer(sparse_output=True).fit(df.values)
    from scipy.sparse import csr_matrix
    features_tmp = mlbs.transform(df.values)
    features_tmp = csr_matrix(features_tmp,dtype=float).tocsr()
    models = None
    auc_score = None
    if selection is True:
        auc_score, models = train_lightgbm_for_feature_selection(features_tmp, y)
        print(col, "auc", auc_score)
    #new_feature = pd.DataFrame(features_tmp,columns=["mul_feature_"+col])
    new_feature = features_tmp
    from scipy.sparse import hstack



    return new_feature,mlbs,models,auc_score 
开发者ID:DominickZhang,项目名称:KDDCup2019_admin,代码行数:23,代码来源:feature_expansion.py


示例3: setUp

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def setUp(self):
        FILENAME = "../data/images/overfeat_raw.txt"
        data = prepare.data_from_csv(FILENAME, sep='\\t')

        TARGET = 'Labels'
        self.SENS = ['Race']
        self.EXPL = []

        labeled_data = [ast.literal_eval(s) for s in data[TARGET]]
        for l in labeled_data:
            assert len(l) == 5
        label_encoder = preprocessing.MultiLabelBinarizer()
        labeled_data = label_encoder.fit_transform(labeled_data)
        labels = label_encoder.classes_
        df_labels = pd.DataFrame(labeled_data, columns=labels)
        self.data = DataSource(pd.concat([data.drop(TARGET, axis=1), df_labels],
                                         axis=1))
        self.TARGET = labels.tolist() 
开发者ID:columbia,项目名称:fairtest,代码行数:20,代码来源:test_discovery.py


示例4: evaluate

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def evaluate(self, preds):
        acc = eval_func.sequential_accuracy(
            [self.label_dict[srcid] for srcid in preds.keys()],
            [preds[srcid] for srcid in preds.keys()])
        pred = [preds[srcid] for srcid in preds.keys()]
        true = [self.label_dict[srcid] for srcid in preds.keys()]
        mlb = MultiLabelBinarizer()
        mlb.fit(pred + true)
        encoded_true = mlb.transform(true)
        encoded_pred = mlb.transform(pred)
        macro_f1 = f1_score(encoded_true, encoded_pred, average='macro')
        f1 = f1_score(encoded_true, encoded_pred, average='weighted')
        res = {
            'accuracy': acc,
            'f1': f1,
            'macro_f1': macro_f1
        }
        return res 
开发者ID:plastering,项目名称:plastering,代码行数:20,代码来源:char2ir_gpu.py


示例5: feature_vectorizer

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def feature_vectorizer(X_train, X_test, y_train, y_test):
    """prepare X data with tfidf and y with multi label binarizer"""
    vectorizer = TfidfVectorizer(
        analyzer="word", min_df=0.0,
        max_df=1.0, strip_accents=None,
        encoding="utf-8", preprocessor=None,
        token_pattern=r"(?u)\S\S+", max_features=1000,
    )
    # fit only training data
    vectorizer.fit(X_train)
    _save_data(vectorizer, "/workdir/models/X_vectorizer.pk")
    X_train_features = vectorizer.transform(X_train)
    X_test_features = vectorizer.transform(X_test)
    # use multiLabelBinarizer to create one-hot encoding of labels for y data
    mlb = MultiLabelBinarizer()
    # fit only training data
    mlb.fit(y_train)
    _save_data(mlb, "/workdir/models/label_binarizer.pk")
    y_train_features = mlb.transform(y_train)
    y_test_features = mlb.transform(y_test)
    return X_train_features, X_test_features, y_train_features, y_test_features 
开发者ID:intel,项目名称:stacks-usecase,代码行数:23,代码来源:train.py


示例6: _build_label_dict

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def _build_label_dict(self,
                          labels: List[str]):
        from sklearn.preprocessing import MultiLabelBinarizer
        if self.multi_label:
            label_set = set()
            for i in labels:
                label_set = label_set.union(list(i))
        else:
            label_set = set(labels)
        self.label2idx = {}
        for idx, label in enumerate(sorted(label_set)):
            self.label2idx[label] = len(self.label2idx)

        self.idx2label = dict([(value, key) for key, value in self.label2idx.items()])
        self.dataset_info['label_count'] = len(self.label2idx)
        self.multi_label_binarizer = MultiLabelBinarizer(classes=list(self.label2idx.keys())) 
开发者ID:shibing624,项目名称:text2vec,代码行数:18,代码来源:default_processor.py


示例7: text_similarity

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def text_similarity(df, col):
        """
        Convert strings to their unicode representation and then apply one hot encoding, creating one feature for each unique character in the column. 
        This can be useful when similarity between strings is significant.
        """
        
        unique = pd.DataFrame(df[col].unique(), columns=[col])
        
        encoded = pd.DataFrame(unique.loc[:,col].apply(lambda s: [ord(a) for a in s]), index=unique.index)
        
        mlb = preprocessing.MultiLabelBinarizer()
        encoded = pd.DataFrame(mlb.fit_transform(encoded[col]),columns=mlb.classes_, index=encoded.index).add_prefix(col+"_")
        
        unique = unique.join(encoded)
        
        return unique.set_index(col) 
开发者ID:nabeel-oz,项目名称:qlik-py-tools,代码行数:18,代码来源:_machine_learning.py


示例8: prepVect

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def prepVect(min_df=2, max_features=50000, n_captions=5, n_sbu=None,
             multilabel=False):
    print "prepping the Word Tokenizer..."
    _0, _1, trY, _3 = coco(mode='full', n_captions=n_captions)
    if n_sbu:
        _4, sbuY, _5 = sbuXYFilenames(n_sbu)
        trY.extend(sbuY)
    vect = Tokenizer(min_df=min_df, max_features=max_features)
    captions = sampleCaptions(trY, n_captions)
    vect.fit(captions)
    if multilabel:
        mlb = MultiLabelBinarizer()
        mlb.fit(vect.transform(captions))
        return vect, mlb
    # if not multilabel:
    return vect 
开发者ID:youralien,项目名称:image-captioning-for-mortals,代码行数:18,代码来源:pipeline.py


示例9: test_objectmapper

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.preprocessing.Binarizer, pp.Binarizer)
        self.assertIs(df.preprocessing.FunctionTransformer,
                      pp.FunctionTransformer)
        self.assertIs(df.preprocessing.Imputer, pp.Imputer)
        self.assertIs(df.preprocessing.KernelCenterer, pp.KernelCenterer)
        self.assertIs(df.preprocessing.LabelBinarizer, pp.LabelBinarizer)
        self.assertIs(df.preprocessing.LabelEncoder, pp.LabelEncoder)
        self.assertIs(df.preprocessing.MultiLabelBinarizer, pp.MultiLabelBinarizer)
        self.assertIs(df.preprocessing.MaxAbsScaler, pp.MaxAbsScaler)
        self.assertIs(df.preprocessing.MinMaxScaler, pp.MinMaxScaler)
        self.assertIs(df.preprocessing.Normalizer, pp.Normalizer)
        self.assertIs(df.preprocessing.OneHotEncoder, pp.OneHotEncoder)
        self.assertIs(df.preprocessing.PolynomialFeatures, pp.PolynomialFeatures)
        self.assertIs(df.preprocessing.RobustScaler, pp.RobustScaler)
        self.assertIs(df.preprocessing.StandardScaler, pp.StandardScaler) 
开发者ID:pandas-ml,项目名称:pandas-ml,代码行数:19,代码来源:test_preprocessing.py


示例10: NodeClassification

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def NodeClassification(embedding_look_up, node_list, labels, testing_ratio, seed):

    X_train, y_train, X_test, y_test = split_train_test_classify(embedding_look_up, node_list, labels,
                                                                 testing_ratio=testing_ratio,seed=seed)
    binarizer = MultiLabelBinarizer(sparse_output=True)
    y_all = np.append(y_train, y_test)
    binarizer.fit(y_all)
    y_train = binarizer.transform(y_train).todense()
    y_test = binarizer.transform(y_test).todense()
    model = OneVsRestClassifier(LogisticRegression(random_state=seed, solver='lbfgs'))
    model.fit(X_train, y_train)
    y_pred_prob = model.predict_proba(X_test)

    ## small trick : we assume that we know how many label to predict
    y_pred = get_y_pred(y_test, y_pred_prob)

    accuracy = accuracy_score(y_test, y_pred)
    micro_f1 = f1_score(y_test, y_pred, average="micro")
    macro_f1 = f1_score(y_test, y_pred, average="macro")

    print('#' * 9 + ' Node Classification Performance ' + '#' * 9)
    print(f'Accuracy: {accuracy:.3f}, Micro-F1: {micro_f1:.3f}, Macro-F1: {macro_f1:.3f}')
    print('#' * 50)
    return accuracy, micro_f1, macro_f1 
开发者ID:xiangyue9607,项目名称:BioNEV,代码行数:26,代码来源:evaluation.py


示例11: build_input_label_data

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def build_input_label_data(labels, class_order):
    from sklearn.preprocessing import MultiLabelBinarizer
    from itertools import chain

    bml = MultiLabelBinarizer(classes=class_order, sparse_output=True)
    indexes = sp.find(bml.fit_transform(labels)) 
    y = []

    for i in range(len(labels)):
        y.append([])
    for i,j in zip(indexes[0], indexes[1]):
        y[i].append(j)
    return y

# padding operation
# ========================================================= 
开发者ID:ShimShim46,项目名称:HFT-CNN,代码行数:18,代码来源:data_helper.py


示例12: __init__

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def __init__(self, inputs, labels, test_indices=None, **kwargs):
        """Encapsulates all pieces of data to run an experiment. This is basically a bag of items that makes it
        easy to serialize and deserialize everything as a unit.

        Args:
            inputs: The raw model inputs. This can be set to None if you dont want
                to serialize this value when you save the dataset.
            labels: The raw output labels.
            test_indices: The optional test indices to use. Ideally, this should be generated one time and reused
                across experiments to make results comparable. `generate_test_indices` can be used generate first
                time indices.
            **kwargs: Additional key value items to store.
        """
        self.X = np.array(inputs)
        self.y = np.array(labels)
        for key, value in kwargs.items():
            setattr(self, key, value)

        self._test_indices = None
        self._train_indices = None
        self.test_indices = test_indices

        self.is_multi_label = isinstance(labels[0], (set, list, tuple))
        self.label_encoder = MultiLabelBinarizer() if self.is_multi_label else LabelBinarizer()
        self.y = self.label_encoder.fit_transform(self.y).flatten() 
开发者ID:raghakot,项目名称:keras-text,代码行数:27,代码来源:data.py


示例13: __init__

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def __init__(self, vectors, clf):
        self.embeddings = vectors
        self.clf = TopKRanker(clf)
        self.binarizer = MultiLabelBinarizer(sparse_output=True) 
开发者ID:thunlp,项目名称:OpenNE,代码行数:6,代码来源:classify.py


示例14: setUp

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def setUp(self):
        dataset_filepath = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'datasets/yeast_train.svm')
        X, y = load_svmlight_file(dataset_filepath, multilabel=True)
        self.X = X.todense().tolist()
        self.y = MultiLabelBinarizer().fit_transform(y).tolist()
        self.quota = 10 
开发者ID:ntucllab,项目名称:libact,代码行数:10,代码来源:test_multilabel_realdata.py


示例15: main

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def main(argv=sys.argv):
    if len(argv) != 1:
        usage(argv)

    FILENAME = "../../../data/recommender/recommendations.txt"
    OUTPUT_DIR = "."
    data = prepare.data_from_csv(FILENAME, sep='\\t',
                                 to_drop=['RMSE', 'Avg Movie Age',
                                          'Avg Recommended Rating',
                                          'Avg Seen Rating', 'Occupation'])
    TARGET = 'Types'
    SENS = ['Gender']

    EXPL = []
    labeled_data = [ast.literal_eval(s) for s in data[TARGET]]
    for labels in labeled_data:
        assert len(labels) == 5
    label_encoder = preprocessing.MultiLabelBinarizer()
    labeled_data = label_encoder.fit_transform(labeled_data)
    labels = label_encoder.classes_
    df_labels = pd.DataFrame(labeled_data, columns=labels)
    data = pd.concat([data.drop(TARGET, axis=1), df_labels], axis=1)
    TARGET = labels.tolist()

    data_source = DataSource(data)

    # Instantiate the experiment
    inv = Discovery(data_source, SENS, TARGET, EXPL, topk=10, random_state=0)

    # Train the classifier
    train([inv])

    # Evaluate on the testing set
    test([inv])

    # Create the report
    report([inv], "discovery", OUTPUT_DIR) 
开发者ID:columbia,项目名称:fairtest,代码行数:39,代码来源:discovery.py


示例16: __init__

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def __init__(self, embeddings, clf):
        self.embeddings = embeddings
        self.clf = TopKRanker(clf)
        self.binarizer = MultiLabelBinarizer(sparse_output=True) 
开发者ID:shenweichen,项目名称:GraphEmbedding,代码行数:6,代码来源:classify.py


示例17: binarize_labels

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def binarize_labels(labels, sparse_output=False, return_classes=False):
    """Convert labels vector to a binary label matrix.

    In the default single-label case, labels look like
    labels = [y1, y2, y3, ...].
    Also supports the multi-label format.
    In this case, labels should look something like
    labels = [[y11, y12], [y21, y22, y23], [y31], ...].

    Parameters
    ----------
    labels : array-like, shape [num_samples]
        Array of node labels in categorical single- or multi-label format.
    sparse_output : bool, default False
        Whether return the label_matrix in CSR format.
    return_classes : bool, default False
        Whether return the classes corresponding to the columns of the label matrix.

    Returns
    -------
    label_matrix : np.ndarray or sp.csr_matrix, shape [num_samples, num_classes]
        Binary matrix of class labels.
        num_classes = number of unique values in "labels" array.
        label_matrix[i, k] = 1 <=> node i belongs to class k.
    classes : np.array, shape [num_classes], optional
        Classes that correspond to each column of the label_matrix.

    """
    if hasattr(labels[0], '__iter__'):  # labels[0] is iterable <=> multilabel format
        binarizer = MultiLabelBinarizer(sparse_output=sparse_output)
    else:
        binarizer = LabelBinarizer(sparse_output=sparse_output)
    label_matrix = binarizer.fit_transform(labels).astype(np.float32)
    return (label_matrix, binarizer.classes_) if return_classes else label_matrix 
开发者ID:shchur,项目名称:gnn-benchmark,代码行数:36,代码来源:preprocess.py


示例18: multilabel_to_indicator_df

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def multilabel_to_indicator_df(y: List[List[str]], labels: List[str]) -> pd.DataFrame:
    """
    Convert a list of label lists to a 0/1 indicator dataframe.

    Args:
      y: List of label lists
      labels: List of all unique labels found in y

    Returns:
      The dataframe will have a column for each label and a row for each observation,
      with a 1 if the observation has that label or a 0 if not.
    """
    mlb = MultiLabelBinarizer(classes=labels)
    return pd.DataFrame(mlb.fit_transform(y), columns=mlb.classes_) 
开发者ID:RTIInternational,项目名称:gobbli,代码行数:16,代码来源:util.py


示例19: __init__

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def __init__(self, estimator: TEXT_CLASSIFIER_ESTIMATOR, multilabel=False, **params):
        self.multilabel = multilabel
        if multilabel:
            self.y_encoder = MultiLabelBinarizer()
        self.estimator = estimator
        self.params = params
        if estimator == TEXT_CLASSIFIER_ESTIMATOR.FAST_TEXT:
            self.ft = None
        if estimator == TEXT_CLASSIFIER_ESTIMATOR.SVC:
            self.svc = None
        if estimator == TEXT_CLASSIFIER_ESTIMATOR.PIPELINE:
            if "pipeline" in params:
                self.pipeline = params["pipeline"]
            else:
                self.pipeline = None 
开发者ID:undertheseanlp,项目名称:underthesea,代码行数:17,代码来源:text_classifier.py


示例20: binarize_labels

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def binarize_labels(true_labels, pred_labels, excluding_labels=[]):
    excluding_labels = ['building-ebu3b']
    srcids = list(pred_labels.keys())
    tot_labels = [[label for label in labels if label not in excluding_labels]
                   for labels in list(pred_labels.values()) + list(true_labels.values())
                  ]
    mlb = MultiLabelBinarizer().fit(tot_labels)
    pred_mat = mlb.transform(pred_labels.values())
    true_mat = mlb.transform(true_labels.values())
    return true_mat, pred_mat 
开发者ID:plastering,项目名称:plastering,代码行数:12,代码来源:eval_func.py


示例21: evaluate

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def evaluate(self, preds):
        srcids = list(preds.keys())
        pred_tags_list = [reduce(adder,
                                 [preds[srcid][t]
                                  for t in self.available_metadata_types])
                          for srcid in srcids]
        true_tags_list = [reduce(adder,
                                 [self.label_dict[srcid][t]
                                  for t in self.available_metadata_types])
                          for srcid in srcids]
        acc = eval_func.sequential_accuracy(true_tags_list,
                                            pred_tags_list)

        pred = [preds[srcid] for srcid in preds.keys()]
        true = [self.label_dict[srcid] for srcid in preds.keys()]
        mlb = MultiLabelBinarizer()
        mlb.fit(pred + true)
        encoded_true = mlb.transform(true)
        encoded_pred = mlb.transform(pred)
        macro_f1 = f1_score(encoded_true, encoded_pred, average='macro')
        f1 = f1_score(encoded_true, encoded_pred, average='weighted')
        res = {
            'accuracy': acc,
            'f1': f1,
            'macro_f1': macro_f1
        }
        return res 
开发者ID:plastering,项目名称:plastering,代码行数:29,代码来源:char2ir.py


示例22: binarize_labels

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def binarize_labels(true_labels, pred_labels):
    srcids = list(pred_labels.keys())
    tot_labels = [list(labels) for labels in
               list(pred_labels.values()) + list(true_labels.values())]
    mlb = MultiLabelBinarizer().fit(tot_labels)
    pred_mat = mlb.transform(pred_labels.values())
    true_mat = mlb.transform(true_labels.values())
    return true_mat, pred_mat 
开发者ID:plastering,项目名称:plastering,代码行数:10,代码来源:evaluator.py


示例23: fit

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def fit(self, X, y=None):
        Xsplit = X.applymap(lambda x: x.split(self.sep))
        self.mlbs = [MultiLabelBinarizer().fit(Xsplit[c]) for c in X.columns]
        return self 
开发者ID:jem1031,项目名称:pandas-pipelines-custom-transformers,代码行数:6,代码来源:custom_transformers.py


示例24: explicitness_per_factor

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def explicitness_per_factor(mus_train, y_train, mus_test, y_test):
  """Compute explicitness score for a factor as ROC-AUC of a classifier.

  Args:
    mus_train: Representation for training, (num_codes, num_points)-np array.
    y_train: Ground truth factors for training, (num_factors, num_points)-np
      array.
    mus_test: Representation for testing, (num_codes, num_points)-np array.
    y_test: Ground truth factors for testing, (num_factors, num_points)-np
      array.

  Returns:
    roc_train: ROC-AUC score of the classifier on training data.
    roc_test: ROC-AUC score of the classifier on testing data.
  """
  x_train = np.transpose(mus_train)
  x_test = np.transpose(mus_test)
  clf = LogisticRegression().fit(x_train, y_train)
  y_pred_train = clf.predict_proba(x_train)
  y_pred_test = clf.predict_proba(x_test)
  mlb = MultiLabelBinarizer()
  roc_train = roc_auc_score(mlb.fit_transform(np.expand_dims(y_train, 1)),
                            y_pred_train)
  roc_test = roc_auc_score(mlb.fit_transform(np.expand_dims(y_test, 1)),
                           y_pred_test)
  return roc_train, roc_test 
开发者ID:google-research,项目名称:disentanglement_lib,代码行数:28,代码来源:modularity_explicitness.py


示例25: test_BRKnna_no_labels_take_closest

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def test_BRKnna_no_labels_take_closest(self):
        data = csr.csr_matrix([[0, 1], [1, 1], [1, 1.1], [0, 1]])
        train_ids = [['lid0', 'lid1'], ['lid2', 'lid3'], ['lid2', 'lid3'], ['lid0', 'lid5']]
        mlb = MultiLabelBinarizer(sparse_output=True)
        y = mlb.fit_transform(train_ids)
        knn = BRKNeighborsClassifier(n_neighbors=2, threshold=0.6, mode='a')
        knn.fit(data, y)

        pred = knn.predict(csr.csr_matrix([[0, 1]])).todense()
        print(pred)
        np.testing.assert_array_equal([[1, 0, 0, 0, 0]], pred) 
开发者ID:quadflor,项目名称:Quadflor,代码行数:13,代码来源:test_BRKNN.py


示例26: test_BRKnna_predict

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def test_BRKnna_predict(self):
        data = csr.csr_matrix([[0, 1], [1, 1], [1, 1.1], [0.5, 1]])
        train_ids = [['lid0', 'lid1'], ['lid2', 'lid3'], ['lid4', 'lid3'], ['lid4', 'lid5']]
        mlb = MultiLabelBinarizer(sparse_output=True)
        y = mlb.fit_transform(train_ids)

        knn = BRKNeighborsClassifier(threshold=0.5, n_neighbors=3, mode='a')
        knn.fit(data, y)

        pred = knn.predict(csr.csr_matrix([[1.1, 1.1]])).todense()
        np.testing.assert_array_equal([[0, 0, 0, 1, 1, 0]], pred) 
开发者ID:quadflor,项目名称:Quadflor,代码行数:13,代码来源:test_BRKNN.py


示例27: test_BRKnna_predict_dense

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def test_BRKnna_predict_dense(self):
        data = csr.csr_matrix([[0, 1], [1, 1], [1, 1.1], [0.5, 1]])
        train_ids = [['lid0', 'lid1'], ['lid2', 'lid3'], ['lid4', 'lid3'], ['lid4', 'lid5']]
        mlb = MultiLabelBinarizer()
        y = mlb.fit_transform(train_ids)

        knn = BRKNeighborsClassifier(threshold=0.5, n_neighbors=3, mode='a')
        knn.fit(data, y)

        pred = knn.predict(csr.csr_matrix([[1.1, 1.1]])).todense()
        np.testing.assert_array_equal([[0, 0, 0, 1, 1, 0]], pred) 
开发者ID:quadflor,项目名称:Quadflor,代码行数:13,代码来源:test_BRKNN.py


示例28: test_BRKnnb_predict

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def test_BRKnnb_predict(self):
        data = csr.csr_matrix([[0, 1], [1, 1], [1.5, 1], [0.5, 1]])
        train_ids = [['lid0', 'lid1'], ['lid0', 'lid1'], ['lid4', 'lid3'], ['lid4', 'lid5']]
        mlb = MultiLabelBinarizer(sparse_output=True)
        y = mlb.fit_transform(train_ids)

        knn = BRKNeighborsClassifier(mode='b', n_neighbors=3)
        knn.fit(data, y)

        pred = knn.predict(csr.csr_matrix([[0, 1]])).todense()
        np.testing.assert_array_equal([[1, 1, 0, 0, 0]], pred) 
开发者ID:quadflor,项目名称:Quadflor,代码行数:13,代码来源:test_BRKNN.py


示例29: test_BRKnnb_predict_dense

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
def test_BRKnnb_predict_dense(self):
        data = csr.csr_matrix([[0, 1], [1, 1], [1.5, 1], [0.5, 1]])
        train_ids = [['lid0', 'lid1'], ['lid0', 'lid1'], ['lid4', 'lid3'], ['lid4', 'lid5']]
        mlb = MultiLabelBinarizer(sparse_output=False)
        y = mlb.fit_transform(train_ids)

        knn = BRKNeighborsClassifier(mode='b', n_neighbors=3)
        knn.fit(data, y)

        pred = knn.predict(csr.csr_matrix([[0, 1]])).todense()
        np.testing.assert_array_equal([[1, 1, 0, 0, 0]], pred) 
开发者ID:quadflor,项目名称:Quadflor,代码行数:13,代码来源:test_BRKNN.py



注:本文中的sklearn.preprocessing.MultiLabelBinarizer方法示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。