

Python LabelBinarizer.transform Method Code Examples

This article collects typical usage examples of the Python method sklearn.preprocessing.label.LabelBinarizer.transform. If you are wondering what LabelBinarizer.transform does, how to call it, or what idiomatic usage looks like, the curated examples below should help. You can also explore further usage examples of the class it belongs to, sklearn.preprocessing.label.LabelBinarizer.


The following presents 5 code examples of the LabelBinarizer.transform method, ordered by popularity by default.
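
Before the examples, a minimal self-contained sketch of the fit/transform pattern they all rely on may be useful. Note that this sketch uses the public import path sklearn.preprocessing.LabelBinarizer rather than the private sklearn.preprocessing.label module that the examples below import from; the label strings are purely illustrative.

# A minimal sketch of the LabelBinarizer workflow used throughout the
# examples below: fit on the training labels once, then reuse the same
# fitted binarizer to encode any later labels with identical columns.
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
y_train = lb.fit_transform(["cat", "dog", "bird"])  # learns the classes and one-hot encodes
print(lb.classes_)                    # ['bird' 'cat' 'dog'] -- classes are stored sorted
print(y_train)                        # each row has a single 1 in its label's column
print(lb.transform(["dog", "bird"]))  # new labels encoded with the same column order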

Example 1: test_label_binarizer_unseen_labels

# Required import: from sklearn.preprocessing.label import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.label.LabelBinarizer import transform [as alias]
def test_label_binarizer_unseen_labels():
    lb = LabelBinarizer()

    expected = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    got = lb.fit_transform(["b", "d", "e"])
    assert_array_equal(expected, got)

    expected = np.array([[0, 0, 0], [1, 0, 0], [0, 0, 0], [0, 1, 0], [0, 0, 1], [0, 0, 0]])
    got = lb.transform(["a", "b", "c", "d", "e", "f"])
    assert_array_equal(expected, got)
Developer: tguillemot, Project: scikit-learn, Lines: 12, Source: test_label.py
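
Note the behavior this test pins down: labels unseen at fit time ('a', 'c', 'f') are encoded as all-zero rows rather than raising an error, because transform only assigns columns to the classes learned during fit_transform.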

Example 2: dbpedia_convgemb

# Required import: from sklearn.preprocessing.label import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.label.LabelBinarizer import transform [as alias]
def dbpedia_convgemb(sample=None, n_procs=None):
    if not n_procs:
        n_procs = cpu_count()

    df = get_dbpedia_data(size=sample)

    if sample:
        test_size = int(round(np.sum(5000 * df.category.value_counts().values / 45000)))
    else:
        test_size = 5000 * 14

    split = StratifiedShuffleSplit(df.category, test_size=test_size)
    train_split, test_split = next(iter(split))
    train_df = df.iloc[train_split]
    test_df = df.iloc[test_split]

    train_docs = DataframeSentences(train_df, cols=['title', 'abstract'], flatten=True)
    vocab = Dictionary(train_docs)
    vocab.filter_extremes(keep_n=5000)
    bin = LabelBinarizer()

    x_train = np.array(pad_sentences([[vocab.token2id[tok] + 1 for tok in s if tok in vocab.token2id]
                                      for s in train_docs],
                                     max_length=100, padding_word=0))
    y_train = bin.fit_transform(train_df.category.values)

    test_docs = DataframeSentences(test_df, cols=['title', 'abstract'], flatten=True)
    x_test = np.array(pad_sentences([[vocab.token2id[tok] + 1 for tok in s if tok in vocab.token2id]
                                      for s in test_docs],
                                     max_length=100, padding_word=0))
    y_test = bin.transform(test_df.category.values)

    emb_weights = load_w2v_weights(vocab)

    model = Sequential()
    model.add(Embedding(5001, 300, input_length=100, dropout=.2, weights=[emb_weights], trainable=False))
    model.add(Convolution1D(nb_filter=50, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(MaxPooling1D(pool_length=model.output_shape[1]))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dropout(.2))
    model.add(Dense(14, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(x_train, y_train)

    print(accuracy_score(np.argwhere(y_test)[:,1], model.predict_classes(x_test)))
Developer: andrely, Project: sublexical-features, Lines: 53, Source: zhang-char-convnets-text-class.py
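
The detail worth noting here is that fit_transform is called only on the training labels, and the same fitted binarizer then encodes the test labels via transform, so both splits share one column-per-category layout. The final np.argwhere(y_test)[:, 1] simply recovers the integer class index of each one-hot row so it can be scored against predict_classes.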

Example 3: test_label_binarizer_unseen_labels

# Required import: from sklearn.preprocessing.label import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.label.LabelBinarizer import transform [as alias]
def test_label_binarizer_unseen_labels():
    lb = LabelBinarizer()

    expected = np.array([[1, 0, 0],
                         [0, 1, 0],
                         [0, 0, 1]])
    got = lb.fit_transform(['b', 'd', 'e'])
    assert_array_equal(expected, got)

    expected = np.array([[0, 0, 0],
                         [1, 0, 0],
                         [0, 0, 0],
                         [0, 1, 0],
                         [0, 0, 1],
                         [0, 0, 0]])
    got = lb.transform(['a', 'b', 'c', 'd', 'e', 'f'])
    assert_array_equal(expected, got)
Developer: manhhomienbienthuy, Project: scikit-learn, Lines: 19, Source: test_label.py

Example 4: dbpedia_smallcharconv

# Required import: from sklearn.preprocessing.label import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.label.LabelBinarizer import transform [as alias]
def dbpedia_smallcharconv(sample=None, n_procs=None):
    if not n_procs:
        n_procs = cpu_count()

    df = get_dbpedia_data(size=sample)

    if sample:
        test_size = int(round(np.sum(5000 * df.category.value_counts().values / 45000)))
    else:
        test_size = 5000 * 14

    logging.info('creating train test split ...')
    split = StratifiedShuffleSplit(df.category, test_size=test_size)
    train_split, test_split = next(iter(split))
    train_df = df.iloc[train_split]
    test_df = df.iloc[test_split]

    logging.info('preprocessing, padding and binarizing data ...')
    train_docs = [[CHAR_MAP.index(c) if c in CHAR_MAP else len(CHAR_MAP) for c in text] for text
                  in train_df[['title', 'abstract']].apply(lambda cols: u'\n'.join(cols), axis=1).values]
    bin = LabelBinarizer()

    x_train = np.array(pad_sentences(train_docs, max_length=1014, padding_word=CHAR_MAP.index(' ')))
    y_train = bin.fit_transform(train_df.category.values)

    test_docs = [[CHAR_MAP.index(c) if c in CHAR_MAP else len(CHAR_MAP) for c in text] for text
                 in test_df[['title', 'abstract']].apply(lambda cols: u'\n'.join(cols), axis=1).values]
    x_test = np.array(pad_sentences(test_docs, max_length=1014, padding_word=CHAR_MAP.index(' ')))  # pad test docs with the same space index used for the training data
    y_test = bin.transform(test_df.category.values)

    logging.info('building model ...')
    model = Sequential()
    model.add(Embedding(len(CHAR_MAP) + 1, len(CHAR_MAP) + 1, input_length=1014,
                        weights=[char_embedding()], trainable=False))
    model.add(Convolution1D(nb_filter=256, filter_length=7, border_mode='valid',
                            activation='relu'))
    model.add(MaxPooling1D(pool_length=3))
    model.add(Convolution1D(nb_filter=256, filter_length=7, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(MaxPooling1D(pool_length=3))
    model.add(Convolution1D(nb_filter=256, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(Convolution1D(nb_filter=256, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(Convolution1D(nb_filter=256, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(Convolution1D(nb_filter=256, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(MaxPooling1D(pool_length=3))
    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(14, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['categorical_accuracy'])

    print(model.summary())

    model.fit(x_train, y_train, batch_size=64, nb_epoch=5, validation_data=[x_test, y_test])

    print(accuracy_score(np.argwhere(y_test)[:,1], model.predict_classes(x_test)))
Developer: andrely, Project: sublexical-features, Lines: 67, Source: zhang-char-convnets-text-class.py
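
This character-level variant follows the same binarizer pattern but encodes its input as character indices: each character is looked up in CHAR_MAP, unknown characters map to the extra index len(CHAR_MAP), and sequences are padded or truncated to 1014 positions, matching the character-level convnet setup (per the source filename, modeled on Zhang et al.'s work) that this script implements.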

Example 5: dbpedia_smallwordconv

# Required import: from sklearn.preprocessing.label import LabelBinarizer [as alias]
# Or: from sklearn.preprocessing.label.LabelBinarizer import transform [as alias]
def dbpedia_smallwordconv(sample=None, n_procs=None):
    if not n_procs:
        n_procs = cpu_count()

    df = get_dbpedia_data(size=sample)

    if sample:
        test_size = int(round(np.sum(5000 * df.category.value_counts().values / 45000)))
    else:
        test_size = 5000 * 14

    logging.info('creating train test split ...')
    split = StratifiedShuffleSplit(df.category, test_size=test_size)
    train_split, test_split = next(iter(split))
    train_df = df.iloc[train_split]
    test_df = df.iloc[test_split]

    logging.info('preprocessing, padding and binarizing data ...')
    train_docs = DataframeSentences(train_df, cols=['title', 'abstract'], flatten=True)
    vocab = Dictionary(train_docs)
    vocab.filter_extremes(keep_n=5000)
    bin = LabelBinarizer()

    x_train = np.array(pad_sentences([[vocab.token2id[tok] + 1 for tok in s if tok in vocab.token2id]
                                      for s in train_docs],
                                     max_length=100, padding_word=0))
    y_train = bin.fit_transform(train_df.category.values)

    test_docs = DataframeSentences(test_df, cols=['title', 'abstract'], flatten=True)
    x_test = np.array(pad_sentences([[vocab.token2id[tok] + 1 for tok in s if tok in vocab.token2id]
                                      for s in test_docs],
                                     max_length=100, padding_word=0))
    y_test = bin.transform(test_df.category.values)

    logging.info('building model ...')
    model = Sequential()
    model.add(Embedding(5001, 300, input_length=100))
    model.add(Convolution1D(nb_filter=300, filter_length=7, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(MaxPooling1D(pool_length=3, stride=1))
    model.add(Convolution1D(nb_filter=300, filter_length=7, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(MaxPooling1D(pool_length=3, stride=1))
    model.add(Convolution1D(nb_filter=300, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(Convolution1D(nb_filter=300, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(Convolution1D(nb_filter=300, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(Convolution1D(nb_filter=300, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(MaxPooling1D(pool_length=3, stride=1))
    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(14, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['categorical_accuracy'])

    model.fit(x_train, y_train, batch_size=32, nb_epoch=5, validation_data=[x_test, y_test])

    print(accuracy_score(np.argwhere(y_test)[:,1], model.predict_classes(x_test)))
Developer: andrely, Project: sublexical-features, Lines: 68, Source: zhang-char-convnets-text-class.py
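
When reading these examples, it also helps to know that a fitted binarizer can map one-hot rows (or thresholded scores) back to label strings with inverse_transform. A minimal sketch, again using the public import path and illustrative DBpedia-style category names:

# Decoding one-hot rows back to the original labels with the same
# fitted binarizer; inverse_transform picks the column with the
# highest value in each row.
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
lb.fit(["Company", "Artist", "Athlete"])
one_hot = lb.transform(["Artist", "Company"])
print(lb.inverse_transform(one_hot))  # ['Artist' 'Company']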


Note: The sklearn.preprocessing.label.LabelBinarizer.transform method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and distribution or use should follow the corresponding project's license. Please do not reproduce without permission.