当前位置: 首页>>代码示例>>Python>>正文


Python MultiLabelBinarizer.fit方法代码示例

本文整理汇总了Python中sklearn.preprocessing.MultiLabelBinarizer.fit方法的典型用法代码示例。如果您正苦于以下问题:Python MultiLabelBinarizer.fit方法的具体用法?Python MultiLabelBinarizer.fit怎么用?Python MultiLabelBinarizer.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.preprocessing.MultiLabelBinarizer的用法示例。


在下文中一共展示了MultiLabelBinarizer.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
    def __init__(self, inter_filePath = "inter/technology_companies_of_the_united_states/"):
        """Load embeddings and category data, fit a classifier and print F1 scores.

        Parameters
        ----------
        inter_filePath : str
            Path prefix; "category_page.txt" and "category_page_test.txt" are
            appended to it to locate the training and test inputs.
        """
        # [[cat,cat...]...]
        # Pre-trained word2vec vectors in binary format, read from a fixed path.
        self.m = Word2Vec.load_word2vec_format("vectors/technology_companies_of_the_united_states/cat_train_neg5size400min_count5", binary=True) 
        self.dim = 400

        # load_category_page returns a pair: (correct categories, context categories).
        (correct_categories_train, context_categories_train) = self.load_category_page(inter_filePath + "category_page.txt")  
        (correct_categories_test, context_categories_test) = self.load_category_page(inter_filePath + "category_page_test.txt")
        ## ----  By mean ---
        # Feature matrices actually passed to the classifier below.
        Xvectors = np.array(self.predict_vector_by_mean(context_categories_train))
        Xvectors_test = np.array(self.predict_vector_by_mean(context_categories_test))


        ## ----  By mean --- *

        ## ----  By SVM ---
        # Each context (a sequence of category strings) is joined into one
        # space-separated document for the bag-of-words vectorizer.
        corpus_train = [" ".join(i) for i in context_categories_train]
        corpus_test = [" ".join(i) for i in context_categories_test]
        cv = CountVectorizer(min_df = 1)
        X = cv.fit_transform(corpus_train)
        ##TFIDF
        transformer = TfidfTransformer()
        # NOTE(review): X_tfidf is computed but never read in this method.
        X_tfidf = transformer.fit_transform(X)
        #Labels
        mlb = MultiLabelBinarizer()
        # Fit on train + test labels so both indicator matrices share the same columns.
        mlb.fit(correct_categories_train + correct_categories_test)
        Y = mlb.transform(correct_categories_train) ###Transform to multilabel indicator
        #predict test labels
        # NOTE(review): X and X_test are only referenced by the commented-out
        # calls below; the active call uses the mean-vector features instead.
        X_test = cv.transform(corpus_test)
        Y_test = mlb.transform(correct_categories_test)
        #Y_predict_ovr = self.ovrSVM(X, Y, X_test)
        # presumably trains a one-vs-rest SVM and returns test predictions -- confirm in ovrSVM
        Y_predict_ovr = self.ovrSVM(Xvectors, Y, Xvectors_test)
        #Y_predict_ovo = self.ovoSVM(X, Y, X_test)
        print "---One versus rest---"
        print "Macro F-1:", f1_score(Y_test, Y_predict_ovr, average='macro')
        print "Micro F-1:", f1_score(Y_test, Y_predict_ovr, average='micro')
开发者ID:pkumusic,项目名称:HCE,代码行数:37,代码来源:SVMs.py

示例2: fit_images

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
def fit_images():
    """Train a 5-nearest-neighbour classifier on stored histograms and pickle it.

    Reads documents from the ``image_annotation`` MongoDB database on
    localhost:27017, fits a ``KNeighborsClassifier`` on each response's
    ``hist['0']`` feature against its ``binary_results`` label, and writes the
    fitted model to ``model.data`` inside the directory returned by
    ``getBuildDir()``.

    The Mongo client is closed even when training or saving fails, and the
    model file is opened in binary mode and closed deterministically.
    """
    client = pymongo.MongoClient('localhost', 27017)
    try:
        db = client['image_annotation']
        responses = db['mapped_responses'].find()
        no_labels = db['labels_binary'].find()

        # One single-element label set per known label number.
        numbers = [set([int(doc["number"])]) for doc in no_labels]
        mlb = MultiLabelBinarizer()
        mlb.fit(numbers)

        train_data = []
        labels = []
        for index, instance in enumerate(responses):
            t_data = instance['hist']['0']
            # NOTE(review): `indexes` is not defined in this function; it is
            # presumably a module-level mapping -- confirm it exists.
            indexes[index] = instance['image_no']
            train_data.append(t_data)
            label = instance['binary_results']
            new_labels = [set([int(value)]) for value in label]
            # NOTE(review): the binarized labels are computed but the raw
            # `label` is what gets trained on; kept as in the original --
            # confirm which of the two is intended.
            new_labels = mlb.transform(new_labels)
            labels.append(label)

        classifier = KNeighborsClassifier(n_neighbors=5, weights='uniform')
        classifier.fit(train_data, labels)
        build_dir = getBuildDir()
        # Pickle protocol 1 is a binary format, so the file must be opened
        # with 'wb'; the context manager guarantees it is flushed and closed.
        with open(join(build_dir, 'model.data'), 'wb') as model_file:
            pickle.dump(classifier, model_file, protocol=1)
    finally:
        client.close()
开发者ID:sreeram-boyapati,项目名称:image-annotation,代码行数:31,代码来源:classifier.py

示例3: test_multilabel_classification_report

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
def test_multilabel_classification_report():
    """Check classification_report output for multilabel targets.

    The same expected report must be produced for both the label-list
    targets and their binary-indicator encoding.
    """
    n_classes = 4
    n_samples = 50
    make_ml = make_multilabel_classification
    # Different random states give distinct "true" and "predicted" labels.
    _, y_true_ll = make_ml(n_features=1, n_classes=n_classes, random_state=0,
                           n_samples=n_samples)
    _, y_pred_ll = make_ml(n_features=1, n_classes=n_classes, random_state=1,
                           n_samples=n_samples)

    expected_report = """\
             precision    recall  f1-score   support

          0       0.50      0.67      0.57        24
          1       0.51      0.74      0.61        27
          2       0.29      0.08      0.12        26
          3       0.52      0.56      0.54        27

avg / total       0.45      0.51      0.46       104
"""

    lb = MultiLabelBinarizer()
    # Fit on every class id so the indicator matrix always has n_classes
    # columns; tied to n_classes rather than a repeated literal.
    lb.fit([range(n_classes)])
    y_true_bi = lb.transform(y_true_ll)
    y_pred_bi = lb.transform(y_pred_ll)

    for y_true, y_pred in [(y_true_ll, y_pred_ll), (y_true_bi, y_pred_bi)]:
        report = classification_report(y_true, y_pred)
        assert_equal(report, expected_report)
开发者ID:nateyoder,项目名称:scikit-learn,代码行数:30,代码来源:test_classification.py

示例4: TimeSeriesLabelTransformer

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
class TimeSeriesLabelTransformer(BaseTaskTransformer):
    '''Task transformer that encodes interval annotations as binary tag rows.'''

    def __init__(self, namespace, name, labels=None):
        '''Initialize a time-series label transformer

        Parameters
        ----------
        namespace : object
            Forwarded to the base-class constructor together with ``0``.

        name : str
            Suffix used in the ``output_<name>`` / ``mask_<name>`` result keys.

        labels : iterable
            The label vocabulary; it is fit as a single sample so that every
            entry becomes one class of the encoder.
        '''

        super(TimeSeriesLabelTransformer, self).__init__(namespace, 0)

        binarizer = MultiLabelBinarizer()
        binarizer.fit([labels])

        self.encoder = binarizer
        self._classes = set(binarizer.classes_)
        self.name = name

    def transform(self, jam):
        '''Encode the annotation found in `jam` as interval-aligned tag vectors.

        Returns a dict with the encoded target under ``output_<name>`` and a
        boolean under ``mask_<name>`` telling whether an annotation was found.
        '''

        annotation = self.find_annotation(jam)

        # Always start with one untagged interval spanning the whole track.
        intervals = np.asarray([[0.0, jam.file_metadata.duration]])
        values = [None]

        if annotation:
            ann_int, ann_val = annotation.data.to_interval_values()
            intervals = np.vstack([intervals, ann_int])
            values.extend(ann_val)
            mask = True
        else:
            mask = False

        # Values outside the encoder vocabulary become an empty label set,
        # i.e. an all-zero row.
        rows = []
        for value in values:
            sample = [value] if value in self._classes else []
            rows.extend(self.encoder.transform([sample]))

        target = self.encode_intervals(jam.file_metadata.duration,
                                       intervals,
                                       np.asarray(rows))

        output_key = 'output_{:s}'.format(self.name)
        mask_key = 'mask_{:s}'.format(self.name)
        return {output_key: target, mask_key: mask}
开发者ID:jfsantos,项目名称:crema,代码行数:54,代码来源:tags.py

示例5: test_multilabelbinarizer_vs_sklearn

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
def test_multilabelbinarizer_vs_sklearn():
    """Both binarizers must encode the first trajectory identically.

    MultiLabelBinarizerR is fit and applied on flat arrays, while
    MultiLabelBinarizer works on the list of trajectories.
    """
    # Binarizer under test: consumes the whole trajectory list.
    candidate = MultiLabelBinarizer()
    candidate.fit(trajs)
    actual = candidate.transform(trajs)[0]

    # Reference binarizer: fit on all trajectories concatenated.
    reference = MultiLabelBinarizerR()
    reference.fit(np.concatenate(trajs))
    expected = reference.transform(trajs[0])

    np.testing.assert_array_almost_equal(expected, actual)
开发者ID:Eigenstate,项目名称:msmbuilder,代码行数:16,代码来源:test_preprocessing.py

示例6: load_data

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
def load_data():
    """Build the labelled training-photo table and a binarizer fit on its labels.

    Returns
    -------
    (train_ids, mlb)
        ``train_ids`` has one row per image file in ``train_photos/train244``
        that has labels; ``mlb`` is a MultiLabelBinarizer fit on those labels.
    """
    def parse_labels(raw):
        # "1 2 5" -> [1, 2, 5]
        return [int(token) for token in raw.split(" ")]

    business_labels = pd.read_csv("train.csv")
    photo_to_business = pd.read_csv("train_photo_to_biz_ids.csv")

    # Attach each photo to its business labels and drop unlabelled rows.
    labels = photo_to_business.merge(business_labels, how='left', on='business_id')
    labels = labels[pd.notnull(labels['labels'])]
    labels['labels'] = labels['labels'].map(parse_labels)

    # Photo ids are the file names without their extension.
    photo_ids = []
    for file_name in os.listdir("train_photos/train244"):
        photo_ids.append(int(file_name.split(".")[0]))

    train_ids = pd.DataFrame({"photo_id": photo_ids})
    train_ids = train_ids.merge(labels, on='photo_id', how='inner')

    mlb = MultiLabelBinarizer()
    mlb.fit(train_ids['labels'].tolist())
    return train_ids, mlb
开发者ID:TELSER1,项目名称:yelp_recruit,代码行数:17,代码来源:keras_test.py

示例7: ACMClassificator

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
class ACMClassificator(BaseACMClassificator):
    """Multi-label tag predictor: bag-of-words features, one-vs-rest extra trees."""

    def __init__(self):
        tree = ExtraTreeClassifier(
            criterion="gini",
            max_depth=None,
            min_samples_split=2,
            min_samples_leaf=1,
            min_weight_fraction_leaf=0.,
            max_features="auto",
            max_leaf_nodes=None,
            class_weight=None,
        )
        self.vectorizer = CountVectorizer(min_df=0.05, max_df=0.45, tokenizer=tokenize)
        self.mlb = MultiLabelBinarizer()
        self.classificator = OneVsRestClassifier(tree, n_jobs=-1)

    def _prepare_problems(self, problems):
        """Vectorize the statements of `problems` with the fitted vectorizer."""
        statements = [problem.statement for problem in problems]
        return self.vectorizer.transform(statements)

    def fit(self, problems, tags):
        """Fit the vectorizer, the label binarizer and the classifier."""
        # presumably required by the `tokenize` helper -- confirm
        nltk.download('punkt', quiet=True)

        statements = [problem.statement for problem in problems]
        self.vectorizer.fit(statements)
        features = self._prepare_problems(problems)

        self.mlb = self.mlb.fit(tags)
        targets = self.mlb.transform(tags)
        self.classificator.fit(features.toarray(), targets)

    def predict(self, problems):
        """Return the predicted tags for each problem, decoded by the binarizer."""
        features = self._prepare_problems(problems)
        indicator = self.classificator.predict(features.toarray())
        return self.mlb.inverse_transform(indicator)
开发者ID:morojenoe,项目名称:classificator,代码行数:31,代码来源:one_vs_rest_tree.py

示例8: prepVect

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
def prepVect(min_df=2, max_features=50000, n_captions=5, n_sbu=None,
             multilabel=False):
    """Fit a caption Tokenizer and, optionally, a label binarizer.

    Returns the fitted tokenizer, or ``(tokenizer, binarizer)`` when
    `multilabel` is true; the binarizer is fit on the tokenized captions.
    """
    print("prepping the Word Tokenizer...")

    # Only the third element (the captions) of the COCO tuple is used.
    _unused_a, _unused_b, trY, _unused_c = coco(mode='full', n_captions=n_captions)

    if n_sbu:
        # Same for the SBU data: only its second element is appended.
        _unused_d, sbu_captions, _unused_e = sbuXYFilenames(n_sbu)
        trY.extend(sbu_captions)

    tokenizer = Tokenizer(min_df=min_df, max_features=max_features)
    sampled = sampleCaptions(trY, n_captions)
    tokenizer.fit(sampled)

    if not multilabel:
        return tokenizer

    binarizer = MultiLabelBinarizer()
    binarizer.fit(tokenizer.transform(sampled))
    return tokenizer, binarizer
开发者ID:alanguo001,项目名称:image-captioning-for-mortals,代码行数:18,代码来源:pipeline.py

示例9: run_classifierAccuracy

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
def run_classifierAccuracy(trainSentences, trainLabels, testSentences, testLabels):
	"""Train a one-vs-rest random forest on TF-IDF features and print scores.

	Parameters
	----------
	trainSentences, testSentences
		Inputs passed to ``tf_idf_fit_transform`` / ``tfidf.transform``.
	trainLabels, testLabels
		One iterable of label names per sentence; labels outside the
		active label set are dropped before binarization.

	Prints micro-averaged, macro-averaged and per-class precision, recall
	and F1 on the test set. Returns nothing.
	"""
	all_labels = ["Drought", "Earthquake", "Flood", "Epidemic", "Hurricane",
			"Rebellion", "Terrorism", "Tornado", "Tsunami", "displaced_people_and_evacuations",
			"donation_needs_or_offers_or_volunteering_services", "infrastructure_and_utilities_damage",
			"injured_or_dead_people", "missing_trapped_or_found_people"]
	# Alternative label subsets; assign one of them to curr_labels to
	# restrict the experiment to a single domain.
	disaster_labels = ["Drought", "Earthquake", "Flood", "Hurricane",
			"Tornado", "Tsunami", "displaced_people_and_evacuations",
			"donation_needs_or_offers_or_volunteering_services", "infrastructure_and_utilities_damage",
			"injured_or_dead_people", "missing_trapped_or_found_people"]
	health_labels = ["Epidemic", "displaced_people_and_evacuations",
			"donation_needs_or_offers_or_volunteering_services",
			"injured_or_dead_people"]
	conflict_labels = ["Rebellion", "Terrorism", "displaced_people_and_evacuations",
			"infrastructure_and_utilities_damage",
			"injured_or_dead_people", "missing_trapped_or_found_people"]
	curr_labels = all_labels

	# Keep only the labels that belong to the active label set.
	trainLabels = [list(set(l).intersection(curr_labels)) for l in trainLabels]
	testLabels = [list(set(l).intersection(curr_labels)) for l in testLabels]

	from sklearn.preprocessing import MultiLabelBinarizer
	# Passing classes= fixes the column order of the indicator matrices.
	mlb = MultiLabelBinarizer(classes=curr_labels)
	# fit() returns the binarizer itself, not a matrix, so its result is
	# deliberately not stored.
	mlb.fit(trainLabels)
	print("Labels : ", mlb.classes_)
	train_label_matrix = mlb.transform(trainLabels)
	test_label_matrix = mlb.transform(testLabels)
	print("Shape of label matrix : ", test_label_matrix.shape)

	train_matrix, tfidf = tf_idf_fit_transform(trainSentences)
	test_matrix = tfidf.transform(testSentences)
	print("Shape of sentence matrix : ", test_matrix.shape)


	from sklearn.multiclass import OneVsRestClassifier
	from sklearn.svm import LinearSVC
	from sklearn.ensemble import RandomForestClassifier
	# estimator = LinearSVC()
	estimator = RandomForestClassifier(n_estimators=50, max_depth=None, min_samples_split=2, random_state=0, n_jobs = -1)
	classifier = OneVsRestClassifier(estimator, n_jobs=-1)
	classifier.fit(train_matrix, train_label_matrix)
	predictions = classifier.predict(test_matrix)

	from sklearn.metrics import f1_score, precision_score, recall_score
	print("Micro-Precision", precision_score(test_label_matrix, predictions, average='micro'))
	print("Micro-Recall", recall_score(test_label_matrix, predictions, average='micro'))
	print("Micro-F1", f1_score(test_label_matrix, predictions, average='micro'))
	print("Macro-Precision", precision_score(test_label_matrix, predictions, average='macro'))
	print("Macro-Recall", recall_score(test_label_matrix, predictions, average='macro'))
	print("Macro-F1", f1_score(test_label_matrix, predictions, average='macro'))
	# average=None yields one score per class, so these are labelled as
	# per-class values rather than macro averages.
	print("Per-class Precision", precision_score(test_label_matrix, predictions, average=None))
	print("Per-class Recall", recall_score(test_label_matrix, predictions, average=None))
	print("Per-class F1", f1_score(test_label_matrix, predictions, average=None))
开发者ID:sarath1,项目名称:EventExtraction,代码行数:55,代码来源:sentence_classifier.py

示例10: GlobalLabelTransformer

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
class GlobalLabelTransformer(BaseTaskTransformer):
    '''Task transformer that encodes all of a track's tags as one binary vector.'''

    def __init__(self, namespace, name, labels=None):
        '''Initialize a global label transformer

        Parameters
        ----------
        namespace : object
            Forwarded to the base-class constructor together with ``0``.

        name : str
            Suffix used in the ``output_<name>`` / ``mask_<name>`` result keys.

        labels : iterable
            The label vocabulary; it is fit as a single sample so that every
            entry becomes one class of the encoder.
            NOTE(review): the default ``None`` is passed to ``fit`` as
            ``[None]`` -- confirm callers always supply labels.
        '''

        super(GlobalLabelTransformer, self).__init__(namespace, 0)

        self.encoder = MultiLabelBinarizer()
        self.encoder.fit([labels])
        self._classes = set(self.encoder.classes_)
        self.name = name

    def transform(self, jam):
        '''Encode the annotation found in `jam` as a single tag vector.

        Returns a dict with the binary target under ``output_<name>`` and a
        boolean under ``mask_<name>`` telling whether an annotation was found.
        '''

        ann = self.find_annotation(jam)

        values = [None]
        mask = False

        if ann:
            values = list(ann.data.value)
            mask = True

        # Suppress all values not in the encoder
        tags = [v for v in values if v in self._classes]
        if len(tags):
            target = self.encoder.transform([tags]).max(axis=0)
        else:
            # The builtin int replaces the np.int alias, which newer NumPy
            # releases no longer provide; the resulting dtype is the same.
            target = np.zeros(len(self._classes), dtype=int)

        return {'output_{:s}'.format(self.name): target,
                'mask_{:s}'.format(self.name): mask}
开发者ID:jfsantos,项目名称:crema,代码行数:42,代码来源:tags.py

示例11: ACMClassificator

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
class ACMClassificator(BaseACMClassificator):
    """Multi-label tag predictor: bag-of-words features, one-vs-rest SVC."""

    def __init__(self):
        svc = SVC()
        self.vectorizer = CountVectorizer(min_df=0.05, max_df=0.45, tokenizer=tokenize)
        self.mlb = MultiLabelBinarizer()
        self.classificator = OneVsRestClassifier(svc, n_jobs=-1)

    def _prepare_problems(self, problems):
        """Vectorize the statements of `problems` with the fitted vectorizer."""
        statements = [problem.statement for problem in problems]
        return self.vectorizer.transform(statements)

    def fit(self, problems, tags):
        """Fit the vectorizer, the label binarizer and the classifier."""
        # presumably required by the `tokenize` helper -- confirm
        nltk.download('punkt', quiet=True)

        statements = [problem.statement for problem in problems]
        self.vectorizer.fit(statements)
        features = self._prepare_problems(problems)

        self.mlb = self.mlb.fit(tags)
        targets = self.mlb.transform(tags)
        self.classificator.fit(features.toarray(), targets)

    def predict(self, problems):
        """Return the predicted tags for each problem, decoded by the binarizer."""
        features = self._prepare_problems(problems)
        indicator = self.classificator.predict(features.toarray())
        return self.mlb.inverse_transform(indicator)
开发者ID:morojenoe,项目名称:classificator,代码行数:22,代码来源:one_vs_rest_svc.py

示例12: open

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
# unique_tags = []

# with open("../logs/tags.txt") as top_tag_list:
#     for line in top_tag_list:
#         line = line.split('\n')[0]
#         if cnt[line] > 0:
#             unique_tags.append(line) 

# for key in data:
#     for tag in data[key]:
#         if tag not in unique_tags:
#             data[key].remove(tag)

# Materialise the tag lists: on Python 3 ``dict.values()`` is a view that
# cannot be indexed, and train_test_split below needs an indexable sequence.
tags = list(data.values())
mlb = MultiLabelBinarizer()
mlb.fit(tags)
print("Saving trained LabelBinarizer to disk")
# NOTE(review): the file name is the first five characters of the
# binarizer's string form (presumably "Multi") -- confirm this is intended.
joblib.dump(mlb, '../dump/pkl/' + str(mlb)[:5] + '.pkl')
print("")

# Split corpus into training and test sets
# NOTE(review): random_state is drawn at random, so the split differs
# between runs.
questions_train, questions_test, tags_train, tags_test = train_test_split(questions, tags, test_size=0.2, random_state = random.randint(1, 100))

print("Extracting features from the training data using the vectorizer")
t0 = time()
# `vectorizer` is only applied here, so it must already be fitted.
X_train = vectorizer.transform(questions_train)
duration = time() - t0
print("done in %fs" % (duration))
print("n_samples: %d, n_features: %d" % X_train.shape)
print("")
开发者ID:nishnik,项目名称:QuoRecommender,代码行数:32,代码来源:workbench.py

示例13: return

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
    a_=[loadbusimage(im_) for im_ in x]
    return(np.array(a_))
# Business-level labels; rows without labels are dropped.
labels=pd.read_csv("train.csv")
labels=labels[pd.isnull(labels['labels'])==False]
bismatch=pd.read_csv("train_photo_to_biz_ids.csv")
# Photo-level table: every photo inherits the labels of its business.
photo_labels=bismatch.merge(labels,how='left',on='business_id')
photo_labels=photo_labels[pd.isnull(photo_labels['labels'])==False]
photo_labels['labels']=photo_labels['labels'].map(lambda x:[int(i) for i in x.split(" ")])
np.random.seed(42)
# Each business gets a random bucket 0..9; buckets 0-7 form the training
# set and 8-9 the test set, so all photos of a business stay together.
labels['assignment']=np.random.randint(0,10,size=(labels.shape[0],1))
photo_labels=photo_labels.merge(labels[['business_id','assignment']],on='business_id')
train=photo_labels[photo_labels['assignment']<=7].reset_index(drop=True)
test=photo_labels[photo_labels['assignment']>7].reset_index(drop=True)

mlb=MultiLabelBinarizer()
# Fit on train + test labels so both encodings share the same columns.
mlb.fit(train['labels'].tolist()+test['labels'].tolist())
#INSERT NORMALIZATION TRAINING HERE
n_images=10
graph = Graph()
nfilters=32
for i in xrange(0,n_images):
    graph.add_input(name="input"+str(i),input_shape=(3,size,size))
# conv1 is shared across every declared input; the input list is derived
# from n_images so it cannot drift from the loop above.
graph.add_shared_node(Convolution2D(nfilters, 3, 3, border_mode='same',activation='relu'),name='conv1',inputs=["input"+str(i) for i in xrange(0,n_images)])
graph.add_shared_node(BatchNormalization(),name='batch1',inputs=['conv1'])
graph.add_shared_node(Convolution2D(nfilters,3,3,activation=LeakyReLU()), name='conv2', inputs=['batch1'])
graph.add_shared_node(BatchNormalization(),name='batch2',inputs=['conv2'])
graph.add_shared_node(Convolution2D(nfilters,3,3,activation=LeakyReLU()), name='conv3', inputs=['batch2'])
graph.add_shared_node(BatchNormalization(),name='batch3',inputs=['conv3'])
graph.add_shared_node(Convolution2D(nfilters,3,3,activation=LeakyReLU()), name='conv4', inputs=['batch3'])
graph.add_shared_node(BatchNormalization(),name='batch4',inputs=['conv4'])
graph.add_shared_node(Convolution2D(nfilters,3,3,activation=LeakyReLU()), name='conv5', inputs=['batch4'])
开发者ID:TELSER1,项目名称:yelp_recruit,代码行数:33,代码来源:multinstance_learning.py

示例14: del

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
# Read the test set chunk by chunk, dropping rows with missing values.
chunks = [chunk.dropna() for chunk in reader]
test = pd.concat(chunks)
del chunks

# Each 'Tags' cell is a whitespace-separated string; turn it into a list.
train_labels = train['Tags'].map(lambda tag_string: tag_string.split())
test_labels = test['Tags'].map(lambda tag_string: tag_string.split())

# Fit the binarizer on train and test tags together so every tag gets a
# column, then encode the training labels only.
mlb = MultiLabelBinarizer()
all_labels = pd.concat([train_labels, test_labels])
mlb.fit(all_labels)
labels = mlb.transform(train_labels)

# Bag-of-words vectorizer over unigrams and bigrams:
#   preprocessor - extracts text from html
#   tokenizer    - turns the text into tokens
#   max_df       - drops extremely common tokens
#   min_df       - drops extremely uncommon tokens
vect = CountVectorizer(preprocessor=preprocess,
                       tokenizer=tokenize,
                       ngram_range=(1, 2),
                       max_df=0.5,
                       min_df=0.001)
开发者ID:LanceKaiwei,项目名称:Kaggle-Topic-Models,代码行数:33,代码来源:SGD.py

示例15: load_train

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import fit [as 别名]
#    val_ids=val_ids.merge(labels,on='photo_id',how='inner')
    mlb=LabelEncoder()
    mlb.fit(train_ids['business_id'].tolist())
#    X_train=np.array([imread('train_photos/train244/'+str(f_)+".jpg") for f_ in train_ids['photo_id'].tolist()]).astype(np.float32)
#    X_test=np.array([imread('train_photos/val244/'+str(f_)+".jpg") for f_ in val_ids['photo_id'].tolist()]).astype(np.float32)
    return train_ids,mlb
def load_train(train_list):
    """Read the listed training photos into one float32 array divided by 255."""
    images = []
    for photo_id in train_list:
        path = 'train_photos/train244/' + str(photo_id) + ".jpg"
        images.append(imread(path))
    stacked = np.array(images).astype(np.float32)
    return stacked / 255.0
# load_data() returns the photo table and an encoder fit on its business ids.
train_ids,mlb=load_data()
labels=pd.read_csv("train.csv")
labels=labels[pd.isnull(labels['labels'])==False].reset_index(drop=True)
# One uniform random draw per business; used below for the train/validation split.
# NOTE(review): this draw happens before np.random.seed(42) further down, so
# the split is not controlled by that seed.
labels['assignment']=np.random.uniform(size=(labels.shape[0],1))

MLB=MultiLabelBinarizer()
train_ids=train_ids.merge(labels[['business_id','assignment']],on='business_id',how='left')
MLB.fit(train_ids['labels'].tolist()) 
# Parse the space-separated label strings into lists of ints.
labels['labels']=labels['labels'].map(lambda x:[int(i) for i in x.split(" ")])
# NOTE(review): DataFrame.sort is not available in newer pandas releases
# (sort_values replaces it) -- confirm the pandas version this targets.
BETA=MLB.transform(labels.sort('business_id')['labels'])
# Rows with assignment >= .9 become the validation set, the rest stay in train_ids.
val_ids=train_ids[train_ids['assignment']>=.9].reset_index(drop=True)
val_Y=MLB.transform(val_ids['labels'])
train_ids=train_ids[train_ids['assignment']<.9].reset_index(drop=True)
Y_test=mlb.transform(val_ids['business_id'].tolist())
print Y_test.shape
np.random.seed(42)
#train_ids=train_ids.sort('business_id').reset_index(drop=True)
# NOTE(review): reindex returns a new DataFrame and both results are
# discarded, so neither frame is actually shuffled. Assigning the results
# would also break the row alignment with val_Y / Y_test computed above.
train_ids.reindex(np.random.permutation(train_ids.index))
val_ids.reindex(np.random.permutation(val_ids.index))
# At most the first 10000 validation photos, as float32 divided by 255.
validate=np.array([imread('train_photos/train244/'+str(f_)+".jpg") for f_ in val_ids['photo_id'].tolist()[0:10000]]).astype(np.float32)/255.0

datagen = ImageDataGenerator(
    featurewise_center=True,
开发者ID:TELSER1,项目名称:yelp_recruit,代码行数:33,代码来源:idbusiness2.py


注:本文中的sklearn.preprocessing.MultiLabelBinarizer.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。