当前位置: 首页>>代码示例>>Python>>正文


Python MultiLabelBinarizer.inverse_transform方法代码示例

本文整理汇总了Python中sklearn.preprocessing.MultiLabelBinarizer.inverse_transform方法的典型用法代码示例。如果您正苦于以下问题:Python MultiLabelBinarizer.inverse_transform方法的具体用法?Python MultiLabelBinarizer.inverse_transform怎么用?Python MultiLabelBinarizer.inverse_transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.preprocessing.MultiLabelBinarizer的用法示例。


在下文中一共展示了MultiLabelBinarizer.inverse_transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: ACMClassificator

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
class ACMClassificator(BaseACMClassificator):
    """Multi-label tagger built from a count vectorizer and one-vs-rest extra trees.

    Problem statements are turned into token counts, tag lists are binarized
    with a ``MultiLabelBinarizer``, and one ``ExtraTreeClassifier`` per tag is
    trained through ``OneVsRestClassifier``.
    """

    def __init__(self):
        """Create the (unfitted) vectorizer, label binarizer and classifier."""
        self.vectorizer = CountVectorizer(min_df=0.05, max_df=0.45, tokenizer=tokenize)
        self.mlb = MultiLabelBinarizer()
        base_tree = ExtraTreeClassifier(
            criterion="gini",
            max_depth=None,
            min_samples_split=2,
            min_samples_leaf=1,
            min_weight_fraction_leaf=0.,
            max_features="auto",
            max_leaf_nodes=None,
            class_weight=None,
        )
        self.classificator = OneVsRestClassifier(base_tree, n_jobs=-1)

    def _prepare_problems(self, problems):
        """Return the vectorized ``statement`` of every problem in *problems*."""
        statements = [problem.statement for problem in problems]
        return self.vectorizer.transform(statements)

    def fit(self, problems, tags):
        """Fit the vectorizer, the label binarizer and the classifier.

        *problems* are objects exposing a ``statement`` attribute; *tags* is
        the matching collection of tag sets passed to the binarizer.
        """
        nltk.download('punkt', quiet=True)
        statements = [problem.statement for problem in problems]
        self.vectorizer.fit(statements)
        features = self._prepare_problems(problems)
        self.mlb = self.mlb.fit(tags)
        # The classifier is trained on a dense copy of the count matrix.
        dense_features = features.toarray()
        binary_tags = self.mlb.transform(tags)
        self.classificator.fit(dense_features, binary_tags)

    def predict(self, problems):
        """Predict tags for *problems* and map the binary output back to labels."""
        features = self._prepare_problems(problems)
        binary_prediction = self.classificator.predict(features.toarray())
        return self.mlb.inverse_transform(binary_prediction)
开发者ID:morojenoe,项目名称:classificator,代码行数:31,代码来源:one_vs_rest_tree.py

示例2: main

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
def main():
    """Train a one-vs-rest LinearSVC tagger on TF-IDF features and report results.

    The train/test ids come from ``select_sets_by_tag``; texts are read from
    files by the vectorizer (``input="filename"``). After printing a
    classification report, the decision value found at the top-30% position of
    each class column is collected and handed to ``print_predictions``.
    """
    sets = select_sets_by_tag(20, 4, tag_names)
    train_tags = fetch_tags(sets["train"])
    train_texts = id_to_filename(sets["train"])

    # Bag-of-words counts followed by TF-IDF re-weighting.
    count_vect = CountVectorizer(stop_words='english', encoding="utf-16", input="filename")
    X_train_counts = count_vect.fit_transform(train_texts)
    tfidf_transformer = TfidfTransformer()
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

    # Tag lists -> binary indicator matrix.
    mlb = MultiLabelBinarizer()
    processed_train_tags = mlb.fit_transform(train_tags)

    clf = OneVsRestClassifier(LinearSVC())
    clf.fit(X_train_tfidf, processed_train_tags)
    print("classes:{}".format(clf.classes_))

    # Push the test documents through the already-fitted transformers.
    test_texts = id_to_filename(sets["test"])
    X_test_counts = count_vect.transform(test_texts)
    X_test_tfidf = tfidf_transformer.transform(X_test_counts)

    predicted_tags = clf.predict(X_test_tfidf)
    predicted_tags_readable = mlb.inverse_transform(predicted_tags)
    test_tags_actual = fetch_tags(sets["test"])
    predicted_probs = clf.decision_function(X_test_tfidf)
    class_list = mlb.classes_

    actual_binary = mlb.transform(test_tags_actual)
    report = metrics.classification_report(actual_binary, predicted_tags, target_names=class_list)
    print(report)

    # Per class: the score sitting at the top-30% position of the sorted column.
    top_percentage = 30
    threshold_index = int(len(sets["test"]) * (top_percentage / 100.0))
    threshold_vals_dic = {}
    threshold_vals = []
    for class_idx, class_name in enumerate(class_list):
        column_scores = sorted(
            (predicted_probs[row, class_idx] for row in range(len(sets["test"]))),
            reverse=True,
        )
        cutoff = column_scores[threshold_index]
        threshold_vals_dic[class_name] = cutoff
        threshold_vals.append(cutoff)
    print(threshold_vals_dic)

    print_predictions(
        sets["test"],
        predicted_tags_readable,
        class_list,
        class_probablities=predicted_probs,
        threshold_vals=threshold_vals,
    )
开发者ID:samkam,项目名称:Senior-Project,代码行数:55,代码来源:classify.py

示例3: ACMClassificator

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
class ACMClassificator(BaseACMClassificator):
    """Multi-label tagger built from a count vectorizer and one-vs-rest SVCs.

    Problem statements are turned into token counts, tag lists are binarized
    with a ``MultiLabelBinarizer``, and one ``SVC`` per tag is trained through
    ``OneVsRestClassifier``.
    """

    def __init__(self):
        """Create the (unfitted) vectorizer, label binarizer and classifier."""
        self.vectorizer = CountVectorizer(min_df=0.05, max_df=0.45, tokenizer=tokenize)
        self.mlb = MultiLabelBinarizer()
        base_svc = SVC()
        self.classificator = OneVsRestClassifier(base_svc, n_jobs=-1)

    def _prepare_problems(self, problems):
        """Return the vectorized ``statement`` of every problem in *problems*."""
        statements = [problem.statement for problem in problems]
        return self.vectorizer.transform(statements)

    def fit(self, problems, tags):
        """Fit the vectorizer, the label binarizer and the classifier.

        *problems* are objects exposing a ``statement`` attribute; *tags* is
        the matching collection of tag sets passed to the binarizer.
        """
        nltk.download('punkt', quiet=True)
        statements = [problem.statement for problem in problems]
        self.vectorizer.fit(statements)
        features = self._prepare_problems(problems)
        self.mlb = self.mlb.fit(tags)
        # The classifier is trained on a dense copy of the count matrix.
        dense_features = features.toarray()
        binary_tags = self.mlb.transform(tags)
        self.classificator.fit(dense_features, binary_tags)

    def predict(self, problems):
        """Predict tags for *problems* and map the binary output back to labels."""
        features = self._prepare_problems(problems)
        binary_prediction = self.classificator.predict(features.toarray())
        return self.mlb.inverse_transform(binary_prediction)
开发者ID:morojenoe,项目名称:classificator,代码行数:22,代码来源:one_vs_rest_svc.py

示例4: get_classify

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
def get_classify():
    """Train a multi-label text classifier and label every file in ``data_test/``.

    Training data comes from ``load_data()``. Each file found under
    ``base_path + "data_test/"`` is read and classified; one line per file
    (file name and predicted label tuple) is written to ``result2.txt``.
    The predictions, the number of distinct label tuples and the share of
    documents labelled exactly ``('1',)``, ``('2',)`` or ``('3',)`` are
    printed. Returns ``None``.
    """
    X_train, Y_train = load_data()

    # Define the classifier.
    classifier = Pipeline([
        ('counter', CountVectorizer(tokenizer=jieba_tokenizer)),  # tokenize and count: vectorize for feature extraction
        ('tfidf', TfidfTransformer()),                            # TF-IDF weighting
        ('clf', OneVsRestClassifier(LinearSVC())),                # one-vs-rest multi-class (multi-label)
    ])
    mlb = MultiLabelBinarizer()
    Y_train = mlb.fit_transform(Y_train)                          # binarize the class labels

    classifier.fit(X_train, Y_train)

    # Collect every test document into a list, remembering the file names.
    test_dir = base_path + "data_test/"
    test_list = []
    test_name = []
    for files in os.listdir(test_dir):
        test_name.append(files)
        # 'with' closes each handle promptly; the original leaked one per file.
        with open(test_dir + files, 'r') as f:
            test_list.append(f.read())

    prediction = classifier.predict(test_list)
    result = mlb.inverse_transform(prediction)

    # The context manager guarantees the report is flushed and closed.
    with open('result2.txt', 'w') as f:
        for name, labels in zip(test_name, result):
            f.write(str(name) + '   ' + str(labels) + '\n')

    print (result, len(result))
    num_dict = Counter(result)
    print (len(num_dict))
    # int / int is 0 under Python 2, so one operand is converted to float.
    print ((num_dict[('1',)] + num_dict[('2',)] + num_dict[('3',)]) / float(len(result)))
开发者ID:ccTiming,项目名称:TextInfoExp,代码行数:38,代码来源:get_cls.py

示例5: MultiLabelBinarizer

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
import pandas as pd

# Combine per-algorithm predictions: for each label column, take the column
# predicted by the algorithm listed at that position in `combination`.
data_root = "/Users/erdicalli/dev/workspace/yelp/submission/submissions/"

mlb = MultiLabelBinarizer()
total_labels = list()
for file_name in output_file_names:
    f = pd.read_csv(data_root + "merged_" + file_name + ".csv")
    # Keep the labels as a plain list of lists: rows have different lengths,
    # and building an ndarray from ragged sequences raises on recent NumPy.
    labels = [list(y.replace(" ", "")) for y in f["labels"]]
    # NOTE(review): the binarizer is re-fitted per file, so column order is
    # only comparable if every file contains the same label set - confirm.
    total_labels.append(mlb.fit_transform(labels))

# Zero-initialised on purpose: np.ndarray(shape=...) returns uninitialised
# memory, so columns not covered by `combination` would hold arbitrary values
# that inverse_transform rejects.
result_labels = np.zeros(shape=(10000, 9))

for label_id, algorithm in enumerate(combination):
    result_labels[:, label_id] = total_labels[algorithm][:, label_id]

labels = mlb.inverse_transform(result_labels)

test_data_frame = pd.read_csv(data_root + "merged_" + output_file_names[4] + ".csv")
df = pd.DataFrame(columns=['business_id', 'labels'])

for i in range(len(test_data_frame)):
    biz = test_data_frame.loc[i]['business_id']
    # Render the label tuple as text: strip the parentheses, commas -> spaces.
    label = str(labels[i])[1:-1].replace(",", " ")
    df.loc[i] = [str(biz), label]

with open(data_root + "combined_results.csv", 'w') as f:
    df.to_csv(f, index=False)
开发者ID:xarion,项目名称:mlip-yelp,代码行数:31,代码来源:combine_results.py

示例6: open

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
##################


# In[ ]:

# Fit on the training features, predict the test businesses and write a
# submission file with one row per business.
classifier.fit(train_business_feature, y_ptrain_mlb)

test_business_feature = pd.read_csv(data_root+'test_business_feature'+cluster +'.csv')
# Series.reshape no longer exists in pandas; reshape the underlying ndarray
# to get the same (n, 1) column of ids.
business_id = test_business_feature['business_id'].values.reshape(-1, 1)
# The id column is not a feature, so drop it before predicting.
test_business_feature.drop('business_id', axis=1, inplace=True)
y_predict_test = classifier.predict(test_business_feature)


# In[ ]:

# Binary indicator matrix -> tuples of labels.
y_predict_label = mlb.inverse_transform(y_predict_test)

df = pd.DataFrame(columns=['business_id','labels'])

for i in range(len(y_predict_label)):
    biz = business_id[i][0]
    # Render the label tuple as text: strip the parentheses, commas -> spaces.
    label = str(y_predict_label[i])[1:-1].replace(",", " ")
    df.loc[i] = [str(biz), label]

with open(data_root+"sub_pca300.csv",'w') as f:
    df.to_csv(f, index=False)



开发者ID:atulkum,项目名称:ml,代码行数:29,代码来源:chi2-svm.py

示例7: MultiLabelBinarizer

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
X_train_scaled_Concat = np.hstack((X_train_scaled,X_train_scaled_Res))
X_test_scaled = preprocessing.normalize(X_test, norm='l2')
X_test_scaled_Res = preprocessing.normalize(X_test_Res, norm='l2')
X_test_scaled_Concat = np.hstack((X_test_scaled,X_test_scaled_Res))

mlb = MultiLabelBinarizer()
y_train= mlb.fit_transform(y_train)  #Convert list of labels to binary matrix

random_state = np.random.RandomState(0)
classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True))
classifier.fit(X_train_scaled_Concat, y_train)

y_predict = classifier.predict(X_test_scaled_Concat)

#print list(mlb.classes_)
y_predict_label = mlb.inverse_transform(y_predict) #Convert binary matrix back to labels

print "Time passed: ", "{0:.1f}".format(time.time()-t), "sec"

test_data_frame  = pd.read_csv(data_root+"test_biz_fc7features.csv") #fc7features and fc1000features have same business names
df = pd.DataFrame(columns=['business_id','labels'])

for i in range(len(test_data_frame)):
    biz = test_data_frame.loc[i]['business']
    label = y_predict_label[i]
    label = str(label)[1:-1].replace(",", " ")
    df.loc[i] = [str(biz), label]

with open(data_root+"submission_fc7_fc1000_norm.csv",'w') as f:
    df.to_csv(f, index=False)
开发者ID:jwsong0617,项目名称:Yelp,代码行数:32,代码来源:fc7+fc1000_test_normalization_eachSample.py

示例8: normalize

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
# Predict labels for the test set chunk by chunk and write them to the
# submission file. (Python 2 syntax: print statements.)
print "Calculating Predictions..."

# Names of the chunk files, each read relative to data_root.
files = ["xaa", "xab", "xac", "xad", "xae", "xaf"]
header = True
for chunk in files:
    t = time.time()
    print "chunk: " + chunk
    test_df = pd.read_csv(data_root + chunk)
    # test_features = test_df['feature vector'].values
    # Parse every 'feature vector' cell with convert_feature_to_vector.
    test_features = np.array([convert_feature_to_vector(x) for x in test_df['feature vector']])
    # Normalise the first 8192 columns and the remaining columns separately,
    # join them again, then normalise the joined matrix as a whole.
    test_features = normalize(np.append(normalize(test_features[:, :8192]), normalize(test_features[:, 8192:]), axis=1))
    # presumably `model` is a fitted dimensionality reducer - TODO confirm
    reduced_test_features = model.transform(test_features)

    binarized_predicted_labels = classifier.predict(reduced_test_features)

    # Binary indicator matrix -> tuples of labels.
    predicted_labels = mlb.inverse_transform(binarized_predicted_labels)

    print "Calculated Predictions... Time passed: ", "{0:.1f}".format(time.time() - t), "sec"
    print "Writing predictions to output file"
    # The same chunk file is read a second time to obtain the business ids.
    test_data_frame = pd.read_csv(data_root + chunk)
    df = pd.DataFrame(columns=['business_id', 'labels'])

    for i in range(len(test_data_frame)):
        biz = test_data_frame.loc[i]['business']
        label = predicted_labels[i]
        # Tuple repr without the parentheses, commas replaced by spaces.
        label = str(label)[1:-1].replace(",", " ")
        df.loc[i] = [str(biz), label]

    # NOTE(review): in the visible lines `header` is never reset and the file
    # is opened in 'w' mode, so each chunk would overwrite the previous one;
    # the excerpt may be cut off before an append branch - confirm.
    if header:
        with open(submission_root + "reduced_" + output_file_name + ".csv", 'w') as f:
            df.to_csv(f, index=False, header=header)
开发者ID:xarion,项目名称:mlip-yelp,代码行数:33,代码来源:compare_algorithms.py

示例9: TfidfTransformer

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
#     ('tfidf', TfidfTransformer()),
#     ('to_dense', DenseTransformer()),
#     ('clf', OneVsRestClassifier(tree.DecisionTreeClassifier()))])


print '7th print'

gc.collect()
classifier.fit(X_train, Y)

print '8th print'

predicted = classifier.predict(X_test)
predicted_probability = classifier.predict_proba(X_test)

all_labels = mlb.inverse_transform(predicted)
results = classifier.predict_proba(X_test)[0]


# gets a dictionary of {'class_name': probability}
prob_per_class_dictionary = dict(zip(all_labels, results))

# gets a list of ['most_probable_class', 'second_most_probable_class', ..., 'least_class']
results_ordered_by_probability = map(lambda x: x[0], sorted(zip(all_labels, results), key=lambda x: x[1], reverse=True))

print results_ordered_by_probability

# for item, labels, probability in zip(X_test, all_labels,predicted_probability):
#     #print '%s => %s, %s' % (item, ', '.join(labels),str(probability))
#     output_file_object.write('%s => %s, %s' % (item, ', '.join(labels),str(probability))+'\n')
开发者ID:jainprateek,项目名称:MachineLearning,代码行数:32,代码来源:document_classification.py

示例10: MultiLabelBinarizer

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
import time
t=time.time()

mlb = MultiLabelBinarizer()
y_ptrain= mlb.fit_transform(y_train)  #Convert list of labels to binary matrix

random_state = np.random.RandomState(0)
X_ptrain, X_ptest, y_ptrain, y_ptest = train_test_split(X_train, y_ptrain, test_size=.2,random_state=random_state)
classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True)) #F1 score:  0.803711220644
#classifier = OneVsOneClassifier(svm.SVC(kernel='linear', probability=True))
#classifier = OutputCodeClassifier(svm.SVC(kernel='linear', probability=True))
classifier.fit(X_ptrain, y_ptrain)

y_ppredict = classifier.predict(X_ptest)

print "Time passed: ", "{0:.1f}".format(time.time()-t), "sec"

print "Samples of predicted labels (in binary matrix):\n", y_ppredict[0:3]
print "\nSamples of predicted labels:\n", mlb.inverse_transform(y_ppredict[0:3])


statistics = pd.DataFrame(columns=[ "attribuite "+str(i) for i in range(9)]+['num_biz'], index = ["biz count", "biz ratio"])
statistics.loc["biz count"] = np.append(np.sum(y_ppredict, axis=0), len(y_ppredict))
pd.options.display.float_format = '{:.0f}%'.format
statistics.loc["biz ratio"] = statistics.loc["biz count"]*100/len(y_ppredict)
statistics

from sklearn.metrics import f1_score

print "F1 score: ", f1_score(y_ptest, y_ppredict, average='micro')
print "Individual Class F1 score: ", f1_score(y_ptest, y_ppredict, average=None)
开发者ID:jwsong0617,项目名称:Yelp,代码行数:33,代码来源:Multilabel-Classification_assessment_ResNet.py

示例11: printF1scores

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
printF1scores()

t = time.time()

binarizer = MultiLabelBinarizer()
#labels list is converted to binary matrix
y_train= binarizer.fit_transform(y_train) 

random_state = np.random.RandomState(0)
svmclassifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True))
svmclassifier.fit(X_train, y_train)

y_predict = svmclassifier.predict(X_test)

#Binary matrix is converted to labels
y_predict_label = binarizer.inverse_transform(y_predict) 

print "Elaspsed Time: ", "{0:.1f}".format(time.time()-t), "sec"

tdf  = pd.read_csv(path_to_data+"test_biz_fc8features.csv")
df = pd.DataFrame(columns=['business_id','labels'])

for i in range(len(tdf)):
    biz = tdf.loc[i]['business']
    label = y_predict_label[i]
    label = str(label)[1:-1].replace(",", " ")
    df.loc[i] = [str(biz), label]

with open(path_to_data+"submission_fc8.csv",'w') as file67:
    df.to_csv(file67, index=False)   
开发者ID:amallem,项目名称:yelpr,代码行数:32,代码来源:classifierfc8.py

示例12: range

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
# Binarize the category labels, then run 5-fold cross-validation in which a
# category classifier is trained per fold. (Python 2 era code: builtin
# `reduce`, sklearn `cross_validation`.) The excerpt ends inside the loop.
y_map_cate = ml_cate.fit_transform(y_cate)
y_map_cate = np.array(y_map_cate)


f_scores = []
# Single pass (range(0,1)).
for loop_stat in range(0,1):
    scores = []
    report_y_actual = []
    report_y_predict = []
    kf = cross_validation.KFold(tfidf_train.shape[0], n_folds=5, shuffle=True)
    loop = 0
    for train_index, test_index in kf:
        # Dense feature rows for this fold.
        x_train, x_test = tfidf_train[train_index].toarray(), tfidf_train[test_index].toarray()
        # Binarized targets for the fold: category level and code level.
        y_train_cate_map, y_test_cate_map = y_map_cate[train_index], y_map_cate[test_index]
        y_train_code_map,y_test_code_map = y_map[train_index], y_map[test_index]
        # The same targets mapped back to label tuples by the two binarizers.
        y_train_code, y_test_code = np.array(ml.inverse_transform(y_train_code_map)),np.array(ml.inverse_transform(y_test_code_map))
        y_train_cate,y_test_cate = np.array(ml_cate.inverse_transform(y_train_cate_map)),np.array(ml_cate.inverse_transform(y_test_cate_map))
        # classify the category
        model_cate = OneVsRestClassifier(LogisticRegression())
        model_cate.fit(x_train, y_train_cate_map)
        y_predict_cate_map = model_cate.predict(x_test)
        y_predict_cate = np.array(ml_cate.inverse_transform(y_predict_cate_map))
        # Union of every category predicted anywhere in this fold.
        y_predict_cate_unique = reduce(lambda a,b:set(a)|set(b)  ,y_predict_cate)
        for cate_cur in y_predict_cate_unique:
            if cate_cur not in defaultcode:
                # Categories outside `defaultcode`: collect predicted/actual
                # values returned by transfer_multilabel for the report.
                y_text_new,y_predict_new = transfer_multilabel(y_predict_cate_map,y_test_cate_map,ml_cate,None,"0")
                report_y_predict.extend(y_predict_new)
                report_y_actual.extend(y_text_new)
            else:
                continue
                # NOTE(review): unreachable - this statement follows `continue`.
                idx_test_cur = [ind for ind in range(0,len(y_predict_cate)) if cate_cur in y_predict_cate[ind]]
开发者ID:Sanqiang,项目名称:medical_nlp,代码行数:33,代码来源:classify_hirechy.py

示例13: CountVectorizer

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
# Smoke test: train a one-vs-rest multinomial naive Bayes tagger on TF-IDF
# features of `stories` and print its predictions for three toy documents.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import MultinomialNB

# Raw token counts, then TF-IDF weighting fitted on those counts.
count_vect = CountVectorizer()
train_counts = count_vect.fit_transform(stories)

tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(train_counts)

# Tags -> binary indicator matrix.
mlb = MultiLabelBinarizer()
tag_list = preprocess_tags(tags)
processed_tags = mlb.fit_transform(tag_list)
print(processed_tags)

# One naive Bayes model per tag.
clf = OneVsRestClassifier(MultinomialNB())
clf.fit(X_train_tfidf, processed_tags)

test_docs = [
    "funny funny joke",
    "died sad joke tragedy funny",
    "lasers and robots",
]
X_test_counts = count_vect.transform(test_docs)
print("X_test_counts.shape")
print(X_test_counts.shape)
X_test_tfidf = tfidf_transformer.transform(X_test_counts)

# Show the raw binary prediction and the labels it maps back to.
predicted = clf.predict(X_test_tfidf)
print(predicted)
print(mlb.inverse_transform(predicted))
开发者ID:samkam,项目名称:Senior-Project,代码行数:32,代码来源:testing_sklearn.py

示例14: train_test_split

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
# Hold out 20% of the (already binarized) training labels for a quick
# assessment of the classifier settings below.
X_ptrain, X_ptest, y_ptrain, y_ptest = train_test_split(X_train, y_ptrain, test_size=.2, random_state=random_state)

print("About to start training classifier with set parameters on subset of train data")
# One gradient-boosting model per label, trained through one-vs-rest.
classifier = OneVsRestClassifier(GradientBoostingClassifier(learning_rate=0.01, n_estimators=5000, subsample=0.5,
                                                            min_samples_split=175, min_samples_leaf=10, max_depth=5,
                                                            max_features='sqrt',
                                                            verbose=1,
                                                            random_state=SEED))
classifier.fit(X_ptrain, y_ptrain)

print("About to make predictions on sample of training data")
y_ppredict = classifier.predict(X_ptest)

# `t` is a start timestamp set before this excerpt.
print("Time passed: {0:.1f} sec".format(time.time() - t))
print("Samples of predicted labels (in binary matrix):\n{}".format(y_ppredict[0:3]))
print("\nSamples of predicted labels:\n", mlb.inverse_transform(y_ppredict[0:3]))
# NOTE(review): no rows of `statistics` are assigned in the visible lines
# before it is printed - confirm this is intended.
statistics = pd.DataFrame(columns=["attribute " + str(i) for i in range(9)] + ['num_biz'],
                          index=["biz count", "biz ratio"])
# Changes pandas' global float display format (whole numbers followed by '%').
pd.options.display.float_format = '{:.0f}%'.format
print(statistics)
print("F1 score: {}".format(f1_score(y_ptest, y_ppredict, average='micro')))
print("Individual Class F1 score: {}".format(f1_score(y_ptest, y_ppredict, average=None)))


# Re-Train classifier using all training data, and make predictions on test set
t = time.time()

# A fresh binarizer is fitted on the full training labels; `y_train` is
# overwritten with its binary matrix.
mlb = MultiLabelBinarizer()
y_train = mlb.fit_transform(y_train)  # Convert list of labels to binary matrix

print("About to train classifier on all training data (to have it ready to predict on submission test data)")
开发者ID:bluelight773,项目名称:Kaggle_Yelp_Photo_Top10_Solution,代码行数:33,代码来源:predict_biz_labels.py

示例15: run

# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]

#.........这里部分代码省略.........
                                                    X.shape[0]))
        words = X.sum(axis=1)
        print("Mean word count per document: {} ({})".format(words.mean(), words.std()))

    if VERBOSE > 1:
        X_tmp = X.todense()
        # drop samples without any features...
        X_tmp = X_tmp[np.unique(np.nonzero(X_tmp)[0])]
        print("[entropy] Dropped {} samples with all zeroes?!".format(X.shape[0] - X_tmp.shape[0]))
        X_tmp = X_tmp.T # transpose to compute entropy per sample
        h = entropy(X_tmp)
        print("[entropy] shape:", h.shape)
        print("[entropy] mean entropy per sample {} ({})".format(h.mean(), h.std()))
        # print("Mean entropy (base {}): {}".format(X_dense.shape[0], entropy(X_dense, base=X_dense.shape[0]).mean()))
        # print("Mean entropy (base e): {}".format(entropy(X_dense).mean()))
    # _, _, values = sp.find(X)
    # print("Mean value: %.2f (+/- %.2f) " % (values.mean(), 2 * values.std()))


    # n_iter = np.ceil(10**6 / (X.shape[0] * 0.9))
    # print("Dynamic n_iter = %d" % n_iter)


    if options.interactive:
        print("Please wait...")
        clf = create_classifier(options, Y.shape[1])  # --- INTERACTIVE MODE ---
        clf.fit(X, Y)
        thesaurus = tr.thesaurus
        print("Ready.")
        try:
            for line in sys.stdin:
                x = extractor.transform([line])
                y = clf.predict(x)
                desc_ids = mlb.inverse_transform(y)[0]
                labels = [thesaurus[desc_id]['prefLabel'] for desc_id in desc_ids]
                print(*labels)
        except KeyboardInterrupt:
            exit(1)
        exit(0)

    if VERBOSE: print("Performing %d-fold cross-validation..." % (options.folds if options.cross_validation else 1))

    if options.plot:
        all_f1s = []

    # --- CROSS-VALIDATION ---
    scores = defaultdict(list)
    if options.cross_validation:
        kf = model_selection.KFold(X.shape[0], n_folds=options.folds, shuffle=True)
    else:
        kf = ShuffleSplit(X.shape[0], test_size=options.test_size, n_iter=1)
    for train, test in kf:
        if VERBOSE: print("=" * 80)
        X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]

        # mlp doesn't seem to like being stuck into a new process...
        if options.debug or options.clf_key in {'mlp', 'mlpthr'}:
            Y_pred, Y_train_pred = fit_predict(X_test, X_train, Y_train, options, tr)
        else:
            Y_pred, Y_train_pred = fit_predict_new_process(X_test, X_train, Y_train, options, tr)

        if options.training_error:
            scores['train_f1_samples'].append(f1_score(Y_train, Y_train_pred, average='samples'))

        scores['avg_n_labels_pred'].append(np.mean(Y_pred.getnnz(1)))
        scores['avg_n_labels_gold'].append(np.mean(Y_test.getnnz(1)))
开发者ID:quadflor,项目名称:Quadflor,代码行数:70,代码来源:run.py


注:本文中的sklearn.preprocessing.MultiLabelBinarizer.inverse_transform方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。