本文整理汇总了Python中sklearn.preprocessing.MultiLabelBinarizer.inverse_transform方法的典型用法代码示例。如果您正苦于以下问题:Python MultiLabelBinarizer.inverse_transform方法的具体用法?Python MultiLabelBinarizer.inverse_transform怎么用?Python MultiLabelBinarizer.inverse_transform使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing.MultiLabelBinarizer的用法示例。
在下文中一共展示了MultiLabelBinarizer.inverse_transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ACMClassificator
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
class ACMClassificator(BaseACMClassificator):
    """Multi-label tagger for problem statements built on randomized trees.

    Statements are vectorized as token counts, tag collections are binarized
    with ``MultiLabelBinarizer``, and one ``ExtraTreeClassifier`` per tag is
    trained through ``OneVsRestClassifier``.
    """

    def __init__(self):
        """Create the (still unfitted) vectorizer, label binarizer and classifier."""
        # Float bounds are document-frequency proportions: keep tokens that
        # appear in at least 5% and at most 45% of the statements.
        self.vectorizer = CountVectorizer(min_df=0.05, max_df=0.45, tokenizer=tokenize)
        self.mlb = MultiLabelBinarizer()
        self.classificator = OneVsRestClassifier(
            ExtraTreeClassifier(
                criterion="gini",
                max_depth=None,
                min_samples_split=2,
                min_samples_leaf=1,
                min_weight_fraction_leaf=0.,
                # "auto" meant sqrt(n_features) for classifiers and is rejected
                # by current scikit-learn; "sqrt" is the same setting spelled
                # in the form every release accepts.
                max_features="sqrt",
                max_leaf_nodes=None,
                class_weight=None,
            ),
            n_jobs=-1,
        )

    def _prepare_problems(self, problems):
        """Return the count matrix for the ``statement`` text of each problem."""
        return self.vectorizer.transform([p.statement for p in problems])

    def fit(self, problems, tags):
        """Learn the vocabulary, the tag set and the per-tag classifiers.

        :param problems: iterable of objects exposing a ``statement`` attribute.
        :param tags: one collection of tags per problem, in the same order.
        """
        # presumably required by the project-level ``tokenize`` helper -- confirm
        nltk.download('punkt', quiet=True)
        self.vectorizer.fit([p.statement for p in problems])
        mat = self._prepare_problems(problems)
        self.mlb = self.mlb.fit(tags)
        # The classifier is fed a dense array rather than the sparse matrix.
        self.classificator.fit(mat.toarray(), self.mlb.transform(tags))

    def predict(self, problems):
        """Return one tuple of predicted tags per problem."""
        mat = self._prepare_problems(problems)
        predicted = self.classificator.predict(mat.toarray())
        return self.mlb.inverse_transform(predicted)
示例2: main
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
def main():
    """Train a one-vs-rest LinearSVC tagger on text files and report on the test split.

    The train/test split comes from ``select_sets_by_tag``. Documents are read
    from file names, vectorized as TF-IDF weighted counts, and the tag lists
    are binarized with ``MultiLabelBinarizer``. A classification report and,
    for every class, the decision value found 30% of the way down the
    descending ranking are printed before everything is handed to
    ``print_predictions``.
    """
    # Other ways of drawing the split that were tried:
    #   select_by_trait(10, 2, tags=["Comedy", "Human", "Sad", "Dark"])
    #   random_select_sets(30, 6)
    sets = select_sets_by_tag(20, 4, tag_names)
    train_tags = fetch_tags(sets["train"])
    train_texts = id_to_filename(sets["train"])

    # Token counts read straight from the UTF-16 files, then TF-IDF weighting.
    count_vect = CountVectorizer(stop_words='english', encoding="utf-16", input="filename")
    train_counts = count_vect.fit_transform(train_texts)
    tfidf_transformer = TfidfTransformer()
    X_train_tfidf = tfidf_transformer.fit_transform(train_counts)

    # Tag lists -> binary indicator matrix.
    mlb = MultiLabelBinarizer()
    train_targets = mlb.fit_transform(train_tags)

    # One linear SVM per tag (MultinomialNB was the alternative tried here).
    clf = OneVsRestClassifier(LinearSVC())
    clf.fit(X_train_tfidf, train_targets)
    print("classes:{}".format(clf.classes_))

    # Push the test documents through the already fitted transformers.
    test_texts = id_to_filename(sets["test"])
    X_test_tfidf = tfidf_transformer.transform(count_vect.transform(test_texts))
    predicted_tags = clf.predict(X_test_tfidf)
    predicted_tags_readable = mlb.inverse_transform(predicted_tags)
    test_tags_actual = fetch_tags(sets["test"])
    # These are decision_function values, not calibrated probabilities.
    predicted_probs = clf.decision_function(X_test_tfidf)

    class_list = mlb.classes_
    print(metrics.classification_report(mlb.transform(test_tags_actual),
                                        predicted_tags,
                                        target_names=class_list))

    # Per class: the score sitting at the top-30% position of the ranking.
    top_percentage = 30
    num_test = len(sets["test"])
    threshold_index = int(num_test * (top_percentage / 100.0))
    threshold_vals_dic = {}
    threshold_vals = []
    for column, class_name in enumerate(class_list):
        ranked = sorted((predicted_probs[row, column] for row in range(num_test)),
                        reverse=True)
        cutoff = ranked[threshold_index]
        threshold_vals_dic[class_name] = cutoff
        threshold_vals.append(cutoff)
    print(threshold_vals_dic)

    print_predictions(sets["test"], predicted_tags_readable, class_list,
                      class_probablities=predicted_probs,
                      threshold_vals=threshold_vals)
示例3: ACMClassificator
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
class ACMClassificator(BaseACMClassificator):
    """Multi-label tagger for problem statements: token counts + one SVC per tag."""

    def __init__(self):
        """Set up the unfitted vectorizer, label binarizer and classifier."""
        self.vectorizer = CountVectorizer(
            min_df=0.05,
            max_df=0.45,
            tokenizer=tokenize,
        )
        self.mlb = MultiLabelBinarizer()
        base_estimator = SVC()
        self.classificator = OneVsRestClassifier(base_estimator, n_jobs=-1)

    def _prepare_problems(self, problems):
        """Vectorize the ``statement`` text of every problem."""
        statements = [problem.statement for problem in problems]
        return self.vectorizer.transform(statements)

    def fit(self, problems, tags):
        """Fit vocabulary, tag binarizer and classifier.

        :param problems: iterable of objects exposing a ``statement`` attribute.
        :param tags: one collection of tags per problem, in the same order.
        """
        nltk.download('punkt', quiet=True)
        statements = [problem.statement for problem in problems]
        self.vectorizer.fit(statements)
        features = self._prepare_problems(problems)
        # ``fit`` returns the binarizer itself, so no re-assignment is needed.
        self.mlb.fit(tags)
        targets = self.mlb.transform(tags)
        self.classificator.fit(features.toarray(), targets)

    def predict(self, problems):
        """Return one tuple of predicted tags per problem."""
        features = self._prepare_problems(problems)
        indicator = self.classificator.predict(features.toarray())
        return self.mlb.inverse_transform(indicator)
示例4: get_classify
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
def get_classify():
    """Train a multi-label text classifier and label every file in ``data_test/``.

    Training data comes from ``load_data()``. Each file below
    ``base_path + "data_test/"`` is read, classified, and written as
    ``<file name> <label tuple>`` to ``result2.txt``. The predictions, the
    number of distinct label tuples, and the share of files labelled exactly
    ``('1',)``, ``('2',)`` or ``('3',)`` are printed.
    """
    X_train, Y_train = load_data()
    # Classifier definition
    classifier = Pipeline([
        ('counter', CountVectorizer(tokenizer=jieba_tokenizer)),  # tokenize and count: feature extraction
        ('tfidf', TfidfTransformer()),  # TF-IDF weighting
        ('clf', OneVsRestClassifier(LinearSVC())),  # one-vs-rest, multi-label
    ])
    mlb = MultiLabelBinarizer()
    Y_train = mlb.fit_transform(Y_train)  # class labels -> binary indicator matrix
    classifier.fit(X_train, Y_train)

    # Collect every test document into one list, remembering its file name.
    test_dir = base_path + "data_test/"
    test_list = []
    test_name = []
    for files in os.listdir(test_dir):
        test_name.append(files)
        # Close each handle right away instead of leaking one per file.
        with open(test_dir + files, 'r') as f:
            test_list.append(f.read())

    prediction = classifier.predict(test_list)
    result = mlb.inverse_transform(prediction)

    # The context manager guarantees the results are flushed to disk.
    with open('result2.txt', 'w') as out:
        for name, labels in zip(test_name, result):
            out.write(str(name) + ' ' + str(labels) + '\n')

    print(result, len(result))
    num_dict = Counter(result)
    print(len(num_dict))
    # float() keeps this a true division on Python 2, where int / int truncates.
    print((num_dict[('1',)] + num_dict[('2',)] + num_dict[('3',)]) / float(len(result)))
示例5: MultiLabelBinarizer
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
import pandas as pd
data_root = "/Users/erdicalli/dev/workspace/yelp/submission/submissions/"
mlb = MultiLabelBinarizer()
total_labels = list()
# Build one binary indicator matrix per merged submission file.
for file_name in output_file_names:
    f = pd.read_csv(data_root + "merged_" + file_name + ".csv")
    # Every "labels" cell is stripped of spaces and split into single
    # characters. A plain list of lists is passed on: wrapping ragged lists
    # in np.array() raises on current NumPy and the binarizer does not need it.
    labels = [list(y.replace(" ", "")) for y in f["labels"]]
    # NOTE(review): the binarizer is re-fitted per file, so the columns only
    # line up across files if every file contains the same label set -- confirm.
    total_labels.append(mlb.fit_transform(labels))

# Start from zeros rather than uninitialised memory, so a column that
# ``combination`` does not cover reads as "label absent" instead of garbage.
result_labels = np.zeros(shape=(10000, 9))
# For label column ``label_id`` take the predictions of the file whose index
# in ``total_labels`` is ``algorithm``.
for label_id, algorithm in enumerate(combination):
    result_labels[:, label_id] = total_labels[algorithm][:, label_id]
labels = mlb.inverse_transform(result_labels)

test_data_frame = pd.read_csv(data_root + "merged_" + output_file_names[4] + ".csv")
# Collect the rows first and build the frame once; growing a DataFrame one
# ``df.loc[i]`` at a time is quadratic.
rows = []
for i in range(len(test_data_frame)):
    biz = test_data_frame.loc[i]['business_id']
    label = labels[i]
    # Drop the tuple's parentheses and turn the commas into spaces.
    label = str(label)[1:-1].replace(",", " ")
    rows.append([str(biz), label])
df = pd.DataFrame(rows, columns=['business_id', 'labels'])
with open(data_root + "combined_results.csv", 'w') as f:
    df.to_csv(f, index=False)
示例6: open
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
##################
# In[ ]:
# Fit on the training features, then predict the label matrix of the test businesses.
classifier.fit(train_business_feature, y_ptrain_mlb)
test_business_feature = pd.read_csv(data_root+'test_business_feature'+cluster +'.csv')
# Series.reshape no longer exists in pandas; reshape the underlying array
# instead to get the same (n, 1) column of ids.
business_id = test_business_feature['business_id'].values.reshape(-1,1)
# The id column is not a feature, so it is removed before predicting.
test_business_feature.drop('business_id', axis=1, inplace=True)
y_predict_test = classifier.predict(test_business_feature)
# In[ ]:
y_predict_label = mlb.inverse_transform(y_predict_test)
# Collect the rows first and build the frame once; growing a DataFrame one
# ``df.loc[i]`` at a time is quadratic.
rows = []
for i in range(len(y_predict_label)):
    biz = business_id[i][0]
    label = y_predict_label[i]
    # Drop the tuple's parentheses and turn the commas into spaces.
    label = str(label)[1:-1].replace(",", " ")
    rows.append([str(biz), label])
df = pd.DataFrame(rows, columns=['business_id','labels'])
with open(data_root+"sub_pca300.csv",'w') as f:
    df.to_csv(f, index=False)
示例7: MultiLabelBinarizer
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
# Train and test features are the horizontal concatenation of two
# L2-normalized feature sets.
X_train_scaled_Concat = np.hstack((X_train_scaled,X_train_scaled_Res))
X_test_scaled = preprocessing.normalize(X_test, norm='l2')
X_test_scaled_Res = preprocessing.normalize(X_test_Res, norm='l2')
X_test_scaled_Concat = np.hstack((X_test_scaled,X_test_scaled_Res))

mlb = MultiLabelBinarizer()
y_train= mlb.fit_transform(y_train)  # Convert list of labels to binary matrix

random_state = np.random.RandomState(0)  # not used within this fragment
classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True))
classifier.fit(X_train_scaled_Concat, y_train)

y_predict = classifier.predict(X_test_scaled_Concat)
# print(list(mlb.classes_))
y_predict_label = mlb.inverse_transform(y_predict)  # Convert binary matrix back to labels
# Single-string print: valid on Python 2 and 3 and emits the same text as the
# old three-item print statement (hence the double space).
print("Time passed:  {0:.1f} sec".format(time.time()-t))

# fc7features and fc1000features have same business names
test_data_frame = pd.read_csv(data_root+"test_biz_fc7features.csv")
# Collect the rows first and build the frame once; growing a DataFrame one
# ``df.loc[i]`` at a time is quadratic.
rows = []
for i in range(len(test_data_frame)):
    biz = test_data_frame.loc[i]['business']
    label = y_predict_label[i]
    # Drop the tuple's parentheses and turn the commas into spaces.
    label = str(label)[1:-1].replace(",", " ")
    rows.append([str(biz), label])
df = pd.DataFrame(rows, columns=['business_id','labels'])
with open(data_root+"submission_fc7_fc1000_norm.csv",'w') as f:
    df.to_csv(f, index=False)
示例8: normalize
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
# Prints are written as single-string calls so the fragment runs on Python 2
# and 3 with the same output as the original print statements.
print("Calculating Predictions...")
files = ["xaa", "xab", "xac", "xad", "xae", "xaf"]
header = True
for chunk in files:
    t = time.time()
    print("chunk: " + chunk)
    test_df = pd.read_csv(data_root + chunk)
    # test_features = test_df['feature vector'].values
    test_features = np.array([convert_feature_to_vector(x) for x in test_df['feature vector']])
    # Normalize the first 8192 columns and the remaining columns separately,
    # glue them back together, then normalize the combined rows.
    test_features = normalize(np.append(normalize(test_features[:, :8192]), normalize(test_features[:, 8192:]), axis=1))
    reduced_test_features = model.transform(test_features)
    binarized_predicted_labels = classifier.predict(reduced_test_features)
    predicted_labels = mlb.inverse_transform(binarized_predicted_labels)
    print("Calculated Predictions... Time passed:  {0:.1f} sec".format(time.time() - t))
    print("Writing predictions to output file")
    # The chunk is already loaded in ``test_df``; reuse it rather than
    # parsing the same CSV a second time.
    test_data_frame = test_df
    # Collect the rows first and build the frame once; growing a DataFrame
    # one ``df.loc[i]`` at a time is quadratic.
    rows = []
    for i in range(len(test_data_frame)):
        biz = test_data_frame.loc[i]['business']
        label = predicted_labels[i]
        # Drop the tuple's parentheses and turn the commas into spaces.
        label = str(label)[1:-1].replace(",", " ")
        rows.append([str(biz), label])
    df = pd.DataFrame(rows, columns=['business_id', 'labels'])
    # NOTE(review): the listing ends here. As shown, ``header`` never changes,
    # so every chunk would overwrite the same file; presumably the omitted
    # remainder clears ``header`` and appends later chunks -- confirm.
    if header:
        with open(submission_root + "reduced_" + output_file_name + ".csv", 'w') as f:
            df.to_csv(f, index=False, header=header)
示例9: TfidfTransformer
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
# (tail of a commented-out Pipeline definition)
# ('tfidf', TfidfTransformer()),
# ('to_dense', DenseTransformer()),
# ('clf', OneVsRestClassifier(tree.DecisionTreeClassifier()))])
print('7th print')
gc.collect()
classifier.fit(X_train, Y)
print('8th print')
predicted = classifier.predict(X_test)
predicted_probability = classifier.predict_proba(X_test)
all_labels = mlb.inverse_transform(predicted)
# Per-class probabilities of the first test sample; the matrix computed above
# is reused instead of running predict_proba a second time.
results = predicted_probability[0]
# ``all_labels`` holds one tuple of labels per test sample, so zipping it with
# per-class probabilities paired unrelated things. The class names are what
# line up with the probability columns.
# NOTE(review): assumes ``Y`` was produced by this ``mlb`` so that
# ``mlb.classes_`` matches the column order of predict_proba -- confirm.
class_names = mlb.classes_
# gets a dictionary of {'class_name': probability}
prob_per_class_dictionary = dict(zip(class_names, results))
# gets a list of ['most_probable_class', 'second_most_probable_class', ..., 'least_class']
# (a real list, so printing shows the names on Python 3 as well)
results_ordered_by_probability = [
    name for name, _ in sorted(zip(class_names, results), key=lambda x: x[1], reverse=True)
]
print(results_ordered_by_probability)
# for item, labels, probability in zip(X_test, all_labels,predicted_probability):
# #print '%s => %s, %s' % (item, ', '.join(labels),str(probability))
# output_file_object.write('%s => %s, %s' % (item, ', '.join(labels),str(probability))+'\n')
示例10: MultiLabelBinarizer
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
import time

from sklearn.metrics import f1_score

t=time.time()
mlb = MultiLabelBinarizer()
y_ptrain= mlb.fit_transform(y_train)  # Convert list of labels to binary matrix
random_state = np.random.RandomState(0)
# Hold out 20% of the training data to estimate the score locally.
X_ptrain, X_ptest, y_ptrain, y_ptest = train_test_split(X_train, y_ptrain, test_size=.2,random_state=random_state)
classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True))  # F1 score: 0.803711220644
# Alternatives that were tried:
#classifier = OneVsOneClassifier(svm.SVC(kernel='linear', probability=True))
#classifier = OutputCodeClassifier(svm.SVC(kernel='linear', probability=True))
classifier.fit(X_ptrain, y_ptrain)
y_ppredict = classifier.predict(X_ptest)

# Every print below is a single-string call: valid on Python 2 and 3 and
# emitting the same text as the original multi-item print statements
# (including the double space after each "...: " label).
print("Time passed:  {0:.1f} sec".format(time.time()-t))
print("Samples of predicted labels (in binary matrix):\n{0}".format(y_ppredict[0:3]))
print("\nSamples of predicted labels:\n{0}".format(mlb.inverse_transform(y_ppredict[0:3])))

# Per-column count of predicted positives plus the number of held-out rows,
# followed by the same figures as percentages of the held-out rows.
statistics = pd.DataFrame(columns=[ "attribuite "+str(i) for i in range(9)]+['num_biz'], index = ["biz count", "biz ratio"])
statistics.loc["biz count"] = np.append(np.sum(y_ppredict, axis=0), len(y_ppredict))
pd.options.display.float_format = '{:.0f}%'.format
statistics.loc["biz ratio"] = statistics.loc["biz count"]*100/len(y_ppredict)
statistics  # bare expression: presumably shown as notebook cell output -- confirm

print("F1 score:  {0}".format(f1_score(y_ptest, y_ppredict, average='micro')))
print("Individual Class F1 score:  {0}".format(f1_score(y_ptest, y_ppredict, average=None)))
示例11: printF1scores
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
printF1scores()
t = time.time()
binarizer = MultiLabelBinarizer()
# labels list is converted to binary matrix
y_train= binarizer.fit_transform(y_train)
random_state = np.random.RandomState(0)  # not used within this fragment
svmclassifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True))
svmclassifier.fit(X_train, y_train)
y_predict = svmclassifier.predict(X_test)
# Binary matrix is converted to labels
y_predict_label = binarizer.inverse_transform(y_predict)
# Single-string print: valid on Python 2 and 3 and emits the same text as the
# old three-item print statement (hence the double space).
print("Elaspsed Time:  {0:.1f} sec".format(time.time()-t))

tdf = pd.read_csv(path_to_data+"test_biz_fc8features.csv")
# Collect the rows first and build the frame once; growing a DataFrame one
# ``df.loc[i]`` at a time is quadratic.
rows = []
for i in range(len(tdf)):
    biz = tdf.loc[i]['business']
    label = y_predict_label[i]
    # Drop the tuple's parentheses and turn the commas into spaces.
    label = str(label)[1:-1].replace(",", " ")
    rows.append([str(biz), label])
df = pd.DataFrame(rows, columns=['business_id','labels'])
with open(path_to_data+"submission_fc8.csv",'w') as file67:
    df.to_csv(file67, index=False)
示例12: range
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
# NOTE(review): indentation was lost in this listing and the fragment is cut
# off after its last line, so the code is left exactly as found. The loop
# nesting (in particular what the `else: continue` and the final line belong
# to) must be confirmed against the original file.
# Binarize the category labels and wrap the result in an ndarray.
y_map_cate = ml_cate.fit_transform(y_cate)
y_map_cate = np.array(y_map_cate)
f_scores = []
# range(0,1) yields a single pass.
for loop_stat in range(0,1):
scores = []
report_y_actual = []
report_y_predict = []
# Legacy `sklearn.cross_validation` KFold signature: first argument is the
# sample count and the object itself is iterated to get the index pairs.
kf = cross_validation.KFold(tfidf_train.shape[0], n_folds=5, shuffle=True)
loop = 0
for train_index, test_index in kf:
# Densify the selected rows of the TF-IDF matrix for this fold.
x_train, x_test = tfidf_train[train_index].toarray(), tfidf_train[test_index].toarray()
# Slice both binarized label matrices with the fold's indices ...
y_train_cate_map, y_test_cate_map = y_map_cate[train_index], y_map_cate[test_index]
y_train_code_map,y_test_code_map = y_map[train_index], y_map[test_index]
# ... and map the slices back to label tuples with the matching binarizer.
y_train_code, y_test_code = np.array(ml.inverse_transform(y_train_code_map)),np.array(ml.inverse_transform(y_test_code_map))
y_train_cate,y_test_cate = np.array(ml_cate.inverse_transform(y_train_cate_map)),np.array(ml_cate.inverse_transform(y_test_cate_map))
# classify the category
model_cate = OneVsRestClassifier(LogisticRegression())
model_cate.fit(x_train, y_train_cate_map)
y_predict_cate_map = model_cate.predict(x_test)
y_predict_cate = np.array(ml_cate.inverse_transform(y_predict_cate_map))
# Union of every category predicted in this fold. A bare `reduce` is a
# builtin only on Python 2; Python 3 needs functools.reduce.
y_predict_cate_unique = reduce(lambda a,b:set(a)|set(b) ,y_predict_cate)
for cate_cur in y_predict_cate_unique:
if cate_cur not in defaultcode:
# Results of the project helper `transfer_multilabel` are accumulated into
# the report lists.
y_text_new,y_predict_new = transfer_multilabel(y_predict_cate_map,y_test_cate_map,ml_cate,None,"0")
report_y_predict.extend(y_predict_new)
report_y_actual.extend(y_text_new)
else:
continue
# Positions of the test samples whose predicted categories include `cate_cur`.
idx_test_cur = [ind for ind in range(0,len(y_predict_cate)) if cate_cur in y_predict_cate[ind]]
示例13: CountVectorizer
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MultiLabelBinarizer

# Stories -> token counts -> TF-IDF weights. fit_transform on the transformer
# is the same as the separate fit-then-transform on the same matrix.
count_vect = CountVectorizer()
train_counts = count_vect.fit_transform(stories)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(train_counts)

# Tags -> binary indicator matrix.
mlb = MultiLabelBinarizer()
tag_list = preprocess_tags(tags)
processed_tags = mlb.fit_transform(tag_list)
print(processed_tags)

# One multinomial naive Bayes model per tag.
clf = OneVsRestClassifier(MultinomialNB())
clf.fit(X_train_tfidf, processed_tags)

# Classify three hand-written documents with the fitted transformers.
test_docs = [
    "funny funny joke",
    "died sad joke tragedy funny",
    "lasers and robots",
]
X_test_counts = count_vect.transform(test_docs)
print("X_test_counts.shape")
print(X_test_counts.shape)
X_test_tfidf = tfidf_transformer.transform(X_test_counts)
predicted = clf.predict(X_test_tfidf)
print(predicted)
# Indicator rows converted back to tuples of tag names.
print(mlb.inverse_transform(predicted))
示例14: train_test_split
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
# Hold out 20% of the training data for a local quality check.
X_ptrain, X_ptest, y_ptrain, y_ptest = train_test_split(
    X_train, y_ptrain, test_size=.2, random_state=random_state)

print("About to start training classifier with set parameters on subset of train data")
# Settings of the gradient-boosting model wrapped one-vs-rest below.
gbm_settings = dict(
    learning_rate=0.01,
    n_estimators=5000,
    subsample=0.5,
    min_samples_split=175,
    min_samples_leaf=10,
    max_depth=5,
    max_features='sqrt',
    verbose=1,
    random_state=SEED,
)
classifier = OneVsRestClassifier(GradientBoostingClassifier(**gbm_settings))
classifier.fit(X_ptrain, y_ptrain)

print("About to make predictions on sample of training data")
y_ppredict = classifier.predict(X_ptest)
elapsed = time.time() - t
print("Time passed: {0:.1f} sec".format(elapsed))

first_rows = y_ppredict[0:3]
print("Samples of predicted labels (in binary matrix):\n{}".format(first_rows))
print("\nSamples of predicted labels:\n", mlb.inverse_transform(first_rows))

# NOTE(review): within this fragment the frame is created and printed but no
# values are ever written into it -- confirm against the original file.
column_names = ["attribute " + str(i) for i in range(9)] + ['num_biz']
statistics = pd.DataFrame(columns=column_names, index=["biz count", "biz ratio"])
pd.options.display.float_format = '{:.0f}%'.format
print(statistics)

micro_f1 = f1_score(y_ptest, y_ppredict, average='micro')
print("F1 score: {}".format(micro_f1))
per_class_f1 = f1_score(y_ptest, y_ppredict, average=None)
print("Individual Class F1 score: {}".format(per_class_f1))

# Re-Train classifier using all training data, and make predictions on test set
t = time.time()
mlb = MultiLabelBinarizer()
y_train = mlb.fit_transform(y_train)  # Convert list of labels to binary matrix
print("About to train classifier on all training data (to have it ready to predict on submission test data)")
示例15: run
# 需要导入模块: from sklearn.preprocessing import MultiLabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.MultiLabelBinarizer import inverse_transform [as 别名]
#.........这里部分代码省略.........
X.shape[0]))
words = X.sum(axis=1)
print("Mean word count per document: {} ({})".format(words.mean(), words.std()))
if VERBOSE > 1:
X_tmp = X.todense()
# drop samples without any features...
X_tmp = X_tmp[np.unique(np.nonzero(X_tmp)[0])]
print("[entropy] Dropped {} samples with all zeroes?!".format(X.shape[0] - X_tmp.shape[0]))
X_tmp = X_tmp.T # transpose to compute entropy per sample
h = entropy(X_tmp)
print("[entropy] shape:", h.shape)
print("[entropy] mean entropy per sample {} ({})".format(h.mean(), h.std()))
# print("Mean entropy (base {}): {}".format(X_dense.shape[0], entropy(X_dense, base=X_dense.shape[0]).mean()))
# print("Mean entropy (base e): {}".format(entropy(X_dense).mean()))
# _, _, values = sp.find(X)
# print("Mean value: %.2f (+/- %.2f) " % (values.mean(), 2 * values.std()))
# n_iter = np.ceil(10**6 / (X.shape[0] * 0.9))
# print("Dynamic n_iter = %d" % n_iter)
if options.interactive:
print("Please wait...")
clf = create_classifier(options, Y.shape[1]) # --- INTERACTIVE MODE ---
clf.fit(X, Y)
thesaurus = tr.thesaurus
print("Ready.")
try:
for line in sys.stdin:
x = extractor.transform([line])
y = clf.predict(x)
desc_ids = mlb.inverse_transform(y)[0]
labels = [thesaurus[desc_id]['prefLabel'] for desc_id in desc_ids]
print(*labels)
except KeyboardInterrupt:
exit(1)
exit(0)
if VERBOSE: print("Performing %d-fold cross-validation..." % (options.folds if options.cross_validation else 1))
if options.plot:
all_f1s = []
# --- CROSS-VALIDATION ---
scores = defaultdict(list)
if options.cross_validation:
kf = model_selection.KFold(X.shape[0], n_folds=options.folds, shuffle=True)
else:
kf = ShuffleSplit(X.shape[0], test_size=options.test_size, n_iter=1)
for train, test in kf:
if VERBOSE: print("=" * 80)
X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
# mlp doesn't seem to like being stuck into a new process...
if options.debug or options.clf_key in {'mlp', 'mlpthr'}:
Y_pred, Y_train_pred = fit_predict(X_test, X_train, Y_train, options, tr)
else:
Y_pred, Y_train_pred = fit_predict_new_process(X_test, X_train, Y_train, options, tr)
if options.training_error:
scores['train_f1_samples'].append(f1_score(Y_train, Y_train_pred, average='samples'))
scores['avg_n_labels_pred'].append(np.mean(Y_pred.getnnz(1)))
scores['avg_n_labels_gold'].append(np.mean(Y_test.getnnz(1)))