本文整理汇总了Python中sklearn.pipeline.FeatureUnion.get_feature_names方法的典型用法代码示例。如果您正苦于以下问题:Python FeatureUnion.get_feature_names方法的具体用法?Python FeatureUnion.get_feature_names怎么用?Python FeatureUnion.get_feature_names使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.pipeline.FeatureUnion
的用法示例。
在下文中一共展示了FeatureUnion.get_feature_names方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_set_feature_union_steps
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
def test_set_feature_union_steps():
    """FeatureUnion steps can be swapped by attribute or via set_params."""
    double = Mult(2)
    double.get_feature_names = lambda: ["x2"]
    triple = Mult(3)
    triple.get_feature_names = lambda: ["x3"]
    quintuple = Mult(5)
    quintuple.get_feature_names = lambda: ["x5"]

    union = FeatureUnion([("m2", double), ("m3", triple)])
    assert_array_equal([[2, 3]], union.transform(np.asarray([[1]])))
    assert_equal(["m2__x2", "m3__x3"], union.get_feature_names())

    # Direct assignment to transformer_list takes effect immediately.
    union.transformer_list = [("m5", quintuple)]
    assert_array_equal([[5]], union.transform(np.asarray([[1]])))
    assert_equal(["m5__x5"], union.get_feature_names())

    # Replacing the whole list through set_params.
    union.set_params(transformer_list=[("mock", triple)])
    assert_array_equal([[3]], union.transform(np.asarray([[1]])))
    assert_equal(["mock__x3"], union.get_feature_names())

    # Replacing a single named step through set_params.
    union.set_params(mock=quintuple)
    assert_array_equal([[5]], union.transform(np.asarray([[1]])))
    assert_equal(["mock__x5"], union.get_feature_names())
示例2: test_set_feature_union_step_none
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
def test_set_feature_union_step_none():
    """Setting a FeatureUnion step to None drops it from the union."""
    double = Mult(2)
    double.get_feature_names = lambda: ['x2']
    triple = Mult(3)
    triple.get_feature_names = lambda: ['x3']
    data = np.asarray([[1]])

    union = FeatureUnion([('m2', double), ('m3', triple)])
    assert_array_equal([[2, 3]], union.fit(data).transform(data))
    assert_array_equal([[2, 3]], union.fit_transform(data))
    assert_equal(['m2__x2', 'm3__x3'], union.get_feature_names())

    # Dropping the first step leaves only the second one active.
    union.set_params(m2=None)
    assert_array_equal([[3]], union.fit(data).transform(data))
    assert_array_equal([[3]], union.fit_transform(data))
    assert_equal(['m3__x3'], union.get_feature_names())

    # Dropping every step yields an empty feature matrix / name list.
    union.set_params(m3=None)
    assert_array_equal([[]], union.fit(data).transform(data))
    assert_array_equal([[]], union.fit_transform(data))
    assert_equal([], union.get_feature_names())

    # A dropped step can be restored afterwards.
    union.set_params(m3=triple)
    assert_array_equal([[3]], union.fit(data).transform(data))
示例3: train_model
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
def train_model(trainset):
word_vector = TfidfVectorizer(analyzer="word", ngram_range=(2,2), binary = False, max_features= 2000,min_df=1,decode_error="ignore")
# print word_vector
print "works fine"
char_vector = TfidfVectorizer(ngram_range=(2,3), analyzer="char", binary = False, min_df = 1, max_features = 2000,decode_error= "ignore")
vectorizer =FeatureUnion([ ("chars", char_vector),("words", word_vector) ])
corpus = []
classes = []
for item in trainset:
corpus.append(item['text'])
classes.append(item['label'])
print "Training instances : ", 0.8*len(classes)
print "Testing instances : ", 0.2*len(classes)
matrix = vectorizer.fit_transform(corpus)
print "feature count : ", len(vectorizer.get_feature_names())
print "training model"
X = matrix.toarray()
y = numpy.asarray(classes)
model =LinearSVC()
X_train, X_test, y_train, y_test= train_test_split(X,y,train_size=0.8,test_size=.2,random_state=0)
y_pred = OneVsRestClassifier(model).fit(X_train, y_train).predict(X_test)
#y_prob = OneVsRestClassifier(model).fit(X_train, y_train).decision_function(X_test)
#print y_prob
#con_matrix = []
#for row in range(len(y_prob)):
# temp = [y_pred[row]]
# for prob in y_prob[row]:
# temp.append(prob)
# con_matrix.append(temp)
#for row in con_matrix:
# output.write(str(row)+"\n")
#print y_pred
#print y_test
res1=[i for i, j in enumerate(y_pred) if j == 'anonEdited']
res2=[i for i, j in enumerate(y_test) if j == 'anonEdited']
reset=[]
for r in res1:
if y_test[r] != "anonEdited":
reset.append(y_test[r])
for r in res2:
if y_pred[r] != "anonEdited":
reset.append(y_pred[r])
output=open(sys.argv[2],"w")
for suspect in reset:
output.write(str(suspect)+"\n")
cm = confusion_matrix(y_test, y_pred)
print(cm)
pl.matshow(cm)
pl.title('Confusion matrix')
pl.colorbar()
pl.ylabel('True label')
pl.xlabel('Predicted label')
pl.show()
print accuracy_score(y_pred,y_test)
示例4: test_feature_union_feature_names
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
def test_feature_union_feature_names():
    """Each union feature name is prefixed with its transformer's name."""
    word_vect = CountVectorizer(analyzer="word")
    char_vect = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    union = FeatureUnion([("chars", char_vect), ("words", word_vect)])
    union.fit(JUNK_FOOD_DOCS)
    names = union.get_feature_names()
    for name in names:
        assert_true("chars__" in name or "words__" in name)
    assert_equal(len(names), 35)
示例5: test_feature_union_feature_names
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
def test_feature_union_feature_names():
    """Union names are prefixed; transformers must expose get_feature_names."""
    word_vect = CountVectorizer(analyzer="word")
    char_vect = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    union = FeatureUnion([("chars", char_vect), ("words", word_vect)])
    union.fit(JUNK_FOOD_DOCS)
    names = union.get_feature_names()
    for name in names:
        assert_true("chars__" in name or "words__" in name)
    assert_equal(len(names), 35)

    # A transformer lacking get_feature_names raises a helpful error.
    union = FeatureUnion([("tr1", Transf())]).fit([[1]])
    assert_raise_message(
        AttributeError,
        'Transformer tr1 (type Transf) does not provide get_feature_names',
        union.get_feature_names)
示例6: train_model
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
def train_model(trainset):
# create 2 blocks of features, word and character ngrams, size of 2 (using TF-IDF method)
# we can also append here multiple other features in general
word_vector = TfidfVectorizer( analyzer="word" , ngram_range=(2,2), binary = False, max_features= 2000 )
char_vector = TfidfVectorizer(ngram_range=(2, 3), analyzer="char", binary=False, min_df=0 , max_features=2000 )
# our vectors are the feature union of word/char ngrams
vectorizer = FeatureUnion([ ("chars", char_vector),("words", word_vector) ] )
corpus, classes = [], []
for item in trainset:
corpus.append( item['text'] )
classes.append( item['label'] )
print "num of training instances: ", len(classes)
print "num of training classes: ", len(set(classes))
#fit the model of tfidf vectors for the coprus
matrix = vectorizer.fit_transform(corpus)
print "num of features: " , len(vectorizer.get_feature_names())
print "training model"
X = matrix.toarray()
y = np.asarray(classes)
print X[0]
# Here are results of several different models for Law corpus:
# model = SVC(kernel='sigmoid') # -> 0.38
# model = KNeighborsClassifier(algorithm = 'kd_tree') # -> 0.41
# model = AdaBoostClassifier() #-> 0.46
# model = RandomForestClassifier() # -> 0.52
# model = LogisticRegression() # -> 0.65
model = LinearSVC( loss='l1', dual=True) # -> 0.70
# Results of several different models for Enron corpus:
# model = LinearSVC( loss='l1', dual=True) # -> 0.6
scores = cross_validation.cross_val_score( estimator = model,
X = matrix.toarray(),
y= np.asarray(classes), cv=10 )
print "10-fold cross-validation results:", "mean score = ", scores.mean(), "std=", scores.std(), ", num folds =", len(scores)
示例7: test_same_result
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
def test_same_result(self):
    """SparkFeatureUnion must reproduce the local FeatureUnion exactly."""
    X, Z = self.make_text_rdd(2)
    loc_char = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    dist_char = SparkCountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    loc_word = CountVectorizer(analyzer="word")
    dist_word = SparkCountVectorizer(analyzer="word")
    loc_steps = [("chars", loc_char), ("words", loc_word)]
    dist_steps = [("chars", dist_char), ("words", dist_word)]
    loc_union = FeatureUnion(loc_steps)
    dist_union = SparkFeatureUnion(dist_steps)

    # Fitted unions expose identical feature names.
    loc_union.fit(X)
    dist_union.fit(Z)
    assert_equal(loc_union.get_feature_names(),
                 dist_union.get_feature_names())

    # transform() results agree.
    local_mat = loc_union.transform(X)
    dist_mat = sp.vstack(dist_union.transform(Z).collect())
    assert_array_equal(local_mat.toarray(), dist_mat.toarray())

    # fit_transform() results agree as well.
    local_mat = loc_union.fit_transform(X)
    dist_mat = sp.vstack(dist_union.fit_transform(Z).collect())
    assert_array_equal(local_mat.toarray(), dist_mat.toarray())

    # The same holds when the unions run their steps with n_jobs=2.
    loc_union_par = FeatureUnion(loc_steps, n_jobs=2)
    dist_union_par = SparkFeatureUnion(dist_steps, n_jobs=2)
    loc_union_par.fit(X)
    dist_union_par.fit(Z)
    local_mat = loc_union_par.transform(X)
    dist_mat = sp.vstack(dist_union_par.transform(Z).collect())
    assert_array_equal(local_mat.toarray(), dist_mat.toarray())
示例8: test_feature_stacker_feature_names
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
def test_feature_stacker_feature_names():
    """Feature names of a stacked union carry their block's prefix."""
    docs = (
        "the pizza pizza beer copyright",
        "the pizza burger beer copyright",
        "the the pizza beer beer copyright",
        "the burger beer beer copyright",
        "the coke burger coke copyright",
        "the coke burger burger",
    )
    word_vect = CountVectorizer(analyzer="word")
    char_vect = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    stacker = FeatureUnion([("chars", char_vect), ("words", word_vect)])
    stacker.fit(docs)
    names = stacker.get_feature_names()
    for name in names:
        assert_true("chars__" in name or "words__" in name)
    assert_equal(len(names), 35)
示例9: main
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
def main():
qtrain = read_set()
# X_train = gen_features(qtrain)
Y_train = get_ans(qtrain)
qtest = read_set()
# X_test = gen_features(qtest)
# (X_train, X_test), featkeys = dictVec(X_train, X_test)
# tfidf_word = TfidfVectorizer(preprocessor=lambda x: x['question_text'].lower(), ngram_range=(1, 3), analyzer="word", binary=False, min_df=3)
tfidf_word = TfidfVectorizer(preprocessor=exa, ngram_range=(1, 3), analyzer="word", binary=False, min_df=0.05)
# feat_select = SelectPercentile(score_func=f_regression_, percentile=0.15)
feat_select = SelectKBest(score_func=f_regression_, k=QN_PARAMS[QUESTION]['features_select'])
cf = CustomFeat()
feat = FeatureUnion([('word_counts', tfidf_word), ('custom', cf)])
# feat = FeatureUnion([('custom', cf)])
# feat = FeatureUnion([('word_counts', tfidf_word)])
# est = ESTIMATOR(**params[SETTINGS['EST']])
w_model = Pipeline([('funion', feat), ('feat_select', feat_select)]) #, ('est', est)]
# w_X_train = tfidf_word.fit_transform(qtrain)
# w_X_test = tfidf_word.transform(qtest)
# print_err(w_X_train[0])
# X_train = w_X_train
# X_test = w_X_test
# featkeys = tfidf_word.get_feature_names()
# feat_select
# f_regression_(X_train[:,0],Y_train)
# print_err('fitting')
# w_model.fit(qtrain, Y_train)
# print_err(feat_select.get_support(indices=True))
X_train = w_model.fit_transform(qtrain, Y_train).toarray()
X_test = w_model.transform(qtest).toarray()
featkeys = np.asarray(feat.get_feature_names())[feat_select.get_support(indices=True)]
# featkeys = []
# Y_test = classify(w_model, qtest)
# print_err(est.coef_.nonzero())
clf = get_clf(X_train, Y_train, feat_indices=featkeys, clf_used=SETTINGS['EST'], grid_search=SETTINGS['GRIDSEARCH'])
Y_test = classify(clf, X_test)
for qn, pans in zip(qtest, Y_test):
print json.dumps({
'question_key': qn['question_key'].encode('ascii'),
'__ans__': pans
})
示例10: dump_train
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
def dump_train():
    """Extract features from the training data and dump them to CSV.

    Loads grayscale image data, builds per-pixel features through the
    project's FeatureUnion rule, downsamples, and writes the resulting
    feature/target table to ../tmp/train_dump/train_dump.csv.
    """
    _, _, _, train_gray_data, test_gray_data, _, labels = i_p.load_data()
    train_df = f.make_data_df(train_gray_data, labels)
    test_df = f.make_test_df(test_gray_data)
    train_df = train_df.reset_index()
    test_df = test_df.reset_index()
    train_df.columns = ["pngname", "input", "label"]
    test_df.columns = ["pngname", "input"]

    union = FeatureUnion(transformer_list=f.feature_transformer_rule)
    # Strip the "step__" prefix from each generated feature name.
    columns = [name.split("__")[1] for name in union.get_feature_names()]
    columns.append("target")

    train_X = union.fit_transform(train_df)
    train_y = np.concatenate(train_df["label"].apply(lambda arr: arr.flatten()))
    train_X, train_y = cl.downsampling_data(train_X, train_y, 0.2)

    dump_frame = pd.DataFrame(np.c_[train_X, train_y], columns=columns)
    dump_path = os.path.abspath(os.path.dirname(__file__)) +\
        "/../tmp/train_dump"
    dump_frame.to_csv(dump_path + "/train_dump.csv", index=False)
示例11: SpecialWordCounter
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
# Demonstrate SpecialWordCounter on its own before combining it.
print ''
word_count = SpecialWordCounter()
word_count.fit(t)
print word_count.get_feature_names()
print word_count.transform(t)
# Union the generic text statistics with the special-word counts.
combined_features = FeatureUnion([
('stats', TextStats())
, ('special_word_stats', SpecialWordCounter())
])
# Use combined features to transform dataset:
X_features = combined_features.fit(t).transform(t)
print '\nfeature union'
print 'X:', X_features
print 'names:', combined_features.get_feature_names()
print
pipeline = Pipeline([
# Use FeatureUnion to combine the features from subject and body
('union', FeatureUnion(
transformer_list=[
('scaled_text_stats', Pipeline([
('stats', TextStats())
, ('scaling', StandardScaler())
])
)
, ('special_word_stats', SpecialWordCounter())
]
)
)
示例12: Orchestrator
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
#.........这里部分代码省略.........
# TODO No hardcodear
columns_names = self.headings
columns_is_text = [False, False, True, False]
columns_is_class = [False, False, False, True]
train_y = []
steps = []
# steps.append(('numeric_feats', MyPipeline([
# ('selector', SelectNumerics(columns_is_text, columns_names, columns_is_class)),
# ('dict', DictVectorizer()),
# ])))
for column_i, column_is_text in enumerate(columns_is_text):
if columns_is_class[column_i]:
train_y = map(lambda x: float(x[column_i]), self.preprocessed_rows)
train_y = np.array(list(train_y))
else:
if column_is_text:
steps.append(
(
columns_names[column_i],
MyPipeline(
[
("selector", SelectText(column_i=column_i)),
("count_vector", CountVectorizer(**kwargs)),
]
),
)
)
self.feature_union = FeatureUnion(steps)
self.featured_rows = self.feature_union.fit_transform(self.preprocessed_rows, train_y)
self.featured_headings = deepcopy(self.feature_union.get_feature_names())
self.train_y = train_y
variance_too_high = False
if variance_threshold is not None:
thresholder = VarianceThreshold(threshold=variance_threshold)
try:
self.featured_rows = thresholder.fit_transform(self.featured_rows)
self.featured_support = thresholder.get_support()
self.featured_selected_headings = [
self.featured_headings[i] for i, v in enumerate(self.featured_support) if v
]
self.main_pfcsamr_app.variance_warn_message = ""
except ValueError:
traceback.print_exc()
self.featured_rows = np.empty_like(self.featured_rows)
self.featured_support = []
self.featured_selected_headings = []
self.main_pfcsamr_app.variance_warn_message = "threshold too high!!!"
variance_too_high = True
else:
self.main_pfcsamr_app.variance_warn_message = ""
self.featured_support = [True] * self.featured_rows.shape[1]
self.featured_selected_headings = deepcopy(self.featured_headings)
if not variance_too_high:
self.main_pfcsamr_app.learn_tab_enabled = True
self.main_pfcsamr_app.current_model = MyTableModel(self.featured_selected_headings, self.featured_rows)
self.main_pfcsamr_app.status_text = "Feature extraction done. Shape of useful features: %s. Removed %d." % (
str(self.featured_rows.shape),
len(self.featured_headings) - len(self.featured_selected_headings),
)
示例13: test_same_result_withdictrdd
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
def test_same_result_withdictrdd(self):
    """SparkFeatureUnion on a DictRDD must match the local FeatureUnion.

    Also checks that to_scikit() yields an equivalent plain
    scikit-learn union, both serially and with n_jobs=2.
    """
    X, X_rdd = self.make_text_rdd(2)
    Y_rdd = ArrayRDD(self.sc.parallelize([None] * len(X), 4), bsize=2)
    Z = DictRDD([X_rdd, Y_rdd], columns=("X", "y"), bsize=2)
    loc_char = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    dist_char = SparkCountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    loc_word = CountVectorizer(analyzer="word")
    loc_word_2 = CountVectorizer(analyzer="word")
    dist_word = SparkCountVectorizer(analyzer="word")
    dist_word_2 = SparkCountVectorizer(analyzer="word")
    loc_union = FeatureUnion([
        ("chars", loc_char),
        ("words", loc_word),
        ("words2", loc_word_2)
    ])
    dist_union = SparkFeatureUnion([
        ("chars", dist_char),
        ("words", dist_word),
        ("words2", dist_word_2)
    ])
    # test same feature names
    loc_union.fit(X)
    dist_union.fit(Z)
    converted_union = dist_union.to_scikit()
    assert_equal(
        loc_union.get_feature_names(),
        dist_union.get_feature_names()
    )
    # BUG FIX: the converted union's names were previously passed as a
    # THIRD positional argument to assert_equal and therefore consumed
    # as its `msg` parameter — they were never actually compared.
    assert_equal(
        loc_union.get_feature_names(),
        converted_union.get_feature_names()
    )
    # test same results
    Z_transformed = sp.vstack(dist_union.transform(Z)[:, 'X'].collect())
    assert_array_equal(loc_union.transform(X).toarray(), Z_transformed.toarray())
    assert_array_equal(loc_union.transform(X).toarray(),
                       converted_union.transform(X).toarray())
    # test same results with fit_transform
    X_transformed = loc_union.fit_transform(X)
    X_converted_transformed = converted_union.fit_transform(X)
    Z_transformed = sp.vstack(dist_union.fit_transform(Z)[:, 'X'].collect())
    assert_array_equal(X_transformed.toarray(), Z_transformed.toarray())
    assert_array_equal(X_transformed.toarray(),
                       X_converted_transformed.toarray())
    # test same results in parallel
    loc_union_par = FeatureUnion([
        ("chars", loc_char),
        ("words", loc_word)
    ], n_jobs=2)
    dist_union_par = SparkFeatureUnion([
        ("chars", dist_char),
        ("words", dist_word)
    ], n_jobs=2)
    loc_union_par.fit(X)
    dist_union_par.fit(Z)
    converted_union = dist_union_par.to_scikit()
    X_transformed = loc_union_par.transform(X)
    Z_transformed = sp.vstack(dist_union_par.transform(Z)[:, 'X'].collect())
    assert_array_equal(X_transformed.toarray(), Z_transformed.toarray())
    assert_array_equal(X_transformed.toarray(),
                       converted_union.transform(X).toarray())
示例14: CountVectorizer
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
preprocessor=get_col('description'))),
('title', CountVectorizer(
ngram_range=(1, 2),
stop_words=russian_stop,
# max_features=7000,
preprocessor=get_col('title')))
])
# Time the vectorization pass over the full dataset.
start_vect = time.time()
# Fit my vectorizer on the entire dataset instead of the training rows
# Score improved by .0001
vectorizer.fit(df.to_dict('records'))
ready_df = vectorizer.transform(df.to_dict('records'))
# Vocabulary: column names for the sparse text-feature matrix.
tfvocab = vectorizer.get_feature_names()
print("Vectorization Runtime: %0.2f Minutes" % ((time.time() - start_vect) / 60))
# Drop Text Cols
textfeats = ["description", "title"]
df.drop(textfeats, axis=1, inplace=True)
from sklearn.metrics import mean_squared_error
from math import sqrt
# Hyper-parameters for the Ridge out-of-fold predictions described below.
ridge_params = {'alpha': 30.0, 'fit_intercept': True, 'normalize': False, 'copy_X': True,
'max_iter': None, 'tol': 0.001, 'solver': 'auto', 'random_state': SEED}
# Ridge oof method from Faron's kernel
# I was using this to analyze my vectorization, but figured it would be interesting to add the results back into the dataset
# It doesn't really add much to the score, but it does help lightgbm converge faster
示例15: FeatureExtractor
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import get_feature_names [as 别名]
#.........这里部分代码省略.........
else:
self._vectorizer = pickle.loads(base64.b64decode(vectorizer))
self._transform_only = True
if labelencoder is None:
self._labelencoder = preprocessing.LabelEncoder()
else:
self._labelencoder = pickle.loads(base64.b64decode(labelencoder))
self._transform_only = True
def export(self):
    """Serialise the extractor's state into a plain dict.

    The vectorizer and label encoder are pickled and base64-encoded so
    the result contains only text values.
    """
    def _pack(obj):
        # pickle -> base64 -> ascii text
        return base64.b64encode(pickle.dumps(obj)).decode("ascii")

    return {
        "settings": self.settings.export(),
        "vectorizer": _pack(self._vectorizer),
        "labelencoder": _pack(self._labelencoder),
    }
@property
def settings(self):
    """Return the feature-extraction settings object."""
    return self._settings
@property
def vectorizer(self):
    """Return the underlying text vectorizer instance."""
    return self._vectorizer
@property
def dataframe(self):
    """Return the source dataframe the features are built from."""
    return self._dataframe
# NOTE(review): exact duplicate of the `settings` property defined a few
# lines above; this second definition shadows the first. One of the two
# should be removed.
@property
def settings(self):
    """Return the feature-extraction settings object."""
    return self._settings
@property
def labelencoder(self):
    """Return the LabelEncoder used to encode the labels."""
    return self._labelencoder
@property
def strings(self):
    """Feature columns of each row joined into one string.

    Returns
    -------
    list[unicode]
        One space-joined string per dataframe row (NaNs become empty
        strings); the result is memoised in ``self._cache``.
    """
    cached = self._cache.get("strings")
    if cached is None:
        rows = self._dataframe[self._settings.features].fillna("").values
        cached = [" ".join(row) for row in rows]
        self._cache["strings"] = cached
    return cached
@property
def X(self):
    """Vectorised textual feature matrix for classification.

    Returns
    -------
    scipy.sparse
        Matrix built from :attr:`strings`; memoised in ``self._cache``.
        With ``_transform_only`` set the pre-fitted vectorizer is
        applied, otherwise it is fitted here first.
    """
    if "X" not in self._cache:
        texts = self.strings
        vectorize = (self._vectorizer.transform if self._transform_only
                     else self._vectorizer.fit_transform)
        self._cache["X"] = vectorize(texts)
    return self._cache["X"]
@property
def y(self):
    """Labels encoded as an integer array.

    Returns
    -------
    numpy.array
        Encoded labels, memoised in ``self._cache``.  The `labels`
        property maps them back to their string form.  With
        ``_transform_only`` set the pre-fitted encoder is applied,
        otherwise it is fitted here first.
    """
    if "y" not in self._cache:
        raw = list(self._dataframe[self._settings.label].fillna(""))
        encode = (self._labelencoder.transform if self._transform_only
                  else self._labelencoder.fit_transform)
        self._cache["y"] = np.array(encode(raw))
    return self._cache["y"]
@property
def feature_names(self):
    """Returns
    -------
    list[unicode]
        Meaningful feature names for the vectorized feature matrix
        columns, as reported by the vectorizer's get_feature_names().
    """
    return self._vectorizer.get_feature_names()
@property
def labels(self):
    """String labels corresponding to the encoded values in ``y``.

    Returns
    -------
    list[unicode]
        One entry per label-encoder class.
    """
    # Touch `y` first: it fits the label encoder when not in
    # transform-only mode, so classes_ is guaranteed to exist.
    self.y
    return list(self._labelencoder.classes_)