Python LabelEncoder.classes_方法代码示例

本文整理汇总了Python中sklearn.preprocessing.LabelEncoder.classes_方法的典型用法代码示例。如果您正苦于以下问题：Python LabelEncoder.classes_方法的具体用法？Python LabelEncoder.classes_怎么用？Python LabelEncoder.classes_使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing.LabelEncoder的用法示例。

在下文中一共展示了LabelEncoder.classes_方法的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: data

# 需要导入模块: from sklearn.preprocessing import LabelEncoder [as 别名]
# 或者: from sklearn.preprocessing.LabelEncoder import classes_ [as 别名]
def data(fold=False):
    """Download, unpack and load the iccv09Data image/label set.

    The archive is fetched through ``df.zoo.download`` and fully extracted
    next to the downloaded file. Every archive entry below
    ``iccv09Data/images/`` is read as a ``.jpg`` image together with the
    matching ``.regions.txt`` label file.

    :param fold: ``False`` (the default) to get the whole set, otherwise a
        fold number; the slice ``[fold*ntest(), (fold+1)*ntest())`` becomes
        the test part and everything else the training part.
    :return: ``(X, y, le)`` when ``fold`` is ``False``, otherwise
        ``((Xtr, ytr), (Xte, yte), le)``. ``le`` is a ``LabelEncoder`` whose
        ``classes_`` are the region names when scikit-learn is importable,
        and the plain array of region names when it is not.
    """
    archive = df.zoo.download('http://dags.stanford.edu/data/iccv09Data.tar.gz')
    root = _p.dirname(archive)
    imgdir = 'iccv09Data/images/'

    # Reading members one at a time straight from the tarball is far too
    # slow for this dataset, hence the full extraction to disk.
    with _taropen(archive, 'r') as tar:
        tar.extractall(root)
        members = tar.getnames()

    # Strip the directory and the 4-character extension to get the sample id.
    ids = [_p.basename(member)[:-4] for member in members if member.startswith(imgdir)]

    X = []
    for sample in ids:
        X.append(imread(_p.join(root, imgdir, sample) + '.jpg'))

    # Only the region labels are loaded; the other label types are ignored.
    y = []
    for sample in ids:
        label_file = _p.join(root, 'iccv09Data/labels', sample) + '.regions.txt'
        y.append(_np.loadtxt(label_file, dtype=_np.int32))

    region_names = _np.array(['sky', 'tree', 'road', 'grass', 'water', 'building', 'mountain', 'foreground', 'object'])
    try:
        from sklearn.preprocessing import LabelEncoder
    except ImportError:
        # scikit-learn is optional: fall back to the bare array of names.
        le = region_names
    else:
        le = LabelEncoder()
        le.classes_ = region_names

    # Identity test on purpose: fold 0 is a valid fold and must not be
    # mistaken for "no fold requested".
    if fold is not False:
        lo = fold * ntest()
        hi = (fold + 1) * ntest()
        train_part = (X[:lo] + X[hi:], y[:lo] + y[hi:])
        test_part = (X[lo:hi], y[lo:hi])
        return train_part, test_part, le

    return X, y, le

开发者ID:yobibyte，项目名称:DeepFried2，代码行数:36，代码来源:stanfordbg.py

示例2: encode

# 需要导入模块: from sklearn.preprocessing import LabelEncoder [as 别名]
# 或者: from sklearn.preprocessing.LabelEncoder import classes_ [as 别名]
def encode(df, dump=fromPickle):
    """
    Label-encode the categorical columns of ``df`` in place and return the
    frame restricted to ``final_cols``.

    Only columns listed in ``lblColumns`` that also appear in ``final_cols``
    are encoded. When ``dump`` is truthy the learned classes are cached as
    ``<modelPath>/<column>.npy``: an existing cache file is loaded instead of
    fitting, and a freshly fitted encoder writes its classes to that file.

    Takes in: dataframe from clean_col

    Returns: the dataframe with the selected columns replaced by their
    integer codes, columns ordered as in ``final_cols`` and the index reset.
    """
    encoders = dict()
    for name in lblColumns:
        if name in final_cols:
            enc = LabelEncoder()
            # No cache file is involved at all when dumping is disabled.
            cache_file = "%s/%s.npy" % (modelPath, name) if dump else None
            if cache_file is not None and os.path.isfile(cache_file):
                # Reuse the classes learned by an earlier run.
                enc.classes_ = np.load(cache_file)
            else:
                enc.fit(df[name])
                if cache_file is not None:
                    np.save(cache_file, enc.classes_)
            encoders[name] = enc
            df[name] = enc.transform(df[name])

    # Column order follows final_cols (presumably with logprice last --
    # that depends on how final_cols is defined).
    ordered = df[final_cols]
    ordered = ordered.reset_index()
    return ordered.drop('index', axis = 1)

开发者ID:jbrosamer，项目名称:PonyPricer，代码行数:28，代码来源:model.py

示例3: restore

# 需要导入模块: from sklearn.preprocessing import LabelEncoder [as 别名]
# 或者: from sklearn.preprocessing.LabelEncoder import classes_ [as 别名]
 def restore(self,model_path):
   '''
   Restore a saved multiencoder from path using npz files, by reconstructing the LabelEncoders with the classes.
   Restore the X header too.

   Reads ``<model_path>/encoder.npz`` (one array of classes per encoded
   column) and ``<model_path>/header.npz`` (entry ``header``).

   :param model_path: directory that holds ``encoder.npz`` and ``header.npz``
   :return: self, with ``header``, ``encoders`` and ``columns`` populated
   '''
   path = model_path + '/encoder.npz'
   h_path = model_path + '/header.npz'
   # np.load on an .npz returns an NpzFile that keeps the archive open until
   # it is closed; the context managers release both file handles. The
   # arrays read inside the block are ordinary in-memory arrays and stay
   # valid afterwards.
   with np.load(path) as npzfile, np.load(h_path) as h_npzfile:
     self.header = h_npzfile['header']
     self.encoders = {}
     for k,v in npzfile.items():
       # A LabelEncoder only needs classes_ to transform; no refit required.
       le = LabelEncoder()
       le.classes_ = v
       self.encoders[k] = le
   self.columns = list(self.encoders.keys())
   return self

开发者ID:Abhinav23，项目名称:AIVA，代码行数:19，代码来源:preprocess.py

示例4: learn_sentdist

# 需要导入模块: from sklearn.preprocessing import LabelEncoder [as 别名]
# 或者: from sklearn.preprocessing.LabelEncoder import classes_ [as 别名]
def learn_sentdist(clean_pcc,
                   feature_list=None,
                   label_features=None):
    """ Learning a classifier for the distance of arguments from a connective

    Runs a random forest. Prints out accuracy scores from a 5-fold cross validation.
    Returns the classifier and the label encoder that was used.
    :param clean_pcc: Cleaned PCC data, no NaNs
    :type clean_pcc: pd.DataFrame
    :param feature_list: list of features that shall be calculated with discourse_connective_text_featurizer
    :param label_features: list of features that have to be encoded as labels
    :return: trained classifier, score array and label encoder
    :rtype: tuple
    """
    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function call (Python 3).
    print('Calculating features...')

    # Taking our favorite featurizer
    def featurizer(sents, conn_pos):
        return discourse_connective_text_featurizer(sents, conn_pos,
                                                    feature_list=feature_list)

    features = sentdist_feature_dataframe(clean_pcc, featurizer)  # Got features of X
    print('Calculated all features')

    # We need to encode the non-numerical labels
    le = LabelEncoder()
    # LabelEncoder only deals with 1 dim np.arrays
    le.fit(features[label_features].values.ravel())
    # Dealing with unknowns: reserve one extra class after the fitted ones.
    # NOTE(review): appending leaves classes_ unsorted; confirm that the
    # installed scikit-learn still encodes '<unknown>' to the last index.
    le.classes_ = np.append(le.classes_, '<unknown>')
    features = encode_label_features(features, le, label_features)

    print('Cross validating classifier...')
    clf = RandomForestClassifier(min_samples_leaf=5, n_jobs=-1, verbose=0)
    scores = cross_val_score(clf, features, clean_pcc['sentence_dist'], cv=5)
    print('Cross validated classifier\nscores: %s\nmean score: %f' % (str(scores), scores.mean()))

    # The cross-validation above only scores clones; fit the returned
    # classifier on everything.
    print('Learning classifier on the whole data set...')
    clf.fit(features, clean_pcc['sentence_dist'])
    print('Learned classifier on the whole data set')

    return clf, scores, le

开发者ID:arksch，项目名称:zwitscher，代码行数:41，代码来源:learn.py

示例5: create_kaggle_submission

# 需要导入模块: from sklearn.preprocessing import LabelEncoder [as 别名]
# 或者: from sklearn.preprocessing.LabelEncoder import classes_ [as 别名]
def create_kaggle_submission(prob, ids_raw, score=None, threshold=0.0):
	"""
	Build the Kaggle submission table from per-user class probabilities and
	write it to a CSV file in the current directory.

	For every user the classes whose probability is at least ``threshold``
	are ranked by descending probability and the best five are emitted as
	(id, country) rows. A user with no class at or above the threshold
	contributes no rows.

	:param prob: sequence of per-user probability vectors, positionally
		aligned with ``ids_raw``
	:param ids_raw: iterable of user ids, consumed in order
	:param score: when truthy it is embedded in the file name
		(``submission_<score>.csv``); otherwise a timestamp is used
	:param threshold: minimum probability for a class to be considered
	:return: DataFrame with the columns ``id`` and ``country``
	:raises ValueError: if one of the ids is null
	"""
	le = LabelEncoder()
	# inverse_transform indexes classes_ with an integer array, which needs
	# an ndarray rather than a plain list.
	le.classes_ = np.asarray(COUNTRY_CLASSES)

	ids = []  # list of ids
	cts = []  # list of countries

	for i, idx in enumerate(ids_raw):
		if pd.isnull(idx):
			# Fail loudly instead of dropping into a debugger.
			raise ValueError('missing user id at position %d' % i)
		ranked = sorted(
			((j, p) for j, p in enumerate(prob[i]) if p >= threshold),
			key=lambda pair: pair[1], reverse=True)
		top_classes = [j for j, _ in ranked[:5]]
		if not top_classes:
			# Nothing reached the threshold for this user.
			continue
		# One array call instead of a scalar call per class index.
		sub_countries = list(le.inverse_transform(np.asarray(top_classes)))

		ids += [idx] * len(sub_countries)
		cts += sub_countries

	# spot check that it's not all 7's
	irene = [np.argmax(p) for p in prob]
	print(pd.Series(irene).value_counts())

	# Generate submission
	sub = pd.DataFrame(np.column_stack((ids, cts)), columns=['id', 'country'])

	if score:
		sub.to_csv('submission_%.4f.csv' % score,index=False)
	else:
		# datetime submission
		date_str = datetime.datetime.now().strftime('%y%m%d_%H%M')
		sub.to_csv('submission_%s.csv' % date_str ,index=False)
	return sub

开发者ID:irenetrampoline，项目名称:airbnb-kaggle，代码行数:39，代码来源:xgbst.py

示例6: LabelEncoder

# 需要导入模块: from sklearn.preprocessing import LabelEncoder [as 别名]
# 或者: from sklearn.preprocessing.LabelEncoder import classes_ [as 别名]
# convert Tag1 from strings to integers
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
train['Tag1_enc'] = le.fit_transform(train.Tag1)

# confirm that the conversion worked
# (bare expressions: their values are only displayed in an interactive session)
train.Tag1.value_counts().head()
train.Tag1_enc.value_counts().head()

# create a dummy column for each value of Tag1_enc (returns a sparse matrix)
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder()
tag1_dummies = ohe.fit_transform(train[['Tag1_enc']])
tag1_dummies

# try a Naive Bayes model with tag1_dummies as the features
# (the trailing number is the result the author recorded)
cross_val_score(nb, tag1_dummies, train.OpenStatus, scoring='log_loss', cv=10).mean()   # 0.650

# adjust Tag1 on testing set since LabelEncoder errors on new values during a transform:
# every tag not seen during fitting is replaced by the placeholder '<unknown>' ...
test['Tag1'] = test['Tag1'].map(lambda s: '<unknown>' if s not in le.classes_ else s)
import numpy as np
# ... and the placeholder is registered as one extra class after the fitted ones.
# NOTE(review): appending leaves classes_ unsorted; confirm that the installed
# scikit-learn still encodes '<unknown>' to the last index.
le.classes_ = np.append(le.classes_, '<unknown>')

# apply the same encoding to the actual testing data and make predictions
# NOTE(review): ohe was fitted before '<unknown>' existed; verify how it handles
# the extra code in test['Tag1_enc'].
test['Tag1_enc'] = le.transform(test.Tag1)
oos_tag1_dummies = ohe.transform(test[['Tag1_enc']])
nb.fit(tag1_dummies, train.OpenStatus)
# keep the second column of predict_proba as the predicted OpenStatus probability
oos_pred_prob = nb.predict_proba(oos_tag1_dummies)[:, 1]
sub = pd.DataFrame({'id':test.index, 'OpenStatus':oos_pred_prob}).set_index('id')
sub.to_csv('sub5.csv')  # 0.649

开发者ID:AntHar，项目名称:DAT7，代码行数:32，代码来源:16_kaggle.py

示例7: learn_main_arg_node

# 需要导入模块: from sklearn.preprocessing import LabelEncoder [as 别名]
# 或者: from sklearn.preprocessing.LabelEncoder import classes_ [as 别名]
def learn_main_arg_node(node_df,
                        syntax_dict,
                        node_dict,
                        precalc_features=None,
                          feature_list=None,
                          label_features=None):
    """ Learn a classifier for a node being arg0 or arg1

    NOTE(review): this listing is cut off before the end of the function, so
    the return statement described below is not visible here.

    :param node_df: node data with tree and node ids
    :type node_df: pd.DataFrame
    :param syntax_dict: to look up the syntax trees by their id
    :type syntax_dict: dict
    :param node_dict: to look up the nodes by their id
    :type node_dict: dict
    :param precalc_features: precalculated features to save computation time in development
    :type precalc_features: pd.DataFrame
    :param feature_list: Names of the features that shall be calculated
    :type feature_list: list
    :param label_features: Names of features that are discrete
    :type label_features: list
    :return: All data that is needed to classify new data with the classifiers
    LogisticRegression classifiers from scikit learn, the list of features and label
    features, as well as encoders for the labels and a binary encoder and a featurizer method
             {'logit_arg0_clf': logit_arg0_clf,
              'logit_arg1_clf': logit_arg1_clf,
              'feature_list': feature_list,
              'label_features': label_features,
              'label_encoder': le,
              'binary_encoder': ohe,
              'node_featurizer': featurizer}
    :rtype: dict
    """

    # Closure over feature_list so callers only pass the data arguments.
    def featurizer(node_df, syntax_dict, node_dict):
        return node_feature_dataframe(node_df, node_featurizer,
                                      syntax_dict=syntax_dict,
                                      node_dict=node_dict,
                                      feature_list=feature_list)

    # Features are only computed when none were handed in.
    if precalc_features is None:
        print 'Calculating features'
        features = featurizer(node_df, syntax_dict, node_dict)
        print 'done'
    else:
        features = precalc_features

    # We need to encode the non-numerical labels
    print 'Encoding labels...'
    le = LabelEncoder()
    # LabelEncoder only deals with 1 dim np.arrays
    le.fit(features[label_features].values.ravel())
    # Dealing with unknowns: one extra class is appended after the fitted ones.
    # NOTE(review): appending leaves classes_ unsorted; confirm that the
    # installed scikit-learn still encodes '<unknown>' to the last index.
    le.classes_ = np.append(le.classes_, '<unknown>')
    encoded_features = encode_label_features(features, le, label_features)
    print 'Encoded label'
    # We need to binarize the data for logistic regression
    print 'Binarizing features for logistic regression...'
    ohe = OneHotEncoder(sparse=False)
    ohe.fit(encoded_features[label_features].values)
    logit_features = binarize_features(encoded_features, ohe, label_features)
    print 'Binarized features.'

    print 'Training classifiers for arg0 labeling'
    print '======================================'
    nr_of_nodes = float(len(node_df))
    # Share of nodes that are NOT arg0, i.e. the score of always answering "no".
    baseline = (nr_of_nodes - sum(node_df['is_arg0_node'])) / nr_of_nodes
    print 'Majority baseline: %f' % baseline
    print 'Cross validating Logistic regression classifier...'
    # C is the inverse of the regularization strength
    # http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
    logit_arg0_clf = LogisticRegression(C=1.0)
    scores = cross_val_score(logit_arg0_clf, logit_features,
                             node_df['is_arg0_node'], cv=5)
    print 'Cross validated Logistic Regression classifier\nscores: %s\nmean score: ' \
          '%f' % (str(scores), scores.mean())

    print ''
    print 'Training classifiers for arg1 labeling'
    print '======================================'
    # Same baseline computation, now for the arg1 target.
    baseline = (nr_of_nodes - sum(node_df['is_arg1_node'])) / nr_of_nodes
    print 'Majority baseline: %f' % baseline
    print 'Cross validating Logistic regression classifier...'
    # C is the inverse of the regularization strength
    logit_arg1_clf = LogisticRegression(C=1.0)
    scores = cross_val_score(logit_arg1_clf, logit_features,
                             node_df['is_arg1_node'], cv=5)
    print 'Cross validated Logistic Regression classifier\nscores: %s\nmean score: ' \
          '%f' % (
              str(scores), scores.mean())

    # Both classifiers are fitted on all rows after cross validation.
    print 'Learning classifiers on the whole data set...'
    logit_arg0_clf.fit(logit_features, node_df['is_arg0_node'])
    logit_arg1_clf.fit(logit_features, node_df['is_arg1_node'])
    print 'Learned classifier on the whole data set'


    # ToDo: Design features (see Lin et al p. 17, Connective_syntactic!)

    # ToDo: Evaluate this method (remember not to count punctuation)
    # ToDo: Get baseline by labeling everything after the connective as
#......... part of the code is omitted here .........

开发者ID:arksch，项目名称:zwitscher，代码行数:103，代码来源:learn.py

示例8: LabelEncoder

# 需要导入模块: from sklearn.preprocessing import LabelEncoder [as 别名]
# 或者: from sklearn.preprocessing.LabelEncoder import classes_ [as 别名]
if __name__ == '__main__':

    # ROS node initialization
    rospy.init_node('clustering', anonymous=True)

    # Subscriber: every incoming point cloud is handed to pcl_callback
    pcl_sub = rospy.Subscriber("/sensor_stick/point_cloud", pc2.PointCloud2, pcl_callback, queue_size=1)

    # Publishers for the point clouds
    pcl_objects_pub = rospy.Publisher("/pcl_objects", pc2.PointCloud2, queue_size=1)
    pcl_table_pub = rospy.Publisher("/pcl_table", pc2.PointCloud2, queue_size=1)
    pcl_cluster_pub = rospy.Publisher("/pcl_cluster", pc2.PointCloud2, queue_size=1)

    # Publishers for the recognition results
    object_markers_pub = rospy.Publisher("/object_markers", Marker, queue_size=1)
    detected_objects_pub = rospy.Publisher("/detected_objects", DetectedObjectsArray, queue_size=1)

    # Load the trained model from disk; the context manager closes the file
    # handle once unpickling is done.
    # NOTE: pickle.load can execute arbitrary code contained in the file --
    # only load a model.sav that comes from a trusted source.
    with open('model.sav', 'rb') as model_file:
        model = pickle.load(model_file)
    clf = model['classifier']
    # Rebuild the encoder from the stored classes instead of refitting it.
    encoder = LabelEncoder()
    encoder.classes_ = model['classes']
    scaler = model['scaler']

    # Initialize color_list
    get_color_list.color_list = []

    # Spin while node is not shutdown
    while not rospy.is_shutdown():
        rospy.spin()

开发者ID:evgeniyarbatov，项目名称:joy-of-coding，代码行数:31，代码来源:object_recognition.py

示例9: LabelEncoder

# 需要导入模块: from sklearn.preprocessing import LabelEncoder [as 别名]
# 或者: from sklearn.preprocessing.LabelEncoder import classes_ [as 别名]
        cosine = F.linear(x_norm, w_norm, None)
        out = cosine * self.scale
        return out

# Command-line arguments, read positionally from sys.argv[1..3].
# NOTE(review): `resume` is not used in the lines shown here -- presumably a
# checkpoint path consumed further down; confirm.
resume   = sys.argv[1]
encoder  = sys.argv[2]  # file with the saved label classes (np.load below)
x_test   = sys.argv[3]  # CSV file read into X_test below

# Hard-coded, machine-specific data locations.
# NOTE(review): test_df and test_folder are not used in the lines shown here.
train_folder = "/home/blcv/CODE/Kaggle/humpback_short_blażej/data/processed/train_bb_fastai2/"
test_df     = "/home/blcv/CODE/Kaggle/humpback_whale_identification/data/processed/sample_submission.csv"
test_folder = "/home/blcv/CODE/Kaggle/humpback_short_blażej/data/processed/test_bb_fastai2/"
option_da = ['gray']# [] #


# Rebuild the label encoder from the stored classes instead of refitting it.
label_encoder = LabelEncoder()
label_encoder.classes_ = np.load(encoder)
# Read the evaluation table and wrap it in a 'val' data loader.
# NOTE(review): the loader is given train_folder, not test_folder -- confirm
# this is intended.
X_test = pd.read_csv(x_test)
val_loader = getDataLoader(X_test, train_folder, 'val', option_da = option_da, image_size = 224, batch_size = 64)

# model preparation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = 'se_resnext101_32x4d'
model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
# Replace the pooling layer and the classification head of the pretrained network:
# the new head is a LayerNorm without affine parameters followed by a NormLinear
# layer with 5004 outputs.
model.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
model.last_linear = nn.Sequential(*[nn.LayerNorm(model.last_linear.in_features, elementwise_affine = False),
                                        NormLinear(model.last_linear.in_features, 5004)])

# Move to the selected device, then wrap for multi-GPU data parallelism.
model = model.to(device)
model = nn.DataParallel(model)

开发者ID:melgor，项目名称:kaggle-whale-tail，代码行数:32，代码来源:eval_bb_se.py

示例10: process_data

# 需要导入模块: from sklearn.preprocessing import LabelEncoder [as 别名]
# 或者: from sklearn.preprocessing.LabelEncoder import classes_ [as 别名]
def process_data(data_type='train', write_to_csv=False, return_df=True,
	include_sessions=False):
	"""
	Build the model-ready train and test tables from the raw CSV files.

	Train and test rows are concatenated so that the feature engineering
	helpers see both, then split again at the original train length. The
	training table additionally receives the label-encoded
	``country_destination`` column; both tables receive their ``id`` column.

	:param data_type: forwarded to ``add_categorical_cols``
	:param write_to_csv: write the two tables to CSV files in the current
		directory (``*_sessions.csv`` when ``include_sessions`` is set)
	:param return_df: return the tables; when false the function returns None
	:param include_sessions: left-join ``sessions_users.csv`` onto both tables
		(``id`` matched against ``user_id``) and fill missing values with 0
	:return: ``(test_df, train_df)`` -- note the order -- or None
	"""
	train = pd.read_csv(TRAINING_DATA, header=0)
	test = pd.read_csv(TEST_DATA, header=0)

	# Set the target and the ids aside before they are dropped from the features.
	train_countries = train['country_destination']
	train_ids = train['id']
	test_ids = test['id']
	train.drop(['id', 'country_destination'], axis=1, inplace=True)
	test.drop(['id'], axis=1, inplace=True)

	# Row position at which the stacked frame switches from train to test.
	piv_train = train.shape[0]

	data = pd.concat((train, test), axis=0, ignore_index=True)
	# features to output into model training data
	nonnumeric_columns = [
		'gender',
		'signup_method',
		'signup_flow',
		'language',
		'affiliate_channel',
		'affiliate_provider',
		'first_affiliate_tracked',
		'signup_app',
		'first_device_type',
		'first_browser',
		]

	# add_null_cols(data)
	add_date_cols(data)
	parse_age(data)

	data = add_categorical_cols(data, nonnumeric_columns, data_type)
	data = fill_in_na(data)

	vals = data.values
	X = vals[:piv_train]

	le = LabelEncoder()
	le.classes_ = COUNTRY_CLASSES
	# NOTE: fit_transform() re-learns classes_ from train_countries, so the
	# assignment above does not influence the resulting codes.
	y = le.fit_transform(train_countries)

	train_df = pd.DataFrame(X, columns=data.columns)
	train_df['id'] = train_ids
	train_df['country_destination'] = y

	X_kaggle = vals[piv_train:]
	test_df = pd.DataFrame(X_kaggle, columns=data.columns)
	test_df['id'] = test_ids

	if include_sessions:
		sessions_df = pd.read_csv('sessions_users.csv')

		# Join on the key columns. left_index/right_index are boolean flags,
		# so passing column names to them never matched users by id.
		train_df = train_df.merge(sessions_df, how='left',
				left_on='id', right_on='user_id')
		train_df.fillna(0, inplace=True)

		test_df = test_df.merge(sessions_df, how='left',
				left_on='id', right_on='user_id')
		test_df.fillna(0, inplace=True)

	if write_to_csv:
		if include_sessions:
			test_df.to_csv('test_sessions.csv', index=False)
			train_df.to_csv('train_sessions.csv', index=False)
			print('Wrote train_sessions.csv, test_sessions.csv')
		else:
			test_df.to_csv('test.csv', index=False)
			train_df.to_csv('train.csv', index=False)
			print('Wrote train.csv, test.csv')
	if return_df:
		return test_df, train_df

开发者ID:irenetrampoline，项目名称:airbnb-kaggle，代码行数:74，代码来源:clean_data.py

注：本文中的sklearn.preprocessing.LabelEncoder.classes_方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。