This article collects typical usage examples of the Python function sklearn.externals.joblib.dump. If you have been wondering what dump does, how to use it, or want to see worked examples, the curated code samples below may help.
Fifteen code examples of the dump function are shown below, ordered by popularity by default.
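As a quick orientation before the examples, here is a minimal sketch of the round trip: persist a fitted estimator with dump and restore it with load. The estimator and file name are placeholders rather than code from the examples below; note also that sklearn.externals.joblib has been removed from newer scikit-learn releases, where the standalone joblib package is imported directly.

from sklearn.externals import joblib   # on newer scikit-learn: import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
clf = LogisticRegression().fit(X, y)          # placeholder estimator
joblib.dump(clf, 'model.pkl')                 # serialize the fitted model to disk
restored = joblib.load('model.pkl')           # restore it in a later session
print(restored.score(X, y))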
Example 1: main
def main():
    pos_features_path = '/home/retailyze/Downloads/INRIAPerson/checkb/cropped/svm/featuresPos160_60.npy'
    neg_features_path = '/home/retailyze/Downloads/INRIAPerson/checkb/cropped/svm/featuresNeg160_60.npy'
    saving_loc = '/home/retailyze/Downloads/INRIAPerson/checkb/cropped/svm/'
    pos_features = np.load(pos_features_path)[:, 0::3]
    neg_features = np.load(neg_features_path)[:, 0::3]
    train, val = prepare_features(pos_features, neg_features, True, saving_loc)
    del pos_features
    del neg_features
    clf = svm.SVC(kernel='rbf')
    logging.info('starts training')
    clf.fit(train[:, 1:], train[:, 0])
    del train
    logging.info('starts predicting')
    predicted = clf.predict(val[:, 1:])
    conf_mat = confusion_matrix(predicted, val[:, 0])
    acc = accuracy_score(val[:, 0], predicted)
    del val
    del predicted
    logging.info('Confusion matrix: %s' % conf_mat)
    logging.info('Accuracy: %s' % acc)
    logging.info('saving model')
    joblib.dump(clf, join(saving_loc, 'svm_rbf_scaled.pkl'))
Example 2: predict_test
def predict_test(self, clf, tag):
    np.random.seed(1919)
    if os.path.isdir('../model/' + tag) == False:
        os.mkdir('../model/' + tag)
        print "Dir made : " + str(datetime.datetime.now())
    print "Fit Started : " + str(datetime.datetime.now())
    clf.fit(self.X, self.y)
    print "Dump Started : " + str(datetime.datetime.now())
    joblib.dump(clf, '../model/' + tag + '/' + tag + '.pkl')
    print "Prediction Started : " + str(datetime.datetime.now())
    output_arr = clf.predict_proba(self.x_test)
    f = open("../data/output_" + str(tag), "w")
    f.write("id,Class_1,Class_2,Class_3,Class_4,Class_5,Class_6,Class_7,Class_8,Class_9\n")
    i = 1
    for row in output_arr:
        row = map(str, row)
        f.write(str(i) + "," + str(",".join(row)) + "\n")
        i += 1
    f.close()
    print "ALL DONE : " + str(datetime.datetime.now())
Example 3: train
def train(self, seg_corpus, dep_corpus, path=None):
    assert seg_corpus.keys() == dep_corpus.keys()
    features, labels = self.extract_features_from_corpus(
        dep_corpus, seg_corpus=seg_corpus)
    self._train(features, labels)
    if path is not None:
        joblib.dump(self.pipeline, path, compress=1, cache_size=1e9)
Example 4: train_svm
def train_svm(feedback, classes):
    print "Building n-grams"
    X_train_counts = count_vect.fit_transform(feedback)  # convert the strings to bag-of-words form, using bi-grams
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)  # weight the words from the bag-of-words form
    '''
    The vocabulary built here on the training set must be saved for later classification:
    the set of words seen at classification time will differ from the set seen during
    training, but the mapping from word to index must stay the same. In other words, if
    the word "animal" is mapped to the integer 3 during training, it has to be mapped to
    3 again during classification. (A reload sketch follows this example.)
    '''
    pickle.dump(count_vect.vocabulary_, open("feature.pkl", "wb"))  # saving vocab
    print "Saving words features"
    c = svm.SVC(kernel='rbf', gamma=0.001, C=100)
    print "training SVM"
    c.fit(X_train_tfidf, classes)  # training the SVM
    print "Training completed..."
    joblib.dump(c, 'filename.pkl', compress=9)  # saving the fitted classifier (support vectors)
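The comment above stresses that the training-time vocabulary must be reused at prediction time. A minimal reload sketch follows; it assumes the same file names as in the example ("feature.pkl", 'filename.pkl') and a hypothetical list of new feedback strings. Note that the fitted tfidf_transformer from training would ideally be persisted in the same way, since refitting it on new data changes the weights.

import pickle
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.externals import joblib

vocab = pickle.load(open("feature.pkl", "rb"))                       # vocabulary saved during training
count_vect = CountVectorizer(ngram_range=(1, 2), vocabulary=vocab)   # ngram_range assumed to match training
clf = joblib.load('filename.pkl')                                    # classifier saved with joblib.dump

new_feedback = ["hypothetical new feedback string"]                  # placeholder input
X_counts = count_vect.transform(new_feedback)                        # fixed vocabulary, no refitting needed
X_tfidf = TfidfTransformer().fit_transform(X_counts)                 # see the note above about persisting the fitted transformer
print(clf.predict(X_tfidf))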
Example 5: train
def train(self):
    with gzip.open(constants.TRAIN_EXPANDED, 'r') as source:
        reader = csv.reader(source)
        next(reader, None)
        n_sample = 0
        labels = []
        features = []
        for feature_vector in reader:
            s_features = feature_vector[2:6] + feature_vector[7:]
            s_label = int(feature_vector[1])
            features.append(s_features)
            labels.append(s_label)
            # print 'features', s_features
            # print 'labels', s_label
            # print 'norm features', normalized_features
            n_sample += 1
            if n_sample % 500000 == 0:
                self.clf.partial_fit(features, labels)
                features = []
                labels = []
                print 'Processing sample [%s]' % n_sample
        # note: rows remaining after the last full 500000-row chunk are never passed to partial_fit
        print 'Finished training'
        print 'Estimated parameters [%s]' % self.clf.get_params()
        # saving model into file
        joblib.dump(self.clf, constants.MODEL_FILENAME, compress=9)
Example 6: fetch_vgg_architecture
def fetch_vgg_architecture(caffemodel_parsed=None, caffemodel_protobuffer=None):
    """Fetch a pickled version of the caffe model, represented as list of
    dictionaries."""
    default_filename = os.path.join(VGG_PATH, 'vgg.pickle')
    if caffemodel_parsed is not None:
        if os.path.exists(caffemodel_parsed):
            return joblib.load(caffemodel_parsed)
        else:
            if os.path.exists(default_filename):
                import warnings
                warnings.warn('Did not find %s, but found %s. Loading it.' %
                              (caffemodel_parsed, default_filename))
                return joblib.load(default_filename)
    else:
        if os.path.exists(default_filename):
            return joblib.load(default_filename)

    # We didn't find the file: let's create it by parsing the protobuffer
    protobuf_file = fetch_vgg_protobuffer_file(caffemodel_protobuffer)
    model = _parse_caffe_model(protobuf_file)

    if caffemodel_parsed is not None:
        joblib.dump(model, caffemodel_parsed)
    else:
        joblib.dump(model, default_filename)

    return model
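For orientation, a short usage sketch of the caching behaviour described in the docstring; the explicit path below is hypothetical, and VGG_PATH plus the helper functions are assumed to be defined in the same module.

# The first call with no cached pickle parses the protobuffer and joblib.dump()s it;
# subsequent calls simply joblib.load() the cached file.
model = fetch_vgg_architecture()                        # falls back to VGG_PATH/vgg.pickle
model = fetch_vgg_architecture('/tmp/vgg_parsed.pkl')   # hypothetical explicit cache path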
Example 7: train_model
def train_model(feats_csv):
    df = pd.DataFrame()
    df = pd.read_csv(feats_csv).iloc[:, 1:]
    y = np.ravel(df.iloc[:, -1:])
    X = np.array(df.iloc[:, :-1])

    ############ 15 best features selected using the ANOVA F-value score function ############
    X_new = SelectKBest(f_classif, k=15).fit_transform(X, y)
    selected_features = SelectKBest(f_classif, k=15).fit(X, y).get_support(indices=True)

    ############ KNN manhattan ############
    ##### preprocessing: data scaling #####
    min_max_scaler = MinMaxScaler()
    X_new = min_max_scaler.fit_transform(X_new)

    model = KNeighborsClassifier(n_neighbors=1, algorithm='brute', metric='manhattan', weights='uniform')
    model.fit(X_new, y)

    newdir = '../kNN_clfr'
    os.mkdir(newdir)
    joblib.dump(model, os.path.join(newdir, 'kNN.pkl'))
    return
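Only the fitted KNeighborsClassifier is dumped above; the SelectKBest selector and the MinMaxScaler that produced X_new are not saved, so the exact transform cannot be reproduced at prediction time. One common way around this, sketched below under the assumption that the same preprocessing is wanted and that the directory from the example exists, is to wrap the steps in a scikit-learn Pipeline and dump that single object.

from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.externals import joblib

# One persistable object holding feature selection, scaling and the classifier.
pipe = Pipeline([
    ('select', SelectKBest(f_classif, k=15)),
    ('scale', MinMaxScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=1, algorithm='brute',
                                 metric='manhattan', weights='uniform')),
])
pipe.fit(X, y)                                        # X, y as prepared in the example above
joblib.dump(pipe, '../kNN_clfr/kNN_pipeline.pkl')     # hypothetical file name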
Example 8: train_classifier
def train_classifier():
    pos_feat_path = positive_features_path
    neg_feat_path = negative_features_path
    model_path = classifier_model_path

    feature_vectors = []
    labels = []
    for feat_path in glob.glob(os.path.join(pos_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(1)
    for feat_path in glob.glob(os.path.join(neg_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(0)

    classifier = LinearSVC()
    print "Training classifier"
    classifier.fit(feature_vectors, labels)
    print "Classifier successfully trained"
    if not os.path.isdir(os.path.split(model_path)[0]):
        os.makedirs(os.path.split(model_path)[0])
    joblib.dump(classifier, model_path)
Example 9: trainClassifier
def trainClassifier(clf,
                    dir, model_file='adaptive',
                    data_file='train',
                    seed=1234,
                    ):
    '''
    Train classifier
    '''
    print 'Training classifier'
    data = np.loadtxt('{0}/train_{1}.dat'.format(dir, data_file))
    traindata = data[:, :-1]
    targetdata = data[:, -1]
    pdb.set_trace()
    if model_g == 'mlp':
        train_mlp((traindata, targetdata), save_file='{0}/{1}_F0_F1.pkl'.format(dir, model_file))
    else:
        rng = np.random.RandomState(seed)
        indices = rng.permutation(traindata.shape[0])
        traindata = traindata[indices]
        targetdata = targetdata[indices]
        scores = cross_validation.cross_val_score(clf, traindata, targetdata)
        print "Accuracy: {0} (+/- {1})".format(scores.mean(), scores.std() * 2)
        clf.fit(traindata, targetdata)
        #clf.plot_importance_matrix(vars_names)
        joblib.dump(clf, '{0}/{1}_F0_F1.pkl'.format(dir, model_file))
Example 10: setTestInputforNN
def setTestInputforNN(self, collection={}, sel_words=[]):
    list_of_strings = []
    list_of_salary = []
    count = 0
    sel_words_set = set(sel_words)
    sel_words_list = list(sel_words_set)
    for document in collection:
        count += 1
        title = document.getTitle()
        description = document.getDescription()
        salary = (int)(document.getSalaryNorm())
        words = re.split(" ", title) + re.split(" ", description)
        # words = [x for x in words if x in sel_words]
        wordsUnique = set(words)
        wordsUnique = wordsUnique & sel_words_set
        words = [x for x in words if x in wordsUnique]
        documentString = " ".join(words)
        list_of_strings.append(documentString)
        list_of_salary.append(salary)
        if not (count % 15000):
            break

    vectorizer = CountVectorizer(vocabulary=sel_words, min_df=1)
    self.inp = vectorizer.fit_transform(list_of_strings)
    from sklearn.externals import joblib
    joblib.dump(self.inp.tocsr(), "test_dataset_in.joblib")
    self.inp_size = len(list_of_strings)

    output = np.array(list_of_salary)
    self.target = output.reshape(len(list_of_strings), 1)
    joblib.dump(self.target, "test_dataset_out.joblib")
    return [self.inp, self.target]
Example 11: save_classifier
def save_classifier(cl, fn, use_joblib=True, **kwargs):
    """Save a classifier to disk.

    Parameters
    ----------
    cl : classifier object
        Pickleable object or a classify.VigraRandomForest object.
    fn : string
        Writeable path/filename.
    use_joblib : bool, optional
        Whether to prefer joblib persistence to pickle.
    kwargs : keyword arguments
        Keyword arguments to be passed on to either `pck.dump` or
        `joblib.dump`.

    Returns
    -------
    None

    Notes
    -----
    For joblib persistence, `compress=3` is the default.
    """
    if isinstance(cl, VigraRandomForest):
        cl.save_to_disk(fn)
    elif use_joblib and sklearn_available:
        if not kwargs.has_key('compress'):
            kwargs['compress'] = 3
        joblib.dump(cl, fn, **kwargs)
    else:
        with open(fn, 'w') as f:
            pck.dump(cl, f, protocol=kwargs.get('protocol', -1))
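A matching load helper is not part of the example; the sketch below is one plausible counterpart, assuming the classifier was saved via joblib or plain pickle (imported as pck, as above). It deliberately does not cover the VigraRandomForest branch, whose loading API is specific to that class.

def load_classifier(fn, use_joblib=True):
    """Hypothetical counterpart to save_classifier: restore a classifier from disk."""
    if use_joblib and sklearn_available:
        return joblib.load(fn)
    with open(fn, 'rb') as f:
        return pck.load(f)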
Example 12: trainModel
def trainModel():
    # data preprocessing
    data_train = joblib.load('data/data_train.pkl')
    label_train = joblib.load('data/label_train.pkl')
    print data_train.shape

    clf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.1, degree=0.1, gamma=1.0,
                  kernel='rbf', max_iter=-1, probability=False, random_state=None,
                  shrinking=True, tol=0.001, verbose=True)
    #clf.set_params(kernel='rbf')
    print clf
    print data_train.shape
    print label_train.shape

    print 'begin training....'
    clf.fit(data_train, label_train)
    print 'finish training....'
    print clf
    joblib.dump(clf, 'model/svm.pkl')
    return None
Example 13: rf_fit
def rf_fit():
    train_inp, valid_inp, train_target, valid_target = prepare_input()

    rf = RandomForestClassifier(random_state=31, n_jobs=-1, verbose=1, n_estimators=100, min_samples_split=5)
    start = time.time()
    rf.fit(train_inp, train_target)
    end = time.time()
    print "fitting took {:0.4} seconds".format(end - start)

    training_output = rf.predict_proba(train_inp)
    validation_output = rf.predict_proba(valid_inp)

    training_error = log_loss(train_target, training_output)
    validation_error = log_loss(valid_target, validation_output)

    print "Train error: {:02.4f}".format(training_error)
    print "Validation error: {:02.4f}".format(validation_error)

    joblib.dump(rf, rf_filename)
    return rf
Example 14: xgb_fit
def xgb_fit():
    train_inp, valid_inp, train_target, valid_target = prepare_input()

    dtrain = xgb.DMatrix(train_inp, label=train_target)
    dvalid = xgb.DMatrix(valid_inp)

    param = {'max_depth': 10, 'eta': 0.02, 'silent': 1, 'objective': 'binary:logistic'}
    param['nthread'] = 4
    param['eval_metric'] = 'auc'
    param['subsample'] = 0.7
    param['colsample_bytree'] = 0.7
    param['min_child_weight'] = 0
    param['booster'] = "gblinear"

    watchlist = [(dtrain, 'train')]
    num_round = 300
    early_stopping_rounds = 10

    bst = xgb.train(param, dtrain, num_round, watchlist, early_stopping_rounds=early_stopping_rounds)
    joblib.dump(bst, bst_filename)

    train_pred = bst.predict(xgb.DMatrix(train_inp))
    valid_pred = bst.predict(xgb.DMatrix(valid_inp))
Example 15: train
def train(trainingData, pklFile):
    # ========================================================================= #
    # =============== STEP 1. DEFINE OUTPUT LEARNT MODEL FILE ================ #
    # ========================================================================= #
    if (pklFile == ''):
        os.system('rm -rf learntModel & mkdir learntModel')
        pklFile = 'learntModel/learntModel.pkl'

    # ========================================================================= #
    # ================ STEP 2. PREPARE AND FORMATTING DATA =================== #
    # ========================================================================= #
    NUMBER_OF_FEATURES = len(trainingData[0]) - 1
    NUMBER_OF_TRAINING_POINTS = len(trainingData)
    x = trainingData[:, range(0, NUMBER_OF_FEATURES)]
    y = trainingData[:, NUMBER_OF_FEATURES]

    # ========================================================================= #
    # ============= STEP 3. DECLARE PRIMITIVES BEFORE THE PARTY ============== #
    # ========================================================================= #
    minSquareError = np.inf
    targetAlpha = None
    alphas = np.logspace(-10, -2, 500)

    # ========================================================================= #
    # ==== STEP 4. PERFORM FITTING WITH THE BEST ALPHA AND SAVE THE MODEL ==== #
    # ========================================================================= #
    clf = LogisticRegressionCV(Cs=alphas)
    clf.fit(x, y)
    joblib.dump(clf, pklFile)

    return {"intercept": clf.intercept_, "coef": clf.coef_, "alpha": clf.C_, "accuracy": clf.score(x, y)}