This article collects typical usage examples of the Python function sklearn.externals.joblib.dump. If you have been wondering what dump does, how to use it, or want to see worked examples, the curated code samples below may help.
Fifteen code examples of the dump function are shown below, ordered by popularity by default.
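As a quick orientation before the examples, here is a minimal sketch of the round trip: persist a fitted estimator with dump and restore it with load. The estimator and file name are placeholders rather than code from the examples below; note also that sklearn.externals.joblib has been removed from newer scikit-learn releases, where the standalone joblib package is imported directly.

from sklearn.externals import joblib   # on newer scikit-learn: import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
clf = LogisticRegression().fit(X, y)          # placeholder estimator
joblib.dump(clf, 'model.pkl')                 # serialize the fitted model to disk
restored = joblib.load('model.pkl')           # restore it in a later session
print(restored.score(X, y))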
Example 1: main
def main():
    pos_features_path = '/home/retailyze/Downloads/INRIAPerson/checkb/cropped/svm/featuresPos160_60.npy'
    neg_features_path = '/home/retailyze/Downloads/INRIAPerson/checkb/cropped/svm/featuresNeg160_60.npy'
    saving_loc = '/home/retailyze/Downloads/INRIAPerson/checkb/cropped/svm/'
    pos_features = np.load(pos_features_path)[:, 0::3]
    neg_features = np.load(neg_features_path)[:, 0::3]
    train, val = prepare_features(pos_features, neg_features, True, saving_loc)
    del pos_features
    del neg_features
    clf = svm.SVC(kernel='rbf')
    logging.info('starts training')
    clf.fit(train[:, 1:], train[:, 0])
    del train
    logging.info('starts predicting')
    predicted = clf.predict(val[:, 1:])
    conf_mat = confusion_matrix(predicted, val[:, 0])
    acc = accuracy_score(val[:, 0], predicted)
    del val
    del predicted
    logging.info('Confusion matrix: %s' % conf_mat)
    logging.info('Accuracy: %s' % acc)
    logging.info('saving model')
    joblib.dump(clf, join(saving_loc, 'svm_rbf_scaled.pkl'))
Example 2: predict_test
def predict_test(self, clf, tag):
    np.random.seed(1919)
    if os.path.isdir('../model/' + tag) == False:
        os.mkdir('../model/' + tag)
        print "Dir made : " + str(datetime.datetime.now())
    print "Fit Started : " + str(datetime.datetime.now())
    clf.fit(self.X, self.y)
    print "Dump Started : " + str(datetime.datetime.now())
    joblib.dump(clf, '../model/' + tag + '/' + tag + '.pkl')
    print "Prediction Started : " + str(datetime.datetime.now())
    output_arr = clf.predict_proba(self.x_test)
    f = open("../data/output_" + str(tag), "w")
    f.write("id,Class_1,Class_2,Class_3,Class_4,Class_5,Class_6,Class_7,Class_8,Class_9\n")
    i = 1
    for row in output_arr:
        row = map(str, row)
        f.write(str(i) + "," + str(",".join(row)) + "\n")
        i += 1
    f.close()
    print "ALL DONE : " + str(datetime.datetime.now())
Example 3: train
def train(self, seg_corpus, dep_corpus, path=None):
    assert seg_corpus.keys() == dep_corpus.keys()
    features, labels = self.extract_features_from_corpus(
        dep_corpus, seg_corpus=seg_corpus)
    self._train(features, labels)
    if path is not None:
        joblib.dump(self.pipeline, path, compress=1, cache_size=1e9)
Example 4: train_svm
def train_svm(feedback, classes):
    print "Building n-grams"
    X_train_counts = count_vect.fit_transform(feedback)  # convert the strings to bag-of-words form, using bi-grams
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)  # weight the words from the bag-of-words form
    '''
    The vocabulary built here on the training set must be saved for later classification:
    the set of words seen at classification time will differ from the set seen during
    training, but the mapping from word to index must stay the same. In other words, if
    the word "animal" is mapped to the integer 3 during training, it has to be mapped to
    3 again during classification. (A reload sketch follows this example.)
    '''
    pickle.dump(count_vect.vocabulary_, open("feature.pkl", "wb"))  # saving vocab
    print "Saving words features"
    c = svm.SVC(kernel='rbf', gamma=0.001, C=100)
    print "training SVM"
    c.fit(X_train_tfidf, classes)  # training the SVM
    print "Training completed..."
    joblib.dump(c, 'filename.pkl', compress=9)  # saving the fitted classifier (support vectors)
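The comment above stresses that the training-time vocabulary must be reused at prediction time. A minimal reload sketch follows; it assumes the same file names as in the example ("feature.pkl", 'filename.pkl') and a hypothetical list of new feedback strings. Note that the fitted tfidf_transformer from training would ideally be persisted in the same way, since refitting it on new data changes the weights.

import pickle
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.externals import joblib

vocab = pickle.load(open("feature.pkl", "rb"))                       # vocabulary saved during training
count_vect = CountVectorizer(ngram_range=(1, 2), vocabulary=vocab)   # ngram_range assumed to match training
clf = joblib.load('filename.pkl')                                    # classifier saved with joblib.dump

new_feedback = ["hypothetical new feedback string"]                  # placeholder input
X_counts = count_vect.transform(new_feedback)                        # fixed vocabulary, no refitting needed
X_tfidf = TfidfTransformer().fit_transform(X_counts)                 # see the note above about persisting the fitted transformer
print(clf.predict(X_tfidf))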
Example 5: train
def train(self):
    with gzip.open(constants.TRAIN_EXPANDED, 'r') as source:
        reader = csv.reader(source)
        next(reader, None)
        n_sample = 0
        labels = []
        features = []
        for feature_vector in reader:
            s_features = feature_vector[2:6] + feature_vector[7:]
            s_label = int(feature_vector[1])
            features.append(s_features)
            labels.append(s_label)
            # print 'features', s_features
            # print 'labels', s_label
            # print 'norm features', normalized_features
            n_sample += 1
            if n_sample % 500000 == 0:
                self.clf.partial_fit(features, labels)
                features = []
                labels = []
                print 'Processing sample [%s]' % n_sample
        # note: rows remaining after the last full 500000-row chunk are never passed to partial_fit
        print 'Finished training'
        print 'Estimated parameters [%s]' % self.clf.get_params()
        # saving model into file
        joblib.dump(self.clf, constants.MODEL_FILENAME, compress=9)
Example 6: fetch_vgg_architecture
def fetch_vgg_architecture(caffemodel_parsed=None, caffemodel_protobuffer=None):
    """Fetch a pickled version of the caffe model, represented as list of
    dictionaries."""
    default_filename = os.path.join(VGG_PATH, 'vgg.pickle')
    if caffemodel_parsed is not None:
        if os.path.exists(caffemodel_parsed):
            return joblib.load(caffemodel_parsed)
        else:
            if os.path.exists(default_filename):
                import warnings
                warnings.warn('Did not find %s, but found %s. Loading it.' %
                              (caffemodel_parsed, default_filename))
                return joblib.load(default_filename)
    else:
        if os.path.exists(default_filename):
            return joblib.load(default_filename)

    # We didn't find the file: let's create it by parsing the protobuffer
    protobuf_file = fetch_vgg_protobuffer_file(caffemodel_protobuffer)
    model = _parse_caffe_model(protobuf_file)

    if caffemodel_parsed is not None:
        joblib.dump(model, caffemodel_parsed)
    else:
        joblib.dump(model, default_filename)

    return model
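For orientation, a short usage sketch of the caching behaviour described in the docstring; the explicit path below is hypothetical, and VGG_PATH plus the helper functions are assumed to be defined in the same module.

# The first call with no cached pickle parses the protobuffer and joblib.dump()s it;
# subsequent calls simply joblib.load() the cached file.
model = fetch_vgg_architecture()                        # falls back to VGG_PATH/vgg.pickle
model = fetch_vgg_architecture('/tmp/vgg_parsed.pkl')   # hypothetical explicit cache path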
Example 7: train_model
def train_model(feats_csv):
    df = pd.DataFrame()
    df = pd.read_csv(feats_csv).iloc[:, 1:]
    y = np.ravel(df.iloc[:, -1:])
    X = np.array(df.iloc[:, :-1])

    ############ 15 best features selected using the ANOVA F-value score function ############
    X_new = SelectKBest(f_classif, k=15).fit_transform(X, y)
    selected_features = SelectKBest(f_classif, k=15).fit(X, y).get_support(indices=True)

    ############ KNN manhattan ############
    ##### preprocessing: data scaling #####
    min_max_scaler = MinMaxScaler()
    X_new = min_max_scaler.fit_transform(X_new)

    model = KNeighborsClassifier(n_neighbors=1, algorithm='brute', metric='manhattan', weights='uniform')
    model.fit(X_new, y)

    newdir = '../kNN_clfr'
    os.mkdir(newdir)
    joblib.dump(model, os.path.join(newdir, 'kNN.pkl'))
    return
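Only the fitted KNeighborsClassifier is dumped above; the SelectKBest selector and the MinMaxScaler that produced X_new are not saved, so the exact transform cannot be reproduced at prediction time. One common way around this, sketched below under the assumption that the same preprocessing is wanted and that the directory from the example exists, is to wrap the steps in a scikit-learn Pipeline and dump that single object.

from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.externals import joblib

# One persistable object holding feature selection, scaling and the classifier.
pipe = Pipeline([
    ('select', SelectKBest(f_classif, k=15)),
    ('scale', MinMaxScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=1, algorithm='brute',
                                 metric='manhattan', weights='uniform')),
])
pipe.fit(X, y)                                        # X, y as prepared in the example above
joblib.dump(pipe, '../kNN_clfr/kNN_pipeline.pkl')     # hypothetical file name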
Example 8: train_classifier
def train_classifier():
    pos_feat_path = positive_features_path
    neg_feat_path = negative_features_path
    model_path = classifier_model_path

    feature_vectors = []
    labels = []
    for feat_path in glob.glob(os.path.join(pos_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(1)
    for feat_path in glob.glob(os.path.join(neg_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(0)

    classifier = LinearSVC()
    print "Training classifier"
    classifier.fit(feature_vectors, labels)
    print "Classifier successfully trained"
    if not os.path.isdir(os.path.split(model_path)[0]):
        os.makedirs(os.path.split(model_path)[0])
    joblib.dump(classifier, model_path)
Example 9: trainClassifier
def trainClassifier(clf,
                    dir, model_file='adaptive',
                    data_file='train',
                    seed=1234,
                    ):
    '''
    Train classifier
    '''
    print 'Training classifier'
    data = np.loadtxt('{0}/train_{1}.dat'.format(dir, data_file))
    traindata = data[:, :-1]
    targetdata = data[:, -1]
    pdb.set_trace()
    if model_g == 'mlp':
        train_mlp((traindata, targetdata), save_file='{0}/{1}_F0_F1.pkl'.format(dir, model_file))
    else:
        rng = np.random.RandomState(seed)
        indices = rng.permutation(traindata.shape[0])
        traindata = traindata[indices]
        targetdata = targetdata[indices]
        scores = cross_validation.cross_val_score(clf, traindata, targetdata)
        print "Accuracy: {0} (+/- {1})".format(scores.mean(), scores.std() * 2)
        clf.fit(traindata, targetdata)
        #clf.plot_importance_matrix(vars_names)
        joblib.dump(clf, '{0}/{1}_F0_F1.pkl'.format(dir, model_file))
Example 10: setTestInputforNN
def setTestInputforNN(self, collection={}, sel_words=[]):
    list_of_strings = []
    list_of_salary = []
    count = 0
    sel_words_set = set(sel_words)
    sel_words_list = list(sel_words_set)
    for document in collection:
        count += 1
        title = document.getTitle()
        description = document.getDescription()
        salary = (int)(document.getSalaryNorm())
        words = re.split(" ", title) + re.split(" ", description)
        # words = [x for x in words if x in sel_words]
        wordsUnique = set(words)
        wordsUnique = wordsUnique & sel_words_set
        words = [x for x in words if x in wordsUnique]
        documentString = " ".join(words)
        list_of_strings.append(documentString)
        list_of_salary.append(salary)
        if not (count % 15000):
            break

    vectorizer = CountVectorizer(vocabulary=sel_words, min_df=1)
    self.inp = vectorizer.fit_transform(list_of_strings)
    from sklearn.externals import joblib
    joblib.dump(self.inp.tocsr(), "test_dataset_in.joblib")
    self.inp_size = len(list_of_strings)

    output = np.array(list_of_salary)
    self.target = output.reshape(len(list_of_strings), 1)
    joblib.dump(self.target, "test_dataset_out.joblib")
    return [self.inp, self.target]
Example 11: save_classifier
def save_classifier(cl, fn, use_joblib=True, **kwargs):
    """Save a classifier to disk.

    Parameters
    ----------
    cl : classifier object
        Pickleable object or a classify.VigraRandomForest object.
    fn : string
        Writeable path/filename.
    use_joblib : bool, optional
        Whether to prefer joblib persistence to pickle.
    kwargs : keyword arguments
        Keyword arguments to be passed on to either `pck.dump` or
        `joblib.dump`.

    Returns
    -------
    None

    Notes
    -----
    For joblib persistence, `compress=3` is the default.
    """
    if isinstance(cl, VigraRandomForest):
        cl.save_to_disk(fn)
    elif use_joblib and sklearn_available:
        if not kwargs.has_key('compress'):
            kwargs['compress'] = 3
        joblib.dump(cl, fn, **kwargs)
    else:
        with open(fn, 'w') as f:
            pck.dump(cl, f, protocol=kwargs.get('protocol', -1))
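A matching load helper is not part of the example; the sketch below is one plausible counterpart, assuming the classifier was saved via joblib or plain pickle (imported as pck, as above). It deliberately does not cover the VigraRandomForest branch, whose loading API is specific to that class.

def load_classifier(fn, use_joblib=True):
    """Hypothetical counterpart to save_classifier: restore a classifier from disk."""
    if use_joblib and sklearn_available:
        return joblib.load(fn)
    with open(fn, 'rb') as f:
        return pck.load(f)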
Example 12: trainModel
def trainModel():
    # data preprocessing
    data_train = joblib.load('data/data_train.pkl')
    label_train = joblib.load('data/label_train.pkl')
    print data_train.shape

    clf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.1, degree=0.1, gamma=1.0,
                  kernel='rbf', max_iter=-1, probability=False, random_state=None,
                  shrinking=True, tol=0.001, verbose=True)
    #clf.set_params(kernel='rbf')
    print clf
    print data_train.shape
    print label_train.shape

    print 'begin training....'
    clf.fit(data_train, label_train)
    print 'finish training....'
    print clf
    joblib.dump(clf, 'model/svm.pkl')
    return None
Example 13: rf_fit
def rf_fit():
    train_inp, valid_inp, train_target, valid_target = prepare_input()

    rf = RandomForestClassifier(random_state=31, n_jobs=-1, verbose=1, n_estimators=100, min_samples_split=5)
    start = time.time()
    rf.fit(train_inp, train_target)
    end = time.time()
    print "fitting took {:0.4} seconds".format(end - start)

    training_output = rf.predict_proba(train_inp)
    validation_output = rf.predict_proba(valid_inp)

    training_error = log_loss(train_target, training_output)
    validation_error = log_loss(valid_target, validation_output)

    print "Train error: {:02.4f}".format(training_error)
    print "Validation error: {:02.4f}".format(validation_error)

    joblib.dump(rf, rf_filename)
    return rf
Example 14: xgb_fit
def xgb_fit():
    train_inp, valid_inp, train_target, valid_target = prepare_input()

    dtrain = xgb.DMatrix(train_inp, label=train_target)
    dvalid = xgb.DMatrix(valid_inp)

    param = {'max_depth': 10, 'eta': 0.02, 'silent': 1, 'objective': 'binary:logistic'}
    param['nthread'] = 4
    param['eval_metric'] = 'auc'
    param['subsample'] = 0.7
    param['colsample_bytree'] = 0.7
    param['min_child_weight'] = 0
    param['booster'] = "gblinear"

    watchlist = [(dtrain, 'train')]
    num_round = 300
    early_stopping_rounds = 10

    bst = xgb.train(param, dtrain, num_round, watchlist, early_stopping_rounds=early_stopping_rounds)
    joblib.dump(bst, bst_filename)

    train_pred = bst.predict(xgb.DMatrix(train_inp))
    valid_pred = bst.predict(xgb.DMatrix(valid_inp))
Example 15: train
def train(trainingData, pklFile):
    # ========================================================================= #
    # =============== STEP 1. DEFINE OUTPUT LEARNT MODEL FILE ================ #
    # ========================================================================= #
    if (pklFile == ''):
        os.system('rm -rf learntModel & mkdir learntModel')
        pklFile = 'learntModel/learntModel.pkl'

    # ========================================================================= #
    # ================ STEP 2. PREPARE AND FORMATTING DATA =================== #
    # ========================================================================= #
    NUMBER_OF_FEATURES = len(trainingData[0]) - 1
    NUMBER_OF_TRAINING_POINTS = len(trainingData)
    x = trainingData[:, range(0, NUMBER_OF_FEATURES)]
    y = trainingData[:, NUMBER_OF_FEATURES]

    # ========================================================================= #
    # ============= STEP 3. DECLARE PRIMITIVES BEFORE THE PARTY ============== #
    # ========================================================================= #
    minSquareError = np.inf
    targetAlpha = None
    alphas = np.logspace(-10, -2, 500)

    # ========================================================================= #
    # ==== STEP 4. PERFORM FITTING WITH THE BEST ALPHA AND SAVE THE MODEL ==== #
    # ========================================================================= #
    clf = LogisticRegressionCV(Cs=alphas)
    clf.fit(x, y)
    joblib.dump(clf, pklFile)

    return {"intercept": clf.intercept_, "coef": clf.coef_, "alpha": clf.C_, "accuracy": clf.score(x, y)}