当前位置: 首页>>代码示例>>Python>>正文


Python LogisticRegressionCV.fit方法代码示例

本文整理汇总了Python中sklearn.linear_model.LogisticRegressionCV.fit方法的典型用法代码示例。如果您正苦于以下问题:Python LogisticRegressionCV.fit方法的具体用法?Python LogisticRegressionCV.fit怎么用?Python LogisticRegressionCV.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.linear_model.LogisticRegressionCV的用法示例。


在下文中一共展示了LogisticRegressionCV.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: LogitSelector

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def LogitSelector(x, y, cv, niter, njob):
    """Select features with an L1-penalised logistic regression and score them.

    An L1 ``LogisticRegressionCV`` is fitted on the full data; every column
    whose coefficient is non-zero is kept.  A plain ``LogisticRegression`` is
    then evaluated on ``niter`` random shuffle splits restricted to the kept
    columns, and finally refitted on all rows.

    Parameters
    ----------
    x : 2-D array; rows are indexed by split indices, columns by position.
    y : labels; binarised with ``LabelBinarizer`` and flattened.
    cv : number of folds for ``LogisticRegressionCV``; the held-out fraction
        of every shuffle split is ``1 / cv``.
    niter : number of shuffle splits used for evaluation.
    njob : passed to ``LogisticRegressionCV`` as ``n_jobs``.

    Returns
    -------
    An ``Mdc`` record holding the refitted classifier (``model``), the kept
    column indices (``idx``) and the mean accuracy, precision, recall, F1 and
    AUC over the shuffle splits.
    """
    # Force float division: with an integer ``cv`` under Python 2, ``1 / cv``
    # truncates to 0 and the evaluation splits would get an empty test set.
    t_size = 1.0 / cv

    lb = prep.LabelBinarizer()
    y = lb.fit_transform(y).ravel()

    model = LogisticRegressionCV(penalty='l1', solver='liblinear', refit=False, cv=cv, n_jobs=njob)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        warnings.simplefilter('ignore', ConvergenceWarning)
        model.fit(x, y)
    # Keep only the features the L1 penalty did not shrink to exactly zero.
    columns = np.arange(x.shape[1])[model.coef_.ravel() != 0]

    accu = []
    prec = []
    rec = []
    f1 = []
    au = []
    cls = LogisticRegression()
    gn_cvset = (Cvset(x[i][:, columns], y[i], x[j][:, columns], y[j])
                for (i, j) in ShuffleSplit(len(y), n_iter=niter, test_size=t_size))

    for cvt in gn_cvset:
        cls.fit(cvt.xtr, cvt.ytr)
        # Predict once per split and reuse the result for every metric.
        ypred = cls.predict(cvt.xte)
        accu.append(accuracy_score(cvt.yte, ypred))
        prec.append(precision_score(cvt.yte, ypred))
        rec.append(recall_score(cvt.yte, ypred))
        f1.append(f1_score(cvt.yte, ypred))
        au.append(__Auc(cls, cvt.xte, cvt.yte))

    # The returned classifier is trained on every row of the selected columns.
    cls.fit(x[:, columns], y)
    return Mdc(model=cls, idx=columns, accu=np.mean(accu),
               prec=np.mean(prec), rec=np.mean(rec), f1=np.mean(f1),
               au=np.mean(au))
开发者ID:CallMeXiaoChiZi,项目名称:cage-dev,代码行数:35,代码来源:logit_selector.py

示例2: classify

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def classify(_char):
    print 'to fetch data'
    start_time = time.time()
    char_count = Character.objects.filter(char=_char, is_correct=1).count()
    if char_count < 10:
        return
    char_lst = Character.objects.filter(char=_char)
    y, X, ty, tX, t_charid_lst, test_accuracy_lst = prepare_data_with_database(char_lst)
    if len(y) == 0 or len(ty) == 0:
        return
    if 1 == len(set(y)) or len(y) < 10:
        return
    fetch_negative_samples(_char, X, y)
    if len(y) == 0 or len(ty) == 0:
        return
    if 1 == len(set(y)) or len(y) < 50:
        return

    print "fetch data done, spent %s seconds." % int(time.time() - start_time)
    start_time = time.time()
    print "traning: data size: %d" % len(y)
    model = LogisticRegressionCV(cv=5, solver='liblinear', n_jobs=1)
    try:
        model.fit(X, y)
        print "training done, spent %s seconds." % int(time.time() - start_time)
        #print 'params: '
        #for k, v in model.get_params().iteritems():
        #    print '\t', k, ' : ', v
        print 'score: ', model.score(X, y)
    except Exception, e:
        print 'except: ', e
        traceback.print_exc()
        return
开发者ID:CoinLQ,项目名称:SegmentationCheck,代码行数:35,代码来源:tasks.py

示例3: lr_with_scale2

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def lr_with_scale2():
    """
    Fit a cross-validated logistic regression on standardised features and
    dump a submission for the scaler + classifier pipeline.

    Submission: lr_with_scale2_0704_03.csv
    E_val:
    E_in: 0.878996
    E_out: 0.8768131004917349
    """
    from sklearn.linear_model import LogisticRegressionCV
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline

    X, y = dataset.load_train()

    raw_scaler = StandardScaler()
    raw_scaler.fit(X)
    X_scaled = raw_scaler.transform(X)

    # NOTE(review): class_weight='auto' was replaced by 'balanced' in later
    # scikit-learn releases -- confirm against the version this project pins.
    clf = LogisticRegressionCV(Cs=50, cv=5, scoring='roc_auc', n_jobs=-1,
                               class_weight='auto')
    clf.fit(X_scaled, y)
    logger.debug('Best C: %f', clf.C_[0])
    logger.debug('Cs: %s', clf.Cs_)
    # scores_ is a dict of per-class score grids, so it must be rendered with
    # %s; %f cannot format it and the log record would be lost.
    logger.debug('Grid scores: %s', clf.scores_)
    logger.debug('Ein: %f', Util.auc_score(clf, X_scaled, y))

    # The already-fitted scaler and classifier are bundled so the submission
    # applies the same scaling to whatever data it is run on.
    IO.dump_submission(Pipeline([('scale_raw', raw_scaler),
                                 ('lr', clf)]), 'lr_with_scale2_0704_03')
开发者ID:Divergent914,项目名称:yakddcup2015,代码行数:29,代码来源:modeling.py

示例4: logistic_test

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def logistic_test(train_data, train_labels, test_data, test_labels, cv=False):
    """Fit a logistic regression, print a binary-classification report and
    return the test accuracy.

    Parameters
    ----------
    train_data, train_labels : data the classifier is fitted on.
    test_data, test_labels : data the classifier is evaluated on.  Only
        predicted/actual pairs whose values are both 0 or 1 are counted.
    cv : when true use ``LogisticRegressionCV``, otherwise
        ``LogisticRegression``.

    Returns
    -------
    The accuracy over the counted pairs, or 0 when no pair was counted.
    """
    # Perform logistic regression.
    clf = LogisticRegressionCV() if cv else LogisticRegression()
    clf.fit(train_data, train_labels)
    predicted_labels = clf.predict(test_data)

    # Count true positives, true negatives, false positives, false negatives.
    tp, tn, fp, fn = 0, 0, 0, 0
    for predicted, actual in zip(predicted_labels, test_labels):
        if predicted == 1 and actual == 1:
            tp += 1
        elif predicted == 0 and actual == 0:
            tn += 1
        elif predicted == 1 and actual == 0:
            fp += 1
        elif predicted == 0 and actual == 1:
            fn += 1

    # Compute statistics.  float() forces true division so the ratios are not
    # truncated to 0 or 1 under Python 2; empty denominators yield 0 instead
    # of raising ZeroDivisionError.
    total = tp + tn + fp + fn
    accuracy = 0 if total == 0 else float(tp + tn) / total
    precision = 0 if (tp + fp) == 0 else float(tp) / (tp + fp)
    recall = 0 if (tp + fn) == 0 else float(tp) / (tp + fn)

    # Print report.  Single-argument print calls produce the same output on
    # Python 2 and Python 3.
    print("Correctly classified {}/{}".format(tp + tn, total))
    print("Accuracy: {}".format(accuracy))
    print("Precision: {}".format(precision))
    print("Recall: {}".format(recall))
    print("tp: {}; tn: {}; fp: {}; fn {}".format(tp, tn, fp, fn))

    return accuracy
开发者ID:kulshrax,项目名称:cs224w,代码行数:33,代码来源:ml.py

示例5: LogitSelector

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def LogitSelector(x, y, cv, njob):
    """Select features with an L1-penalised logistic regression and score them.

    An L1 ``LogisticRegressionCV`` (without intercept) is fitted on the full
    data and every column with a non-zero coefficient is kept.  A plain
    ``LogisticRegression`` is then evaluated on the stratified folds using
    only the kept columns, and finally refitted on all rows.

    Parameters
    ----------
    x : 2-D array; rows are indexed by fold indices, columns by position.
    y : labels; binarised with ``LabelBinarizer`` and flattened.
    cv : number of folds, used both for ``LogisticRegressionCV`` and for the
        ``StratifiedKFold`` evaluation.
    njob : passed to ``LogisticRegressionCV`` as ``n_jobs``.

    Returns
    -------
    An ``Mdc`` record holding the refitted classifier (``model``), the kept
    column indices (``idx``) and the fold-averaged accuracy, precision,
    recall, F1 and AUC.
    """
    lb = prep.LabelBinarizer()
    y = lb.fit_transform(y).ravel()

    cls = LogisticRegression()

    skf = StratifiedKFold(y, n_folds=cv)
    model = LogisticRegressionCV(penalty='l1', solver='liblinear', fit_intercept=False, cv=cv, n_jobs=njob)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        warnings.simplefilter('ignore', ConvergenceWarning)
        model.fit(x, y)
    # Keep only the features the L1 penalty did not shrink to exactly zero.
    columns = np.arange(x.shape[1])[model.coef_.ravel() != 0]

    # Fit once per fold and derive every metric from that single fit; the
    # scores are collected in lists so np.average never receives a lazy
    # ``map`` object.
    accu, prec, rec, f1, au = [], [], [], [], []
    for idx_tr, idx_te in skf:
        cls.fit(x[idx_tr][:, columns], y[idx_tr])
        x_te = x[idx_te][:, columns]
        y_te = y[idx_te]
        y_pred = cls.predict(x_te)
        accu.append(accuracy_score(y_te, y_pred))
        prec.append(precision_score(y_te, y_pred))
        rec.append(recall_score(y_te, y_pred))
        f1.append(f1_score(y_te, y_pred))
        au.append(roc_auc_score(y_te, cls.predict_proba(x_te)[:, 1]))

    # The returned classifier is trained on every row of the selected columns.
    cls.fit(x[:, columns], y)
    return Mdc(model=cls, idx=columns, accu=np.average(accu),
               prec=np.average(prec), rec=np.average(rec),
               f1=np.average(f1), au=np.average(au))
开发者ID:bm2-lab,项目名称:cage,代码行数:33,代码来源:logit_selector.py

示例6: train

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def train(trainingData, pklFile):
    """Fit a cross-validated logistic regression and persist it with joblib.

    Parameters
    ----------
    trainingData : 2-D array whose last column is the target and whose other
        columns are the features.
    pklFile : path the fitted model is dumped to.  When it is the empty
        string, the ``learntModel`` directory is recreated from scratch and
        ``learntModel/learntModel.pkl`` is used.

    Returns
    -------
    dict with the fitted ``intercept``, ``coef``, the selected regularisation
    value(s) under ``alpha`` (taken from ``C_``) and the ``accuracy`` measured
    on the training data itself.
    """
    import shutil

    # Step 1: decide where the learnt model is written.
    if pklFile == '':
        # Remove and recreate sequentially.  The previous shell command used
        # a single '&', which ran the removal in the background and raced
        # against the mkdir.
        shutil.rmtree('learntModel', ignore_errors=True)
        os.makedirs('learntModel')
        pklFile = 'learntModel/learntModel.pkl'

    # Step 2: split the matrix into features and target (last column).
    NUMBER_OF_FEATURES = len(trainingData[0]) - 1
    x = trainingData[:, :NUMBER_OF_FEATURES]
    y = trainingData[:, NUMBER_OF_FEATURES]

    # Step 3: candidate values handed to LogisticRegressionCV as ``Cs``.
    alphas = np.logspace(-10, -2, 500)

    # Step 4: fit with cross-validated selection and save the model.
    clf = LogisticRegressionCV(Cs=alphas)
    clf.fit(x, y)
    joblib.dump(clf, pklFile)

    return {"intercept": clf.intercept_, "coef": clf.coef_, "alpha": clf.C_, "accuracy": clf.score(x, y)}
开发者ID:ZAZAZakari,项目名称:ML-Algorithm,代码行数:34,代码来源:logisticRegression.py

示例7: optimal_l2

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def optimal_l2(X, y):
    """
    Find the optimal level of L2 regularization for logistic regression.

    Runs a 10-fold ``LogisticRegressionCV`` over a grid of 50 candidate
    values and returns the fitted estimator's ``C_`` attribute.
    """
    searcher = LogisticRegressionCV(Cs=50, cv=10)
    return searcher.fit(X, y).C_
开发者ID:thomasbrawner,项目名称:python_tools,代码行数:9,代码来源:marginal_effects_example.py

示例8: make_predictions

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def make_predictions():
    """Fit the model, report validation log loss and write test predictions.

    Reads the module-level ``trainX``, ``train``, ``valX``, ``val``,
    ``IMG_DIR`` and ``STD_SIZE``.  Test features come from ``test_pca.csv``
    when that file exists, otherwise from ``prepare_test_data``.  The result
    is written to ``kaggle_430_2pm.csv`` with one ``colN`` probability column
    per class; images without a prediction row get a uniform probability.
    """
    # Fit Logistic Regression Model
    # NOTE(review): newer scikit-learn releases name this scorer
    # 'neg_log_loss' -- confirm against the version in use.
    logreg = LogisticRegressionCV(scoring='log_loss', n_jobs=-1, verbose=1, random_state=6156)
    logreg.fit(X=trainX, y=train['y'].values)

    # Validate
    pred_pr = logreg.predict_proba(valX)
    loss = log_loss(y_true=val['y'].values, y_pred=pred_pr)
    print("Validation log loss: %s" % loss)

    # Get Test predictions
    img_files = [os.path.join(IMG_DIR, f) for f in os.listdir(IMG_DIR)]

    if os.path.isfile('test_pca.csv'):
        test_pca = pd.read_csv('test_pca.csv', dtype={'id': str})
    else:
        test_pca = prepare_test_data(img_files, STD_SIZE)

    # The first column of test_pca is skipped; the rest are the features.
    test_predictions = logreg.predict_proba(test_pca.values[:, 1:])
    # Derive the class count from the model output instead of hard-coding 8.
    n_classes = test_predictions.shape[1]

    # An image id is every digit found in its path.
    # NOTE(review): digits in IMG_DIR itself would leak into the id -- confirm
    # the directory name contains none.
    id_s = [re.sub(r'\D', '', f) for f in img_files]
    df_id = pd.DataFrame({'id': id_s})
    col_names = ['col' + str(i) for i in range(1, n_classes + 1)]
    df_yhat = pd.DataFrame(data=test_predictions, columns=col_names)
    df_id_yhat = pd.concat([test_pca['id'], df_yhat], axis=1)
    # Left merge keeps every image; ids without predictions become NaN rows
    # and are filled with a uniform probability.
    yhat = df_id.merge(df_id_yhat, on='id', how='left')
    yhat.fillna(1. / n_classes, inplace=True)
    yhat.to_csv('kaggle_430_2pm.csv', index=False)
开发者ID:keithgw,项目名称:ML_Competition,代码行数:29,代码来源:build_model.py

示例9: lr_with_fs

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def lr_with_fs():
    """
    Logistic regression on RFE-pruned features, standardised before and
    after pruning; prints the in-sample AUC and writes a submission.

    Submission: lr_with_fs_0620_02.csv
    E_val: <missing>
    E_in: 0.856252488379
    E_out: 0.8552577388980213
    """
    from sklearn.linear_model import LogisticRegressionCV
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline

    X = util.fetch(util.cache_path('train_X_before_2014-08-01_22-00-47'))
    y = util.fetch(util.cache_path('train_y_before_2014-08-01_22-00-47'))

    # First standardisation, applied to the raw features.
    scaler_before = StandardScaler()
    X_scaled = scaler_before.fit_transform(X)

    # Cached feature selector; it is applied here, not refitted.
    selector = util.fetch(util.cache_path('feature_selection.RFE.21'))
    X_pruned = selector.transform(X_scaled)

    # Second standardisation, applied to the surviving features.
    scaler_after = StandardScaler()
    X_new = scaler_after.fit_transform(X_pruned)

    clf = LogisticRegressionCV(cv=10, scoring='roc_auc', n_jobs=-1)
    clf.fit(X_new, y)
    print(auc_score(clf, X_new, y))

    steps = [('scale_raw', scaler_before),
             ('rfe', selector),
             ('scale_new', scaler_after),
             ('lr', clf)]
    to_submission(Pipeline(steps), 'lr_with_fs_0620_02')
开发者ID:Divergent914,项目名称:kddcup2015,代码行数:35,代码来源:modeling.py

示例10: mdl_1d_cat

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def mdl_1d_cat(x, y):
    """Build a univariate model on ``x`` and return its AUC plus a plot.

    ``x`` is passed through ``sb_cutz`` first when it is numeric with more
    than 10 distinct values, then dummy-encoded (with a NaN indicator
    column) and fed to a ``LogisticRegressionCV`` scored by ROC AUC.

    Returns a tuple ``(auc, data_uri)`` where ``auc`` is computed on the
    training data and ``data_uri`` is the ``plot_cat`` figure saved to an
    in-memory buffer, base64-encoded and quoted.
    """
    if x.nunique() > 10 and com.is_numeric_dtype(x):
        x = sb_cutz(x)

    dummies = pd.get_dummies(x, dummy_na=True)
    model = LogisticRegressionCV(scoring='roc_auc')
    model.fit(dummies, y)

    try:
        # Probability of the last class column.
        preds = model.predict_proba(dummies)[:, -1]
    except ValueError:
        # NOTE(review): this drops into the debugger; if execution continues
        # from here ``preds`` is unbound and roc_auc_score below will fail.
        Tracer()()

    figure = plot_cat(x, y)

    buf = BytesIO()
    figure.savefig(buf)
    buf.seek(0)

    aucz = roc_auc_score(y, preds)
    encoded = quote(base64.b64encode(buf.getvalue()))
    cmatrix = 'data:image/png;base64,' + encoded
    plt.close()
    return aucz, cmatrix
开发者ID:bartlesy,项目名称:pandas-profiling,代码行数:29,代码来源:sb_univar.py

示例11: Fraud

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
class Fraud(object):
    """Wrapper around a ``LogisticRegressionCV`` model with fitting,
    single-row prediction and pickle persistence."""

    def __init__(self):
        # The estimator; None until fit() or load_model() has run.
        self.model = None
        # True once a model has been fitted or loaded.
        self.fitted = False

    def fit(self, jsonfile, target=0.3):
        """Featurize ``jsonfile``, oversample to ``target`` and fit the model.

        ``target`` is forwarded unchanged to ``oversample``.
        """
        self.model = LogisticRegressionCV(cv=15, scoring='recall')
        X, y = featurize_data(jsonfile)

        # Balance the classes
        X_oversample, y_oversample = oversample(X, y, target)
        # Single-argument print: same output on Python 2 and Python 3.
        print('%s %s' % (X_oversample, y_oversample))

        # Fit the model
        self.model.fit(X_oversample, y_oversample)
        self.fitted = True

    def predict(self, X_test):
        """Return the prediction for the first row of ``X_test`` only."""
        return self.model.predict(X_test)[0]

    def save_model(self, picklefile):
        """Pickle the current model to ``picklefile``."""
        # Pickle data is bytes: the file must be opened in binary mode.
        with open(picklefile, 'wb') as f:
            pickle.dump(self.model, f)

    def load_model(self, picklefile):
        """Load a pickled model from ``picklefile`` and mark it as fitted."""
        # SECURITY: unpickling executes arbitrary code embedded in the file;
        # only load files from a trusted source.
        with open(picklefile, 'rb') as f:
            self.model = pickle.load(f)
        self.fitted = True
开发者ID:daryleserrant,项目名称:FraudDetectionCaseStudy,代码行数:30,代码来源:model_utils.py

示例12: logistic_test_using_cosine

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def logistic_test_using_cosine(score_feature=False):
    """Train a cross-validated logistic regression on SNLI cosine features
    and log its accuracy on the SNLI test split.

    Only the first third of the cleaned training rows is used.  Rows with a
    missing ``sentence2`` or a ``'-'`` gold label are dropped from both
    splits.

    Parameters
    ----------
    score_feature : when true, the probabilities stored in
        ``./snli/logistic_score_snli.pkl`` are appended to the cosine
        features as one extra column.
    """
    logger.info('using cosine features in logistic regression')
    if score_feature:
        logger.info('also use score feature')
    # Candidate C values: 2**0..2**9 followed by 3**1..3**9.
    Cs = [2**t for t in range(0, 10, 1)]
    Cs.extend([3**t for t in range(1, 10, 1)])
    snli2cosine = SNLI2Cosine('/home/junfeng/word2vec/GoogleNews-vectors-negative300.bin')
    logger.info('loading snli data ...')
    train_df = pd.read_csv('./snli/snli_1.0/snli_1.0_train.txt', delimiter='\t')
    train_df = train_df[pd.notnull(train_df.sentence2)]
    train_df = train_df[train_df.gold_label != '-']
    # Floor division keeps the slice bound an integer; true division yields a
    # float, which is not a valid slice index.
    train_df = train_df[:(len(train_df) // 3)]
    train_df.reset_index(inplace=True)
    test_df = pd.read_csv('./snli/snli_1.0/snli_1.0_test.txt', delimiter='\t')
    test_df = test_df[pd.notnull(test_df.sentence2)]
    test_df = test_df[test_df.gold_label != '-']
    test_df.reset_index(inplace=True)
    X_train, train_labels, X_test, test_labels = snli2cosine.calculate_cosine_features(train_df, test_df)
    if score_feature:
        y_train_proba, y_test_proba = joblib.load('./snli/logistic_score_snli.pkl')
        # Reshape to a single column so the scores can be concatenated as a
        # feature.
        X_train = np.concatenate([X_train, y_train_proba.reshape((-1, 1))], axis=1)
        X_test = np.concatenate([X_test, y_test_proba.reshape((-1, 1))], axis=1)
    logger.info('X_train.shape: {0}'.format(X_train.shape))
    logger.info('X_test.shape: {0}'.format(X_test.shape))

    logreg = LogisticRegressionCV(Cs=Cs, cv=3, n_jobs=10, random_state=919)
    logreg.fit(X_train, train_labels)
    logger.info('best C is {0}'.format(logreg.C_))
    y_test_predicted = logreg.predict(X_test)
    acc = accuracy_score(test_labels, y_test_predicted)
    logger.info('test data predicted accuracy: {0}'.format(acc))
开发者ID:junfenglx,项目名称:skip-thoughts,代码行数:35,代码来源:eval_snli_dataset.py

示例13: classify_maxEnt

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def classify_maxEnt(train_X, train_Y, test_X):
    """Fit a default ``LogisticRegressionCV`` on the training data and
    return its predictions for ``test_X``."""
    print("Classifying using Maximum Entropy ...")
    classifier = LogisticRegressionCV().fit(train_X, train_Y)
    return classifier.predict(test_X)
开发者ID:shalinc,项目名称:ML-Sentiment-Analysis-of-Movie-Reviews-from-Twitter,代码行数:10,代码来源:sentiment_analysis.py

示例14: build_classifier_lr

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def build_classifier_lr(data, labels, regularization='l2', **kwargs):
    """Fit and return a class-balanced logistic regression.

    For ``regularization`` equal to ``'l1'`` or ``'l2'`` a 10-fold
    ``LogisticRegressionCV`` over 100 candidate C values is used.  Any other
    value selects a plain ``LogisticRegression`` with a very large C.  Extra
    keyword arguments are forwarded to whichever estimator is built.
    """
    if regularization in ('l1', 'l2'):
        estimator = LogisticRegressionCV(
            penalty=regularization, Cs=100, cv=10, solver='liblinear',
            refit=False, n_jobs=10, verbose=1, class_weight='balanced',
            **kwargs)
    else:
        # lambda = 1/C, so a huge C drives lambda towards 0: this is how
        # "no regularization" is approximated.
        estimator = LogisticRegression(
            C=100000000., class_weight='balanced', solver='liblinear',
            n_jobs=10, verbose=1, **kwargs)
    estimator.fit(data, labels)
    return estimator
开发者ID:PaulZhutovsky,项目名称:rsn_analysis,代码行数:12,代码来源:ml_utils.py

示例15: fitModels

# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def fitModels(training_data, training_labels, test_data, test_labels):
    """Train a ``LogisticRegressionCV`` and a ``LinearSVC`` and report both.

    The logistic model is printed and reported through ``getReport``; the
    SVC's test-set predictions are reported through ``getPRF``.

    Returns the tuple ``(logistic_model, linear_svc)``.
    """
    divider = '=' * 50
    print('=========fitModels========:')

    # Earlier versions also tried RandomForestClassifier,
    # GradientBoostingClassifier, MultinomialNB, GaussianNB, svm.SVC and
    # DecisionTreeClassifier here; those experiments are disabled.

    print('LogisticRegression: ')
    logit = LogisticRegressionCV()
    logit.fit(training_data, training_labels)  # train the model
    print(logit)
    getReport(logit, test_data, test_labels)
    print(divider)

    print('LinearSVC: ')
    svc = LinearSVC()
    svc.fit(training_data, training_labels)  # train the model
    svc_predictions = svc.predict(test_data)  # predict on the test set
    getPRF(svc_predictions, test_labels)
    print(divider)

    return logit, svc
开发者ID:fydlzr,项目名称:CustomersLost-predict,代码行数:56,代码来源:classifier.py


注:本文中的sklearn.linear_model.LogisticRegressionCV.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。