

Python linear_model.LogisticRegressionCV Class Code Examples

This article collects typical usage examples of the Python class sklearn.linear_model.LogisticRegressionCV. If you are wondering what LogisticRegressionCV does, how it is used, or want to see it in real code, the selected class examples below should help.


The following 15 code examples of the LogisticRegressionCV class are drawn from open-source projects and are ordered by popularity by default.
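
Before the project-specific examples, here is a minimal self-contained sketch of the typical LogisticRegressionCV workflow. It is not taken from any of the projects below; the synthetic dataset and the parameter choices (Cs=10, cv=5, scoring='roc_auc') are illustrative assumptions.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split

# Illustrative synthetic binary classification data.
X, y = make_classification(n_samples=500, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Cs=10 tries 10 values of the inverse regularization strength C on a log scale;
# the best value is chosen by 5-fold cross-validation using ROC AUC.
clf = LogisticRegressionCV(Cs=10, cv=5, scoring='roc_auc', max_iter=1000)
clf.fit(X_train, y_train)

print('best C per class:', clf.C_)
print('held-out accuracy:', clf.score(X_test, y_test))

The examples below follow the same fit/predict pattern, differing mainly in how the features are prepared and which LogisticRegressionCV parameters are tuned.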

Example 1: classify

def classify(_char):
    print('fetching data')
    start_time = time.time()
    char_count = Character.objects.filter(char=_char, is_correct=1).count()
    if char_count < 10:
        return
    char_lst = Character.objects.filter(char=_char)
    y, X, ty, tX, t_charid_lst, test_accuracy_lst = prepare_data_with_database(char_lst)
    if len(y) == 0 or len(ty) == 0:
        return
    if 1 == len(set(y)) or len(y) < 10:
        return
    fetch_negative_samples(_char, X, y)
    if len(y) == 0 or len(ty) == 0:
        return
    if 1 == len(set(y)) or len(y) < 50:
        return

    print "fetch data done, spent %s seconds." % int(time.time() - start_time)
    start_time = time.time()
    print "traning: data size: %d" % len(y)
    model = LogisticRegressionCV(cv=5, solver='liblinear', n_jobs=1)
    try:
        model.fit(X, y)
        print "training done, spent %s seconds." % int(time.time() - start_time)
        #print 'params: '
        #for k, v in model.get_params().iteritems():
        #    print '\t', k, ' : ', v
        print 'score: ', model.score(X, y)
    except Exception, e:
        print 'except: ', e
        traceback.print_exc()
        return
Developer: CoinLQ, Project: SegmentationCheck, Lines: 33, Source: tasks.py

Example 2: logistic_test_using_cosine

def logistic_test_using_cosine(score_feature=False):
    logger.info('using cosine features in logistic regression')
    if score_feature:
        logger.info('also use score feature')
    Cs = [2**t for t in range(0, 10, 1)]
    Cs.extend([3**t for t in range(1, 10, 1)])
    snli2cosine = SNLI2Cosine('/home/junfeng/word2vec/GoogleNews-vectors-negative300.bin')
    logger.info('loading snli data ...')
    train_df = pd.read_csv('./snli/snli_1.0/snli_1.0_train.txt', delimiter='\t')
    train_df = train_df[pd.notnull(train_df.sentence2)]
    train_df = train_df[train_df.gold_label != '-']
    train_df = train_df[:len(train_df) // 3]
    train_df.reset_index(inplace=True)
    test_df = pd.read_csv('./snli/snli_1.0/snli_1.0_test.txt', delimiter='\t')
    test_df = test_df[pd.notnull(test_df.sentence2)]
    test_df = test_df[test_df.gold_label != '-']
    test_df.reset_index(inplace=True)
    X_train, train_labels, X_test, test_labels = snli2cosine.calculate_cosine_features(train_df, test_df)
    if score_feature:
        y_train_proba, y_test_proba = joblib.load('./snli/logistic_score_snli.pkl')
        # y_train_proba = y_train_proba.flatten()
        # y_test_proba = y_test_proba.flatten()
        X_train = np.concatenate([X_train, y_train_proba.reshape((-1, 1))], axis=1)
        X_test = np.concatenate([X_test, y_test_proba.reshape((-1, 1))], axis=1)
    logger.info('X_train.shape: {0}'.format(X_train.shape))
    logger.info('X_test.shape: {0}'.format(X_test.shape))

    logreg = LogisticRegressionCV(Cs=Cs, cv=3, n_jobs=10, random_state=919)
    logreg.fit(X_train, train_labels)
    logger.info('best C is {0}'.format(logreg.C_))
    y_test_predicted = logreg.predict(X_test)
    acc = accuracy_score(test_labels, y_test_predicted)
    logger.info('test data predicted accuracy: {0}'.format(acc))
Developer: junfenglx, Project: skip-thoughts, Lines: 33, Source: eval_snli_dataset.py

Example 3: Fraud

class Fraud(object):
    def __init__(self):
        self.model = None
        self.fitted = False

    def fit(self, jsonfile, target=0.3):
        self.model = LogisticRegressionCV(cv=15, scoring='recall')
        X, y = featurize_data(jsonfile)

        # Balance the classes
        X_oversample, y_oversample = oversample(X, y, target)
        print(X_oversample, y_oversample)

        # Fit the model
        self.model.fit(X_oversample, y_oversample)
        self.fitted = True

    def predict(self, X_test):
        return self.model.predict(X_test)[0]

    def save_model(self, picklefile):
        with open(picklefile, 'wb') as f:
            pickle.dump(self.model, f)

    def load_model(self, picklefile):
        with open(picklefile, 'rb') as f:
            self.model = pickle.load(f)
            self.fitted = True
Developer: daryleserrant, Project: FraudDetectionCaseStudy, Lines: 28, Source: model_utils.py

Example 4: compute_roc_auc

def compute_roc_auc(test_sa, adv_sa, split=1000):
    tr_test_sa = np.array(test_sa[:split])
    tr_adv_sa = np.array(adv_sa[:split])

    tr_values = np.concatenate(
        (tr_test_sa.reshape(-1, 1), tr_adv_sa.reshape(-1, 1)), axis=0
    )
    tr_labels = np.concatenate(
        (np.zeros_like(tr_test_sa), np.ones_like(tr_adv_sa)), axis=0
    )

    lr = LogisticRegressionCV(cv=5, n_jobs=-1).fit(tr_values, tr_labels)

    ts_test_sa = np.array(test_sa[split:])
    ts_adv_sa = np.array(adv_sa[split:])
    values = np.concatenate(
        (ts_test_sa.reshape(-1, 1), ts_adv_sa.reshape(-1, 1)), axis=0
    )
    labels = np.concatenate(
        (np.zeros_like(ts_test_sa), np.ones_like(ts_adv_sa)), axis=0
    )

    probs = lr.predict_proba(values)[:, 1]

    _, _, auc_score = compute_roc(
        probs_neg=probs[: (len(test_sa) - split)],
        probs_pos=probs[(len(test_sa) - split) :],
    )

    return auc_score
Developer: coinse, Project: sadl, Lines: 30, Source: utils.py

Example 5: LogitSelector

def LogitSelector(x, y, cv, niter, njob):
    t_size=1 / cv

    lb = prep.LabelBinarizer()
    y = lb.fit_transform(y).ravel()

    model = LogisticRegressionCV(penalty='l1', solver='liblinear', refit=False, cv=cv, n_jobs=njob)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        warnings.simplefilter('ignore', ConvergenceWarning)
        model.fit(x, y)
    columns = np.arange(x.shape[1])[model.coef_.ravel() != 0]

    accu = []
    prec = []
    rec = []
    f1 = []
    au = []
    cls = LogisticRegression()
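    # Note: ShuffleSplit(len(y), n_iter=..., test_size=...) below follows the pre-0.18
    # sklearn.cross_validation API; modern sklearn uses ShuffleSplit(n_splits=..., test_size=...).split(x).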
    gn_cvset = (Cvset(x[i][:, columns], y[i], x[j][:, columns], y[j]) for (i, j) in ShuffleSplit(len(y), n_iter=niter, test_size=t_size))

    for cvt in gn_cvset:
        cls.fit(cvt.xtr, cvt.ytr)
        accu.append(accuracy_score(cvt.yte, cls.predict(cvt.xte)))
        prec.append(precision_score(cvt.yte, cls.predict(cvt.xte)))
        rec.append(recall_score(cvt.yte, cls.predict(cvt.xte)))
        f1.append(f1_score(cvt.yte, cls.predict(cvt.xte)))
        au.append(__Auc(cls, cvt.xte, cvt.yte))

    cls.fit(x[:,columns], y)
    return Mdc(model=cls, idx=columns, accu=np.mean(accu),
               prec=np.mean(prec), rec=np.mean(rec), f1=np.mean(f1),
               au=np.mean(au))
Developer: CallMeXiaoChiZi, Project: cage-dev, Lines: 33, Source: logit_selector.py

Example 6: logistic_test

def logistic_test(train_data, train_labels, test_data, test_labels, cv=False):
    # Perform logistic regression.
    clf = LogisticRegressionCV() if cv else LogisticRegression()
    clf.fit(train_data, train_labels)
    predicted_labels = clf.predict(test_data)

    # Count true positives, true negatives, false positives, false negatives.
    tp, tn, fp, fn = 0, 0, 0, 0
    for predicted, actual in zip(predicted_labels, test_labels):
        if predicted == 1 and actual == 1:
            tp += 1
        if predicted == 0 and actual == 0:
            tn += 1
        if predicted == 1 and actual == 0:
            fp += 1
        if predicted == 0 and actual == 1:
            fn += 1

    # Compute statistics.
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    precision = 0 if (tp + fp) == 0 else tp / (tp + fp)
    recall = 0 if (tp + fn) == 0 else tp / (tp + fn)

    # Print report.
    print("Correctly classified {}/{}".format(tp + tn, tp + tn + fp + fn))
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("tp: {}; tn: {}; fp: {}; fn: {}".format(tp, tn, fp, fn))

    return accuracy
Developer: kulshrax, Project: cs224w, Lines: 31, Source: ml.py

Example 7: lr_with_scale2

def lr_with_scale2():
    """
    Submission: lr_with_scale2_0704_03.csv
    E_val:
    E_in: 0.878996
    E_out: 0.8768131004917349
    """
    from sklearn.linear_model import LogisticRegressionCV
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline

    X, y = dataset.load_train()

    raw_scaler = StandardScaler()
    raw_scaler.fit(X)
    X_scaled = raw_scaler.transform(X)

    clf = LogisticRegressionCV(Cs=50, cv=5, scoring='roc_auc', n_jobs=-1,
                               class_weight='auto')
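    # Note: class_weight='auto' was deprecated in sklearn 0.17; the modern equivalent is class_weight='balanced'.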
    clf.fit(X_scaled, y)
    logger.debug('Best C: %f', clf.C_[0])
    logger.debug('Cs: %s', clf.Cs_)
    logger.debug('Grid scores: %s', clf.scores_)
    logger.debug('Ein: %f', Util.auc_score(clf, X_scaled, y))

    IO.dump_submission(Pipeline([('scale_raw', raw_scaler),
                                 ('lr', clf)]), 'lr_with_scale2_0704_03')
Developer: Divergent914, Project: yakddcup2015, Lines: 27, Source: modeling.py

Example 8: lr_with_fs

def lr_with_fs():
    """
    Submission: lr_with_fs_0620_02.csv
    E_val: <missing>
    E_in: 0.856252488379
    E_out: 0.8552577388980213
    """
    from sklearn.linear_model import LogisticRegressionCV
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline

    X = util.fetch(util.cache_path('train_X_before_2014-08-01_22-00-47'))
    y = util.fetch(util.cache_path('train_y_before_2014-08-01_22-00-47'))

    raw_scaler = StandardScaler()
    raw_scaler.fit(X)
    X_scaled = raw_scaler.transform(X)

    rfe = util.fetch(util.cache_path('feature_selection.RFE.21'))

    X_pruned = rfe.transform(X_scaled)

    new_scaler = StandardScaler()
    new_scaler.fit(X_pruned)
    X_new = new_scaler.transform(X_pruned)

    clf = LogisticRegressionCV(cv=10, scoring='roc_auc', n_jobs=-1)
    clf.fit(X_new, y)
    print(auc_score(clf, X_new, y))
    to_submission(Pipeline([('scale_raw', raw_scaler),
                            ('rfe', rfe),
                            ('scale_new', new_scaler),
                            ('lr', clf)]), 'lr_with_fs_0620_02')
Developer: Divergent914, Project: kddcup2015, Lines: 33, Source: modeling.py

Example 9: LogitSelector

def LogitSelector(x, y, cv, njob):

    lb = prep.LabelBinarizer()
    y = lb.fit_transform(y).ravel()

    cls = LogisticRegression()
    def __Auc(xte, yte):
        ypo = cls.predict_proba(xte)
        flt_auc = roc_auc_score(yte, ypo[:,1])
        return flt_auc
    
    skf = StratifiedKFold(y, n_folds=cv)
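    # Note: StratifiedKFold(y, n_folds=cv) above is the pre-0.18 sklearn.cross_validation API;
    # modern sklearn uses StratifiedKFold(n_splits=cv).split(x, y).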
    model = LogisticRegressionCV(penalty='l1', solver='liblinear', fit_intercept=False, cv=cv, n_jobs=njob)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        warnings.simplefilter('ignore', ConvergenceWarning)
        model.fit(x, y)
    columns = np.arange(x.shape[1])[model.coef_.ravel() != 0]
    
    mdl_eval = lambda func: lambda idx_tr, idx_te: func(y[idx_te], cls.fit(x[idx_tr][:,columns], y[idx_tr]).predict(x[idx_te][:,columns]))
    auc_eval = lambda idx_tr, idx_te: roc_auc_score(y[idx_te], cls.fit(x[idx_tr][:,columns], y[idx_tr]).predict_proba(x[idx_te][:,columns])[:,1])
    res_eval = lambda func: np.average(map(mdl_eval(func), *zip(*[(idx_tr, idx_te) for idx_tr, idx_te in skf])))

    accu = res_eval(accuracy_score)
    prec = res_eval(precision_score)
    rec = res_eval(recall_score)
    f1 = res_eval(f1_score)
    au = np.average(map(auc_eval, *zip(*[(idx_tr, idx_te) for idx_tr, idx_te in skf])))

    cls.fit(x[:,columns], y)
    return Mdc(model=cls, idx=columns, accu=accu, prec=prec, rec=rec, f1=f1, au=au)
Developer: bm2-lab, Project: cage, Lines: 31, Source: logit_selector.py

Example 10: train

def train(trainingData, pklFile):
	# ========================================================================= #
	# =============== STEP 1. DEFINE OUTPUT LEARNT MODEL FILE ================= #
	# ========================================================================= #
	if (pklFile == ''):
		os.system('rm -rf learntModel && mkdir learntModel')
		pklFile = 'learntModel/learntModel.pkl'
	
	# ========================================================================= #
	# ================= STEP 2. PREPARE AND FORMATTING DATA =================== #
	# ========================================================================= #
	NUMBER_OF_FEATURES = len(trainingData[0]) - 1
	NUMBER_OF_TRAINING_POINTS = len(trainingData)

	x = trainingData[:, range(0, NUMBER_OF_FEATURES)]
	y = trainingData[:, NUMBER_OF_FEATURES]
	
	# ========================================================================= #
	# ============== STEP 3. DECLARE PRIMITIVES BEFORE THE PARTY ============== #
	# ========================================================================= #
	minSquareError = np.inf
	targetAlpha = None
	alphas = np.logspace(-10, -2, 500)			
	
	# ========================================================================= #
	# ===== STEP 4. PERFORM FITTING WITH THE BEST ALPHA AND SAVE THE MODEL ==== #
	# ========================================================================= #
	clf = LogisticRegressionCV(Cs=alphas)
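	# Note: LogisticRegressionCV's Cs are inverse regularization strengths (C = 1/alpha),
	# so the "alphas" above are used as candidate C values; smaller C means stronger regularization.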
	clf.fit(x, y)
	joblib.dump(clf, pklFile)
	
	return {"intercept": clf.intercept_, "coef":clf.coef_, "alpha":clf.C_, "accuracy":clf.score(x,y)}
Developer: ZAZAZakari, Project: ML-Algorithm, Lines: 32, Source: logisticRegression.py

Example 11: optimal_l2

def optimal_l2(X, y): 
    '''
    Find the optimal level of L2 regularization for logistic regression
    '''
    logit = LogisticRegressionCV(Cs=50, cv=10)
    logit.fit(X, y)
    return logit.C_
Developer: thomasbrawner, Project: python_tools, Lines: 7, Source: marginal_effects_example.py

Example 12: mdl_1d_cat

def mdl_1d_cat(x, y):
    """builds univariate model to calculate AUC"""
    if x.nunique() > 10 and com.is_numeric_dtype(x):
        x = sb_cutz(x)

    series = pd.get_dummies(x, dummy_na=True)
    lr = LogisticRegressionCV(scoring='roc_auc')

    lr.fit(series, y)

    try:
        preds = (lr.predict_proba(series)[:, -1])
        #preds = (preds > preds.mean()).astype(int)
    except ValueError:
        Tracer()()

    plot = plot_cat(x, y)

    imgdata = BytesIO()
    plot.savefig(imgdata)
    imgdata.seek(0)

    aucz = roc_auc_score(y, preds)
    cmatrix = 'data:image/png;base64,' + \
        quote(base64.b64encode(imgdata.getvalue()))
    plt.close()
    return aucz, cmatrix
Developer: bartlesy, Project: pandas-profiling, Lines: 27, Source: sb_univar.py

Example 13: make_predictions

def make_predictions():
    # Fit Logistic Regression Model
    logreg = LogisticRegressionCV(scoring='log_loss', n_jobs=-1, verbose=1, random_state=6156)
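    # Note: the 'log_loss' scorer string was renamed 'neg_log_loss' in sklearn 0.18.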
    logreg.fit(X=trainX, y=train['y'].values)
    
    # Validate
    pred_pr = logreg.predict_proba(valX)
    loss = log_loss(y_true=val['y'].values, y_pred=pred_pr)
    print "Validation log loss:", loss
    
    # Get Test predictions
    img_files = [os.path.join(IMG_DIR, f) for f in os.listdir(IMG_DIR)]
        
    if os.path.isfile('test_pca.csv'):
        test_pca = pd.read_csv('test_pca.csv', dtype={'id' : str})
    else:
        test_pca = prepare_test_data(img_files, STD_SIZE)
        
    test_predictions = logreg.predict_proba(test_pca.values[:, 1:])
    id_s = [re.sub(r'\D', '', f) for f in img_files]
    df_id = pd.DataFrame({'id' : id_s})
    col_names = ['col'+str(i) for i in range(1, 9)]
    df_yhat = pd.DataFrame(data=test_predictions, columns=col_names)
    df_id_yhat = pd.concat([test_pca['id'], df_yhat], axis=1)
    yhat = df_id.merge(df_id_yhat, on='id', how='left')
    yhat.fillna(1./8, inplace=True)
    yhat.to_csv('kaggle_430_2pm.csv', index=False)
Developer: keithgw, Project: ML_Competition, Lines: 27, Source: build_model.py

Example 14: classify_maxEnt

def classify_maxEnt(train_X, train_Y, test_X):

    print("Classifying using Maximum Entropy ...")
    maxEnt = LogisticRegressionCV()
    maxEnt.fit(train_X, train_Y)
    yHat = maxEnt.predict(test_X)

    return yHat
Developer: shalinc, Project: ML-Sentiment-Analysis-of-Movie-Reviews-from-Twitter, Lines: 8, Source: sentiment_analysis.py

Example 15: fit_logistic_regression

def fit_logistic_regression(y, X):
    """
    Fits a logistic regression.
    """
    model_log = LogisticRegressionCV(cv=5, penalty='l2', verbose=1, max_iter=1000)
    fit = model_log.fit(X, y)

    return fit
Developer: MarkRegalla27, Project: Lending_Club, Lines: 8, Source: model.py


Note: The sklearn.linear_model.LogisticRegressionCV class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by their respective authors, who retain copyright; for redistribution and use, please refer to each project's license. Do not republish without permission.