This page collects typical usage examples of the Python utility.load_data function. If you have been wondering what load_data does, how to call it, or what real-world examples of it look like, the curated code samples below may help.
Fifteen code examples of the load_data function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
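Note that the examples come from different projects, so the load_data signatures vary: some take a dataset split and a name, others a CSV path and a column list, or a pair of CSV files. The utility module itself is not reproduced on this page; purely as orientation, a minimal sketch of what a split-and-name style load_data/save_data pair could look like is given below. The pickle-based storage, the DATA_DIR location and the file naming are assumptions, not the original implementation.

import os
import pandas as pd

DATA_DIR = "data"  # assumed cache directory, not taken from the original project

def load_data(split, name):
    # hypothetical load_data: read the DataFrame cached for (split, name)
    return pd.read_pickle(os.path.join(DATA_DIR, "{}_{}.pkl".format(split, name)))

def save_data(frame, split, name):
    # hypothetical save_data: persist the DataFrame for the next pipeline stage
    frame.to_pickle(os.path.join(DATA_DIR, "{}_{}.pkl".format(split, name)))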
Example 1: main
def main():
    revision = 1
    print("Loading the classifier")
    classifier = utility.load_model("train_rtext_rev{}".format(revision))
    print("Reading in the training data")
    train = utility.load_data("training", "rtext")
    print("Predicting the rest of the training data")
    pred = np.ravel(classifier.predict(list(train['rtext_bcat'])))
    score = utility.rmsle_log(pred, train['votes_useful_log'])
    print "Score:", score
    print("Writing out new training data")
    del train['rtext_bcat']
    train['votes_useful_log_rtextpred_sgd'] = pd.Series(pred, index=train.index)
    utility.save_data(train, "training", "rtext_sgd_rev{}".format(revision))
    print("Reading in the test data")
    test = utility.load_data("test", "rtext")
    tepred = np.ravel(classifier.predict(list(test['rtext_bcat'])))
    print("Writing out new test data")
    del test['rtext_bcat']
    test['votes_useful_log_rtextpred_sgd'] = pd.Series(tepred, index=test.index)
    utility.save_data(test, "test", "rtext_sgd_rev{}".format(revision))
    test['votes'] = pd.Series(np.exp(tepred) - 1, index=test.index)  # invert the log(votes + 1) transform
    print("Writing out a new submission file")
    utility.write_submission(test, "rtextsgd_sub_rev{}".format(revision))
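Example 1 scores its predictions with utility.rmsle_log, whose implementation is not shown. Since both arguments are already on the log(votes + 1) scale (see Example 11), a plausible reading is a plain root-mean-squared error in log space, which equals the RMSLE of the raw counts; a sketch under that assumption:

import numpy as np

def rmsle_log(pred_log, truth_log):
    # hypothetical rmsle_log: RMSE of values that are already log(votes + 1),
    # i.e. the root mean squared logarithmic error of the raw vote counts
    pred_log = np.asarray(pred_log, dtype=np.float64)
    truth_log = np.asarray(truth_log, dtype=np.float64)
    return np.sqrt(np.mean((pred_log - truth_log) ** 2))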
Example 2: main
def main():
    print("Loading the classifier")
    classifier = utility.load_model("fullsgd_model_rev{}".format(revision))
    print("Reading in the training data")
    train = utility.load_data("training", "finalinput")
    truth = train['votes_useful_log']
    del train['votes_useful_log']
    print("Predicting the training data")
    logpred = np.ravel(classifier.predict(train.values[:,1:]))
    score = utility.rmsle_log(logpred, truth)
    print "Score:", score
    print("Reading in the test data")
    test = utility.load_data("test", "finalinput")
    del test['votes_useful_log']
    print("Predicting the test data")
    logpred = np.ravel(classifier.predict(test.values[:,1:]))
    pred = np.exp(np.array(logpred, dtype=np.float64)) - 1
    test['votes'] = pred
    print("Writing out a new submission file")
    utility.write_submission(test, "fullsgd_sub_rev{}.csv".format(revision))
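The back-transform in this example mirrors how the target is built in Example 11 (votes_useful_log = np.log(votes_useful + 1)), so exp(logpred) - 1 recovers the raw vote counts. np.expm1 expresses the same inverse with better numerical behaviour near zero:

import numpy as np

logpred = np.array([0.0, 1.2, 3.4])  # predictions on the log(votes + 1) scale
votes = np.expm1(logpred)            # identical to np.exp(logpred) - 1, but stabler near 0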
Example 3: main
def main():
    # load data
    df = load_data('../../assignment10_data/restaurants.csv', ['CAMIS', 'BORO', 'GRADE', 'GRADE DATE'])
    df = clean_data(df)  # clean data
    # question 4
    sum_nyc, sum_boro = grade_sum(df)  # calculate the sum of test_grade in NYC and in each borough
    print 'The sum of test_grade in NYC is: {} \n'.format(sum_nyc)
    print 'The sum of test_grade in each borough is: \n {}'.format(sum_boro)
    # question 5
    grade_overtime_plot(df, 'nyc')  # grade-over-time plot for NYC
    # grade-over-time plot for each borough
    for borough in ['BRONX', 'BROOKLYN', 'MANHATTAN', 'QUEENS', 'STATEN ISLAND']:
        df_boro = df[df['BORO'] == borough]
        grade_overtime_plot(df_boro, borough.lower())
    # question 6
    df1 = load_data('../../assignment10_data/restaurants.csv', ['CAMIS', 'CUISINE DESCRIPTION'])
    type_name = get_top_10_nyc(df1)
    df2 = load_data('../../assignment10_data/restaurants.csv', ['CAMIS', 'CUISINE DESCRIPTION', 'GRADE', 'GRADE DATE'])
    df2 = clean_data(df2)
    df2 = df2[df2['CUISINE DESCRIPTION'].isin(type_name)]
    df_sum = top_10_grade_overtime(df2, type_name)  # calculate the score over time for each restaurant type
    top_10_plot(df_sum)  # score-over-time plot
    top_10_colormap(df_sum)  # plot the correlation between any two restaurant types in NYC as a color map
Example 4: main
def main():
    """
    Present the results of this assignment. Users are asked whether they
    want to see:
    1) Income distribution across all countries for a given year:
       the user inputs a year from 1800 to 2012 and the result is saved
       as a .png file.
    2) Income distribution by region in recent years:
       the user inputs a first year, a last year and a year gap, and
       selects a plot type (boxplots or histograms); the results are
       saved as a .pdf file.
    """
    #load countries and income data
    countries = load_data('countries.csv')
    income = load_data('indicator gapminder gdp_per_capita_ppp.csv')
    #transform income data set
    income = trans_data(income)
    try:
        while raw_input('To see income distribution across all countries? (y/n) ') == 'y':
            try:
                year = raw_input('Which year? ') #select a year
                income_distr(income, year)
            except:
                print 'Please input a year from 1800 to 2012'
        while raw_input('To see income distribution by region in recent years? (y/n) ') == 'y':
            try:
                from_year = int(raw_input('From which year? ')) #input the first year
                to_year = int(raw_input('To which year? ')) #input the last year
                year_gap = int(raw_input('Year gap? ')) #input a year gap
                pltype = raw_input('Plot type: boxplots or histograms? (b/h) ') #select a plot type
                if pltype == 'b':
                    #create a pdf file to save plots
                    pp = PdfPages('results/Income by region from {0} to {1}_boxplot.pdf'.format(from_year, to_year))
                    for i in xrange(from_year, to_year+1, year_gap):
                        fig = income_region(1, str(i))
                        pp.savefig(fig)
                elif pltype == 'h':
                    pp = PdfPages('results/Income by region from {0} to {1}_hist.pdf'.format(from_year, to_year))
                    for i in xrange(from_year, to_year+1, year_gap):
                        fig = income_region(0, str(i))
                        plt.suptitle('{}'.format(i))
                        pp.savefig(fig)
                pp.close() #close the pdf file
            except:
                print 'please input years from 1800 to 2012 and try again!'
    except(KeyboardInterrupt):
        print 'Bye!'
        sys.exit()
Example 5: optimal_svm
def optimal_svm(optimal_c):
    """
    Calculate the test-set AUC for the optimal C chosen by model selection.
    """
    # load datasets
    train_X, train_y = load_data('train_X.csv', 'train_y.csv')
    test_X, test_y = load_data('test_X.csv', 'test_y.csv')
    train_X_pca = data_pca(0.95, train_X, train_X)
    test_X_pca = data_pca(0.95, train_X, test_X)
    train_y = np.array(train_y).ravel()
    test_y = np.array(test_y).ravel()
    # set up the model with the optimal C
    my_svm = svm.SVC(kernel='linear', C=optimal_c, class_weight='auto')  # note: newer scikit-learn versions use class_weight='balanced' instead of 'auto'
    predicted_y = my_svm.fit(train_X_pca, train_y).decision_function(test_X_pca)
    fpr, tpr, tr = roc_curve(test_y, predicted_y)
    print auc(fpr, tpr)
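data_pca is another project helper that is not shown here. From the call pattern data_pca(0.95, fit_X, transform_X), it apparently fits a PCA on the first data set, keeps enough components to explain 95% of the variance, and projects the second data set. A minimal sketch under that assumption (and assuming the inputs are pandas DataFrames):

import pandas as pd
from sklearn.decomposition import PCA

def data_pca(var_ratio, fit_X, transform_X):
    # hypothetical data_pca: a float n_components in (0, 1) makes PCA keep the
    # smallest number of components explaining that fraction of the variance
    pca = PCA(n_components=var_ratio)
    pca.fit(fit_X)
    return pd.DataFrame(pca.transform(transform_X), index=transform_X.index)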
Example 6: run
def run(train_file, test_file, output_file):
    train, labels, test = utils.load_data(train_file, test_file)
    clf = XGBoost(max_iterations=500, max_depth=12, min_child_weight=4.9208250938262745,
                  row_subsample=.9134478530382129, min_loss_reduction=.5132278416508804,
                  column_subsample=.730128689911957, step_size=.1)
    clf.fit(train, labels)
    predictions = clf.predict_proba(test)
    utils.save_prediction(output_file, predictions)
Example 7: main
def main():
    revision = 4
    print("Loading the classifier")
    classifier = utility.load_model("train_rtext_rev{}".format(revision))
    print("Reading in the training data")
    train = utility.load_data("training", "rtext")
    print("Predicting the rest of the training data")
    bunch = 50000
    pred = np.zeros(len(train))
    for ibunch in range(int(len(train) / bunch)):
        beg = ibunch * bunch
        end = (ibunch + 1) * bunch  # originally hard-coded as 50000, the same chunk size
        mtrain = train.ix[beg:end - 1]
        mpred = np.ravel(classifier.predict(list(mtrain['rtext_bcat'])))
        pred[beg:end] = mpred
    # predict the remainder that does not fill a whole chunk
    beg = int(len(train) / bunch) * bunch
    mtrain = train.ix[beg:]
    mpred = np.ravel(classifier.predict(list(mtrain['rtext_bcat'])))
    pred[beg:] = mpred
    score = utility.rmsle_log(pred, train['votes_useful_log'])
    print "Score:", score
    print("Writing out new training data")
    del train['rtext_bcat']
    train['votes_useful_log_rtextpred'] = pd.Series(pred, index=train.index)
    utility.save_data(train, "training", "rtext_rev{}".format(revision))
    print("Reading in the test data")
    test = utility.load_data("test", "rtext")
    tepred = np.ravel(classifier.predict(list(test['rtext_bcat'])))
    print("Writing out new test data")
    del test['rtext_bcat']
    test['votes_useful_log_rtextpred'] = pd.Series(tepred, index=test.index)
    utility.save_data(test, "test", "rtext_rev{}".format(revision))
    test['votes'] = pd.Series(np.exp(tepred) - 1, index=test.index)  # invert the log(votes + 1) transform
    print("Writing out a new submission file")
    utility.write_submission(test, "rtextrf_sub_rev{}.csv".format(revision))
Example 8: main
def main():
    print("Reading in the training data")
    train = utility.load_data("training", "finalinput")
    truth = np.ravel(np.array(train['votes_useful_log']))
    del train['votes_useful_log']
    print("Extracting features and training review text model")
    classifier = get_pipeline()
    classifier.fit(train.values[:,1:], np.array(truth))
    print("Saving the classifier")
    utility.save_model(classifier, "fullsgd_model_rev{}".format(revision))
Example 9: main
def main():
    # load datasets
    train_X, train_Y = load_data('train_X.csv', 'train_y.csv')
    train_X_pca = data_pca(0.95, train_X, train_X)
    train = train_X_pca
    train['Y'] = train_Y
    # set a list of candidate values for the hyperparameter C
    c = [10**i for i in range(-9, 2)]
    # run cross-validation (5 folds here) and return the AUC on each fold for each C
    aucs = xValSVM(train, 'Y', 5, c)
    # calculate the average and standard error of the AUC for each C
    avg, stderr = avg_stderr(aucs, c)
    # plot the cross-validation results
    plotxValSVM(avg, stderr, c)
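avg_stderr aggregates the per-fold AUCs before plotting. A minimal sketch, assuming xValSVM returns a dict mapping each C to its list of fold AUCs (the actual data structure is not shown):

import numpy as np

def avg_stderr(aucs, c_values):
    # hypothetical avg_stderr: mean and standard error of the fold AUCs per C
    avg = [np.mean(aucs[c]) for c in c_values]
    stderr = [np.std(aucs[c], ddof=1) / np.sqrt(len(aucs[c])) for c in c_values]
    return avg, stderr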
Example 10: main
def main():
    revision = 4
    print("Reading in the training data")
    train = utility.load_data("training", "rtext")
    inds = random.sample(range(len(train)), 100000)
    mtrain = train.ix[inds]
    print("Extracting features and training review text model")
    classifier = get_pipeline()
    classifier.fit(list(mtrain['rtext_bcat']),
                   list(mtrain['votes_useful_log']))
    print("Saving the classifier")
    utility.save_model(classifier, "train_rtext_rev{}".format(revision))
Example 11: main
def main():
    trabus = utility.load_data("training", "business")
    tesbus = utility.load_data("test", "business")
    bus = pd.concat((trabus, tesbus))
    for cat in delbuscats:
        if hasattr(bus, cat): del bus[cat]
    bus['procbcat'] = pd.Series(map(process_bcat, bus['categories']), bus.index)
    del bus['categories']
    for s in ["training", "test"]:
        rev = utility.load_data(s, "review")
        for cat in delrevcats:
            if hasattr(rev, cat): del rev[cat]
        if hasattr(rev, 'votes_useful'):
            rev['votes_useful_log'] = np.log(rev.votes_useful + 1)
        rev = pd.merge(rev, bus, 'inner')
        rev['rtext_bcat'] = rev['text'] + rev['procbcat']
        del rev['procbcat']
        del rev['text']
        utility.save_data(rev, s, 'rtext')
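process_bcat is the one preprocessing step not visible in this example. Since its output is concatenated onto the review text, it presumably flattens a business's category list into a single string; a hypothetical version:

def process_bcat(categories):
    # hypothetical process_bcat: join the category list into one space-separated
    # string so it can be appended to the review text
    if not categories:
        return ""
    return " " + " ".join(categories)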
Example 12: printPOS
def printPOS(pos_words):
    # pos_words is a list of (word, tag) pairs
    s = ""
    t = ""
    for p in pos_words:
        l = len(p[0]) if len(p[0]) > len(p[1]) else len(p[1])
        s = s + p[0].rjust(l) + ' '
        t = t + p[1].rjust(l) + ' '
    print '-----------'
    print s
    print t
    print ""

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "Usage: python showTaggedSentences.py <input file>"
        sys.exit(0)
    qaTests = load_data(sys.argv[1])
    showAllTaggedSentences(qaTests)
Example 13: mask_load
def mask_load(self):
    self.url_masks = utility.load_data("urlmasks", {})
Example 14: load_urls
def load_urls(self):
    self.url_lists = utility.load_data("urls", {})
Example 15: on_load
def on_load(self):
    self.id_directory = utility.load_data('schema_id', {})
    self.id_presets = utility.load_data('schema_fav', {})