当前位置: 首页>>代码示例>>Python>>正文


Python Parallel.to_csv方法代码示例

本文整理汇总了Python中joblib.Parallel.to_csv方法的典型用法代码示例。如果您正苦于以下问题:Python Parallel.to_csv方法的具体用法?Python Parallel.to_csv怎么用?Python Parallel.to_csv使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在joblib.Parallel的用法示例。


在下文中一共展示了Parallel.to_csv方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: preprocess

# 需要导入模块: from joblib import Parallel [as 别名]
# 或者: from joblib.Parallel import to_csv [as 别名]
def preprocess(file_in, file_out, test=False, n_jobs=6):
    """
    This function preprocesses raw data file.
    For each row and for each feature it extracts aggregations over TimeToEnd:
        From feature TimeToEnd it extracts total time ("time") and number of observations ("n_obs")
        From feature DistanceToRadar it extracts aggregations ('min', '50% quantile', 'mean', 'max')
        For any other features it calculates ('mean', 'std', 'min', '50% quantile', 'max')

        New features names follow the pattern: <feature name>_<aggregation function>

    Parameters
    ----------
    :param file_in: str
        csv-file name for data to be preprocessed
    :param file_out: str
        csv-file name for output data
    :param test: bool
        indicator for test data (data without label)
    :param n_jobs: int
        number of parallel joblib workers used to transform chunks
    :return: None -- the preprocessed data is written to ``file_out``
    """
    # Load data lazily in 5000-row chunks so each chunk can be handed to
    # a separate worker process.
    data_raw = pd.read_csv(file_in, na_filter=False, chunksize=5000)

    # Apply transformations to data chunks in parallel
    start = time.time()
    data = Parallel(n_jobs=n_jobs, verbose=11)(
        delayed(foo)(x, transform, axis=1, test=test) for i, x in enumerate(data_raw))
    # Fixed: Python 2 `print` statements were a syntax error under Python 3;
    # converted to print() calls (the file's other examples already use them).
    print("Preprocessing time: {}".format(round((time.time() - start) / 60, 3)))
    print("Records: {}".format(len(data)))

    # Join data chunks and save result to csv
    data = pd.concat(data)
    data.to_csv(file_out, index=False)

    print("File {} preprocessed to {}".format(file_in, file_out))
开发者ID:alfiya400,项目名称:kaggle-rain,代码行数:36,代码来源:preprocess.py

示例2: create_training_data

# 需要导入模块: from joblib import Parallel [as 别名]
# 或者: from joblib.Parallel import to_csv [as 别名]
def create_training_data():
    """Build the driver-trip training set in parallel and persist it.

    Walks ``drivers/<folder>/<trip>.csv``, computes per-trip attributes
    via ``create_attributes`` across 8 worker processes, and writes the
    resulting frame to ``training_set.csv``.

    :return: pandas.DataFrame with one attribute row per trip
    """
    worker_count = 8

    # Enumerate every [driver_folder, trip_name] pair, skipping OS metadata.
    trip_index = []
    for driver_dir in os.listdir('drivers'):
        if 'DS_Store' in driver_dir:
            continue
        for trip_file in os.listdir('drivers/' + driver_dir):
            if '.csv' in trip_file:
                trip_index.append([driver_dir, trip_file.replace('.csv', '')])

    rows = Parallel(n_jobs=worker_count)(
        delayed(create_attributes)(pair) for pair in trip_index)
    frame = pd.DataFrame(rows)
    frame.columns = ['driver_trip', 'trip_time', 'total_distance', 'skyway_distance',
                     'avg_speed', 'std_speed', 'avg_speed_up', 'avg_speed_down',
                     'avg_acc', 'std_acc', 'avg_turn', 'std_turn',
                     'standing_time', 'standing_speed']
    # save to file for later training
    frame.to_csv('training_set.csv', index=False)
    return frame
开发者ID:neikusc,项目名称:Kaggle_Driver_Telematics_Analysis,代码行数:17,代码来源:main_lr.py

示例3: list

# 需要导入模块: from joblib import Parallel [as 别名]
# 或者: from joblib.Parallel import to_csv [as 别名]
    filled['cc_{}'.format(sex)] = filled['ccpublic_{}'.format(sex)] + filled['ccprivate_{}'.format(sex)]
    filled['crime_{}'.format(sex)] = filled['crimepublic_{}'.format(sex)] + filled['crimeprivate_{}'.format(sex)]
    filled['health_{}'.format(sex)] = filled['health_private_{}'.format(sex)] + filled['health_public_{}'.format(sex)] 
    filled['transfer_{}'.format(sex)] = filled['inc_trans_pub_{}'.format(sex)] + filled['diclaim_{}'.format(sex)] + filled['ssclaim_{}'.format(sex)] + filled['ssiclaim_{}'.format(sex)]

# Benefit components whose cash flows can be scaled, and the grid of
# multiplicative factors (0.0 to 3.0 in steps of 0.25).
components = ['inc_labor', 'inc_parent', 'transfer', 'edu', 'crime',
              'costs', 'cc', 'health', 'qaly', 'm_ed']
factors = np.arange(0, 3.1, 0.25)
# One (component, factor) tuple per scenario to evaluate.
combo = [pair for pair in itertools.product(components, factors)]

# vary factor: IRR
# applying factor to benefits        

def irr_factors(part, f):
    """Recompute the IRR output with one benefit component scaled by ``f``.

    :param part: str -- component name; the columns '<part>_m', '<part>_f'
        and '<part>_p' are scaled in a deep copy of the module-level
        ``filled`` flows, so ``filled`` itself is never mutated
    :param f: float -- multiplicative factor applied to the component
    :return: the ``irr_calc`` result annotated with 'rate' and 'part'
        columns recording the scenario
    """
    # Copy so parallel/repeated scenarios never see each other's scaling.
    irr_tmp = deepcopy(filled)
    for sex in ['m', 'f', 'p']:
        irr_tmp['{}_{}'.format(part, sex)] = irr_tmp['{}_{}'.format(part, sex)] * f

    output = irr_calc(irr_tmp, etype=etype, components=components)

    output['rate'] = f
    output['part'] = part

    # Fixed: Python 2 `print` statement -> print(); the single-argument
    # form produces identical output on both Python versions.
    print('IRR for {} and factor {} calculated.'.format(part, f))
    return output

# Evaluate every (component, factor) scenario in parallel, stack the
# per-scenario frames, and persist the sorted result for plotting.
irr_factor_frames = Parallel(n_jobs=25)(
    delayed(irr_factors)(part, f) for part, f in combo)
irr_factors = pd.concat(irr_factor_frames, axis=0)
irr_factors.sort_index(inplace=True)
irr_factors.to_csv(os.path.join(plots, 'irr_factors.csv'), index=True)
开发者ID:jorgelgarcia,项目名称:abc-treatmenteffects-finalseason,代码行数:32,代码来源:sa_irrfactor.py

示例4: print

# 需要导入模块: from joblib import Parallel [as 别名]
# 或者: from joblib.Parallel import to_csv [as 别名]
# Do we filter by a base?
if base == "None":
    # No base file given: derive candidate gene-set pairs from the GSEA
    # table itself, keeping only sets with at least 100 members.
    counts = gsea.groupby(["Gene_set"]).size().reset_index(name="count")
    print(counts)
    counts = counts[counts["count"] >= 100]  # Changed back to 100!!!
    print(counts)
    gene_sets = list(itertools.combinations(list(counts["Gene_set"]), 2))

    if both == "T":
        print("both one-tailed used")
        # Append each pair reversed so both one-tailed directions are tested.
        gene_r = [(second, first) for first, second in gene_sets]
        gene_sets = gene_sets + gene_r
else:
    # A base p-value file pins down exactly which '$'-joined pairs to use.
    base = pd.read_csv(in_folder + "GSEA_FILES/" + gsea_type + "_gsea_" + base + "_both_" + both + "_pvals", sep="\t")
    gene_sets = list(set(base["gs"]))
    gene_sets = [(gs.split("$")[0], gs.split("$")[1]) for gs in gene_sets]

print(gsea_type, len(gene_sets))

# Fan the p-value computation out over 40 workers, one task per pair.
main_dict = Parallel(n_jobs=40)(delayed(mann_pval)(pair) for pair in gene_sets)

print("Done calculating")
# Write to file
main_dict = pd.concat([pd.DataFrame(entry) for entry in main_dict])
file_out = (in_folder + "GSEA_FILES/" + gsea_type + "_gsea_" + exp_type +
            "_both_" + both + "_ext_gmv_" + ext_gmv + "_pvals")

main_dict.to_csv(file_out, sep="\t", header=True, index=False)

print ("Done writing")
开发者ID:LOBUTO,项目名称:CANCER.GENOMICS,代码行数:33,代码来源:gsea_exp.py

示例5: list

# 需要导入模块: from joblib import Parallel [as 别名]
# 或者: from joblib.Parallel import to_csv [as 别名]
    filled['transfer_{}'.format(sex)] = filled['inc_trans_pub_{}'.format(sex)] + filled['diclaim_{}'.format(sex)] + filled['ssclaim_{}'.format(sex)] + filled['ssiclaim_{}'.format(sex)]

# Benefit streams subject to scaling (note: no 'm_ed' here, unlike the
# IRR variant) and the grid of multipliers (0.0 to 3.0 in steps of 0.25).
components = ['inc_labor', 'inc_parent', 'transfer', 'edu', 'crime',
              'costs', 'cc', 'health', 'qaly']
factors = np.arange(0, 3.1, 0.25)

# Cartesian product: one (component, factor) tuple per scenario.
combo = [pair for pair in itertools.product(components, factors)]
    
# vary factor: BCR
# applying factor to benefits        

def bc_factors(part, f):
    """Recompute benefit/cost output with component ``part`` scaled by ``f``.

    :param part: str -- component name; the columns '<part>_m', '<part>_f'
        and '<part>_p' are scaled in a deep copy of the module-level
        ``filled`` flows
    :param f: float -- multiplicative factor applied to the component
    :return: the ``bc_calc`` result annotated with 'rate' and 'part'
        columns recording the scenario
    """
    # Copy so parallel/repeated scenarios never see each other's scaling.
    bc_tmp = deepcopy(filled)
    for sex in ['m', 'f', 'p']:
        bc_tmp['{}_{}'.format(part, sex)] = bc_tmp['{}_{}'.format(part, sex)] * f

    output = bc_calc(bc_tmp, components=components)

    output['rate'] = f
    output['part'] = part

    # Fixed: Python 2 `print` statement -> print(); single-argument form
    # is identical on both Python versions.
    print('B/C Ratio for {} and factor {} calculated.'.format(part, f))
    return output



# Evaluate every (component, factor) scenario in parallel, stack the
# per-scenario frames, and persist the sorted result for plotting.
bc_factor_frames = Parallel(n_jobs=25)(
    delayed(bc_factors)(part, f) for part, f in combo)
bc_factors = pd.concat(bc_factor_frames, axis=0)
bc_factors.sort_index(inplace=True)
bc_factors.to_csv(os.path.join(plots, 'bc_factors.csv'), index=True)
开发者ID:jorgelgarcia,项目名称:abc-treatmenteffects-finalseason,代码行数:32,代码来源:sa_bcrfactor.py

示例6: makeflows

# 需要导入模块: from joblib import Parallel [as 别名]
# 或者: from joblib.Parallel import to_csv [as 别名]
# Scenario setup: build the benefit/cost flows once for extrapolation type 2;
# every DWL scenario below works on a deep copy of these flows.
etype = 2
filled = makeflows(etype=etype)

# vary DWL: IRR
# NOTE(review): the original bound ``irr_dwl = pd.DataFrame([])`` here, but
# that binding was immediately shadowed by the ``def irr_dwl`` that follows,
# so it was dead code -- removed.
def irr_dwl(d):
    """Recompute the IRR with the marginal cost of welfare (DWL) set to ``d``.

    Works on a deep copy of the module-level ``filled`` flows; the divisors
    0.5 and 1.5 presumably undo the baseline DWL assumptions baked into
    ``filled`` (TODO confirm against makeflows).

    :param d: float -- deadweight-loss rate to evaluate
    :return: the ``irr_calc`` result annotated with a 'rate' column
    """
    irr_tmp = deepcopy(filled)
    for sex in ['m', 'f', 'p']:
        # Pure public transfers: rescale by d relative to the 0.5 baseline.
        for part in ['inc_trans_pub_{}'.format(sex), 'diclaim_{}'.format(sex), 'ssclaim_{}'.format(sex), 'ssiclaim_{}'.format(sex)]:
            irr_tmp[part] = irr_tmp[part] * (d/0.5)
        # Education flows: rescale only the c0..c18 age columns by (1+d)/1.5.
        for part in ['edu_{}'.format(sex), 'm_ed_{}'.format(sex)]:
            irr_tmp[part].loc[(sex, slice(None), slice(None)), slice('c0','c18')] = \
                irr_tmp[part].loc[(sex, slice(None), slice(None)), slice('c0','c18')] * ((1 + d)/1.5)
        # Other public outlays: same (1+d)/1.5 rescaling, all columns.
        for part in ['ccpublic_{}'.format(sex), 'crimepublic_{}'.format(sex), 'health_public_{}'.format(sex), 'costs_{}'.format(sex)]:
            irr_tmp[part] = irr_tmp[part] * ((1+d)/1.5)

    output = irr_calc(irr_tmp, etype=etype)

    output['rate'] = d

    # Fixed: Python 2 `print` statement -> print().
    print('IRR for MCW {} calculated.'.format(d))

    return output

# Run one IRR evaluation per candidate DWL rate, then combine, sort and save.
dwl_frames = Parallel(n_jobs=25)(
    delayed(irr_dwl)(rate) for rate in rate_range)
irr_dwl = pd.concat(dwl_frames, axis=0)
irr_dwl.sort_index(inplace=True)
irr_dwl.to_csv(os.path.join(plots, 'irr_dwl.csv'), index=True)

开发者ID:jorgelgarcia,项目名称:abc-treatmenteffects-finalseason,代码行数:31,代码来源:sa_irrdwl.py


注:本文中的joblib.Parallel.to_csv方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。