本文整理汇总了Python中joblib.Parallel.to_csv方法的典型用法代码示例。如果您正苦于以下问题:Python Parallel.to_csv方法的具体用法?Python Parallel.to_csv怎么用?Python Parallel.to_csv使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类joblib.Parallel的用法示例。(注:严格来说 to_csv 并不是 joblib.Parallel 的方法,而是 pandas.DataFrame 的方法;下列示例中它是在由 Parallel 并行计算结果拼接得到的 DataFrame 上调用的。)
在下文中一共展示了Parallel.to_csv方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: preprocess
# 需要导入模块: from joblib import Parallel [as 别名]
# 或者: from joblib.Parallel import to_csv [as 别名]
def preprocess(file_in, file_out, test=False, n_jobs=6):
    """Preprocess a raw csv data file in parallel chunks.

    For each row and for each feature it extracts aggregations over TimeToEnd:
      * TimeToEnd       -> total time ("time") and number of observations ("n_obs")
      * DistanceToRadar -> aggregations ('min', '50% quantile', 'mean', 'max')
      * any other column-> ('mean', 'std', 'min', '50% quantile', 'max')
    New features names follow the pattern: <feature name>_<aggregation function>

    Parameters
    ----------
    :param file_in: str
        csv-file name for data to be preprocessed
    :param file_out: str
        csv-file name for output data
    :param test: bool
        indicator for test data (data without label)
    :param n_jobs: int
        number of joblib workers used to transform chunks
    :return: None -- the preprocessed table is written to ``file_out``
    """
    # Read lazily in 5000-row chunks so each chunk can be handed to a worker.
    data_raw = pd.read_csv(file_in, na_filter=False, chunksize=5000)
    # Apply transformations to data chunks in parallel.
    # NOTE(review): `foo` and `transform` are defined elsewhere in this
    # project; presumably `foo` applies `transform` row-wise -- confirm there.
    start = time.time()
    data = Parallel(n_jobs=n_jobs, verbose=11)(
        delayed(foo)(x, transform, axis=1, test=test)
        for i, x in enumerate(data_raw))
    # BUGFIX: these were Python 2 ``print`` statements (syntax errors on
    # Python 3); print() calls behave the same on both interpreters.
    print("Preprocessing time: ", round((time.time() - start) / 60, 3))
    print("Records: ", len(data))
    # Join data chunks and save result to csv.
    data = pd.concat(data)
    data.to_csv(file_out, index=False)
    print("File", file_in, "preprocessed to", file_out)
示例2: create_training_data
# 需要导入模块: from joblib import Parallel [as 别名]
# 或者: from joblib.Parallel import to_csv [as 别名]
def create_training_data():
    """Compute per-trip driving attributes for every driver and save them.

    Walks ``drivers/<driver>/<trip>.csv``, extracts attributes for each trip
    in parallel via ``create_attributes``, writes the resulting table to
    ``training_set.csv`` and returns it as a DataFrame.
    """
    worker_count = 8
    # Collect [driver-folder, trip-id] pairs for every trip csv on disk,
    # skipping macOS .DS_Store artefacts.
    trip_ids = []
    for driver_dir in os.listdir('drivers'):
        if 'DS_Store' in driver_dir:
            continue
        for trip_file in os.listdir('drivers/' + driver_dir):
            if '.csv' in trip_file:
                trip_ids.append([driver_dir, trip_file.replace('.csv', '')])
    # One task per trip; each worker returns one attribute row.
    rows = Parallel(n_jobs=worker_count)(
        delayed(create_attributes)(trip) for trip in trip_ids)
    frame = pd.DataFrame(rows)
    frame.columns = ['driver_trip', 'trip_time', 'total_distance',
                     'skyway_distance', 'avg_speed', 'std_speed',
                     'avg_speed_up', 'avg_speed_down',
                     'avg_acc', 'std_acc', 'avg_turn', 'std_turn',
                     'standing_time', 'standing_speed']
    # save to file for later training
    frame.to_csv('training_set.csv', index=False)
    return frame
示例3: list
# 需要导入模块: from joblib import Parallel [as 别名]
# 或者: from joblib.Parallel import to_csv [as 别名]
# NOTE(review): `filled`, `sex`, `etype`, `irr_calc` and `plots` come from
# earlier in the script (outside this excerpt); `sex` looks like the loop
# variable of an enclosing ``for sex in [...]`` -- confirm upstream.
# Combine public + private flows into single per-sex component columns.
filled['cc_{}'.format(sex)] = filled['ccpublic_{}'.format(sex)] + filled['ccprivate_{}'.format(sex)]
filled['crime_{}'.format(sex)] = filled['crimepublic_{}'.format(sex)] + filled['crimeprivate_{}'.format(sex)]
filled['health_{}'.format(sex)] = filled['health_private_{}'.format(sex)] + filled['health_public_{}'.format(sex)]
filled['transfer_{}'.format(sex)] = filled['inc_trans_pub_{}'.format(sex)] + filled['diclaim_{}'.format(sex)] + filled['ssclaim_{}'.format(sex)] + filled['ssiclaim_{}'.format(sex)]

# Every component is rescaled by each factor in 0.0, 0.25, ..., 3.0.
components = ['inc_labor', 'inc_parent', 'transfer', 'edu', 'crime', 'costs', 'cc', 'health', 'qaly', 'm_ed']
factors = np.arange(0, 3.1, 0.25)
combo = list(itertools.product(components, factors))

# vary factor: IRR
# applying factor to benefits
def irr_factors(part, f):
    """Scale component ``part`` by factor ``f`` and recompute the IRR table."""
    irr_tmp = deepcopy(filled)
    for sex in ['m', 'f', 'p']:
        irr_tmp['{}_{}'.format(part, sex)] = irr_tmp['{}_{}'.format(part, sex)] * f
    output = irr_calc(irr_tmp, etype=etype, components=components)
    output['rate'] = f
    output['part'] = part
    # BUGFIX: was a Python 2 ``print`` statement; print() works on 2 and 3.
    print('IRR for {} and factor {} calculated.'.format(part, f))
    return output

# Evaluate every (component, factor) pair in parallel, then deliberately
# rebind the name `irr_factors` to the concatenated result table.
irr_factors = Parallel(n_jobs=25)(
    delayed(irr_factors)(part, f) for part, f in combo)
irr_factors = pd.concat(irr_factors, axis=0)
irr_factors.sort_index(inplace=True)
irr_factors.to_csv(os.path.join(plots, 'irr_factors.csv'), index=True)
示例4: print
# 需要导入模块: from joblib import Parallel [as 别名]
# 或者: from joblib.Parallel import to_csv [as 别名]
# Do we filter by a base?
# NOTE(review): `base`, `gsea`, `both`, `in_folder`, `gsea_type`, `exp_type`,
# `ext_gmv` and `mann_pval` are defined earlier in the script (not visible in
# this excerpt); `base` appears to be a CLI-style string flag, hence the
# comparison against the literal "None".
if base=="None":
    # No base file: derive gene-set pairs from the GSEA table itself.
    gene_sets = gsea.groupby(["Gene_set"]).size().reset_index(name="count")
    print(gene_sets)
    # Keep only gene sets with at least 100 members.
    gene_sets = gene_sets[gene_sets["count"] >= 100] #Changed back to 100!!!
    print(gene_sets)
    # gene_sets = gene_sets[gene_sets["count"] <= 150]
    # print(gene_sets)
    # All unordered pairs of the qualifying gene sets.
    gene_sets = [i for i in itertools.combinations(list(gene_sets["Gene_set"]),2)]
    if both=="T":
        print("both one-tailed used")
        # Add the reversed orientation of every pair as well.
        gene_r = [(i[1], i[0]) for i in gene_sets]
        gene_sets = gene_sets + gene_r
else:
    # Reuse pairs recorded in a previously computed p-value file; pairs are
    # stored as "A$B" strings in the "gs" column.
    base = pd.read_csv(in_folder + "GSEA_FILES/" + gsea_type + "_gsea_" + base + "_both_" + both + "_pvals", sep="\t")
    gene_sets = list(set(base["gs"]))
    gene_sets = [(i.split("$")[0], i.split("$")[1]) for i in gene_sets]
print(gsea_type, len(gene_sets))
# Compute a p-value per pair in parallel (presumably Mann-Whitney, given the
# helper's name `mann_pval` -- confirm in its definition).
main_dict = Parallel(n_jobs=40)(delayed(mann_pval)(i) for i in gene_sets)
print("Done calculating")
# Write to file
main_dict = pd.concat([pd.DataFrame(i) for i in main_dict])
file_out = in_folder + "GSEA_FILES/" + gsea_type + "_gsea_"+ exp_type + "_both_" + both + "_ext_gmv_" + ext_gmv + "_pvals"
main_dict.to_csv(file_out, sep="\t", header=True, index=False)
print ("Done writing")
示例5: list
# 需要导入模块: from joblib import Parallel [as 别名]
# 或者: from joblib.Parallel import to_csv [as 别名]
# NOTE(review): `filled`, `sex`, `bc_calc` and `plots` come from earlier in
# the script (outside this excerpt); `sex` looks like the loop variable of an
# enclosing ``for sex in [...]`` -- confirm upstream.
# Combine the public transfer flows into one per-sex column.
filled['transfer_{}'.format(sex)] = filled['inc_trans_pub_{}'.format(sex)] + filled['diclaim_{}'.format(sex)] + filled['ssclaim_{}'.format(sex)] + filled['ssiclaim_{}'.format(sex)]

# Every component is rescaled by each factor in 0.0, 0.25, ..., 3.0.
components = ['inc_labor', 'inc_parent', 'transfer', 'edu', 'crime', 'costs', 'cc', 'health', 'qaly']
factors = np.arange(0, 3.1, 0.25)
combo = list(itertools.product(components, factors))

# vary factor: BCR
# applying factor to benefits
def bc_factors(part, f):
    """Scale component ``part`` by factor ``f`` and recompute the B/C table."""
    bc_tmp = deepcopy(filled)
    for sex in ['m', 'f', 'p']:
        bc_tmp['{}_{}'.format(part, sex)] = bc_tmp['{}_{}'.format(part, sex)] * f
    output = bc_calc(bc_tmp, components=components)
    output['rate'] = f
    output['part'] = part
    # BUGFIX: was a Python 2 ``print`` statement; print() works on 2 and 3.
    print('B/C Ratio for {} and factor {} calculated.'.format(part, f))
    return output

# Evaluate every (component, factor) pair in parallel, then deliberately
# rebind the name `bc_factors` to the concatenated result table.
bc_factors = Parallel(n_jobs=25)(
    delayed(bc_factors)(part, f) for part, f in combo)
bc_factors = pd.concat(bc_factors, axis=0)
bc_factors.sort_index(inplace=True)
bc_factors.to_csv(os.path.join(plots, 'bc_factors.csv'), index=True)
示例6: makeflows
# 需要导入模块: from joblib import Parallel [as 别名]
# 或者: from joblib.Parallel import to_csv [as 别名]
# NOTE(review): `makeflows`, `irr_calc`, `rate_range` and `plots` are defined
# earlier in the script (outside this excerpt).
etype = 2
filled = makeflows(etype=etype)

# vary DWL: IRR
# BUGFIX: the original initialised ``irr_dwl = pd.DataFrame([])`` here, but
# that value was immediately shadowed by the function definition below and
# never read -- the dead assignment has been dropped.
def irr_dwl(d):
    """Recompute the IRR table with marginal-cost-of-welfare factor ``d``.

    Public transfers are rescaled by ``d/0.5`` and public costs/benefits by
    ``(1 + d)/1.5`` (presumably relative to baselines of 0.5 and 1.5 -- confirm
    against the surrounding analysis).
    """
    irr_tmp = deepcopy(filled)
    for sex in ['m', 'f', 'p']:
        for part in ['inc_trans_pub_{}'.format(sex), 'diclaim_{}'.format(sex), 'ssclaim_{}'.format(sex), 'ssiclaim_{}'.format(sex)]:
            irr_tmp[part] = irr_tmp[part] * (d/0.5)
        for part in ['edu_{}'.format(sex), 'm_ed_{}'.format(sex)]:
            # Only the childhood columns c0..c18 of the education flows change.
            irr_tmp[part].loc[(sex, slice(None), slice(None)), slice('c0','c18')] = \
                irr_tmp[part].loc[(sex, slice(None), slice(None)), slice('c0','c18')] * ((1 + d)/1.5)
        for part in ['ccpublic_{}'.format(sex), 'crimepublic_{}'.format(sex), 'health_public_{}'.format(sex), 'costs_{}'.format(sex)]:
            irr_tmp[part] = irr_tmp[part] * ((1+d)/1.5)
    output = irr_calc(irr_tmp, etype=etype)
    output['rate'] = d
    # BUGFIX: was a Python 2 ``print`` statement; print() works on 2 and 3.
    print('IRR for MCW {} calculated.'.format(d))
    return output

# Evaluate every rate in `rate_range` in parallel, then deliberately rebind
# the name `irr_dwl` to the concatenated result table.
irr_dwl = Parallel(n_jobs=25)(
    delayed(irr_dwl)(d) for d in rate_range)
irr_dwl = pd.concat(irr_dwl, axis=0)
irr_dwl.sort_index(inplace=True)
irr_dwl.to_csv(os.path.join(plots, 'irr_dwl.csv'), index=True)