本文整理汇总了Python中pandas.read_csv方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.read_csv方法的具体用法?Python pandas.read_csv怎么用?Python pandas.read_csv使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pandas的用法示例。
在下文中一共展示了pandas.read_csv方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: saveTimingInfo
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def saveTimingInfo(summary):
    """Append the timings in ``summary`` to the ``test_timings.csv`` log.

    One row is built from the ``"timing"`` column of ``summary`` (transposed,
    so every index entry of ``summary`` becomes a column).  The row is stamped
    with the current time and the output of ``git describe``, appended to the
    existing CSV when there is one, written back to disk and printed.

    Parameters
    ----------
    summary : pandas.DataFrame
        Table that has a ``"timing"`` column.

    Raises
    ------
    subprocess.CalledProcessError
        If ``git describe`` exits with a non-zero status.
    """
    timingsPath = "test_timings.csv"
    # check_output returns bytes on Python 3; decode so the CSV stores
    # "v1.2-3-gabc" instead of the repr "b'v1.2-3-gabc'".
    git_version = subprocess.check_output(
        ["git", "describe"]).decode("utf-8").strip()
    new_row = summary[["timing"]].T
    new_row["date"] = [datetime.datetime.now()]
    new_row["version"] = git_version
    if os.path.exists(timingsPath):
        # The first CSV column is the index that to_csv wrote out earlier.
        timings = pandas.read_csv(timingsPath, index_col=0)
        timings = pandas.concat([timings, new_row])
    else:
        timings = new_row
    timings.to_csv(timingsPath)
    print(timings)
示例2: load_label
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def load_label(self):
    """
    Read the label dictionary from ``self.label_file`` into the object.

    The CSV file must start with a header naming the two columns
    (积极 = positive, 消极 = negative), followed by one pair per row:
        积极,消极
        p1,n1
        p2,n2
        ...,...
        pk,nk

    Afterwards ``self.Label_index`` lists every positive entry followed by
    every negative entry, and ``self.Label_dict`` maps positive entries to 1
    and negative entries to -1.
    """
    frame = pd.read_csv(self.label_file)
    positive = frame['积极'].tolist()
    negative = frame['消极'].tolist()
    self.Label_index = positive + negative
    # Positive entries go in first; negative ones overwrite on a clash.
    polarity = {entry: 1 for entry in positive}
    polarity.update({entry: -1 for entry in negative})
    self.Label_dict = polarity
示例3: load_data
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def load_data(self, path):
    """Fill ``self.data`` from ``path``.

    When ``path`` contains ".pickle" the work is delegated to
    ``data_utils.load_existing_data_loader`` and True is returned.
    Otherwise one ``v4_atomic_<name>.csv`` file per split in ``self.data``
    is read from the directory ``path``, the entries of every category in
    ``self.categories`` are appended to that split's "total" list, and
    False is returned.
    """
    if ".pickle" in path:
        print("Loading data from: {}".format(path))
        data_utils.load_existing_data_loader(self, path)
        return True

    for split in self.data:
        csv_name = "v4_atomic_{}.csv".format(map_name(split))
        frame = pandas.read_csv("{}/{}".format(path, csv_name), index_col=0)
        # The first nine columns hold JSON-encoded cells; decode each cell.
        frame.iloc[:, :9] = frame.iloc[:, :9].apply(
            lambda column: column.apply(json.loads))

        for cat in self.categories:
            column = frame[cat]
            tag = "<{}>".format(cat)
            triples = zip(column.index, [tag] * len(column), column.values)
            self.data[split]["total"] += utils.zipped_flatten(triples)

    if do_take_partial_dataset(self.opt.data):
        self.data["train"]["total"] = select_partial_dataset(
            self.opt.data, self.data["train"]["total"])

    return False
示例4: _load_data
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def _load_data():
    """Load the train/test CSV files named in ``config`` and build inputs.

    Two engineered columns are added to both frames: ``missing_feat`` (the
    number of feature cells equal to -1 in the row) and
    ``ps_car_13_x_ps_reg_03`` (the product of those two columns).

    Returns
    -------
    tuple
        ``(dfTrain, dfTest, X_train, y_train, X_test, ids_test,
        cat_features_indices)`` where the feature matrices exclude "id",
        "target" and ``config.IGNORE_COLS``, and ``cat_features_indices``
        holds the positions of the ``config.CATEGORICAL_COLS`` among the
        feature columns.
    """
    dfTrain = pd.read_csv(config.TRAIN_FILE)
    dfTest = pd.read_csv(config.TEST_FILE)

    def preprocess(df):
        # Adds the engineered columns in place and hands the frame back.
        feature_names = [
            name for name in df.columns if name not in ["id", "target"]
        ]
        df["missing_feat"] = (df[feature_names] == -1).values.sum(axis=1)
        df["ps_car_13_x_ps_reg_03"] = df["ps_car_13"] * df["ps_reg_03"]
        return df

    dfTrain = preprocess(dfTrain)
    dfTest = preprocess(dfTest)

    cols = [
        name for name in dfTrain.columns
        if name not in ["id", "target"] and name not in config.IGNORE_COLS
    ]

    X_train = dfTrain[cols].values
    y_train = dfTrain["target"].values
    X_test = dfTest[cols].values
    ids_test = dfTest["id"].values
    cat_features_indices = [
        position for position, name in enumerate(cols)
        if name in config.CATEGORICAL_COLS
    ]

    return (dfTrain, dfTest, X_train, y_train, X_test, ids_test,
            cat_features_indices)
示例5: gen_feat_dict
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def gen_feat_dict(self):
    """Assign a feature index to every usable column of train + test.

    The frames come from ``self.dfTrain`` / ``self.dfTest`` when set,
    otherwise they are read from ``self.trainfile`` / ``self.testfile``.
    Columns in ``self.ignore_cols`` are skipped.  A column listed in
    ``self.numeric_cols`` receives a single index; any other column receives
    one index per distinct value, stored as a ``{value: index}`` dict.

    Results are stored in ``self.feat_dict`` (column -> index or dict) and
    ``self.feat_dim`` (the total number of indices handed out).
    """
    train = pd.read_csv(self.trainfile) if self.dfTrain is None else self.dfTrain
    test = pd.read_csv(self.testfile) if self.dfTest is None else self.dfTest
    combined = pd.concat([train, test])

    self.feat_dict = feat_dict = {}
    next_index = 0
    for col in combined.columns:
        if col in self.ignore_cols:
            continue
        if col in self.numeric_cols:
            # A numeric column maps to one single index.
            feat_dict[col] = next_index
            next_index += 1
            continue
        levels = combined[col].unique()
        feat_dict[col] = {
            level: next_index + offset for offset, level in enumerate(levels)
        }
        next_index += len(levels)
    self.feat_dim = next_index
示例6: _lsa_events_converter
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def _lsa_events_converter(events_file):
"""Make a model where each trial has its own regressor using least squares
all (LSA)
Parameters
----------
events_file : str
File that contains all events from the bold run
Yields
------
events : DataFrame
A DataFrame in which each trial has its own trial_type
"""
import pandas as pd
events = pd.read_csv(events_file, sep='\t')
events['original_trial_type'] = events['trial_type']
for cond, cond_df in events.groupby('trial_type'):
cond_idx = cond_df.index
for i_trial, trial_idx in enumerate(cond_idx):
trial_name = '{0}_{1:04d}'.format(cond, i_trial+1)
events.loc[trial_idx, 'trial_type'] = trial_name
return events
示例7: test_select_confounds_error
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def test_select_confounds_error(confounds_file, tmp_path):
    """Check that ``_select_confounds`` raises on confounds containing NaN.

    The confounds table is loaded, the first ``white_matter`` value is
    replaced by NaN, and the modified table is written to ``tmp_path``.
    Selecting ``white_matter`` and ``csf`` from that file must raise a
    ValueError whose message mentions the NaNs.

    Parameters
    ----------
    confounds_file : path-like
        Tab-separated confounds file ('n/a' marks missing values).
    tmp_path : pathlib.Path
        Directory in which the modified file is written.
    """
    import pandas as pd
    import numpy as np

    confounds_df = pd.read_csv(str(confounds_file), sep='\t', na_values='n/a')
    # One .loc call instead of df[col][0] = ...: chained assignment may write
    # to a temporary copy (and never reaches the frame under pandas
    # Copy-on-Write), which would leave the file without the NaN.
    confounds_df.loc[0, 'white_matter'] = np.nan

    conf_file = tmp_path / "confounds.tsv"
    confounds_df.to_csv(str(conf_file), index=False, sep='\t', na_rep='n/a')

    with pytest.raises(ValueError) as val_err:
        _select_confounds(str(conf_file), ['white_matter', 'csf'])

    # Checked outside the `with` block: statements placed after the raising
    # call inside it would never run.
    assert "The selected confounds contain nans" in str(val_err.value)
示例8: test_select_confounds
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def test_select_confounds(confounds_file, selected_confounds, nan_confounds,
                          expanded_confounds):
    """Check the columns and the NaN imputation of ``_select_confounds``.

    The selected table must contain exactly the ``expanded_confounds``
    columns.  For every column named in ``nan_confounds`` (may be empty or
    None), the first row of the result must equal the NaN-ignoring mean of
    that column's non-zero values in the raw file.
    """
    import pandas as pd
    import numpy as np

    raw_df = pd.read_csv(str(confounds_file), sep='\t', na_values='n/a')
    selected_df = _select_confounds(str(confounds_file), selected_confounds)

    # The right set of columns must come back.
    assert set(expanded_confounds) == set(selected_df.columns)

    # Imputed values are only verified for the columns listed as NaN-bearing.
    for name in nan_confounds or ():
        raw_values = raw_df[name].values
        nonzero_mean = np.nanmean(raw_values[raw_values != 0])
        assert selected_df[name][0] == nonzero_mean
示例9: load_LUT
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def load_LUT(filename, to='data'):
    '''
    load_LUT(filename) loads the given filename as a FreeSurfer LUT.

    The file is parsed as whitespace-separated columns (id, name, r, g, b, a)
    with '#' starting a comment.

    The optional argument to (default: 'data') specifies how the LUT should be
    interpreted; it is matched case-insensitively when it is a string and can
    be any of the following:
      * 'data' specifies that a dataframe indexed by 'id' should be returned,
        with a 'name' column and a 'color' column holding [r, g, b, a] lists
        scaled to the range 0-1.
      * None specifies that the parsed table should be returned with its alpha
        column inverted but otherwise unprocessed.

    Any other value raises a ValueError.
    '''
    from neuropythy.util import to_dataframe
    import pandas
    # start by slurping in the text (raw string: '\s' is not a valid escape
    # sequence in an ordinary string literal):
    dat = pandas.read_csv(filename, comment='#', sep=r'\s+',
                          names=['id', 'name', 'r', 'g', 'b', 'a'])
    # The alpha column is inverted, so a stored 0 becomes 255 (1.0 once scaled).
    # NOTE(review): presumably the file stores transparency rather than
    # opacity -- confirm against the FreeSurfer LUT format.
    dat['a'] = 255 - dat['a']
    if pimms.is_str(to):
        to = to.lower()
    if to is None:
        return dat
    elif to == 'data':
        df = to_dataframe({'id': dat['id'].values, 'name': dat['name'].values})
        df['color'] = dat.apply(lambda r: [r[k]/255.0 for k in ['r', 'g', 'b', 'a']],
                                axis=1)
        df.set_index('id', inplace=True)
        return df
    else:
        raise ValueError('Unknown to argument: %s' % to)
# A function to load in default data from the freesurfer home: e.g., the default LUTs
示例10: main
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def main():
    """Run the bidirectional-LSTM auto-encoder ECG anomaly demo.

    Reads ``./data/ecg_discord_test.csv`` (no header row), rescales it with
    ``MinMaxScaler``, optionally trains the auto-encoder on the first 23 rows
    (when ``DO_TRAINING`` is truthy), reloads the model from ``./models``,
    prints the anomaly verdict and distance for each of those rows, and plots
    the reconstruction errors against the model threshold.
    """
    data_dir_path = './data'
    model_dir_path = './models'
    ecg_data = pd.read_csv(data_dir_path + '/ecg_discord_test.csv', header=None)
    print(ecg_data.head())
    # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same
    # ndarray on both old and new pandas.
    ecg_np_data = ecg_data.values
    scaler = MinMaxScaler()
    ecg_np_data = scaler.fit_transform(ecg_np_data)
    print(ecg_np_data.shape)

    ae = BidirectionalLstmAutoEncoder()

    # fit the data and save model into model_dir_path
    if DO_TRAINING:
        ae.fit(ecg_np_data[:23, :], model_dir_path=model_dir_path,
               estimated_negative_sample_ratio=0.9)

    # load back the model saved in model_dir_path, then detect anomalies
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(ecg_np_data[:23, :])
    reconstruction_error = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        print('# ' + str(idx) + ' is ' + ('abnormal' if is_anomaly else 'normal') + ' (dist: ' + str(dist) + ')')
        reconstruction_error.append(dist)

    visualize_reconstruction_error(reconstruction_error, ae.threshold)
示例11: main
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def main():
    """Run the CNN-LSTM auto-encoder ECG anomaly demo.

    Reads ``./data/ecg_discord_test.csv`` (no header row), rescales it with
    ``MinMaxScaler``, optionally trains the auto-encoder on the first 23 rows
    (when ``DO_TRAINING`` is truthy), reloads the model from ``./models``,
    prints the anomaly verdict and distance for each of those rows, and plots
    the reconstruction errors against the model threshold.
    """
    data_dir_path = './data'
    model_dir_path = './models'
    ecg_data = pd.read_csv(data_dir_path + '/ecg_discord_test.csv', header=None)
    print(ecg_data.head())
    # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same
    # ndarray on both old and new pandas.
    ecg_np_data = ecg_data.values
    scaler = MinMaxScaler()
    ecg_np_data = scaler.fit_transform(ecg_np_data)
    print(ecg_np_data.shape)

    ae = CnnLstmAutoEncoder()

    # fit the data and save model into model_dir_path
    if DO_TRAINING:
        ae.fit(ecg_np_data[:23, :], model_dir_path=model_dir_path,
               estimated_negative_sample_ratio=0.9)

    # load back the model saved in model_dir_path, then detect anomalies
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(ecg_np_data[:23, :])
    reconstruction_error = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        print('# ' + str(idx) + ' is ' + ('abnormal' if is_anomaly else 'normal') + ' (dist: ' + str(dist) + ')')
        reconstruction_error.append(dist)

    visualize_reconstruction_error(reconstruction_error, ae.threshold)
示例12: main
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def main():
    """Run the LSTM auto-encoder ECG anomaly demo.

    Reads ``./data/ecg_discord_test.csv`` (no header row), rescales it with
    ``MinMaxScaler``, optionally trains the auto-encoder on the first 23 rows
    (when ``DO_TRAINING`` is truthy), reloads the model from ``./models``,
    prints the anomaly verdict and distance for each of those rows, and plots
    the reconstruction errors against the model threshold.
    """
    data_dir_path = './data'
    model_dir_path = './models'
    ecg_data = pd.read_csv(data_dir_path + '/ecg_discord_test.csv', header=None)
    print(ecg_data.head())
    # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same
    # ndarray on both old and new pandas.
    ecg_np_data = ecg_data.values
    scaler = MinMaxScaler()
    ecg_np_data = scaler.fit_transform(ecg_np_data)
    print(ecg_np_data.shape)

    ae = LstmAutoEncoder()

    # fit the data and save model into model_dir_path
    if DO_TRAINING:
        ae.fit(ecg_np_data[:23, :], model_dir_path=model_dir_path,
               estimated_negative_sample_ratio=0.9)

    # load back the model saved in model_dir_path, then detect anomalies
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(ecg_np_data[:23, :])
    reconstruction_error = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        print('# ' + str(idx) + ' is ' + ('abnormal' if is_anomaly else 'normal') + ' (dist: ' + str(dist) + ')')
        reconstruction_error.append(dist)

    visualize_reconstruction_error(reconstruction_error, ae.threshold)
示例13: read_names
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def read_names(names_path):
    """Load a names CSV and total the counts per lower-cased name.

    See SmallNames.txt for an example of the format, or go to
    https://www.kaggle.com/kaggle/us-baby-names for full lists.

    Args:
      names_path: path to a csv file with at least ``Name`` and ``Count``
        columns.

    Returns:
      Dataset: a namedtuple of two numpy arrays, ``Name`` (the distinct
        lower-cased names) and ``Count`` (the summed count of each name, in
        the same order).
    """
    frame = pd.read_csv(names_path)
    frame.Name = frame.Name.str.lower()
    # Lower-casing first merges entries that differ only by case.
    totals = frame.groupby(by=["Name"])["Count"].sum()
    Dataset = collections.namedtuple('Dataset', ['Name', 'Count'])
    return Dataset(Name=np.array(totals.index.tolist()),
                   Count=np.array(totals.tolist()))
示例14: generate_vocabulary
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def generate_vocabulary(self, review_summary_file):
    """
    Build the vocabulary from a CSV file of (review, summary) pairs.

    Every review and summary is tokenized with ``wordpunct_tokenize`` and the
    tokens are handed to ``self.__add_list_to_dict``.  Finally the empty
    string is registered as one more word in ``self.map`` and
    ``self.revmap``.

    :param review_summary_file: path of a CSV file with a header row whose
        data rows unpack into exactly two values (review, summary)
    :return: None; the raw pairs are kept in ``self.rev_sum_pair``
    """
    self.rev_sum_pair = pd.read_csv(review_summary_file, header=0).values

    for review, summary in self.rev_sum_pair:
        rev_lst = wordpunct_tokenize(review)
        sum_lst = wordpunct_tokenize(summary)
        self.__add_list_to_dict(rev_lst)
        self.__add_list_to_dict(sum_lst)

    # Now store the "" empty string as the last word of the vocabulary.
    # The index is computed once, before the insert: calling len(self.map)
    # again after adding "" gives a value one larger, so the reverse map
    # would point at a different index than the forward map.
    empty_index = len(self.map)
    self.map[""] = empty_index
    self.revmap[empty_index] = ""
示例15: get_data_from_biodbnet
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_csv [as 别名]
def get_data_from_biodbnet(self, df_hgnc):
    """Query BioDBNet for cross-references of gene symbols.

    keys are unique Gene names

    input is made of the df based on HGNC data web services

    UniProt accessions are sometimes duplicated. If so, the value is actually
    the primary accession entry together with all secondary ones.

    e.g.,

    ABHD11 >>>> Q8N723;Q8NFV2;Q8NFV3;Q6PJU0;Q8NFV4;H7BYM8;Q8N722;Q9HBS8 ABHDB_HUMAN Alpha/beta hydrolase domain-containing protein 11

    corresponds actually to the primary one: Q8NFV4

    :return: DataFrame parsed from the tab-separated db2db answer, indexed by
        the "Gene Symbol" column
    """
    # Python 2 ships the StringIO module; Python 3 only has io.StringIO.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    import pandas as pd

    b = biodbnet.BioDBNet()
    # NOTE(review): `res` is not defined in this function and `df_hgnc` is
    # never used -- presumably the gene symbols should come from `df_hgnc`;
    # confirm against the caller before changing it.
    # list() keeps the slice working when keys() returns a view object
    # (Python 3 dict); only the first 2000 keys are sent.
    res2 = b.db2db("Gene Symbol",
                   ["HGNC ID", "UniProt Accession", "UniProt Entry Name",
                    "UniProt Protein Name", "KEGG Gene ID", "Ensembl Gene ID"],
                   list(res.keys())[0:2000])

    c = pd.read_csv(StringIO(res2), delimiter="\t", index_col="Gene Symbol")
    return c