本文整理汇总了Python中pandas.read_table方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.read_table方法的具体用法?Python pandas.read_table怎么用?Python pandas.read_table使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pandas的用法示例。
在下文中一共展示了pandas.read_table方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _assert_result
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def _assert_result(self, filename: str,
                   data: str,
                   iterations: int,
                   project_name: str,
                   result_means_filename: str,
                   debug_seed: int,
                   threshold: float,
                   result_precision: int
                   ) -> None:
    """Compare a generated means file against its stored reference file.

    The reference file name is derived from the run parameters and looked
    up in ``data_test_dir``; the generated file is read from
    ``output_test_dir/<project_name>``.  The generated file is removed only
    after the comparison has passed, because a failing assertion raises
    before the cleanup line is reached.
    """
    # The reference file name carries the threshold without its decimal point.
    threshold_tag = str(threshold).replace('.', '')
    reference_template = (
        'statistical_analysis__{}_result__'
        'data-{}_it-{}_seed-{}_threshold-{}_precision-{}.txt'
    )
    means_test_filename = reference_template.format(
        filename, data, iterations, debug_seed, threshold_tag, result_precision)

    reference_path = os.path.realpath('{}/{}'.format(data_test_dir, means_test_filename))
    result_path = '{}/{}/{}'.format(output_test_dir, project_name, result_means_filename)

    original_means = pd.read_table(reference_path)
    result_means = pd.read_table(result_path)

    frames_match = dataframe_functions.dataframes_has_same_data(result_means, original_means)
    failure_message = 'failed comparing {} with {}'.format(means_test_filename, result_means_filename)
    self.assertTrue(frames_match, msg=failure_message)

    self.remove_file(result_path)
示例2: main
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def main():
    """Preprocess the movie and rating files and export the derived tables.

    Reads ``movies.dat`` and ``ratings.dat`` (both ``::``-delimited) from the
    working directory, joins them on ``movie_id``, runs feature extraction
    and writes the resulting tables as tab-separated ``.csv`` files.
    """
    movie_columns = ['movie_id', 'movie_name', 'tag']
    rating_columns = ["user_id", "movie_id", "rating", "timestamp"]

    # Load and preprocess the movie table.
    movie = movie_preprocessing(
        pd.read_table('movies.dat', sep='::', names=movie_columns, engine='python'))

    # Load the ratings and attach the movie information to every rating.
    rating = pd.read_table("ratings.dat", sep="::", names=rating_columns, engine='python')
    data = pd.merge(rating, movie, on="movie_id")

    # Derive the feature tables from the joined data.
    streaming_batch, user_feature, actions, reward_list = feature_extraction(data)

    streaming_batch.to_csv("streaming_batch.csv", sep='\t', index=False)
    # user_feature is the only table written together with its index.
    user_feature.to_csv("user_feature.csv", sep='\t')
    actions_frame = pd.DataFrame(actions, columns=['movie_id'])
    actions_frame.to_csv("actions.csv", sep='\t', index=False)
    reward_list.to_csv("reward_list.csv", sep='\t', index=False)

    # Keep only the movies that appear among the extracted actions.
    is_action = movie['movie_id'].isin(actions)
    movie[is_action].to_csv("action_context.csv", sep='\t', index=False)
示例3: get_ref_contig_sizes
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def get_ref_contig_sizes(altref_file):
    """
    Build a Series of contig lengths, covering primary and alt contigs.

    :param altref_file: BED file of contig information where each record spans the whole contig. Must contain
        columns "#CHROM" and "END".

    :return: Series of contig lengths ("END" values) indexed by the contig name ("#CHROM").
    """
    contig_table = pd.read_table(altref_file, header=0)

    # Index the records by contig name, then keep only the end coordinate.
    return contig_table.set_index('#CHROM')['END']
示例4: _read_dataframe
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def _read_dataframe(filename):
    """ Loads user/artist/play-count triples from a TSV file as a pandas dataframe.

    Columns 0, 2 and 3 of the file are kept and named ``user``, ``artist``
    and ``plays``; ``user`` and ``artist`` are converted to categoricals.
    """
    # pandas is imported lazily so it is only needed when this loader runs
    import pandas

    began = time.time()
    log.debug("reading data from %s", filename)
    frame = pandas.read_table(
        filename,
        usecols=[0, 2, 3],
        names=['user', 'artist', 'plays'],
        na_filter=False,
    )

    # categorical dtype assigns an integer code to every distinct value
    for column in ('user', 'artist'):
        frame[column] = frame[column].astype("category")

    log.debug("read data file in %s", time.time() - began)
    return frame
示例5: _read_dataframe
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def _read_dataframe(filename):
    """ Loads user/item/rating triples from a TSV file as a pandas dataframe.

    Columns 0, 1 and 3 of the file are kept and named ``user``, ``item``
    and ``rating``; ``user`` and ``item`` are converted to categoricals.
    """
    # pandas is imported lazily so it is only needed when this loader runs
    import pandas

    began = time.time()
    log.debug("reading data from %s", filename)
    frame = pandas.read_table(
        filename,
        usecols=[0, 1, 3],
        names=['user', 'item', 'rating'],
    )

    # categorical dtype assigns an integer code to every distinct value
    for column in ('user', 'item'):
        frame[column] = frame[column].astype("category")

    log.debug("read data file in %s", time.time() - began)
    return frame
示例6: _read_triplets_dataframe
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def _read_triplets_dataframe(filename):
    """ Reads a user/track/play-count triplets TSV as a pandas dataframe.

    :param filename: path of the tab-separated triplets file to load.
    :return: DataFrame with columns ``user``, ``track`` and ``plays``, where
        ``user`` and ``track`` are categorical.
    """
    # delay importing this to avoid another dependency
    import pandas

    start = time.time()
    log.debug("reading data from %s", filename)

    # Read the file that was requested (and logged) rather than a hard-coded
    # "train_triplets.txt" resolved against the current working directory.
    data = pandas.read_table(filename, names=['user', 'track', 'plays'])

    # categorical dtype assigns an integer code to every distinct value
    data['user'] = data['user'].astype("category")
    data['track'] = data['track'].astype("category")

    log.debug("read data file in %s", time.time() - start)
    return data
示例7: test_1000_sep
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def test_1000_sep(self):
    """Check that ``thousands=','`` is honoured by both read_csv and read_table."""
    data = """A|B|C
1|2,334|5
10|13|10.
"""
    expected = DataFrame({'A': [1, 10], 'B': [2334, 13], 'C': [5, 10.]})

    # Both readers are given identical arguments and must yield the same frame.
    for reader in (self.read_csv, self.read_table):
        parsed = reader(StringIO(data), sep='|', thousands=',')
        tm.assert_frame_equal(parsed, expected)
示例8: test_duplicate_columns
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def test_duplicate_columns(self):
    """Check how duplicate header names are labelled for each parser engine.

    By default and with ``mangle_dupe_cols=True`` repeated names get a
    numeric suffix; with ``mangle_dupe_cols=False`` they are kept as-is.
    """
    data = """A,A,B,B,B
1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
    mangled = ['A', 'A.1', 'B', 'B.1', 'B.2']
    untouched = ['A', 'A', 'B', 'B', 'B']

    # (extra keyword arguments, expected column labels); the first case
    # checks the default behaviour
    cases = [
        ({}, mangled),
        ({'mangle_dupe_cols': False}, untouched),
        ({'mangle_dupe_cols': True}, mangled),
    ]

    for engine in ['python', 'c']:
        for extra_kwargs, wanted in cases:
            df = self.read_table(StringIO(data), sep=',', engine=engine, **extra_kwargs)
            self.assertEqual(list(df.columns), wanted)
示例9: test_no_header
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def test_no_header(self):
    """Check values and column labels when the input has no header row.

    Three variants are read from the same data: ``header=None`` (expects
    integer labels 0..4), ``header=None`` with ``prefix='X'`` (expects
    ``X0``..``X4``) and explicit ``names`` (expects exactly those names).
    """
    data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
    df = self.read_table(StringIO(data), sep=',', header=None)
    df_pref = self.read_table(StringIO(data), sep=',', prefix='X',
                              header=None)

    names = ['foo', 'bar', 'baz', 'quux', 'panda']
    df2 = self.read_table(StringIO(data), sep=',', names=names)

    expected = [[1, 2, 3, 4, 5.],
                [6, 7, 8, 9, 10],
                [11, 12, 13, 14, 15]]
    tm.assert_almost_equal(df.values, expected)
    tm.assert_almost_equal(df.values, df2.values)

    # assertTrue is used instead of the ``assert_`` alias, which unittest
    # deprecated long ago and removed in Python 3.12.
    self.assertTrue(np.array_equal(df_pref.columns,
                                   ['X0', 'X1', 'X2', 'X3', 'X4']))
    self.assertTrue(np.array_equal(df.columns, lrange(5)))
    self.assertTrue(np.array_equal(df2.columns, names))
示例10: test_1000_sep_with_decimal
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def test_1000_sep_with_decimal(self):
    """Check ``thousands=','`` on a value that also has a decimal part."""
    data = """A|B|C
1|2,334.01|5
10|13|10.
"""
    expected = DataFrame({'A': [1, 10], 'B': [2334.01, 13], 'C': [5, 10.]})

    # Both readers are given identical arguments and must yield the same frame.
    for reader in (self.read_csv, self.read_table):
        parsed = reader(StringIO(data), sep='|', thousands=',')
        tm.assert_frame_equal(parsed, expected)
示例11: test_iteration_open_handle
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def test_iteration_open_handle(self):
    """Read the remainder of a partially iterated file handle (skipped on Python 3).

    After a ``for`` loop has consumed the lines up to the one containing
    ``CCC``, the C engine is expected to raise on the handle, while the
    Python engine must return the remaining lines as a Series.
    """
    if PY3:
        raise nose.SkipTest("won't work in Python 3 {0}".format(sys.version_info))

    with tm.ensure_clean() as path:
        with open(path, 'wb') as writer:
            writer.write('AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG')

        with open(path, 'rb') as reader:
            # advance the handle past the line containing 'CCC'
            for row in reader:
                if 'CCC' in row:
                    break

            # the C engine must fail on this handle; success is an error
            c_engine_raised = False
            try:
                read_table(reader, squeeze=True, header=None, engine='c')
            except Exception:
                c_engine_raised = True
            if not c_engine_raised:
                raise ValueError('this should not happen')

            result = read_table(reader, squeeze=True, header=None,
                                engine='python')
            expected = Series(['DDD', 'EEE', 'FFF', 'GGG'])
            tm.assert_series_equal(result, expected)
示例12: merge_files
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def merge_files(fl_lst, output, ext):
    """Join several tab-separated tables column-wise into ``<output>.<ext>``.

    Every input is read with its first column as the index and its first
    row as the header.  Each column name is prefixed with the input file's
    base name (the part before the first ``.``) and an underscore, and the
    frames are concatenated along the column axis.  The output starts with
    a header line holding only the data column names (no index label),
    followed by the rows, with missing values written as ``nan``.
    """
    def _load_prefixed(path):
        # Read one table and prefix its columns with the file's base name.
        table = pd.read_table(path, sep='\t', index_col=0, header=0)
        stem = os.path.basename(path).split(".")[0]
        renamed = {column: stem + "_" + column for column in table.columns.values}
        table.rename(columns=renamed, inplace=True)
        return table

    combined = pd.concat([_load_prefixed(path) for path in fl_lst], axis=1)

    target = "%s.%s" % (output, ext)
    with open(target, "w+") as fh:
        # Header is written by hand so that it carries no index label.
        fh.write("\t".join(combined.columns.values) + "\n")
        combined.to_csv(fh, sep="\t", na_rep="nan", header=False)
示例13: annotate
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def annotate(anno_file, chromo, pos):
    """Annotate positions against the regions of one chromosome.

    The first three columns of the annotation file are read as
    ``chromo`` (str), ``start`` (int32) and ``end`` (int32).  Chromosome
    names are upper-cased and stripped of ``CHR`` before being compared
    with ``chromo``.

    :param anno_file: annotation file, opened through ``get_fh``.
    :param chromo: chromosome name to select (compared after the
        normalisation described above, so e.g. ``'1'`` rather than ``'chr1'``).
    :param pos: positions handed to ``an.is_in``.
    :return: ``int8`` numpy array built from the result of ``an.is_in``
        (presumably one 0/1 flag per position -- confirm against ``an``).
    """
    anno_file = get_fh(anno_file, 'r')
    try:
        anno = pd.read_table(anno_file, header=None, usecols=[0, 1, 2],
                             dtype={0: 'str', 1: 'int32', 2: 'int32'})
    finally:
        # Release the handle even when parsing fails.
        anno_file.close()

    anno.columns = ['chromo', 'start', 'end']
    anno.chromo = anno.chromo.str.upper().str.replace('CHR', '')

    # Filter and sort into a new frame instead of sorting the filtered
    # slice in place, which can trigger pandas' SettingWithCopyWarning.
    anno = anno.loc[anno.chromo == chromo].sort_values('start')

    # NOTE(review): join_overlapping presumably merges overlapping
    # intervals of the start-sorted regions -- confirm against ``an``.
    start, end = an.join_overlapping(anno.start.values, anno.end.values)
    return np.array(an.is_in(pos, start, end), dtype='int8')
示例14: get_datasets
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def get_datasets(fpath, condition=None):
    """Collect one DataFrame per experiment directory found under ``fpath``.

    A directory counts as an experiment when it contains ``log.txt``; its
    ``params.json`` must then provide an ``exp_name`` entry.  Each log table
    gets two extra trailing columns: ``Unit`` (running index of the
    experiment, in the order ``os.walk`` visits directories) and
    ``Condition`` (``condition`` when truthy, otherwise the experiment name).

    :param fpath: root directory that is searched recursively.
    :param condition: optional label used instead of each experiment's name.
    :return: list of DataFrames; empty when no ``log.txt`` is found.
    """
    unit = 0
    datasets = []
    # ``_dirs`` is unused; the name avoids shadowing the builtin ``dir``.
    for root, _dirs, files in os.walk(fpath):
        if 'log.txt' not in files:
            continue

        # The context manager closes params.json instead of leaking the handle.
        with open(os.path.join(root, 'params.json')) as param_file:
            params = json.load(param_file)
        exp_name = params['exp_name']

        experiment_data = pd.read_table(os.path.join(root, 'log.txt'))
        # Append the bookkeeping columns after the logged ones.
        experiment_data.insert(len(experiment_data.columns), 'Unit', unit)
        experiment_data.insert(
            len(experiment_data.columns),
            'Condition',
            condition or exp_name
        )

        datasets.append(experiment_data)
        unit += 1
    return datasets
示例15: get_datasets
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_table [as 别名]
def get_datasets(fpath, condition=None):
    """Load every experiment log under ``fpath`` into one combined DataFrame.

    A directory counts as an experiment when it contains ``log.txt``; its
    ``params.json`` must then provide an ``exp_name`` entry.  Each log table
    gets two extra trailing columns: ``Unit`` (running index of the
    experiment, in the order ``os.walk`` visits directories) and
    ``Condition`` (``condition`` when truthy, otherwise the experiment name).
    The per-experiment tables are concatenated with a fresh integer index.

    :param fpath: root directory that is searched recursively.
    :param condition: optional label used instead of each experiment's name.
    :return: a single DataFrame holding the rows of all experiments.
    :raises ValueError: raised by ``pd.concat`` when no ``log.txt`` is found.
    """
    unit = 0
    datasets = []
    # ``_dirs`` is unused; the name avoids shadowing the builtin ``dir``.
    for root, _dirs, files in os.walk(fpath):
        if 'log.txt' not in files:
            continue

        # The context manager closes params.json instead of leaking the handle.
        with open(os.path.join(root, 'params.json')) as param_file:
            params = json.load(param_file)
        exp_name = params['exp_name']

        experiment_data = pd.read_table(os.path.join(root, 'log.txt'))
        # Append the bookkeeping columns after the logged ones.
        experiment_data.insert(len(experiment_data.columns), 'Unit', unit)
        experiment_data.insert(
            len(experiment_data.columns),
            'Condition',
            condition or exp_name
        )

        datasets.append(experiment_data)
        unit += 1

    datasets = pd.concat(datasets, ignore_index=True)
    return datasets