本文整理汇总了Python中bcolz.open方法的典型用法代码示例。如果您正苦于以下问题:Python bcolz.open方法的具体用法?Python bcolz.open怎么用?Python bcolz.open使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类bcolz
的用法示例。
在下文中一共展示了bcolz.open方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: extract_fasta_to_file
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def extract_fasta_to_file(fasta, output_dir, mode="bcolz", overwrite=False):
    """One-hot encode every reference sequence in ``fasta`` and write each
    chromosome to ``output_dir`` using the writer registered for ``mode``.

    Also writes a ``metadata.json`` in ``output_dir`` recording the array
    shape per chromosome, the storage type, and the source fasta path.
    """
    assert mode in _array_writer
    makedirs(output_dir, exist_ok=overwrite)
    fasta_file = FastaFile(fasta)
    writer = _array_writer[mode]
    shapes_by_chrom = {}
    for name, length in zip(fasta_file.references, fasta_file.lengths):
        encoded = np.zeros((length, NUM_SEQ_CHARS), dtype=np.float32)
        one_hot_encode_sequence(fasta_file.fetch(name), encoded)
        shapes_by_chrom[name] = encoded.shape
        writer(encoded, os.path.join(output_dir, name))
    metadata = {
        "file_shapes": shapes_by_chrom,
        "type": "array_{}".format(mode),
        "source": fasta,
    }
    with open(os.path.join(output_dir, "metadata.json"), "w") as fp:
        json.dump(metadata, fp)
示例2: open
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def open(cls, rootdir, end_session=None):
    """
    Open an existing ``rootdir`` for writing.
    Parameters
    ----------
    end_session : Timestamp (optional)
        When appending, the intended new ``end_session``.
    """
    metadata = BcolzMinuteBarMetadata.read(rootdir)
    # When the caller supplies a new end session we are appending, so the
    # metadata on disk must be rewritten; otherwise reuse the stored one.
    if end_session is None:
        resolved_end = metadata.end_session
        rewrite_metadata = False
    else:
        resolved_end = end_session
        rewrite_metadata = True
    return BcolzMinuteBarWriter(
        rootdir,
        metadata.calendar,
        metadata.start_session,
        resolved_end,
        metadata.minutes_per_day,
        metadata.default_ohlc_ratio,
        metadata.ohlc_ratios_per_sid,
        write_metadata=rewrite_metadata,
    )
示例3: divident_toMongo
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def divident_toMongo(divident_name, db_name):
    """Copy dividend records from a bcolz table into per-code Mongo
    collections.

    ``divident_name`` is the bcolz rootdir name under ``data_root_path``;
    ``db_name`` is a Mongo database whose collections are keyed by
    order-book id. Amounts are scaled from the stored integer (1/10000)
    representation to cash values.
    """
    table = bcolz.open(data_root_path + divident_name, 'r')
    # 'line_map' maps each order-book id to its (start, end) row slice.
    line_map = table.attrs['line_map']
    for code in line_map.keys():
        start, end = line_map[code]
        for row in table[start:end]:
            db_name[code].insert({
                'book_closure_date': str(row['closure_date']),
                'ex_dividend_date': str(row['ex_date']),
                'payable_date': str(row['payable_date']),
                'dividend_cash_before_tax': row['cash_before_tax'] / 10000.0,
                'round_lot': int(row['round_lot']),
                'announcement_date': str(row['announcement_date'])
            })
示例4: dump
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def dump(obj, fname):
    """Pickle ``obj`` to the file ``fname``.

    Uses a context manager so the file handle is closed (and buffers
    flushed) even if pickling raises — the original left the handle open.
    """
    with open(fname, 'wb') as f:
        pickle.dump(obj, f)
示例5: load
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def load(fname):
    """Unpickle and return the object stored in ``fname``.

    Uses a context manager so the file handle is closed promptly — the
    original relied on GC to close the leaked handle.
    """
    with open(fname, 'rb') as f:
        return pickle.load(f)
示例6: load_array
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def load_array(fname):
    """Open the bcolz array rooted at ``fname`` and materialize it fully
    in memory via a whole-array slice."""
    carr = bcolz.open(fname)
    return carr[:]
示例7: load_glove
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def load_glove(loc):
    """Load a GloVe embedding triple from files sharing the prefix ``loc``.

    Returns ``(vectors, words, word_index)`` read from ``loc + '.dat'``
    (a bcolz array), ``loc + '_words.pkl'`` and ``loc + '_idx.pkl'``
    (latin1-encoded pickles, as produced under Python 2).

    Fixed: the two pickle files were opened without ever being closed;
    context managers now guarantee cleanup.
    """
    vectors = load_array(loc + '.dat')
    with open(loc + '_words.pkl', 'rb') as f:
        words = pickle.load(f, encoding='latin1')
    with open(loc + '_idx.pkl', 'rb') as f:
        word_index = pickle.load(f, encoding='latin1')
    return vectors, words, word_index
示例8: extract_bigwig_to_file
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def extract_bigwig_to_file(
    bigwig,
    output_dir,
    mode="bcolz",
    dtype=np.float32,
    overwrite=False,
    nan_as_zero=True,
):
    """Dump every chromosome track of ``bigwig`` into ``output_dir`` using
    the writer registered for ``mode``, then write a ``metadata.json``
    with the per-chromosome shapes, the storage type, and the source path.

    If ``nan_as_zero`` is true, NaN coverage values are replaced with 0
    before writing.
    """
    assert mode in _array_writer
    makedirs(output_dir, exist_ok=overwrite)
    writer = _array_writer[mode]
    bw = pyBigWig.open(bigwig)
    shapes_by_chrom = {}
    for chrom, length in six.iteritems(bw.chroms()):
        track = np.zeros(length, dtype=np.float32)
        track[:] = bw.values(chrom, 0, length)
        if nan_as_zero:
            nan_to_zero(track)
        writer(track.astype(dtype), os.path.join(output_dir, chrom))
        shapes_by_chrom[chrom] = track.shape
    bw.close()
    metadata = {
        "file_shapes": shapes_by_chrom,
        "type": "array_{}".format(mode),
        "source": bigwig,
    }
    with open(os.path.join(output_dir, "metadata.json"), "w") as fp:
        json.dump(metadata, fp)
示例9: read_genome_sizes
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def read_genome_sizes(genome_file):
    """Parse a whitespace-separated two-column (chromosome, size) file and
    return a dict mapping chromosome name to integer size."""
    with open(genome_file) as fp:
        return {
            name: int(size)
            for name, size in (line.split() for line in fp)
        }
示例10: load_directory
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def load_directory(base_dir, in_memory=False):
    """Load the per-chromosome arrays previously extracted into ``base_dir``.

    Reads ``metadata.json`` to determine the storage type and returns a
    dict mapping chromosome name to its array. Supported types are
    ``array_numpy`` (``.npy`` files, memory-mapped unless ``in_memory``),
    ``array_bcolz`` (opened read-only, copied into memory if
    ``in_memory``), and ``array_tiledb``.

    Raises ValueError for unknown storage types or when an array's shape
    disagrees with the shape recorded in the metadata.

    Fixes: the error message previously omitted array_tiledb even though
    that type is handled; ``six.iteritems``/``dict.keys()`` replaced with
    plain dict iteration (behaviorally identical).
    """
    with open(os.path.join(base_dir, "metadata.json"), "r") as fp:
        metadata = json.load(fp)
    data_type = metadata["type"]
    if data_type == "array_numpy":
        # Memory-map by default so huge genomes don't load eagerly.
        mmap_mode = None if in_memory else "r"
        data = {
            chrom: np.load(
                "{}.npy".format(os.path.join(base_dir, chrom)),
                mmap_mode=mmap_mode,
            )
            for chrom in metadata["file_shapes"]
        }
    elif data_type == "array_bcolz":
        data = {
            chrom: bcolz.open(os.path.join(base_dir, chrom), mode="r")
            for chrom in metadata["file_shapes"]
        }
        if in_memory:
            data = {chrom: arr.copy() for chrom, arr in data.items()}
    elif data_type == "array_tiledb":
        data = {
            chrom: load_tiledb(os.path.join(base_dir, chrom))
            for chrom in metadata["file_shapes"]
        }
    else:
        raise ValueError(
            "Can only extract from array_bcolz, array_numpy and array_tiledb"
        )
    # Guard against stale metadata: every array must match its recorded shape.
    for chrom, shape in metadata["file_shapes"].items():
        if data[chrom].shape != tuple(shape):
            raise ValueError(
                "Inconsistent shape found in metadata file: "
                "{} - {} vs {}".format(chrom, shape, data[chrom].shape)
            )
    return data
示例11: last_date_in_output_for_sid
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def last_date_in_output_for_sid(self, sid):
    """
    Parameters
    ----------
    sid : int
        Asset identifier.
    Returns
    -------
    out : pd.Timestamp
        The midnight of the last date written in to the output for the
        given sid.
    """
    sizes_path = "{0}/close/meta/sizes".format(self.sidpath(sid))
    if not os.path.exists(sizes_path):
        # Nothing has been written for this sid yet.
        return pd.NaT
    with open(sizes_path, mode='r') as f:
        meta = json.loads(f.read())
    # Integer division so the result stays an int suitable for indexing
    # the session-label sequence.
    written_days = meta['shape'][0] // self._minutes_per_day
    if written_days == 0:
        # Container exists but holds no complete day.
        return pd.NaT
    return self._session_labels[written_days - 1]
示例12: _init_ctable
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def _init_ctable(self, path):
    """
    Create empty ctable for given path.
    Parameters
    ----------
    path : string
        The path to rootdir of the new ctable.
    """
    # Only create the containing subdir on creation.
    # This is not to be confused with the `.bcolz` directory, but is the
    # directory up one level from the `.bcolz` directories.
    parent_dir = os.path.dirname(path)
    if not os.path.exists(parent_dir):
        # Another sid may have already created the containing directory.
        os.makedirs(parent_dir)
    empty_col = np.empty(0, np.uint64)
    ohlcv_names = ['open', 'high', 'low', 'close', 'volume']
    # One empty column per OHLCV field; bcolz fixes the dtype per column.
    table = ctable(
        rootdir=path,
        columns=[empty_col] * len(ohlcv_names),
        names=ohlcv_names,
        expectedlen=self._expectedlen,
        mode='w',
    )
    table.flush()
    return table
示例13: write
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def write(self, data, show_progress=False, invalid_data_behavior='warn'):
    """Write a stream of minute data.
    Parameters
    ----------
    data : iterable[(int, pd.DataFrame)]
        The data to write. Each element should be a tuple of sid, data
        where data has the following format:
          columns : ('open', 'high', 'low', 'close', 'volume')
          open : float64
          high : float64
          low : float64
          close : float64
          volume : float64|int64
        index : DatetimeIndex of market minutes.
        A given sid may appear more than once in ``data``; however,
        the dates must be strictly increasing.
    show_progress : bool, optional
        Whether or not to show a progress bar while writing.
    """
    progress_ctx = maybe_show_progress(
        data,
        show_progress=show_progress,
        # Display the sid of the element currently being written.
        item_show_func=lambda item: item if item is None else str(item[0]),
        label="Merging minute equity files:",
    )
    with progress_ctx as items:
        for sid_and_frame in items:
            self.write_sid(
                *sid_and_frame,
                invalid_data_behavior=invalid_data_behavior
            )
示例14: write_cols
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def write_cols(self, sid, dts, cols, invalid_data_behavior='warn'):
    """
    Write the OHLCV data for the given sid.
    If there is no bcolz ctable yet created for the sid, create it.
    If the length of the bcolz ctable is not exactly to the date before
    the first day provided, fill the ctable with 0s up to that date.
    Parameters
    ----------
    sid : int
        The asset identifier for the data being written.
    dts : datetime64 array
        The dts corresponding to values in cols.
    cols : dict of str -> np.array
        dict of market data with the following characteristics.
        keys are ('open', 'high', 'low', 'close', 'volume')
        open : float64
        high : float64
        low : float64
        close : float64
        volume : float64|int64
    """
    expected_len = len(dts)
    # Every OHLCV column must line up one-to-one with the minute stamps.
    lengths_match = all(
        len(cols[name]) == expected_len for name in self.COL_NAMES
    )
    if not lengths_match:
        detail = " ".join(
            "{0}={1}".format(name, len(cols[name]))
            for name in self.COL_NAMES
        )
        raise BcolzMinuteWriterColumnMismatch(
            "Length of dts={0} should match cols: {1}".format(
                expected_len, detail))
    self._write_cols(sid, dts, cols, invalid_data_behavior)
示例15: truncate
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import open [as 别名]
def truncate(self, date):
    """Truncate data beyond this date in all ctables."""
    keep_len = self.data_len_for_day(date)
    pattern = os.path.join(self._rootdir, "*", "*", "*.bcolz")
    for sid_path in sorted(glob(pattern)):
        basename = os.path.basename(sid_path)
        try:
            table = bcolz.open(rootdir=sid_path)
        except IOError:
            # Not an openable bcolz rootdir; skip it.
            continue
        if table.len <= keep_len:
            logger.info("{0} not past truncate date={1}.", basename, date)
            continue
        logger.info(
            "Truncating {0} at end_date={1}", basename, date.date()
        )
        table.resize(keep_len)
    # Update end session in metadata.
    metadata = BcolzMinuteBarMetadata.read(self._rootdir)
    metadata.end_session = date
    metadata.write(self._rootdir)