本文整理汇总了Python中bcolz.carray方法的典型用法代码示例。如果您正苦于以下问题：Python bcolz.carray方法的具体用法？Python bcolz.carray怎么用？Python bcolz.carray使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类bcolz的用法示例。
在下文中一共展示了bcolz.carray方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: perform_val
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def perform_val(embedding_size, batch_size, model,
                carray, issame, nrof_folds=10, is_ccrop=False, is_flip=True):
    """Embed every image in ``carray`` and score the result with ``evaluate``.

    Args:
        embedding_size: Width of the embedding ``model`` yields per image.
        batch_size: Number of images pushed through ``model`` per step.
        model: Callable mapping an image batch to an embedding batch.
        carray: Sliceable image container supporting ``len()``. Each batch
            is transposed with axes ``[0, 2, 3, 1]`` (presumably
            NCHW -> NHWC; confirm against the stored data).
        issame: Pair labels, forwarded unchanged to ``evaluate``.
        nrof_folds: Fold count, forwarded unchanged to ``evaluate``.
        is_ccrop: When true, pass each batch through ``ccrop_batch`` once.
        is_flip: When true, add the embedding of the horizontally flipped
            batch to the embedding of the batch itself.

    Returns:
        Tuple ``(accuracy.mean(), best_thresholds.mean())``.
    """
    embeddings = np.zeros([len(carray), embedding_size])

    for idx in tqdm.tqdm(range(0, len(carray), batch_size)):
        batch = carray[idx:idx + batch_size]
        # Reorder the axes, then rescale with x * 0.5 + 0.5
        # (presumably maps [-1, 1] onto [0, 1]; confirm against the data).
        batch = np.transpose(batch, [0, 2, 3, 1]) * 0.5 + 0.5

        # Cropping is governed by ``is_ccrop`` alone.  The non-flip path
        # used to crop again unconditionally, which cropped un-requested
        # batches and double-cropped requested ones.
        if is_ccrop:
            batch = ccrop_batch(batch)

        if is_flip:
            flipped = hflip_batch(batch)
            emb_batch = model(batch) + model(flipped)
        else:
            emb_batch = model(batch)
        embeddings[idx:idx + batch_size] = l2_norm(emb_batch)

    _tpr, _fpr, accuracy, best_thresholds = evaluate(
        embeddings, issame, nrof_folds)

    return accuracy.mean(), best_thresholds.mean()
示例2: get_val_pair
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def get_val_pair(path, name):
    """Open a validation image set and load its pair labels.

    The images are opened read-only from the bcolz directory
    ``os.path.join(path, name)``; the labels are loaded from
    ``{path}/{name}_list.npy``.

    Returns:
        Tuple ``(carray, issame)``.
    """
    images_dir = os.path.join(path, name)
    labels_file = '{}/{}_list.npy'.format(path, name)

    images = bcolz.carray(rootdir=images_dir, mode='r')
    labels = np.load(labels_file)
    return images, labels
示例3: save_array
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def save_array(fname, arr):
    """Store ``arr`` as a bcolz carray rooted at ``fname`` (opened with mode ``'w'``)."""
    on_disk = bcolz.carray(arr, rootdir=fname, mode='w')
    # Make sure buffered data reaches the rootdir before returning.
    on_disk.flush()
示例4: bcolz_save
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def bcolz_save(path, np_array):
    """Write ``np_array`` to a bcolz carray at ``path`` and print the location."""
    bcolz.carray(np_array, rootdir=path, mode='w').flush()
    print("Saved to " + path)
示例5: write
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def write(self, rootdir):
    """
    Serialise the metadata to the JSON file ``self.metadata_path(rootdir)``.

    Keys written, in order:

    first_trading_day : string
        'YYYY-MM-DD' formatted representation of the first trading day
        available in the dataset.
    minute_index : list of integers
        nanosecond integer representation of the minutes, the enumeration
        of which corresponds to the values in each bcolz carray.
    market_opens : list of integers
        ``self.market_opens`` values cast to ``datetime64[m]`` and then to
        integers.
    market_closes : list of integers
        ``self.market_closes`` values, converted the same way.
    ohlc_ratio : int
        The factor by which the pricing data is multiplied so that the
        float data can be stored as an integer.
    """
    def _minute_ints(series):
        # datetime64 values -> minute resolution -> plain Python ints
        return series.values.astype('datetime64[m]').astype(int).tolist()

    metadata = {}
    metadata['first_trading_day'] = str(self.first_trading_day.date())
    metadata['minute_index'] = self.minute_index.asi8.tolist()
    metadata['market_opens'] = _minute_ints(self.market_opens)
    metadata['market_closes'] = _minute_ints(self.market_closes)
    metadata['ohlc_ratio'] = self.ohlc_ratio

    target = self.metadata_path(rootdir)
    with open(target, 'w+') as fp:
        json.dump(metadata, fp)
示例6: _open_minute_file
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def _open_minute_file(self, field, sid):
sid = int(sid)
try:
carray = self._carrays[field][sid]
except KeyError:
carray = self._carrays[field][sid] = \
bcolz.carray(rootdir=self._get_carray_path(sid, field),
mode='r')
return carray
示例7: _open_minute_file
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def _open_minute_file(self, field, sid):
try:
carray = self._carrays[field][sid]
except KeyError:
carray = self._carrays[field][sid] = \
bcolz.carray(rootdir=self._get_carray_path(sid, field),
mode='r')
return carray
示例8: _filtered_index
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def _filtered_index(self, instrument):
# TODO 确认是否跳过日内涨跌停
if instrument not in self._index_skip_suspending:
carray = self._open_minute_file("close", instrument)
sub_index = bcolz.eval("carray != 0", vm="numexpr")
index = self._minute_index[:len(sub_index)][sub_index[:]]
self._index_skip_suspending[instrument] = index
return self._index_skip_suspending[instrument]
示例9: unique
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def unique(self, col_or_col_list):
    """
    Return the unique values of one column, or of each column in a list.

    :param col_or_col_list: a column or a list of columns
    :return: a list of unique values when a single column is given,
        otherwise a list holding one such list per requested column
    """
    col_is_list = isinstance(col_or_col_list, list)
    col_list = col_or_col_list if col_is_list else [col_or_col_list]

    output = []
    for col in col_list:
        if self.auto_cache or self.cache_valid(col):
            # create factorization cache
            if not self.cache_valid(col):
                self.cache_factor([col])

            # retrieve values from existing disk-based factorization
            col_values_rootdir = self[col].rootdir + '.values'
            carray_values = bcolz.carray(rootdir=col_values_rootdir, mode='r')
            values = list(carray_values)
        else:
            # factorize on-the-fly
            _, values = ctable_ext.factorize(self[col])
            # Materialise the mapping's values: on Python 3 ``.values()`` is
            # a view, whereas the cached branch above returns a real list.
            values = list(values.values())
        output.append(values)

    return output if col_is_list else output[0]
示例10: factorize_groupby_cols
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def factorize_groupby_cols(self, groupby_cols):
    """
    Factorize every column that takes part in a groupby.

    For a column whose cache is valid, or for any column when
    ``auto_cache`` is set, the factor and values carrays are read from the
    on-disk cache (created first if it is not yet valid).  Otherwise the
    column is factorized in memory.

    :param groupby_cols: iterable of column names
    :return: ``(factor_list, values_list)`` with one entry per column
    """
    factor_list, values_list = [], []

    for col in groupby_cols:
        use_cache = self.auto_cache or self.cache_valid(col)
        if not use_cache:
            # in-memory factorization
            factor, mapping = ctable_ext.factorize(self[col])
            uniques = \
                bcolz.carray(np.fromiter(mapping.values(), dtype=self[col].dtype))
        else:
            # build the cache on demand, then read it back from disk
            if not self.cache_valid(col):
                self.cache_factor([col])
            base = self[col].rootdir
            factor_dir = base + '.factor'
            values_dir = base + '.values'
            factor = bcolz.carray(rootdir=factor_dir, mode='r')
            uniques = bcolz.carray(rootdir=values_dir, mode='r')

        factor_list.append(factor)
        values_list.append(uniques)

    return factor_list, values_list
示例11: _int_array_hash
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def _int_array_hash(input_list):
    """
    Combine several integer arrays element-wise into one array of hash
    values.  Not used at the moment.

    Parameters
    ----------
    input_list : sequence of integer arrays
        The arrays to combine; the length of the first one sets the length
        of the result.

    Returns
    -------
    bcolz.carray
        One 64-bit integer hash value per element position.

    Raises
    ------
    IndexError
        If ``input_list`` is empty.
    """
    list_len = len(input_list)
    arr_len = len(input_list[0])

    # ``np.long`` was deprecated in NumPy 1.20 and removed in 1.24; an
    # explicit 64-bit dtype works on every release.
    mult_arr = np.full(arr_len, 1000003, dtype=np.int64)
    value_arr = np.full(arr_len, 0x345678, dtype=np.int64)

    for i, current_arr in enumerate(input_list):
        index = list_len - i - 1
        value_arr ^= current_arr
        value_arr *= mult_arr
        mult_arr += (82520 + index + index)

    value_arr += 97531
    return bcolz.carray(value_arr)
示例12: unadjusted_window
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def unadjusted_window(self, fields, start_dt, end_dt, sids):
    """
    Read a window of raw minute data for several assets.

    Parameters
    ----------
    fields : list of str
        'open', 'high', 'low', 'close', or 'volume'
    start_dt: Timestamp
        Beginning of the window range.
    end_dt: Timestamp
        End of the window range (inclusive).
    sids : list of int
        The asset identifiers in the window.

    Returns
    -------
    list of np.ndarray
        A list with an entry per field of ndarrays with shape
        (sids, minutes in range).  'volume' is returned as uint32 with
        zeros left in place; every other field is float, multiplied by
        ``self._ohlc_inverse``, with NaN wherever the stored value is 0.
    """
    # TODO: Handle early closes.
    first = self._find_position_of_minute(start_dt)
    last = self._find_position_of_minute(end_dt)
    window = slice(first, last + 1)
    shape = (len(sids), last - first + 1)

    results = []
    for field in fields:
        is_volume = field == 'volume'
        if is_volume:
            out = np.zeros(shape, dtype=np.uint32)
        else:
            out = np.full(shape, np.nan)

        for row, sid in enumerate(sids):
            values = self._open_minute_file(field, sid)[window]
            # Copy only the non-zero entries; the rest keep the fill value.
            nonzero = values != 0
            out[row, nonzero] = values[nonzero]

        if not is_volume:
            out *= self._ohlc_inverse
        results.append(out)
    return results
示例13: raw_history_bars
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def raw_history_bars(self, instrument, start_dt=None, end_dt=None, length=None, fields=None, skip_suspended=True):
    """
    Assemble a structured array of minute bars for one instrument.

    Parameters
    ----------
    instrument
        Identifier handed to ``get_dt_slice`` and ``_open_minute_file``.
    start_dt, end_dt, length
        Forwarded to ``self.get_dt_slice`` to resolve the
        ``[start_idx, end_idx)`` slice.
    fields
        ``None`` for all of ``self.FIELDS``, a single field name, or an
        iterable of field names.
    skip_suspended
        Forwarded to ``get_dt_slice``; also picks the post-processing
        step: ``numba_loops_dropna`` when true, ``numba_loops_ffill``
        otherwise.

    Returns
    -------
    The result of the selected ``numba_loops_*`` helper applied to the
    assembled array (restricted to ``fields`` when it is not ``None``).
    """
    start_idx, end_idx = self.get_dt_slice(instrument, start_dt, end_dt, length, skip_suspended)

    if fields is None:
        wanted = self.FIELDS
    elif isinstance(fields, six.string_types):
        wanted = [fields]
    else:
        # "datetime" always gets its own column below
        wanted = [name for name in fields if name != "datetime"]

    n_rows = end_idx - start_idx
    field_types = {name: self._converter.field_type(name, np.float64) for name in wanted}
    columns = [("datetime", np.uint64)]
    columns.extend((name, self._converter.field_type(name, np.float64)) for name in wanted)
    bars = np.empty((n_rows,), dtype=np.dtype(columns))

    for name in wanted:
        is_volume = name == 'volume'
        if is_volume:
            column = np.zeros((n_rows,), dtype=field_types[name])
        else:
            column = np.full((n_rows,), np.nan, dtype=field_types[name])

        raw = self._open_minute_file(name, instrument)[start_idx: end_idx]
        nonzero = raw != 0
        # ``raw`` may be shorter than the window, so write through a view
        # trimmed to the mask length.
        if is_volume:
            column[:len(nonzero)][nonzero] = raw[nonzero]
        else:
            column[:len(nonzero)][nonzero] = raw[nonzero] * self._ohlc_ratio_inverse_for_sid(instrument)
        bars[name] = column

    minutes = self._minute_index[start_idx: end_idx].to_pydatetime()
    bars["datetime"] = [convert_dt_to_int(minute) for minute in minutes]

    if fields is not None:
        bars = bars[fields]
    if skip_suspended:
        return self.numba_loops_dropna(bars)
    return self.numba_loops_ffill(bars)
示例14: cache_factor
# 需要导入模块: import bcolz [as 别名]
# 或者: from bcolz import carray [as 别名]
def cache_factor(self, col_list, refresh=False):
    """
    Create the on-disk factorization cache for the given columns.

    Existing todos here are: these should be hidden helper carrays
    As in: not normal columns that you would normally see as a user

    The factor (label index) carray is as long as the original carray
    (and the rest of the table therefore)
    But the (unique) values carray is not as long (as long as the number
    of unique values)

    Both carrays are built in temporary directories and moved into place
    only once both were written successfully, so a failed run leaves any
    existing cache for the column untouched.

    :param col_list: a column name or a list of column names
    :param refresh: when true, remove every existing ``.factor`` /
        ``.values`` entry under ``self.rootdir`` and rebuild the cache for
        all columns in ``col_list``
    :return: None
    :raises TypeError: if the table has no ``rootdir``
    """
    if not self.rootdir:
        raise TypeError('Only out-of-core ctables can have '
                        'factorization caching at the moment')

    if not isinstance(col_list, list):
        col_list = [col_list]

    if refresh:
        kill_list = [x for x in os.listdir(self.rootdir) if '.factor' in x or '.values' in x]
        for kill_dir in kill_list:
            rm_file_or_dir(os.path.join(self.rootdir, kill_dir))

    for col in col_list:
        # create cache if needed
        if not (refresh or not self.cache_valid(col)):
            continue

        # todo: also add locking mechanism here

        # create directories
        col_rootdir = self[col].rootdir
        col_factor_rootdir = col_rootdir + '.factor'
        col_factor_rootdir_tmp = tempfile.mkdtemp(prefix='bcolz-')
        col_values_rootdir = col_rootdir + '.values'
        col_values_rootdir_tmp = tempfile.mkdtemp(prefix='bcolz-')

        try:
            # create factor
            carray_factor = \
                bcolz.carray([], dtype='int64', expectedlen=self.size,
                             rootdir=col_factor_rootdir_tmp, mode='w')
            _, values = \
                ctable_ext.factorize(self[col], labels=carray_factor)
            carray_factor.flush()

            # create values
            carray_values = \
                bcolz.carray(np.fromiter(values.values(), dtype=self[col].dtype),
                             rootdir=col_values_rootdir_tmp, mode='w')
            carray_values.flush()
        except BaseException:
            # Never publish a half-written cache: drop the temporary
            # directories and let the original error propagate.
            rm_file_or_dir(col_factor_rootdir_tmp, ignore_errors=True)
            rm_file_or_dir(col_values_rootdir_tmp, ignore_errors=True)
            raise

        # Both parts are complete; swap them in for any previous cache.
        rm_file_or_dir(col_factor_rootdir, ignore_errors=True)
        shutil.move(col_factor_rootdir_tmp, col_factor_rootdir)
        rm_file_or_dir(col_values_rootdir, ignore_errors=True)
        shutil.move(col_values_rootdir_tmp, col_values_rootdir)