This article collects and summarizes typical usage examples of the Python method scipy.stats.percentileofscore. If you have been asking yourself what stats.percentileofscore actually does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples for scipy.stats, the module the method belongs to.
Fourteen code examples of stats.percentileofscore are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
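Before the project examples, here is a minimal self-contained sketch of what percentileofscore computes; the data is made up for illustration.

from scipy import stats

data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Percentage of values in `data` at or below the score 7; the `kind` argument
# controls how ties and strict inequality are counted.
print(stats.percentileofscore(data, 7))                 # 70.0 (kind='rank', the default)
print(stats.percentileofscore(data, 7, kind='strict'))  # 60.0, values strictly below 7
print(stats.percentileofscore(data, 7, kind='mean'))    # 65.0, average of 'weak' and 'strict'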
Example 1: agg_func
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
from functools import partial

import numpy as np

# custom_test_func, npy_funcs and percentileofscore_with_axis (see Example 5
# below) are defined elsewhere in the originating test module.
def agg_func(request):
agg_func_name = request.param
if agg_func_name == "custom":
# When using custom you assign the function rather than a string.
agg_func_name = npy_func = custom_test_func
elif agg_func_name == "percentile":
agg_func_name = {
"func": "percentile",
"args": [95],
"kwargs": {}
}
npy_func = partial(np.percentile, q=95)
elif agg_func_name == "percentileofscore":
agg_func_name = {
"func": "percentileofscore",
"kwargs": {
"score": 0.5,
"kind": "rank"
}
}
npy_func = partial(percentileofscore_with_axis, score=0.5, kind="rank")
else:
npy_func = npy_funcs[agg_func_name]
return agg_func_name, npy_func
Example 2: doPercentileCalculation
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
def doPercentileCalculation(model_name):
global rdkit_mols
#expensive to unzip training file - so only done if smiles requested
if options.ad_smiles:
smiles = get_training_smiles(model_name)
ad_data = getAdData(model_name)
def calcPercentile(rdkit_mol):
sims = DataStructs.BulkTanimotoSimilarity(rdkit_mol,ad_data[:,0])
bias = ad_data[:,2].astype(float)
std_dev = ad_data[:,3].astype(float)
scores = ad_data[:,5].astype(float)
weights = sims / (bias * std_dev)
critical_weight = weights.max()
percentile = percentileofscore(scores,critical_weight)
if options.ad_smiles:
critical_smiles = smiles[np.argmax(weights)]
result = percentile, critical_smiles
else:
result = percentile, None
return result
ret = [calcPercentile(x) for x in rdkit_mols]
return model_name, ret
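The applicability-domain step above can be sketched without RDKit: similarities become weights, and the largest weight is ranked against the stored score distribution. All arrays below are made-up stand-ins, not values from the real model.

import numpy as np
from scipy import stats

sims = np.array([0.2, 0.7, 0.4])              # stand-in Tanimoto similarities
bias = np.array([1.0, 0.9, 1.1])              # stand-in per-neighbour bias
std_dev = np.array([0.5, 0.4, 0.6])           # stand-in per-neighbour std dev
scores = np.array([0.1, 0.5, 1.2, 1.9, 2.5])  # stand-in score distribution

weights = sims / (bias * std_dev)
print(stats.percentileofscore(scores, weights.max()))  # 80.0 for these numbers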
Example 3: fit_position
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
def fit_position(self, factor_object):
    """
    Position management for mean-reversion style strategies:
    the position size is decided by where the buy price ranks within the
    price series of the preceding period.
    fit_position returns how many units to buy (shares, lots, tons, contracts).
    :param factor_object: instance of an ABuFactorBuyBases subclass
    :return: number of units to buy (shares, lots, tons, contracts)
    """
    # self.kl_pd_buy holds the buy-day data; fetch the preceding past_day_cnt days
    last_kl = factor_object.past_today_kl(self.kl_pd_buy, self.past_day_cnt)
    if last_kl is None or last_kl.empty:
        precent_pos = self.pos_base
    else:
        # Use percentileofscore to rank the buy price within the closes of the past past_day_cnt days
        precent_pos = stats.percentileofscore(last_kl.close, self.bp)
        precent_pos = (1 + (self.mid_precent - precent_pos) / 100) * self.pos_base
    # Still capped by the upper-level maximum position limit, e.g. a computed
    # full position is cut back to 75%; change pos_max to alter the cap
    precent_pos = self.pos_max if precent_pos > self.pos_max else precent_pos
    # The result is the number of units to buy (shares, lots, tons, contracts)
    return self.read_cash * precent_pos / self.bp * self.deposit_rate
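The rank-to-position mapping at the heart of this method can be sketched in isolation; the function and values below are illustrative and not part of abupy.

import numpy as np
from scipy import stats

def rank_based_position(past_closes, buy_price, pos_base=0.1, mid_percent=50.0, pos_max=0.75):
    """Scale a base position by how low the buy price ranks in recent history."""
    pct = stats.percentileofscore(past_closes, buy_price)
    pos = (1 + (mid_percent - pct) / 100) * pos_base  # cheaper than the median -> larger position
    return min(pos, pos_max)

closes = np.linspace(90, 110, 60)         # hypothetical 60-day close series
print(rank_based_position(closes, 95.0))  # ~0.125: lower-quartile price, position above pos_base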
Example 4: calcSeverity
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
def calcSeverity(model, varname="soil_moist"):
"""Calculate drought severity from *climatology* table stored in database."""
db = dbio.connect(model.dbname)
cur = db.cursor()
if varname == "soil_moist":
sql = "select fdate,(ST_DumpValues(st_union(rast,'sum'))).valarray from {0}.soil_moist group by fdate order by fdate".format(model.name)
else:
sql = "select fdate,(ST_DumpValues(rast)).valarray from {0}.runoff order by fdate".format(model.name)
cur.execute(sql)
results = cur.fetchall()
data = np.array([np.array(r[1]).ravel() for r in results])
i = np.where(np.not_equal(data[0, :], None))[0]
p = pandas.DataFrame(data[:, i], index=np.array([r[0] for r in results], dtype='datetime64'), columns=range(len(i)))
p = p.rolling('10D').mean() # calculate percentiles with dekad rolling mean
st = "{0}-{1}-{2}".format(model.startyear, model.startmonth, model.startday)
et = "{0}-{1}-{2}".format(model.endyear, model.endmonth, model.endday)
s = np.array([[stats.percentileofscore(p[pi].values, v) for v in p[pi][st:et]] for pi in p.columns]).T
s = 100.0 - s
cur.close()
db.close()
return s
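Stripped of the database plumbing, the severity step is an inverted percentile rank against the climatology; the numbers below are synthetic.

import numpy as np
from scipy import stats

climatology = np.random.default_rng(0).normal(size=1000)  # stand-in historical series
value = -1.5                                              # an unusually dry observation
severity = 100.0 - stats.percentileofscore(climatology, value)
print(severity)  # roughly 93: drier than about 93% of the record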
Example 5: percentileofscore_with_axis
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
def percentileofscore_with_axis(values, *args, axis=0, **kwargs):
if values.ndim == 1:
# For 1D data we just calculate the percentile
out = percentileofscore(values, *args, **kwargs)
elif axis == 0:
# 2D data by axis 0
out = [percentileofscore(values[:, i], *args, **kwargs) for i in range(values.shape[1])]
elif axis == 1:
# 2D data by axis 1
out = [percentileofscore(values[i, :], *args, **kwargs) for i in range(values.shape[0])]
else:
raise ValueError('Axis "{}" not supported'.format(axis))
return out
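A quick check of the wrapper above, assuming it is in scope together with numpy:

import numpy as np

arr = np.array([[1, 10],
                [2, 20],
                [3, 30]])
# Rank the score 2.5 within each column, then within each row
print(percentileofscore_with_axis(arr, 2.5, axis=0))  # [~66.7, 0.0]
print(percentileofscore_with_axis(arr, 2.5, axis=1))  # [50.0, 50.0, 0.0]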
Example 6: test_own_eia860
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
def test_own_eia860(pudl_out_eia, live_pudl_db):
"""Sanity checks for EIA 860 generator ownership data."""
if not live_pudl_db:
raise AssertionError("Data validation only works with a live PUDL DB.")
logger.info('Reading EIA 860 generator ownership data...')
own_out = pudl_out_eia.own_eia860()
if (own_out.fraction_owned > 1.0).any():
raise AssertionError(
"Generators with ownership fractions > 1.0 found. Bad data?"
)
if (own_out.fraction_owned < 0.0).any():
raise AssertionError(
"Generators with ownership fractions < 0.0 found. Bad data?"
)
# Verify that the reported ownership fractions add up to something very
# close to 1.0 (i.e. that the full ownership of each generator is
# specified by the EIA860 data)
own_gb = own_out.groupby(['report_date', 'plant_id_eia', 'generator_id'])
own_sum = own_gb['fraction_owned'].agg(helpers.sum_na).reset_index()
logger.info(
f"{len(own_sum[own_sum.fraction_owned.isnull()])} generator-years have no ownership data.")
own_sum = own_sum.dropna()
pct_missing = stats.percentileofscore(own_sum.fraction_owned, 0.98)
logger.info(
f"{len(own_sum[own_sum.fraction_owned < 0.98])} ({pct_missing}%) "
f"generator-years have incomplete ownership data.")
    if (own_sum.fraction_owned >= 1.02).any():
        raise ValueError("Plants with more than 100% ownership found...")
# There might be a few generators with incomplete ownership but virtually
# everything should be pretty fully described. If not, let us know. The
# 0.5 threshold means 0.5% -- i.e. less than 1 in 200 is partial.
if pct_missing >= 0.5:
raise ValueError(
f"{pct_missing}% of generators lack complete ownership data."
)
Example 7: score_anomaly
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
def score_anomaly(self, x):
x = pd.Series(x)
scores = pd.Series([0.01*percentileofscore(self.sample_, z) for z in x])
return scores
Example 8: score
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
def score(self, x):
from scipy.stats import percentileofscore
return [0.01*percentileofscore(x, z) for z in x]
Example 9: rolling_percentileofscore
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
import numpy as np
import pandas as pd
import scipy.stats as stats

def rolling_percentileofscore(series, window, min_periods=None):
    """Compute the percentile rank of each value within its trailing window."""
    def _percentile(arr):
        score = arr[-1]
        vals = arr[:-1]
        return stats.percentileofscore(vals, score)
    notnull = series.dropna()
    min_periods = min_periods or window
    if notnull.empty:
        return pd.Series(np.nan, index=series.index)
    # pd.rolling_apply was removed from pandas; .rolling(...).apply() is the
    # modern equivalent
    return notnull.rolling(window, min_periods=min_periods).apply(
        _percentile, raw=True).reindex(series.index)
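A usage sketch for the rolling version, on random data, assuming the modernized function above is in scope:

import numpy as np
import pandas as pd

s = pd.Series(np.random.default_rng(1).normal(size=100))
# Each value is ranked against the 19 observations that precede it in a 20-wide window
print(rolling_percentileofscore(s, window=20).tail())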
Example 10: expanding_percentileofscore
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
import numpy as np
import pandas as pd
import scipy.stats as stats

def expanding_percentileofscore(series, min_periods=None):
    """Compute the percentile rank of each value against all preceding values."""
    def _percentile(arr):
        score = arr[-1]
        vals = arr[:-1]
        return stats.percentileofscore(vals, score)
    notnull = series.dropna()
    if notnull.empty:
        return pd.Series(np.nan, index=series.index)
    # pd.expanding_apply was removed from pandas; .expanding().apply() is the
    # modern equivalent
    return notnull.expanding(min_periods=min_periods or 1).apply(
        _percentile, raw=True).reindex(series.index)
Example 11: null_to_p
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
def null_to_p(test_value, null_array, tail='two'):
"""Return p-value for test value against null array.
Parameters
----------
test_value : :obj:`float`
Value for which to determine p-value.
null_array : 1D :class:`numpy.ndarray`
Null distribution against which test_value is compared.
tail : {'two', 'upper', 'lower'}, optional
Whether to compare value against null distribution in a two-sided
('two') or one-sided ('upper' or 'lower') manner.
If 'upper', then higher values for the test_value are more significant.
If 'lower', then lower values for the test_value are more significant.
Default is 'two'.
Returns
-------
p_value : :obj:`float`
P-value associated with the test value when compared against the null
distribution.
"""
if tail == 'two':
p_value = (50 - np.abs(stats.percentileofscore(
null_array, test_value) - 50.)) * 2. / 100.
elif tail == 'upper':
p_value = 1 - (stats.percentileofscore(null_array, test_value) / 100.)
elif tail == 'lower':
p_value = stats.percentileofscore(null_array, test_value) / 100.
else:
raise ValueError('Argument "tail" must be one of ["two", "upper", '
'"lower"]')
return p_value
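A usage sketch against a synthetic null distribution; the printed values are approximate because the null is random.

import numpy as np

rng = np.random.default_rng(42)
null = rng.normal(size=10000)  # hypothetical permutation null

print(null_to_p(2.0, null))                # two-sided, around 0.045
print(null_to_p(2.0, null, tail='upper'))  # one-sided, around 0.023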
Example 12: score_hmm_events_no_xval
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
def score_hmm_events_no_xval(bst, training=None, validation=None, num_states=30, n_shuffles=5000, shuffle='row-wise', verbose=False):
"""same as score_hmm_events, but train on training set, and only score validation set..."""
if shuffle == 'row-wise':
rowwise = True
elif shuffle == 'col-wise':
rowwise = False
else:
shuffle = 'timeswap'
scores_hmm = np.zeros(len(validation))
scores_hmm_shuffled = np.zeros((len(validation), n_shuffles))
PBEs_train = bst[training]
PBEs_test = bst[validation]
# train HMM on all training PBEs
hmm = hmmutils.PoissonHMM(n_components=num_states, random_state=0, verbose=False)
hmm.fit(PBEs_train)
# reorder states according to transmat ordering
transmat_order = hmm.get_state_order('transmat')
hmm.reorder_states(transmat_order)
# compute scores_hmm (log likelihoods) of validation set:
scores_hmm[:] = hmm.score(PBEs_test)
if shuffle == 'timeswap':
_, scores_tswap_hmm = score_hmm_timeswap_shuffle(bst=PBEs_test,
hmm=hmm,
n_shuffles=n_shuffles)
scores_hmm_shuffled[:,:] = scores_tswap_hmm.T
else:
hmm_shuffled = copy.deepcopy(hmm)
for nn in range(n_shuffles):
# shuffle transition matrix:
if rowwise:
hmm_shuffled.transmat_ = shuffle_transmat(hmm_shuffled.transmat)
else:
hmm_shuffled.transmat_ = shuffle_transmat_Kourosh_breaks_stochasticity(hmm_shuffled.transmat)
hmm_shuffled.transmat_ = hmm_shuffled.transmat / np.tile(hmm_shuffled.transmat.sum(axis=1), (hmm_shuffled.n_components, 1)).T
# score validation set with shuffled HMM
scores_hmm_shuffled[:, nn] = hmm_shuffled.score(PBEs_test)
n_scores = len(scores_hmm)
scores_hmm_percentile = np.array([stats.percentileofscore(scores_hmm_shuffled[idx], scores_hmm[idx], kind='mean') for idx in range(n_scores)])
return scores_hmm, scores_hmm_shuffled, scores_hmm_percentile
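The final significance step can be isolated from the HMM machinery; here the real and shuffled log-likelihoods are replaced by synthetic arrays.

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
scores_real = rng.normal(loc=2.0, size=50)  # stand-in event scores
scores_shuf = rng.normal(size=(50, 5000))   # stand-in shuffle scores per event

percentiles = np.array([
    stats.percentileofscore(scores_shuf[i], scores_real[i], kind='mean')
    for i in range(len(scores_real))
])
print((percentiles > 95).mean())  # fraction of events beating the 95th shuffle percentile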
Example 13: percentiles
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
def percentiles(x: pd.Series, y: pd.Series = None, w: Union[Window, int] = Window(None, 0)) -> pd.Series:
"""
Rolling percentiles over given window
:param x: value series
:param y: distribution series
:param w: Window or int: size of window and ramp up to use. e.g. Window(22, 10) where 22 is the window size
and 10 the ramp up value. Window size defaults to length of series.
:return: timeseries of percentiles
**Usage**
Calculate `percentile rank <https://en.wikipedia.org/wiki/Percentile_rank>`_ of :math:`y` in the sample distribution
of :math:`x` over a rolling window of length :math:`w`:
    :math:`R_t = \\frac{\\sum_{i=t-N+1}^{t}{[X_i < Y_t]} + 0.5\\sum_{i=t-N+1}^{t}{[X_i = Y_t]}}{N} \\times 100\\%`
Where :math:`N` is the number of observations in a rolling window. If :math:`y` is not provided, calculates
percentiles of :math:`x` over its historical values. If window length :math:`w` is not provided, uses an
ever-growing history of values. If :math:`w` is greater than the available data size, returns empty.
**Examples**
Compute percentile ranks of a series in the sample distribution of a second series over :math:`22` observations
>>> a = generate_series(100)
>>> b = generate_series(100)
>>> percentiles(a, b, 22)
**See also**
:func:`zscores`
"""
w = normalize_window(x, w)
if x.empty:
return x
if y is None:
y = x.copy()
if isinstance(w.r, int) and w.r > len(y):
raise ValueError('Ramp value must be less than the length of the series y.')
if isinstance(w.w, int) and w.w > len(x):
        return pd.Series(dtype=float)
res = pd.Series(dtype=np.dtype(float))
    for idx, val in y.items():  # .iteritems() was removed in pandas 2.0
sample = x.loc[(x.index > idx - w.w) & (x.index <= idx)] if isinstance(w.w, pd.DateOffset) else x[:idx][-w.w:]
res.loc[idx] = percentileofscore(sample, val, kind='mean')
if isinstance(w.r, pd.DateOffset):
return res.loc[res.index[0] + w.r:]
else:
return res[w.r:]
Example 14: season_composite
# Module to import: from scipy import stats [as alias]
# Or: from scipy.stats import percentileofscore [as alias]
def season_composite(self,seasons,climo_bounds=None):
r"""
Create composite statistics for a list of seasons.
Parameters
----------
seasons : list
List of seasons to create a composite of. For Southern Hemisphere, season is the start of the two-year period.
climo_bounds : list or tuple
List or tuple of start and end years of climatology bounds. If none, defaults to (1981,2010).
Returns
-------
dict
Dictionary containing the composite of the requested seasons.
"""
    # Error check
    if not isinstance(seasons, list):
        raise TypeError("'seasons' must be of type list.")
#Create climo bounds
if climo_bounds is None:
climo_bounds = (1981,2010)
#Get Season object for the composite
summary = self.get_season(seasons).summary()
#Get basin climatology
climatology = self.climatology(climo_bounds[0],climo_bounds[1])
full_climo = self.to_dataframe()
subset_climo = full_climo.loc[climo_bounds[0]:climo_bounds[1]+1]
#Create composite dictionary
map_keys = {'all_storms':'season_storms',
'named_storms':'season_named',
'hurricanes':'season_hurricane',
'major_hurricanes':'season_major',
'ace':'season_ace',
}
composite = {}
for key in map_keys.keys():
#Get list from seasons
season_list = summary[map_keys.get(key)]
#Get climatology
season_climo = climatology[key]
#Get individual years in climatology
season_fullclimo = subset_climo[key].values
#Create dictionary of relevant calculations for this entry
composite[key] = {'list':season_list,
'average':np.round(np.average(season_list),1),
'composite_anomaly':np.round(np.average(season_list)-season_climo,1),
'percentile_ranks':[np.round(stats.percentileofscore(season_fullclimo,i),1) for i in season_list],
}
return composite