本文整理匯總了Python中augustus.core.NumpyInterface.NP類的典型用法代碼示例。如果您正苦於以下問題:Python NP類的具體用法?Python NP怎麼用?Python NP使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了NP類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: applyInvalidValueTreatment
def applyInvalidValueTreatment(mask, invalidValueTreatment, overwrite=False):
    """Turn INVALID mask entries into MISSING when the treatment is "asMissing".

    Unless C{overwrite} is True, the mask passed in is left untouched
    and a modified copy is returned instead.  Example use::

        mask = dataColumn.mask
        mask = FieldCastMethods.applyInvalidValueTreatment(mask, pmml.get("invalidValueTreatment"))
        return DataColumn(dataColumn.fieldType, dataColumn.data, mask)

    It can also be used in conjunction with other FieldCastMethods.

    @type mask: 1d Numpy array of dtype defs.maskType, or None
    @param mask: The mask.
    @type invalidValueTreatment: string
    @param invalidValueTreatment: One of "returnInvalid", "asIs", "asMissing"; only "asMissing" has an effect.
    @type overwrite: bool
    @param overwrite: If True, unlock the original mask for writing and modify it in place (the write flag is not restored afterwards).
    @rtype: 1d Numpy array of dtype defs.maskType, or None
    @return: The new mask; the input itself when it is None or when the treatment is not "asMissing".
    """
    # Nothing to rewrite: either there is no mask, or the treatment leaves it alone.
    if mask is None or invalidValueTreatment != "asMissing":
        return mask

    # Work on the caller's array only when explicitly asked to.
    result = mask if overwrite else NP("copy", mask)
    result.setflags(write=True)
    result[NP(result == defs.INVALID)] = defs.MISSING
    return result
示例2: mapper
def mapper(self, dataTable):
    """Score a table with the clustering model and emit per-cluster partial sums.

    For every cluster in C{self.clusters}, one record is emitted under
    the cluster's name (its "id" attribute, or its 1-based position if
    there is no "id").  The record holds, for each name in
    C{self.fieldNames}, the sum of that field over the rows assigned to
    the cluster ("numer") and the number of such rows ("denom").

    @type dataTable: DataTable
    @param dataTable: The input data; scoring happens on a sub-table of it.
    """
    # Score a sub-table so that the results of this calculation do not get propagated.
    scored = dataTable.subTable()
    self.metadata["ClusteringModel"].calculate(scored, performanceTable=self.performanceTable)

    score = scored.score
    scoreData = score.data
    scoreMask = score.mask
    toValue = score.fieldType.stringToValue

    for position, cluster in enumerate(self.clusters):
        clusterName = cluster.get("id", "%d" % (position + 1))

        # Rows whose score equals this cluster and (if there is a mask) are VALID.
        members = NP(scoreData == toValue(clusterName))
        if scoreMask is not None:
            NP("logical_and", members, NP(scoreMask == defs.VALID), members)
        memberCount = members.sum()

        numer = dict.fromkeys(self.fieldNames, 0.0)
        denom = dict.fromkeys(self.fieldNames, 0.0)
        for fieldName in self.fieldNames:
            numer[fieldName] += scored.fields[fieldName].data[members].sum()
            denom[fieldName] += memberCount

        self.emit(clusterName, {"numer": numer, "denom": denom})
示例3: _toDataColumn_dateTime
def _toDataColumn_dateTime(self, data, mask):
    """Convert input values into a DataColumn of this date/time type.

    The input first goes through C{self._checkNumpy} (with
    C{tryToCast=False}) and C{self._checkNonNumpy}.  Each element is
    then classified:

      - MISSING if the incoming mask entry is truthy, the value is a
        float NaN, or the value is a string equal to "NAN" ignoring case;
      - INVALID if C{self.stringToValue} raises ValueError or TypeError;
      - otherwise it is stored as the result of C{self.stringToValue}.

    MISSING and INVALID slots are filled with C{defs.PADDING}.  The
    result is finally passed through C{self._checkValues} and
    C{self._checkIntervals}.

    @param data: The input values.
    @param mask: The input mask, or None.
    @rtype: DataColumn
    @return: A DataColumn of this field type; its mask is None if no element was MISSING or INVALID before the final checks.
    """
    data, mask = self._checkNumpy(data, mask, tryToCast=False)
    data, mask = self._checkNonNumpy(data, mask)

    size = len(data)
    converted = NP("empty", size, dtype=self.dtype)
    flags = NP("zeros", size, dtype=defs.maskType)

    for i, x in enumerate(data):
        # Decide whether this element counts as missing.
        if mask is not None and mask[i]:
            isMissing = True
        elif isinstance(x, float):
            isMissing = math.isnan(x)
        elif isinstance(x, basestring):
            isMissing = (x.upper() == "NAN")
        else:
            isMissing = False

        if isMissing:
            converted[i] = defs.PADDING
            flags[i] = defs.MISSING
            continue

        try:
            converted[i] = self.stringToValue(x)
        except (ValueError, TypeError):
            converted[i] = defs.PADDING
            flags[i] = defs.INVALID

    # An all-zero mask carries no information, so drop it.
    data, mask = converted, (flags if flags.any() else None)

    data, mask = self._checkValues(data, mask)
    data, mask = self._checkIntervals(data, mask)
    return DataColumn(self, data, mask)
示例4: generateSamples
def generateSamples(self, low, high):
    """Produce the array of sample points used by C{prepare}.

    The number of points and the sampling strategy are read from this
    element's "numSamples" and "samplingMethod" attributes (with XSD
    defaults).

    @type low: number
    @param low: Minimum value to sample.
    @type high: number
    @param high: Maximum value to sample.
    @rtype: 1d Numpy array
    @return: Evenly spaced samples including both endpoints ("uniform"), or sorted random samples scaled from C{low} to C{high} ("random").
    @raise NotImplementedError: For any other sampling method ("adaptive" is not implemented).
    """
    numSamples = self.get("numSamples", defaultFromXsd=True, convertType=True)
    samplingMethod = self.get("samplingMethod", defaultFromXsd=True)

    if samplingMethod == "uniform":
        return NP("linspace", low, high, numSamples, endpoint=True)

    if samplingMethod == "random":
        # Stretch unit-interval random numbers onto the requested range, then order them.
        unitSamples = NP(NP.random.rand(numSamples))
        samples = NP(NP(unitSamples * (high - low)) + low)
        samples.sort()
        return samples

    raise NotImplementedError("TODO: add 'adaptive'")
示例5: cusum
def cusum(self, testDistributions, fieldName, dataColumn, state, performanceTable):
    """Calculate the score of a CUSUM TestStatistic.

    The CUSUM cumulative sum is a stateful calculation: each row
    depends on the result of the previous row.  To continue
    calculations through multiple calls to C{calc} or
    C{calculate}, pass a DataTableState object and give the
    BaselineModel a C{stateId} attribute.  The C{stateId} is not
    valid in strict PMML, but it can be inserted after validation
    or used in custom-ODG models (C{from augustus.odg import *}).

    For each row whose mask is VALID (or every row if there is no
    mask), the running value becomes
    C{max(resetValue, previous + logpdf_alternate - logpdf_baseline)};
    other rows repeat the previous running value.

    @type testDistributions: PmmlBinding
    @param testDistributions: The <TestDistributions> element.
    @type fieldName: string
    @param fieldName: The field name (for error messages).
    @type dataColumn: DataColumn
    @param dataColumn: The field.
    @type state: DataTableState
    @param state: The persistent state object, which is used to initialize the start state and save the end state of the cumulative sum.
    @type performanceTable: PerformanceTable or None
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation; timing is skipped if None.
    @rtype: dict
    @return: A dictionary mapping PMML "feature" strings to DataColumns; CUSUM only defines the None key ("predictedValue").
    @raise defs.PmmlValidationError: If the Baseline or the Alternate is neither a GaussianDistribution nor a PoissonDistribution.
    """
    baseline = testDistributions.xpath("pmml:Baseline/pmml:GaussianDistribution | pmml:Baseline/pmml:PoissonDistribution")
    alternate = testDistributions.xpath("pmml:Alternate/pmml:GaussianDistribution | pmml:Alternate/pmml:PoissonDistribution")

    if len(baseline) == 0 or len(alternate) == 0:
        raise defs.PmmlValidationError("BaselineModel CUSUM requires a Baseline and an Alternate that are either GaussianDistribution or PoissonDistribution")

    # Per-row log-likelihood ratio of the alternate to the baseline distribution.
    ratios = alternate[0].logpdf(dataColumn.data) - baseline[0].logpdf(dataColumn.data)

    if dataColumn.mask is None:
        good = NP("ones", len(dataColumn), dtype=NP.dtype(bool))
    else:
        good = NP(dataColumn.mask == defs.VALID)

    # Resume from the stored running value if there is one, otherwise start at zero.
    stateId = self.get("stateId")
    last = None
    if stateId is not None:
        last = state.get(stateId)
    if last is None:
        last = 0.0

    resetValue = testDistributions.get("resetValue", defaultFromXsd=True, convertType=True)

    output = NP("empty", len(dataColumn), dtype=NP.dtype(float))

    # The docstring allows performanceTable to be None, so only time the loop when one was given.
    if performanceTable is not None:
        performanceTable.begin("fill CUSUM")
    for index in xrange(len(dataColumn)):
        if good[index]:
            last = max(resetValue, last + ratios[index])
        output[index] = last
    if performanceTable is not None:
        performanceTable.end("fill CUSUM")

    if stateId is not None:
        state[stateId] = last

    return {None: DataColumn(self.scoreType, output, None)}
示例6: functionMax
def functionMax(self, dataColumn, whereMask, groupSelection, getstate, setstate):
    """Finds the running maximum of rows in a DataColumn, possibly with an SQL where mask and groupField.

    Output row C{i} holds the largest selected value seen up to and
    including row C{i} (starting from the stored state, if any).  Rows
    that come before any selected value, with no starting state, are
    marked INVALID.

    @type dataColumn: DataColumn
    @param dataColumn: The input data column.
    @type whereMask: 1d Numpy array of bool, or None
    @param whereMask: The result of the SQL where selection.
    @type groupSelection: 1d Numpy array of bool, or None.
    @param groupSelection: Rows corresponding to a particular value of the groupField.
    @type getstate: callable function
    @param getstate: Retrieve starting values from the DataTableState.
    @type setstate: callable function
    @param setstate: Store ending values to the DataTableState.
    @rtype: DataColumn
    @return: A column of maximized rows.
    @raise defs.PmmlValidationError: If the input field is neither continuous nor ordinal.
    """
    fieldType = dataColumn.fieldType

    if fieldType.optype not in ("continuous", "ordinal"):
        # The message used to say "min", copied from the sibling aggregate; this is the "max" aggregate.
        raise defs.PmmlValidationError("Aggregate function \"max\" requires a continuous or ordinal input field")

    # Start from the VALID rows, then narrow by the optional where/group selections.
    if dataColumn.mask is None:
        selection = NP("ones", len(dataColumn), dtype=NP.dtype(bool))
    else:
        selection = NP(dataColumn.mask == defs.VALID)

    if whereMask is not None:
        NP("logical_and", selection, whereMask, selection)

    if groupSelection is not None:
        NP("logical_and", selection, groupSelection, selection)

    # Resume from the stored maximum, if there is one.
    maximum = None
    if getstate is not None:
        startingState = getstate()
        if startingState is not None:
            maximum = startingState

    data = NP("empty", len(dataColumn), dtype=fieldType.dtype)
    mask = NP("zeros", len(dataColumn), dtype=defs.maskType)

    for i, x in enumerate(dataColumn.data):
        if selection[i]:
            if maximum is None or x > maximum:
                maximum = x
        if maximum is None:
            # No selected value has been seen yet, so there is nothing to report.
            mask[i] = defs.INVALID
        else:
            data[i] = maximum

    # An all-zero mask carries no information, so drop it.
    if not mask.any():
        mask = None

    if setstate is not None:
        setstate(maximum)

    return DataColumn(fieldType, data, mask)
示例7: functionAverage
def functionAverage(self, dataColumn, whereMask, groupSelection, getstate, setstate):
    """Computes the running average of rows in a DataColumn, possibly with an SQL where mask and groupField.

    Each output row is the cumulative sum of the selected values
    divided by the cumulative count of selected rows, both optionally
    seeded from the stored state.  Rows where that ratio is not finite
    are marked INVALID.

    @type dataColumn: DataColumn
    @param dataColumn: The input data column.
    @type whereMask: 1d Numpy array of bool, or None
    @param whereMask: The result of the SQL where selection.
    @type groupSelection: 1d Numpy array of bool, or None.
    @param groupSelection: Rows corresponding to a particular value of the groupField.
    @type getstate: callable function
    @param getstate: Retrieve starting values from the DataTableState.
    @type setstate: callable function
    @param setstate: Store ending values to the DataTableState.
    @rtype: DataColumn
    @return: A column of averaged rows.
    @raise defs.PmmlValidationError: If the input field's dataType is not "integer", "float" or "double".
    """
    fieldType = FakeFieldType("double", "continuous")

    if dataColumn.fieldType.dataType not in ("integer", "float", "double"):
        raise defs.PmmlValidationError("Aggregate function \"average\" requires a numeric input field: \"integer\", \"float\", \"double\"")

    # counts is 1 for every row that contributes to the average and 0 otherwise.
    counts = NP("ones", len(dataColumn), dtype=fieldType.dtype)
    if dataColumn.mask is not None:
        NP("logical_and", counts, NP(dataColumn.mask == defs.VALID), counts)
    for extraSelection in (whereMask, groupSelection):
        if extraSelection is not None:
            NP("logical_and", counts, extraSelection, counts)

    sums = NP("multiply", counts, dataColumn.data)

    # Seed the first row with the totals carried over from earlier calls.
    if getstate is not None and len(dataColumn) > 0:
        startingState = getstate()
        if startingState is not None:
            startingNumerator, startingDenominator = startingState
            sums[0] += startingNumerator
            counts[0] += startingDenominator

    sums = NP("cumsum", sums)
    counts = NP("cumsum", counts)

    data = NP(sums / counts)

    # Non-finite averages are flagged INVALID; an all-zero mask is dropped.
    notFinite = NP("logical_not", NP("isfinite", data))
    mask = NP(notFinite * defs.INVALID)
    if not mask.any():
        mask = None

    if setstate is not None and len(dataColumn) > 0:
        setstate((sums[-1], counts[-1]))

    return DataColumn(fieldType, data, mask)
示例8: _checkValues
def _checkValues(self, data, mask):
    """Apply this field's <Value> specifications to a data/mask pair.

    Each entry of C{self.values} names a value and a "property"
    ("valid" by default, "missing" or "invalid").  Any "displayValue"
    is recorded in C{self._displayValue}.  If at least one "valid"
    value is specified, every row that is neither listed as valid nor
    missing becomes INVALID; otherwise only rows explicitly listed
    (or already masked) as missing or invalid are flagged.

    @param data: The data array.
    @param mask: The mask array, or None.
    @rtype: 2-tuple
    @return: The data (unchanged) and the new mask, which is None when no row ends up flagged.
    @raise defs.PmmlValidationError: If a specified value cannot be converted by C{self.stringToValue}.
    """
    values = self.values
    if len(values) == 0:
        return data, mask

    size = len(data)
    boolType = NP.dtype(bool)

    if mask is None:
        missing = NP("zeros", size, dtype=boolType)
        invalid = NP("zeros", size, dtype=boolType)
    else:
        missing = NP(mask == defs.MISSING)
        invalid = NP(mask == defs.INVALID)
    valid = NP("zeros", size, dtype=boolType)

    # Each recognized property accumulates matching rows into its own selection.
    selectionByProperty = {"valid": valid, "missing": missing, "invalid": invalid}

    anyValidSpecified = False
    for value in values:
        v = value.get("value")

        displayValue = value.get("displayValue")
        if displayValue is not None:
            self._displayValue[v] = displayValue

        prop = value.get("property", "valid")

        try:
            v2 = self.stringToValue(v)
        except ValueError:
            raise defs.PmmlValidationError("Improper value in Value specification: \"%s\"" % v)

        target = selectionByProperty.get(prop)
        if target is not None:
            NP("logical_or", target, NP(data == v2), target)
            if prop == "valid":
                anyValidSpecified = True

    notMissing = NP("logical_not", missing)

    if anyValidSpecified:
        # guilty until proven innocent
        NP("logical_and", valid, notMissing, valid)
        if valid.all():
            return data, None

        newMask = NP(NP("ones", size, dtype=defs.maskType) * defs.INVALID)
        newMask[missing] = defs.MISSING
        newMask[valid] = defs.VALID
        return data, newMask

    # innocent until proven guilty
    NP("logical_and", invalid, notMissing, invalid)
    if not NP("logical_or", invalid, missing).any():
        return data, None

    newMask = NP("zeros", size, dtype=defs.maskType)
    newMask[missing] = defs.MISSING
    newMask[invalid] = defs.INVALID
    return data, newMask
示例9: functionMultiset
def functionMultiset(self, dataColumn, whereMask, groupSelection, getstate, setstate):
    """Derives a running multiset of rows in a DataColumn, possibly with an SQL where mask and groupField.

    Each output row holds a dict that maps values (converted with the
    field type's C{valueToPython}) to the number of selected rows seen
    so far with that value.  A fresh dict snapshot is made at every
    selected row; rows that are not selected reuse the most recent
    snapshot object.

    @type dataColumn: DataColumn
    @param dataColumn: The input data column.
    @type whereMask: 1d Numpy array of bool, or None
    @param whereMask: The result of the SQL where selection.
    @type groupSelection: 1d Numpy array of bool, or None.
    @param groupSelection: Rows corresponding to a particular value of the groupField.
    @type getstate: callable function
    @param getstate: Retrieve starting values from the DataTableState.
    @type setstate: callable function
    @param setstate: Store ending values to the DataTableState.
    @rtype: DataColumn of dict objects
    @return: A column of multisetted rows.
    """
    fieldType = FakeFieldType("object", "any")
    numRows = len(dataColumn)

    selection = NP("ones", numRows, dtype=NP.dtype(bool))
    if dataColumn.mask is not None:
        selection = NP("logical_and", selection, NP(dataColumn.mask == defs.VALID))
    for extraSelection in (whereMask, groupSelection):
        if extraSelection is not None:
            NP("logical_and", selection, extraSelection, selection)

    # The running counts; a stored state, when present, is continued (and updated in place).
    multiset = {}
    if getstate is not None:
        startingState = getstate()
        if startingState is not None:
            multiset = startingState
    snapshot = dict(multiset)

    data = NP("empty", numRows, dtype=NP.dtype(object))
    toPython = dataColumn.fieldType.valueToPython

    for i, x in enumerate(dataColumn.data):
        if selection[i]:
            value = toPython(x)
            multiset[value] = multiset.get(value, 0) + 1
            snapshot = dict(multiset)
        data[i] = snapshot

    if setstate is not None:
        setstate(multiset)

    return DataColumn(fieldType, data, None)
示例10: applyMapMissingTo
def applyMapMissingTo(fieldType, data, mask, mapMissingTo, overwrite=False):
    """Substitute a given value for every MISSING entry and mark it VALID.

    Unless C{overwrite} is True, the arrays passed in are left
    untouched and modified copies are returned instead.  Example use::

        data, mask = dataColumn.data, dataColumn.mask
        data, mask = FieldCastMethods.applyMapMissingTo(dataColumn.fieldType, data, mask, "-999")
        return DataColumn(dataColumn.fieldType, data, mask)

    It can also be used in conjunction with other FieldCastMethods.

    @type fieldType: FieldType
    @param fieldType: The data fieldType (to interpret C{mapMissingTo}).
    @type data: 1d Numpy array
    @param data: The data.
    @type mask: 1d Numpy array of dtype defs.maskType, or None
    @param mask: The mask.
    @type mapMissingTo: string
    @param mapMissingTo: The replacement value, represented as a string (e.g. directly from a PMML attribute); None means no replacement.
    @type overwrite: bool
    @param overwrite: If True, unlock the original data and mask for writing and modify them in place.
    @rtype: 2-tuple of 1d Numpy arrays
    @return: The new data and mask; the mask is None if nothing remains flagged after the replacement.
    @raise defs.PmmlValidationError: If C{mapMissingTo} cannot be converted by the fieldType.
    """
    # Without a mask nothing is missing; without a replacement there is nothing to do.
    if mask is None or mapMissingTo is None:
        return data, mask

    missingRows = NP(mask == defs.MISSING)

    try:
        replacement = fieldType.stringToValue(mapMissingTo)
    except ValueError as err:
        raise defs.PmmlValidationError("mapMissingTo string \"%s\" cannot be cast as %r: %s" % (mapMissingTo, fieldType, str(err)))

    if overwrite:
        data.setflags(write=True)
        mask.setflags(write=True)
    else:
        data = NP("copy", data)
        mask = NP("copy", mask)

    data[missingRows] = replacement
    mask[missingRows] = defs.VALID

    # An all-zero mask carries no information, so drop it.
    return data, (mask if mask.any() else None)
示例11: _fromDataColumn_number
def _fromDataColumn_number(self, dataColumn):
    """Convert a numeric DataColumn into an array of Python objects.

    Without a mask, the data are simply re-wrapped as an object array.
    With a mask, VALID rows keep their value, MISSING rows become
    C{defs.NAN}, and all other rows become None.

    @type dataColumn: DataColumn
    @param dataColumn: The column to convert.
    @rtype: 1d Numpy array of dtype object
    @return: One entry per row of the column.
    """
    objectType = NP.dtype(object)
    mask = dataColumn.mask

    if mask is None:
        return NP("array", dataColumn.data, dtype=objectType)

    output = NP("empty", len(dataColumn), dtype=objectType)
    for i, x in enumerate(dataColumn.data):
        flag = mask[i]
        if flag == defs.VALID:
            output[i] = x
        elif flag == defs.MISSING:
            output[i] = defs.NAN
        else:
            output[i] = None
    return output
示例12: pointsToSmoothCurve
def pointsToSmoothCurve(xarray, yarray, samples, smoothingScale, loop):
    """Fit a smooth line through a set of given numeric points
    with a characteristic smoothing scale.

    This is a non-parametric locally linear fit, used to plot data
    as a smooth line: at every sample location a straight line is
    fitted to the points by weighted least squares, with Gaussian
    weights centered on the sample.

    @type xarray: 1d Numpy array of numbers
    @param xarray: Array of x values.
    @type yarray: 1d Numpy array of numbers
    @param yarray: Array of y values.
    @type samples: 1d Numpy array of numbers
    @param samples: Locations at which to fit the C{xarray} and C{yarray} with best-fit positions and derivatives.
    @type smoothingScale: number
    @param smoothingScale: Standard deviation of the Gaussian kernel used to smooth the locally linear fit.
    @type loop: bool
    @param loop: If False, disconnect the end of the fitted curve from the beginning.
    @rtype: 4-tuple of 1d Numpy arrays
    @return: C{xlist}, C{ylist}, C{dxlist}, C{dylist} appropriate for C{formatPathdata}.
    """
    fitted = []
    slopes = []

    for sample in samples:
        # Gaussian kernel weights centered on this sample.
        squaredOffsets = NP("power", NP(xarray - sample), 2)
        exponent = NP(NP(-0.5 * squaredOffsets) / NP(smoothingScale * smoothingScale))
        weights = NP(NP(NP("exp", exponent) / smoothingScale) / (math.sqrt(2.0*math.pi)))

        # Weighted moments for the least-squares line.
        sum1 = weights.sum()
        sumx = NP(weights * xarray).sum()
        sumxx = NP(weights * NP(xarray * xarray)).sum()
        sumy = NP(weights * yarray).sum()
        sumxy = NP(weights * NP(xarray * yarray)).sum()

        delta = (sum1 * sumxx) - (sumx * sumx)
        intercept = ((sumxx * sumy) - (sumx * sumxy)) / delta
        slope = ((sum1 * sumxy) - (sumx * sumy)) / delta

        fitted.append(intercept + (sample * slope))
        slopes.append(slope)

    floatType = NP.dtype(float)
    xlist = samples
    ylist = NP("array", fitted, dtype=floatType)

    # Half the distance between each sample's neighbours (wrapping around at the ends).
    dxlist = NP((NP("roll", xlist, -1) - NP("roll", xlist, 1)) / 2.0)
    dylist = NP("array", slopes, dtype=floatType) * dxlist

    if not loop:
        # The wrapped-around neighbours are meaningless for an open curve.
        for derivative in (dxlist, dylist):
            derivative[0] = 0.0
            derivative[-1] = 0.0

    return xlist, ylist, dxlist, dylist
示例13: _toDataColumn_number
def _toDataColumn_number(self, data, mask):
    """Convert input values into a DataColumn of this numeric type.

    Three paths are taken, depending on the input:

      1. C{data} is already a Numpy array of this field's dtype (and
         C{mask} is None or a Numpy array): NaN entries are marked
         MISSING.
      2. C{data} can be converted to an array of this dtype in one
         step: NaN entries, and entries with a non-zero incoming mask,
         are marked MISSING.
      3. The bulk conversion fails: elements are converted one by one;
         NaN and "NAN" strings become MISSING, and values that cannot
         be converted (including None) become INVALID with
         C{defs.PADDING} as their data.

    The result is finally passed through C{self._checkValues} and
    C{self._checkIntervals}.

    @param data: The input values.
    @param mask: The input mask, or None.
    @rtype: DataColumn
    @return: A DataColumn of this field type.
    """
    def looksMissing(v):
        # A float NaN or the string "NAN" (ignoring case) stands for a missing value.
        return (isinstance(v, float) and math.isnan(v)) or (isinstance(v, basestring) and v.upper() == "NAN")

    data, mask = self._checkNumpy(data, mask)

    if isinstance(data, NP.ndarray) and (mask is None or isinstance(mask, NP.ndarray)) and data.dtype == self.dtype:
        mask2 = NP("isnan", data)
        if mask is None:
            mask = NP("array", mask2, defs.maskType) * defs.MISSING
        else:
            mask[mask2] = defs.MISSING

    else:
        data, mask = self._checkNonNumpy(data, mask)
        try:
            data = NP("array", data, dtype=self.dtype)
            # mask is handled in the else statement after the except block

        except (ValueError, TypeError):
            # Bulk conversion failed: fall back to converting element by element.
            data2 = NP("empty", len(data), dtype=self.dtype)
            if mask is None:
                mask2 = NP("zeros", len(data), dtype=defs.maskType)
            else:
                mask2 = NP("fromiter", ((defs.VALID if not m else defs.MISSING) for m in mask), dtype=defs.maskType, count=len(mask))

            for i, v in enumerate(data):
                try:
                    data2[i] = v
                    if mask2[i] == defs.VALID and looksMissing(v):
                        mask2[i] = defs.MISSING
                    if v is None:
                        # Route None through the same handling as an unconvertible value.
                        raise TypeError
                except (ValueError, TypeError):
                    data2[i] = defs.PADDING
                    if mask2[i] == defs.VALID:
                        if looksMissing(v):
                            mask2[i] = defs.MISSING
                        else:
                            mask2[i] = defs.INVALID

            if not mask2.any():
                mask2 = None

            data, mask = data2, mask2

        else:
            mask2 = NP("isnan", data)
            if mask is None:
                # Scale by defs.MISSING like the other two NaN-to-mask conversions in this
                # method, so NaN rows carry the MISSING code rather than a bare 1.
                mask = NP(NP("array", mask2, defs.maskType) * defs.MISSING)
            else:
                mask = NP(NP("array", NP("logical_or", mask2, NP("fromiter", (m != 0 for m in mask), dtype=NP.dtype(bool), count=len(mask))), defs.maskType) * defs.MISSING)
            if not mask.any():
                mask = None

    data, mask = self._checkValues(data, mask)
    data, mask = self._checkIntervals(data, mask)
    return DataColumn(self, data, mask)
示例14: _stringToValue_date
def _stringToValue_date(self, string):
    """Convert an ISO 8601 date string into this field's integer time value.

    The string is matched against C{self._iso8601_date}; groups 1, 3
    and 5 are taken as year, month and day.  A missing month or day
    defaults to 1.  The result is the number of whole days since
    C{self._dateTimeOrigin}, times 86400, times
    C{self._dateTimeResolution}.

    @type string: string
    @param string: The date to convert.
    @rtype: Numpy int64
    @return: The converted value.
    @raise ValueError: If the string does not match the pattern, has no year, or does not describe a real date.
    """
    match = re.match(self._iso8601_date, string)
    if match is None:
        raise ValueError("invalid ISO 8601 date string: \"%s\"" % string)

    year, month, day = match.group(1), match.group(3), match.group(5)

    try:
        if year is None:
            raise ValueError
        if month is None:
            # Year only: first day of the year.
            parts = (int(year), 1, 1)
        elif day is None:
            # Year and month: first day of the month.
            parts = (int(year), int(month), 1)
        else:
            parts = (int(year), int(month), int(day))
        dateTimeObject = datetime.datetime(*parts)
    except ValueError:
        raise ValueError("invalid ISO 8601 date string: \"%s\"" % string)

    elapsed = dateTimeObject - self._dateTimeOrigin
    return NP.int64(elapsed.days*86400 * self._dateTimeResolution)
示例15: evaluate
def evaluate(self, dataTable, functionTable, performanceTable, arguments):
    """Evaluate this built-in function as an element-wise floor division.

    Both arguments are evaluated first; the result is
    C{floor_divide(left, right)}.  Rows whose right-hand value equals
    zero are flagged INVALID, and that flag array is combined with the
    masks of both arguments by C{DataColumn.mapAnyMissingInvalid}.

    @type dataTable: DataTable
    @param dataTable: The data on which the arguments are evaluated.
    @type functionTable: FunctionTable
    @param functionTable: Passed on to the arguments' C{evaluate}.
    @type performanceTable: PerformanceTable
    @param performanceTable: Used to time this calculation.
    @type arguments: list
    @param arguments: Exactly two expressions: the numerator and the denominator.
    @rtype: DataColumn
    @return: The quotient column.
    """
    evaluated = [x.evaluate(dataTable, functionTable, performanceTable) for x in arguments]

    performanceTable.begin("built-in \"%s\"" % self.name)

    fieldType = self.fieldTypeFromSignature(evaluated)
    left, right = evaluated

    # Flag division by zero; drop the flag array entirely if it never happens.
    divideByZero = NP(NP(right.data == 0.0) * defs.INVALID)
    if not divideByZero.any():
        divideByZero = None

    mask = DataColumn.mapAnyMissingInvalid([divideByZero, left.mask, right.mask])
    result = DataColumn(fieldType, NP("floor_divide", left.data, right.data), mask)

    performanceTable.end("built-in \"%s\"" % self.name)
    return result