本文整理汇总了Python中augustus.core.NumpyInterface.NP.dtype方法的典型用法代码示例。如果您正苦于以下问题:Python NP.dtype方法的具体用法?Python NP.dtype怎么用?Python NP.dtype使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类augustus.core.NumpyInterface.NP的用法示例。
在下文中一共展示了NP.dtype方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: cusum
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def cusum(self, testDistributions, fieldName, dataColumn, state, performanceTable):
    """Score a column with the CUSUM (cumulative sum) TestStatistic.

    For every valid row the running value becomes
    C{max(resetValue, previous + logRatio)}, where C{logRatio} is the
    alternate distribution's C{logpdf} minus the baseline's. Rows
    that are not valid repeat the previous running value. Because
    each row depends on the one before it, the fill is a sequential
    loop.

    If this model has a C{stateId} attribute, the running value is
    seeded from C{state} under that key and written back when the
    loop finishes, so the sum can continue across several calls.

    @type testDistributions: PmmlBinding
    @param testDistributions: The <TestDistributions> element; it must contain a Baseline and an Alternate that are GaussianDistribution or PoissonDistribution.
    @type fieldName: string
    @param fieldName: The field name (not used by this method's body).
    @type dataColumn: DataColumn
    @param dataColumn: The field to score.
    @type state: DataTableState
    @param state: Persistent state; read and written only when C{stateId} is set.
    @type performanceTable: PerformanceTable
    @param performanceTable: Its C{begin}/C{end} methods are called unconditionally around the fill loop.
    @rtype: dict
    @return: A dictionary with the single key None mapped to a DataColumn of scores.
    @raise PmmlValidationError: If no suitable Baseline or Alternate distribution is found.
    """
    baselineMatches = testDistributions.xpath("pmml:Baseline/pmml:GaussianDistribution | pmml:Baseline/pmml:PoissonDistribution")
    alternateMatches = testDistributions.xpath("pmml:Alternate/pmml:GaussianDistribution | pmml:Alternate/pmml:PoissonDistribution")
    if len(baselineMatches) == 0 or len(alternateMatches) == 0:
        raise defs.PmmlValidationError("BaselineModel CUSUM requires a Baseline and an Alternate that are either GaussianDistribution or PoissonDistribution")

    # log-likelihood ratio of each row: alternate minus baseline
    alternateLogPdf = alternateMatches[0].logpdf(dataColumn.data)
    baselineLogPdf = baselineMatches[0].logpdf(dataColumn.data)
    logRatios = alternateLogPdf - baselineLogPdf

    if dataColumn.mask is not None:
        usable = NP(dataColumn.mask == defs.VALID)
    else:
        usable = NP("ones", len(dataColumn), dtype=NP.dtype(bool))

    # seed the running sum from the saved state, falling back to zero
    stateId = self.get("stateId")
    running = state.get(stateId) if stateId is not None else None
    if running is None:
        running = 0.0

    floor = testDistributions.get("resetValue", defaultFromXsd=True, convertType=True)
    scores = NP("empty", len(dataColumn), dtype=NP.dtype(float))

    performanceTable.begin("fill CUSUM")
    for row in xrange(len(dataColumn)):
        if usable[row]:
            running = max(floor, running + logRatios[row])
        # unusable rows simply carry the previous value forward
        scores[row] = running
    performanceTable.end("fill CUSUM")

    if stateId is not None:
        state[stateId] = running

    return {None: DataColumn(self.scoreType, scores, None)}
示例2: _checkValues
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def _checkValues(self, data, mask):
    """Derive a validity mask for C{data} from this object's Value specifications.

    Every entry of C{self.values} supplies a "value" string and a
    "property" ("valid" by default, or "missing" / "invalid"). Rows
    of C{data} equal to the converted value are flagged accordingly.
    If any entry is "valid", only listed rows end up VALID and the
    rest INVALID ("guilty until proven innocent"). Otherwise every
    row is VALID unless flagged missing or invalid. MISSING takes
    precedence in both modes.

    As a side effect, any "displayValue" found is recorded in
    C{self._displayValue}, keyed by the value string.

    @type data: 1d Numpy array
    @param data: The data to check; returned unchanged.
    @type mask: 1d Numpy array of mask values, or None
    @param mask: The incoming mask; None is treated as no MISSING and no INVALID rows.
    @rtype: 2-tuple
    @return: C{(data, mask)}. The inputs are returned as-is when there are no specifications; the mask is None when no row needs flagging, otherwise a newly built mask array.
    @raise PmmlValidationError: If a value string cannot be converted by C{stringToValue}.
    """
    specifications = self.values
    if len(specifications) == 0:
        return data, mask

    rowCount = len(data)
    if mask is not None:
        missing = NP(mask == defs.MISSING)
        invalid = NP(mask == defs.INVALID)
    else:
        missing = NP("zeros", rowCount, dtype=NP.dtype(bool))
        invalid = NP("zeros", rowCount, dtype=NP.dtype(bool))
    valid = NP("zeros", rowCount, dtype=NP.dtype(bool))

    # each recognized property accumulates matches into its own flag array
    flagsByProperty = {"valid": valid, "missing": missing, "invalid": invalid}

    validSpecified = 0
    for specification in specifications:
        text = specification.get("value")

        label = specification.get("displayValue")
        if label is not None:
            self._displayValue[text] = label

        prop = specification.get("property", "valid")
        try:
            converted = self.stringToValue(text)
        except ValueError:
            raise defs.PmmlValidationError("Improper value in Value specification: \"%s\"" % text)

        target = flagsByProperty.get(prop)
        if target is not None:
            NP("logical_or", target, NP(data == converted), target)
            if prop == "valid":
                validSpecified += 1

    if validSpecified > 0:
        # guilty until proven innocent
        NP("logical_and", valid, NP("logical_not", missing), valid)
        if valid.all():
            return data, None
        mask = NP(NP("ones", len(data), dtype=defs.maskType) * defs.INVALID)
        mask[missing] = defs.MISSING
        mask[valid] = defs.VALID
        return data, mask

    # innocent until proven guilty
    NP("logical_and", invalid, NP("logical_not", missing), invalid)
    if not NP("logical_or", invalid, missing).any():
        return data, None
    mask = NP("zeros", len(data), dtype=defs.maskType)
    mask[missing] = defs.MISSING
    mask[invalid] = defs.INVALID
    return data, mask
示例3: functionMultiset
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def functionMultiset(self, dataColumn, whereMask, groupSelection, getstate, setstate):
    """Build a running multiset (value -> count) over the rows of a DataColumn.

    Rows take part only if they are valid and pass both optional
    selections. Each output row holds a dict snapshot of the counts
    accumulated so far; rows that do not take part share the most
    recent snapshot object.

    @type dataColumn: DataColumn
    @param dataColumn: The input data column.
    @type whereMask: 1d Numpy array of bool, or None
    @param whereMask: The result of the SQL where selection.
    @type groupSelection: 1d Numpy array of bool, or None.
    @param groupSelection: Rows corresponding to a particular value of the groupField.
    @type getstate: callable function, or None
    @param getstate: Retrieve starting counts; a non-None result is used (and updated in place) as the running multiset.
    @type setstate: callable function, or None
    @param setstate: Store the final running multiset.
    @rtype: DataColumn of dict objects
    @return: A column of multisetted rows, with no mask.
    """
    fieldType = FakeFieldType("object", "any")

    selection = NP("ones", len(dataColumn), dtype=NP.dtype(bool))
    if dataColumn.mask is not None:
        selection = NP("logical_and", selection, NP(dataColumn.mask == defs.VALID))
    # narrow the selection in place with each optional filter
    for extraFilter in (whereMask, groupSelection):
        if extraFilter is not None:
            NP("logical_and", selection, extraFilter, selection)

    counts = {}
    if getstate is not None:
        restored = getstate()
        if restored is not None:
            counts = restored

    snapshot = dict(counts)
    rows = NP("empty", len(dataColumn), dtype=NP.dtype(object))
    toPython = dataColumn.fieldType.valueToPython

    for position, raw in enumerate(dataColumn.data):
        if selection[position]:
            key = toPython(raw)
            if key in counts:
                counts[key] += 1
            else:
                counts[key] = 1
            # a fresh copy, so earlier rows keep the counts they saw
            snapshot = dict(counts)
        rows[position] = snapshot

    if setstate is not None:
        setstate(counts)

    return DataColumn(fieldType, rows, None)
示例4: _fromDataColumn_number
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def _fromDataColumn_number(self, dataColumn):
    """Convert a numeric DataColumn to a Numpy array of dtype object.

    Without a mask the data is converted directly. With a mask,
    VALID rows keep their value, MISSING rows become C{defs.NAN},
    and all other rows become None.

    @type dataColumn: DataColumn
    @param dataColumn: The column to convert.
    @rtype: 1d Numpy array of dtype object
    @return: The converted values, one per row.
    """
    if dataColumn.mask is None:
        return NP("array", dataColumn.data, dtype=NP.dtype(object))

    converted = NP("empty", len(dataColumn), dtype=NP.dtype(object))
    flags = dataColumn.mask
    for position, number in enumerate(dataColumn.data):
        flag = flags[position]
        if flag == defs.VALID:
            converted[position] = number
        elif flag == defs.MISSING:
            converted[position] = defs.NAN
        else:
            converted[position] = None
    return converted
示例5: pointsToSmoothCurve
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def pointsToSmoothCurve(xarray, yarray, samples, smoothingScale, loop):
    """Fit a smooth curve through numeric points with a Gaussian-weighted local line.

    At each sample position, every input point is weighted by a
    Gaussian of width C{smoothingScale} centered on the sample, and
    a weighted least-squares line is solved in closed form. The
    line's value at the sample gives y; its slope, scaled by half
    the distance between the neighboring samples, gives dy.

    @type xarray: 1d Numpy array of numbers
    @param xarray: Array of x values.
    @type yarray: 1d Numpy array of numbers
    @param yarray: Array of y values.
    @type samples: 1d Numpy array of numbers
    @param samples: Locations at which to evaluate the fit; returned unchanged as C{xlist}.
    @type smoothingScale: number
    @param smoothingScale: Standard deviation of the Gaussian kernel.
    @type loop: bool
    @param loop: If False, the first and last entries of C{dxlist} and C{dylist} are set to zero, disconnecting the end of the curve from the beginning.
    @rtype: 4-tuple of 1d Numpy arrays
    @return: C{xlist}, C{ylist}, C{dxlist}, C{dylist} appropriate for C{formatPathdata}.
    """
    gaussianNorm = math.sqrt(2.0*math.pi)

    fittedValues = []
    fittedSlopes = []
    for sample in samples:
        # Gaussian kernel weight of every input point relative to this sample
        squaredOffsets = NP("power", NP(xarray - sample), 2)
        exponent = NP(NP(-0.5 * squaredOffsets) / NP(smoothingScale * smoothingScale))
        weights = NP(NP(NP("exp", exponent) / smoothingScale) / (gaussianNorm))

        # weighted moments for the closed-form straight-line fit
        sum1 = weights.sum()
        sumx = NP(weights * xarray).sum()
        sumxx = NP(weights * NP(xarray * xarray)).sum()
        sumy = NP(weights * yarray).sum()
        sumxy = NP(weights * NP(xarray * yarray)).sum()

        determinant = (sum1 * sumxx) - (sumx * sumx)
        intercept = ((sumxx * sumy) - (sumx * sumxy)) / determinant
        slope = ((sum1 * sumxy) - (sumx * sumy)) / determinant

        fittedValues.append(intercept + (sample * slope))
        fittedSlopes.append(slope)

    xlist = samples
    ylist = NP("array", fittedValues, dtype=NP.dtype(float))
    # half the distance between each sample's two neighbors (wrapping at the ends)
    dxlist = NP((NP("roll", xlist, -1) - NP("roll", xlist, 1)) / 2.0)
    dylist = NP("array", fittedSlopes, dtype=NP.dtype(float)) * dxlist

    if not loop:
        for derivative in (dxlist, dylist):
            derivative[0] = 0.0
            derivative[-1] = 0.0

    return xlist, ylist, dxlist, dylist
示例6: functionMax
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def functionMax(self, dataColumn, whereMask, groupSelection, getstate, setstate):
    """Finds the running maximum of rows in a DataColumn, possibly with an SQL where mask and groupField.

    Each output row holds the largest selected value seen so far
    (including any starting value from C{getstate}). Rows that come
    before any maximum is known are marked INVALID in the output
    mask.

    @type dataColumn: DataColumn
    @param dataColumn: The input data column.
    @type whereMask: 1d Numpy array of bool, or None
    @param whereMask: The result of the SQL where selection.
    @type groupSelection: 1d Numpy array of bool, or None.
    @param groupSelection: Rows corresponding to a particular value of the groupField.
    @type getstate: callable function, or None
    @param getstate: Retrieve starting values from the DataTableState.
    @type setstate: callable function, or None
    @param setstate: Store ending values to the DataTableState.
    @rtype: DataColumn
    @return: A column of maximized rows.
    @raise PmmlValidationError: If the input field's optype is neither continuous nor ordinal.
    """
    fieldType = dataColumn.fieldType
    if fieldType.optype not in ("continuous", "ordinal"):
        # the message previously named "min", copied from the minimum aggregate
        raise defs.PmmlValidationError("Aggregate function \"max\" requires a continuous or ordinal input field")

    if dataColumn.mask is None:
        selection = NP("ones", len(dataColumn), dtype=NP.dtype(bool))
    else:
        selection = NP(dataColumn.mask == defs.VALID)

    # narrow the selection in place with the optional filters
    if whereMask is not None:
        NP("logical_and", selection, whereMask, selection)
    if groupSelection is not None:
        NP("logical_and", selection, groupSelection, selection)

    maximum = None
    if getstate is not None:
        startingState = getstate()
        if startingState is not None:
            maximum = startingState

    data = NP("empty", len(dataColumn), dtype=fieldType.dtype)
    mask = NP("zeros", len(dataColumn), dtype=defs.maskType)

    for i, x in enumerate(dataColumn.data):
        if selection[i]:
            if maximum is None or x > maximum:
                maximum = x
        if maximum is None:
            # nothing selected yet, so there is no maximum to report
            mask[i] = defs.INVALID
        else:
            data[i] = maximum

    if not mask.any():
        mask = None

    if setstate is not None:
        setstate(maximum)

    return DataColumn(fieldType, data, mask)
示例7: evaluate
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def evaluate(self, dataTable, functionTable, performanceTable, returnUnknowns=False):
    """Evaluate the always-false predicate: no row of the DataTable is selected.

    @type dataTable: DataTable
    @param dataTable: The input DataTable; only its length is used.
    @type functionTable: FunctionTable
    @param functionTable: The FunctionTable (not used by this method's body).
    @type performanceTable: PerformanceTable
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
    @type returnUnknowns: bool
    @param returnUnknowns: If True, also return arrays indicating which rows are unknown; here they are all False.
    @rtype: 1d Numpy array of bool or 3-tuple of arrays
    @return: Either an all-False selection array, or C{(selection, unknowns, encounteredUnknowns)} where the last two are the same all-False array object.
    """
    performanceTable.begin("Predicate False")

    nothingSelected = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
    if returnUnknowns:
        nothingUnknown = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
        outcome = (nothingSelected, nothingUnknown, nothingUnknown)
    else:
        outcome = nothingSelected

    performanceTable.end("Predicate False")
    return outcome
示例8: _fromDataColumn
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def _fromDataColumn(self, dataColumn):
    """Convert a DataColumn to a Numpy array of Python objects.

    Valid rows are passed through C{self.valueToPython}. When the
    column has a mask, MISSING rows become C{defs.NAN} and all other
    non-valid rows become None.

    @type dataColumn: DataColumn
    @param dataColumn: The column to convert.
    @rtype: 1d Numpy array of dtype object
    @return: The converted values, one per row.
    """
    # NOTE: the original author measured an enumerate loop as using less memory
    # and slightly less time than a list comprehension (80 ns vs 100 ns per record)
    converted = NP("empty", len(dataColumn), dtype=NP.dtype(object))

    flags = dataColumn.mask
    if flags is None:
        for position, raw in enumerate(dataColumn.data):
            converted[position] = self.valueToPython(raw)
        return converted

    for position, raw in enumerate(dataColumn.data):
        flag = flags[position]
        if flag == defs.VALID:
            converted[position] = self.valueToPython(raw)
        elif flag == defs.MISSING:
            converted[position] = defs.NAN
        else:
            converted[position] = None
    return converted
示例9: initialize
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def initialize(self, state, numberOfRecords, numberOfFields, distributionBased):
    """First step of a vectorized metric calculation: prepare the accumulator on C{state}.

    Sets C{state.maximumComponent} to a float array of zeros, one
    entry per record. Only the C{state} object is modified.

    @type state: ad-hoc Python object
    @param state: State information that persists across the steps of a metric calculation; unrelated to DataTableState.
    @type numberOfRecords: int
    @param numberOfRecords: The number of rows in the dataset.
    @type numberOfFields: int
    @param numberOfFields: The number of columns in the dataset (not used by this method's body).
    @type distributionBased: bool
    @param distributionBased: Must be False for this metric.
    @raise NotImplementedError: If C{distributionBased} is True; the accumulator has already been set on C{state} by then.
    """
    state.maximumComponent = NP("zeros", numberOfRecords, dtype=NP.dtype(float))
    if not distributionBased:
        return
    raise NotImplementedError("Distribution-based clustering has not been implemented for the %s metric" % self.t)
示例10: evaluate
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def evaluate(self, dataTable, functionTable, performanceTable, returnUnknowns=False):
    """Evaluate a set-membership predicate, using a DataTable as input.

    A row is selected when its value of the "field" attribute's
    column is found among the values of this element's Array child;
    the test is inverted when "booleanOperator" is "isNotIn".

    @type dataTable: DataTable
    @param dataTable: The input DataTable, containing the field named by this predicate.
    @type functionTable: FunctionTable
    @param functionTable: The FunctionTable (not used by this method's body).
    @type performanceTable: PerformanceTable
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
    @type returnUnknowns: bool
    @param returnUnknowns: If True, report non-valid rows separately instead of excluding them from the selection.
    @rtype: 1d Numpy array of bool or 3-tuple of arrays
    @return: Either a selection array with non-valid rows forced to False, or C{(selection, unknowns, encounteredUnknowns)} where the last two are the same array object.
    """
    performanceTable.begin("SimpleSetPredicate")

    dataColumn = dataTable.fields[self.get("field")]
    convert = dataColumn.fieldType.stringToValue
    members = [convert(text) for text in self.childOfClass(Array).values(convertType=False)]

    selection = NP("in1d", dataColumn.data, members)
    if self.get("booleanOperator") == "isNotIn":
        # invert in place
        NP("logical_not", selection, selection)

    if not returnUnknowns:
        if dataColumn.mask is not None:
            # non-valid rows can never be selected
            NP("logical_and", selection, NP(dataColumn.mask == defs.VALID), selection)
        performanceTable.end("SimpleSetPredicate")
        return selection

    if dataColumn.mask is not None:
        unknowns = NP(dataColumn.mask != defs.VALID)
    else:
        unknowns = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
    performanceTable.end("SimpleSetPredicate")
    return selection, unknowns, unknowns
示例11: _checkIntervals
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def _checkIntervals(self, data, mask):
    """Mark rows of C{data} that fall outside this object's Interval specifications.

    Rows start as acceptable ("innocent until proven guilty") and
    are flagged when they lie beyond a left or right margin, with
    the "closure" deciding whether a margin value itself is allowed.
    A row is flagged if it violates any one of the intervals.

    @type data: 1d Numpy array
    @param data: The data to check; returned unchanged.
    @type mask: 1d Numpy array of mask values, or None
    @param mask: The incoming mask. When given and rows are flagged, it is updated in place, and only rows currently VALID are changed.
    @rtype: 2-tuple
    @return: C{(data, mask)}. The inputs are returned as-is when there are no intervals or nothing is flagged; with no incoming mask, a new array equal to the flags times C{defs.INVALID}.
    @raise PmmlValidationError: If a margin string cannot be converted by C{stringToValue}.
    """
    intervals = self.intervals
    if len(intervals) == 0:
        return data, mask

    # innocent until proven guilty
    outside = NP("zeros", len(data), dtype=NP.dtype(bool))

    for interval in intervals:
        closure = interval["closure"]
        leftText = interval.get("leftMargin")
        rightText = interval.get("rightMargin")

        if leftText is not None:
            try:
                leftBound = self.stringToValue(leftText)
            except ValueError:
                raise defs.PmmlValidationError("Improper value in Interval leftMargin specification: \"%s\"" % leftText)

            if closure in ("openClosed", "openOpen"):
                # open on the left: the margin itself is excluded
                outside[NP(data <= leftBound)] = True
            elif closure in ("closedOpen", "closedClosed"):
                outside[NP(data < leftBound)] = True

        if rightText is not None:
            try:
                rightBound = self.stringToValue(rightText)
            except ValueError:
                raise defs.PmmlValidationError("Improper value in Interval rightMargin specification: \"%s\"" % rightText)

            if closure in ("openOpen", "closedOpen"):
                # open on the right: the margin itself is excluded
                outside[NP(data >= rightBound)] = True
            elif closure in ("openClosed", "closedClosed"):
                outside[NP(data > rightBound)] = True

    if not outside.any():
        return data, mask

    if mask is None:
        return data, NP(outside * defs.INVALID)

    # only change what wasn't already marked as MISSING
    NP("logical_and", outside, NP(mask == defs.VALID), outside)
    mask[outside] = defs.INVALID
    return data, mask
示例12: initialize
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def initialize(self, state, numberOfRecords, numberOfFields, distributionBased):
    """First step of a vectorized metric calculation: prepare the accumulators on C{state}.

    Sets C{state.sumInQuadrature} to a float array of zeros, one
    entry per record. For a distribution-based calculation it also
    allocates an uninitialized records-by-fields float array as
    C{state.displacements} and resets C{state.displacementIndex} to
    zero. Only the C{state} object is modified.

    @type state: ad-hoc Python object
    @param state: State information that persists across the steps of a metric calculation; unrelated to DataTableState.
    @type numberOfRecords: int
    @param numberOfRecords: The number of rows in the dataset.
    @type numberOfFields: int
    @param numberOfFields: The number of columns in the dataset.
    @type distributionBased: bool
    @param distributionBased: If True, also allocate the per-field displacement storage.
    """
    state.sumInQuadrature = NP("zeros", numberOfRecords, dtype=NP.dtype(float))
    if not distributionBased:
        return

    shape = (numberOfRecords, numberOfFields)
    state.displacements = NP("empty", shape, dtype=NP.dtype(float))
    state.displacementIndex = 0
示例13: evaluate
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def evaluate(self, dataTable, functionTable, performanceTable, arguments):
    """Evaluate this built-in function over boolean arguments, skipping missing values.

    Every argument is evaluated first. The combination itself is
    delegated to C{self.applySkipMissing}, which receives a zeroed
    data array and an all-True "allbad" array. Rows still flagged in
    "allbad" afterwards are marked MISSING in the output mask.

    @type dataTable: DataTable
    @param dataTable: The input DataTable.
    @type functionTable: FunctionTable
    @param functionTable: The FunctionTable, passed on to each argument.
    @type performanceTable: PerformanceTable
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
    @type arguments: list of objects with an C{evaluate} method
    @param arguments: The argument expressions; at least two are required by C{allBooleanType}.
    @rtype: DataColumn
    @return: The result column, with the field type chosen by C{allBooleanType}.
    """
    evaluated = [argument.evaluate(dataTable, functionTable, performanceTable) for argument in arguments]

    performanceTable.begin("built-in \"%s\"" % self.name)

    fieldType = self.allBooleanType(evaluated, atleast=2)
    rowCount = len(dataTable)
    initialData = NP("zeros", rowCount, dtype=fieldType.dtype)
    initialAllbad = NP("ones", rowCount, dtype=NP.dtype(bool))

    arrays, mask = self.applySkipMissing((initialData, initialAllbad), None, evaluated)
    data, allbad = arrays

    if allbad.any():
        if mask is not None:
            # leave rows that already carry a non-VALID flag untouched
            NP("logical_and", allbad, NP(mask == defs.VALID), allbad)
            mask[allbad] = defs.MISSING
        else:
            mask = allbad * defs.MISSING

    performanceTable.end("built-in \"%s\"" % self.name)
    return DataColumn(fieldType, data, mask)
示例14: _toDataColumn_number
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def _toDataColumn_number(self, data, mask):
    """Build a DataColumn of this numeric type from raw data and an optional mask.

    Three paths are taken, depending on the input:

      - Fast path: C{data} is already a Numpy array of this type's
        dtype (and C{mask} is None or a Numpy array). NaN entries
        are flagged MISSING; a supplied mask is updated in place.
      - Bulk conversion: C{data} converts to an array in one call.
        The mask becomes MISSING wherever the data is NaN or the
        incoming mask was nonzero, or None if nothing is flagged.
      - Item-by-item fallback: bulk conversion raised ValueError or
        TypeError. Items that cannot be stored (or are None) are
        replaced by C{defs.PADDING} and flagged INVALID, unless they
        look like NaN, in which case they are flagged MISSING.

    The result is then passed through C{_checkValues} and
    C{_checkIntervals}.

    @type data: Numpy array or other sequence
    @param data: The values to load.
    @type mask: Numpy array, other sequence, or None
    @param mask: Optional flags for the values.
    @rtype: DataColumn
    @return: A column of this field type.
    """
    def looksLikeNaN(item):
        # a float NaN, or a string spelling "NAN" in any letter case
        return (isinstance(item, float) and math.isnan(item)) or (isinstance(item, basestring) and item.upper() == "NAN")

    data, mask = self._checkNumpy(data, mask)

    alreadyTyped = isinstance(data, NP.ndarray) and (mask is None or isinstance(mask, NP.ndarray)) and data.dtype == self.dtype
    if alreadyTyped:
        nanRows = NP("isnan", data)
        if mask is not None:
            mask[nanRows] = defs.MISSING
        else:
            mask = NP("array", nanRows, defs.maskType) * defs.MISSING

    else:
        data, mask = self._checkNonNumpy(data, mask)
        try:
            # on success, the mask is handled in the else clause below
            data = NP("array", data, dtype=self.dtype)

        except (ValueError, TypeError):
            filled = NP("empty", len(data), dtype=self.dtype)
            if mask is not None:
                flags = NP("fromiter", ((defs.MISSING if m else defs.VALID) for m in mask), dtype=defs.maskType, count=len(mask))
            else:
                flags = NP("zeros", len(data), dtype=defs.maskType)

            for position, item in enumerate(data):
                try:
                    filled[position] = item
                    if flags[position] == defs.VALID and looksLikeNaN(item):
                        flags[position] = defs.MISSING
                    if item is None:
                        # force None through the same handling as an unstorable item
                        raise TypeError
                except (ValueError, TypeError):
                    filled[position] = defs.PADDING
                    if flags[position] == defs.VALID:
                        if looksLikeNaN(item):
                            flags[position] = defs.MISSING
                        else:
                            flags[position] = defs.INVALID

            if not flags.any():
                flags = None
            data, mask = filled, flags

        else:
            nanRows = NP("isnan", data)
            if mask is None:
                mask = NP("array", nanRows, defs.maskType)
            else:
                previouslyFlagged = NP("fromiter", (m != 0 for m in mask), dtype=NP.dtype(bool), count=len(mask))
                eitherFlagged = NP("logical_or", nanRows, previouslyFlagged)
                mask = NP(NP("array", eitherFlagged, defs.maskType) * defs.MISSING)
            if not mask.any():
                mask = None

    data, mask = self._checkValues(data, mask)
    data, mask = self._checkIntervals(data, mask)
    return DataColumn(self, data, mask)
示例15: compare
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
def compare(self, dataTable, functionTable, performanceTable, centerString, defaultCompareFunction, anyInvalid):
    """Compare input data with a cluster center along the
    direction of this field.

    Cluster distances are computed in two steps: this C{compare}
    function, which determines the distance in the direction of a
    field, and the metric, which combines results from each field.

    @type dataTable: DataTable
    @param dataTable: The input data.
    @type functionTable: FunctionTable
    @param functionTable: A table of functions (not used by this method's body).
    @type performanceTable: PerformanceTable
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
    @type centerString: string
    @param centerString: The center of the cluster in this field, represented as a string.
    @type defaultCompareFunction: string
    @param defaultCompareFunction: The C{compareFunction} defined at the model level, which may be overruled on a per-field basis.
    @type anyInvalid: 1d Numpy array of bool
    @param anyInvalid: Mask for invalid data, accumulated with each C{compare} call. This method modifies it (only for the "table" compareFunction).
    @rtype: 1d Numpy array of numbers
    @return: The distances or similarities between the input data and the cluster center, along the direction of this field.
    @raise PmmlValidationError: If the compareFunction is not recognized, if "gaussSim" lacks a similarityScale, or if "table" is used with non-integer data, without exactly one Comparisons/Matrix, or with a center that is not a valid row index.
    """
    performanceTable.begin("ClusteringField")

    dataColumn = dataTable.fields[self["field"]]
    if dataColumn.mask is not None:
        # even though DataColumns are immutable, we're allowed to change the invalid values
        # because they're not defined; set them so that x - y = 0, and hence they'll be
        # effectively skipped in summations without any extra work
        dataColumn._unlock()
        dataColumn.data[NP(dataColumn.mask != defs.VALID)] = dataColumn.fieldType.stringToValue(centerString)
        dataColumn._lock()

    compareFunction = self.get("compareFunction", defaultCompareFunction)

    if compareFunction == "absDiff":
        result = NP("absolute", NP(dataColumn.data - dataColumn.fieldType.stringToValue(centerString)))

    elif compareFunction == "gaussSim":
        similarityScale = self.get("similarityScale")
        if similarityScale is None:
            raise defs.PmmlValidationError("If compareFunction is \"gaussSim\", a similarityScale must be provided")
        s = float(similarityScale)
        z = NP(dataColumn.data - dataColumn.fieldType.stringToValue(centerString))
        result = NP("exp", NP((-self.LOG2/s**2) * NP(z**2)))

    elif compareFunction == "delta":
        result = NP(dataColumn.data != dataColumn.fieldType.stringToValue(centerString))

    elif compareFunction == "equal":
        result = NP(dataColumn.data == dataColumn.fieldType.stringToValue(centerString))

    elif compareFunction == "table":
        if dataColumn.fieldType.dataType != "integer":
            raise defs.PmmlValidationError("If compareFunction is \"table\", the data must be integers")

        matrix = self.xpath("pmml:Comparisons/pmml:Matrix")
        if len(matrix) != 1:
            raise defs.PmmlValidationError("If compareFunction is \"table\", ClusteringFields needs a Comparisons/Matrix")

        values = matrix[0].values(convertType=False)
        centerValue = dataColumn.fieldType.stringToValue(centerString)
        try:
            row = values[centerValue]
        except IndexError:
            raise defs.PmmlValidationError("Cluster center component is %s, but this is an invalid row index for the Comparisons/Matrix (0-indexed)" % centerString)

        result = NP("empty", len(dataTable), dtype=NP.dtype(float))
        valid = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
        # look up each data value as a column index into the center's matrix row
        for j, value in enumerate(row):
            selection = NP(dataColumn.data == j)
            result[selection] = dataColumn.fieldType.stringToValue(value)
            NP("logical_or", valid, selection, valid)

        # data values with no matching column have no defined comparison
        NP("logical_or", anyInvalid, NP("logical_not", valid), anyInvalid)

    else:
        # without this branch an unknown name would surface as an unbound-local error at the return
        raise defs.PmmlValidationError("Unrecognized compareFunction \"%s\"" % compareFunction)

    performanceTable.end("ClusteringField")
    return result