本文整理汇总了Python中augustus.core.NumpyInterface.NP.any方法的典型用法代码示例。如果您正苦于以下问题:Python NP.any方法的具体用法?Python NP.any怎么用?Python NP.any使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类augustus.core.NumpyInterface.NP的用法示例。
在下文中一共展示了NP.any方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: maskInvalid
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def maskInvalid(self, data, mask):
    """Helper method to replace NaN and infinite values with
    INVALID after a potentially dangerous operation.

    Example::

        result = NP("log", dataColumn.data)    # log(0) = -inf, log(-x) = nan
        resultMask = self.maskInvalid(result, dataColumn.mask)
        return DataColumn(fakeFieldType, result, resultMask)

    The input C{data} and C{mask} are not modified by this
    method; a substitute mask is returned.  (The mask is copied
    before any entry is overwritten, whether or not the caller's
    array happens to be writeable.)

    @type data: 1d Numpy array
    @param data: The dataset that may contain NaN and infinite values.
    @type mask: 1d Numpy array of C{defs.maskType}, or None
    @param mask: The original mask.
    @rtype: 1d Numpy array of C{defs.maskType}, or None
    @return: The new mask, or None if no entry of the mask is non-zero.
    """
    bad = NP("logical_not", NP("isfinite", data))

    if bad.any():
        if mask is None:
            mask = bad * defs.INVALID
        else:
            # Only entries that are currently VALID are turned INVALID;
            # anything already flagged in the mask keeps its flag.
            NP("logical_and", bad, NP(mask == defs.VALID), bad)
            if bad.any():
                # Write into a private copy so the caller's mask is
                # never changed, honoring the documented contract.
                mask = NP("copy", mask)
                mask.setflags(write=True)
                mask[bad] = defs.INVALID

    # A mask with no non-zero entries is represented as None.
    if mask is not None and not mask.any():
        mask = None
    return mask
示例2: _toDataColumn_number
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def _toDataColumn_number(self, data, mask):
    """Build a DataColumn of this field type from C{data} and C{mask}.

    Three paths are taken, depending on the input:

      1. C{data} is already a Numpy array of C{self.dtype} (and C{mask}
         is None or a Numpy array): NaN entries are flagged MISSING.
      2. C{data} can be converted to an array of C{self.dtype} in one
         call: NaN entries and non-zero input mask entries are flagged.
      3. The bulk conversion raises ValueError or TypeError: items are
         converted one at a time, and items that cannot be stored are
         replaced with C{defs.PADDING} and flagged MISSING or INVALID.

    The result is then passed through C{self._checkValues} and
    C{self._checkIntervals}.

    @type data: presumably a Numpy array or a Python sequence -- confirm against C{_checkNumpy}/C{_checkNonNumpy}
    @param data: The values to load.
    @type mask: Numpy array, sequence, or None
    @param mask: The initial mask; non-zero/true entries are treated as not valid.
    @rtype: DataColumn
    @return: A DataColumn of this field type.
    """
    data, mask = self._checkNumpy(data, mask)
    if isinstance(data, NP.ndarray) and (mask is None or isinstance(mask, NP.ndarray)) and data.dtype == self.dtype:
        # Path 1: data needs no conversion; only NaN has to be flagged.
        mask2 = NP("isnan", data)
        if mask is None:
            mask = NP("array", mask2, defs.maskType) * defs.MISSING
        else:
            # Note: this writes into the mask array that was passed in.
            mask[mask2] = defs.MISSING
    else:
        data, mask = self._checkNonNumpy(data, mask)
        try:
            # Path 2: attempt to convert everything in one call.
            data = NP("array", data, dtype=self.dtype)
            # mask is handled in the else statement after the except block
        except (ValueError, TypeError):
            # Path 3: convert item by item so that one bad item does not
            # spoil the whole column.
            data2 = NP("empty", len(data), dtype=self.dtype)
            if mask is None:
                mask2 = NP("zeros", len(data), dtype=defs.maskType)
            else:
                # Any true entry of the input mask becomes MISSING.
                mask2 = NP("fromiter", ((defs.VALID if not m else defs.MISSING) for m in mask), dtype=defs.maskType, count=len(mask))
            for i, v in enumerate(data):
                try:
                    data2[i] = v
                    # A float NaN or the string "NAN" (any case) counts as MISSING.
                    if mask2[i] == defs.VALID and ((isinstance(v, float) and math.isnan(v)) or (isinstance(v, basestring) and v.upper() == "NAN")):
                        mask2[i] = defs.MISSING
                    # Force None into the error handler below even if the
                    # assignment itself did not raise.
                    if v is None:
                        raise TypeError
                except (ValueError, TypeError):
                    data2[i] = defs.PADDING
                    # Entries that are already flagged keep their flag.
                    if mask2[i] == defs.VALID:
                        if (isinstance(v, float) and math.isnan(v)) or (isinstance(v, basestring) and v.upper() == "NAN"):
                            mask2[i] = defs.MISSING
                        else:
                            mask2[i] = defs.INVALID
            # A mask with no non-zero entries is represented as None.
            if not mask2.any():
                mask2 = None
            data, mask = data2, mask2
        else:
            # Bulk conversion succeeded: flag NaN entries and merge in the
            # input mask.
            mask2 = NP("isnan", data)
            if mask is None:
                # NOTE(review): unlike the other branches, this one does not
                # multiply by defs.MISSING -- presumably relies on
                # defs.MISSING == 1; confirm.
                mask = NP("array", mask2, defs.maskType)
            else:
                mask = NP(NP("array", NP("logical_or", mask2, NP("fromiter", (m != 0 for m in mask), dtype=NP.dtype(bool), count=len(mask))), defs.maskType) * defs.MISSING)
            if not mask.any():
                mask = None
    data, mask = self._checkValues(data, mask)
    data, mask = self._checkIntervals(data, mask)
    return DataColumn(self, data, mask)
示例3: _toDataColumn_dateTime
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def _toDataColumn_dateTime(self, data, mask):
    """Build a DataColumn of this field type by converting each item
    of C{data} with C{self.stringToValue}.

    Items whose input mask entry is true, float NaN items and "NAN"
    strings (any case) are stored as C{defs.PADDING} and flagged
    MISSING.  Items for which C{stringToValue} raises ValueError or
    TypeError are stored as C{defs.PADDING} and flagged INVALID.

    @type data: sequence
    @param data: The values to convert.
    @type mask: sequence or None
    @param mask: The initial mask; true entries are treated as MISSING.
    @rtype: DataColumn
    @return: A DataColumn of this field type.
    """
    data, mask = self._checkNumpy(data, mask, tryToCast=False)
    data, mask = self._checkNonNumpy(data, mask)

    converted = NP("empty", len(data), dtype=self.dtype)
    flags = NP("zeros", len(data), dtype=defs.maskType)

    for index, item in enumerate(data):
        # Same test order as always: input mask, then NaN, then "NAN".
        if mask is not None and mask[index]:
            missing = True
        elif isinstance(item, float) and math.isnan(item):
            missing = True
        else:
            missing = isinstance(item, basestring) and item.upper() == "NAN"

        if missing:
            converted[index] = defs.PADDING
            flags[index] = defs.MISSING
            continue

        try:
            converted[index] = self.stringToValue(item)
        except (ValueError, TypeError):
            converted[index] = defs.PADDING
            flags[index] = defs.INVALID

    # A mask with no non-zero entries is represented as None.
    data = converted
    mask = flags if flags.any() else None

    data, mask = self._checkValues(data, mask)
    data, mask = self._checkIntervals(data, mask)
    return DataColumn(self, data, mask)
示例4: functionMax
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def functionMax(self, dataColumn, whereMask, groupSelection, getstate, setstate):
    """Finds the running maximum of rows in a DataColumn, possibly with an SQL where mask and groupField.

    Each output row holds the largest selected value seen so far
    (including any starting state); rows that precede the first
    selected value are flagged INVALID.

    @type dataColumn: DataColumn
    @param dataColumn: The input data column.
    @type whereMask: 1d Numpy array of bool, or None
    @param whereMask: The result of the SQL where selection.
    @type groupSelection: 1d Numpy array of bool, or None.
    @param groupSelection: Rows corresponding to a particular value of the groupField.
    @type getstate: callable function
    @param getstate: Retrieve starting values from the DataTableState.
    @type setstate: callable function
    @param setstate: Store ending values to the DataTableState.
    @rtype: DataColumn
    @return: A column of maximized rows.
    @raise defs.PmmlValidationError: If the input field's optype is neither continuous nor ordinal.
    """
    fieldType = dataColumn.fieldType
    if fieldType.optype not in ("continuous", "ordinal"):
        # The message used to name "min", copied from the minimum aggregate.
        raise defs.PmmlValidationError("Aggregate function \"max\" requires a continuous or ordinal input field")

    # Rows that take part in the maximum: VALID, and passing both filters.
    if dataColumn.mask is None:
        selection = NP("ones", len(dataColumn), dtype=NP.dtype(bool))
    else:
        selection = NP(dataColumn.mask == defs.VALID)

    if whereMask is not None:
        NP("logical_and", selection, whereMask, selection)
    if groupSelection is not None:
        NP("logical_and", selection, groupSelection, selection)

    # Resume from the stored state, if there is one.
    maximum = None
    if getstate is not None:
        startingState = getstate()
        if startingState is not None:
            maximum = startingState

    data = NP("empty", len(dataColumn), dtype=fieldType.dtype)
    mask = NP("zeros", len(dataColumn), dtype=defs.maskType)

    for i, x in enumerate(dataColumn.data):
        if selection[i]:
            if maximum is None or x > maximum:
                maximum = x
        if maximum is None:
            # Nothing has been selected yet, so there is no maximum to report.
            mask[i] = defs.INVALID
        else:
            data[i] = maximum

    # A mask with no non-zero entries is represented as None.
    if not mask.any():
        mask = None

    if setstate is not None:
        setstate(maximum)

    return DataColumn(fieldType, data, mask)
示例5: functionAverage
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def functionAverage(self, dataColumn, whereMask, groupSelection, getstate, setstate):
    """Computes the running average of rows in a DataColumn, possibly with an SQL where mask and groupField.

    The output is the cumulative sum of the selected values divided by
    the cumulative count of selected rows; rows where that ratio is
    not finite are flagged INVALID.

    @type dataColumn: DataColumn
    @param dataColumn: The input data column.
    @type whereMask: 1d Numpy array of bool, or None
    @param whereMask: The result of the SQL where selection.
    @type groupSelection: 1d Numpy array of bool, or None.
    @param groupSelection: Rows corresponding to a particular value of the groupField.
    @type getstate: callable function
    @param getstate: Retrieve starting values from the DataTableState.
    @type setstate: callable function
    @param setstate: Store ending values to the DataTableState.
    @rtype: DataColumn
    @return: A column of averaged rows.
    @raise defs.PmmlValidationError: If the input field is not "integer", "float" or "double".
    """
    fieldType = FakeFieldType("double", "continuous")

    if dataColumn.fieldType.dataType not in ("integer", "float", "double"):
        raise defs.PmmlValidationError("Aggregate function \"average\" requires a numeric input field: \"integer\", \"float\", \"double\"")

    rowCount = len(dataColumn)

    # counts[i] ends up 1 for rows that take part in the average, else 0.
    counts = NP("ones", rowCount, dtype=fieldType.dtype)
    filters = []
    if dataColumn.mask is not None:
        filters.append(NP(dataColumn.mask == defs.VALID))
    if whereMask is not None:
        filters.append(whereMask)
    if groupSelection is not None:
        filters.append(groupSelection)
    for keep in filters:
        NP("logical_and", counts, keep, counts)

    totals = NP("multiply", counts, dataColumn.data)

    # Fold the stored (sum, count) pair into the first row so that the
    # cumulative sums continue from the previous state.
    if getstate is not None and rowCount > 0:
        previous = getstate()
        if previous is not None:
            previousTotal, previousCount = previous
            totals[0] += previousTotal
            counts[0] += previousCount

    totals = NP("cumsum", totals)
    counts = NP("cumsum", counts)

    data = NP(totals / counts)
    notFinite = NP("logical_not", NP("isfinite", data))
    mask = NP(notFinite * defs.INVALID)
    # A mask with no non-zero entries is represented as None.
    if not mask.any():
        mask = None

    if setstate is not None and rowCount > 0:
        setstate((totals[-1], counts[-1]))

    return DataColumn(fieldType, data, mask)
示例6: applyMapMissingTo
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def applyMapMissingTo(fieldType, data, mask, mapMissingTo, overwrite=False):
    """Replace MISSING values with a given substitute.

    This function does not modify the original data (unless
    C{overwrite} is True), but it returns a substitute.  Example
    use::

        data, mask = dataColumn.data, dataColumn.mask
        data, mask = FieldCastMethods.applyMapMissingTo(dataColumn.fieldType, data, mask, "-999")
        return DataColumn(dataColumn.fieldType, data, mask)

    It can also be used in conjunction with other FieldCastMethods.

    @type fieldType: FieldType
    @param fieldType: The data fieldType (to interpret C{mapMissingTo}).
    @type data: 1d Numpy array
    @param data: The data.
    @type mask: 1d Numpy array of dtype defs.maskType, or None
    @param mask: The mask.
    @type mapMissingTo: string
    @param mapMissingTo: The replacement value, represented as a string (e.g. directly from a PMML attribute).
    @type overwrite: bool
    @param overwrite: If True, unlock and overwrite the original data and mask instead of copying them.
    @rtype: 2-tuple of 1d Numpy arrays
    @return: The new data and mask.
    @raise defs.PmmlValidationError: If C{mapMissingTo} cannot be cast to C{fieldType}.
    """
    # Nothing is flagged, or there is nothing to substitute.
    if mask is None:
        return data, mask
    if mapMissingTo is None:
        return data, mask

    missing = NP(mask == defs.MISSING)

    try:
        replacement = fieldType.stringToValue(mapMissingTo)
    except ValueError as err:
        raise defs.PmmlValidationError("mapMissingTo string \"%s\" cannot be cast as %r: %s" % (mapMissingTo, fieldType, str(err)))

    if not overwrite:
        data = NP("copy", data)
        mask = NP("copy", mask)
    else:
        data.setflags(write=True)
        mask.setflags(write=True)

    data[missing] = replacement
    mask[missing] = defs.VALID

    # A mask with no non-zero entries is represented as None.
    if not mask.any():
        mask = None

    return data, mask
示例7: evaluate
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def evaluate(self, dataTable, functionTable, performanceTable, arguments):
    """Evaluate both arguments and floor-divide the first by the second.

    Rows whose right-hand value equals zero are flagged INVALID; that
    mask is combined with the argument masks by
    C{DataColumn.mapAnyMissingInvalid}.

    @type dataTable: DataTable
    @param dataTable: Passed through to each argument's C{evaluate}.
    @type functionTable: FunctionTable
    @param functionTable: Passed through to each argument's C{evaluate}.
    @type performanceTable: PerformanceTable
    @param performanceTable: Used to time this built-in function.
    @type arguments: list of expressions (exactly two)
    @param arguments: The numerator and denominator expressions.
    @rtype: DataColumn
    @return: The floor-divided result.
    """
    arguments = [argument.evaluate(dataTable, functionTable, performanceTable) for argument in arguments]

    performanceTable.begin("built-in \"%s\"" % self.name)

    fieldType = self.fieldTypeFromSignature(arguments)
    numerator, denominator = arguments

    # Division by zero is reported through the mask.
    divideByZero = NP(denominator.data == 0.0)
    divideByZero = NP(divideByZero * defs.INVALID)
    if not divideByZero.any():
        divideByZero = None

    combinedMask = DataColumn.mapAnyMissingInvalid([divideByZero, numerator.mask, denominator.mask])
    quotient = NP("floor_divide", numerator.data, denominator.data)
    result = DataColumn(fieldType, quotient, combinedMask)

    performanceTable.end("built-in \"%s\"" % self.name)
    return result
示例8: _toDataColumn_object
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def _toDataColumn_object(self, data, mask):
    """Build a DataColumn of this field type from arbitrary objects.

    If C{data} is already a Numpy array of C{self.dtype} (and C{mask}
    is None or a Numpy array), both are used as given.  Otherwise
    C{data} is converted with C{NP.array} and a new mask is built in
    which float NaN items and non-zero input mask entries are MISSING.

    @type data: Numpy array or sequence
    @param data: The values to load.
    @type mask: Numpy array, sequence, or None
    @param mask: The initial mask; non-zero entries are treated as MISSING.
    @rtype: DataColumn
    @return: A DataColumn of this field type.
    """
    def isNaN(item):
        return isinstance(item, float) and math.isnan(item)

    data, mask = self._checkNumpy(data, mask)

    maskIsUsable = mask is None or isinstance(mask, NP.ndarray)
    alreadyConverted = isinstance(data, NP.ndarray) and maskIsUsable and data.dtype == self.dtype

    if not alreadyConverted:
        data, mask = self._checkNonNumpy(data, mask)
        data = NP.array(data, dtype=self.dtype)

        if mask is None:
            flags = (defs.MISSING if isNaN(d) else defs.VALID for d in data)
            mask = NP("fromiter", flags, dtype=defs.maskType, count=len(data))
        else:
            flags = (defs.MISSING if (m != 0 or isNaN(data[i])) else defs.VALID for i, m in enumerate(mask))
            mask = NP("fromiter", flags, dtype=defs.maskType, count=len(mask))

        # A mask with no non-zero entries is represented as None.
        if not mask.any():
            mask = None

    # Values and intervals are checked on both paths.
    data, mask = self._checkValues(data, mask)
    data, mask = self._checkIntervals(data, mask)
    return DataColumn(self, data, mask)
示例9: _checkIntervals
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def _checkIntervals(self, data, mask):
    """Flag values that fall outside of this field's intervals as INVALID.

    Every interval in C{self.intervals} is applied in turn; a value
    that violates the left or right margin of any of them is marked.
    Entries of an existing mask that are not VALID keep their flag,
    and an existing mask is updated in place.

    @type data: 1d Numpy array
    @param data: The values to check.
    @type mask: 1d Numpy array of C{defs.maskType}, or None
    @param mask: The current mask.
    @rtype: 2-tuple
    @return: C{data} and the (possibly new) mask.
    @raise defs.PmmlValidationError: If a margin string cannot be converted by C{self.stringToValue}.
    """
    intervals = self.intervals
    if len(intervals) == 0:
        return data, mask

    # Start with nothing flagged and accumulate violations.
    outside = NP("zeros", len(data), dtype=NP.dtype(bool))

    for interval in intervals:
        closure = interval["closure"]
        lower = interval.get("leftMargin")
        upper = interval.get("rightMargin")

        if lower is not None:
            try:
                lower = self.stringToValue(lower)
            except ValueError:
                raise defs.PmmlValidationError("Improper value in Interval leftMargin specification: \"%s\"" % lower)

            # An open left edge excludes the margin itself.
            if closure in ("openClosed", "openOpen"):
                outside[NP(data <= lower)] = True
            elif closure in ("closedOpen", "closedClosed"):
                outside[NP(data < lower)] = True

        if upper is not None:
            try:
                upper = self.stringToValue(upper)
            except ValueError:
                raise defs.PmmlValidationError("Improper value in Interval rightMargin specification: \"%s\"" % upper)

            # An open right edge excludes the margin itself.
            if closure in ("openOpen", "closedOpen"):
                outside[NP(data >= upper)] = True
            elif closure in ("openClosed", "closedClosed"):
                outside[NP(data > upper)] = True

    if not outside.any():
        return data, mask

    if mask is None:
        return data, NP(outside * defs.INVALID)

    # only change what wasn't already marked as MISSING
    NP("logical_and", outside, NP(mask == defs.VALID), outside)
    mask[outside] = defs.INVALID
    return data, mask
示例10: _toDataColumn_internal
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def _toDataColumn_internal(self, data, mask):
    """Build a DataColumn of this field type by converting every item
    of C{data} with C{self.stringToValue}.

    A bulk conversion is tried first.  If it raises ValueError, items
    are converted one at a time: float NaN items are stored as
    C{defs.PADDING} and flagged MISSING, and items for which
    C{stringToValue} raises ValueError or TypeError are stored as
    C{defs.PADDING} and flagged INVALID.

    @type data: sequence
    @param data: The values to convert.
    @type mask: sequence or None
    @param mask: The initial mask; true entries are treated as MISSING.
    @rtype: DataColumn
    @return: A DataColumn of this field type.
    """
    data, mask = self._checkNumpy(data, mask, tryToCast=False)
    data, mask = self._checkNonNumpy(data, mask)

    try:
        data = NP("fromiter", (self.stringToValue(d) for d in data), dtype=self.dtype, count=len(data))
        # on success, the mask is dealt with in the else clause below

    except ValueError:
        converted = NP("empty", len(data), dtype=self.dtype)
        if mask is not None:
            flags = NP("fromiter", (defs.VALID if not m else defs.MISSING for m in mask), dtype=defs.maskType, count=len(mask))
        else:
            flags = NP("zeros", len(data), dtype=defs.maskType)

        for index, item in enumerate(data):
            if isinstance(item, float) and math.isnan(item):
                converted[index] = defs.PADDING
                flags[index] = defs.MISSING
                continue
            try:
                converted[index] = self.stringToValue(item)
            except (ValueError, TypeError):
                converted[index] = defs.PADDING
                flags[index] = defs.INVALID

        # A mask with no non-zero entries is represented as None.
        if not flags.any():
            flags = None

        data, mask = converted, flags

    else:
        needsArray = mask is not None and not isinstance(mask, NP.ndarray)
        if needsArray:
            mask = NP("array", mask, dtype=defs.maskType)

    # this is the only _toDataColumn that doesn't check values and intervals because these were checked in _setup for categorical and ordinal strings
    return DataColumn(self, data, mask)
示例11: _selectAllMedianMajority
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def _selectAllMedianMajority(self, dataTable, functionTable, performanceTable, segmentation, which):
"""Used by C{calculateScore}."""
if which is self.SELECT_ALL:
performanceLabel = "Segmentation selectAll"
elif which is self.MEDIAN:
performanceLabel = "Segmentation median"
elif which is self.MAJORITY_VOTE:
performanceLabel = "Segmentation majorityVote"
elif which is self.WEIGHTED_MAJORITY_VOTE:
performanceLabel = "Segmentation weightedMajorityVote"
performanceTable.begin(performanceLabel)
scores = [[] for x in xrange(len(dataTable))]
if which is self.SELECT_ALL:
segments = [[] for x in xrange(len(dataTable))]
newOutputData = {}
for segment in segmentation.childrenOfTag("Segment", iterator=True):
performanceTable.pause(performanceLabel)
selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
performanceTable.unpause(performanceLabel)
if not selection.any():
continue
segmentName = segment.get("id")
indexes = NP("nonzero", selection)[0]
subTable = dataTable.subTable(selection)
subModel = segment.childOfClass(PmmlModel)
performanceTable.pause(performanceLabel)
subModel.calculate(subTable, functionTable, performanceTable)
performanceTable.unpause(performanceLabel)
if which is self.MEDIAN and subTable.score.fieldType.dataType in ("string", "boolean", "object"):
raise defs.PmmlValidationError("Segmentation with multipleModelMethod=\"median\" cannot be applied to models that produce dataType \"%s\"" % subTable.score.fieldType.dataType)
scoreData = subTable.score.data
scoreMask = subTable.score.mask
indexesUsed = indexes
if which is self.SELECT_ALL:
for subIndex, index in enumerate(indexes):
if scoreMask is None or scoreMask[subIndex] == defs.VALID:
scores[index].append(scoreData[subIndex])
segments[index].append(segmentName)
elif which is self.MEDIAN:
for subIndex, index in enumerate(indexes):
if scoreMask is None or scoreMask[subIndex] == defs.VALID:
scores[index].append(scoreData[subIndex])
elif which in (self.MAJORITY_VOTE, self.WEIGHTED_MAJORITY_VOTE):
if which is self.MAJORITY_VOTE:
weight = 1.0
else:
weight = float(segment.get("weight", 1.0))
for subIndex, index in enumerate(indexes):
if scoreMask is None or scoreMask[subIndex] == defs.VALID:
newValue = scoreData[subIndex]
score = scores[index]
found = False
for pair in score:
if pair[0] == newValue:
pair[1] += weight
found = True
break
if not found:
score.append([newValue, weight])
if which is self.SELECT_ALL:
for fieldName, dataColumn in subTable.output.items():
newData = newOutputData.get(fieldName)
if newData is None:
newData = [[] for x in xrange(len(dataTable))]
newOutputData[fieldName] = newData
dataColumnData = dataColumn.data
dataColumnMask = dataColumn.mask
for subIndex, index in enumerate(indexes):
if scoreMask is None or scoreMask[subIndex] == defs.VALID:
if dataColumnMask is None or dataColumnMask[subIndex] == defs.VALID:
newData[index].append(dataColumnData[subIndex])
else:
newData[index].append(None)
if which is self.SELECT_ALL:
for fieldName, newData in newOutputData.items():
finalNewData = NP("empty", len(dataTable), dtype=NP.dtype(object))
for index, newDatum in enumerate(newData):
finalNewData[index] = tuple(newDatum)
dataTable.output[fieldName] = DataColumn(self.scoreType, finalNewData, None)
finalScoresData = NP("empty", len(dataTable), dtype=NP.dtype(object))
for index, score in enumerate(scores):
finalScoresData[index] = tuple(score)
finalScores = DataColumn(self.scoreType, finalScoresData, None)
if self.name is None:
performanceTable.end(performanceLabel)
#.........这里部分代码省略.........
示例12: _sumAverageWeighted
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def _sumAverageWeighted(self, dataTable, functionTable, performanceTable, segmentation, which):
    """Used by C{calculateScore}.

    Combines the scores of all matching segments by sum, average or
    weighted average, depending on C{which}.

    @type dataTable: DataTable
    @param dataTable: The data to score.
    @type functionTable: FunctionTable
    @param functionTable: Passed through to predicates and sub-models.
    @type performanceTable: PerformanceTable
    @param performanceTable: Used to time this calculation; paused while predicates and sub-models run.
    @type segmentation: PMML element
    @param segmentation: The element whose "Segment" children are evaluated.
    @type which: one of C{self.SUM}, C{self.AVERAGE}, C{self.WEIGHTED_AVERAGE}
    @param which: The combination method.
    @rtype: dict
    @return: A dictionary with the single key None, mapping to the score DataColumn.
    @raise defs.PmmlValidationError: If a sub-model produces a "string", "boolean" or "object" score.
    """
    # If C{which} is none of the three constants, performanceLabel is
    # never assigned and the begin() call below fails.
    if which is self.SUM:
        performanceLabel = "Segmentation sum"
    elif which is self.AVERAGE:
        performanceLabel = "Segmentation average"
    elif which is self.WEIGHTED_AVERAGE:
        performanceLabel = "Segmentation weightedAverage"
    performanceTable.begin(performanceLabel)
    # Accumulators, one entry per row of the full dataTable.
    scoresData = NP("zeros", len(dataTable), dtype=NP.dtype(object))
    if which is not self.SUM:
        denominator = NP("zeros", len(dataTable), dtype=NP.dtype(float))
    invalid = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
    for segment in segmentation.childrenOfTag("Segment", iterator=True):
        # Time spent in the predicate is not charged to this label.
        performanceTable.pause(performanceLabel)
        selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
        performanceTable.unpause(performanceLabel)
        # Segments that match no rows contribute nothing.
        if not selection.any():
            continue
        subTable = dataTable.subTable(selection)
        subModel = segment.childOfClass(PmmlModel)
        performanceTable.pause(performanceLabel)
        subModel.calculate(subTable, functionTable, performanceTable)
        performanceTable.unpause(performanceLabel)
        if subTable.score.fieldType.dataType in ("string", "boolean", "object"):
            raise defs.PmmlValidationError("Segmentation with multipleModelMethod=\"%s\" cannot be applied to models that produce dataType \"%s\"" % (self.childOfTag("Segmentation").get("multipleModelMethod"), subTable.score.fieldType.dataType))
        # ignore invalid in matches (like the built-in "+" and "avg" Apply functions)
        # NOTE(review): this combines selection (built from dataTable) with
        # the sub-table's score mask element-wise, and the accumulations
        # below assign subTable.score.data into scoresData[selection] after
        # selection has been narrowed -- assumes the shapes agree; confirm.
        if subTable.score.mask is not None:
            NP("logical_and", selection, NP(subTable.score.mask == defs.VALID), selection)
        if which is self.SUM:
            scoresData[selection] += subTable.score.data
        if which is self.AVERAGE:
            scoresData[selection] += subTable.score.data
            denominator[selection] += 1.0
        elif which is self.WEIGHTED_AVERAGE:
            # A Segment without a "weight" attribute counts with weight 1.0.
            weight = float(segment.get("weight", 1.0))
            scoresData[selection] += (subTable.score.data * weight)
            denominator[selection] += weight
        if subTable.score.mask is not None:
            invalid[selection] = NP("logical_or", invalid[selection], NP(subTable.score.mask != defs.VALID))
    if which is not self.SUM:
        # Rows with a zero denominator have no average; mark them invalid
        # and divide only the remaining rows.
        NP("logical_or", invalid, NP(denominator == 0.0), invalid)
        valid = NP("logical_not", invalid)
        scoresData[valid] /= denominator[valid]
    # The mask is None when no row is invalid.
    if invalid.any():
        scoresMask = NP(NP("array", invalid, dtype=defs.maskType) * defs.INVALID)
    else:
        scoresMask = None
    scores = DataColumn(self.scoreType, scoresData, scoresMask)
    performanceTable.end(performanceLabel)
    return {None: scores}
示例13: _selectFirst
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def _selectFirst(self, dataTable, functionTable, performanceTable, segmentation):
    """Used by C{calculateScore}.

    Scores each row with the first segment whose predicate selects it.
    Segments are visited in document order; rows already scored by an
    earlier segment are excluded from later ones, and the loop stops
    as soon as every row has been scored.  Output fields produced by
    the sub-models are merged into C{dataTable.output}.

    @type dataTable: DataTable
    @param dataTable: The data to score.
    @type functionTable: FunctionTable
    @param functionTable: Passed through to predicates and sub-models.
    @type performanceTable: PerformanceTable
    @param performanceTable: Used to time this calculation; paused while predicates and sub-models run.
    @type segmentation: PMML element
    @param segmentation: The element whose "Segment" children are evaluated.
    @rtype: dict
    @return: A dictionary mapping None to the score DataColumn and, if C{self.name} is not None, "segment" to a DataColumn of segment ids.
    """
    performanceTable.begin("Segmentation selectFirst")

    scoresData = NP("empty", len(dataTable), dtype=NP.dtype(object))
    scoresMask = NP("zeros", len(dataTable), dtype=defs.maskType)
    # Rows that have not been claimed by any segment yet.
    unfilled = NP("ones", len(dataTable), dtype=NP.dtype(bool))
    segments = NP("empty", len(dataTable), dtype=NP.dtype(object))

    # Output columns created here; they stay unlocked until the end.
    newOutputData = []

    for segment in segmentation.childrenOfTag("Segment", iterator=True):
        performanceTable.pause("Segmentation selectFirst")
        selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
        performanceTable.unpause("Segmentation selectFirst")

        # Only rows that no earlier segment has scored are eligible.
        NP("logical_and", selection, unfilled, selection)
        if not selection.any():
            continue

        subTable = dataTable.subTable(selection)
        subModel = segment.childOfClass(PmmlModel)
        performanceTable.pause("Segmentation selectFirst")
        subModel.calculate(subTable, functionTable, performanceTable)
        performanceTable.unpause("Segmentation selectFirst")

        scoresData[selection] = subTable.score.data
        if subTable.score.mask is not None:
            scoresMask[selection] = subTable.score.mask
        else:
            scoresMask[selection] = defs.VALID

        segmentName = segment.get("id")
        if segmentName is not None:
            segments[selection] = segmentName

        for fieldName, dataColumn in subTable.output.items():
            if fieldName not in dataTable.output:
                # First time this output field appears: rows outside of
                # the selection start out MISSING.
                data = NP("empty", len(dataTable), dtype=dataColumn.fieldType.dtype)
                data[selection] = dataColumn.data

                mask = NP(NP("ones", len(dataTable), dtype=defs.maskType) * defs.MISSING)
                if dataColumn.mask is None:
                    mask[selection] = defs.VALID
                else:
                    mask[selection] = dataColumn.mask

                newDataColumn = DataColumn(dataColumn.fieldType, data, mask)
                newDataColumn._unlock()
                dataTable.output[fieldName] = newDataColumn
                newOutputData.append(newDataColumn)

            else:
                newDataColumn = dataTable.output[fieldName]

                newDataColumn.data[selection] = dataColumn.data
                if dataColumn.mask is None:
                    newDataColumn.mask[selection] = defs.VALID
                else:
                    newDataColumn.mask[selection] = dataColumn.mask

        # Remove the rows just scored from the unfilled set.  This used to
        # be "unfilled -= selection", but the "-" operator on boolean
        # arrays raises TypeError in NumPy 1.13 and later.  selection is a
        # subset of unfilled here, so and-not gives the intended result.
        NP("logical_and", unfilled, NP("logical_not", selection), unfilled)
        if not unfilled.any():
            break

    for newDataColumn in newOutputData:
        # A mask with no non-zero entries is represented as None.
        if not newDataColumn.mask.any():
            newDataColumn._mask = None
        newDataColumn._lock()

    if not scoresMask.any():
        scoresMask = None

    scores = DataColumn(self.scoreType, scoresData, scoresMask)

    performanceTable.end("Segmentation selectFirst")
    if self.name is None:
        return {None: scores}
    return {None: scores, "segment": DataColumn(self.scoreTypeSegment, segments, None)}
示例14: format
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def format(self, subTable, functionTable, performanceTable, score):
"""Extract or post-process output for the output field of a DataTable.
@type subTable: DataTable
@param subTable: The DataTable associated with this local lexical scope.
@type functionTable: FunctionTable or None
@param functionTable: A table of functions.
@type performanceTable: PerformanceTable or None
@param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
@type score: dict
@param score: Dictionary mapping PMML score "feature" strings to DataColumns. This dictionary always contains a None key, which is the basic feature ("predictedValue").
@rtype: DataColumn
@return: The output that would go into an output field of a DataTable.
"""
performanceTable.begin("OutputField")
feature = self.get("feature")
if feature is None:
dataColumn = subTable.fields[self["name"]]
elif feature == "predictedValue":
dataColumn = score[None]
elif feature == "predictedDisplayValue":
original = score[None]
toString = original.fieldType.valueToString
data = NP("empty", len(subTable), dtype=NP.dtype(object))
for i, x in enumerate(original.data):
data[i] = toString(x)
dataColumn = DataColumn(FakeFieldType("string", "continuous"), data, None)
elif feature == "transformedValue":
expression = self.childOfClass(PmmlExpression)
if expression is None:
raise defs.PmmlValidationError("OutputField with feature \"transformedValue\" requires an EXPRESSION")
performanceTable.pause("OutputField")
dataColumn = expression.evaluate(subTable, functionTable, performanceTable)
performanceTable.unpause("OutputField")
elif feature == "decision":
decisions = self.childOfTag("Decisions")
if decisions is None:
raise defs.PmmlValidationError("OutputField with feature \"decision\" requires a Decisions block")
performanceTable.pause("OutputField")
dataColumn = self.childOfClass(PmmlExpression).evaluate(subTable, functionTable, performanceTable)
performanceTable.unpause("OutputField")
if dataColumn.mask is None:
valid = None
else:
valid = NP(dataColumn.mask == defs.VALID)
fieldType = FakeFieldType("object", "any")
data = NP("empty", len(subTable), dtype=fieldType.dtype)
mask = NP(NP("ones", len(subTable), dtype=defs.maskType) * defs.MISSING)
for decision in decisions.childrenOfTag("Decision"):
value = dataColumn.fieldType.stringToValue(decision["value"])
selection = NP(dataColumn.data == value)
if valid is not None:
NP("logical_and", selection, valid, selection)
for i in xrange(len(data)):
if selection[i]:
data[i] = decision
mask[selection] = defs.VALID
if not mask.any():
mask = None
dataColumn = DataColumn(fieldType, data, mask)
elif feature in score:
dataColumn = score[feature]
else:
model = self.getparent()
if model is not None: model = model.getparent()
if model is None:
model = "(orphaned OutputField; no parent model)"
else:
model = model.t
raise defs.PmmlValidationError("Models of type %s do not produce \"%s\" features (or at least, it is not yet implemented by Augustus)" % (model, feature))
dataType = self.get("dataType", dataColumn.fieldType.dataType)
optype = self.get("optype", dataColumn.fieldType.optype)
if (dataType != dataColumn.fieldType.dataType or optype != dataColumn.fieldType.optype) and feature not in ("predictedDisplayValue", "decision"):
dataColumn = FieldCastMethods.cast(FakeFieldType(dataType, optype), dataColumn)
if feature is not None:
subTable.fields[self.get("displayName", self["name"])] = dataColumn
performanceTable.end("OutputField")
#.........这里部分代码省略.........
示例15: calculateScore
# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
def calculateScore(self, dataTable, functionTable, performanceTable):
"""Calculate the score of this model.
This method is called by C{calculate} to separate operations
that are performed by all models (in C{calculate}) from
operations that are performed by specific models (in
C{calculateScore}).
@type subTable: DataTable
@param subTable: The DataTable representing this model's lexical scope.
@type functionTable: FunctionTable or None
@param functionTable: A table of functions.
@type performanceTable: PerformanceTable or None
@param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
@rtype: DataColumn
@return: A DataColumn containing the score.
"""
performanceTable.begin("ClusteringModel")
performanceTable.begin("set up")
distributionBased = (self["modelClass"] == "distributionBased")
clusteringFields = self.xpath("pmml:ClusteringField[not(@isCenterField='false')]")
fieldWeights = [clusteringField.get("fieldWeight", defaultFromXsd=True, convertType=True) for clusteringField in clusteringFields]
for fieldWeight in fieldWeights:
if fieldWeight < 0.0:
raise defs.PmmlValidationError("ClusteringField fieldWeights must all be non-negative (encountered %g)" % fieldWeight)
clusters = self.xpath("pmml:Cluster")
comparisonMeasure = self.childOfClass(ComparisonMeasure)
defaultCompareFunction = comparisonMeasure.get("compareFunction", defaultFromXsd=True)
metric = comparisonMeasure.childOfClass(PmmlClusteringMetric)
metrictag = metric.t
performanceTable.end("set up")
for clusteringField in clusteringFields:
dataType = dataTable.fields[clusteringField["field"]].fieldType.dataType
if dataType == "string":
raise defs.PmmlValidationError("ClusteringField \"%s\" has dataType \"%s\", which cannot be used for clustering" % (clusteringField["field"], dataType))
missingValueWeights = self.childOfTag("MissingValueWeights")
if missingValueWeights is None:
adjustM = None
else:
performanceTable.begin("MissingValueWeights")
missingWeights = missingValueWeights.childOfClass(PmmlArray).values(convertType=True)
sumNMqi = NP("zeros", len(dataTable), dtype=NP.dtype(float))
for clusteringField, missingWeight in zip(clusteringFields, missingWeights):
clusteringField.addToAdjustM(dataTable, functionTable, performanceTable, sumNMqi, missingWeight)
adjustM = NP(sum(missingWeights) / sumNMqi)
adjustM[NP(sumNMqi == 0.0)] = 1.0
performanceTable.end("MissingValueWeights")
anyInvalid = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
for clusteringField in clusteringFields:
mask = dataTable.fields[clusteringField["field"]].mask
if mask is not None:
NP("logical_or", anyInvalid, NP(mask == defs.INVALID), anyInvalid)
bestClusterId = None
bestClusterAffinity = None
allClusterAffinities = {}
for index, cluster in enumerate(clusters):
array = cluster.childOfClass(PmmlArray)
if array is None:
raise defs.PmmlValidationError("Cluster must have an array to designate its center")
centerStrings = array.values(convertType=False)
if len(centerStrings) != len(clusteringFields):
raise defs.PmmlValidationError("Cluster array has %d components, but there are %d ClusteringFields with isCenterField=true" % (len(centerStrings), len(clusteringFields)))
performanceTable.begin(metrictag)
if distributionBased:
matrix = cluster.xpath("pmml:Covariances/pmml:Matrix")
if len(matrix) != 1:
raise defs.PmmlValidationError("In distribution-based clustering, all clusters must have a Covariances/Matrix")
try:
covarianceMatrix = NP("array", matrix[0].values(), dtype=NP.dtype(float))
except ValueError:
raise defs.PmmlValidationError("Covariances/Matrix must contain real numbers for distribution-based clustering")
else:
covarianceMatrix = None
state = self._State()
metric.initialize(state, len(dataTable), len(clusteringFields), distributionBased)
for clusteringField, centerString, fieldWeight in zip(clusteringFields, centerStrings, fieldWeights):
if isinstance(metric, PmmlClusteringMetricBinary):
metric.accumulateBinary(state, dataTable.fields[clusteringField["field"]], centerString, distributionBased)
else:
performanceTable.pause(metrictag)
#.........这里部分代码省略.........