当前位置: 首页>>代码示例>>Python>>正文


Python NP.any方法代码示例

本文整理汇总了Python中augustus.core.NumpyInterface.NP.any方法的典型用法代码示例。如果您正苦于以下问题:Python NP.any方法的具体用法?Python NP.any怎么用?Python NP.any使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在augustus.core.NumpyInterface.NP的用法示例。


在下文中一共展示了NP.any方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: maskInvalid

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def maskInvalid(self, data, mask):
        """Helper method to replace NaN and infinite values with
        INVALID after a potentially dangerous operation.

        Example::

            result = NP("log", dataColumn.data)    # log(0) = -inf, log(-x) = nan
            resultMask = self.maskInvalid(result, dataColumn.mask)
            return DataColumn(fakeFieldType, result, resultMask)

        The input C{data} and C{mask} are not modified by this
        method; a substitute mask is returned.  Only entries that are
        currently VALID are changed to INVALID; entries that already
        carry another flag keep it.  A mask with no set entries is
        collapsed to None.

        @type data: 1d Numpy array
        @param data: The dataset that may contain NaN and infinite values.
        @type mask: 1d Numpy array of C{defs.maskType}, or None
        @param mask: The original mask.
        @rtype: 1d Numpy array of C{defs.maskType}, or None
        @return: The new mask.
        """

        bad = NP("logical_not", NP("isfinite", data))
        if bad.any():
            if mask is None:
                mask = bad * defs.INVALID
            else:
                # Restrict to entries that are VALID so far, so that
                # existing non-VALID flags are never overwritten.
                NP("logical_and", bad, NP(mask == defs.VALID), bad)
                if bad.any():
                    # Always write into a private copy: the caller's
                    # mask must stay untouched whether or not it
                    # happens to be writeable.
                    mask = NP("copy", mask)
                    mask.setflags(write=True)
                    mask[bad] = defs.INVALID
        if mask is not None and not mask.any():
            mask = None
        return mask
开发者ID:Huskyeder,项目名称:augustus,代码行数:36,代码来源:Function.py

示例2: _toDataColumn_number

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def _toDataColumn_number(self, data, mask):
        """Build a DataColumn of C{self.dtype} values from C{data} and C{mask}.

        Three paths are taken, depending on the input:

          - C{data} is already a Numpy array of C{self.dtype} (and
            C{mask} is None or a Numpy array): NaN entries are flagged
            MISSING.
          - C{data} converts to a Numpy array in a single call: the
            mask is rebuilt from the NaN positions combined with the
            nonzero entries of the incoming mask.
          - the bulk conversion raises ValueError or TypeError: values
            are converted one at a time.  NaN floats and the string
            "NAN" (any case) become MISSING; any other value that
            cannot be stored (including None) becomes INVALID, and
            C{defs.PADDING} is stored as the datum.

        The result is then passed through C{self._checkValues} and
        C{self._checkIntervals}.

        @param data: The input values (Numpy array or another sequence; presumably whatever C{_checkNumpy}/C{_checkNonNumpy} accept).
        @param mask: The input mask, or None.
        @rtype: DataColumn
        @return: A DataColumn of this field type.
        """
        data, mask = self._checkNumpy(data, mask)
        if isinstance(data, NP.ndarray) and (mask is None or isinstance(mask, NP.ndarray)) and data.dtype == self.dtype:
            # Fast path: data already has the right dtype; only NaNs need flagging.
            mask2 = NP("isnan", data)
            if mask is None:
                mask = NP("array", mask2, defs.maskType) * defs.MISSING
            else:
                # NOTE(review): this writes into the mask object received
                # from _checkNumpy; if that is the caller's array, the
                # caller's mask is modified in place -- confirm.
                mask[mask2] = defs.MISSING
            # NOTE(review): unlike the other two paths, an all-zero mask
            # is not collapsed to None here -- confirm this is intended.

        else:
            data, mask = self._checkNonNumpy(data, mask)
            try:
                data = NP("array", data, dtype=self.dtype)
                # mask is handled in the else statement after the except block

            except (ValueError, TypeError):
                # Bulk conversion failed: convert element by element so
                # that a single bad value only invalidates its own row.
                data2 = NP("empty", len(data), dtype=self.dtype)
                if mask is None:
                    mask2 = NP("zeros", len(data), dtype=defs.maskType)
                else:
                    # Any truthy entry of the incoming mask is treated as MISSING.
                    mask2 = NP("fromiter", ((defs.VALID if not m else defs.MISSING) for m in mask), dtype=defs.maskType, count=len(mask))

                for i, v in enumerate(data):
                    try:
                        data2[i] = v
                        if mask2[i] == defs.VALID and ((isinstance(v, float) and math.isnan(v)) or (isinstance(v, basestring) and v.upper() == "NAN")):
                            mask2[i] = defs.MISSING
                        if v is None:
                            # Force None into the failure branch below even
                            # if the assignment above did not raise.
                            raise TypeError
                    except (ValueError, TypeError):
                        data2[i] = defs.PADDING
                        # Rows already flagged by the incoming mask keep their flag.
                        if mask2[i] == defs.VALID:
                            if (isinstance(v, float) and math.isnan(v)) or (isinstance(v, basestring) and v.upper() == "NAN"):
                                mask2[i] = defs.MISSING
                            else:
                                mask2[i] = defs.INVALID

                if not mask2.any():
                    mask2 = None

                data, mask = data2, mask2

            else:
                # Bulk conversion succeeded: flag NaNs, merged with the incoming mask.
                mask2 = NP("isnan", data)
                if mask is None:
                    # NOTE(review): no "* defs.MISSING" here, unlike the other
                    # branches; equivalent only if defs.MISSING == 1 -- confirm.
                    mask = NP("array", mask2, defs.maskType)
                else:
                    mask = NP(NP("array", NP("logical_or", mask2, NP("fromiter", (m != 0 for m in mask), dtype=NP.dtype(bool), count=len(mask))), defs.maskType) * defs.MISSING)
                if not mask.any():
                    mask = None

        data, mask = self._checkValues(data, mask)
        data, mask = self._checkIntervals(data, mask)
        return DataColumn(self, data, mask)
开发者ID:Huskyeder,项目名称:augustus,代码行数:56,代码来源:FieldType.py

示例3: _toDataColumn_dateTime

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def _toDataColumn_dateTime(self, data, mask):
        """Build a DataColumn by converting each item with C{self.stringToValue}.

        Rows flagged by the incoming mask, NaN floats and the string
        "NAN" (any case) become MISSING; rows whose conversion raises
        ValueError or TypeError become INVALID.  In both cases
        C{defs.PADDING} is stored as the datum.  The result is passed
        through C{self._checkValues} and C{self._checkIntervals}.

        @param data: The input values.
        @param mask: The input mask, or None.
        @rtype: DataColumn
        @return: A DataColumn of this field type.
        """
        data, mask = self._checkNumpy(data, mask, tryToCast=False)
        data, mask = self._checkNonNumpy(data, mask)

        length = len(data)
        values = NP("empty", length, dtype=self.dtype)
        flags = NP("zeros", length, dtype=defs.maskType)

        for index, item in enumerate(data):
            # Decide whether this row counts as missing before trying to convert it.
            if mask is not None and mask[index]:
                missing = True
            elif isinstance(item, float):
                missing = math.isnan(item)
            elif isinstance(item, basestring):
                missing = (item.upper() == "NAN")
            else:
                missing = False

            if missing:
                values[index] = defs.PADDING
                flags[index] = defs.MISSING
                continue

            try:
                values[index] = self.stringToValue(item)
            except (ValueError, TypeError):
                values[index] = defs.PADDING
                flags[index] = defs.INVALID

        # A mask with nothing set is represented as None.
        newMask = flags if flags.any() else None

        data, mask = self._checkValues(values, newMask)
        data, mask = self._checkIntervals(data, mask)
        return DataColumn(self, data, mask)
开发者ID:Huskyeder,项目名称:augustus,代码行数:28,代码来源:FieldType.py

示例4: functionMax

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def functionMax(self, dataColumn, whereMask, groupSelection, getstate, setstate):
        """Finds the running maximum of rows in a DataColumn, possibly with an SQL where mask and groupField.

        Each output row holds the largest selected value seen up to
        and including that row (seeded with the stored state, if any).
        Rows reached before any maximum is known are marked INVALID.

        @type dataColumn: DataColumn
        @param dataColumn: The input data column.
        @type whereMask: 1d Numpy array of bool, or None
        @param whereMask: The result of the SQL where selection.
        @type groupSelection: 1d Numpy array of bool, or None.
        @param groupSelection: Rows corresponding to a particular value of the groupField.
        @type getstate: callable function
        @param getstate: Retrieve starting values from the DataTableState.
        @type setstate: callable function
        @param setstate: Store ending values to the DataTableState.
        @rtype: DataColumn
        @return: A column of maximized rows.
        @raise defs.PmmlValidationError: If the input field's optype is neither continuous nor ordinal.
        """

        fieldType = dataColumn.fieldType

        if fieldType.optype not in ("continuous", "ordinal"):
            raise defs.PmmlValidationError("Aggregate function \"max\" requires a continuous or ordinal input field")

        # Start from the VALID rows, then narrow by the optional selections.
        if dataColumn.mask is None:
            selection = NP("ones", len(dataColumn), dtype=NP.dtype(bool))
        else:
            selection = NP(dataColumn.mask == defs.VALID)

        if whereMask is not None:
            NP("logical_and", selection, whereMask, selection)

        if groupSelection is not None:
            NP("logical_and", selection, groupSelection, selection)

        # Resume from a previously stored maximum, if there is one.
        maximum = None
        if getstate is not None:
            startingState = getstate()
            if startingState is not None:
                maximum = startingState

        data = NP("empty", len(dataColumn), dtype=fieldType.dtype)
        mask = NP("zeros", len(dataColumn), dtype=defs.maskType)

        for i, x in enumerate(dataColumn.data):
            if selection[i]:
                if maximum is None or x > maximum:
                    maximum = x
            if maximum is None:
                # Nothing selected yet: there is no maximum to report.
                mask[i] = defs.INVALID
            else:
                data[i] = maximum

        if not mask.any():
            mask = None

        if setstate is not None:
            setstate(maximum)

        return DataColumn(fieldType, data, mask)
开发者ID:Huskyeder,项目名称:augustus,代码行数:60,代码来源:Aggregate.py

示例5: functionAverage

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def functionAverage(self, dataColumn, whereMask, groupSelection, getstate, setstate):
        """Computes the running average of rows in a DataColumn, possibly with an SQL where mask and groupField.

        The output at each row is the cumulative sum of the selected
        values divided by the cumulative count of selected rows (both
        seeded with the stored state, if any).  Rows whose quotient is
        not finite are marked INVALID.

        @type dataColumn: DataColumn
        @param dataColumn: The input data column.
        @type whereMask: 1d Numpy array of bool, or None
        @param whereMask: The result of the SQL where selection.
        @type groupSelection: 1d Numpy array of bool, or None.
        @param groupSelection: Rows corresponding to a particular value of the groupField.
        @type getstate: callable function
        @param getstate: Retrieve starting values from the DataTableState.
        @type setstate: callable function
        @param setstate: Store ending values to the DataTableState.
        @rtype: DataColumn
        @return: A column of averaged rows.
        @raise defs.PmmlValidationError: If the input field is not "integer", "float" or "double".
        """

        resultType = FakeFieldType("double", "continuous")

        if dataColumn.fieldType.dataType not in ("integer", "float", "double"):
            raise defs.PmmlValidationError("Aggregate function \"average\" requires a numeric input field: \"integer\", \"float\", \"double\"")

        rowCount = len(dataColumn)
        hasRows = rowCount > 0

        # One weight per row: 1 where the row takes part in the average, else 0.
        weights = NP("ones", rowCount, dtype=resultType.dtype)

        filters = []
        inputMask = dataColumn.mask
        if inputMask is not None:
            filters.append(NP(inputMask == defs.VALID))
        if whereMask is not None:
            filters.append(whereMask)
        if groupSelection is not None:
            filters.append(groupSelection)
        for keep in filters:
            NP("logical_and", weights, keep, weights)

        sums = NP("multiply", weights, dataColumn.data)

        # Fold the stored (sum, count) pair into the first row so the
        # cumulative sums below continue from the earlier state.
        if getstate is not None and hasRows:
            previous = getstate()
            if previous is not None:
                previousSum, previousCount = previous
                sums[0] += previousSum
                weights[0] += previousCount

        sums = NP("cumsum", sums)
        weights = NP("cumsum", weights)

        data = NP(sums / weights)
        mask = NP(NP("logical_not", NP("isfinite", data)) * defs.INVALID)
        if not mask.any():
            mask = None

        if setstate is not None and hasRows:
            setstate((sums[-1], weights[-1]))

        return DataColumn(resultType, data, mask)
开发者ID:Huskyeder,项目名称:augustus,代码行数:55,代码来源:Aggregate.py

示例6: applyMapMissingTo

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def applyMapMissingTo(fieldType, data, mask, mapMissingTo, overwrite=False):
        """Substitute a given value for every MISSING entry.

        Unless C{overwrite} is True the inputs are left alone and
        modified copies are returned.  Example use::

            data, mask = dataColumn.data, dataColumn.mask
            data, mask = FieldCastMethods.applyMapMissingTo(dataColumn.fieldType, data, mask, "-999")
            return DataColumn(dataColumn.fieldType, data, mask)

        It can also be combined with the other FieldCastMethods.

        @type fieldType: FieldType
        @param fieldType: The data fieldType (used to interpret C{mapMissingTo}).
        @type data: 1d Numpy array
        @param data: The data.
        @type mask: 1d Numpy array of dtype defs.maskType, or None
        @param mask: The mask.
        @type mapMissingTo: string
        @param mapMissingTo: The replacement value, represented as a string (e.g. directly from a PMML attribute).  If None, nothing is replaced.
        @type overwrite: bool
        @param overwrite: If True, make the original arrays writeable and modify them in place.
        @rtype: 2-tuple of 1d Numpy arrays
        @return: The new data and mask; the mask is None if no entry remains flagged.
        @raise defs.PmmlValidationError: If C{mapMissingTo} cannot be converted by C{fieldType.stringToValue}.
        """

        # Nothing to do without a mask or without a replacement value.
        if mask is None or mapMissingTo is None:
            return data, mask

        missing = NP(mask == defs.MISSING)
        try:
            replacement = fieldType.stringToValue(mapMissingTo)
        except ValueError as err:
            raise defs.PmmlValidationError("mapMissingTo string \"%s\" cannot be cast as %r: %s" % (mapMissingTo, fieldType, str(err)))

        if not overwrite:
            data = NP("copy", data)
            mask = NP("copy", mask)
        else:
            data.setflags(write=True)
            mask.setflags(write=True)

        data[missing] = replacement
        mask[missing] = defs.VALID

        return data, (mask if mask.any() else None)
开发者ID:Huskyeder,项目名称:augustus,代码行数:52,代码来源:FieldCastMethods.py

示例7: evaluate

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
        def evaluate(self, dataTable, functionTable, performanceTable, arguments):
            """Floor-divide the first argument by the second, row by row.

            Rows whose divisor equals zero are marked INVALID; that
            mask is merged with both operand masks through
            C{DataColumn.mapAnyMissingInvalid}.

            @param dataTable: Passed to each argument's C{evaluate}.
            @param functionTable: Passed to each argument's C{evaluate}.
            @param performanceTable: Used to time this built-in; also passed to each argument's C{evaluate}.
            @param arguments: Exactly two expressions: the dividend and the divisor.
            @rtype: DataColumn
            @return: The quotient column.
            """
            operands = []
            for argument in arguments:
                operands.append(argument.evaluate(dataTable, functionTable, performanceTable))

            label = "built-in \"%s\"" % self.name
            performanceTable.begin(label)

            fieldType = self.fieldTypeFromSignature(operands)
            left, right = operands

            # Flag division by zero; None means "no such rows".
            divideByZero = NP(NP(right.data == 0.0) * defs.INVALID)
            if not divideByZero.any():
                divideByZero = None

            mask = DataColumn.mapAnyMissingInvalid([divideByZero, left.mask, right.mask])
            quotient = NP("floor_divide", left.data, right.data)
            result = DataColumn(fieldType, quotient, mask)

            performanceTable.end(label)
            return result
开发者ID:Huskyeder,项目名称:augustus,代码行数:19,代码来源:FunctionTableExtra.py

示例8: _toDataColumn_object

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def _toDataColumn_object(self, data, mask):
        """Build a DataColumn of C{self.dtype} objects from C{data} and C{mask}.

        Input that is already a Numpy array of the right dtype (with a
        Numpy or absent mask) is used as it is.  Otherwise the data are
        converted, and a mask is built in which rows with a nonzero
        incoming mask entry or a NaN float datum are MISSING.  The
        result is passed through C{self._checkValues} and
        C{self._checkIntervals}.

        @param data: The input values.
        @param mask: The input mask, or None.
        @rtype: DataColumn
        @return: A DataColumn of this field type.
        """
        data, mask = self._checkNumpy(data, mask)

        alreadyTyped = isinstance(data, NP.ndarray) and (mask is None or isinstance(mask, NP.ndarray)) and data.dtype == self.dtype
        if not alreadyTyped:
            data, mask = self._checkNonNumpy(data, mask)
            data = NP.array(data, dtype=self.dtype)

            def isNaN(datum):
                return isinstance(datum, float) and math.isnan(datum)

            if mask is None:
                flagCount = len(data)
                flagSource = (defs.MISSING if isNaN(datum) else defs.VALID for datum in data)
            else:
                flagCount = len(mask)
                flagSource = (defs.MISSING if (flag != 0 or isNaN(data[position])) else defs.VALID for position, flag in enumerate(mask))

            mask = NP("fromiter", flagSource, dtype=defs.maskType, count=flagCount)
            if not mask.any():
                mask = None

        data, mask = self._checkValues(data, mask)
        data, mask = self._checkIntervals(data, mask)
        return DataColumn(self, data, mask)
开发者ID:Huskyeder,项目名称:augustus,代码行数:21,代码来源:FieldType.py

示例9: _checkIntervals

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def _checkIntervals(self, data, mask):
        """Mark values lying outside the bounds in C{self.intervals} as INVALID.

        A row is flagged as soon as it violates the left or right
        margin of any one interval.  Rows that are already non-VALID
        in an existing mask keep their flag.

        @param data: The values to check.
        @param mask: The current mask, or None.
        @rtype: 2-tuple
        @return: C{data} and the (possibly new or updated) mask.
        @raise defs.PmmlValidationError: If a margin string cannot be converted by C{self.stringToValue}.
        """
        intervals = self.intervals
        if len(intervals) == 0:
            return data, mask

        # Every row starts out as acceptable; bounds only ever add flags.
        outOfRange = NP("zeros", len(data), dtype=NP.dtype(bool))
        for interval in intervals:
            closure = interval["closure"]
            lowerText = interval.get("leftMargin")
            upperText = interval.get("rightMargin")

            if lowerText is not None:
                try:
                    lower = self.stringToValue(lowerText)
                except ValueError:
                    raise defs.PmmlValidationError("Improper value in Interval leftMargin specification: \"%s\"" % lowerText)

                # An open left end excludes the margin itself.
                if closure in ("openClosed", "openOpen"):
                    outOfRange[NP(data <= lower)] = True
                elif closure in ("closedOpen", "closedClosed"):
                    outOfRange[NP(data < lower)] = True

            if upperText is not None:
                try:
                    upper = self.stringToValue(upperText)
                except ValueError:
                    raise defs.PmmlValidationError("Improper value in Interval rightMargin specification: \"%s\"" % upperText)

                # An open right end excludes the margin itself.
                if closure in ("openOpen", "closedOpen"):
                    outOfRange[NP(data >= upper)] = True
                elif closure in ("openClosed", "closedClosed"):
                    outOfRange[NP(data > upper)] = True

        if not outOfRange.any():
            return data, mask

        if mask is None:
            return data, NP(outOfRange * defs.INVALID)

        # Only downgrade rows that were VALID; other flags are left as they are.
        NP("logical_and", outOfRange, NP(mask == defs.VALID), outOfRange)
        mask[outOfRange] = defs.INVALID
        return data, mask
开发者ID:Huskyeder,项目名称:augustus,代码行数:45,代码来源:FieldType.py

示例10: _toDataColumn_internal

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def _toDataColumn_internal(self, data, mask):
        """Build a DataColumn by converting every item with C{self.stringToValue}.

        A bulk conversion is tried first.  If it raises ValueError,
        items are converted one at a time: NaN floats become MISSING
        and items whose conversion raises ValueError or TypeError
        become INVALID, with C{defs.PADDING} stored as the datum.

        Unlike the other _toDataColumn methods, values and intervals
        are not checked here, because these were checked in _setup for
        categorical and ordinal strings.

        @param data: The input values.
        @param mask: The input mask, or None.
        @rtype: DataColumn
        @return: A DataColumn of this field type.
        """
        data, mask = self._checkNumpy(data, mask, tryToCast=False)
        data, mask = self._checkNonNumpy(data, mask)

        try:
            bulk = NP("fromiter", (self.stringToValue(d) for d in data), dtype=self.dtype, count=len(data))
        except ValueError:
            bulk = None

        if bulk is not None:
            # Everything converted; just make sure the mask is a Numpy array.
            if mask is not None and not isinstance(mask, NP.ndarray):
                mask = NP("array", mask, dtype=defs.maskType)
            return DataColumn(self, bulk, mask)

        # Slow path: convert item by item so one bad value only affects its own row.
        values = NP("empty", len(data), dtype=self.dtype)
        if mask is None:
            flags = NP("zeros", len(data), dtype=defs.maskType)
        else:
            flags = NP("fromiter", (defs.MISSING if m else defs.VALID for m in mask), dtype=defs.maskType, count=len(mask))

        for index, item in enumerate(data):
            if isinstance(item, float) and math.isnan(item):
                values[index] = defs.PADDING
                flags[index] = defs.MISSING
                continue

            try:
                values[index] = self.stringToValue(item)
            except (ValueError, TypeError):
                values[index] = defs.PADDING
                flags[index] = defs.INVALID

        if not flags.any():
            flags = None

        return DataColumn(self, values, flags)
开发者ID:Huskyeder,项目名称:augustus,代码行数:40,代码来源:FieldType.py

示例11: _selectAllMedianMajority

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def _selectAllMedianMajority(self, dataTable, functionTable, performanceTable, segmentation, which):
        """Used by C{calculateScore}."""

        if which is self.SELECT_ALL:
            performanceLabel = "Segmentation selectAll"
        elif which is self.MEDIAN:
            performanceLabel = "Segmentation median"
        elif which is self.MAJORITY_VOTE:
            performanceLabel = "Segmentation majorityVote"
        elif which is self.WEIGHTED_MAJORITY_VOTE:
            performanceLabel = "Segmentation weightedMajorityVote"
        performanceTable.begin(performanceLabel)

        scores = [[] for x in xrange(len(dataTable))]
        if which is self.SELECT_ALL:
            segments = [[] for x in xrange(len(dataTable))]

        newOutputData = {}
        for segment in segmentation.childrenOfTag("Segment", iterator=True):
            performanceTable.pause(performanceLabel)
            selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
            performanceTable.unpause(performanceLabel)
            if not selection.any():
                continue

            segmentName = segment.get("id")
            indexes = NP("nonzero", selection)[0]

            subTable = dataTable.subTable(selection)
            subModel = segment.childOfClass(PmmlModel)

            performanceTable.pause(performanceLabel)
            subModel.calculate(subTable, functionTable, performanceTable)
            performanceTable.unpause(performanceLabel)

            if which is self.MEDIAN and subTable.score.fieldType.dataType in ("string", "boolean", "object"):
                raise defs.PmmlValidationError("Segmentation with multipleModelMethod=\"median\" cannot be applied to models that produce dataType \"%s\"" % subTable.score.fieldType.dataType)

            scoreData = subTable.score.data
            scoreMask = subTable.score.mask
            indexesUsed = indexes
            if which is self.SELECT_ALL:
                for subIndex, index in enumerate(indexes):
                    if scoreMask is None or scoreMask[subIndex] == defs.VALID:
                        scores[index].append(scoreData[subIndex])
                        segments[index].append(segmentName)

            elif which is self.MEDIAN:
                for subIndex, index in enumerate(indexes):
                    if scoreMask is None or scoreMask[subIndex] == defs.VALID:
                        scores[index].append(scoreData[subIndex])

            elif which in (self.MAJORITY_VOTE, self.WEIGHTED_MAJORITY_VOTE):
                if which is self.MAJORITY_VOTE:
                    weight = 1.0
                else:
                    weight = float(segment.get("weight", 1.0))
                for subIndex, index in enumerate(indexes):
                    if scoreMask is None or scoreMask[subIndex] == defs.VALID:
                        newValue = scoreData[subIndex]
                        score = scores[index]
                        found = False
                        for pair in score:
                            if pair[0] == newValue:
                                pair[1] += weight
                                found = True
                                break
                        if not found:
                            score.append([newValue, weight])

            if which is self.SELECT_ALL:
                for fieldName, dataColumn in subTable.output.items():
                    newData = newOutputData.get(fieldName)
                    if newData is None:
                        newData = [[] for x in xrange(len(dataTable))]
                        newOutputData[fieldName] = newData

                    dataColumnData = dataColumn.data
                    dataColumnMask = dataColumn.mask
                    for subIndex, index in enumerate(indexes):
                        if scoreMask is None or scoreMask[subIndex] == defs.VALID:
                            if dataColumnMask is None or dataColumnMask[subIndex] == defs.VALID:
                                newData[index].append(dataColumnData[subIndex])
                            else:
                                newData[index].append(None)

        if which is self.SELECT_ALL:
            for fieldName, newData in newOutputData.items():
                finalNewData = NP("empty", len(dataTable), dtype=NP.dtype(object))
                for index, newDatum in enumerate(newData):
                    finalNewData[index] = tuple(newDatum)
                dataTable.output[fieldName] = DataColumn(self.scoreType, finalNewData, None)

            finalScoresData = NP("empty", len(dataTable), dtype=NP.dtype(object))
            for index, score in enumerate(scores):
                finalScoresData[index] = tuple(score)
            finalScores = DataColumn(self.scoreType, finalScoresData, None)

            if self.name is None:
                performanceTable.end(performanceLabel)
#.........这里部分代码省略.........
开发者ID:Huskyeder,项目名称:augustus,代码行数:103,代码来源:MiningModel.py

示例12: _sumAverageWeighted

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def _sumAverageWeighted(self, dataTable, functionTable, performanceTable, segmentation, which):
        """Used by C{calculateScore}.

        Combines the scores of all matching Segments into one score
        per row: a plain sum (C{self.SUM}), an average
        (C{self.AVERAGE}) or a weighted average
        (C{self.WEIGHTED_AVERAGE}, using each Segment's "weight"
        attribute, default 1.0).

        @param dataTable: The rows to score.
        @param functionTable: Passed through to predicate and sub-model evaluation.
        @param performanceTable: Used to time this calculation; paused while predicates and sub-models run.
        @param segmentation: The element whose "Segment" children are combined.
        @param which: One of C{self.SUM}, C{self.AVERAGE}, C{self.WEIGHTED_AVERAGE}.
        @rtype: dict
        @return: A dictionary with the single key None, mapped to the combined score DataColumn.
        @raise defs.PmmlValidationError: If a sub-model produces "string", "boolean" or "object" scores.
        """

        # NOTE(review): if "which" is none of the three constants,
        # performanceLabel is never assigned and the begin() call below fails.
        if which is self.SUM:
            performanceLabel = "Segmentation sum"
        elif which is self.AVERAGE:
            performanceLabel = "Segmentation average"
        elif which is self.WEIGHTED_AVERAGE:
            performanceLabel = "Segmentation weightedAverage"
        performanceTable.begin(performanceLabel)

        # Per-row accumulators; the denominator is only needed for the averages.
        scoresData = NP("zeros", len(dataTable), dtype=NP.dtype(object))
        if which is not self.SUM:
            denominator = NP("zeros", len(dataTable), dtype=NP.dtype(float))
        invalid = NP("zeros", len(dataTable), dtype=NP.dtype(bool))

        for segment in segmentation.childrenOfTag("Segment", iterator=True):
            # Predicate evaluation is timed separately from this method.
            performanceTable.pause(performanceLabel)
            selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
            performanceTable.unpause(performanceLabel)
            if not selection.any():
                continue
            
            subTable = dataTable.subTable(selection)
            subModel = segment.childOfClass(PmmlModel)
            performanceTable.pause(performanceLabel)
            subModel.calculate(subTable, functionTable, performanceTable)
            performanceTable.unpause(performanceLabel)

            if subTable.score.fieldType.dataType in ("string", "boolean", "object"):
                raise defs.PmmlValidationError("Segmentation with multipleModelMethod=\"%s\" cannot be applied to models that produce dataType \"%s\"" % (self.childOfTag("Segmentation").get("multipleModelMethod"), subTable.score.fieldType.dataType))

            # ignore invalid in matches (like the built-in "+" and "avg" Apply functions)
            # NOTE(review): selection has one entry per dataTable row, while
            # subTable.score.mask presumably has one entry per selected row;
            # confirm the shapes agree here and in the "+=" updates below,
            # which run after selection has been narrowed in place.
            if subTable.score.mask is not None:
                NP("logical_and", selection, NP(subTable.score.mask == defs.VALID), selection)

            # NOTE(review): the second test is "if", not "elif"; harmless as
            # long as "which" matches at most one of the constants.
            if which is self.SUM:
                scoresData[selection] += subTable.score.data
            if which is self.AVERAGE:
                scoresData[selection] += subTable.score.data
                denominator[selection] += 1.0
            elif which is self.WEIGHTED_AVERAGE:
                weight = float(segment.get("weight", 1.0))
                scoresData[selection] += (subTable.score.data * weight)
                denominator[selection] += weight

            # NOTE(review): selection was already restricted to VALID scores
            # above, so confirm this flags the rows it is meant to flag.
            if subTable.score.mask is not None:
                invalid[selection] = NP("logical_or", invalid[selection], NP(subTable.score.mask != defs.VALID))

        if which is not self.SUM:
            # Rows matched by no segment (zero denominator) cannot be averaged.
            NP("logical_or", invalid, NP(denominator == 0.0), invalid)
            valid = NP("logical_not", invalid)
            scoresData[valid] /= denominator[valid]

        if invalid.any():
            scoresMask = NP(NP("array", invalid, dtype=defs.maskType) * defs.INVALID)
        else:
            scoresMask = None
        
        scores = DataColumn(self.scoreType, scoresData, scoresMask)

        performanceTable.end(performanceLabel)
        return {None: scores}
开发者ID:Huskyeder,项目名称:augustus,代码行数:65,代码来源:MiningModel.py

示例13: _selectFirst

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def _selectFirst(self, dataTable, functionTable, performanceTable, segmentation):
        """Used by C{calculateScore}.

        Scores every row with the first Segment whose predicate
        selects it; rows claimed by an earlier Segment are not offered
        to later ones.  Output fields produced by the sub-models are
        copied into C{dataTable.output}.

        @param dataTable: The rows to score; its C{output} is updated.
        @param functionTable: Passed through to predicate and sub-model evaluation.
        @param performanceTable: Used to time this calculation; paused while predicates and sub-models run.
        @param segmentation: The element whose "Segment" children are tried in order.
        @rtype: dict
        @return: A dictionary mapping None to the score DataColumn and, when C{self.name} is not None, "segment" to a DataColumn of Segment ids.
        """

        performanceTable.begin("Segmentation selectFirst")

        scoresData = NP("empty", len(dataTable), dtype=NP.dtype(object))
        scoresMask = NP("zeros", len(dataTable), dtype=defs.maskType)
        # True for rows that no Segment has scored yet.
        unfilled = NP("ones", len(dataTable), dtype=NP.dtype(bool))
        segments = NP("empty", len(dataTable), dtype=NP.dtype(object))

        newOutputData = []
        for segment in segmentation.childrenOfTag("Segment", iterator=True):
            # Predicate evaluation is timed separately from this method.
            performanceTable.pause("Segmentation selectFirst")
            selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
            performanceTable.unpause("Segmentation selectFirst")
            # Only rows not yet claimed by an earlier Segment are eligible.
            NP("logical_and", selection, unfilled, selection)
            if not selection.any():
                continue

            subTable = dataTable.subTable(selection)
            subModel = segment.childOfClass(PmmlModel)
            performanceTable.pause("Segmentation selectFirst")

            subModel.calculate(subTable, functionTable, performanceTable)
            performanceTable.unpause("Segmentation selectFirst")

            scoresData[selection] = subTable.score.data
            if subTable.score.mask is not None:
                scoresMask[selection] = subTable.score.mask
            else:
                scoresMask[selection] = defs.VALID

            segmentName = segment.get("id")
            if segmentName is not None:
                segments[selection] = segmentName

            for fieldName, dataColumn in subTable.output.items():
                if fieldName not in dataTable.output:
                    # First time this output field appears: create a
                    # full-length column that is MISSING everywhere
                    # except in the rows filled in now.
                    data = NP("empty", len(dataTable), dtype=dataColumn.fieldType.dtype)
                    data[selection] = dataColumn.data

                    mask = NP(NP("ones", len(dataTable), dtype=defs.maskType) * defs.MISSING)
                    if dataColumn.mask is None:
                        mask[selection] = defs.VALID
                    else:
                        mask[selection] = dataColumn.mask

                    newDataColumn = DataColumn(dataColumn.fieldType, data, mask)
                    # Kept unlocked so later Segments can fill in more
                    # rows; locked again after the loop.
                    newDataColumn._unlock()
                    dataTable.output[fieldName] = newDataColumn
                    newOutputData.append(newDataColumn)

                else:
                    newDataColumn = dataTable.output[fieldName]

                    newDataColumn.data[selection] = dataColumn.data
                    if dataColumn.mask is None:
                        newDataColumn.mask[selection] = defs.VALID
                    else:
                        newDataColumn.mask[selection] = dataColumn.mask

            # Clear the rows scored by this Segment.  selection is a
            # subset of unfilled (see the logical_and above), so this
            # is what "unfilled -= selection" computed, without relying
            # on boolean-array subtraction, which current Numpy rejects
            # with a TypeError.
            NP("logical_and", unfilled, NP("logical_not", selection), unfilled)
            if not unfilled.any():
                break

        for newDataColumn in newOutputData:
            if not newDataColumn.mask.any():
                newDataColumn._mask = None
            newDataColumn._lock()

        if not scoresMask.any():
            scoresMask = None

        scores = DataColumn(self.scoreType, scoresData, scoresMask)

        performanceTable.end("Segmentation selectFirst")
        if self.name is None:
            return {None: scores}
        return {None: scores, "segment": DataColumn(self.scoreTypeSegment, segments, None)}
开发者ID:Huskyeder,项目名称:augustus,代码行数:83,代码来源:MiningModel.py

示例14: format

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def format(self, subTable, functionTable, performanceTable, score):
        """Extract or post-process output for the output field of a DataTable.

        The C{feature} attribute selects the source of the output:

          - no C{feature}: the existing field named by C{name} in
            C{subTable.fields};
          - C{"predictedValue"}: C{score[None]};
          - C{"predictedDisplayValue"}: C{score[None]} with every value
            converted by its field type's C{valueToString};
          - C{"transformedValue"}: the result of evaluating the child
            EXPRESSION;
          - C{"decision"}: the child EXPRESSION is evaluated and each row
            whose value equals a C{Decision}'s C{value} is assigned that
            C{Decision} element; rows matching no C{Decision} stay MISSING;
          - any other value: looked up as a key of C{score}.

        Unless the feature is C{"predictedDisplayValue"} or C{"decision"},
        the result is cast when this element's C{dataType}/C{optype}
        differ from those of the computed column.  When a C{feature} is
        given, the result is also stored in C{subTable.fields} under
        C{displayName} (falling back to C{name}).

        @type subTable: DataTable
        @param subTable: The DataTable associated with this local lexical scope.
        @type functionTable: FunctionTable or None
        @param functionTable: A table of functions.
        @type performanceTable: PerformanceTable or None
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @type score: dict
        @param score: Dictionary mapping PMML score "feature" strings to DataColumns.  This dictionary always contains a None key, which is the basic feature ("predictedValue").
        @rtype: DataColumn
        @return: The output that would go into an output field of a DataTable.
        @raise PmmlValidationError: If a C{"transformedValue"} or C{"decision"} field has no EXPRESSION, if a C{"decision"} field has no C{Decisions} block, or if the feature is not among the keys of C{score}.
        """

        performanceTable.begin("OutputField")

        feature = self.get("feature")
        if feature is None:
            dataColumn = subTable.fields[self["name"]]

        elif feature == "predictedValue":
            dataColumn = score[None]

        elif feature == "predictedDisplayValue":
            original = score[None]
            toString = original.fieldType.valueToString
            data = NP("empty", len(subTable), dtype=NP.dtype(object))
            for i, x in enumerate(original.data):
                data[i] = toString(x)
            # The string column is built without a mask (mask argument is None).
            dataColumn = DataColumn(FakeFieldType("string", "continuous"), data, None)

        elif feature == "transformedValue":
            expression = self.childOfClass(PmmlExpression)
            if expression is None:
                raise defs.PmmlValidationError("OutputField with feature \"transformedValue\" requires an EXPRESSION")

            # NOTE(review): presumably pausing keeps the nested expression's
            # time out of the "OutputField" measurement -- confirm against
            # PerformanceTable.
            performanceTable.pause("OutputField")
            dataColumn = expression.evaluate(subTable, functionTable, performanceTable)
            performanceTable.unpause("OutputField")

        elif feature == "decision":
            decisions = self.childOfTag("Decisions")
            if decisions is None:
                raise defs.PmmlValidationError("OutputField with feature \"decision\" requires a Decisions block")

            # childOfClass returns None when no EXPRESSION child exists (the
            # "transformedValue" branch relies on this); report it as a
            # validation error rather than failing with an AttributeError.
            expression = self.childOfClass(PmmlExpression)
            if expression is None:
                raise defs.PmmlValidationError("OutputField with feature \"decision\" requires an EXPRESSION")

            performanceTable.pause("OutputField")
            dataColumn = expression.evaluate(subTable, functionTable, performanceTable)
            performanceTable.unpause("OutputField")

            # Only rows whose evaluated value is VALID may match a Decision;
            # no mask means every row is eligible.
            if dataColumn.mask is None:
                valid = None
            else:
                valid = NP(dataColumn.mask == defs.VALID)

            fieldType = FakeFieldType("object", "any")
            data = NP("empty", len(subTable), dtype=fieldType.dtype)
            # Start with every row MISSING; matched rows are flipped to VALID below.
            mask = NP(NP("ones", len(subTable), dtype=defs.maskType) * defs.MISSING)

            for decision in decisions.childrenOfTag("Decision"):
                value = dataColumn.fieldType.stringToValue(decision["value"])

                selection = NP(dataColumn.data == value)
                if valid is not None:
                    NP("logical_and", selection, valid, selection)

                # Assigned one element at a time.
                # NOTE(review): presumably to stop NumPy from treating the
                # Decision element as a sequence in a fancy assignment -- confirm.
                for i in xrange(len(data)):
                    if selection[i]:
                        data[i] = decision

                mask[selection] = defs.VALID

            # An all-zero mask is represented as None.
            if not mask.any():
                mask = None

            dataColumn = DataColumn(fieldType, data, mask)

        elif feature in score:
            dataColumn = score[feature]

        else:
            # Name the grandparent element (expected to be the model) in the
            # error message, if there is one.
            model = self.getparent()
            if model is not None: model = model.getparent()

            if model is None:
                model = "(orphaned OutputField; no parent model)"
            else:
                model = model.t

            raise defs.PmmlValidationError("Models of type %s do not produce \"%s\" features (or at least, it is not yet implemented by Augustus)" % (model, feature))

        # Honor an explicit dataType/optype on this element, except for the
        # two features whose output type is fixed above.
        dataType = self.get("dataType", dataColumn.fieldType.dataType)
        optype = self.get("optype", dataColumn.fieldType.optype)
        if (dataType != dataColumn.fieldType.dataType or optype != dataColumn.fieldType.optype) and feature not in ("predictedDisplayValue", "decision"):
            dataColumn = FieldCastMethods.cast(FakeFieldType(dataType, optype), dataColumn)

        if feature is not None:
            subTable.fields[self.get("displayName", self["name"])] = dataColumn

        performanceTable.end("OutputField")
#.........这里部分代码省略.........
开发者ID:Huskyeder,项目名称:augustus,代码行数:103,代码来源:OutputField.py

示例15: calculateScore

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import any [as 别名]
    def calculateScore(self, dataTable, functionTable, performanceTable):
        """Calculate the score of this model.

        This method is called by C{calculate} to separate operations
        that are performed by all models (in C{calculate}) from
        operations that are performed by specific models (in
        C{calculateScore}).

        @type dataTable: DataTable
        @param dataTable: The DataTable representing this model's lexical scope.
        @type functionTable: FunctionTable or None
        @param functionTable: A table of functions.
        @type performanceTable: PerformanceTable or None
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @rtype: DataColumn
        @return: A DataColumn containing the score.
        @raise PmmlValidationError: If a fieldWeight is negative, if a ClusteringField has dataType "string", if a Cluster has no center array or one whose length differs from the number of ClusteringFields, or if a distribution-based Cluster lacks a single numeric Covariances/Matrix.
        """

        performanceTable.begin("ClusteringModel")

        performanceTable.begin("set up")

        distributionBased = (self["modelClass"] == "distributionBased")
        # Select the ClusteringFields whose isCenterField attribute is not 'false'.
        clusteringFields = self.xpath("pmml:ClusteringField[not(@isCenterField='false')]")
        fieldWeights = [clusteringField.get("fieldWeight", defaultFromXsd=True, convertType=True) for clusteringField in clusteringFields]
        for fieldWeight in fieldWeights:
            if fieldWeight < 0.0:
                raise defs.PmmlValidationError("ClusteringField fieldWeights must all be non-negative (encountered %g)" % fieldWeight)
        clusters = self.xpath("pmml:Cluster")
        comparisonMeasure = self.childOfClass(ComparisonMeasure)
        defaultCompareFunction = comparisonMeasure.get("compareFunction", defaultFromXsd=True)
        metric = comparisonMeasure.childOfClass(PmmlClusteringMetric)
        # The metric's tag name doubles as the performanceTable key in the cluster loop.
        metrictag = metric.t

        performanceTable.end("set up")

        # String-typed fields are rejected before any distance is computed.
        for clusteringField in clusteringFields:
            dataType = dataTable.fields[clusteringField["field"]].fieldType.dataType
            if dataType == "string":
                raise defs.PmmlValidationError("ClusteringField \"%s\" has dataType \"%s\", which cannot be used for clustering" % (clusteringField["field"], dataType))

        missingValueWeights = self.childOfTag("MissingValueWeights")
        if missingValueWeights is None:
            adjustM = None

        else:
            performanceTable.begin("MissingValueWeights")

            missingWeights = missingValueWeights.childOfClass(PmmlArray).values(convertType=True)

            # Per-row accumulator handed to each ClusteringField's addToAdjustM.
            sumNMqi = NP("zeros", len(dataTable), dtype=NP.dtype(float))
            for clusteringField, missingWeight in zip(clusteringFields, missingWeights):
                clusteringField.addToAdjustM(dataTable, functionTable, performanceTable, sumNMqi, missingWeight)

            # adjustM = (sum of all missing weights) / sumNMqi; rows where
            # sumNMqi is zero were divided by zero, so they are reset to 1.0.
            adjustM = NP(sum(missingWeights) / sumNMqi)
            adjustM[NP(sumNMqi == 0.0)] = 1.0

            performanceTable.end("MissingValueWeights")

        # Flag each row for which at least one clustering field is INVALID.
        # NOTE(review): the third argument is presumably the ufunc output
        # array (in-place accumulation) -- confirm against the NP wrapper.
        anyInvalid = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
        for clusteringField in clusteringFields:
            mask = dataTable.fields[clusteringField["field"]].mask
            if mask is not None:
                NP("logical_or", anyInvalid, NP(mask == defs.INVALID), anyInvalid)

        bestClusterId = None
        bestClusterAffinity = None
        allClusterAffinities = {}

        for index, cluster in enumerate(clusters):
            array = cluster.childOfClass(PmmlArray)
            if array is None:
                raise defs.PmmlValidationError("Cluster must have an array to designate its center")

            # Center coordinates are kept as strings (convertType=False) and
            # passed on to the metric in that form.
            centerStrings = array.values(convertType=False)
            if len(centerStrings) != len(clusteringFields):
                raise defs.PmmlValidationError("Cluster array has %d components, but there are %d ClusteringFields with isCenterField=true" % (len(centerStrings), len(clusteringFields)))

            performanceTable.begin(metrictag)

            if distributionBased:
                # Exactly one Covariances/Matrix is required per cluster.
                matrix = cluster.xpath("pmml:Covariances/pmml:Matrix")
                if len(matrix) != 1:
                    raise defs.PmmlValidationError("In distribution-based clustering, all clusters must have a Covariances/Matrix")
                try:
                    covarianceMatrix = NP("array", matrix[0].values(), dtype=NP.dtype(float))
                except ValueError:
                    raise defs.PmmlValidationError("Covariances/Matrix must contain real numbers for distribution-based clustering")

            else:
                covarianceMatrix = None

            # A fresh state object is created and initialized for every cluster.
            state = self._State()
            metric.initialize(state, len(dataTable), len(clusteringFields), distributionBased)

            for clusteringField, centerString, fieldWeight in zip(clusteringFields, centerStrings, fieldWeights):
                if isinstance(metric, PmmlClusteringMetricBinary):
                    metric.accumulateBinary(state, dataTable.fields[clusteringField["field"]], centerString, distributionBased)
                else:
                    performanceTable.pause(metrictag)
#.........这里部分代码省略.........
开发者ID:Huskyeder,项目名称:augustus,代码行数:103,代码来源:ClusteringModel.py


注:本文中的augustus.core.NumpyInterface.NP.any方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。