当前位置: 首页>>代码示例>>Python>>正文


Python NP.dtype方法代码示例

本文整理汇总了Python中augustus.core.NumpyInterface.NP.dtype方法的典型用法代码示例。如果您正苦于以下问题：Python NP.dtype方法的具体用法？Python NP.dtype怎么用？Python NP.dtype使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在augustus.core.NumpyInterface.NP的用法示例。


在下文中一共展示了NP.dtype方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: cusum

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
    def cusum(self, testDistributions, fieldName, dataColumn, state, performanceTable):
        """Score a data column with the CUSUM TestStatistic.

        CUSUM is a running (stateful) sum: the score of each row is
        built from the score of the row before it.  The starting value
        is read from C{state} under this model's C{stateId} attribute
        (0.0 if there is no C{stateId} or nothing was stored), and the
        final value is written back under the same key, so successive
        calls can continue one cumulative sum.

        @type testDistributions: PmmlBinding
        @param testDistributions: The <TestDistributions> element.
        @type fieldName: string
        @param fieldName: The field name (for error messages).
        @type dataColumn: DataColumn
        @param dataColumn: The field.
        @type state: DataTableState
        @param state: Persistent state; supplies the initial cumulative sum and receives the final one.
        @type performanceTable: PerformanceTable or None
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @rtype: dict
        @return: A dictionary mapping PMML "feature" strings to DataColumns; CUSUM only defines the None key ("predictedValue").
        @raise PmmlValidationError: If the Baseline or the Alternate has no GaussianDistribution or PoissonDistribution.
        """

        baselineMatches = testDistributions.xpath("pmml:Baseline/pmml:GaussianDistribution | pmml:Baseline/pmml:PoissonDistribution")
        alternateMatches = testDistributions.xpath("pmml:Alternate/pmml:GaussianDistribution | pmml:Alternate/pmml:PoissonDistribution")

        if not (len(baselineMatches) != 0 and len(alternateMatches) != 0):
            raise defs.PmmlValidationError("BaselineModel CUSUM requires a Baseline and an Alternate that are either GaussianDistribution or PoissonDistribution")

        # per-row log-likelihood ratio of the alternate over the baseline
        logAlternate = alternateMatches[0].logpdf(dataColumn.data)
        logBaseline = baselineMatches[0].logpdf(dataColumn.data)
        logRatios = logAlternate - logBaseline

        if dataColumn.mask is not None:
            usable = NP(dataColumn.mask == defs.VALID)
        else:
            usable = NP("ones", len(dataColumn), dtype=NP.dtype(bool))

        stateId = self.get("stateId")
        running = state.get(stateId) if stateId is not None else None
        if running is None:
            running = 0.0

        floor = testDistributions.get("resetValue", defaultFromXsd=True, convertType=True)

        scores = NP("empty", len(dataColumn), dtype=NP.dtype(float))

        performanceTable.begin("fill CUSUM")
        for row in xrange(len(dataColumn)):
            # rows that are not VALID do not advance the sum; they repeat the previous score
            if usable[row]:
                running = max(floor, running + logRatios[row])
            scores[row] = running
        performanceTable.end("fill CUSUM")

        if stateId is not None:
            state[stateId] = running

        return {None: DataColumn(self.scoreType, scores, None)}
开发者ID:Huskyeder,项目名称:augustus,代码行数:61,代码来源:BaselineModel.py

示例2: _checkValues

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
    def _checkValues(self, data, mask):
        """Classify rows as valid, missing or invalid according to C{self.values}.

        Every entry of C{self.values} names a value and a "property"
        ("valid" by default, or "missing" / "invalid").  If at least
        one "valid" entry exists, only rows equal to a listed valid
        value (and not missing) are VALID; otherwise every row is
        VALID unless it matches a "missing" or "invalid" entry.  Any
        "displayValue" found is recorded in C{self._displayValue}.

        @type data: array
        @param data: The values to check; compared element-wise with each specified value.
        @type mask: array or None
        @param mask: An existing mask whose MISSING and INVALID entries seed the classification, or None.
        @rtype: 2-tuple
        @return: The C{data} as given and a new mask, or None in place of the mask if every row is valid.
        @raise PmmlValidationError: If a specified value cannot be converted by C{self.stringToValue}.
        """
        specifications = self.values
        if len(specifications) == 0:
            return data, mask

        numberOfRows = len(data)
        if mask is not None:
            isMissing = NP(mask == defs.MISSING)
            isInvalid = NP(mask == defs.INVALID)
        else:
            isMissing = NP("zeros", numberOfRows, dtype=NP.dtype(bool))
            isInvalid = NP("zeros", numberOfRows, dtype=NP.dtype(bool))
        isValid = NP("zeros", numberOfRows, dtype=NP.dtype(bool))

        # each recognized property accumulates matches into its own flag array
        flagsByProperty = {"valid": isValid, "missing": isMissing, "invalid": isInvalid}

        validCount = 0
        for specification in specifications:
            rawValue = specification.get("value")
            label = specification.get("displayValue")
            if label is not None:
                self._displayValue[rawValue] = label

            kind = specification.get("property", "valid")
            try:
                parsed = self.stringToValue(rawValue)
            except ValueError:
                raise defs.PmmlValidationError("Improper value in Value specification: \"%s\"" % rawValue)

            flags = flagsByProperty.get(kind)
            if flags is not None:
                NP("logical_or", flags, NP(data == parsed), flags)
            if kind == "valid":
                validCount += 1

        if validCount > 0:
            # guilty until proven innocent: start from all-INVALID and whitelist
            NP("logical_and", isValid, NP("logical_not", isMissing), isValid)
            if isValid.all():
                return data, None
            mask = NP(NP("ones", numberOfRows, dtype=defs.maskType) * defs.INVALID)
            mask[isMissing] = defs.MISSING
            mask[isValid] = defs.VALID
            return data, mask

        # innocent until proven guilty: start from all-zero mask and blacklist
        NP("logical_and", isInvalid, NP("logical_not", isMissing), isInvalid)
        if not NP("logical_or", isInvalid, isMissing).any():
            return data, None
        mask = NP("zeros", numberOfRows, dtype=defs.maskType)
        mask[isMissing] = defs.MISSING
        mask[isInvalid] = defs.INVALID
        return data, mask
开发者ID:Huskyeder,项目名称:augustus,代码行数:55,代码来源:FieldType.py

示例3: functionMultiset

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
    def functionMultiset(self, dataColumn, whereMask, groupSelection, getstate, setstate):
        """Build a running multiset (value -> count) over the selected rows of a DataColumn.

        A row is counted when it is VALID and passes both the optional
        SQL where mask and the optional group selection.  Every output
        row holds a snapshot of the counts as of that row.

        @type dataColumn: DataColumn
        @param dataColumn: The input data column.
        @type whereMask: 1d Numpy array of bool, or None
        @param whereMask: The result of the SQL where selection.
        @type groupSelection: 1d Numpy array of bool, or None.
        @param groupSelection: Rows corresponding to a particular value of the groupField.
        @type getstate: callable function
        @param getstate: Retrieve starting values from the DataTableState.
        @type setstate: callable function
        @param setstate: Store ending values to the DataTableState.
        @rtype: DataColumn of dict objects
        @return: A column of multisetted rows.
        """

        fieldType = FakeFieldType("object", "any")

        selection = NP("ones", len(dataColumn), dtype=NP.dtype(bool))
        if dataColumn.mask is not None:
            selection = NP("logical_and", selection, NP(dataColumn.mask == defs.VALID))

        # narrow the selection in place with each optional boolean mask
        for extraMask in (whereMask, groupSelection):
            if extraMask is not None:
                NP("logical_and", selection, extraMask, selection)

        counts = {}
        if getstate is not None:
            restored = getstate()
            if restored is not None:
                # the restored object itself is updated in place below
                counts = restored
        snapshot = dict(counts)

        output = NP("empty", len(dataColumn), dtype=NP.dtype(object))

        toPython = dataColumn.fieldType.valueToPython
        for position, raw in enumerate(dataColumn.data):
            if selection[position]:
                key = toPython(raw)
                counts[key] = counts.get(key, 0) + 1
                # copy only when the counts changed; unselected rows share the previous snapshot
                snapshot = dict(counts)
            output[position] = snapshot

        if setstate is not None:
            setstate(counts)

        return DataColumn(fieldType, output, None)
开发者ID:Huskyeder,项目名称:augustus,代码行数:54,代码来源:Aggregate.py

示例4: _fromDataColumn_number

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
 def _fromDataColumn_number(self, dataColumn):
     """Convert a numeric DataColumn into an object array.

     Without a mask the data are copied as-is.  With a mask, VALID
     rows keep their value, MISSING rows become C{defs.NAN} and all
     other rows become None.
     """
     mask = dataColumn.mask
     if mask is None:
         return NP("array", dataColumn.data, dtype=NP.dtype(object))

     converted = NP("empty", len(dataColumn), dtype=NP.dtype(object))
     for position, value in enumerate(dataColumn.data):
         flag = mask[position]
         if flag == defs.VALID:
             converted[position] = value
         elif flag == defs.MISSING:
             converted[position] = defs.NAN
         else:
             converted[position] = None
     return converted
开发者ID:Huskyeder,项目名称:augustus,代码行数:16,代码来源:FieldType.py

示例5: pointsToSmoothCurve

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
    def pointsToSmoothCurve(xarray, yarray, samples, smoothingScale, loop):
        """Draw a smooth curve through numeric points using a Gaussian-weighted
        local linear fit.

        At every sample position a straight line is fitted to
        (C{xarray}, C{yarray}) by weighted least squares, with
        Gaussian weights centered on the sample.  The fitted height
        and slope at each sample become the curve position and
        derivative.

        @type xarray: 1d Numpy array of numbers
        @param xarray: Array of x values.
        @type yarray: 1d Numpy array of numbers
        @param yarray: Array of y values.
        @type samples: 1d Numpy array of numbers
        @param samples: Locations at which to fit the C{xarray} and C{yarray} with best-fit positions and derivatives.
        @type smoothingScale: number
        @param smoothingScale: Standard deviation of the Gaussian kernel used to smooth the locally linear fit.
        @type loop: bool
        @param loop: If False, disconnect the end of the fitted curve from the beginning.
        @rtype: 4-tuple of 1d Numpy arrays
        @return: C{xlist}, C{ylist}, C{dxlist}, C{dylist} appropriate for C{formatPathdata}.
        """

        fitted = []
        slopes = []

        for sample in samples:
            # Gaussian kernel weights centered on this sample
            displacement = NP(xarray - sample)
            exponent = NP(NP(-0.5 * NP("power", displacement, 2)) / NP(smoothingScale * smoothingScale))
            weights = NP(NP(NP("exp", exponent) / smoothingScale) / (math.sqrt(2.0*math.pi)))

            # weighted moments for the least-squares line
            sum1 = weights.sum()
            sumx = NP(weights * xarray).sum()
            sumxx = NP(weights * NP(xarray * xarray)).sum()
            sumy = NP(weights * yarray).sum()
            sumxy = NP(weights * NP(xarray * yarray)).sum()

            determinant = (sum1 * sumxx) - (sumx * sumx)
            intercept = ((sumxx * sumy) - (sumx * sumxy)) / determinant
            slope = ((sum1 * sumxy) - (sumx * sumy)) / determinant

            fitted.append(intercept + (sample * slope))
            slopes.append(slope)

        xlist = samples
        ylist = NP("array", fitted, dtype=NP.dtype(float))
        # central difference of the sample positions, wrapping around at the ends
        dxlist = NP((NP("roll", xlist, -1) - NP("roll", xlist, 1)) / 2.0)
        dylist = NP("array", slopes, dtype=NP.dtype(float)) * dxlist
        if not loop:
            # an open curve has no neighbor across the ends, so zero the end derivatives
            for derivative in (dxlist, dylist):
                derivative[0] = 0.0
                derivative[-1] = 0.0

        return xlist, ylist, dxlist, dylist
开发者ID:Huskyeder,项目名称:augustus,代码行数:52,代码来源:PlotCurve.py

示例6: functionMax

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
    def functionMax(self, dataColumn, whereMask, groupSelection, getstate, setstate):
        """Finds the running maximum of rows in a DataColumn, possibly with an SQL where mask and groupField.

        Each output row holds the largest selected value seen up to
        and including that row (starting from the value returned by
        C{getstate}, if any).  Rows that come before any maximum is
        known are marked INVALID.

        @type dataColumn: DataColumn
        @param dataColumn: The input data column.
        @type whereMask: 1d Numpy array of bool, or None
        @param whereMask: The result of the SQL where selection.
        @type groupSelection: 1d Numpy array of bool, or None.
        @param groupSelection: Rows corresponding to a particular value of the groupField.
        @type getstate: callable function
        @param getstate: Retrieve starting values from the DataTableState.
        @type setstate: callable function
        @param setstate: Store ending values to the DataTableState.
        @rtype: DataColumn
        @return: A column of maximized rows.
        @raise PmmlValidationError: If the input field is neither continuous nor ordinal.
        """

        fieldType = dataColumn.fieldType

        if fieldType.optype not in ("continuous", "ordinal"):
            # the message names this function ("max"), not its sibling "min"
            raise defs.PmmlValidationError("Aggregate function \"max\" requires a continuous or ordinal input field")

        if dataColumn.mask is None:
            selection = NP("ones", len(dataColumn), dtype=NP.dtype(bool))
        else:
            selection = NP(dataColumn.mask == defs.VALID)

        if whereMask is not None:
            NP("logical_and", selection, whereMask, selection)

        if groupSelection is not None:
            NP("logical_and", selection, groupSelection, selection)

        maximum = None
        if getstate is not None:
            startingState = getstate()
            if startingState is not None:
                maximum = startingState

        data = NP("empty", len(dataColumn), dtype=fieldType.dtype)
        mask = NP("zeros", len(dataColumn), dtype=defs.maskType)

        for i, x in enumerate(dataColumn.data):
            if selection[i]:
                if maximum is None or x > maximum:
                    maximum = x
            if maximum is None:
                # nothing selected yet and no starting state: no maximum to report
                mask[i] = defs.INVALID
            else:
                data[i] = maximum

        # an all-zero mask is represented as None
        if not mask.any():
            mask = None

        if setstate is not None:
            setstate(maximum)

        return DataColumn(fieldType, data, mask)
开发者ID:Huskyeder,项目名称:augustus,代码行数:60,代码来源:Aggregate.py

示例7: evaluate

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
    def evaluate(self, dataTable, functionTable, performanceTable, returnUnknowns=False):
        """Evaluate the always-false predicate against a DataTable.

        @type dataTable: DataTable
        @param dataTable: The input DataTable; only its length is used.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable (not used by this predicate).
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @type returnUnknowns: bool
        @param returnUnknowns: If True, also return the "unknown" arrays, which are all False here.
        @rtype: 1d Numpy array of bool or 3-tuple of arrays
        @return: Either an all-False selection array or selection, unknowns, encounteredUnknowns
        """

        performanceTable.begin("Predicate False")

        selection = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
        if not returnUnknowns:
            outcome = selection
        else:
            # the same all-False array serves as both unknowns and encounteredUnknowns
            unknowns = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
            outcome = (selection, unknowns, unknowns)

        performanceTable.end("Predicate False")
        return outcome
开发者ID:justinrichie,项目名称:augustus,代码行数:26,代码来源:FALSE.py

示例8: _fromDataColumn

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
 def _fromDataColumn(self, dataColumn):
     """Convert a DataColumn into an object array of Python values.

     VALID rows are passed through C{self.valueToPython}; MISSING
     rows become C{defs.NAN}; all other rows become None.
     """
     # An index-assignment loop is used instead of a list comprehension: the original
     # author measured it as lighter on memory and slightly faster (about 80 ns
     # versus 100 ns per record).
     result = NP("empty", len(dataColumn), dtype=NP.dtype(object))
     mask = dataColumn.mask
     if mask is None:
         for position, raw in enumerate(dataColumn.data):
             result[position] = self.valueToPython(raw)
         return result

     for position, raw in enumerate(dataColumn.data):
         flag = mask[position]
         if flag == defs.VALID:
             result[position] = self.valueToPython(raw)
         elif flag == defs.MISSING:
             result[position] = defs.NAN
         else:
             result[position] = None
     return result
开发者ID:Huskyeder,项目名称:augustus,代码行数:18,代码来源:FieldType.py

示例9: initialize

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
    def initialize(self, state, numberOfRecords, numberOfFields, distributionBased):
        """Prepare C{state} for a vectorized metric calculation; called once before all fields and cluster centers.

        Sets C{state.maximumComponent} to a float array of zeros, one
        entry per record.  Nothing else is modified.

        @type state: ad-hoc Python object
        @param state: State information that persists long enough to span the three steps of a metric calculation.  This is a work-around of lxml's refusal to let its Python instances maintain C{self} and it is unrelated to DataTableState.
        @type numberOfRecords: int
        @param numberOfRecords: The number of rows in the dataset.
        @type numberOfFields: int
        @param numberOfFields: The number of columns in the dataset.
        @type distributionBased: bool
        @param distributionBased: If True, use a covariance matrix to scale the distance result.
        @raise NotImplementedError: If C{distributionBased} is true; this metric does not support it.
        """

        zeros = NP("zeros", numberOfRecords, dtype=NP.dtype(float))
        state.maximumComponent = zeros
        if not distributionBased:
            return
        raise NotImplementedError("Distribution-based clustering has not been implemented for the %s metric" % self.t)
开发者ID:Huskyeder,项目名称:augustus,代码行数:20,代码来源:Chebychev.py

示例10: evaluate

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
    def evaluate(self, dataTable, functionTable, performanceTable, returnUnknowns=False):
        """Evaluate the set-membership predicate against a DataTable.

        The field named by this element's "field" attribute is tested
        for membership in the child Array; a "booleanOperator" of
        "isNotIn" negates the result.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, containing any fields that might be used to evaluate this predicate.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable, containing any functions that might be called in this predicate.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @type returnUnknowns: bool
        @param returnUnknowns: If True, return a "mask" for the selection that indicates which rows are unknown, rather than True or False.
        @rtype: 1d Numpy array of bool or 3-tuple of arrays
        @return: Either a simple selection array or selection, unknowns, encounteredUnknowns
        """

        performanceTable.begin("SimpleSetPredicate")

        dataColumn = dataTable.fields[self.get("field")]

        # convert the Array's string entries into the field's own value type
        fromString = dataColumn.fieldType.stringToValue
        allowed = []
        for text in self.childOfClass(Array).values(convertType=False):
            allowed.append(fromString(text))

        selection = NP("in1d", dataColumn.data, allowed)

        if self.get("booleanOperator") == "isNotIn":
            NP("logical_not", selection, selection)

        mask = dataColumn.mask

        if not returnUnknowns:
            # without an unknowns array, rows that are not VALID are simply deselected
            if mask is not None:
                NP("logical_and", selection, NP(mask == defs.VALID), selection)

            performanceTable.end("SimpleSetPredicate")
            return selection

        if mask is None:
            unknowns = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
        else:
            unknowns = NP(mask != defs.VALID)

        performanceTable.end("SimpleSetPredicate")
        return selection, unknowns, unknowns
开发者ID:Huskyeder,项目名称:augustus,代码行数:45,代码来源:SimpleSetPredicate.py

示例11: _checkIntervals

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
    def _checkIntervals(self, data, mask):
        """Mark rows that violate a margin of any entry in C{self.intervals} as INVALID.

        For every interval, rows beyond its left or right margin
        (honoring the open/closed "closure") are flagged.  If nothing
        is flagged, C{data} and C{mask} are returned untouched.

        @type data: array
        @param data: The values to check; compared element-wise with each margin.
        @type mask: array or None
        @param mask: An existing mask, which is modified in place when rows are flagged, or None.
        @rtype: 2-tuple
        @return: The C{data} as given and the resulting mask.
        @raise PmmlValidationError: If a margin cannot be converted by C{self.stringToValue}.
        """
        intervals = self.intervals
        if len(intervals) == 0:
            return data, mask

        # innocent until proven guilty
        outOfRange = NP("zeros", len(data), dtype=NP.dtype(bool))
        for interval in intervals:
            closure = interval["closure"]
            lower = interval.get("leftMargin")
            upper = interval.get("rightMargin")

            if lower is not None:
                try:
                    lower = self.stringToValue(lower)
                except ValueError:
                    raise defs.PmmlValidationError("Improper value in Interval leftMargin specification: \"%s\"" % lower)

                # an open left end also excludes the margin itself
                if closure in ("openClosed", "openOpen"):
                    outOfRange[NP(data <= lower)] = True
                elif closure in ("closedOpen", "closedClosed"):
                    outOfRange[NP(data < lower)] = True

            if upper is not None:
                try:
                    upper = self.stringToValue(upper)
                except ValueError:
                    raise defs.PmmlValidationError("Improper value in Interval rightMargin specification: \"%s\"" % upper)

                # an open right end also excludes the margin itself
                if closure in ("openOpen", "closedOpen"):
                    outOfRange[NP(data >= upper)] = True
                elif closure in ("openClosed", "closedClosed"):
                    outOfRange[NP(data > upper)] = True

        if not outOfRange.any():
            return data, mask

        if mask is not None:
            # only rows currently marked VALID are changed
            NP("logical_and", outOfRange, NP(mask == defs.VALID), outOfRange)
            mask[outOfRange] = defs.INVALID
            return data, mask

        return data, NP(outOfRange * defs.INVALID)
开发者ID:Huskyeder,项目名称:augustus,代码行数:45,代码来源:FieldType.py

示例12: initialize

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
    def initialize(self, state, numberOfRecords, numberOfFields, distributionBased):
        """Prepare C{state} for a vectorized metric calculation; called once before all fields and cluster centers.

        Sets C{state.sumInQuadrature} to a float array of zeros, one
        entry per record.  When C{distributionBased} is true it also
        allocates C{state.displacements} (records x fields, not
        initialized) and resets C{state.displacementIndex} to 0.

        @type state: ad-hoc Python object
        @param state: State information that persists long enough to span the three steps of a metric calculation.  This is a work-around of lxml's refusal to let its Python instances maintain C{self} and it is unrelated to DataTableState.
        @type numberOfRecords: int
        @param numberOfRecords: The number of rows in the dataset.
        @type numberOfFields: int
        @param numberOfFields: The number of columns in the dataset.
        @type distributionBased: bool
        @param distributionBased: If True, use a covariance matrix to scale the distance result.
        """

        state.sumInQuadrature = NP("zeros", numberOfRecords, dtype=NP.dtype(float))
        if not distributionBased:
            return

        shape = (numberOfRecords, numberOfFields)
        state.displacements = NP("empty", shape, dtype=NP.dtype(float))
        state.displacementIndex = 0
开发者ID:Huskyeder,项目名称:augustus,代码行数:21,代码来源:Euclidean.py

示例13: evaluate

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
        def evaluate(self, dataTable, functionTable, performanceTable, arguments):
            """Evaluate this built-in boolean function over a DataTable.

            Each argument is evaluated first, then combined through
            C{self.applySkipMissing}.  Rows still flagged in
            C{allbad} afterwards are marked MISSING, but only where
            the mask has not already marked them as something other
            than VALID.

            @param dataTable: The input DataTable.
            @param functionTable: The FunctionTable passed on to the arguments.
            @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
            @param arguments: Expressions to evaluate; at least two are required by C{allBooleanType}.
            @return: A DataColumn holding the result.
            """
            evaluated = []
            for argument in arguments:
                evaluated.append(argument.evaluate(dataTable, functionTable, performanceTable))

            timerLabel = "built-in \"%s\"" % self.name
            performanceTable.begin(timerLabel)

            fieldType = self.allBooleanType(evaluated, atleast=2)

            numberOfRows = len(dataTable)
            data = NP("zeros", numberOfRows, dtype=fieldType.dtype)
            allbad = NP("ones", numberOfRows, dtype=NP.dtype(bool))

            (data, allbad), mask = self.applySkipMissing((data, allbad), None, evaluated)

            if allbad.any():
                if mask is not None:
                    # keep existing non-VALID codes; only VALID rows become MISSING
                    NP("logical_and", allbad, NP(mask == defs.VALID), allbad)
                    mask[allbad] = defs.MISSING
                else:
                    mask = allbad * defs.MISSING

            performanceTable.end(timerLabel)
            return DataColumn(fieldType, data, mask)
开发者ID:Huskyeder,项目名称:augustus,代码行数:23,代码来源:FunctionTableExtra.py

示例14: _toDataColumn_number

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
    def _toDataColumn_number(self, data, mask):
        """Build a numeric DataColumn from raw data and an optional mask.

        NaN values are flagged as MISSING.  Three paths are taken
        depending on the input: (1) C{data} is already a Numpy array
        of C{self.dtype}; (2) C{data} converts to such an array in one
        call; (3) the bulk conversion fails and every element is
        converted individually, with unconvertible elements replaced
        by C{defs.PADDING} and flagged.  The result is then passed
        through C{_checkValues} and C{_checkIntervals}.

        @param data: The raw values.
        @param mask: An existing mask, or None.
        @rtype: DataColumn
        @return: A DataColumn of this field type holding the converted data and mask.
        """
        # NOTE(review): _checkNumpy is defined elsewhere; presumably it normalizes Numpy inputs -- confirm
        data, mask = self._checkNumpy(data, mask)
        if isinstance(data, NP.ndarray) and (mask is None or isinstance(mask, NP.ndarray)) and data.dtype == self.dtype:
            # path 1: data already has the right dtype; only NaNs need flagging
            mask2 = NP("isnan", data)
            if mask is None:
                mask = NP("array", mask2, defs.maskType) * defs.MISSING
            else:
                # the caller's mask array is updated in place
                mask[mask2] = defs.MISSING

        else:
            # NOTE(review): _checkNonNumpy is defined elsewhere; its effect on data/mask is not visible here
            data, mask = self._checkNonNumpy(data, mask)
            try:
                data = NP("array", data, dtype=self.dtype)
                # mask is handled in the else statement after the except block

            except (ValueError, TypeError):
                # path 3: bulk conversion failed, so convert element by element
                data2 = NP("empty", len(data), dtype=self.dtype)
                if mask is None:
                    mask2 = NP("zeros", len(data), dtype=defs.maskType)
                else:
                    # any truthy entry of the incoming mask is treated as MISSING
                    mask2 = NP("fromiter", ((defs.VALID if not m else defs.MISSING) for m in mask), dtype=defs.maskType, count=len(mask))

                for i, v in enumerate(data):
                    try:
                        data2[i] = v
                        # a float NaN or the string "NAN" (any case) on a VALID row means MISSING
                        if mask2[i] == defs.VALID and ((isinstance(v, float) and math.isnan(v)) or (isinstance(v, basestring) and v.upper() == "NAN")):
                            mask2[i] = defs.MISSING
                        # None is sent to the except branch below even if the assignment succeeded
                        if v is None:
                            raise TypeError
                    except (ValueError, TypeError):
                        # unconvertible element: store padding and flag the row if it was still VALID
                        data2[i] = defs.PADDING
                        if mask2[i] == defs.VALID:
                            if (isinstance(v, float) and math.isnan(v)) or (isinstance(v, basestring) and v.upper() == "NAN"):
                                mask2[i] = defs.MISSING
                            else:
                                mask2[i] = defs.INVALID

                # a mask with no nonzero entries is represented as None
                if not mask2.any():
                    mask2 = None

                data, mask = data2, mask2

            else:
                # path 2: bulk conversion succeeded; combine NaN positions with the incoming mask
                mask2 = NP("isnan", data)
                if mask is None:
                    # NOTE(review): unlike the other branches this is not multiplied by defs.MISSING;
                    # equivalent only if defs.MISSING == 1 -- confirm
                    mask = NP("array", mask2, defs.maskType)
                else:
                    mask = NP(NP("array", NP("logical_or", mask2, NP("fromiter", (m != 0 for m in mask), dtype=NP.dtype(bool), count=len(mask))), defs.maskType) * defs.MISSING)
                # a mask with no nonzero entries is represented as None
                if not mask.any():
                    mask = None

        # apply the Value and Interval restrictions declared for this field type
        data, mask = self._checkValues(data, mask)
        data, mask = self._checkIntervals(data, mask)
        return DataColumn(self, data, mask)
开发者ID:Huskyeder,项目名称:augustus,代码行数:56,代码来源:FieldType.py

示例15: compare

# 需要导入模块: from augustus.core.NumpyInterface import NP [as 别名]
# 或者: from augustus.core.NumpyInterface.NP import dtype [as 别名]
    def compare(self, dataTable, functionTable, performanceTable, centerString, defaultCompareFunction, anyInvalid):
        """Compare input data with a cluster center along the
        direction of this field.

        Cluster distances are computed in two steps: this C{compare}
        function, which determines the distance in the direction of a
        field, and the metric, which combines results from each field.

        @type dataTable: DataTable
        @param dataTable: The input data.
        @type functionTable: FunctionTable
        @param functionTable: A table of functions.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @type centerString: string
        @param centerString: The center of the cluster in this field, represented as a string.
        @type defaultCompareFunction: string
        @param defaultCompareFunction: The C{compareFunction} defined at the model level, which may be overruled on a per-field basis.
        @type anyInvalid: 1d Numpy array of bool
        @param anyInvalid: Mask for invalid data, accumulated with each C{compare} call.  This method modifies it.
        @rtype: 1d Numpy array of numbers
        @return: The distances or similarities between the input data and the cluster center, along the distance of this field.
        @raise PmmlValidationError: If the compareFunction is unrecognized, if "gaussSim" lacks a similarityScale, or if "table" is used with non-integer data, without exactly one Comparisons/Matrix, or with a center that is not a valid row index.
        """

        performanceTable.begin("ClusteringField")

        dataColumn = dataTable.fields[self["field"]]

        if dataColumn.mask is not None:
            # even though DataColumns are immutable, we're allowed to change the invalid values
            # because they're not defined; set them so that x - y = 0, and hence they'll be
            # effectively skipped in summations without any extra work
            dataColumn._unlock()
            dataColumn.data[NP(dataColumn.mask != defs.VALID)] = dataColumn.fieldType.stringToValue(centerString)
            dataColumn._lock()

        # a per-field compareFunction overrides the model-level default
        compareFunction = self.get("compareFunction", defaultCompareFunction)

        if compareFunction == "absDiff":
            result = NP("absolute", NP(dataColumn.data - dataColumn.fieldType.stringToValue(centerString)))

        elif compareFunction == "gaussSim":
            similarityScale = self.get("similarityScale")
            if similarityScale is None:
                raise defs.PmmlValidationError("If compareFunction is \"gaussSim\", a similarityScale must be provided")
            s = float(similarityScale)
            z = NP(dataColumn.data - dataColumn.fieldType.stringToValue(centerString))

            result = NP("exp", NP((-self.LOG2/s**2) * NP(z**2)))

        elif compareFunction == "delta":
            result = NP(dataColumn.data != dataColumn.fieldType.stringToValue(centerString))

        elif compareFunction == "equal":
            result = NP(dataColumn.data == dataColumn.fieldType.stringToValue(centerString))

        elif compareFunction == "table":
            if dataColumn.fieldType.dataType != "integer":
                raise defs.PmmlValidationError("If compareFunction is \"table\", the data must be integers")

            matrix = self.xpath("pmml:Comparisons/pmml:Matrix")
            if len(matrix) != 1:
                raise defs.PmmlValidationError("If compareFunction is \"table\", ClusteringFields needs a Comparisons/Matrix")
            values = matrix[0].values(convertType=False)

            # the cluster center selects the matrix row; each data value selects the column
            centerValue = dataColumn.fieldType.stringToValue(centerString)
            try:
                row = values[centerValue]
            except IndexError:
                raise defs.PmmlValidationError("Cluster center component is %s, but this is an invalid row index for the Comparisons/Matrix (0-indexed)" % centerString)

            result = NP("empty", len(dataTable), dtype=NP.dtype(float))
            valid = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
            for j, value in enumerate(row):
                selection = NP(dataColumn.data == j)
                result[selection] = dataColumn.fieldType.stringToValue(value)
                NP("logical_or", valid, selection, valid)
            # rows whose value matched no column index have no table entry
            NP("logical_or", anyInvalid, NP("logical_not", valid), anyInvalid)

        else:
            # without this branch an unknown compareFunction left "result" unbound
            # and surfaced as an UnboundLocalError at the return statement
            raise defs.PmmlValidationError("Unrecognized compareFunction \"%s\"; expected one of \"absDiff\", \"gaussSim\", \"delta\", \"equal\", \"table\"" % compareFunction)

        performanceTable.end("ClusteringField")
        return result
开发者ID:Huskyeder,项目名称:augustus,代码行数:83,代码来源:ClusteringField.py


注:本文中的augustus.core.NumpyInterface.NP.dtype方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。