

Python types.DoubleType Code Examples

This article collects typical usage examples of pyspark.sql.types.DoubleType in Python. If you are wondering what types.DoubleType does, how it is used, or what real code that uses it looks like, the hand-picked examples below should help. You can also explore further usage examples from the pyspark.sql.types module, where DoubleType is defined.


The following sections present 15 code examples of types.DoubleType, ordered by popularity by default.
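Before the collected examples, here is a minimal, self-contained sketch written for this article (it is not taken from any of the projects below, and the column names and values are purely illustrative). It shows the two uses of DoubleType that recur throughout the examples: declaring a DoubleType column in an explicit schema, and casting an existing column to DoubleType.

from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType

spark = SparkSession.builder.appName("doubletype-demo").getOrCreate()

# Declare a DoubleType column explicitly in a schema.
schema = StructType([
    StructField("name", StringType(), False),
    StructField("score", DoubleType(), True),
])
df = spark.createDataFrame([("a", 1.5), ("b", 2.0)], schema=schema)

# Cast an existing column to DoubleType, e.g. after reading CSV data where every column is a string.
df = df.withColumn("score", df["score"].cast(DoubleType()))
df.printSchema()

Most of the examples below are variations on these two patterns: building a StructType schema that contains DoubleType fields, casting columns with .cast(DoubleType()), or comparing a column's dataType against DoubleType().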

Example 1: main

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def main():
    temp_schema = StructType([
        StructField('StationID', StringType(), False),
        StructField('DateTime', StringType(), False),
        StructField('Observation', StringType(), False),
        StructField('DataValue', DoubleType(), False),
        StructField('MFlag', StringType(), True),
        StructField('QFlag', StringType(), True),
        StructField('SFlag', StringType(), True),
        StructField('OBSTime', StringType(), True),
    ])

    df = sqlContext.read.format('com.databricks.spark.csv').options(header='false').load(inputs1, schema=temp_schema)
    df = df.filter(df.QFlag == '')

    dfrange = get_range(df)
    result = dfrange.rdd.map(lambda r: str(r.DateTime)+' '+str(r.StationID)+' '+str(r.MaxRange))
    outdata = result.sortBy(lambda r: r[0]).coalesce(1)
    outdata.saveAsTextFile(output) 
Author: hanhanwu, Project: Hanhan-Spark-Python, Lines: 21, Source: temp_range_sql.py

Example 2: _convert_precision

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def _convert_precision(df, dtype):
    if dtype is None:
        return df

    if dtype != "float32" and dtype != "float64":
        raise ValueError("dtype {} is not supported. \
            Use 'float32' or float64".format(dtype))

    source_type, target_type = (DoubleType, FloatType) \
        if dtype == "float32" else (FloatType, DoubleType)

    logger.warning("Converting floating-point columns to %s", dtype)

    for field in df.schema:
        col_name = field.name
        if isinstance(field.dataType, source_type):
            df = df.withColumn(col_name, df[col_name].cast(target_type()))
        elif isinstance(field.dataType, ArrayType) and \
                isinstance(field.dataType.elementType, source_type):
            df = df.withColumn(col_name, df[col_name].cast(ArrayType(target_type())))
    return df 
Author: uber, Project: petastorm, Lines: 23, Source: spark_dataset_converter.py

Example 3: _transform

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def _transform(self, dataset):
        if any([field.dataType == DoubleType() for field in dataset.schema]):
            logger.warning("Detected DoubleType columns in dataframe passed to transform(). In "
                           "Deep Learning Pipelines 1.0 and above, DoubleType columns can only be "
                           "fed to input tensors of type tf.float64. To feed dataframe data to "
                           "tensors of other types (e.g. tf.float32, tf.int32, tf.int64), use the "
                           "corresponding Spark SQL data types (FloatType, IntegerType, LongType).")

        graph_def = self._optimize_for_inference()
        input_mapping = self.getInputMapping()
        output_mapping = self.getOutputMapping()

        graph = tf.Graph()
        with tf.Session(graph=graph):
            analyzed_df = tfs.analyze(dataset)
            out_tnsr_op_names = [tfx.op_name(tnsr_name) for tnsr_name, _ in output_mapping]
            # Load graph
            tf.import_graph_def(graph_def=graph_def, name='', return_elements=out_tnsr_op_names)
            # Feed dict maps from placeholder name to DF column name
            feed_dict = {tfx.op_name(tnsr_name): col_name for col_name, tnsr_name in input_mapping}
            fetches = [tfx.get_tensor(tnsr_name, graph) for tnsr_name in out_tnsr_op_names]
            out_df = tfs.map_blocks(fetches, analyzed_df, feed_dict=feed_dict)
            # We still have to rename output columns
            for tnsr_name, new_colname in output_mapping:
                old_colname = tfx.op_name(tnsr_name, graph)
                if old_colname != new_colname:
                    out_df = out_df.withColumnRenamed(old_colname, new_colname)

        return out_df 
Author: databricks, Project: spark-deep-learning, Lines: 31, Source: tf_tensor.py

Example 4: _transform

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def _transform(self, dataset):
        inp = self.getOrDefault(self.inputCol)
        out = self.getOrDefault(self.predictionCol)
        mod_str = self.getOrDefault(self.modStr)
        use_vector_out = self.getOrDefault(self.useVectorOut)

        model = dill.loads(codecs.decode(mod_str.encode(), "base64"))
        model_broadcast = dataset._sc.broadcast(model)

        def predict_vec(data):
            features = data.toArray().reshape((1, len(data)))
            x_data = torch.from_numpy(features).float()
            model = model_broadcast.value
            model.eval()
            return Vectors.dense(model(x_data).detach().numpy().flatten())

        def predict_float(data):
            features = data.toArray().reshape((1, len(data)))
            x_data = torch.from_numpy(features).float()
            model = model_broadcast.value
            model.eval()
            raw_prediction = model(x_data).detach().numpy().flatten()
            if len(raw_prediction) > 1:
                return float(np.argmax(raw_prediction))
            return float(raw_prediction[0])

        if use_vector_out:
            udfGenerateCode = F.udf(predict_vec, VectorUDT())
        else:
            udfGenerateCode = F.udf(predict_float, DoubleType())

        return dataset.withColumn(out, udfGenerateCode(inp)) 
Author: dmmiller612, Project: sparktorch, Lines: 34, Source: torch_distributed.py

Example 5: _transform

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def _transform(self, df):
        """Private transform method of a Transformer. This serves as batch-prediction method for our purposes.
        """
        output_col = self.getOutputCol()
        label_col = self.getLabelCol()
        new_schema = copy.deepcopy(df.schema)
        new_schema.add(StructField(output_col, StringType(), True))

        rdd = df.rdd.coalesce(1)
        features = np.asarray(
            rdd.map(lambda x: from_vector(x.features)).collect())
        # Note that we collect, since executing this on the rdd would require model serialization once again
        model = model_from_yaml(self.get_keras_model_config())
        model.set_weights(self.weights.value)
        predictions = rdd.ctx.parallelize(
            model.predict_classes(features)).coalesce(1)
        predictions = predictions.map(lambda x: (str(x),))

        results_rdd = rdd.zip(predictions).map(lambda x: x[0] + x[1])
        results_df = df.sql_ctx.createDataFrame(results_rdd, new_schema)
        results_df = results_df.withColumn(
            output_col, results_df[output_col].cast(DoubleType()))
        results_df = results_df.withColumn(
            label_col, results_df[label_col].cast(DoubleType()))

        return results_df 
Author: maxpumperla, Project: elephas, Lines: 28, Source: ml_model.py

Example 6: _numpy_to_spark_mapping

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def _numpy_to_spark_mapping():
    """Returns a mapping from numpy to pyspark.sql type. Caches the mapping dictionary inorder to avoid instantiation
    of multiple objects in each call."""

    # Refer to the attribute of the function we use to cache the map using a name in the variable instead of a 'dot'
    # notation to avoid copy/paste/typo mistakes
    cache_attr_name = 'cached_numpy_to_pyspark_types_map'
    if not hasattr(_numpy_to_spark_mapping, cache_attr_name):
        import pyspark.sql.types as T

        setattr(_numpy_to_spark_mapping, cache_attr_name,
                {
                    np.int8: T.ByteType(),
                    np.uint8: T.ShortType(),
                    np.int16: T.ShortType(),
                    np.uint16: T.IntegerType(),
                    np.int32: T.IntegerType(),
                    np.int64: T.LongType(),
                    np.float32: T.FloatType(),
                    np.float64: T.DoubleType(),
                    np.string_: T.StringType(),
                    np.str_: T.StringType(),
                    np.unicode_: T.StringType(),
                    np.bool_: T.BooleanType(),
                })

    return getattr(_numpy_to_spark_mapping, cache_attr_name)


# TODO: Changing fields in this class or the UnischemaField will break reading due to the schema being pickled next to
# the dataset on disk 
Author: uber, Project: petastorm, Lines: 33, Source: unischema.py

Example 7: test_decode_numpy_scalar_with_explicit_scalar_codec

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def test_decode_numpy_scalar_with_explicit_scalar_codec():
    """Decoding a row that has a field with the codec set explicitly"""

    MatrixSchema = Unischema('TestSchema', [UnischemaField('scalar', np.float64, (), ScalarCodec(DoubleType()), False)])
    row = {'scalar': 42.0}
    decoded_value = decode_row(row, MatrixSchema)['scalar']
    assert decoded_value == 42
    assert isinstance(decoded_value, np.float64) 
Author: uber, Project: petastorm, Lines: 10, Source: test_decode_row.py

Example 8: encode

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def encode(self, unischema_field, value):
        # Lazy loading pyspark to avoid creating pyspark dependency on data reading code path
        # (currently works only with make_batch_reader). We should move all pyspark related code into a separate module
        import pyspark.sql.types as sql_types

        # We treat ndarrays with shape=() as scalars
        unsized_numpy_array = isinstance(value, np.ndarray) and value.shape == ()
        # Validate the input to be a scalar (or an unsized numpy array)
        if not unsized_numpy_array and hasattr(value, '__len__') and (not isinstance(value, str)):
            raise TypeError('Expected a scalar as a value for field \'{}\'. '
                            'Got a non-numpy type \'{}\''.format(unischema_field.name, type(value)))

        if unischema_field.shape:
            raise ValueError('The shape field of unischema_field \'%s\' must be an empty tuple (i.e. \'()\') '
                             'to indicate a scalar. However, the actual shape is %s'
                             % (unischema_field.name, unischema_field.shape))
        if isinstance(self._spark_type, (sql_types.ByteType, sql_types.ShortType, sql_types.IntegerType,
                                         sql_types.LongType)):
            return int(value)
        if isinstance(self._spark_type, (sql_types.FloatType, sql_types.DoubleType)):
            return float(value)
        if isinstance(self._spark_type, sql_types.BooleanType):
            return bool(value)
        if isinstance(self._spark_type, sql_types.StringType):
            if not isinstance(value, str):
                raise ValueError(
                    'Expected a string value for field {}. Got type {}'.format(unischema_field.name, type(value)))
            return str(value)

        return value 
Author: uber, Project: petastorm, Lines: 32, Source: codecs.py

Example 9: outputDataType

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def outputDataType(self):
        return DoubleType() 
Author: runawayhorse001, Project: LearningApacheSpark, Lines: 4, Source: tests.py

Example 10: validateInputType

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def validateInputType(self, inputType):
        if inputType != DoubleType():
            raise TypeError("Bad input type: {}. ".format(inputType) +
                            "Requires Double.") 
Author: runawayhorse001, Project: LearningApacheSpark, Lines: 6, Source: tests.py

Example 11: test_unary_transformer_validate_input_type

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def test_unary_transformer_validate_input_type(self):
        shiftVal = 3
        transformer = MockUnaryTransformer(shiftVal=shiftVal)\
            .setInputCol("input").setOutputCol("output")

        # should not raise any errors
        transformer.validateInputType(DoubleType())

        with self.assertRaises(TypeError):
            # passing the wrong input type should raise an error
            transformer.validateInputType(IntegerType()) 
Author: runawayhorse001, Project: LearningApacheSpark, Lines: 13, Source: tests.py

Example 12: __init__

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def __init__(self, scoreAndLabels):
        sc = scoreAndLabels.ctx
        sql_ctx = SQLContext.getOrCreate(sc)
        df = sql_ctx.createDataFrame(scoreAndLabels, schema=StructType([
            StructField("score", DoubleType(), nullable=False),
            StructField("label", DoubleType(), nullable=False)]))
        java_class = sc._jvm.org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
        java_model = java_class(df._jdf)
        super(BinaryClassificationMetrics, self).__init__(java_model) 
Author: runawayhorse001, Project: LearningApacheSpark, Lines: 11, Source: evaluation.py

Example 13: test_fromString

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def test_fromString(self):
        s = SmvSchema.fromString("a:string; b:double")
        fields = s.spark_schema.fields
        assert(len(fields) == 2)
        assert(fields[0] == st.StructField('a', st.StringType()))
        assert(fields[1] == st.StructField('b', st.DoubleType())) 
Author: TresAmigosSD, Project: SMV, Lines: 8, Source: testSmvSchema.py

Example 14: load_schema

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def load_schema(analysis_path):
  type_map = {
    'KEY': StringType(),
    'NUMBER': DoubleType(),
    'CATEGORY': StringType(),
    'TEXT': StringType(),
    'IMAGE_URL': StringType()
  }
  schema_file = os.path.join(analysis_path, 'schema.json')
  schema_json = json.loads(file_io.read_file_to_string(schema_file))
  fields = [StructField(x['name'], type_map[x['type']]) for x in schema_json]
  return schema_json, StructType(fields) 
Author: kubeflow, Project: pipelines, Lines: 14, Source: transform_run.py

Example 15: load_schema

# Required import: from pyspark.sql import types [as alias]
# Or: from pyspark.sql.types import DoubleType [as alias]
def load_schema(schema_file):
  type_map = {
    'KEY': StringType(),
    'NUMBER': DoubleType(),
    'CATEGORY': StringType(),
    'TEXT': StringType(),
    'IMAGE_URL': StringType()
  }
  
  schema_json = json.loads(file_io.read_file_to_string(schema_file))
  fields = [StructField(x['name'], type_map[x['type']]) for x in schema_json]
  return schema_json, StructType(fields) 
Author: kubeflow, Project: pipelines, Lines: 14, Source: analyze_run.py


Note: The pyspark.sql.types.DoubleType examples in this article were compiled by 纯净天空 from open-source code hosted on GitHub, MSDocs, and similar platforms. The snippets are selected from open-source projects contributed by their original authors, and copyright remains with those authors. Please follow the license of the corresponding project when sharing or using the code, and do not reproduce this article without permission.