当前位置: 首页>>代码示例>>Python>>正文


Python Vectors.dense方法代码示例

本文整理汇总了Python中pyspark.mllib.linalg.Vectors.dense方法的典型用法代码示例。如果您正苦于以下问题:Python Vectors.dense方法的具体用法?Python Vectors.dense怎么用?Python Vectors.dense使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyspark.mllib.linalg.Vectors的用法示例。


在下文中一共展示了Vectors.dense方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_save_load

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
 def test_save_load(self):
     """Round-trip a TrainValidationSplit and its fitted model through save/load."""
     temp_path = tempfile.mkdtemp()
     sqlContext = SQLContext(self.sc)
     rows = [(Vectors.dense([0.0]), 0.0),
             (Vectors.dense([0.4]), 1.0),
             (Vectors.dense([0.5]), 0.0),
             (Vectors.dense([0.6]), 1.0),
             (Vectors.dense([1.0]), 1.0)]
     dataset = sqlContext.createDataFrame(rows * 10, ["features", "label"])
     lr = LogisticRegression()
     param_grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
     tvs = TrainValidationSplit(estimator=lr,
                                estimatorParamMaps=param_grid,
                                evaluator=BinaryClassificationEvaluator())
     tvsModel = tvs.fit(dataset)
     # Persist the estimator and verify the reloaded copy matches the original.
     tvs_path = temp_path + "/tvs"
     tvs.save(tvs_path)
     reloaded_tvs = TrainValidationSplit.load(tvs_path)
     self.assertEqual(reloaded_tvs.getEstimator().uid, tvs.getEstimator().uid)
     self.assertEqual(reloaded_tvs.getEvaluator().uid, tvs.getEvaluator().uid)
     self.assertEqual(reloaded_tvs.getEstimatorParamMaps(), tvs.getEstimatorParamMaps())
     # Persist the fitted model and verify the best sub-model survives the trip.
     model_path = temp_path + "/tvsModel"
     tvsModel.save(model_path)
     reloaded_model = TrainValidationSplitModel.load(model_path)
     self.assertEqual(reloaded_model.bestModel.uid, tvsModel.bestModel.uid)
开发者ID:Bella-Lin,项目名称:spark,代码行数:27,代码来源:tests.py

示例2: test_nnclassifier_in_pipeline

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
    def test_nnclassifier_in_pipeline(self):
        """Check that NNClassifier composes with an ML Pipeline end to end."""
        # The MLlib (pre-ML) vector path is only exercised on Spark 1.x.
        if not self.sc.version.startswith("1"):
            return
        from pyspark.mllib.linalg import Vectors

        df = self.sqlContext.createDataFrame(
            [(Vectors.dense([2.0, 1.0]), 1.0),
             (Vectors.dense([1.0, 2.0]), 2.0),
             (Vectors.dense([2.0, 1.0]), 1.0),
             (Vectors.dense([1.0, 2.0]), 2.0)],
            ["features", "label"])

        scaler = MinMaxScaler().setInputCol("features").setOutputCol("scaled")
        net = Sequential().add(Linear(2, 2))
        classifier = NNClassifier(net, ClassNLLCriterion(), MLlibVectorToTensor([2])) \
            .setBatchSize(4) \
            .setLearningRate(0.01).setMaxEpoch(1).setFeaturesCol("scaled")

        # Scale first, then classify on the scaled column.
        fitted_pipeline = Pipeline(stages=[scaler, classifier]).fit(df)

        result = fitted_pipeline.transform(df)
        assert type(result).__name__ == 'DataFrame'
开发者ID:ru003ar,项目名称:analytics-zoo,代码行数:27,代码来源:test_nn_classifier.py

示例3: test_model_transform

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
    def test_model_transform(self):
        """ElementwiseProduct must scale dense and sparse vectors componentwise."""
        scaling = Vectors.dense([3, 2, 1])
        transformer = ElementwiseProduct(scaling)

        dense_input = Vectors.dense([4, 5, 6])
        self.assertEqual(transformer.transform(dense_input), DenseVector([12, 10, 6]))

        # Only index 0 is populated, so only index 0 survives in the output.
        sparse_input = Vectors.sparse(3, [0], [1])
        self.assertEqual(transformer.transform(sparse_input), SparseVector(3, [0], [3]))
开发者ID:HodaAlemi,项目名称:spark,代码行数:11,代码来源:tests.py

示例4: _get_train_data

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
 def _get_train_data(self):
     """Build a tiny four-row labeled DataFrame used as training input."""
     sql_context = SQLContext(self.sc)
     # All rows share the same features; labels alternate 1.0 / 0.0 by id.
     rows = [(row_id, Vectors.dense([1, 2, 3]), float(row_id % 2))
             for row_id in range(1, 5)]
     return sql_context.createDataFrame(rows, ['id', 'features', 'label'])
开发者ID:ngarneau,项目名称:sentiment-analysis,代码行数:11,代码来源:pipelines.py

示例5: test_output_columns

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
 def test_output_columns(self):
     """OneVsRest transform should append exactly one 'prediction' column."""
     train_rows = [(0.0, Vectors.dense(1.0, 0.8)),
                   (1.0, Vectors.sparse(2, [], [])),
                   (2.0, Vectors.dense(0.5, 0.5))]
     df = self.spark.createDataFrame(train_rows, ["label", "features"])
     base_classifier = LogisticRegression(maxIter=5, regParam=0.01)
     fitted = OneVsRest(classifier=base_classifier).fit(df)
     transformed = fitted.transform(df)
     self.assertEqual(transformed.columns, ["label", "features", "prediction"])
开发者ID:A7mech,项目名称:spark,代码行数:12,代码来源:tests.py

示例6: test_idf_model

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
 def test_idf_model(self):
     """Fitting IDF on term-frequency vectors yields one weight per term."""
     term_counts = [
         Vectors.dense([1, 2, 6, 0, 2, 3, 1, 1, 0, 0, 3]),
         Vectors.dense([1, 3, 0, 1, 3, 0, 0, 2, 0, 0, 1]),
         Vectors.dense([1, 4, 1, 0, 0, 4, 9, 0, 1, 2, 0]),
         Vectors.dense([2, 1, 0, 3, 0, 0, 5, 0, 2, 3, 9]),
     ]
     fitted = IDF().fit(self.sc.parallelize(term_counts, 2))
     # 11 terms in, 11 IDF weights out.
     self.assertEqual(len(fitted.idf()), 11)
开发者ID:HodaAlemi,项目名称:spark,代码行数:12,代码来源:tests.py

示例7: load_data_rdd

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
def load_data_rdd(csv_file, shuffle=True, train=True):
    """Read a CSV file into an RDD of (feature vector, label string) pairs.

    Rows whose first field is the 'id' header are dropped.  For training
    data the last column carries the class name ('Class_<n>'), which is
    reduced to its bare number; test data gets the dummy label "1".
    Relies on the module-level ``sc`` and ``data_path``.
    """
    if shuffle:
        shuffle_csv(csv_file)

    def to_train_pair(fields):
        # Everything between the id and the final class column is a feature.
        features = Vectors.dense(np.asarray(fields[1:-1]).astype(np.float32))
        return features, str(fields[-1]).replace('Class_', '')

    def to_test_pair(fields):
        return Vectors.dense(np.asarray(fields[1:]).astype(np.float32)), "1"

    rows = (sc.textFile(data_path + csv_file)
              .filter(lambda line: line.split(',')[0] != 'id')
              .map(lambda line: line.split(',')))
    return rows.map(to_train_pair if train else to_test_pair)
开发者ID:thisiskofi,项目名称:elephas,代码行数:14,代码来源:ml_pipeline_otto.py

示例8: remove_time_dependent_effects

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
 def remove_time_dependent_effects(self, ts):
     """
     Invert this model on a time series to recover the underlying errors.

     Parameters
     ----------
     ts:
         Time series of observations with this model's characteristics as a Numpy array

     returns the time series with removed time-dependent effects as a Numpy array
     """
     # Zero-filled destination buffer the JVM side writes its result into.
     dest = Vectors.dense(np.zeros(len(ts)))
     java_result = self._jmodel.removeTimeDependentEffects(
         _py2java(self._ctx, Vectors.dense(ts)), _py2java(self._ctx, dest))
     return _java2py(self._ctx, java_result.toArray())
开发者ID:BabelTower,项目名称:spark-timeseries,代码行数:15,代码来源:_model.py

示例9: load_data_frame

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
def load_data_frame(csv_file, shuffle=True, train=True,
                    data_dir='/home/minglu/dist_spark/data/'):
    """Load a CSV into a DataFrame with 'features', 'category', 'label' columns.

    Parameters
    ----------
    csv_file : str
        File name, resolved relative to ``data_dir``.
    shuffle : bool
        When True, shuffle the CSV on disk first via ``shuffle_csv``.
    train : bool
        Kept for interface compatibility; both branches of the original
        produced identical rows (test data reuses column 0 as a dummy
        label), so the flag no longer changes the output.
    data_dir : str
        Directory containing the CSV files (previously hard-coded inline).

    Relies on the module-level ``sc``, ``sqlcontext``, ``Vectors`` and ``np``.
    """
    if shuffle:
        shuffle_csv(csv_file)
    # RDD of comma-split rows, with the 'label' header line dropped.
    data = sc.textFile(data_dir + csv_file)
    data = data.filter(lambda x: x.split(',')[0] != 'label').map(lambda line: line.split(','))

    def to_row(line):
        # Column 0 is the label; the remaining columns are float features.
        features = Vectors.dense(np.asarray(line[1:]).astype(np.float32))
        return features, 'class_' + str(line[0]), int(line[0])

    return sqlcontext.createDataFrame(data.map(to_row), ['features', 'category', 'label'])
开发者ID:ChienHsiung,项目名称:python,代码行数:15,代码来源:spark101.py

示例10: create_rows_for_rdd

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
def create_rows_for_rdd(x):
    """Turn a (meta_data, values) pair into a labeled Row.

    Parameters
    ----------
    x : tuple
        ``x[0]`` is the metadata sequence; ``x[1]`` is an iterable whose
        last element is the numeric label and whose remaining elements
        are the features.

    Returns
    -------
    pyspark.sql.Row with 'label', 'features' and 'meta_data' fields.
    """
    features = list(x[1])
    # The label rides along as the final element of the value list.
    label = float(features.pop())
    return Row(label=label,
               features=Vectors.dense(features),
               meta_data=Vectors.dense(x[0]))
开发者ID:USF-ML2,项目名称:SKYNET-,代码行数:15,代码来源:modeling_utils.py

示例11: test_copy

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
 def test_copy(self):
     """copy() with param overrides must not mutate the source objects."""
     rows = [(0.0, Vectors.dense(1.0, 0.8)),
             (1.0, Vectors.sparse(2, [], [])),
             (2.0, Vectors.dense(0.5, 0.5))]
     df = self.spark.createDataFrame(rows, ["label", "features"])
     lr = LogisticRegression(maxIter=5, regParam=0.01)
     ovr = OneVsRest(classifier=lr)
     ovr_copy = ovr.copy({lr.maxIter: 10})
     # Override lands on the copy only; the original keeps maxIter=5.
     self.assertEqual(ovr.getClassifier().getMaxIter(), 5)
     self.assertEqual(ovr_copy.getClassifier().getMaxIter(), 10)
     model = ovr.fit(df)
     model_copy = model.copy({model.predictionCol: "indexed"})
     self.assertEqual(model_copy.getPredictionCol(), "indexed")
开发者ID:A7mech,项目名称:spark,代码行数:15,代码来源:tests.py

示例12: add_time_dependent_effects

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
 def add_time_dependent_effects(self, ts):
     """
     Given a timeseries, apply a model to it.

     Parameters
     ----------
     ts:
         Time series of i.i.d. observations as a Numpy array

     returns the time series with added time-dependent effects as a Numpy array.
     """
     # Zero-filled destination buffer the JVM side writes its result into.
     dest = Vectors.dense([0.0] * len(ts))
     java_result = self._jmodel.addTimeDependentEffects(
         _py2java(self._ctx, Vectors.dense(ts)), _py2java(self._ctx, dest))
     return _java2py(self._ctx, java_result.toArray())
开发者ID:BabelTower,项目名称:spark-timeseries,代码行数:16,代码来源:_model.py

示例13: to_vector

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
def to_vector(np_array):
    """Convert a one-dimensional numpy array to an MLlib dense Vector.

    Parameters
    ----------
    np_array : numpy.ndarray
        Array to convert; must be one-dimensional.

    Raises
    ------
    ValueError
        If *np_array* is not one-dimensional.  (Was a bare ``Exception``
        whose triple-quoted message embedded source indentation.)
    """
    if np_array.ndim != 1:
        raise ValueError(
            "An MLlib Vector can only be created from a one-dimensional "
            "numpy array, got %d dimensions" % np_array.ndim)
    return Vectors.dense(np_array)
开发者ID:ZhangAustin,项目名称:elephas,代码行数:9,代码来源:adapter.py

示例14: test_persistence

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
 def test_persistence(self):
     """Save/load round-trips for the LDA estimator and both model kinds."""
     sqlContext = SQLContext(self.sc)
     df = sqlContext.createDataFrame([
         [1, Vectors.dense([0.0, 1.0])],
         [2, Vectors.sparse(2, {0: 1.0})],
     ], ["id", "features"])
     # The EM optimizer yields a DistributedLDAModel; the local variant is
     # derived from it rather than fit directly.
     lda = LDA(k=2, seed=1, optimizer="em")
     distributedModel = lda.fit(df)
     self.assertTrue(distributedModel.isDistributed())
     localModel = distributedModel.toLocal()
     self.assertFalse(localModel.isDistributed())
     base_dir = tempfile.mkdtemp()
     # Round-trip each object and compare it with its reloaded twin.
     lda_path = base_dir + "/lda"
     lda.save(lda_path)
     self._compare(lda, LDA.load(lda_path))
     dist_path = base_dir + "/distLDAModel"
     distributedModel.save(dist_path)
     self._compare(distributedModel, DistributedLDAModel.load(dist_path))
     local_path = base_dir + "/localLDAModel"
     localModel.save(local_path)
     self._compare(localModel, LocalLDAModel.load(local_path))
     # Best-effort cleanup of the temp directory.
     try:
         rmtree(base_dir)
     except OSError:
         pass
开发者ID:bsangee,项目名称:spark,代码行数:37,代码来源:tests.py

示例15: buildLabeledPoint

# 需要导入模块: from pyspark.mllib.linalg import Vectors [as 别名]
# 或者: from pyspark.mllib.linalg.Vectors import dense [as 别名]
def buildLabeledPoint(s, classification):
    """Build a LabeledPoint whose features pair up the '_1' and '_2'
    variants of every attribute in the module-level ``attributes`` list
    (all '_1' values first, then all '_2' values)."""
    features = [getattr(s, attr + suffix)
                for suffix in ('_1', '_2')
                for attr in attributes]
    return LabeledPoint(classification, Vectors.dense(features))
开发者ID:gadamc,项目名称:simple-data-pipe-connector-flightstats,代码行数:9,代码来源:__init__.py


注:本文中的pyspark.mllib.linalg.Vectors.dense方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。