Python tree.RandomForest类代码示例

本文整理汇总了Python中pyspark.mllib.tree.RandomForest类的典型用法代码示例。如果您正苦于以下问题：Python RandomForest类的具体用法？Python RandomForest怎么用？Python RandomForest使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了RandomForest类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: testOnce

def testOnce():
    """Train a random forest on one random split and print evaluation metrics.

    Reads the following names from the enclosing module (none of them are
    defined here): ``data``, ``test_size``, ``num_trees``, ``strat``,
    ``max_depth``, ``probTest`` and ``save``.

    Side effects: passes the (label, probability) pairs to ``save`` under the
    name ``'answers'`` and prints purity/completeness/accuracy figures.
    Returns ``None``.
    """
    # split the data into training and testing sets
    (trainingData, testData) = data.randomSplit([1 - test_size, test_size])

    # train the random forest
    model = RandomForest.trainClassifier(trainingData, numClasses=2, categoricalFeaturesInfo={},
                                         numTrees=num_trees, featureSubsetStrategy=strat,
                                         impurity='gini', maxDepth=max_depth, maxBins=32)

    # test the random forest
    predictions = model.predict(testData.map(lambda x: x.features))
    labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)

    # Each element is a (label, prediction) pair.  Index into the pair instead
    # of using a tuple parameter, which Python 3 no longer accepts (PEP 3113).
    testErr = (labelsAndPredictions.filter(lambda vp: vp[0] != vp[1]).count()
               / float(testData.count()))

    # Confusion-matrix cells.  Judging by the metric names printed below,
    # label 0 is "galaxy" and label 1 is "star".
    Mg = float(labelsAndPredictions.filter(lambda vp: vp[0] == 0 and vp[1] == 1).count())
    Ng = float(labelsAndPredictions.filter(lambda vp: vp[0] == 0 and vp[1] == 0).count())
    Ms = float(labelsAndPredictions.filter(lambda vp: vp[0] == 1 and vp[1] == 0).count())
    Ns = float(labelsAndPredictions.filter(lambda vp: vp[0] == 1 and vp[1] == 1).count())

    # probTest returns a pair: element 0 is reported as the threshold-method
    # accuracy, element 1 is scaled by the number of trees.
    probsAndScores = probTest(testData, model)
    threshold_accuracy = probsAndScores[0]
    # NOTE(review): under Python 2 this is integer division when both operands
    # are ints -- confirm that probTest yields floats.
    probs = probsAndScores[1].map(lambda x: x/num_trees)
    labelsAndProbs = testData.map(lambda lp: lp.label).zip(probs)
    save(labelsAndProbs, 'answers')

    print('Galaxy Purity = ' + str(Ng / (Ng+Ms)))
    print('Galaxy Completeness = ' + str(Ng / (Ng+Mg)))
    print('Star Purity = ' + str(Ns / (Ns+Mg)))
    print('Star Completeness = ' + str(Ns/(Ns+Ms)))
    print('Accuracy = ' + str(1 - testErr))
    print('Threshold method accuracy = ' + str(threshold_accuracy))

开发者ID:beatriceliang，项目名称:POPREU，代码行数:29，代码来源:stargalaxy.py

示例2: rfTest

def rfTest(sqlContext, dataset_rdd):
    """Train a 3-tree random-forest classifier on a stratified split and score it.

    :param sqlContext: accepted for interface compatibility; not used here.
    :param dataset_rdd: RDD of indexable rows where ``row[1]`` is the label
        and ``row[2:]`` are the features.  Rows whose label is exactly 0.5
        fall into neither class and are dropped.
    :return: tuple ``(trainset_nums, testset_nums, trainset_positive,
        testset_positive, positive, hitPositive, precision, recallPositive,
        accuracy, F_Value, model)``.  A ratio whose denominator is zero is
        reported as ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    def ratio(numerator, denominator):
        # Guard against empty classes / no positive predictions.
        return numerator / float(denominator) if denominator else 0.0

    dataset_positive = dataset_rdd.filter(lambda e: e[1] > 0.5)
    dataset_negotive = dataset_rdd.filter(lambda e: e[1] < 0.5)

    # Sample ~80% of each class for training; the remainder is the test set.
    train_positive = dataset_positive.sample(False, 0.8)
    test_positive = dataset_positive.subtract(train_positive)
    train_negotive = dataset_negotive.sample(False, 0.8)
    test_negotive = dataset_negotive.subtract(train_negotive)

    trainset_rdd = train_positive.union(train_negotive)
    testset_rdd = test_positive.union(test_negotive)
    trainset = trainset_rdd.map(lambda e: LabeledPoint(e[1], e[2:]))
    trainset_nums = trainset.count()
    testset = testset_rdd.map(lambda e: LabeledPoint(e[1], e[2:]))
    testset_nums = testset.count()
    trainset_positive = train_positive.count()
    testset_positive = test_positive.count()

    # Positional arguments: numClasses=2, categoricalFeaturesInfo={}, numTrees=3.
    model = RandomForest.trainClassifier(trainset, 2, {}, 3)
    predictions = model.predict(testset.map(lambda x: x.features))
    predict = testset.map(lambda lp: lp.label).zip(predictions)

    # Each element of ``predict`` is a (label, prediction) pair.
    hitALL = predict.filter(lambda e: e[0] == e[1]).count()
    hitPositive = predict.filter(lambda e: e[0] == e[1] and (e[0] > 0.5)).count()
    positive = predict.filter(lambda e: e[1] > 0.5).count()

    recallPositive = ratio(hitPositive, testset_positive)
    precision = ratio(hitPositive, positive)
    # Reuse the count already taken instead of launching another Spark job.
    accuracy = ratio(hitALL, testset_nums)
    # Harmonic mean of precision and recall; undefined when either is zero.
    if precision and recallPositive:
        F_Value = 2 / (1 / precision + 1 / recallPositive)
    else:
        F_Value = 0.0
    return (trainset_nums, testset_nums, trainset_positive, testset_positive, positive,
            hitPositive, precision, recallPositive, accuracy, F_Value, model)

开发者ID:fighting410381，项目名称:youmi，代码行数:26，代码来源:spark_script.py

示例3: main

def main():
    """Load train/test CSVs, train a random-forest classifier and test it.

    Relies on ``load_data``, ``reformatData`` and ``testRandomForest`` from
    the enclosing module.  Returns ``None``.
    """
    def drop_last_two(row):
        # Return a copy of ``row`` without its final two entries.
        size = np.size(row)
        return np.delete(row, [size - 2, size - 1])

    sc = SparkContext(appName="MyApp")
    sc.setLogLevel('ERROR')

    # Parse data
    train_labels, train_data = load_data('train.csv')
    dummy_labels, test_data = load_data('test.csv', use_labels=False)

    # Truncate the last 2 features of the data.  The previous loops only
    # rebound the loop variable, so the truncated rows were discarded and the
    # data was left untouched (they also shadowed the builtin ``len``).
    # NOTE(review): this assumes reformatData accepts any iterable of rows --
    # confirm against its definition.
    train_data = [drop_last_two(dataPoint) for dataPoint in train_data]
    test_data = [drop_last_two(dataPoint) for dataPoint in test_data]

    # Map each data point's label to its features
    train_set = reformatData(train_data, train_labels)
    test_set = reformatData(test_data, dummy_labels)

    # Parallelize the data
    parallelized_train_set = sc.parallelize(train_set)
    parallelized_test_set = sc.parallelize(test_set)

    # Split the data
    # NOTE(review): only ~1% of the rows go to trainSet and validationSet is
    # never used -- confirm this split is intentional.
    trainSet, validationSet = parallelized_train_set.randomSplit([0.01, 0.99], seed=42)

    # Train the models
    randomForestModel = RandomForest.trainClassifier(trainSet, numClasses=4, impurity='gini',
                                                     categoricalFeaturesInfo={},
                                                     numTrees=750, seed=42, maxDepth=30, maxBins=32)

    # Test the model
    testRandomForest(randomForestModel, parallelized_test_set)

开发者ID:adepalatis，项目名称:379K_Final_Project，代码行数:34，代码来源:RandomForest.py

示例4: generateRandomForest

def generateRandomForest():
    """Train a random-forest classifier and save it to ``RF_PATH``.

    Returns immediately when ``RF_PATH`` already exists.  Otherwise reads
    ``F_PATH`` through ``parseLine``, trains on a 90/10 split, prints the test
    error and the model's debug string, calls ``modelStatistics`` and saves
    the model.  Relies on the module-level names ``sc``, ``RF_PATH``,
    ``F_PATH``, ``parseLine``, ``classes`` and ``modelStatistics``.
    """
    if os.path.exists(RF_PATH):
        print("RF_PATH Already available")
        return

    data = sc.textFile(F_PATH).map(parseLine)

    # Plain int seed: the ``1L`` long literal is a SyntaxError on Python 3.
    (trainingData, testData) = data.randomSplit([0.9, 0.1], seed=1)

    # Train a RandomForest model.
    #  Note: Use larger numTrees in practice.
    #  Setting featureSubsetStrategy="auto" lets the algorithm choose.
    model = RandomForest.trainClassifier(trainingData, numClasses=len(classes),
                                         categoricalFeaturesInfo={},
                                         numTrees=4, featureSubsetStrategy="auto",
                                         impurity='gini', maxDepth=4, maxBins=32)

    # Evaluate model on test instances and compute test error
    predictions = model.predict(testData.map(lambda x: x.features))
    labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
    # Index into each (label, prediction) pair; tuple parameters in lambdas
    # were removed in Python 3 (PEP 3113).
    testErr = (labelsAndPredictions.filter(lambda vp: vp[0] != vp[1]).count()
               / float(testData.count()))
    # Build one string so Python 2 does not print a tuple repr.
    print('Test Error ' + str(testErr))
    print('Learned classification forest model:')
    print(model.toDebugString())

    modelStatistics(labelsAndPredictions)

    # Save the model
    model.save(sc, RF_PATH)
    print("Saved RF Model.")

开发者ID:GuruTeja，项目名称:iHear-Server，代码行数:29，代码来源:main.py

示例5: main

def main():
    """Train a random-forest regressor and print its RMSE on the test input.

    Command-line arguments: ``sys.argv[1]`` is the training input path and
    ``sys.argv[2]`` the test input path.  Each line of both inputs is turned
    into a labeled point by ``to_labeledpoint`` from the enclosing module.
    """
    import re

    input_train = sys.argv[1]
    input_test = sys.argv[2]

    conf = SparkConf().setAppName('Sentiment Analysis with Random Forest')
    sc = SparkContext(conf=conf)
    # Compare the version numerically: as plain strings '1.10.0' sorts before
    # '1.5.1', so newer releases would wrongly be rejected.
    version_parts = tuple(int(part) for part in re.findall(r'\d+', sc.version)[:3])
    assert version_parts >= (1, 5, 1)

    train = sc.textFile(input_train).cache()
    test = sc.textFile(input_test).cache()

    # sbaronia - get training and testing labeled points
    train_lp = train.map(to_labeledpoint).cache()
    test_lp = test.map(to_labeledpoint).cache()

    # sbaronia - run RandomForest regression on our training data with
    # default options except numTrees = 5
    rf_model = RandomForest.trainRegressor(train_lp, categoricalFeaturesInfo={}, numTrees=5,
                                           featureSubsetStrategy="auto", impurity='variance',
                                           maxDepth=4, maxBins=32)

    # sbaronia - run predictions on testing data and calculate RMSE value
    predictions = rf_model.predict(test_lp.map(lambda x: x.features))
    labelsAndPredictions = test_lp.map(lambda lp: lp.label).zip(predictions)
    # Each element is a (label, prediction) pair; index into it because tuple
    # parameters in lambdas were removed in Python 3 (PEP 3113).
    squared_error_sum = (labelsAndPredictions
                         .map(lambda vp: (vp[0] - vp[1]) ** 2)
                         .reduce(lambda x, y: x + y))
    rmse = math.sqrt(squared_error_sum / float(test_lp.count()))

    print("RMSE = " + str(rmse))

开发者ID:gitofsid，项目名称:MyBigDataCode，代码行数:25，代码来源:randomforest.py

示例6: Random_Forest

def Random_Forest(filename, sc):
    """Train a 3-tree random-forest classifier on a LibSVM file and print its test error.

    :param filename: path of the LibSVM-format data file.  A falsy value
        (``None`` or ``""``) selects the sample data path that used to be
        hard-coded here.
    :param sc: the SparkContext used to load the file.
    :return: ``None``; the test error and the model's debug string are printed.
    """
    # The argument used to be overwritten unconditionally, which made the
    # parameter useless; the old path is now only a fallback.
    if not filename:
        filename = "/Users/Jacob/SparkService/data/sample_libsvm_data.txt"

    # Load and parse the data file into an RDD of LabeledPoint.
    data = MLUtils.loadLibSVMFile(sc, filename)
    # Split the data into training and test sets (30% held out for testing)
    (trainingData, testData) = data.randomSplit([0.7, 0.3])

    # Train a RandomForest model.
    #  Empty categoricalFeaturesInfo indicates all features are continuous.
    #  Note: Use larger numTrees in practice.
    #  Setting featureSubsetStrategy="auto" lets the algorithm choose.
    model = RandomForest.trainClassifier(trainingData, numClasses=2, categoricalFeaturesInfo={},
                                         numTrees=3, featureSubsetStrategy="auto",
                                         impurity='gini', maxDepth=4, maxBins=32)

    # Evaluate model on test instances and compute test error
    predictions = model.predict(testData.map(lambda x: x.features))
    labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
    # Index into each (label, prediction) pair; tuple parameters in lambdas
    # were removed in Python 3 (PEP 3113).
    testErr = (labelsAndPredictions.filter(lambda vp: vp[0] != vp[1]).count()
               / float(testData.count()))
    print('Test Error = ' + str(testErr))
    print('Learned classification forest model:')
    print(model.toDebugString())

开发者ID:bangjieliu，项目名称:SparkService，代码行数:27，代码来源:random_forest.py

示例7: test_regression

    def test_regression(self):
        """Fit each MLlib regressor on four points and check the sign of its predictions.

        The label of every point has the same sign as its second feature, so
        every model must predict ``<= 0`` for rows 0 and 2 and ``> 0`` for
        rows 1 and 3.  Finally the SGD trainers are re-run with explicit
        initial weights and must not raise ``ValueError``.
        """
        from pyspark.mllib.regression import LinearRegressionWithSGD, LassoWithSGD, \
            RidgeRegressionWithSGD
        from pyspark.mllib.tree import DecisionTree, RandomForest, GradientBoostedTrees
        points = [
            LabeledPoint(-1.0, [0, -1]),
            LabeledPoint(1.0, [0, 1]),
            LabeledPoint(-1.0, [0, -2]),
            LabeledPoint(1.0, [0, 2]),
        ]
        rdd = self.sc.parallelize(points)
        features = [point.features.tolist() for point in points]
        sgd_trainers = (LinearRegressionWithSGD, LassoWithSGD, RidgeRegressionWithSGD)

        def check_signs(model):
            # Rows alternate negative / positive labels, starting negative.
            for row, expect_positive in zip(features, (False, True, False, True)):
                score = model.predict(row)
                self.assertTrue(score > 0 if expect_positive else score <= 0)

        # Linear, lasso and ridge regression, each trained then checked in turn.
        for trainer in sgd_trainers:
            check_signs(trainer.train(rdd, iterations=10))

        cat_info = {0: 2}  # feature 0 has 2 categories
        check_signs(DecisionTree.trainRegressor(
            rdd, categoricalFeaturesInfo=cat_info, maxBins=4))
        check_signs(RandomForest.trainRegressor(
            rdd, categoricalFeaturesInfo=cat_info, numTrees=10, maxBins=4, seed=1))
        check_signs(GradientBoostedTrees.trainRegressor(
            rdd, categoricalFeaturesInfo=cat_info, numIterations=4))

        # Supplying initial weights must be accepted by every SGD trainer.
        try:
            for trainer in sgd_trainers:
                trainer.train(rdd, initialWeights=array([1.0, 1.0]), iterations=10)
        except ValueError:
            self.fail()

开发者ID:1ambda，项目名称:spark，代码行数:59，代码来源:tests.py

示例8: trainRandomForestModel

def trainRandomForestModel(data, numTrees=2000, maxDepth=4, maxBins=32):
    """
    Train a random forest regression model and return it
    :param data: RDD[LabeledPoint]
    :param numTrees: number of trees in the forest (default 2000, the value
        that used to be hard-coded)
    :param maxDepth: maximum depth of each tree (default 4)
    :param maxBins: maximum number of bins used when splitting features
        (default 32)
    :return: random forest regression model
    """
    from pyspark.mllib.tree import RandomForest
    return RandomForest.trainRegressor(
        data,
        categoricalFeaturesInfo={},
        numTrees=numTrees,
        featureSubsetStrategy="auto",
        impurity="variance",
        maxDepth=maxDepth,
        maxBins=maxBins,
    )

开发者ID:theseusyang，项目名称:GEQE，代码行数:9，代码来源:createROC.py

示例9: train_model

 def train_model(cls, trianData, cateFeaInfo=None, trees=3, impurity="gini",
                 depth=4):
     """
     Train a two-class random-forest classifier and return the model.

     :param trianData: training data handed to ``RandomForest.trainClassifier``
         (the misspelled name is kept so keyword callers keep working).
     :param cateFeaInfo: passed as ``categoricalFeaturesInfo``; ``None``
         (the default) means an empty dict.
     :param trees: number of trees.
     :param impurity: impurity criterion, ``"gini"`` by default.
     :param depth: maximum tree depth.
     :return: the trained model.
     """
     # A fresh dict per call avoids sharing one mutable default between calls.
     if cateFeaInfo is None:
         cateFeaInfo = {}
     # The body used to read ``trainData``, which is not the parameter name
     # and raised NameError unless a global of that name happened to exist.
     model = RandomForest.trainClassifier(trianData, numClasses=2,
                                          categoricalFeaturesInfo=cateFeaInfo, numTrees=trees,
                                          featureSubsetStrategy="auto", impurity=impurity,
                                          maxDepth=depth, maxBins=32)
     return model

开发者ID:yidun55，项目名称:mllib，代码行数:10，代码来源:randomForest_classification_spark_xiaodai.py

示例10: evaluate

 def evaluate(self, trainingData,  testData=None, metric=None):
     """
     Train a 10-tree random-forest classifier and print its error on ``testData``.

     :param trainingData: data handed to ``RandomForest.trainClassifier``.
     :param testData: held-out data; when ``None`` nothing is done (the
         cross-validation branch is not implemented).
     :param metric: accepted but not used.
     :return: ``None``.
     """
     if testData is None:
         # cross validation: not implemented
         return

     model = RandomForest.trainClassifier(trainingData, numClasses=2, categoricalFeaturesInfo={},
                                          numTrees=10, featureSubsetStrategy="auto",
                                          impurity='gini', maxDepth=4, maxBins=32)
     predictions = model.predict(testData.map(lambda x: x.features))
     labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
     # Index into each (label, prediction) pair; tuple parameters in lambdas
     # were removed in Python 3 (PEP 3113).
     testErr = (labelsAndPredictions.filter(lambda vp: vp[0] != vp[1]).count()
                / float(testData.count()))
     print('Test Error = ' + str(testErr))

开发者ID:bngonmang，项目名称:FIND，代码行数:11，代码来源:RF.py

示例11: trainModel

def trainModel(trainingData):
    """Train a 3-tree, depth-5 random-forest classifier and return it.

    Prints a start and a finish message and calls ``Utils.logTime()`` after
    each one.

    :param trainingData: data handed to ``RandomForest.trainClassifier``.
    :return: the trained model.
    """
    # Parenthesised single-string prints give the same output on Python 2 and
    # are valid on Python 3, where the print statement is a SyntaxError.
    print("\nTrainning Random Forest model started!")
    Utils.logTime()

    model = RandomForest.trainClassifier(trainingData, numClasses=2, categoricalFeaturesInfo={},
                                         numTrees=3, featureSubsetStrategy="auto", impurity='gini',
                                         maxDepth=5, maxBins=32)

    print('\nTraining Random Forest model finished')
    Utils.logTime()
    return model

开发者ID:yfliu87，项目名称:MachineLearningModel，代码行数:11，代码来源:RandomForest.py

示例12: getRandomForestRMSE

def getRandomForestRMSE(trees_array):
    """Train one random-forest regressor per tree count and report validation RMSE.

    Trains on the module-level ``train_featureScoreTimeRDD`` and evaluates on
    the module-level ``val_featureScoreTimeRDD``.

    :param trees_array: iterable of ``numTrees`` values to try.
    :return: list of ``(trees, valRMSE)`` tuples in input order.
    """
    # These do not depend on the tree count, so build them once instead of on
    # every iteration (the count in particular launches a Spark job).
    val_features = val_featureScoreTimeRDD.map(lambda lp: lp.features)
    val_labels = val_featureScoreTimeRDD.map(lambda lp: lp.label)
    val_count = float(val_featureScoreTimeRDD.count())

    valRMSE_list = []
    for trees in trees_array:
        model = RandomForest.trainRegressor(train_featureScoreTimeRDD, categoricalFeaturesInfo={},
                                            numTrees=trees, featureSubsetStrategy="auto",
                                            impurity='variance', maxDepth=4, maxBins=32)
        predictions = model.predict(val_features)
        labelsAndPreds = val_labels.zip(predictions)
        # Index into each (label, prediction) pair; tuple parameters in
        # lambdas were removed in Python 3 (PEP 3113).
        valMSE = labelsAndPreds.map(
            lambda vp: (vp[0] - vp[1]) * (vp[0] - vp[1])).sum() / val_count
        valRMSE = valMSE ** 0.5
        valRMSE_list.append((trees, valRMSE))
    return valRMSE_list

开发者ID:shaileshr，项目名称:SentimentAnalysis，代码行数:12，代码来源:Qn8.py

示例13: test_regression

    def test_regression(self):
        """Fit each MLlib regressor on four points and check the sign of its predictions.

        The label of every point has the same sign as its second feature, so
        every model must predict ``<= 0`` for rows 0 and 2 and ``> 0`` for
        rows 1 and 3.
        """
        from pyspark.mllib.regression import LinearRegressionWithSGD, LassoWithSGD, \
            RidgeRegressionWithSGD
        from pyspark.mllib.tree import DecisionTree, RandomForest, GradientBoostedTrees
        points = [
            LabeledPoint(-1.0, [0, -1]),
            LabeledPoint(1.0, [0, 1]),
            LabeledPoint(-1.0, [0, -2]),
            LabeledPoint(1.0, [0, 2]),
        ]
        rdd = self.sc.parallelize(points)
        features = [point.features.tolist() for point in points]

        def check_signs(model):
            # Rows alternate negative / positive labels, starting negative.
            for row, expect_positive in zip(features, (False, True, False, True)):
                score = model.predict(row)
                self.assertTrue(score > 0 if expect_positive else score <= 0)

        # Linear, lasso and ridge regression, each trained then checked in turn.
        for trainer in (LinearRegressionWithSGD, LassoWithSGD, RidgeRegressionWithSGD):
            check_signs(trainer.train(rdd))

        cat_info = {0: 2}  # feature 0 has 2 categories
        check_signs(DecisionTree.trainRegressor(
            rdd, categoricalFeaturesInfo=cat_info))
        check_signs(RandomForest.trainRegressor(
            rdd, categoricalFeaturesInfo=cat_info, numTrees=100))
        check_signs(GradientBoostedTrees.trainRegressor(
            rdd, categoricalFeaturesInfo=cat_info))

开发者ID:greatyan，项目名称:spark，代码行数:52，代码来源:tests.py

示例14: trainOptimalModel

def trainOptimalModel(trainingData, testData):
    """Grid-search random-forest hyper-parameters and return the best model.

    Every combination of the candidate values below is trained on
    ``trainingData`` and scored with ``Evaluation.evaluate(model, testData)``;
    the model with the lowest score is kept.  If training or evaluation
    raises, the failing combination is logged, the search stops, and the best
    model found so far is returned.

    :param trainingData: data handed to ``RandomForest.trainClassifier``.
    :param testData: data handed to ``Evaluation.evaluate``.
    :return: the best model found, or ``None`` if no combination succeeded.
    """
    import itertools

    print("\nTraining optimal Random Forest model started!")
    Utils.logTime()

    numTreesVals = [3, 5, 8]
    featureSubsetStrategyVals = ['auto', 'all', 'sqrt', 'log2', 'onethird']
    impurityVals = ['gini', 'entropy']
    maxDepthVals = [3, 4, 5, 6, 7]
    maxBinsVals = [8, 16, 32]

    optimalModel = None
    optimalNumTrees = None
    optimalFeatureSubsetStrategy = None
    optimalMaxDepth = None
    optimalImpurity = None
    optimalBinsVal = None
    minError = None

    # Bound up front so the exception handler can always report them.
    curNumTree = curFeatureSubsetStrategy = curImpurity = None
    curMaxDepth = curMaxBins = None

    try:
        # product() varies the last iterable fastest, i.e. the same visiting
        # order as the five nested loops it replaces.
        for (curNumTree, curFeatureSubsetStrategy, curImpurity,
             curMaxDepth, curMaxBins) in itertools.product(
                 numTreesVals, featureSubsetStrategyVals, impurityVals,
                 maxDepthVals, maxBinsVals):
            model = RandomForest.trainClassifier(trainingData,
                                                 numClasses=2,
                                                 categoricalFeaturesInfo={},
                                                 numTrees=curNumTree,
                                                 featureSubsetStrategy=curFeatureSubsetStrategy,
                                                 impurity=curImpurity,
                                                 maxDepth=curMaxDepth,
                                                 maxBins=curMaxBins)
            testErr = Evaluation.evaluate(model, testData)
            # ``not minError`` was also true for a perfect 0.0 error, so a
            # later, worse model could replace the best one; comparing with
            # None is additionally a TypeError on Python 3.
            if minError is None or testErr < minError:
                minError = testErr
                optimalNumTrees = curNumTree
                optimalFeatureSubsetStrategy = curFeatureSubsetStrategy
                optimalImpurity = curImpurity
                optimalMaxDepth = curMaxDepth
                optimalBinsVal = curMaxBins
                optimalModel = model
    except Exception:
        # Report the combination that failed, not the best one so far, and
        # str() every value so a None cannot break the concatenation.
        msg = "\nException during model training with below parameters:"
        msg += "\tnum trees: " + str(curNumTree)
        msg += "\tfeature subset strategy: " + str(curFeatureSubsetStrategy)
        msg += "\timpurity: " + str(curImpurity)
        msg += "\tmaxDepth: " + str(curMaxDepth)
        msg += "\tmaxBins: " + str(curMaxBins)
        # NOTE(review): this used to call ``Utls.logMessage``, which looks
        # like a typo for the ``Utils`` helper used above -- confirm that
        # ``Utils.logMessage`` exists.
        Utils.logMessage(msg)

    logMessage(optimalModel, optimalNumTrees, optimalFeatureSubsetStrategy, optimalMaxDepth,
               optimalImpurity, optimalBinsVal, minError)
    return optimalModel

开发者ID:yfliu87，项目名称:MachineLearningModel，代码行数:52，代码来源:RandomForest.py

示例15: test_classification

    def test_classification(self):
        """Fit each MLlib classifier on four points and check its predictions.

        Rows 0 and 2 are labelled 0.0 and rows 1 and 3 are labelled 1.0, so
        every model must predict ``<= 0`` for the former and ``> 0`` for the
        latter.
        """
        from pyspark.mllib.classification import LogisticRegressionWithSGD, SVMWithSGD, NaiveBayes
        from pyspark.mllib.tree import DecisionTree, RandomForest, GradientBoostedTrees
        points = [
            LabeledPoint(0.0, [1, 0, 0]),
            LabeledPoint(1.0, [0, 1, 1]),
            LabeledPoint(0.0, [2, 0, 0]),
            LabeledPoint(1.0, [0, 2, 1]),
        ]
        rdd = self.sc.parallelize(points)
        features = [point.features.tolist() for point in points]

        def check_classes(model):
            # Rows alternate class 0 / class 1, starting with class 0.
            for row, expect_positive in zip(features, (False, True, False, True)):
                predicted = model.predict(row)
                self.assertTrue(predicted > 0 if expect_positive else predicted <= 0)

        # Logistic regression, SVM and naive Bayes, each trained then checked in turn.
        for trainer in (LogisticRegressionWithSGD, SVMWithSGD, NaiveBayes):
            check_classes(trainer.train(rdd))

        cat_info = {0: 3}  # feature 0 has 3 categories
        check_classes(DecisionTree.trainClassifier(
            rdd, numClasses=2, categoricalFeaturesInfo=cat_info))
        check_classes(RandomForest.trainClassifier(
            rdd, numClasses=2, categoricalFeaturesInfo=cat_info, numTrees=100))
        check_classes(GradientBoostedTrees.trainClassifier(
            rdd, categoricalFeaturesInfo=cat_info))

开发者ID:greatyan，项目名称:spark，代码行数:51，代码来源:tests.py

注：本文中的pyspark.mllib.tree.RandomForest类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。