

Python SparkContext.setSystemProperty Method Code Examples

This article collects typical usage examples of the pyspark.SparkContext.setSystemProperty method in Python. If you are wondering what SparkContext.setSystemProperty does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of the enclosing class, pyspark.SparkContext.


Below are 9 code examples of SparkContext.setSystemProperty, sorted by popularity by default. You can upvote the examples you find useful; your votes help the system recommend better Python code examples. A minimal usage sketch follows, and then the examples themselves.
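First, a minimal sketch of the typical pattern (the master, app name, and property value here are illustrative, not taken from any of the projects below). setSystemProperty sets a JVM system property, so it is normally called before the SparkContext is constructed; spark.* system properties are then picked up when the context builds its configuration.

from pyspark import SparkContext

# Set a JVM system property before the SparkContext exists
SparkContext.setSystemProperty("spark.executor.memory", "2g")

# The new context's configuration picks up spark.* system properties
sc = SparkContext("local", "setSystemProperty-demo")
print(sc.getConf().get("spark.executor.memory", "not set"))
sc.stop()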

Example 1: init_spark_context

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import setSystemProperty [as alias]
def init_spark_context(details=[]):
    global spark_context
    if spark_context:
        return
    build_type = yb_dist_tests.global_conf.build_type
    from pyspark import SparkContext
    # We sometimes fail tasks due to unsynchronized clocks, so we should tolerate a fair number of
    # retries.
    # https://stackoverflow.com/questions/26260006/are-failed-tasks-resubmitted-in-apache-spark
    # NOTE: we never retry failed tests to avoid hiding bugs. This failure tolerance mechanism
    #       is just for the resilience of the test framework itself.
    SparkContext.setSystemProperty('spark.task.maxFailures', str(SPARK_TASK_MAX_FAILURES))
    if yb_dist_tests.global_conf.build_type == 'tsan':
        logging.info("Using a separate default Spark cluster for TSAN tests")
        default_spark_master_url = DEFAULT_SPARK_MASTER_URL_TSAN
    else:
        logging.info("Using the regular default Spark cluster for non-TSAN tests")
        default_spark_master_url = DEFAULT_SPARK_MASTER_URL

    spark_master_url = os.environ.get('YB_SPARK_MASTER_URL', default_spark_master_url)
    details += [
        'user: {}'.format(getpass.getuser()),
        'build type: {}'.format(build_type)
        ]

    if 'BUILD_URL' in os.environ:
        details.append('URL: {}'.format(os.environ['BUILD_URL']))

    spark_context = SparkContext(spark_master_url, "YB tests ({})".format(', '.join(details)))
    spark_context.addPyFile(yb_dist_tests.__file__)
Developer: krajasek, Project: yugabyte-db, Lines of code: 32, Source file: run_tests_on_spark.py

Example 2: SparkConf

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import setSystemProperty [as alias]
from pyspark import SparkContext, SparkConf
from pyspark.sql import HiveContext
from pyspark.ml.feature import StringIndexer, VectorAssembler, OneHotEncoder
from pyspark.ml import Pipeline
from pyspark.ml.classification import RandomForestClassifier

# Initialize Spark
SparkContext.setSystemProperty("spark.executor.memory", "4g")
conf = SparkConf()
conf.set("spark.executor.instances", 20)
sc = SparkContext("yarn-client", "kdd99", conf=conf)
hc = HiveContext(sc)

kdd = hc.table("kdd99")

(trainData, testData) = kdd.randomSplit([0.7, 0.3], seed=42)
trainData.cache()
services = trainData.withColumnRenamed("service", "srvc").select("srvc").distinct()
# filter out any rows with a service not trained upon
testData = testData.join(services, testData.service == services.srvc)
testData.cache()

print "training set has " + str(trainData.count()) + " instances"
print "test set has " + str(testData.count()) + " instances"

# Build model
inx1 = StringIndexer(inputCol="protocol", outputCol="protocol-cat")
inx2 = StringIndexer(inputCol="service", outputCol="service-cat")
inx3 = StringIndexer(inputCol="flag", outputCol="flag-cat")
inx4 = StringIndexer(inputCol="is_anomaly", outputCol="label")
ohe2 = OneHotEncoder(inputCol="service-cat", outputCol="service-ohe")
Developer: ofermend, Project: data-science-with-hadoop-book, Lines of code: 33, Source file: anomaly.py

Example 3: __init__

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import setSystemProperty [as alias]
# -*- coding: utf-8 -*-
from pyspark import SparkConf
from pyspark import SparkContext

from pyspark.sql import SparkSession
from pyspark.sql import HiveContext
import requests
import json

SparkContext.setSystemProperty('spark.executor.memory', '10g')
SparkContext.setSystemProperty("spark.executor.cores",'4')

class SparkHiveExample:

    def __init__(self):
        ## initialize spark session
        self.spark = SparkSession.builder.appName("Spark Hive example").enableHiveSupport().getOrCreate()

    def run(self):
        ## download with opendata API
        url = "http://data.coa.gov.tw/Service/OpenData/ODwsv/ODwsvTravelFood.aspx?"
        data = requests.get(url)

        ## convert from JSON to dataframe
        df = self.spark.createDataFrame(data.json())

        ## display schema
        df.printSchema()

        ## creates a temporary view using the DataFrame
        df.createOrReplaceTempView("travelfood")
Developer: ChienHsiung, Project: python, Lines of code: 33, Source file: new.py

Example 4: parse_meta

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import setSystemProperty [as alias]
# Students: A. Romriell, D. Wen, J. Pastor, J. Pollard
# MSAN 694 Project


from pyspark import SparkContext
SparkContext.setSystemProperty('spark.executor.memory', '45g')
sc = SparkContext("local", "arXiv")
from pyspark.mllib.clustering import PowerIterationClustering


def parse_meta(line):
    """Parse a pipe-delimited metadata line into id, subject, date, and title."""
    pieces = line.strip().split("|")
    return {"id": pieces[0], "subj": pieces[1], "dt": pieces[2], "title": pieces[3]}


def get_paper_subj(d):
    """Return an (id, subject) pair for a parsed metadata record."""
    return (d["id"], d["subj"])


def parse_auth(line):
    """Parse an author line of the form 'paper_id:author1|author2|...'."""
    paper_id, the_authors = line.strip().split(":", 1)
    authors = the_authors.strip().split("|")
    return {"id": paper_id, "authors": authors}

Developer: jaimeps, Project: distributed-computing-arxiv, Lines of code: 31, Source file: auth_network_interactice.py

Example 5: parsePoint

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import setSystemProperty [as alias]
from pyspark import SparkConf, SparkContext
SparkContext.setSystemProperty("hadoop.home.dir", "C:\\spark-1.5.1-bin-hadoop2.6\\")
import sys, pickle,math
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.tree import GradientBoostedTrees, GradientBoostedTreesModel
from pyspark.mllib.util import MLUtils

conf = SparkConf().setAppName('random-forest')
sc = SparkContext(conf=conf)

input = sys.argv[1]

# Load and parse the data
def parsePoint(line):
    return LabeledPoint(float(line[1]), line[0])

train = sc.pickleFile(input+'/bow_train/part-00000')
test = sc.pickleFile(input+'/bow_test/part-00000')
parsedtrain=train.map(parsePoint).filter(lambda line:len(line.features)!=0 or len(line.label)!=0)
parsedtest = test.map(parsePoint).filter(lambda line:len(line.features)!=0 or len(line.label)!=0).cache()
model = GradientBoostedTrees.trainRegressor(parsedtrain,categoricalFeaturesInfo={}, numIterations=1)
predictions = model.predict(parsedtest.map(lambda x: x.features))
labelsAndPredictions = parsedtest.map(lambda lp: lp.label).zip(predictions)
val_err = labelsAndPredictions.map(lambda vp: (vp[0] - vp[1]) * (vp[0] - vp[1])).sum() / float(parsedtest.count())
parsedtest.unpersist()
RMSE=math.sqrt(val_err)

print("Root Mean Squared Error Test= " + str(RMSE))

Developer: gurpreetbajwa, Project: Sentiment-Analysis, Lines of code: 30, Source file: gradient_boost.py

Example 6: dot_prod

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import setSystemProperty [as alias]
import time
import pyspark
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.types import *
from pyspark.sql.functions import udf
from nvludfs import *

@nvl("(long,long,long)->long", tupargs=True)
def dot_prod(a,b,c):
  return 3*a+2*b+c

SparkContext.setSystemProperty("useNvl", "true")
SparkContext.setSystemProperty("offHeap", "true")
SparkContext.setSystemProperty("pythonNvl", "true")
conf = (SparkConf()
         .setMaster("local")
         .setAppName("udf_example")
         .set("spark.executor.memory", "2g"))
sc = SparkContext(conf = conf)
sqlContext = SQLContext(sc)
dot_udf = udf(dot_prod, LongType())
df = sqlContext.read.parquet("assembly/udf-test-s").cache()
times = []
for i in range(0, 11):
  t = time.time() 
  df.withColumn("udf", dot_udf(df['a'], df['b'], df['c'])).selectExpr("sum(udf)").show()
  times.append(time.time() - t)
print "average time: " + str(sum(times[1:])/10.0)
Developer: jjthomas, Project: spark-nvl, Lines of code: 32, Source file: udf_example.py

Example 7: SparkContext

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import setSystemProperty [as alias]
#!/usr/bin/env python

import matplotlib.pyplot as plot
import csv
from ast import literal_eval
from pyspark import SparkContext, SparkConf, StorageLevel
from operator import add

"""
--------------------------------------------------------
SPARK CONFIGURATION

Used only for standalone execution via bin/spark-submit
--------------------------------------------------------
"""
SparkContext.setSystemProperty("spark.executor.memory", "28g")
SparkContext.setSystemProperty("spark.default.parallelism", "500")

conf = (SparkConf()
        .setMaster("local")
        .setAppName("Uptime per machine")
        .set("spark.worker.memory", "28g")
        .set("spark.driver.memory", "28g")
        .set("spark.local.dir", "/Users/ksmuga/workspace/data/out"))
sc = SparkContext(conf = conf)


"""
--------------------------------------------------------
FIRST MAPPING TRANSFORMATION 
Developer: kamilsmuga, Project: msc-thesis, Lines of code: 32, Source file: calculate_cost_per_hour.py

Example 8: SparkContext

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import setSystemProperty [as alias]
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# author [email protected]
import os
import sys

local_path = os.path.dirname(__file__)
sys.path.append(local_path + "/../lib")
sys.path.append(local_path + "/../")

from pyspark import SQLContext, HiveContext
from pyspark import SparkContext

import eod





if __name__ == "__main__":
    sc = SparkContext(appName="bintrade_candidate", master="yarn-client")
    sc.setSystemProperty("spark.driver.memory",     "1g")
    sc.setSystemProperty("spark.executor.memory",   "8g")
    sc.setSystemProperty("spark.executor.cores",    "2")

    sqlContext = HiveContext(sc)
    sqlContext.setConf("spark.sql.shuffle.partitions", "16")
    sqlContext.sql("use fex")

    eod.run(sc, sqlContext, isHive=True)
Developer: hongbin0908, Project: bintrade, Lines of code: 32, Source file: eod_job.py

Example 9: SparkContext

# Required import: from pyspark import SparkContext [as alias]
# Or: from pyspark.SparkContext import setSystemProperty [as alias]
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# author [email protected]
import os
import sys

local_path = os.path.dirname(__file__)
sys.path.append(local_path + "/../lib")
sys.path.append(local_path + "/../")

from pyspark import SQLContext, HiveContext
from pyspark import SparkContext

import ta

if __name__ == "__main__":
    sc = SparkContext(appName="bintrade_candidate", master="yarn-client")
    sc.setSystemProperty("spark.driver.memory",     "1g")
    sc.setSystemProperty("spark.executor.memory",   "8g")
    sc.setSystemProperty("spark.executor.instances", "8")
    sc.setSystemProperty("spark.executor.cores",    "4")

    sqlContext = HiveContext(sc)
    sqlContext.setConf("spark.sql.shuffle.partitions", "32")
    sqlContext.sql("use fex")

    ta.run(sc, sqlContext, isHive=True)
Developer: hongbin0908, Project: bintrade, Lines of code: 29, Source file: ta_job.py


Note: The pyspark.SparkContext.setSystemProperty method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers, and copyright of the source code remains with the original authors. Before distributing or using the code, please refer to the license of the corresponding project; do not reproduce this article without permission.