This article collects typical usage examples of the Dataset.from_source method from the Python class moztelemetry.dataset.Dataset. If you have been wondering what Dataset.from_source does or how to call it in practice, the hand-picked samples below should help; they are also a good starting point for exploring the rest of the moztelemetry.dataset.Dataset class.
The following shows 4 code examples of Dataset.from_source, ordered by popularity by default.
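For orientation, every example below follows the same three-step pattern: build a lazy view over a data source with Dataset.from_source, narrow it with where() filters on the dataset's dimensions, then materialize the matching pings into a Spark RDD with records(). A minimal sketch, assuming a live SparkContext `sc` (the filter values are placeholders, not recommendations):

from moztelemetry.dataset import Dataset

# Build a lazily-evaluated view over the 'telemetry' source, filter it by
# dataset dimensions, then pull a 1% sample of matching pings into an RDD.
# Placeholder filter values; assumes a live SparkContext `sc`.
pings = (Dataset.from_source('telemetry')
         .where(docType='main',
                submissionDate='20190401',
                appUpdateChannel='nightly')
         .records(sc, sample=0.01))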
Example 1: aggregate_metrics
# Required import: from moztelemetry.dataset import Dataset
# Method demonstrated: Dataset.from_source
def aggregate_metrics(sc, channels, submission_date, main_ping_fraction=1, fennec_ping_fraction=1, num_reducers=10000):
    """Return the build-id and submission-date aggregates for a given submission date.

    :param sc: A SparkContext instance
    :param channels: Either the name of a channel or a list/tuple of names
    :param submission_date: The submission date for which the data will be aggregated
    :param main_ping_fraction: An approximate fraction of main pings to sample for aggregation
    :param fennec_ping_fraction: An approximate fraction of Fennec pings to sample for aggregation
    :param num_reducers: Number of reducers (unused in this snippet)
    """
    if not isinstance(channels, (tuple, list)):
        channels = [channels]
    channels = set(channels)

    # Desktop "main" pings from the requested channels, explicitly excluding Fennec
    pings = Dataset.from_source('telemetry') \
        .where(appUpdateChannel=lambda x: x in channels,
               submissionDate=submission_date,
               docType='main',
               sourceVersion='4',
               appName=lambda x: x != 'Fennec') \
        .records(sc, sample=main_ping_fraction)

    # Fennec (Firefox for Android) ships its data in "saved_session" pings instead
    fennec_pings = Dataset.from_source('telemetry') \
        .where(appUpdateChannel=lambda x: x in channels,
               submissionDate=submission_date,
               docType='saved_session',
               sourceVersion='4',
               appName='Fennec') \
        .records(sc, sample=fennec_ping_fraction)

    all_pings = pings.union(fennec_pings)
    return _aggregate_metrics(all_pings)
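A hypothetical invocation, assuming a live SparkContext `sc` and that `_aggregate_metrics` is defined in the surrounding module:

# Aggregate a 10% sample of main pings for two channels on one submission date
aggregates = aggregate_metrics(sc, ['nightly', 'beta'], '20190401',
                               main_ping_fraction=0.1)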
Example 2: test_dataset_from_source
# Required import: from moztelemetry.dataset import Dataset
# Method demonstrated: Dataset.from_source
import json
import os

import boto3
# S3Store lives in moztelemetry's storage module (moztelemetry.store in recent versions)
from moztelemetry.store import S3Store


def test_dataset_from_source(my_mock_s3, monkeypatch):
    # `my_mock_s3` is a pytest fixture that stands up a mocked S3 (e.g. via moto)
    meta_bucket_name = 'net-mozaws-prod-us-west-2-pipeline-metadata'
    bucket = boto3.resource('s3').Bucket(meta_bucket_name)
    bucket.create()

    store = S3Store(meta_bucket_name)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    with open(os.path.join(data_dir, 'sources.json'), 'rb') as f:
        store.upload_file(f, '', 'sources.json')
    with open(os.path.join(data_dir, 'schema.json'), 'rb') as f:
        store.upload_file(f, 'telemetry-2/', 'schema.json')
        f.seek(0)
        expected_dimensions = json.loads(f.read().decode('utf-8'))['dimensions']

    # The dataset's schema should mirror the dimensions in the uploaded schema.json
    dimensions = [dim['field_name'] for dim in expected_dimensions]
    assert Dataset.from_source('telemetry').schema == dimensions
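The test doubles as documentation: the schema property lists the dimension names that where() accepts as keyword filters. An illustrative interactive check (the output below is an assumption about typical telemetry dimensions, not exact):

print(Dataset.from_source('telemetry').schema)
# e.g. ['submissionDate', 'sourceName', 'sourceVersion', 'docType',
#       'appName', 'appUpdateChannel', 'appVersion', 'appBuildId']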
Example 3: aggregate_metrics
# Required import: from moztelemetry.dataset import Dataset
# Method demonstrated: Dataset.from_source
def aggregate_metrics(sc, begin, end=None, num_partitions=10000):
    """
    Return the build-id and submission-date aggregates for a given date range.

    :param sc: A SparkContext instance
    :param begin: A string for the beginning date, in the form "YYYYMMDD"
    :param end: An optional string for the end date, in the form "YYYYMMDD". If
        not provided, metrics will only be aggregated for the date provided
        with `begin`.
    :param num_partitions: An optional value to be passed to `aggregateByKey`.
    """
    if end is None:
        end = begin
    # Passing a lambda to where() allows range filters: submissionDate in [begin, end]
    pings = (Dataset.from_source('telemetry')
             .where(docType='mobile_metrics',
                    submissionDate=lambda x: begin <= x <= end)
             .records(sc))
    return _aggregate_metrics(pings, num_partitions)
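A hypothetical call aggregating a one-week window, again assuming a live SparkContext `sc` and the surrounding module's `_aggregate_metrics`:

aggregates = aggregate_metrics(sc, '20190401', end='20190407')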
Example 4: Databricks notebook using the telemetry-cohorts source
# Required import: from moztelemetry.dataset import Dataset
# Method demonstrated: Dataset.from_source
# COMMAND ----------
from moztelemetry.dataset import Dataset
import pandas as pd
from pyspark.sql import Row
from pyspark.sql import functions as f
from pyspark.sql.types import (StructType, StructField, StringType, BooleanType,
                               IntegerType, DoubleType, LongType, MapType)
from statsmodels.stats.weightstats import DescrStatsW

EXPERIMENT_ID = "prefflip-webrender-v1-2-1492568"
EXPERIMENT_ID_2 = "prefflip-webrender-v1-3-1492568"
# Dataset partitions use underscores where the experiment slugs use hyphens
PARTITIONS = [s.replace("-", "_") for s in (EXPERIMENT_ID, EXPERIMENT_ID_2)]

# COMMAND ----------
# Evaluating the Dataset in a notebook cell displays the source and its dimensions
Dataset.from_source("telemetry-cohorts")

# COMMAND ----------
# Friendly name -> dot-delimited path of the histogram inside the ping payload
to_summarize = {
    "composite_time": "payload.processes.gpu.histograms.COMPOSITE_TIME",
    "content_frame_time": "payload.processes.gpu.histograms.CONTENT_FRAME_TIME",
    "content_frame_time_svg": "payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITH_SVG",
    "content_frame_time_reason": "payload.processes.gpu.histograms.CONTENT_FRAME_TIME_REASON",
    "content_frame_time_without_upload": "payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITHOUT_UPLOAD",
    "content_paint_time": "payload.processes.content.histograms.CONTENT_PAINT_TIME",
    "tab_switch_composite": "payload.histograms.FX_TAB_SWITCH_COMPOSITE_E10S_MS",
    "content_full_paint_time": "payload.processes.gpu.histograms.CONTENT_FULL_PAINT_TIME",
    "page_load_ms": "payload.histograms.FX_PAGE_LOAD_MS_2"
}
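The notebook excerpt stops before actually pulling any records. A plausible next cell is sketched below; note that the experimentId and docType dimension names, and the environment.experiments.<id>.branch path, are assumptions about how the telemetry-cohorts dataset is laid out, not something the excerpt above confirms:

# COMMAND ----------
# Hypothetical follow-up cell: fetch main pings for both experiment branches and
# project out just the histograms named in `to_summarize` via select().
# ASSUMPTIONS: the `experimentId`/`docType` dimension names and the branch path.
pings = (Dataset.from_source("telemetry-cohorts")
         .where(experimentId=lambda x: x in PARTITIONS,
                docType="main")
         .select(branch="environment.experiments.{}.branch".format(EXPERIMENT_ID),
                 **to_summarize)
         .records(sc))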