This article collects typical usage examples of the Dataset.from_source method from the Python class moztelemetry.dataset.Dataset. If you have been wondering what Dataset.from_source does or how to call it in practice, the hand-picked samples below should help; they are also a good starting point for exploring the rest of the moztelemetry.dataset.Dataset class.
The following shows 4 code examples of Dataset.from_source, ordered by popularity by default.
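For orientation, every example below follows the same three-step pattern: build a lazy view over a data source with Dataset.from_source, narrow it with where() filters on the dataset's dimensions, then materialize the matching pings into a Spark RDD with records(). A minimal sketch, assuming a live SparkContext `sc` (the filter values are placeholders, not recommendations):

from moztelemetry.dataset import Dataset

# Build a lazily-evaluated view over the 'telemetry' source, filter it by
# dataset dimensions, then pull a 1% sample of matching pings into an RDD.
# Placeholder filter values; assumes a live SparkContext `sc`.
pings = (Dataset.from_source('telemetry')
         .where(docType='main',
                submissionDate='20190401',
                appUpdateChannel='nightly')
         .records(sc, sample=0.01))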
Example 1: aggregate_metrics
# Required import: from moztelemetry.dataset import Dataset
# Method demonstrated: Dataset.from_source
def aggregate_metrics(sc, channels, submission_date, main_ping_fraction=1, fennec_ping_fraction=1, num_reducers=10000):
    """Return the build-id and submission-date aggregates for a given submission date.

    :param sc: A SparkContext instance
    :param channels: Either the name of a channel or a list/tuple of names
    :param submission_date: The submission date for which the data will be aggregated
    :param main_ping_fraction: An approximate fraction of main pings to sample for aggregation
    :param fennec_ping_fraction: An approximate fraction of Fennec pings to sample for aggregation
    :param num_reducers: Number of reducers (unused in this snippet)
    """
    if not isinstance(channels, (tuple, list)):
        channels = [channels]
    channels = set(channels)

    # Desktop "main" pings from the requested channels, explicitly excluding Fennec
    pings = Dataset.from_source('telemetry') \
        .where(appUpdateChannel=lambda x: x in channels,
               submissionDate=submission_date,
               docType='main',
               sourceVersion='4',
               appName=lambda x: x != 'Fennec') \
        .records(sc, sample=main_ping_fraction)

    # Fennec (Firefox for Android) ships its data in "saved_session" pings instead
    fennec_pings = Dataset.from_source('telemetry') \
        .where(appUpdateChannel=lambda x: x in channels,
               submissionDate=submission_date,
               docType='saved_session',
               sourceVersion='4',
               appName='Fennec') \
        .records(sc, sample=fennec_ping_fraction)

    all_pings = pings.union(fennec_pings)
    return _aggregate_metrics(all_pings)
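A hypothetical invocation, assuming a live SparkContext `sc` and that `_aggregate_metrics` is defined in the surrounding module:

# Aggregate a 10% sample of main pings for two channels on one submission date
aggregates = aggregate_metrics(sc, ['nightly', 'beta'], '20190401',
                               main_ping_fraction=0.1)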
Example 2: test_dataset_from_source
# Required import: from moztelemetry.dataset import Dataset
# Method demonstrated: Dataset.from_source
import json
import os

import boto3
# S3Store lives in moztelemetry's storage module (moztelemetry.store in recent versions)
from moztelemetry.store import S3Store


def test_dataset_from_source(my_mock_s3, monkeypatch):
    # `my_mock_s3` is a pytest fixture that stands up a mocked S3 (e.g. via moto)
    meta_bucket_name = 'net-mozaws-prod-us-west-2-pipeline-metadata'
    bucket = boto3.resource('s3').Bucket(meta_bucket_name)
    bucket.create()

    store = S3Store(meta_bucket_name)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    with open(os.path.join(data_dir, 'sources.json'), 'rb') as f:
        store.upload_file(f, '', 'sources.json')
    with open(os.path.join(data_dir, 'schema.json'), 'rb') as f:
        store.upload_file(f, 'telemetry-2/', 'schema.json')
        f.seek(0)
        expected_dimensions = json.loads(f.read().decode('utf-8'))['dimensions']

    # The dataset's schema should mirror the dimensions in the uploaded schema.json
    dimensions = [dim['field_name'] for dim in expected_dimensions]
    assert Dataset.from_source('telemetry').schema == dimensions
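The test doubles as documentation: the schema property lists the dimension names that where() accepts as keyword filters. An illustrative interactive check (the output below is an assumption about typical telemetry dimensions, not exact):

print(Dataset.from_source('telemetry').schema)
# e.g. ['submissionDate', 'sourceName', 'sourceVersion', 'docType',
#       'appName', 'appUpdateChannel', 'appVersion', 'appBuildId']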
Example 3: aggregate_metrics
# Required import: from moztelemetry.dataset import Dataset
# Method demonstrated: Dataset.from_source
def aggregate_metrics(sc, begin, end=None, num_partitions=10000):
    """
    Return the build-id and submission-date aggregates for a given date range.

    :param sc: A SparkContext instance
    :param begin: A string for the beginning date, in the form "YYYYMMDD"
    :param end: An optional string for the end date, in the form "YYYYMMDD". If
        not provided, metrics will only be aggregated for the date provided
        with `begin`.
    :param num_partitions: An optional value to be passed to `aggregateByKey`.
    """
    if end is None:
        end = begin
    # Passing a lambda to where() allows range filters: submissionDate in [begin, end]
    pings = (Dataset.from_source('telemetry')
             .where(docType='mobile_metrics',
                    submissionDate=lambda x: begin <= x <= end)
             .records(sc))
    return _aggregate_metrics(pings, num_partitions)
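A hypothetical call aggregating a one-week window, again assuming a live SparkContext `sc` and the surrounding module's `_aggregate_metrics`:

aggregates = aggregate_metrics(sc, '20190401', end='20190407')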
Example 4: Databricks notebook using the telemetry-cohorts source
# Required import: from moztelemetry.dataset import Dataset
# Method demonstrated: Dataset.from_source
# COMMAND ----------
from moztelemetry.dataset import Dataset
import pandas as pd
from pyspark.sql import Row
from pyspark.sql import functions as f
from pyspark.sql.types import (StructType, StructField, StringType, BooleanType,
                               IntegerType, DoubleType, LongType, MapType)
from statsmodels.stats.weightstats import DescrStatsW

EXPERIMENT_ID = "prefflip-webrender-v1-2-1492568"
EXPERIMENT_ID_2 = "prefflip-webrender-v1-3-1492568"
# Dataset partitions use underscores where the experiment slugs use hyphens
PARTITIONS = [s.replace("-", "_") for s in (EXPERIMENT_ID, EXPERIMENT_ID_2)]

# COMMAND ----------
# Evaluating the Dataset in a notebook cell displays the source and its dimensions
Dataset.from_source("telemetry-cohorts")

# COMMAND ----------
# Friendly name -> dot-delimited path of the histogram inside the ping payload
to_summarize = {
    "composite_time": "payload.processes.gpu.histograms.COMPOSITE_TIME",
    "content_frame_time": "payload.processes.gpu.histograms.CONTENT_FRAME_TIME",
    "content_frame_time_svg": "payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITH_SVG",
    "content_frame_time_reason": "payload.processes.gpu.histograms.CONTENT_FRAME_TIME_REASON",
    "content_frame_time_without_upload": "payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITHOUT_UPLOAD",
    "content_paint_time": "payload.processes.content.histograms.CONTENT_PAINT_TIME",
    "tab_switch_composite": "payload.histograms.FX_TAB_SWITCH_COMPOSITE_E10S_MS",
    "content_full_paint_time": "payload.processes.gpu.histograms.CONTENT_FULL_PAINT_TIME",
    "page_load_ms": "payload.histograms.FX_PAGE_LOAD_MS_2"
}
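The notebook excerpt stops before actually pulling any records. A plausible next cell is sketched below; note that the experimentId and docType dimension names, and the environment.experiments.<id>.branch path, are assumptions about how the telemetry-cohorts dataset is laid out, not something the excerpt above confirms:

# COMMAND ----------
# Hypothetical follow-up cell: fetch main pings for both experiment branches and
# project out just the histograms named in `to_summarize` via select().
# ASSUMPTIONS: the `experimentId`/`docType` dimension names and the branch path.
pings = (Dataset.from_source("telemetry-cohorts")
         .where(experimentId=lambda x: x in PARTITIONS,
                docType="main")
         .select(branch="environment.experiments.{}.branch".format(EXPERIMENT_ID),
                 **to_summarize)
         .records(sc))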