當前位置: 首頁>>代碼示例>>Python>>正文


Python Dataset.from_source方法代碼示例

本文整理匯總了Python中moztelemetry.dataset.Dataset.from_source方法的典型用法代碼示例。如果您正苦於以下問題:Python Dataset.from_source方法的具體用法?Python Dataset.from_source怎麼用?Python Dataset.from_source使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類moztelemetry.dataset.Dataset的用法示例。


在下文中一共展示了Dataset.from_source方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: aggregate_metrics

# 需要導入模塊: from moztelemetry.dataset import Dataset [as 別名]
# 或者: from moztelemetry.dataset.Dataset import from_source [as 別名]
def aggregate_metrics(sc, channels, submission_date, main_ping_fraction=1, fennec_ping_fraction=1, num_reducers=10000):
    """Aggregate the telemetry pings of one submission date for the given channels.

    Two record sets are read from the 'telemetry' source and combined:
    'main' pings from every application other than Fennec, and
    'saved_session' pings from Fennec. Their union is handed to
    `_aggregate_metrics` and that result is returned unchanged.

    :param sc: Object passed to `Dataset.records` for both queries
        (described as a SparkContext instance by the original docs).
    :param channels: A single channel name, or a list/tuple of channel names.
    :param submission_date: Value the `submissionDate` dimension must match.
    :param main_ping_fraction: `sample` argument used for the non-Fennec
        'main' pings.
    :param fennec_ping_fraction: `sample` argument used for the Fennec
        'saved_session' pings.
    :param num_reducers: Not referenced anywhere in this function body.
    :return: Whatever `_aggregate_metrics` returns for the combined pings.
    """
    # Normalise the argument to a set so the channel filter below is a
    # plain membership test, whether one name or several were supplied.
    if isinstance(channels, (tuple, list)):
        wanted_channels = set(channels)
    else:
        wanted_channels = {channels}

    def channel_is_wanted(name):
        return name in wanted_channels

    non_fennec_query = Dataset.from_source('telemetry').where(
        appUpdateChannel=channel_is_wanted,
        submissionDate=submission_date,
        docType='main',
        sourceVersion='4',
        appName=lambda app: app != 'Fennec',
    )
    non_fennec_pings = non_fennec_query.records(sc, sample=main_ping_fraction)

    fennec_query = Dataset.from_source('telemetry').where(
        appUpdateChannel=channel_is_wanted,
        submissionDate=submission_date,
        docType='saved_session',
        sourceVersion='4',
        appName='Fennec',
    )
    fennec_only_pings = fennec_query.records(sc, sample=fennec_ping_fraction)

    return _aggregate_metrics(non_fennec_pings.union(fennec_only_pings))
開發者ID:mozilla,項目名稱:python_mozaggregator,代碼行數:32,代碼來源:aggregator.py

示例2: test_dataset_from_source

# 需要導入模塊: from moztelemetry.dataset import Dataset [as 別名]
# 或者: from moztelemetry.dataset.Dataset import from_source [as 別名]
def test_dataset_from_source(my_mock_s3, monkeypatch):
    """Check that `Dataset.from_source('telemetry').schema` matches schema.json.

    The test creates the metadata bucket, uploads `sources.json` (at the
    bucket root) and `schema.json` (under the 'telemetry-2/' prefix) from the
    local `data` directory, and then expects the dataset's `schema` to equal
    the list of `field_name` values found under the schema's 'dimensions' key.

    `my_mock_s3` and `monkeypatch` are not referenced in the body; they are
    presumably pytest fixtures requested for their setup effects.
    """
    metadata_bucket = 'net-mozaws-prod-us-west-2-pipeline-metadata'
    boto3.resource('s3').Bucket(metadata_bucket).create()

    store = S3Store(metadata_bucket)
    fixture_dir = os.path.join(os.path.dirname(__file__), 'data')

    sources_path = os.path.join(fixture_dir, 'sources.json')
    with open(sources_path, 'rb') as sources_file:
        store.upload_file(sources_file, '', 'sources.json')

    schema_path = os.path.join(fixture_dir, 'schema.json')
    with open(schema_path, 'rb') as schema_file:
        store.upload_file(schema_file, 'telemetry-2/', 'schema.json')
        # Rewind so the same handle can be read again after the upload.
        schema_file.seek(0)
        raw_schema = schema_file.read().decode('utf-8')
        schema_dimensions = json.loads(raw_schema)['dimensions']

    expected_fields = []
    for entry in schema_dimensions:
        expected_fields.append(entry['field_name'])

    assert Dataset.from_source('telemetry').schema == expected_fields
開發者ID:mozilla,項目名稱:python_moztelemetry,代碼行數:21,代碼來源:test_dataset.py

示例3: aggregate_metrics

# 需要導入模塊: from moztelemetry.dataset import Dataset [as 別名]
# 或者: from moztelemetry.dataset.Dataset import from_source [as 別名]
def aggregate_metrics(sc, begin, end=None, num_partitions=10000):
    """Aggregate 'mobile_metrics' pings submitted within an inclusive date range.

    :param sc: Object passed to `Dataset.records` (described as a
        SparkContext instance by the original docs).
    :param begin: First submission date to include, as a "YYYYMMDD" string.
    :param end: Last submission date to include, as a "YYYYMMDD" string.
        When omitted (None), only the `begin` date is selected.
    :param num_partitions: Forwarded as the second argument of
        `_aggregate_metrics` (the original docs describe it as a value
        passed on to `aggregateByKey`).
    :return: Whatever `_aggregate_metrics` returns for the selected pings.
    """
    last_day = begin if end is None else end

    def submitted_in_range(day):
        # Both bounds are inclusive; the comparison is done on the date strings.
        return begin <= day <= last_day

    query = Dataset.from_source('telemetry').where(
        docType='mobile_metrics',
        submissionDate=submitted_in_range,
    )
    selected_pings = query.records(sc)

    return _aggregate_metrics(selected_pings, num_partitions)
開發者ID:mozilla,項目名稱:python_mozaggregator,代碼行數:23,代碼來源:mobile.py

示例4: composite_count.py(腳本片段)

# 需要導入模塊: from moztelemetry.dataset import Dataset [as 別名]
# 或者: from moztelemetry.dataset.Dataset import from_source [as 別名]
# COMMAND ----------

from moztelemetry.dataset import Dataset
import pandas as pd
from pyspark.sql import Row
from pyspark.sql import functions as f
from pyspark.sql.types import StructType, StructField, StringType, BooleanType, IntegerType, DoubleType, LongType, MapType
from statsmodels.stats.weightstats import DescrStatsW

# Identifiers of the two experiments this script refers to.
EXPERIMENT_ID = "prefflip-webrender-v1-2-1492568"
EXPERIMENT_ID_2 = "prefflip-webrender-v1-3-1492568"

# The same identifiers with every hyphen replaced by an underscore.
PARTITIONS = list(
    map(
        lambda experiment_id: experiment_id.replace("-", "_"),
        (EXPERIMENT_ID, EXPERIMENT_ID_2),
    )
)

# COMMAND ----------

# Look up the "telemetry-cohorts" source. The statement sits at module top
# level, so it must start in column 0: a leading indent here is an
# "unexpected indent" error that stops the whole script from parsing.
# NOTE(review): the result is not bound to a name; this looks like the first
# line of a longer notebook expression that was cut off. Confirm against the
# original composite_count.py.
Dataset.from_source("telemetry-cohorts")

# COMMAND ----------

# Maps a short metric name to the dotted path of the histogram it refers to.
to_summarize = dict(
    composite_time="payload.processes.gpu.histograms.COMPOSITE_TIME",
    content_frame_time="payload.processes.gpu.histograms.CONTENT_FRAME_TIME",
    content_frame_time_svg="payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITH_SVG",
    content_frame_time_reason="payload.processes.gpu.histograms.CONTENT_FRAME_TIME_REASON",
    content_frame_time_without_upload="payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITHOUT_UPLOAD",
    content_paint_time="payload.processes.content.histograms.CONTENT_PAINT_TIME",
    tab_switch_composite="payload.histograms.FX_TAB_SWITCH_COMPOSITE_E10S_MS",
    content_full_paint_time="payload.processes.gpu.histograms.CONTENT_FULL_PAINT_TIME",
    page_load_ms="payload.histograms.FX_PAGE_LOAD_MS_2",
)
開發者ID:jrmuizel,項目名稱:gfx-analysis,代碼行數:32,代碼來源:composite_count.py


注:本文中的moztelemetry.dataset.Dataset.from_source方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。