當前位置: 首頁>>代碼示例>>Python>>正文


Python Dataset.records方法代碼示例

本文整理匯總了Python中moztelemetry.dataset.Dataset.records方法的典型用法代碼示例。如果您正苦於以下問題:Python Dataset.records方法的具體用法?Python Dataset.records怎麽用?Python Dataset.records使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在moztelemetry.dataset.Dataset的用法示例。


在下文中一共展示了Dataset.records方法的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: test_records_print_output

# 需要導入模塊: from moztelemetry.dataset import Dataset [as 別名]
# 或者: from moztelemetry.dataset.Dataset import records [as 別名]
def test_records_print_output(spark_context, capsys):
    """Dataset.records should print a summary of total size and file count."""
    bucket = 'test-bucket'
    backend = InMemoryStore(bucket)
    for n in range(1, 101):
        backend.store['dir{0}/subdir{0}/key{0}'.format(n)] = 'value{}'.format(n)
    Dataset(bucket, ['dim1', 'dim2'], store=backend).records(
        spark_context, decode=lambda x: x)
    captured = capsys.readouterr()
    assert captured[0].rstrip() == "fetching 0.00066MB in 100 files..."
開發者ID:Dexterp37,項目名稱:python_moztelemetry,代碼行數:13,代碼來源:test_dataset.py

示例2: test_records_limit_and_sample

# 需要導入模塊: from moztelemetry.dataset import Dataset [as 別名]
# 或者: from moztelemetry.dataset.Dataset import records [as 別名]
def test_records_limit_and_sample(spark_context):
    """When both are given, `limit` caps the result regardless of `sample`."""
    bucket = 'test-bucket'
    backend = InMemoryStore(bucket)
    for n in range(1, 101):
        backend.store['dir{0}/subdir{0}/key{0}'.format(n)] = 'value{}'.format(n)
    ds = Dataset(bucket, ['dim1', 'dim2'], store=backend)
    rdd = ds.records(spark_context, decode=lambda x: x, limit=5, sample=0.9)
    assert rdd.count() == 5
開發者ID:mozilla,項目名稱:python_moztelemetry,代碼行數:12,代碼來源:test_dataset.py

示例3: test_records

# 需要導入模塊: from moztelemetry.dataset import Dataset [as 別名]
# 或者: from moztelemetry.dataset.Dataset import records [as 別名]
def test_records(spark_context):
    """All stored values are fetched and passed through the decode function."""
    bucket = 'test-bucket'
    backend = InMemoryStore(bucket)
    backend.store.update({
        'dir1/subdir1/key1': 'value1',
        'dir2/subdir2/key2': 'value2',
    })
    rdd = Dataset(bucket, ['dim1', 'dim2'], store=backend).records(
        spark_context, decode=lambda x: x)
    assert sorted(rdd.collect()) == [b'value1', b'value2']
開發者ID:mozilla,項目名稱:python_moztelemetry,代碼行數:12,代碼來源:test_dataset.py

示例4: test_records_summaries

# 需要導入模塊: from moztelemetry.dataset import Dataset [as 別名]
# 或者: from moztelemetry.dataset.Dataset import records [as 別名]
def test_records_summaries(spark_context):
    """Explicit `summaries` restricts fetching to the listed keys only."""
    bucket = 'test-bucket'
    backend = InMemoryStore(bucket)
    backend.store['dir1/subdir1/key1'] = 'value1'
    backend.store['dir2/subdir2/key2'] = 'value2'
    ds = Dataset(bucket, ['dim1', 'dim2'], store=backend, max_concurrency=1)
    only_first = [{'key': 'dir1/subdir1/key1', 'size': len('value1')}]
    rdd = ds.records(spark_context, decode=lambda x: x, summaries=only_first)
    assert rdd.collect() == [b'value1']
開發者ID:mozilla,項目名稱:python_moztelemetry,代碼行數:13,代碼來源:test_dataset.py

示例5: test_records_object

# 需要導入模塊: from moztelemetry.dataset import Dataset [as 別名]
# 或者: from moztelemetry.dataset.Dataset import records [as 別名]
def test_records_object(spark_context):
    """A stored JSON payload decodes back into an equal plain dict."""
    expected = {"uid": 1}
    bucket = 'test-bucket'
    backend = InMemoryStore(bucket)
    backend.store['key'] = json.dumps(expected)

    first = Dataset(bucket, None, store=backend, max_concurrency=1).records(
        spark_context, decode=decode).first()

    assert isinstance(first, dict)
    assert first == expected
開發者ID:mozilla,項目名稱:python_moztelemetry,代碼行數:14,代碼來源:test_dataset.py

示例6: test_records_many_groups

# 需要導入模塊: from moztelemetry.dataset import Dataset [as 別名]
# 或者: from moztelemetry.dataset.Dataset import records [as 別名]
def test_records_many_groups(spark_context, monkeypatch):
    """All records survive when file groups outnumber default partitions."""
    bucket = 'test-bucket'
    backend = InMemoryStore(bucket)
    total = spark_context.defaultParallelism + 1
    for n in range(1, total + 1):
        backend.store['dir1/subdir1/key{}'.format(n)] = 'value{}'.format(n)
    # Force one group per file so the group count exceeds parallelism.
    monkeypatch.setattr(moztelemetry.dataset, '_group_by_size',
                        lambda files: [[f] for f in files])
    rdd = Dataset(bucket, ['dim1', 'dim2'], store=backend).records(
        spark_context, decode=lambda x: x)
    assert rdd.collect() == ['value{}'.format(n) for n in range(1, total + 1)]
開發者ID:whd,項目名稱:python_moztelemetry,代碼行數:14,代碼來源:test_dataset.py

示例7: test_records_selection

# 需要導入模塊: from moztelemetry.dataset import Dataset [as 別名]
# 或者: from moztelemetry.dataset.Dataset import records [as 別名]
def test_records_selection(spark_context):
    """`select` projects nested JSON fields; chained selects accumulate."""
    bucket = 'test-bucket'
    backend = InMemoryStore(bucket)
    backend.store['dir1/subdir1/key1'] = '{"a": {"b": { "c": "value"}}}'
    ds = Dataset(bucket, ['dim1', 'dim2'], store=backend).select(field='a.b.c')
    first = ds.records(spark_context, decode=decode)
    assert first.collect() == [{'field': 'value'}]

    # Chained `select` calls should merge their projections.
    chained = ds.select(field2='a.b').records(spark_context, decode=decode)
    assert chained.collect() == [{'field': 'value', 'field2': {'c': 'value'}}]
開發者ID:mozilla,項目名稱:python_moztelemetry,代碼行數:15,代碼來源:test_dataset.py

示例8: test_records_sample

# 需要導入模塊: from moztelemetry.dataset import Dataset [as 別名]
# 或者: from moztelemetry.dataset.Dataset import records [as 別名]
def test_records_sample(spark_context):
    """sample=0.1 yields 10% of records; seed=None randomizes, default seed is stable."""
    bucket = 'test-bucket'
    backend = InMemoryStore(bucket)
    for n in range(1, 101):
        backend.store['dir{0}/subdir{0}/key{0}'.format(n)] = 'value{}'.format(n)
    ds = Dataset(bucket, ['dim1', 'dim2'], store=backend)

    def draw(**kwargs):
        # One sampled fetch; extra kwargs control the seed behavior.
        return ds.records(spark_context, decode=lambda x: x,
                          sample=0.1, **kwargs).collect()

    random_a = draw(seed=None)
    assert len(random_a) == 10

    random_b = draw(seed=None)
    # seed=None draws a fresh seed each call, so the two samples differ.
    assert sorted(random_a) != sorted(random_b)

    stable_a = draw()
    stable_b = draw()
    # The default seed is fixed, so repeated calls agree.
    assert sorted(stable_a) == sorted(stable_b)
開發者ID:mozilla,項目名稱:python_moztelemetry,代碼行數:24,代碼來源:test_dataset.py


注:本文中的moztelemetry.dataset.Dataset.records方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。