当前位置: 首页>>代码示例>>Python>>正文


Python dot.coalesce函数代码示例

本文整理汇总了Python中pyLibrary.dot.coalesce函数的典型用法代码示例。如果您正苦于以下问题:Python coalesce函数的具体用法?Python coalesce怎么用?Python coalesce使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了coalesce函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

    def __init__(
        self,
        exchange,  # name of the Pulse exchange
        topic,  # message name pattern to subscribe to  ('#' is wildcard)
        target=None,  # WILL BE CALLED WITH PULSE PAYLOADS AND ack() IF COMPLETE$ED WITHOUT EXCEPTION
        target_queue=None,  # (aka self.queue) WILL BE FILLED WITH PULSE PAYLOADS
        host='pulse.mozilla.org',  # url to connect,
        port=5671,  # tcp port
        user=None,
        password=None,
        vhost="/",
        start=0,  # USED AS STARTING POINT FOR ASSIGNING THE _meta.count ATTRIBUTE
        ssl=True,
        applabel=None,
        heartbeat=False,  # True to also get the Pulse heartbeat message
        durable=False,  # True to keep queue after shutdown
        serializer='json',
        broker_timezone='GMT',
        settings=None
    ):
        self.target_queue = target_queue
        self.pulse_target = target
        if (target_queue == None and target == None) or (target_queue != None and target != None):
            Log.error("Expecting a queue (for fast digesters) or a target (for slow digesters)")

        Thread.__init__(self, name="Pulse consumer for " + settings.exchange, target=self._worker)
        self.settings = settings
        settings.callback = self._got_result
        settings.user = coalesce(settings.user, settings.username)
        settings.applabel = coalesce(settings.applable, settings.queue, settings.queue_name)
        settings.topic = topic

        self.pulse = ModifiedGenericConsumer(settings, connect=True, **settings)
        self.count = coalesce(start, 0)
        self.start()
开发者ID:klahnakoski,项目名称:MoDevETL,代码行数:35,代码来源:pulse.py

示例2: __init__

 def __init__(self, **desc):
     desc = wrap(desc)
     self._set_slots_to_null(self.__class__)
     set_default(self, desc)
     self.name = coalesce(desc.name, desc.type)
     self.isFacet = coalesce(desc.isFacet, False)
     self.dimension = Null
开发者ID:klahnakoski,项目名称:TestFailures,代码行数:7,代码来源:domains.py

示例3: _aggs_iterator

    def _aggs_iterator(agg, d):
        deeper = coalesce(agg._filter, agg._nested)
        while deeper:
            agg = deeper
            deeper = coalesce(agg._filter, agg._nested)

        if d > 0:
            for b in agg._match.buckets:
                parts[d] = b
                for a in _aggs_iterator(b, d - 1):
                    yield a
            parts[d] = Null
            for b in agg._other.buckets:
                for a in _aggs_iterator(b, d - 1):
                    yield a
            b = agg._missing
            if b.doc_count:
                for a in _aggs_iterator(b, d - 1):
                    yield a
        else:
            for b in agg._match.buckets:
                parts[d] = b
                if b.doc_count:
                    yield b
            parts[d] = Null
            for b in agg._other.buckets:
                if b.doc_count:
                    yield b
            b = agg._missing
            if b.doc_count:
                yield b
开发者ID:klahnakoski,项目名称:intermittents,代码行数:31,代码来源:aggs.py

示例4: _range_composer

def _range_composer(edge, domain, es_query, to_float):
    # USE RANGES
    _min = coalesce(domain.min, MAX(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    if is_keyword(edge.value):
        calc = {"field": edge.value}
    else:
        calc = {"script": qb_expression_to_ruby(edge.value)}

    if is_keyword(edge.value):
        missing_range = {"or": [
            {"range": {edge.value: {"lt": to_float(_min)}}},
            {"range": {edge.value: {"gte": to_float(_max)}}}
        ]}
    else:
        missing_range = {"script": {"script": qb_expression_to_ruby({"or": [
            {"lt": [edge.value, to_float(_min)]},
            {"gt": [edge.value, to_float(_max)]},
        ]})}}

    return wrap({"aggs": {
        "_match": set_default(
            {"range": calc},
            {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
            es_query
        ),
        "_missing": set_default(
            {"filter": {"or": [
                missing_range,
                {"missing": {"field": get_all_vars(edge.value)}}
            ]}},
            es_query
        ),
    }})
开发者ID:klahnakoski,项目名称:intermittents,代码行数:35,代码来源:aggs.py

示例5: __init__

    def __init__(
        self, host, index, type=None, alias=None, name=None, port=9200, read_only=True, typed=None, settings=None
    ):
        Container.__init__(self, None)
        if not containers.config.default:
            containers.config.default.settings = settings
        self.settings = settings
        self.name = coalesce(name, alias, index)
        if read_only:
            self._es = elasticsearch.Alias(alias=coalesce(alias, index), settings=settings)
        else:
            self._es = elasticsearch.Cluster(settings=settings).get_index(read_only=read_only, settings=settings)

        self.meta = FromESMetadata(settings=settings)
        self.settings.type = self._es.settings.type
        self.edges = Dict()
        self.worker = None

        columns = self.get_columns(table_name=name)
        self._schema = Schema(columns)

        if typed == None:
            # SWITCH ON TYPED MODE
            self.typed = any(c.name in ("$value", "$object") for c in columns)
        else:
            self.typed = typed
开发者ID:klahnakoski,项目名称:esReplicate,代码行数:26,代码来源:jx_usingES.py

示例6: _range_composer

def _range_composer(edge, domain, es_query, to_float):
    # USE RANGES
    _min = coalesce(domain.min, MAX(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    if isinstance(edge.value, Variable):
        calc = {"field": edge.value.var}
    else:
        calc = {"script_field": edge.value.to_ruby()}

    if edge.allowNulls:    # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        missing_filter = set_default(
            {"filter": {"or": [
                OrOp("or", [
                    BinaryOp("lt", [edge.value, Literal(None, to_float(_min))]),
                    BinaryOp("gte", [edge.value, Literal(None, to_float(_max))]),
                ]).to_esfilter(),
                edge.value.missing().to_esfilter()
            ]}},
            es_query
        )
    else:
        missing_filter = None

    return wrap({"aggs": {
        "_match": set_default(
            {"range": calc},
            {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
            es_query
        ),
        "_missing": missing_filter
    }})
开发者ID:klahnakoski,项目名称:esReplicate,代码行数:32,代码来源:decoders.py

示例7: Stats2ZeroMoment

def Stats2ZeroMoment(stats):
    # MODIFIED FROM http://statsmodels.sourceforge.net/devel/_modules/statsmodels/stats/moment_helpers.html
    # ADDED count
    mc0, mc1, mc2, skew, kurt = stats.count, coalesce(stats.mean, 0), coalesce(stats.variance, 0), coalesce(stats.skew, 0), coalesce(stats.kurtosis, 0)

    mz0 = mc0
    mz1 = mc1 * mc0
    mz2 = (mc2 + mc1 * mc1) * mc0
    mc3 = coalesce(skew, 0) * (mc2 ** 1.5) # 3rd central moment
    mz3 = (mc3 + 3 * mc1 * mc2 + mc1 ** 3) * mc0  # 3rd non-central moment
    mc4 = (coalesce(kurt, 0) + 3.0) * (mc2 ** 2.0) # 4th central moment
    mz4 = (mc4 + 4 * mc1 * mc3 + 6 * mc1 * mc1 * mc2 + mc1 ** 4) * mc0

    m = ZeroMoment(mz0, mz1, mz2, mz3, mz4)
    if DEBUG:
        from pyLibrary.testing.fuzzytestcase import assertAlmostEqualValue

        globals()["DEBUG"] = False
        try:
            v = ZeroMoment2Stats(m)
            assertAlmostEqualValue(v.count, stats.count, places=10)
            assertAlmostEqualValue(v.mean, stats.mean, places=10)
            assertAlmostEqualValue(v.variance, stats.variance, places=10)
            assertAlmostEqualValue(v.skew, stats.skew, places=10)
            assertAlmostEqualValue(v.kurtosis, stats.kurtosis, places=10)
        except Exception, e:
            v = ZeroMoment2Stats(m)
            Log.error("programmer error")
        globals()["DEBUG"] = True
开发者ID:klahnakoski,项目名称:MoDataSubmission,代码行数:29,代码来源:stats.py

示例8: window

def window(data, param):
    """
    MAYBE WE CAN DO THIS WITH NUMPY (no, the edges of windows are not graceful with numpy)
    data - list of records
    """
    name = param.name            # column to assign window function result
    edges = param.edges          # columns to gourp by
    where = param.where          # DO NOT CONSIDER THESE VALUES
    sortColumns = param.sort     # columns to sort by
    calc_value = wrap_function(jx_expression_to_function(param.value))  # function that takes a record and returns a value (for aggregation)
    aggregate = param.aggregate  # WindowFunction to apply
    _range = param.range         # of form {"min":-10, "max":0} to specify the size and relative position of window

    data = filter(data, where)

    if not aggregate and not edges:
        if sortColumns:
            data = sort(data, sortColumns, already_normalized=True)
        # SIMPLE CALCULATED VALUE
        for rownum, r in enumerate(data):
            r[name] = calc_value(r, rownum, data)
        return

    if not aggregate or aggregate == "none":
        for _, values in groupby(data, edges.value):
            if not values:
                continue     # CAN DO NOTHING WITH THIS ZERO-SAMPLE

            sequence = sort(values, sortColumns, already_normalized=True)

            for rownum, r in enumerate(sequence):
                r[name] = calc_value(r, rownum, sequence)
        return

    for keys, values in groupby(data, edges.value):
        if not values:
            continue     # CAN DO NOTHING WITH THIS ZERO-SAMPLE

        sequence = sort(values, sortColumns)

        for rownum, r in enumerate(sequence):
            r["__temp__"] = calc_value(r, rownum, sequence)

        head = coalesce(_range.max, _range.stop)
        tail = coalesce(_range.min, _range.start)

        # PRELOAD total
        total = aggregate()
        for i in range(tail, head):
            total.add(sequence[i].__temp__)

        # WINDOW FUNCTION APPLICATION
        for i, r in enumerate(sequence):
            r[name] = total.end()
            total.add(sequence[i + head].__temp__)
            total.sub(sequence[i + tail].__temp__)

    for r in data:
        r["__temp__"] = None  # CLEANUP
开发者ID:klahnakoski,项目名称:esReplicate,代码行数:59,代码来源:jx.py

示例9: __new__

    def __new__(cls, e=None, query=None, *args, **kwargs):
        e.allowNulls = coalesce(e.allowNulls, True)

        if e.value and e.domain.type == "default":
            if query.groupby:
                return object.__new__(DefaultDecoder, e)

            if isinstance(e.value, basestring):
                Log.error("Expecting Variable or Expression, not plain string")

            if isinstance(e.value, TupleOp):
                # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
                # JUST PULL THE FIELDS
                if not all(isinstance(t, Variable) for t in e.value.terms):
                    Log.error("Can only handle variables in tuples")

                e.domain = Dict(dimension={"fields": e.value.terms})
                return object.__new__(DimFieldListDecoder, e)
            elif isinstance(e.value, Variable):
                cols = query.frum.get_columns()
                col = cols.filter(lambda c: c.name == e.value.var)[0]
                if not col:
                    return object.__new__(DefaultDecoder, e)
                limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT)

                if col.partitions != None:
                    e.domain = SimpleSetDomain(partitions=col.partitions[:limit:])
                else:
                    e.domain = set_default(DefaultDomain(limit=limit), e.domain.as_dict())
                    return object.__new__(DefaultDecoder, e)

            else:
                return object.__new__(DefaultDecoder, e)

        if e.value and e.domain.type in PARTITION:
            return object.__new__(SetDecoder, e)
        if isinstance(e.domain.dimension, Dimension):
            e.domain = e.domain.dimension.getDomain()
            return object.__new__(SetDecoder, e)
        if e.value and e.domain.type == "time":
            return object.__new__(TimeDecoder, e)
        if e.range:
            return object.__new__(GeneralRangeDecoder, e)
        if e.value and e.domain.type == "duration":
            return object.__new__(DurationDecoder, e)
        elif e.value and e.domain.type == "range":
            return object.__new__(RangeDecoder, e)
        elif not e.value and e.domain.dimension.fields:
            # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
            # JUST PULL THE FIELDS
            fields = e.domain.dimension.fields
            if isinstance(fields, Mapping):
                Log.error("No longer allowed: All objects are expressions")
            else:
                return object.__new__(DimFieldListDecoder, e)
        elif not e.value and all(e.domain.partitions.where):
            return object.__new__(GeneralSetDecoder, e)
        else:
            Log.error("domain type of {{type}} is not supported yet", type=e.domain.type)
开发者ID:klahnakoski,项目名称:TestFailures,代码行数:59,代码来源:decoders.py

示例10: _normalize_edge

def _normalize_edge(edge, schema=None):
    if not _Column:
        _late_import()

    if isinstance(edge, basestring):
        if schema:
            e = schema[edge]
            if e:
                if isinstance(e, _Column):
                    return Dict(
                        name=edge,
                        value=edge,
                        allowNulls=True,
                        domain=_normalize_domain(schema=schema)
                    )
                elif isinstance(e.fields, list) and len(e.fields) == 1:
                    return Dict(
                        name=e.name,
                        value=e.fields[0],
                        allowNulls=True,
                        domain=e.getDomain()
                    )
                else:
                    return Dict(
                        name=e.name,
                        allowNulls=True,
                        domain=e.getDomain()
                    )
        return Dict(
            name=edge,
            value=edge,
            allowNulls=True,
            domain=_normalize_domain(schema=schema)
        )
    else:
        edge = wrap(edge)
        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        if isinstance(edge.value, (list, set)) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Dict(fields=edge.value)

            return Dict(
                name=edge.name,
                allowNulls=bool(coalesce(edge.allowNulls, True)),
                domain=domain
            )

        domain = _normalize_domain(edge.domain, schema=schema)
        return Dict(
            name=coalesce(edge.name, edge.value),
            value=edge.value,
            range=edge.range,
            allowNulls=bool(coalesce(edge.allowNulls, True)),
            domain=domain
        )
开发者ID:klahnakoski,项目名称:MoDevETL,代码行数:58,代码来源:query.py

示例11: full_etl

def full_etl(settings):
    schema = convert.json2value(convert.value2json(SCHEMA), leaves=True)
    Cluster(settings.destination).get_or_create_index(settings=settings.destination, schema=schema, limit_replicas=True)
    destq = FromES(settings.destination)
    if settings.incremental:
        min_bug_id = destq.query({
            "from": coalesce(settings.destination.alias, settings.destination.index),
            "select": {"name": "max_bug_id", "value": "bug_id", "aggregate": "max"}
        })

        min_bug_id = int(MAX(min_bug_id-1000, 0))
    else:
        min_bug_id = 0

    sourceq = FromES(settings.source)
    max_bug_id = sourceq.query({
        "from": coalesce(settings.source.alias, settings.source.index),
        "select": {"name": "max_bug_id", "value": "bug_id", "aggregate": "max"}
    }) + 1
    max_bug_id = int(coalesce(max_bug_id, 0))

    # FIRST, GET ALL MISSING BUGS
    for s, e in qb.reverse(list(qb.intervals(min_bug_id, max_bug_id, 10000))):
        with Timer("pull {{start}}..{{end}} from ES", {"start": s, "end": e}):
            children = sourceq.query({
                "from": settings.source.alias,
                "select": ["bug_id", "dependson", "blocked", "modified_ts", "expires_on"],
                "where": {"and": [
                    {"range": {"bug_id": {"gte": s, "lt": e}}},
                    {"or": [
                        {"exists": "dependson"},
                        {"exists": "blocked"}
                    ]}
                ]},
                "limit": 10000
            })

        with Timer("fixpoint work"):
            to_fix_point(settings, destq, children.data)

    # PROCESS RECENT CHANGES
    with Timer("pull recent dependancies from ES"):
        children = sourceq.query({
            "from": settings.source.alias,
            "select": ["bug_id", "dependson", "blocked"],
            "where": {"and": [
                {"range": {"modified_ts": {"gte": convert.datetime2milli(datetime.utcnow() - timedelta(days=7))}}},
                {"or": [
                    {"exists": "dependson"},
                    {"exists": "blocked"}
                ]}
            ]},
            "limit": 100000
        })

    to_fix_point(settings, destq, children.data)
开发者ID:klahnakoski,项目名称:MoDevETL,代码行数:56,代码来源:hierarchy.py

示例12: comparer

 def comparer(left, right):
     left = coalesce(left)
     right = coalesce(right)
     for f in formal:
         try:
             result = value_compare(f.func(left), f.func(right), f.sort)
             if result != 0:
                 return result
         except Exception, e:
             Log.error("problem with compare", e)
开发者ID:klahnakoski,项目名称:MoDevETL,代码行数:10,代码来源:qb.py

示例13: single

 def single(col, r):
     min = coalesce(r["gte"], r[">="])
     max = coalesce(r["lte"], r["<="])
     if min and max:
         # SPECIAL CASE (BETWEEN)
         return db.quote_column(col) + " BETWEEN " + db.quote_value(min) + " AND " + db.quote_value(max)
     else:
         return " AND ".join(
             db.quote_column(col) + name2sign[sign] + db.quote_value(value)
             for sign, value in r.items()
         )
开发者ID:klahnakoski,项目名称:MoDataSubmission,代码行数:11,代码来源:jx_usingMySQL.py

示例14: main

def main():
    settings = startup.read_settings(defs={
       "name": ["--restart", "--reset", "--redo"],
       "help": "force a reprocessing of all data",
       "action": "store_true",
       "dest": "restart"
    })
    Log.start(settings.debug)

    try:
        with startup.SingleInstance(flavor_id=settings.args.filename):
            if settings.args.restart:
                reviews = Cluster(settings.destination).create_index(settings.destination)
            else:
                reviews = Cluster(settings.destination).get_proto(settings.destination)

            bugs = Cluster(settings.source).get_index(settings.source)

            with FromES(bugs) as esq:
                es_max_bug = esq.query({
                    "from": "private_bugs",
                    "select": {"name": "max_bug", "value": "bug_id", "aggregate": "maximum"}
                })

            #PROBE WHAT RANGE OF BUGS IS LEFT TO DO (IN EVENT OF FAILURE)
            with FromES(reviews) as esq:
                es_min_bug = esq.query({
                    "from": "reviews",
                    "select": {"name": "min_bug", "value": "bug_id", "aggregate": "minimum"}
                })

            batch_size = coalesce(bugs.settings.batch_size, settings.size, 1000)
            threads = coalesce(settings.threads, 4)
            Log.note(str(settings.min_bug))
            min_bug = int(coalesce(settings.min_bug, 0))
            max_bug = int(coalesce(settings.max_bug, Math.min(es_min_bug + batch_size * threads, es_max_bug)))

            with ThreadedQueue(reviews, batch_size=coalesce(reviews.settings.batch_size, 100)) as sink:
                func = functools.partial(full_etl, settings, sink)
                with Multithread(func, threads=threads) as m:
                    m.inbound.silent = True
                    Log.note("bugs from {{min}} to {{max}}, step {{step}}", {
                        "min": min_bug,
                        "max": max_bug,
                        "step": batch_size
                    })
                    m.execute(reversed([{"bugs": range(s, e)} for s, e in qb.intervals(min_bug, max_bug, size=1000)]))

            if settings.args.restart:
                reviews.add_alias()
                reviews.delete_all_but_self()
    finally:
        Log.stop()
开发者ID:klahnakoski,项目名称:MoDevETL,代码行数:53,代码来源:reviews.py

示例15: format_cube_from_aggop

def format_cube_from_aggop(decoders, aggs, start, query, select):
    agg = aggs
    b = coalesce(agg._filter, agg._nested)
    while b:
        agg = b
        b = coalesce(agg._filter, agg._nested)

    matricies = [(s, Matrix(dims=[], zeros=(s.aggregate == "count"))) for s in select]
    for s, m in matricies:
        m[tuple()] = agg[s.pull]
    cube = Cube(query.select, [], {s.name: m for s, m in matricies})
    cube.frum = query
    return cube
开发者ID:klahnakoski,项目名称:Activedata-ETL,代码行数:13,代码来源:format.py


注:本文中的pyLibrary.dot.coalesce函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。