当前位置: 首页>>代码示例>>Python>>正文


Python Connection.create_table方法代码示例

本文整理汇总了Python中happybase.Connection.create_table方法的典型用法代码示例。如果您正苦于以下问题:Python Connection.create_table方法的具体用法?Python Connection.create_table怎么用?Python Connection.create_table使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在happybase.Connection的用法示例。


在下文中一共展示了Connection.create_table方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: setup_module

# 需要导入模块: from happybase import Connection [as 别名]
# 或者: from happybase.Connection import create_table [as 别名]
def setup_module():
    """Module-level fixture: open a connection and create the test table.

    Binds the module globals ``connection`` and ``table`` for use by the
    tests in this module.
    """
    global connection, table

    connection = Connection(**connection_kwargs)
    assert_is_not_none(connection)

    # Three column families covering the option forms passed to
    # create_table here: an empty dict, ``None`` and explicit options.
    column_families = dict(
        cf1={},
        cf2=None,
        cf3=dict(max_versions=1),
    )
    connection.create_table(TEST_TABLE_NAME, families=column_families)

    table = connection.table(TEST_TABLE_NAME)
    assert_is_not_none(table)
开发者ID:defcube,项目名称:happybase,代码行数:17,代码来源:test_api.py

示例2: test_drop_all_tables_when_table_name_is_str

# 需要导入模块: from happybase import Connection [as 别名]
# 或者: from happybase.Connection import create_table [as 别名]
 def test_drop_all_tables_when_table_name_is_str(self):
     """Recreate the queue/metadata tables, then build both components with dropping enabled.

     Table names are passed as ``str``; the test fails if either
     constructor raises ``AlreadyExists``.
     """
     conn = Connection(host='hbase-docker', port=9090)
     # Start from a clean slate: remove every table currently listed.
     for existing in conn.tables():
         conn.delete_table(existing, True)
     queue_name = 'queue'
     metadata_name = 'metadata'
     for name in (queue_name, metadata_name):
         conn.create_table(name, {'f': {'max_versions': 1}})
     listed = set(conn.tables())
     assert listed == {b'metadata', b'queue'}  # Failure of test itself
     try:
         HBaseQueue(connection=conn, partitions=1, table_name=queue_name, drop=True)
         HBaseMetadata(connection=conn, table_name=metadata_name, drop_all_tables=True,
                       use_snappy=False, batch_size=300000, store_content=True)
     except AlreadyExists:
         assert False, "failed to drop hbase tables"
开发者ID:voith,项目名称:frontera,代码行数:18,代码来源:test_hbase.py

示例3: TestDomainCache

# 需要导入模块: from happybase import Connection [as 别名]
# 或者: from happybase.Connection import create_table [as 别名]
class TestDomainCache(unittest.TestCase):
    def setUp(self):
        """Make sure the ``domain_metadata`` table exists and rows d1..d4 are deleted."""
        logging.basicConfig(level=logging.DEBUG)
        self.conn = Connection(host="hbase-docker")
        existing_tables = self.conn.tables()
        if b'domain_metadata' not in existing_tables:
            families = {'m': {'max_versions': 1, 'block_cache_enabled': 1}}
            self.conn.create_table('domain_metadata', families)
        metadata_table = self.conn.table('domain_metadata')
        # Remove the rows the tests write so each test starts without them.
        for row_key in ('d1', 'd2', 'd3', 'd4'):
            metadata_table.delete(row_key)

    def test_domain_cache_both_generations(self):
        """Write four entries into a cache built with size argument 2 and read all of them back.

        NOTE(review): presumably the first ``DomainCache`` argument bounds the
        in-memory generation size, so d1/d2 get evicted - confirm against DomainCache.
        """
        dc = DomainCache(2, self.conn, 'domain_metadata')
        dc['d1'] = {'domain': 1}
        dc['d2'] = {'domain': 2}

        # eviction should happen
        dc['d3'] = {'domain': [3, 2, 1]}
        dc['d4'] = {'domain': 4}

        # Every entry must still be readable after the writes above.
        assert dc['d1'] == {'domain': 1}
        assert dc['d2'] == {'domain': 2}
        assert dc['d3'] == {'domain': [3, 2, 1]}
        assert dc['d4'] == {'domain': 4}

    def test_domain_cache_get_with_default(self):
        """``get(key, default)`` must return the stored value, not the default, for existing keys."""
        dc = DomainCache(2, self.conn, 'domain_metadata')
        dc['d1'] = {'domain': 1}
        dc['d2'] = {'domain': 2}
        dc['d3'] = {'domain': [3, 2, 1]}
        dc['d4'] = {'domain': 4}

        # One key written early and one written late are both checked.
        assert dc.get('d1', {}) == {'domain': 1}
        assert dc.get('d3', {}) == {'domain': [3, 2, 1]}

    def test_domain_cache_setdefault(self):
        """``setdefault`` returns the stored value for known keys and the default for a new key."""
        dc = DomainCache(2, self.conn, 'domain_metadata')
        dc['d1'] = {'domain': 1}
        dc['d2'] = {'domain': 2}
        dc['d3'] = {'domain': [3, 2, 1]}
        dc['d4'] = {'domain': 4}

        assert dc.setdefault('d1', {}) == {'domain': 1}
        # 'd5' was never written, so the supplied default is returned.
        assert dc.setdefault('d5', {'domain': 6}) == {'domain': 6}
        dc.flush()
        # The stored value must still win over the default after flush().
        assert dc.setdefault('d3', {}) == {'domain': [3, 2, 1]}

    def test_domain_cache_setdefault_with_second_gen_flush(self):
        """An in-place change to the dict returned by ``setdefault`` must be seen by the next ``setdefault``.

        The cache is built with ``batch_size=3``.
        NOTE(review): the name suggests this exercises a second-generation
        flush - confirm against DomainCache.
        """
        dc = DomainCache(2, self.conn, 'domain_metadata', batch_size=3)
        dc['d1'] = {'domain': 1}
        dc['d2'] = {'domain': 2}

        dc['d3'] = {'domain': [3, 2, 1]}
        dc['d4'] = {'domain': 4}

        # Mutate the returned mapping in place: 1 -> 2.
        dc.setdefault('d1', {})['domain'] += 1

        assert dc.setdefault('d1', {}) == {'domain': 2}

    def test_empty_key(self):
        """Assigning to the empty-string key must raise ``KeyError``."""
        dc = DomainCache(2, self.conn, 'domain_metadata')
        with self.assertRaises(KeyError):
            dc[''] = {'test':1}

    def test_deletion(self):
        """Deleting a missing key raises ``KeyError``; deleting stored keys succeeds before and after ``flush()``."""
        dc = DomainCache(2, self.conn, 'domain_metadata')
        # Nothing has been written yet, so 'd1' is unknown.
        with self.assertRaises(KeyError):
            del dc['d1']

        dc['d1'] = {'domain': 1}
        dc['d2'] = {'domain': 2}
        dc['d3'] = {'domain': [3, 2, 1]}
        dc['d4'] = {'domain': 4}

        del dc['d1'] # second gen
        del dc['d3'] # first gen

        dc.flush()

        del dc['d4'] # hbase

    def test_contains(self):
        """``in`` must find stored keys both before and after ``flush()``."""
        dc = DomainCache(2, self.conn, 'domain_metadata')
        dc['d1'] = {'domain': 1}
        dc['d2'] = {'domain': 2}
        dc['d3'] = {'domain': [3, 2, 1]}
        dc['d4'] = {'domain': 4}

        assert 'd1' in dc # second gen
        assert 'd3' in dc # first gen

        dc.flush()

        # Membership must still hold once the cache has been flushed.
        assert 'd4' in dc

    def test_pop(self):
        dc = DomainCache(2, self.conn, 'domain_metadata')
#.........这里部分代码省略.........
开发者ID:scrapinghub,项目名称:frontera,代码行数:103,代码来源:test_domain_cache.py

示例4: HBaseBackend

# 需要导入模块: from happybase import Connection [as 别名]
# 或者: from happybase.Connection import create_table [as 别名]
class HBaseBackend(Backend):
    component_name = 'HBase Backend'

    def __init__(self, manager):
        """Connect to HBase over Thrift and set up the queue, state checker and metadata batch.

        Settings read from ``manager.settings`` (default in parentheses):
        ``HBASE_THRIFT_PORT`` (9090), ``HBASE_THRIFT_HOST`` ('localhost'; when
        a list or tuple is given one entry is picked with ``choice``),
        ``HBASE_NAMESPACE`` ('crawler'), ``HBASE_DROP_ALL_TABLES`` (False),
        ``HBASE_QUEUE_PARTITIONS`` (4) and ``HBASE_METADATA_TABLE`` ('metadata').

        :param manager: object exposing ``settings`` and ``logger.backend``.
        """
        self.manager = manager

        settings = manager.settings
        port = settings.get('HBASE_THRIFT_PORT', 9090)
        hosts = settings.get('HBASE_THRIFT_HOST', 'localhost')
        namespace = settings.get('HBASE_NAMESPACE', 'crawler')
        drop_all_tables = settings.get('HBASE_DROP_ALL_TABLES', False)
        self.queue_partitions = settings.get('HBASE_QUEUE_PARTITIONS', 4)
        self._table_name = settings.get('HBASE_METADATA_TABLE', 'metadata')
        # isinstance also accepts list/tuple subclasses, unlike an exact type() match.
        host = choice(hosts) if isinstance(hosts, (list, tuple)) else hosts

        self.connection = Connection(host=host, port=int(port), table_prefix=namespace, table_prefix_separator=':')
        # protocol='compact', transport='framed'
        self.queue = HBaseQueue(self.connection, self.queue_partitions, self.manager.logger.backend,
                                drop=drop_all_tables)
        self.state_checker = HBaseState(self.connection, self._table_name)

        tables = set(self.connection.tables())
        if drop_all_tables and self._table_name in tables:
            self.connection.delete_table(self._table_name, disable=True)
            tables.remove(self._table_name)

        if self._table_name not in tables:
            # Column families of the metadata table; 'compression': 'SNAPPY'
            # is a possible extra option for 'm' that is left disabled here.
            schema = {
                'm': {'max_versions': 5},
                's': {'max_versions': 1, 'block_cache_enabled': 1,
                      'bloom_filter_type': 'ROW', 'in_memory': True},
                'c': {'max_versions': 1},
            }
            self.connection.create_table(self._table_name, schema)
        table = self.connection.table(self._table_name)
        self.batch = table.batch(batch_size=9216)

    @classmethod
    def from_manager(cls, manager):
        """Alternate constructor: build the backend from ``manager``."""
        backend = cls(manager)
        return backend

    def frontier_start(self):
        """No-op hook: nothing is done when the frontier starts."""
        return None

    def frontier_stop(self):
        """Flush pending writes, then close the HBase connection.

        The flush runs first because it needs the connection to still be
        open; ``finally`` guarantees the connection is closed even when the
        flush raises (the exception still propagates).
        """
        try:
            self.flush()
        finally:
            self.connection.close()

    def add_seeds(self, seeds):
        """Queue one metadata row (``depth=0``) per seed into the write batch.

        The row key is the unhexlified fingerprint returned by the canonical solver.
        """
        for seed in seeds:
            canonical = self.manager.canonicalsolver.get_canonical_url(seed)
            url, fingerprint, domain = canonical
            row = prepare_hbase_object(
                url=url,
                depth=0,
                created_at=utcnow_timestamp(),
                domain_fingerprint=domain['fingerprint'],
            )
            self.batch.put(unhexlify(fingerprint), row)

    def page_crawled(self, response, links):
        """Record a crawled page and every link extracted from it.

        The response row stores the status code and body. Each distinct link
        fingerprint gets a row holding its canonical URL, creation timestamp
        and domain fingerprint. All rows go through the write batch.

        :param response: crawled response (``status_code`` and ``body`` are read).
        :param links: iterable of link objects accepted by the canonical solver.
        """
        url, fingerprint, domain = self.manager.canonicalsolver.get_canonical_url(response)
        obj = prepare_hbase_object(status_code=response.status_code, content=response.body)

        # Keyed by binary fingerprint, so duplicate links collapse to one row.
        links_dict = dict()
        for link in links:
            link_url, link_fingerprint, link_domain = self.manager.canonicalsolver.get_canonical_url(link)
            links_dict[unhexlify(link_fingerprint)] = (link_url, link_domain)

        self.batch.put(unhexlify(fingerprint), obj)
        # .items() works on both Python 2 and 3; .iteritems() exists only on Python 2.
        for link_fingerprint, (link_url, link_domain) in links_dict.items():
            obj = prepare_hbase_object(url=link_url,
                                       created_at=utcnow_timestamp(),
                                       domain_fingerprint=link_domain['fingerprint'])
            self.batch.put(link_fingerprint, obj)

    def request_error(self, request, error):
        """Store ``error`` for ``request`` in the row keyed by the request's fingerprint.

        The row key comes from ``request.meta['fingerprint']``; only the
        domain part of the canonical-solver result is used.
        """
        _url, _fingerprint, domain = self.manager.canonicalsolver.get_canonical_url(request)
        error_row = prepare_hbase_object(
            url=request.url,
            created_at=utcnow_timestamp(),
            error=error,
            domain_fingerprint=domain['fingerprint'],
        )
        row_key = unhexlify(request.meta['fingerprint'])
        self.batch.put(row_key, error_row)

    def get_next_requests(self, max_next_requests, **kwargs):
        """Fetch the next requests from the queue for the requested partitions.

        :param max_next_requests: limit passed to ``self.queue.get`` for each partition.
        :param kwargs: ``partitions`` - iterable of partition ids to poll;
            ids outside ``range(self.queue_partitions)`` are ignored.
        :return: list of request objects built with ``manager.request_model``,
            each carrying ``fingerprint`` and ``score`` in ``meta``.
        """
        log = self.manager.logger.backend
        collected = []
        log.debug("Querying queue table.")
        wanted = set(kwargs.pop('partitions', []))
        for partition_id in range(self.queue_partitions):
            if partition_id in wanted:
                items = self.queue.get(partition_id, max_next_requests,
                                       min_hosts=24, max_requests_per_host=128)
                log.debug("Got %d items for partition id %d" % (len(items), partition_id))
                for fingerprint, url, score in items:
                    request = self.manager.request_model(url=url)
                    request.meta['fingerprint'] = fingerprint
                    request.meta['score'] = score
                    collected.append(request)
        return collected
#.........这里部分代码省略.........
开发者ID:marcolin,项目名称:distributed-frontera,代码行数:103,代码来源:hbase.py

示例5: HBaseBackend

# 需要导入模块: from happybase import Connection [as 别名]
# 或者: from happybase.Connection import create_table [as 别名]
class HBaseBackend(Backend):
    component_name = "HBase Backend"

    def __init__(self, manager):
        """Connect to HBase over Thrift and set up the queue, state checker and metadata batch.

        Every option is read from ``manager.settings`` without a local
        default: ``HBASE_THRIFT_PORT``, ``HBASE_THRIFT_HOST`` (when a list or
        tuple is given one entry is picked with ``choice``), ``HBASE_NAMESPACE``,
        ``HBASE_DROP_ALL_TABLES``, ``HBASE_QUEUE_PARTITIONS``,
        ``HBASE_METADATA_TABLE``, ``HBASE_USE_COMPACT_PROTOCOL``,
        ``HBASE_QUEUE_TABLE``, ``HBASE_STATE_CACHE_SIZE_LIMIT``,
        ``HBASE_USE_SNAPPY``, ``HBASE_BATCH_SIZE`` and ``HBASE_STORE_CONTENT``.

        :param manager: object exposing ``settings`` and ``logger.backend``.
        """
        self.manager = manager

        settings = manager.settings
        port = settings.get("HBASE_THRIFT_PORT")
        hosts = settings.get("HBASE_THRIFT_HOST")
        namespace = settings.get("HBASE_NAMESPACE")
        drop_all_tables = settings.get("HBASE_DROP_ALL_TABLES")
        self.queue_partitions = settings.get("HBASE_QUEUE_PARTITIONS")
        self._table_name = settings.get("HBASE_METADATA_TABLE")
        # isinstance also accepts list/tuple subclasses, unlike an exact type() match.
        host = choice(hosts) if isinstance(hosts, (list, tuple)) else hosts
        kwargs = {"host": host, "port": int(port), "table_prefix": namespace, "table_prefix_separator": ":"}
        if settings.get("HBASE_USE_COMPACT_PROTOCOL"):
            kwargs.update({"protocol": "compact", "transport": "framed"})
        self.connection = Connection(**kwargs)
        self.queue = HBaseQueue(
            self.connection,
            self.queue_partitions,
            self.manager.logger.backend,
            settings.get("HBASE_QUEUE_TABLE"),
            drop=drop_all_tables,
        )
        self.state_checker = HBaseState(
            self.connection, self._table_name, self.manager.logger.backend, settings.get("HBASE_STATE_CACHE_SIZE_LIMIT")
        )
        tables = set(self.connection.tables())
        if drop_all_tables and self._table_name in tables:
            self.connection.delete_table(self._table_name, disable=True)
            tables.remove(self._table_name)

        if self._table_name not in tables:
            schema = {
                "m": {"max_versions": 1},
                "s": {"max_versions": 1, "block_cache_enabled": 1, "bloom_filter_type": "ROW", "in_memory": True},
                "c": {"max_versions": 1},
            }
            # Compression is only enabled for the 'm' and 'c' families.
            if settings.get("HBASE_USE_SNAPPY"):
                schema["m"]["compression"] = "SNAPPY"
                schema["c"]["compression"] = "SNAPPY"
            self.connection.create_table(self._table_name, schema)
        table = self.connection.table(self._table_name)
        self.batch = table.batch(batch_size=settings.get("HBASE_BATCH_SIZE"))
        self.store_content = settings.get("HBASE_STORE_CONTENT")

    @classmethod
    def from_manager(cls, manager):
        """Alternate constructor: build the backend from ``manager``."""
        backend = cls(manager)
        return backend

    def frontier_start(self):
        """No-op hook: nothing is done when the frontier starts."""
        return None

    def frontier_stop(self):
        """Flush pending writes, then close the HBase connection.

        The flush runs first because it needs the connection to still be
        open; ``finally`` guarantees the connection is closed even when the
        flush raises (the exception still propagates).
        """
        try:
            self.flush()
        finally:
            self.connection.close()

    def add_seeds(self, seeds):
        """Queue one metadata row (``depth=0``) per seed into the write batch.

        The row key is the unhexlified fingerprint returned by the canonical solver.
        """
        for seed in seeds:
            canonical = self.manager.canonicalsolver.get_canonical_url(seed)
            url, fingerprint, domain = canonical
            row = prepare_hbase_object(
                url=url,
                depth=0,
                created_at=utcnow_timestamp(),
                domain_fingerprint=domain["fingerprint"],
            )
            self.batch.put(unhexlify(fingerprint), row)

    def page_crawled(self, response, links):
        """Record a crawled page and every link extracted from it.

        The response row always stores the status code; the body is stored
        only when ``self.store_content`` is truthy. Each distinct link
        fingerprint gets a row holding its canonical URL, creation timestamp
        and domain fingerprint. All rows go through the write batch.

        :param response: crawled response (``status_code`` and ``body`` are read).
        :param links: iterable of link objects accepted by the canonical solver.
        """
        url, fingerprint, domain = self.manager.canonicalsolver.get_canonical_url(response)
        obj = (
            prepare_hbase_object(status_code=response.status_code, content=response.body)
            if self.store_content
            else prepare_hbase_object(status_code=response.status_code)
        )
        # Keyed by binary fingerprint, so duplicate links collapse to one row.
        links_dict = dict()
        for link in links:
            link_url, link_fingerprint, link_domain = self.manager.canonicalsolver.get_canonical_url(link)
            links_dict[unhexlify(link_fingerprint)] = (link_url, link_domain)
        self.batch.put(unhexlify(fingerprint), obj)
        # .items() works on both Python 2 and 3; .iteritems() exists only on Python 2.
        for link_fingerprint, (link_url, link_domain) in links_dict.items():
            obj = prepare_hbase_object(
                url=link_url, created_at=utcnow_timestamp(), domain_fingerprint=link_domain["fingerprint"]
            )
            self.batch.put(link_fingerprint, obj)

    def request_error(self, request, error):
        """Store ``error`` for ``request`` in the row keyed by the request's fingerprint.

        The row key comes from ``request.meta["fingerprint"]``; only the
        domain part of the canonical-solver result is used.
        """
        _url, _fingerprint, domain = self.manager.canonicalsolver.get_canonical_url(request)
        error_row = prepare_hbase_object(
            url=request.url,
            created_at=utcnow_timestamp(),
            error=error,
            domain_fingerprint=domain["fingerprint"],
        )
        row_key = unhexlify(request.meta["fingerprint"])
        self.batch.put(row_key, error_row)

    def get_next_requests(self, max_next_requests, **kwargs):
        next_pages = []
        log = self.manager.logger.backend
        log.debug("Querying queue table.")
        partitions = set(kwargs.pop("partitions", []))
        for partition_id in range(0, self.queue_partitions):
            if partition_id not in partitions:
                continue
#.........这里部分代码省略.........
开发者ID:vu3jej,项目名称:distributed-frontera,代码行数:103,代码来源:hbase.py

示例6: HBaseCache

# 需要导入模块: from happybase import Connection [as 别名]
# 或者: from happybase.Connection import create_table [as 别名]
class HBaseCache(BaseCache):
    def __init__(self, host='127.0.0.1', port=9090, prefix=None, table_name=None, default_timeout=300, **kwargs):
        """Open a happybase connection and (re)create the cache table.

        :param host: Thrift host passed to ``Connection``.
        :param port: Thrift port passed to ``Connection``.
        :param prefix: forwarded to ``Connection`` as ``table_prefix``.
        :param table_name: name of the cache table (required).
        :param default_timeout: passed to the base-class constructor.
        :param kwargs: extra keyword arguments forwarded to ``Connection``.
        :raises TypeError: if ``table_name`` is empty or missing.
        """
        super(HBaseCache, self).__init__(default_timeout)

        if table_name:
            self.table_name = table_name
        else:
            raise TypeError('table_name is a required argument')

        connection = Connection(host=host, port=port, table_prefix=prefix, **kwargs)
        self._c = connection
        self._table = connection.table(table_name)

        # Note: clear() deletes and recreates the table, so initialisation
        # discards any rows already stored in it.
        self.clear()

    def _put(self, key, value, timeout):
        timestamp = (datetime.now() + timedelta(0, timeout or self.default_timeout)).isoformat()
        return key, {'cf:value': value, 'cf:timestamp': timestamp}

    def _extract(self, value):
        if value:
            v = value.get('cf:value')
            ts = from_iso(value.get('cf:timestamp'))
            if ts > datetime.now():
                return v
            else:
                return None
        else:
            return None

    def add(self, key, value, timeout=None):
        """Store ``value`` under ``key`` only if the row does not exist yet.

        :param timeout: lifetime in seconds, forwarded to ``_put``.
        :return: ``True`` if the value was written; ``False`` if the row
            already exists or the HBase call failed.
        """
        table = self._table
        try:
            if table.row(key):
                return False
            table.put(*self._put(key, value, timeout))
        except Exception:
            # Report ordinary failures as "not added"; KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare ``except`` would.
            return False
        return True

    def clear(self):
        """Delete and recreate the cache table, then run the base-class ``clear``.

        :return: whatever ``BaseCache.clear`` returns.
        """
        try:
            self._c.delete_table(self.table_name, disable=True)
        except Exception:
            # Best effort: presumably the table does not exist yet on first
            # use. Only ordinary errors are ignored; KeyboardInterrupt and
            # SystemExit propagate.
            pass
        self._c.create_table(self.table_name, {'cf': dict()})
        return super(HBaseCache, self).clear()

    def dec(self, key, delta=1):
        """Decrement the counter at ``key`` by ``delta``; implemented as ``inc`` with the negated delta."""
        negated = -delta
        return self.inc(key, negated)

    def delete(self, key):
        """Delete one key by handing a one-element list to ``delete_many``."""
        # delete in happybase just uses batch(), so reuse the batched path
        single = [key]
        return self.delete_many(single)

    def delete_many(self, *keys):
        """Delete several rows in one batch.

        Keys may be given as separate positional arguments or as a single
        list/tuple (the form ``delete`` uses).

        :return: ``True`` once the batch has been sent.
        """
        # Unwrap the single-sequence calling form: delete_many([k1, k2]).
        if len(keys) == 1 and isinstance(keys[0], (list, tuple)):
            keys = keys[0]
        with self._table.batch() as batch:  # TO-DO: exceptions here?
            for k in keys:
                # The row key must be passed; a bare batch.delete() raises TypeError.
                batch.delete(k)
        return True

    def get(self, key):
        """Return the cached value for ``key``; any falsy extraction result becomes ``None``."""
        extracted = self._extract(self._table.row(key))
        return extracted if extracted else None

    def get_dict(self, *keys):
        """Map every requested key to its cached value, or ``None`` when missing.

        The keys are expected as one sequence in the first positional argument.
        """
        wanted = keys[0]
        # Non-existing keys are not returned by table.rows(), hence the .get() below.
        found = dict(self._table.rows(wanted))
        result = {}
        for k in wanted:
            result[k] = self._extract(found.get(k, None))
        return result

    def get_many(self, *keys):
        """Return the cached values in the order of the key sequence given as first argument."""
        lookup = self.get_dict(*keys)
        ordered = []
        for k in keys[0]:
            ordered.append(lookup[k])
        return ordered

    def has(self, key):
        """Report whether ``key`` is cached by delegating to the ``BaseCache`` implementation."""
        present = super(HBaseCache, self).has(key)
        return present

    def inc(self, key, delta=1):
        """Add ``delta`` to the counter in column ``cf:value`` of row ``key`` and return the new value."""
        return self._table.counter_inc(key, 'cf:value', delta)

    # TO-DO: rewrite this to use set_many. Check if delete is necessary, etc.
    def set(self, key, value, timeout=None):
        """Store ``value`` under ``key``, replacing any existing row.

        :param timeout: lifetime in seconds, forwarded to ``_put``.
        :return: ``True`` on success, ``False`` if the HBase call failed.
        """
        # set in happybase just uses batch
        table = self._table
        try:
            # Delete first so no column of a previous row survives the put.
            table.delete(key)
            table.put(*self._put(key, value, timeout))
        except Exception:
            # Ordinary failures are reported as False; KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare ``except`` would.
            return False
        return True

    def set_many(self, mapping, timeout=None):
        batch = self._table.batch()
        for key, value in _items(mapping):
            batch.put(*self._put(key, value, timeout))
        try:
            batch.send()
        except:
#.........这里部分代码省略.........
开发者ID:gglanzani,项目名称:hbase-cache,代码行数:103,代码来源:hbase_cache.py

示例7: HBaseCache

# 需要导入模块: from happybase import Connection [as 别名]
# 或者: from happybase.Connection import create_table [as 别名]
class HBaseCache(BaseCache):
    def __init__(self, host='127.0.0.1', port=9090, prefix=None, table_name=None, default_timeout=300, **kwargs):
        """Open a happybase connection and (re)create the cache table.

        :param host: Thrift host passed to ``Connection``.
        :param port: Thrift port passed to ``Connection``.
        :param prefix: forwarded to ``Connection`` as ``table_prefix``.
        :param table_name: name of the cache table (required).
        :param default_timeout: passed to the base-class constructor.
        :param kwargs: extra keyword arguments forwarded to ``Connection``.
        :raises TypeError: if ``table_name`` is empty or missing.
        """
        super(HBaseCache, self).__init__(default_timeout)

        if not table_name:
            raise TypeError('table_name is a required argument')
        self.table_name = table_name

        # The constructor parameter is ``prefix``; it maps onto happybase's
        # ``table_prefix`` keyword.
        self._c = Connection(host=host, port=port, table_prefix=prefix, **kwargs)
        self._table = self._c.table(table_name)
        # clear() deletes and recreates the table, so existing rows are discarded.
        self.clear()

    def _put(self, key, value):
        return key, {'cf:value': value}

    def _extract(self, value):
        if value:
            return value.get('cf:value')
        else:
            return value

    def add(self, key, value, timeout=None):
        """Store ``value`` under ``key`` only if the row does not exist yet.

        ``timeout`` is accepted for interface compatibility but not used.

        :return: ``True`` if the value was written; ``False`` if the row
            already exists or the HBase call failed.
        """
        table = self._table
        try:
            # A truthy row result means the key is already present.
            if table.row(key):
                return False
            table.put(*self._put(key, value))
        except Exception:
            # Report ordinary failures as "not added"; KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare ``except`` would.
            return False
        return True

    def clear(self):
        """Delete and recreate the cache table, then run the base-class ``clear``.

        :return: whatever ``BaseCache.clear`` returns.
        """
        try:
            self._c.delete_table(self.table_name, disable=True)
        except Exception:
            # Best effort: __init__ calls clear() before the table has ever
            # been created, so a failed delete must not abort construction.
            pass
        self._c.create_table(self.table_name, {'cf': dict()})
        return super(HBaseCache, self).clear()

    def dec(self, key, delta=1):
        """Decrement the counter at ``key`` by ``delta``; implemented as ``inc`` with the negated delta."""
        negated = -delta
        return self.inc(key, negated)

    def delete(self, key):
        """Delete the row stored under ``key``.

        :return: ``True`` on success, ``False`` if the HBase call failed.
        """
        try:
            self._table.delete(key)
        except Exception:
            # Ordinary failures are reported as False; KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare ``except`` would.
            return False
        return True

    def delete_many(self, *keys):
        """Delete all given row keys through one batch.

        :return: ``True`` once the batch was sent, ``False`` if queuing or
            sending failed.
        """
        batch = self._table.batch()
        try:
            for k in keys:
                batch.delete(k)
            batch.send()
        except Exception:
            # Ordinary failures are reported as False; KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare ``except`` would.
            return False
        return True

    def get(self, key):
        """Fetch the row for ``key`` and return what ``_extract`` yields for it."""
        return self._extract(self._table.row(key))

    def get_dict(self, *keys):
        """Return ``{key: value}`` for every requested key; missing keys map to ``None``.

        :param keys: row keys given as positional arguments.
        """
        # table.rows() yields (row_key, columns) pairs for the rows it finds.
        found = dict(self._table.rows(list(keys)))
        return {k: self._extract(found.get(k)) for k in keys}

    def get_many(self, *keys):
        """Return the cached values aligned with ``keys``; missing keys yield ``None``.

        :param keys: row keys given as positional arguments.
        :return: list with exactly one entry per requested key, in order.
        """
        # table.rows() yields (row_key, columns) pairs for the rows it finds.
        found = dict(self._table.rows(list(keys)))
        return [self._extract(found.get(k)) for k in keys]

    def has(self, key):
        """Report whether ``key`` is cached by delegating to the ``BaseCache`` implementation."""
        present = super(HBaseCache, self).has(key)
        return present

    def inc(self, key, delta=1):
        """Add ``delta`` to the counter in column ``cf:value`` of row ``key`` and return the new value."""
        return self._table.counter_inc(key, 'cf:value', delta)

    def set(self, key, value, timeout=None):
        """Store ``value`` under ``key``, replacing any existing row.

        ``timeout`` is accepted for interface compatibility but not used.

        :return: ``True`` on success, ``False`` if the HBase call failed.
        """
        table = self._table
        try:
            # TO-DO: confirm delete() does not raise for a non-existing row;
            # otherwise a table.row() check is needed before it.
            table.delete(key)
            table.put(*self._put(key, value))
        except Exception:
            # Ordinary failures are reported as False; KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare ``except`` would.
            return False
        return True

    def set_many(self, mapping, timeout=None):
        batch = self._table.batch()
        for key, value in _items(mapping):
            batch.put(*self._put(key, value))
        try:
#.........这里部分代码省略.........
开发者ID:Mirkainthecity,项目名称:hbase,代码行数:103,代码来源:hbase_cache.py

示例8: HBaseCache

# 需要导入模块: from happybase import Connection [as 别名]
# 或者: from happybase.Connection import create_table [as 别名]
class HBaseCache(BaseCache):
    def __init__(self, host="127.0.0.1", port=9090, prefix=None, table_name=None, default_timeout=300, **kwargs):
        """Open a happybase connection and (re)create the cache table.

        :param host: Thrift host passed to ``Connection``.
        :param port: Thrift port passed to ``Connection``.
        :param prefix: forwarded to ``Connection`` as ``table_prefix``.
        :param table_name: name of the cache table (required).
        :param default_timeout: passed to the base-class constructor.
        :param kwargs: extra keyword arguments forwarded to ``Connection``.
        :raises TypeError: if ``table_name`` is empty or missing.
        """
        # Potential bug: table_prefix instead of prefix
        BaseCache.__init__(self, default_timeout)

        if table_name:
            self.table_name = table_name
        else:
            raise TypeError("table_name is a required argument")

        connection = Connection(host=host, port=port, table_prefix=prefix, **kwargs)
        self._c = connection
        self._table = connection.table(table_name)

        # Note: clear() deletes and recreates the table, so initialisation
        # discards any rows already stored in it.
        self.clear()

    def _put(self, key, value):
        return key, {"cf:value": value}

    def _extract(self, value):
        if value:
            return value.get("cf:value")
        else:
            return value

    def add(self, key, value, timeout=None):
        """Store ``value`` under ``key`` only if the row does not exist yet.

        Note: ``timeout`` is accepted but currently not used, although it
        should be.

        :return: ``True`` if the value was written; ``False`` if the row
            already exists or the HBase call failed.
        """
        table = self._table
        try:
            # table.row() returns an empty dict for a non-existing key.
            if table.row(key):
                return False
            table.put(*self._put(key, value))
        except Exception:
            # Report ordinary failures as "not added"; KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare ``except`` would.
            return False
        return True

    def clear(self):
        """Delete and recreate the cache table, then run the base-class ``clear``.

        :return: whatever ``BaseCache.clear`` returns.
        """
        try:
            self._c.delete_table(self.table_name, disable=True)
        except Exception:
            # Best effort: presumably the table does not exist yet on first
            # use. Only ordinary errors are ignored; KeyboardInterrupt and
            # SystemExit propagate.
            pass
        self._c.create_table(self.table_name, {"cf": dict()})
        return super(HBaseCache, self).clear()

    def dec(self, key, delta=1):
        """Decrement the counter at ``key`` by ``delta``; implemented as ``inc`` with the negated delta."""
        negated = -delta
        return self.inc(key, negated)

    #        table = self._table
    #        new_value = table.counter_inc(key, 'cf:value', -delta)
    #        value = table.row(key)
    #        new_value = (self._extract(value) or 0) - delta
    #        table.put(*self._put(key, new_value))
    # TO-DO the above should in principle be guarded by some exception handling
    #        return new_value

    def delete(self, key):
        """Delete the row stored under ``key``.

        :return: ``True`` on success, ``False`` if the HBase call failed.
        """
        try:
            self._table.delete(key)
        except Exception:
            # Ordinary failures are reported as False; KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare ``except`` would.
            return False
        return True

    def delete_many(self, *keys):
        """Delete all given row keys through one batch.

        :return: ``True`` once the batch was sent, ``False`` if queuing or
            sending failed.
        """
        batch = self._table.batch()
        try:
            for k in keys:
                batch.delete(k)
            batch.send()
        except Exception:
            # Ordinary failures are reported as False; KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare ``except`` would.
            return False
        return True

    def get(self, key):
        """Return the cached value for ``key``; any falsy extraction result becomes ``None``."""
        extracted = self._extract(self._table.row(key))
        return extracted if extracted else None

    def get_dict(self, *keys):
        """Return ``{key: value}`` for every requested key; missing keys map to ``None``.

        The result always has one entry per requested key, whether none,
        some or all of the rows exist.

        :param keys: row keys given as positional arguments.
        """
        # table.rows() yields (row_key, columns) pairs for the rows it finds.
        found = dict(self._table.rows(list(keys)))
        return {k: self._extract(found.get(k)) for k in keys}

    def get_many(self, *keys):
        """Return the cached values aligned with ``keys``; missing keys yield ``None``.

        :param keys: row keys given as positional arguments.
        :return: list with exactly one entry per requested key, in order, so
            positions always correspond to ``keys`` even when rows are missing.
        """
        # table.rows() yields (row_key, columns) pairs for the rows it finds.
        found = dict(self._table.rows(list(keys)))
        return [self._extract(found.get(k)) for k in keys]

    def has(self, key):
        """Report whether ``key`` is cached by delegating to the ``BaseCache`` implementation."""
        present = super(HBaseCache, self).has(key)
        return present

    def inc(self, key, delta=1):
        """Add ``delta`` to the counter in column ``cf:value`` of row ``key`` and return the new value."""
        return self._table.counter_inc(key, "cf:value", delta)
#.........这里部分代码省略.........
开发者ID:wkuling,项目名称:hbase-cache,代码行数:103,代码来源:hbase_cache.py


注:本文中的happybase.Connection.create_table方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。