本文整理汇总了Python中happybase.Connection类的典型用法代码示例。如果您正苦于以下问题:Python Connection类的具体用法?Python Connection怎么用?Python Connection使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Connection类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_metadata
def test_metadata(self):
    """End-to-end check of HBaseMetadata: seeds, crawl, links, error, stop."""
    conn = Connection(host='hbase-docker', port=9090)
    store = HBaseMetadata(conn, b'metadata', True, False, 300000, True)
    store.add_seeds([r1, r2, r3])
    response = Response('https://www.example.com', request=r1)
    store.page_crawled(response)
    store.links_extracted(response.request, [r2, r3])
    store.request_error(r4, 'error')
    store.frontier_stop()
    table = conn.table('metadata')
    stored_urls = {to_native_str(data[b'm:url'], 'utf-8') for _, data in table.scan()}
    assert stored_urls == {r1.url, r2.url, r3.url}
    self.delete_rows(table, [b'10', b'11', b'12'])
示例2: setup_module
def setup_module():
    """Module-level fixture: open the shared connection and create the test table."""
    global connection, table
    connection = Connection(**connection_kwargs)
    assert_is_not_none(connection)
    families = {
        'cf1': {},
        'cf2': None,
        'cf3': {'max_versions': 1},
    }
    connection.create_table(TEST_TABLE_NAME, families=families)
    table = connection.table(TEST_TABLE_NAME)
    assert_is_not_none(table)
示例3: test_prefix
def test_prefix():
    """Table-name prefixing behaviour and constructor type validation."""
    assert_equal(TABLE_PREFIX + '_', connection._table_name(''))
    assert_equal(TABLE_PREFIX + '_foo', connection._table_name('foo'))
    assert_equal(connection.table('foobar').name, TABLE_PREFIX + '_foobar')
    assert_equal(connection.table('foobar', use_prefix=False).name, 'foobar')
    unprefixed = Connection(autoconnect=False)
    assert_equal('foo', unprefixed._table_name('foo'))
    # Non-string prefix / non-string separator must be rejected up front.
    for bad_kwargs in ({'table_prefix': 123}, {'table_prefix_separator': 2.1}):
        with assert_raises(TypeError):
            Connection(autoconnect=False, **bad_kwargs)
示例4: __init__
def __init__(self, manager):
    """Initialize the HBase backend: connect over Thrift, build the queue and
    state checker, and ensure the metadata table exists (optionally dropping
    it first when HBASE_DROP_ALL_TABLES is set)."""
    self.manager = manager
    settings = manager.settings
    port = settings.get('HBASE_THRIFT_PORT', 9090)
    hosts = settings.get('HBASE_THRIFT_HOST', 'localhost')
    namespace = settings.get('HBASE_NAMESPACE', 'crawler')
    drop_all_tables = settings.get('HBASE_DROP_ALL_TABLES', False)
    self.queue_partitions = settings.get('HBASE_QUEUE_PARTITIONS', 4)
    self._table_name = settings.get('HBASE_METADATA_TABLE', 'metadata')
    # HBASE_THRIFT_HOST may be a single host or a list/tuple of hosts; pick
    # one at random.  isinstance() replaces the non-idiomatic
    # `type(hosts) in [list, tuple]` check.
    host = choice(hosts) if isinstance(hosts, (list, tuple)) else hosts
    self.connection = Connection(host=host, port=int(port), table_prefix=namespace, table_prefix_separator=':')
    # protocol='compact', transport='framed'
    self.queue = HBaseQueue(self.connection, self.queue_partitions, self.manager.logger.backend,
                            drop=drop_all_tables)
    self.state_checker = HBaseState(self.connection, self._table_name)
    tables = set(self.connection.tables())
    if drop_all_tables and self._table_name in tables:
        self.connection.delete_table(self._table_name, disable=True)
        tables.remove(self._table_name)
    if self._table_name not in tables:
        self.connection.create_table(self._table_name, {'m': {'max_versions': 5},  # 'compression': 'SNAPPY'
                                                        's': {'max_versions': 1, 'block_cache_enabled': 1,
                                                              'bloom_filter_type': 'ROW', 'in_memory': True, },
                                                        'c': {'max_versions': 1}
                                                        })
    table = self.connection.table(self._table_name)
    # Large write batch amortizes Thrift round-trips.
    self.batch = table.batch(batch_size=9216)
示例5: __init__
def __init__(self, manager):
    """Open the HBase Thrift connection from the manager's settings.

    Component slots (_metadata/_queue/_states) are left as None; they are
    populated later by worker-specific factory methods.
    """
    self.manager = manager
    self.logger = logging.getLogger("hbase.backend")
    settings = manager.settings
    port = settings.get('HBASE_THRIFT_PORT')
    hosts = settings.get('HBASE_THRIFT_HOST')
    namespace = settings.get('HBASE_NAMESPACE')
    self._min_requests = settings.get('BC_MIN_REQUESTS')
    self._min_hosts = settings.get('BC_MIN_HOSTS')
    self._max_requests_per_host = settings.get('BC_MAX_REQUESTS_PER_HOST')
    self.queue_partitions = settings.get('SPIDER_FEED_PARTITIONS')
    # HBASE_THRIFT_HOST may be one host or a list/tuple; pick one at random.
    # isinstance() replaces the non-idiomatic `type(hosts) in [list, tuple]`.
    host = choice(hosts) if isinstance(hosts, (list, tuple)) else hosts
    kwargs = {
        'host': host,
        'port': int(port),
        'table_prefix': namespace,
        'table_prefix_separator': ':'
    }
    if settings.get('HBASE_USE_FRAMED_COMPACT'):
        kwargs.update({
            'protocol': 'compact',
            'transport': 'framed'
        })
    self.connection = Connection(**kwargs)
    self._metadata = None
    self._queue = None
    self._states = None
示例6: __init__
def __init__(self, name):
    """Open a happybase connection to localhost and bind the named table.

    :param name: HBase table name to bind.
    :raises UserWarning: if the Thrift transport cannot be opened.
    """
    from happybase import Connection
    from thrift.transport import TTransport
    try:
        self._conn = Connection('localhost')
        self._table = self._conn.table(name)
    except TTransport.TTransportException as e:
        # `except X as e` works on Python 2.6+ and Python 3; the original
        # comma form (`except X, e`) is a SyntaxError on Python 3.
        raise UserWarning(e)
示例7: __init__
def __init__(self, host='127.0.0.1', port=9090, prefix=None, table_name=None, default_timeout=300, **kwargs):
    """HBase-backed cache constructor.

    :param prefix: optional happybase table prefix (namespace).
    :param table_name: required name of the backing table.
    :raises TypeError: if table_name is not given.
    """
    super(HBaseCache, self).__init__(default_timeout)
    if not table_name:
        raise TypeError('table_name is a required argument')
    self.table_name = table_name
    # Bug fix: the parameter is named `prefix`; the original referenced an
    # undefined `table_prefix`, raising NameError on every instantiation.
    self._c = Connection(host=host, port=port, table_prefix=prefix, **kwargs)
    self._table = self._c.table(table_name)
    self.clear()  # NOTE: wipes any existing rows of the table on startup
示例8: put_data_into_hbase
def put_data_into_hbase(rdd):
    """Store the (label, count) pairs of *rdd* into the 'base_tweets' table.

    The current timestamp (second resolution) is the row key; label 0 is
    written to 'tweet_count:neg', any other label to 'tweet_count:pos'.
    """
    # Collect the results on the driver.
    results = rdd.collect()
    # Row id: 'YYYY-MM-DD HH:MM:SS' — slice truncates microseconds.
    date = str(datetime.datetime.now())[:19]
    connection = Connection(host='localhost', port=9090, autoconnect=True)
    try:
        table = connection.table(name='base_tweets')
        for data in results:
            column = 'tweet_count:neg' if data[0] == 0 else 'tweet_count:pos'
            table.put(row=date, data={column: str(data[1])})
    finally:
        # Close even if a put fails, so the Thrift socket is not leaked
        # (the original leaked the connection on any exception).
        connection.close()
示例9: test_drop_all_tables_when_table_name_is_str
def test_drop_all_tables_when_table_name_is_str(self):
    """Recreating existing tables with drop=True must not raise AlreadyExists."""
    conn = Connection(host='hbase-docker', port=9090)
    # Start from a clean HBase instance.
    for existing in conn.tables():
        conn.delete_table(existing, True)
    hbase_queue_table = 'queue'
    hbase_metadata_table = 'metadata'
    conn.create_table(hbase_queue_table, {'f': {'max_versions': 1}})
    conn.create_table(hbase_metadata_table, {'f': {'max_versions': 1}})
    assert set(conn.tables()) == {b'metadata', b'queue'}  # Failure of test itself
    try:
        HBaseQueue(connection=conn, partitions=1, table_name=hbase_queue_table, drop=True)
        HBaseMetadata(connection=conn, table_name=hbase_metadata_table, drop_all_tables=True,
                      use_snappy=False, batch_size=300000, store_content=True)
    except AlreadyExists:
        assert False, "failed to drop hbase tables"
示例10: setUp
def setUp(self):
    """Ensure the 'domain_metadata' table exists and rows d1-d4 are cleared."""
    logging.basicConfig(level=logging.DEBUG)
    self.conn = Connection(host="hbase-docker")
    if b'domain_metadata' not in self.conn.tables():
        self.conn.create_table('domain_metadata',
                               {'m': {'max_versions': 1, 'block_cache_enabled': 1,}})
    table = self.conn.table('domain_metadata')
    for row_key in ('d1', 'd2', 'd3', 'd4'):
        table.delete(row_key)
示例11: __init__
def __init__(self, host="127.0.0.1", port=9090, prefix=None, table_name=None, default_timeout=300, **kwargs):
    """Create the cache and (re)initialize its backing HBase table.

    ``table_name`` is mandatory; ``prefix`` is forwarded to happybase as
    the table namespace prefix.
    """
    BaseCache.__init__(self, default_timeout)
    if not table_name:
        raise TypeError("table_name is a required argument")
    self.table_name = table_name
    connection = Connection(host=host, port=port, table_prefix=prefix, **kwargs)
    self._c = connection
    self._table = connection.table(table_name)
    # clear() drops and recreates the table, discarding any existing rows.
    self.clear()
示例12: __init__
def __init__(self, manager):
    """Connect to HBase, create the queue and state-checker components, and
    ensure the metadata table exists with the expected column families
    (optionally Snappy-compressed)."""
    self.manager = manager
    settings = manager.settings
    port = settings.get("HBASE_THRIFT_PORT")
    hosts = settings.get("HBASE_THRIFT_HOST")
    namespace = settings.get("HBASE_NAMESPACE")
    drop_all_tables = settings.get("HBASE_DROP_ALL_TABLES")
    self.queue_partitions = settings.get("HBASE_QUEUE_PARTITIONS")
    self._table_name = settings.get("HBASE_METADATA_TABLE")
    # HBASE_THRIFT_HOST may be one host or a list/tuple; pick one at random.
    # isinstance() replaces the non-idiomatic `type(hosts) in [list, tuple]`.
    host = choice(hosts) if isinstance(hosts, (list, tuple)) else hosts
    kwargs = {"host": host, "port": int(port), "table_prefix": namespace, "table_prefix_separator": ":"}
    if settings.get("HBASE_USE_COMPACT_PROTOCOL"):
        kwargs.update({"protocol": "compact", "transport": "framed"})
    self.connection = Connection(**kwargs)
    self.queue = HBaseQueue(
        self.connection,
        self.queue_partitions,
        self.manager.logger.backend,
        settings.get("HBASE_QUEUE_TABLE"),
        drop=drop_all_tables,
    )
    self.state_checker = HBaseState(
        self.connection, self._table_name, self.manager.logger.backend, settings.get("HBASE_STATE_CACHE_SIZE_LIMIT")
    )
    tables = set(self.connection.tables())
    if drop_all_tables and self._table_name in tables:
        self.connection.delete_table(self._table_name, disable=True)
        tables.remove(self._table_name)
    if self._table_name not in tables:
        schema = {
            "m": {"max_versions": 1},
            "s": {"max_versions": 1, "block_cache_enabled": 1, "bloom_filter_type": "ROW", "in_memory": True},
            "c": {"max_versions": 1},
        }
        if settings.get("HBASE_USE_SNAPPY"):
            schema["m"]["compression"] = "SNAPPY"
            schema["c"]["compression"] = "SNAPPY"
        self.connection.create_table(self._table_name, schema)
    table = self.connection.table(self._table_name)
    self.batch = table.batch(batch_size=settings.get("HBASE_BATCH_SIZE"))
    self.store_content = settings.get("HBASE_STORE_CONTENT")
示例13: HBaseCache
class HBaseCache(BaseCache):
def __init__(self, host="127.0.0.1", port=9090, prefix=None, table_name=None, default_timeout=300, **kwargs):
    """Create the cache and (re)initialize its HBase table.

    ``table_name`` is required; ``prefix`` becomes the happybase table
    namespace prefix.  Calling clear() here wipes existing rows.
    """
    BaseCache.__init__(self, default_timeout)
    if not table_name:
        raise TypeError("table_name is a required argument")
    self.table_name = table_name
    connection = Connection(host=host, port=port, table_prefix=prefix, **kwargs)
    self._c = connection
    self._table = connection.table(table_name)
    self.clear()
def _put(self, key, value):
    """Shape (key, value) into the positional args expected by table.put()."""
    data = {"cf:value": value}
    return key, data
def _extract(self, value):
    """Pull the payload out of a row dict; pass falsy rows through unchanged."""
    return value.get("cf:value") if value else value
def add(self, key, value, timeout=None):
    """Store *value* under *key* only if the key is absent.

    Returns True when stored, False when the key already exists or the put
    fails.  NOTE(review): *timeout* is accepted for BaseCache API
    compatibility but is not honored by this backend.
    """
    table = self._table
    try:
        # table.row() returns an empty dict for a missing key, which is falsy.
        if not table.row(key):
            table.put(*self._put(key, value))
        else:
            return False
    except Exception:
        # Narrowed from a bare except (which also swallowed SystemExit /
        # KeyboardInterrupt); debug prints removed.
        return False
    return True
def clear(self):
    """Drop and recreate the backing table, discarding all rows."""
    try:
        self._c.delete_table(self.table_name, disable=True)
    except Exception:
        # The table may not exist yet (first initialization) — that is fine.
        # Narrowed from a bare except; debug print removed.
        pass
    self._c.create_table(self.table_name, {"cf": dict()})
    return super(HBaseCache, self).clear()
def dec(self, key, delta=1):
    """Decrement the counter at *key* by *delta* (delegates to inc)."""
    return self.inc(key, -delta)
def delete(self, key):
    """Delete *key*; return False if the delete raised, True otherwise."""
    try:
        self._table.delete(key)
    except Exception:  # narrowed from a bare except
        return False
    return True
def delete_many(self, *keys):
    """Delete all *keys* in one batch; return False if any step raised."""
    batch = self._table.batch()
    try:
        for key in keys:
            batch.delete(key)
        batch.send()
    except Exception:  # narrowed from a bare except
        return False
    return True
def get(self, key):
    """Return the cached value for *key*, or None when missing or falsy."""
    row = self._table.row(key)
    extracted = self._extract(row)
    return extracted if extracted else None
def get_dict(self, *keys):
    """Return {key: value-or-None} covering every requested key.

    happybase's rows() omits missing keys entirely, so pre-fill with None
    and overlay the rows actually found (the original returned a partial
    dict when only some of the keys existed).
    """
    result = {k: None for k in keys}
    for k, data in self._table.rows(keys):
        result[k] = self._extract(data)
    return result
def get_many(self, *keys):
    """Return values in the same order as *keys*, None for missing ones.

    rows() skips absent keys, so map found rows back onto the requested
    key order instead of assuming positional alignment (the original
    returned a shorter, misaligned sequence when some keys were missing).
    """
    found = dict(self._table.rows(keys))
    return [self._extract(found[k]) if k in found else None for k in keys]
def has(self, key):
    """Delegate the membership check to BaseCache.has (not specialized here)."""
    return super(HBaseCache, self).has(key)
def inc(self, key, delta=1):
    """Atomically increment the HBase counter 'cf:value' for *key*."""
    return self._table.counter_inc(key, "cf:value", delta)
#.........这里部分代码省略.........
示例14: HBaseCache
class HBaseCache(BaseCache):
def __init__(self, host='127.0.0.1', port=9090, prefix=None, table_name=None, default_timeout=300, **kwargs):
    """HBase-backed cache constructor.

    :param prefix: optional happybase table prefix (namespace).
    :param table_name: required name of the backing table.
    :raises TypeError: if table_name is not given.
    """
    super(HBaseCache, self).__init__(default_timeout)
    if not table_name:
        raise TypeError('table_name is a required argument')
    self.table_name = table_name
    # Bug fix: `table_prefix` was an undefined name (NameError on every
    # instantiation); the parameter is called `prefix`.
    self._c = Connection(host=host, port=port, table_prefix=prefix, **kwargs)
    self._table = self._c.table(table_name)
    self.clear()  # NOTE: drops and recreates the table, wiping existing rows
def _put(self, key, value):
    """Shape (key, value) into the positional args expected by table.put()."""
    data = {'cf:value': value}
    return key, data
def _extract(self, value):
    """Pull the payload out of a row dict; pass falsy rows through unchanged."""
    return value.get('cf:value') if value else value
def add(self, key, value, timeout=None):
    """Store *value* under *key* only if the key is absent; True on success.

    NOTE(review): *timeout* is accepted for API compatibility but ignored.
    """
    table = self._table
    try:
        # row() returns an empty (falsy) dict for a missing key.
        if not table.row(key):
            table.put(*self._put(key, value))
        else:
            return False
    except Exception:  # narrowed from a bare except
        return False
    return True
def clear(self):
    """Drop and recreate the backing table, discarding all rows."""
    try:
        self._c.delete_table(self.table_name, disable=True)
    except Exception:
        # The table may not exist yet (first initialization); the unguarded
        # delete_table made the very first clear() fail.
        pass
    self._c.create_table(self.table_name, {'cf': dict()})
    return super(HBaseCache, self).clear()
def dec(self, key, delta=1):
    """Decrement the counter at *key* by *delta* (delegates to inc)."""
    return self.inc(key, -delta)
def delete(self, key):
    """Delete *key*; return False if the delete raised, True otherwise."""
    try:
        self._table.delete(key)
    except Exception:  # narrowed from a bare except
        return False
    return True
def delete_many(self, *keys):
    """Delete all *keys* in one batch; return False if any step raised."""
    batch = self._table.batch()
    try:
        for key in keys:
            batch.delete(key)
        batch.send()
    except Exception:  # narrowed from a bare except
        return False
    return True
def get(self, key):
    """Return the extracted value for *key* (falsy row passes through)."""
    row = self._table.row(key)
    return self._extract(row)
def get_dict(self, *keys):
    """Return {key: value-or-None} covering every requested key.

    Bug fix: happybase's table.rows() returns a list of (key, data)
    pairs — the original `_, values = table.rows(keys)` unpack only works
    when exactly two rows come back, and its comprehension referenced an
    unbound name `k`.  Missing keys map to None since rows() omits them.
    """
    result = {k: None for k in keys}
    for k, data in self._table.rows(keys):
        result[k] = self._extract(data)
    return result
def get_many(self, *keys):
    """Return values in request order, None for missing keys.

    Bug fix: table.rows() yields (key, data) pairs and skips absent keys;
    the original two-target unpack raised ValueError for any result size
    other than two.
    """
    found = dict(self._table.rows(keys))
    return [self._extract(found[k]) if k in found else None for k in keys]
def has(self, key):
    """Delegate the membership check to BaseCache.has (not specialized here)."""
    return super(HBaseCache, self).has(key)
def inc(self, key, delta=1):
    """Atomically increment the HBase counter 'cf:value' for *key*."""
    return self._table.counter_inc(key, 'cf:value', delta)
def set(self, key, value, timeout=None):
    """Overwrite *key* with *value*; True on success, False if HBase raised.

    NOTE(review): delete+put is not atomic — a concurrent reader can
    observe the key missing in between.  *timeout* is ignored.
    """
    table = self._table
    try:
        # Deleting a non-existent row is a no-op in HBase, so no existence
        # check is needed before the put.
        table.delete(key)
        table.put(*self._put(key, value))
    except Exception:  # narrowed from a bare except
        return False
    return True
def set_many(self, mapping, timeout=None):
batch = self._table.batch()
for key, value in _items(mapping):
batch.put(*self._put(key, value))
try:
#.........这里部分代码省略.........
示例15: HBaseBackend
class HBaseBackend(DistributedBackend):
component_name = 'HBase Backend'
def __init__(self, manager):
    """Open the HBase Thrift connection from the manager's settings.

    The metadata/queue/states components stay None here; they are wired up
    by the strategy_worker()/db_worker() factories.
    """
    self.manager = manager
    self.logger = logging.getLogger("hbase.backend")
    settings = manager.settings
    port = settings.get('HBASE_THRIFT_PORT')
    hosts = settings.get('HBASE_THRIFT_HOST')
    namespace = settings.get('HBASE_NAMESPACE')
    self._min_requests = settings.get('BC_MIN_REQUESTS')
    self._min_hosts = settings.get('BC_MIN_HOSTS')
    self._max_requests_per_host = settings.get('BC_MAX_REQUESTS_PER_HOST')
    self.queue_partitions = settings.get('SPIDER_FEED_PARTITIONS')
    # HBASE_THRIFT_HOST may be one host or a list/tuple; pick one at random.
    # isinstance() replaces the non-idiomatic `type(hosts) in [list, tuple]`.
    host = choice(hosts) if isinstance(hosts, (list, tuple)) else hosts
    kwargs = {
        'host': host,
        'port': int(port),
        'table_prefix': namespace,
        'table_prefix_separator': ':'
    }
    if settings.get('HBASE_USE_FRAMED_COMPACT'):
        kwargs.update({
            'protocol': 'compact',
            'transport': 'framed'
        })
    self.connection = Connection(**kwargs)
    self._metadata = None
    self._queue = None
    self._states = None
@classmethod
def strategy_worker(cls, manager):
    """Factory: backend wired with only the states component."""
    backend = cls(manager)
    settings = manager.settings
    backend._states = HBaseState(backend.connection,
                                 settings.get('HBASE_METADATA_TABLE'),
                                 settings.get('HBASE_STATE_CACHE_SIZE_LIMIT'))
    return backend
@classmethod
def db_worker(cls, manager):
    """Factory: backend wired with the queue and metadata components."""
    backend = cls(manager)
    settings = manager.settings
    drop_all_tables = settings.get('HBASE_DROP_ALL_TABLES')
    backend._queue = HBaseQueue(backend.connection, backend.queue_partitions,
                                settings.get('HBASE_QUEUE_TABLE'),
                                drop=drop_all_tables)
    backend._metadata = HBaseMetadata(backend.connection,
                                      settings.get('HBASE_METADATA_TABLE'),
                                      drop_all_tables,
                                      settings.get('HBASE_USE_SNAPPY'),
                                      settings.get('HBASE_BATCH_SIZE'),
                                      settings.get('STORE_CONTENT'))
    return backend
@property
def metadata(self):
    """Metadata component; None unless created by db_worker()."""
    return self._metadata
@property
def queue(self):
    """Queue component; None unless created by db_worker()."""
    return self._queue
@property
def states(self):
    """States component; None unless created by strategy_worker()."""
    return self._states
def frontier_start(self):
    """Propagate frontier start to every configured component."""
    for component in (self.metadata, self.queue, self.states):
        if not component:
            continue
        component.frontier_start()
def frontier_stop(self):
    """Propagate frontier stop to configured components, then close HBase."""
    for component in (self.metadata, self.queue, self.states):
        if not component:
            continue
        component.frontier_stop()
    self.connection.close()
def add_seeds(self, seeds):
    """Delegate seed registration to the metadata component."""
    self.metadata.add_seeds(seeds)
def page_crawled(self, response):
    """Record a crawled page via the metadata component."""
    self.metadata.page_crawled(response)
def links_extracted(self, request, links):
    """Record links extracted for *request* via the metadata component."""
    self.metadata.links_extracted(request, links)
def request_error(self, page, error):
    """Record a failed request via the metadata component."""
    self.metadata.request_error(page, error)
def finished(self):
    """Not supported by this distributed backend."""
    raise NotImplementedError
def get_next_requests(self, max_next_requests, **kwargs):
next_pages = []
self.logger.debug("Querying queue table.")
partitions = set(kwargs.pop('partitions', []))
for partition_id in range(0, self.queue_partitions):
if partition_id not in partitions:
continue
results = self.queue.get_next_requests(max_next_requests, partition_id,
min_requests=self._min_requests,
min_hosts=self._min_hosts,
#.........这里部分代码省略.........