本文整理汇总了Python中pycassa.columnfamily.ColumnFamily.multiget方法的典型用法代码示例。如果您正苦于以下问题:Python ColumnFamily.multiget方法的具体用法?Python ColumnFamily.multiget怎么用?Python ColumnFamily.multiget使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pycassa.columnfamily.ColumnFamily的用法示例。
在下文中一共展示了ColumnFamily.multiget方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: multiget
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
def multiget(self, *args, **kwargs):
    """
    Like :meth:`get()`, but for a list of keys.

    Returns a ``self.dict_class`` mapping each requested key that has a
    row (missing rows are left out) to the same value :meth:`get()` would
    produce for that key: one instance for standard column families or
    when ``super_column`` is given, otherwise a ``self.dict_class`` of
    instances keyed by super column name.
    """
    # Restrict the read to the mapped fields unless the caller picked
    # columns, or this is a super / raw-column family.
    if not ('columns' in kwargs or self.super or self.raw_columns):
        kwargs['columns'] = self.fields

    fetched = ColumnFamily.multiget(self, *args, **kwargs)
    instances = self.dict_class()
    for row_key, row_columns in fetched.iteritems():
        if not self.super:
            attrs = self.combine_columns(row_columns)
            instances[row_key] = create_instance(self.cls, key=row_key, **attrs)
            continue

        if 'super_column' in kwargs:
            # A single super column was requested: one instance per row.
            attrs = self.combine_columns(row_columns)
            instances[row_key] = create_instance(
                self.cls, key=row_key, super_column=kwargs['super_column'], **attrs)
            continue

        # Whole super row: one instance per super column.
        per_super_column = self.dict_class()
        for super_column, subcolumns in row_columns.iteritems():
            attrs = self.combine_columns(subcolumns)
            per_super_column[super_column] = create_instance(
                self.cls, key=row_key, super_column=super_column, **attrs)
        instances[row_key] = per_super_column
    return instances
示例2: get
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
def get():
    """Read a user's four multiplexed event-log rows and print them merged.

    Uses ``userId`` and ``startTime`` from the enclosing scope (they are not
    defined in this function). Rows ``<userId>:1`` .. ``<userId>:4`` are
    fetched from the ``event_log_mux`` column family with one multiget; each
    row's insertion timestamps are printed, then all columns are merged and
    printed again in descending column-name order. Returns ``None``.
    """
    #################### TEMP
    #userId = 'user-784b9158-5233-454e-8dcf-c229cdff12c6'
    # NOTE(review): startTime is formatted into both ends of the range; an
    # end time was probably intended -- confirm against the caller.
    print('Getting result for userId: {0} between time {1} and {2}'.format(userId, startTime, startTime))
    con = util.getConnection()
    logCF = ColumnFamily(con, 'event_log_mux')
    rowKeys = ['{0}:{1}'.format(userId, i+1) for i in range(4)]
    rows = logCF.multiget(rowKeys)
    print('Shows rows multiplexes into different rows each individually sorted in reverse cronological order:')
    merged = {}
    for row_key, columns in rows.items():
        print('>> '+str(row_key))
        # update() merges in place; rebuilding the dict from concatenated
        # item lists on every row was quadratic and Python 2 only.
        merged.update(columns)
        for colstr in columns.values():
            coljson = json.loads(colstr)
            print('\tInsertion Timestamp: {0}'.format(coljson['insert_time']))
    # Column names are unique after the merge, so sorting the (name, value)
    # pairs in reverse orders them by column name, descending.
    final = collections.OrderedDict(sorted(merged.items(), reverse=True))
    for v in final.values():
        coljson = json.loads(v)
        print('insertion timestamp: {0}'.format(coljson['insert_time']))
"""
示例3: file_metadata
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
def file_metadata(self, keys):
    """Obtain metadata for stored files.

    ``keys`` is an iterable of file keys. Returns the result of a single
    multiget against the 'files' column family on ``self.pool``.
    """
    return ColumnFamily(self.pool, 'files').multiget(keys)
示例4: get_data
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
def get_data(self, cf_name, key, start_time, end_time, output_json=False):
    """Read the columns between two times for a generated range of row keys.

    Row keys come from ``self.gen_key_range(key, start_time, end_time)``;
    the column slice runs from ``start_time * 1000`` to ``end_time * 1000``.
    When ``output_json`` is true the rows are handed to ``self.dump_json``.
    A ``NotFoundException`` is swallowed. Always returns ``None``.
    """
    column_family = ColumnFamily(self.pool, cf_name)
    try:
        row_keys = self.gen_key_range(key, start_time, end_time)
        rows = column_family.multiget(
            row_keys,
            column_start=start_time * 1000,
            column_finish=end_time * 1000,
            column_count=10000000)
        if output_json:
            self.dump_json(rows)
    except NotFoundException:
        # Missing data is not treated as an error here.
        pass
示例5: get_all_posts_in_thread
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
def get_all_posts_in_thread(self, thread_id):
    """Return every post of a thread as a list of dicts sorted by key.

    The thread's posts live in a column family named ``str(thread_id)``.
    Each returned dict has 'key' (the row key as int), 'name', 'content'
    and 'post_time'.
    """
    posts_cf = ColumnFamily(self.conn, str(thread_id))

    # get_range() yields (row_key, columns) pairs; only the keys are kept.
    post_keys = [entry[0] for entry in posts_cf.get_range()]

    # Fetch all rows with one multiget.
    fetched = posts_cf.multiget(post_keys)
    posts = []
    for post_key in post_keys:
        posts.append({
            'key': int(post_key),
            'name': fetched[post_key]['name'],
            'content': fetched[post_key]['content'],
            'post_time': fetched[post_key]['post_time'],
        })
    posts.sort(key=lambda post: post['key'])
    return posts
示例6: _get_oldest_thread
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
def _get_oldest_thread(self):
    """Return the thread row with the smallest 'update_time'.

    Reads every row of the 'threads' column family and builds a dict per
    thread with 'thread_id', 'thread_name', 'post_count', 'create_time'
    and 'update_time'. Raises ``IndexError`` when there are no threads.
    """
    threads_cf = ColumnFamily(self.conn, 'threads')

    # get_range() yields (row_key, columns) pairs; only the keys are kept.
    thread_keys = [entry[0] for entry in threads_cf.get_range()]

    fetched = threads_cf.multiget(thread_keys)
    threads = []
    for thread_key in thread_keys:
        threads.append({
            'thread_id': int(thread_key),
            # NOTE(review): thread_name is filled from the row key, exactly
            # like thread_id; a stored name column may have been intended.
            'thread_name': int(thread_key),
            'post_count': fetched[thread_key]['post_count'],
            'create_time': fetched[thread_key]['create_time'],
            'update_time': fetched[thread_key]['update_time'],
        })
    threads.sort(key=lambda thread: thread['update_time'])
    return threads[0]
示例7: DailyTemporalBloomFilter
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
#.........这里部分代码省略.........
yield end - dt.timedelta(days=i)
else:
yield start + dt.timedelta(days=i)
def _drop_archive(self):
    """Delete the archived hourly rows of the retention window (best effort).

    One row named ``<bf_name>_<YYYY-MM-DD:HH>`` is removed for every hour
    produced by ``self._hour_range`` between
    ``current_period - (expiration - 1) days`` and now. A failure on one row
    is ignored so the remaining rows are still attempted.
    """
    last_period = self.current_period - dt.timedelta(days=self.expiration-1)
    for hour in self._hour_range(last_period, dt.datetime.now()):
        try:
            row = "%s_%s" % (self.bf_name, hour.strftime('%Y-%m-%d:%H'))
            # The former get_count() call was dropped: its result was never
            # used and it cost one extra round trip per hour.
            self.columnfamily.remove(row)
        except Exception:
            # Deliberately best effort, but no longer a bare ``except:`` so
            # KeyboardInterrupt / SystemExit still propagate.
            pass
def rebuild_from_archive(self, rebuild_snapshot=True):
"""Rebuild the BF using the archived items"""
# NOTE(review): indentation was lost in this listing. The comments below
# describe the most plausible nesting; confirm against the original file.
self.initialize_bitarray()
#if rebuild_snapshot:
# self.delete_snapshots()
# Helper generator: yields every column name of every row in a multiget result.
def multi_rows_itr(rows):
for row in rows.values():
for k in row.keys():
yield k
# Oldest day still inside the expiration window.
last_period = self.current_period - dt.timedelta(days=self.expiration-1)
# NOTE(review): `hours` is never read in the visible body.
hours = self._hour_range(last_period, dt.datetime.now())
days = self._day_range(last_period, dt.datetime.now())
# NOTE(review): this initial value is overwritten on the first iteration.
rows = []
for i,day in enumerate(days):
# The 24 hourly row keys "<bf_name>_<YYYY-MM-DD>:<HH>" of this day.
# NOTE(review): the inner comprehension reuses the name `i` from enumerate().
rows = ["%s_%s:%s" % (self.bf_name, day.strftime('%Y-%m-%d'), hour_str) for hour_str in ["%02d" % i for i in range(24)]]
# NOTE(review): column_count is passed as the float 1E6 -- confirm the client accepts a non-integer count.
rows_content = self.columnfamily.multiget(rows, column_count=1E6)
# True only for the day equal to self.current_period.
update_current = day == self.current_period
for k in multi_rows_itr(rows_content):
self.add_rebuild(k, update_current)
# Presumably still inside the per-day loop, since `day` is used -- TODO confirm.
if rebuild_snapshot:
self.save_snaphot(override_period=day)
# NOTE(review): whether this check runs per day or once after the loop cannot be told from this listing.
if not update_current:
self.initialize_current_day_bitarray()
def restore_from_disk(self, clean_old_snapshot=False):
    """Restore the state of the BF from the snapshot files on disk.

    Snapshot files are matched with
    ``<snapshot_path>/<bf_name>_<expiration>_*.dat``; the date is taken from
    the part of the filename after the last underscore.

    :clean_old_snapshot: when true, snapshots older than
        ``current_period - (expiration - 1) days`` are still merged and then
        deleted from disk; when false they are skipped and kept.

    Sets ``self.ready`` to True once all files were processed.
    """
    pattern = "%s/%s_%s_*.dat" % (self.snapshot_path, self.bf_name, self.expiration)
    snapshot_files = glob.glob(pattern)
    oldest_kept = self.current_period - dt.timedelta(days=self.expiration-1)
    for path in snapshot_files:
        date_part = path.split('_')[-1].strip('.dat')
        period = dt.datetime.strptime(date_part, "%Y-%m-%d")
        expired = period < oldest_kept
        if expired and not clean_old_snapshot:
            continue

        self._union_bf_from_file(path)
        if period == self.current_period:
            # Today's snapshot also feeds the current-day filter.
            self._union_bf_from_file(path, current=True)
        if expired and clean_old_snapshot:
            os.remove(path)
    self.ready = True
示例8: range
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
# Tour of the basic read/write calls on `col_fam`, which is created outside
# this excerpt. NOTE(review): indentation was lost in this listing.
print col_fam.get_count('author')
# Single-row inserts: row key plus a {column name: value} dict.
col_fam.insert('row_key', {'col_name': 'col_val'})
col_fam.insert('row_key', {'col_name':'col_val', 'col_name2':'col_val2'})
# Several rows in one call: {row key: {column name: value}}.
col_fam.batch_insert({'row1': {'name1': 'val1', 'name2': 'val2'},'row2': {'foo': 'bar'}})
#col_fam.insert('super_key', {'key':{'col_name':'col_val', 'col_name2':'col_val2'}})
# Column counts for one row, optionally limited to named columns or a slice start.
print col_fam.get_count('row_key', columns=['foo', 'bar'])
print col_fam.get_count('row_key', column_start='foo')
# Column counts for several rows at once.
print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'])
print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'],columns=['col1', 'col2', 'col3'])
print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'],column_start='col1', column_finish='col3')
print col_fam.get_count('row_key')
# Single-row reads: whole row, named columns, and a reversed slice of 3 columns.
print col_fam.get('row_key')
print col_fam.get('author')
print col_fam.get('row_key', columns=['col_name', 'col_name2'])
print col_fam.get('row_key', column_reversed=True, column_count=3)
# Multi-row read.
print col_fam.multiget(['row1', 'row2'])
# Add columns named '1'..'9' to 'row_key'.
# NOTE(review): presumably only the insert belongs to the loop body -- confirm.
for i in range(1, 10):
col_fam.insert('row_key', {str(i): 'val'})
# Column slice from '5' to '7'.
print col_fam.get('row_key', column_start='5', column_finish='7')
# Row-key range scan; iterating the result gives (key, columns) pairs.
result = col_fam.get_range(start='row_key5', finish='row_key7')
for key, columns in result:
print key, '=>', columns
# Super column examples (commented out)
# col_fam = pycassa.ColumnFamily(pool, 'Super1')
# col_fam.insert('row_key', {'supercol_name': {'col_name': 'col_val'}})
print col_fam.get('row_key')
# col_fam = pycassa.ColumnFamily(pool, 'Letters')
# col_fam.insert('row_key', {'super': {'a': '1', 'b': '2', 'c': '3'}})
# print col_fam.get('row_key', super_column='super')
# print col_fam.get('row_key', super_column='super', columns=['a', 'b'])
# print col_fam.get('row_key', super_column='super', column_start='b')
示例9: CassandraDemo
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
class CassandraDemo(object):
def __init__(self, database, table):
    """Remember the keyspace name (``database``) and column family name (``table``)."""
    self.database, self.table = database, table
def create_connections(self):
    """Open a connection pool on ``self.database`` and bind ``self.table`` to it.

    Stores the pool as ``self.pool`` and the column family as ``self.cf``.
    """
    pool = ConnectionPool(self.database)
    self.pool = pool
    self.cf = ColumnFamily(pool, self.table)
def create_database_and_table(self):
    """Create the keyspace if missing and (re)create the column family.

    An existing column family of the same name is dropped first, so the
    table always starts empty. Returns True.

    The names come from ``self.database`` / ``self.table``; the previous
    body read bare ``database`` / ``table``, which only worked when globals
    of those names happened to exist.
    """
    super_cf = False # consider super columns to be deprecated
    s = SystemManager()
    try:
        # create keyspace if it doesn't exist
        if self.database not in s.list_keyspaces():
            s.create_keyspace(self.database, SIMPLE_STRATEGY, {'replication_factor': '1'})
        # delete column family from the keyspace if it does exist.
        if self.table in s.get_keyspace_column_families(self.database):
            s.drop_column_family(self.database, self.table)
        # create column family in the keyspace
        if self.table not in s.get_keyspace_column_families(self.database):
            print("table is creating...")
            s.create_column_family(self.database, self.table, super=super_cf, comparator_type=ASCII_TYPE)
    finally:
        # Release the system manager connection even if a call above fails.
        s.close()
    return True
def insert_data(self):
    """Insert three sample employee rows with keys '1', '2' and '3'."""
    print('\nemployee data is inserting...')
    employees = (
        ('1', {'fn':'yogesh', 'ln':'kumar', 'ct': 'Ajmer', 'em': '[email protected]'}),
        ('2', {'fn':'amit', 'ln':'pandita', 'ct': 'Delhi', 'em': '[email protected]'}),
        ('3', {'fn':'sandeep', 'ln':'tak', 'ct': 'Ajmer', 'em': '[email protected]', 'mb': '8890467032'}),
    )
    for row_key, columns in employees:
        self.cf.insert(row_key, columns)
def get_data(self):
    """Run five sample single-row reads, then print their results in order.

    Covers a whole row, named columns, a column slice, and the first /
    last three columns of a row.
    """
    print('\nemployee data is featching...')
    fetch = self.cf.get
    # All reads are issued before anything is printed.
    results = [
        fetch('1'),
        fetch('2', columns = ['fn', 'ln', 'em']),
        fetch('3', column_start = 'ct', column_finish = 'fn'),
        fetch('1', column_reversed = False, column_count = 3),
        fetch('1', column_reversed = True, column_count = 3),
    ]
    for result in results:
        print(result)
def get_multiple_data(self):
    """Fetch rows '1', '2' and '3' with one multiget and print the result."""
    print('\ngetting multiple employees data...')
    print(self.cf.multiget(['1','2','3']))
def get_data_by_range(self):
    '''
    Print every row between row keys '1' and '3' as "<key> <columns>".

    The original note on this method says an error here is a Cassandra
    limitation and not a bug in the demo.
    '''
    print('\ngetting employees data by range...')
    start_row_key = '1'
    end_row_key = '3'
    data = self.cf.get_range(start = start_row_key, finish = end_row_key)
    for key, columns in data:
        # Was `print key,coulmns`: the misspelled name raised NameError on
        # the first row. The output format is unchanged.
        print('%s %s' % (key, columns))
def get_count(self):
    """Print three column counts for row '1': all, two named columns, and from 'em' on."""
    print('\nget employee row\'s colunm count')
    count = self.cf.get_count
    print(count('1'))
    print(count('1', columns = ['fn', 'ln']))
    print(count('1', column_start = 'em'))
def get_multi_count(self):
    """Print column counts for rows '1'-'3': all, three named columns, and the 'ct'..'fn' slice."""
    print('\nget multiple employees row\'s colunm count')
    row_keys = ['1','2','3']
    count = self.cf.multiget_count
    print(count(row_keys))
    print(count(row_keys, columns = ['fn', 'ln', 'mb']))
    print(count(row_keys, column_start = 'ct', column_finish = 'fn'))
def update_data(self):
    """Write the 'pwd' and 'ct' columns of row '1' with an insert."""
    print('\nemployee data is updating...')
    changes = {'pwd':'[email protected]', 'ct':'Noida'}
    self.cf.insert('1', changes)
def delete_data(self):
    """Remove row '2' from the column family."""
    print('\ndelete data from employee')
    self.cf.remove('2')
def get_all_rows(self):
    """Print the list of row keys returned by an unbounded range scan."""
    print('\ngetting rows name...')
    # Each scanned entry is indexable; element 0 is the row key.
    row_names = [entry[0] for entry in self.cf.get_range()]
    print(row_names)
def get_all_columns_of_row(self):
print '\ngetting columns name of a row'
row = '1'
data = self.cf.get(row)
#.........这里部分代码省略.........
示例10: DailyTemporalBloomFilter
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
class DailyTemporalBloomFilter(object):
"""Long Range Temporal BloomFilter using a daily resolution.
For really high value of expiration (like 60 days) with low requirement on precision.
The actual error of this BF will the be native error of the BF + the error related
to the coarse aspect of the expiration, since we no longer expires information precisely.
Also, as opposed to a classic Bloom Filter, this one will aslo have false positive (reporting membership for a non-member)
AND false negative (reporting non-membership for a member).
The upper bound of the temporal_error can be theoricaly quite high. However, if the
items of the set are uniformly distributed over time, the avg error will be something like 1.0 / expiration
"""
def __init__(self, capacity, error_rate, expiration, name, snapshot_path, cassandra_session):
# Builds the filter state and makes sure its Cassandra column family exists.
# NOTE(review): indentation was lost in this listing. Statement order is kept
# as-is because the initializer calls read attributes set just before them.
self.error_rate = error_rate
self.capacity = capacity
# Derives nbr_slices, bits_per_slice, nbr_bits and hashes from the two values above.
self._initialize_parameters()
self.initialize_bitarray()
self.count = 0
self.hashed_values = []
self.name = name
self.snapshot_path = snapshot_path
self.expiration = expiration
# Presumably sets self.current_period, which other methods read -- TODO confirm.
self.initialize_period()
self.snapshot_to_load = None
# Not ready until the state has been restored or rebuilt.
self.ready = False
self.warm_period = None
self.next_snapshot_load = time.time()
self.cassandra_session = cassandra_session
# Hard-coded storage location of the key archive.
self.cassandra_columns_family = "temporal_bf"
self.keyspace = 'parsely'
# Keys buffered by archive_bf_key() until commit_batch of them are queued.
self.uncommited_keys = []
self.commit_batch = 1000
self.columnfamily = None
# Creates keyspace / column family if needed and sets self.columnfamily.
self.ensure_cassandra_cf()
def _initialize_parameters(self):
    """Derive the filter geometry from ``self.error_rate`` and ``self.capacity``.

    Sets ``nbr_slices``, ``bits_per_slice``, ``nbr_bits`` and ``hashes``.
    """
    slices = int(np.ceil(np.log2(1.0 / self.error_rate)))
    self.nbr_slices = slices

    numerator = self.capacity * abs(np.log(self.error_rate))
    denominator = slices * (np.log(2) ** 2)
    slice_bits = int(np.ceil(numerator / denominator))
    self.bits_per_slice = slice_bits

    self.nbr_bits = slices * slice_bits
    self.hashes = generate_hashfunctions(slice_bits, slices)
def ensure_cassandra_cf(self):
    """Create the keyspace and column family if missing, then bind to them.

    Afterwards ``self.columnfamily`` is a ``ColumnFamily`` for
    ``self.cassandra_columns_family`` on ``self.cassandra_session``.
    """
    s = SystemManager()
    try:
        if self.keyspace not in s.list_keyspaces():
            s.create_keyspace(self.keyspace, SIMPLE_STRATEGY, {'replication_factor': '1'})
        if self.cassandra_columns_family not in s.get_keyspace_column_families(self.keyspace):
            s.create_column_family(self.keyspace, self.cassandra_columns_family)
    finally:
        # The system manager connection was never released before.
        s.close()
    self.columnfamily = ColumnFamily(self.cassandra_session, self.cassandra_columns_family)
def archive_bf_key(self, bf_key):
    """Queue ``bf_key`` and flush the queue once it holds ``commit_batch`` keys.

    A flush writes all queued keys as empty-valued columns of the row
    ``<name>_<YYYY-MM-DD:HH>`` for the current hour, then clears the queue.
    """
    pending = self.uncommited_keys
    pending.append(bf_key)
    if len(pending) < self.commit_batch:
        return

    hour_stamp = dt.datetime.now().strftime('%Y-%m-%d:%H')
    row_key = '%s_%s' % (self.name, hour_stamp)
    self.columnfamily.insert(row_key, dict.fromkeys(pending, ''))
    self.uncommited_keys = []
def _hour_range(self, start, end, reverse=False, inclusive=True):
    """Generate datetimes one hour apart covering ``start`` to ``end``.

    The number of values is the elapsed time rounded up to whole hours,
    plus one when ``inclusive`` is true. Values count up from ``start``, or
    down from ``end`` when ``reverse`` is true; they are not clamped, so a
    span that is not a whole number of hours overshoots the far end.
    """
    # timedelta.total_seconds() replaces the hand-rolled helper, and range()
    # replaces the Python 2-only xrange().
    hours = int(math.ceil((end - start).total_seconds() / (60.0 * 60.0)))
    if inclusive:
        hours += 1
    for i in range(hours):
        if reverse:
            yield end - dt.timedelta(hours=i)
        else:
            yield start + dt.timedelta(hours=i)
def resize(self, new_capacity):
    """Switch to ``new_capacity`` and repopulate the filter from the archive.

    The filter parameters are recomputed for the new capacity before the
    rebuild runs.
    """
    self.capacity = new_capacity
    # Parameters depend on capacity, so they must be refreshed first.
    self._initialize_parameters()
    self.rebuild_from_archive()
def _drop_archive(self):
    """Delete the archived hourly rows of the retention window (best effort).

    One row named ``<name>_<YYYY-MM-DD:HH>`` is removed for every hour
    produced by ``self._hour_range`` between
    ``current_period - (expiration - 1) days`` and now. A failure on one row
    is ignored so the remaining rows are still attempted.
    """
    last_period = self.current_period - dt.timedelta(days=self.expiration-1)
    for hour in self._hour_range(last_period, dt.datetime.now()):
        try:
            row = "%s_%s" % (self.name, hour.strftime('%Y-%m-%d:%H'))
            # The former get_count() call was dropped: its result was never
            # used and it cost one extra round trip per hour.
            self.columnfamily.remove(row)
        except Exception:
            # Deliberately best effort, but no longer a bare ``except:`` so
            # KeyboardInterrupt / SystemExit still propagate.
            pass
def rebuild_from_archive(self):
"""Rebuild the BF using the archived items"""
self.initialize_bitarray()
last_period = self.current_period - dt.timedelta(days=self.expiration-1)
hours = self._hour_range(last_period, dt.datetime.now())
rows = []
for i,hour in enumerate(hours):
row = "%s_%s" % (self.name, hour.strftime('%Y-%m-%d:%H'))
rows.append(row)
rows_content = self.columnfamily.multiget(rows, column_count=1E6)
#.........这里部分代码省略.........
示例11: ConnectionPool
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily
# Connect to keyspace "pykeyspace" on localhost:9160 and open the "UserInfo" column family.
pool = ConnectionPool("pykeyspace", ["localhost:9160"])
col_family = ColumnFamily(pool, "UserInfo")
# Insert one row: key "dosht2" with two columns.
col_family.insert("dosht2", {"email": "[email protected]", "name": "mostafa"})
# print col_family.get("dosht2", columns=["email"])['email']
print col_family.get("dosht2")
# Batch mutator: operations are queued on `b` and sent together by send().
b = col_family.batch()
b.insert("dodo", {"email": "[email protected]"})
# Removes only the "name" column of row "dosht2".
b.remove("dosht2", ["name"])
b.send()
print col_family.get("dosht2")
# "dosht" is not inserted anywhere in this excerpt; only the "dodo" entry of the result is printed.
print col_family.multiget(["dosht", "dodo"])["dodo"]
from pycassa.types import *
class User(object):
    """Model class used with ColumnFamilyMap; each attribute declares a column type."""

    # The attribute holding the row key must be named `key`.
    key = UTF8Type()
    email = AsciiType()
    age = IntegerType()

    def __repr__(self):
        """Return a readable summary of key, email and age."""
        fields = (self.key, self.email, self.age)
        return "User(key: %s, email: %s, age: %s)" % fields
from pycassa.columnfamilymap import ColumnFamilyMap
# Map the User class onto the "UserInfo" column family of the same pool.
cfmap = ColumnFamilyMap(User, pool, "UserInfo")
# A fresh, not yet stored, User object (the listing is cut off after this line).
user = User()
示例12: multiget
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
def multiget(self, *args, **kwargs):
    """Forward a multiget call to the column family named by ``self.__column_family__``.

    A new ``ColumnFamily`` on ``self.pool`` is created per call; all
    arguments are passed through unchanged and its result is returned.
    """
    return ColumnFamily(self.pool, self.__column_family__).multiget(*args, **kwargs)
示例13: ConnectionPool
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
#!/usr/bin/python
#-*- coding:utf-8 -*-
import csv
import glob
from pycassa.system_manager import *
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily
# Connect to the 'employees' keyspace; every CSV file under employees/ is
# named after a column family.
pool = ConnectionPool('employees')
filenames = glob.glob('employees/*.csv')
# Only the first matching file is processed.
for filename in filenames[:1]:
    # Column family name = file name without directory and extension.
    base_name = filename.split('/')[-1]
    only_name = base_name.split('.')[0]
    print(only_name)
    cf = ColumnFamily(pool, only_name)
    print(cf.multiget(['row1','row2']))
示例14: list
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
################################### GET #######################################
# Read examples on `author_cf`, which is created outside this excerpt.
# Get the row for the rowkey
authors = author_cf.get('sacharya')
print(authors)
# Get value for column
print("Get value for column")
authors = author_cf.get('sacharya1', columns=['first_name'])
print(authors)
# Get the columns for the row key and column key
authors = author_cf.get('sacharya1', columns=['first_name', 'last_name'])
print(authors)
# multiget takes all row keys as ONE list. Passing them as two positional
# arguments made 'sacharya1' the `columns` argument, not a second key.
authors = author_cf.multiget(['sacharya', 'sacharya1'])
print(authors)
print("Printing the keys")
keys = authors.keys()
for k in keys:
    print(authors.get(k))
print("Keys printed")
#authors = list(author.get_range().get_keys())
# Range scan over all rows; element 0 of each entry is the row key.
for value in author_cf.get_range():
    print(value[0])
# Only if using OrderPreservingPartitioner. Default is RandomPartitioner, which
# does md5 on the key
#for value in author_cf.get_range(start='sacharya5', finish='sacharya10'):
示例15: IntegerType
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import multiget [as 别名]
from pycassa.types import *
# Typed columns: register an IntegerType validator for two column names on
# `col_fam` (created outside this excerpt), then store and read ints.
col_fam.column_validators['IntColumn5'] = IntegerType()
col_fam.column_validators['IntColumn6'] = IntegerType()
col_fam.insert('intData', {'IntColumn5':5, 'IntColumn6':6})
print col_fam.get('intData')
# OrderedDict([('IntColumn5', 5), ('IntColumn6', 6)])
# Batch operations: insert several rows with one call, {row key: {column: value}}.
col_fam.batch_insert({'key4': {'Column1': 'PycassaData4',
'Column2': 'PycassaData5',
'Column3': 'PycassaData6',
'Column4': 'PycassaData7',
'Column5': 'PycassaData8'},
'key5': {'Column7': 'PycassaData9'}})
# Multi-row reads: all columns, then only the two named columns.
# NOTE(review): readData is reassigned by every call below and never used in this excerpt.
readData = col_fam.multiget(['key3', 'key4', 'key5'])
readData = col_fam.multiget(['key3', 'key4', 'key5'], columns=['Column1', 'Column7'])
# Column slices: a start/finish range, then a reversed slice limited to 3 columns.
readData = col_fam.get('key4', column_start='Column2', column_finish='Column4')
readData = col_fam.get('key4', column_reversed=True, column_count=3)
#Types
# from pycassa.types import *
# class User(object):
# key = AsciiType()
# name = UTF8Type()
# age = IntegerType()
# height = FloatType()