本文整理汇总了Python中pycassa.columnfamily.ColumnFamily.get_count方法的典型用法代码示例。如果您正苦于以下问题:Python ColumnFamily.get_count方法的具体用法?Python ColumnFamily.get_count怎么用?Python ColumnFamily.get_count使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pycassa.columnfamily.ColumnFamily
的用法示例。
在下文中一共展示了ColumnFamily.get_count方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ClientCassandra
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
class ClientCassandra():
def __init__(self,keySpace):
self.pool = ConnectionPool(keySpace, ['localhost:9160'])
self.col_fam_page = ColumnFamily(self.pool, 'Page')
self.col_fam_publication = ColumnFamily(self.pool, 'Publication')
self.col_fam_company = ColumnFamily(self.pool, 'Company')
self.col_fam_location = ColumnFamily(self.pool, 'Location')
self.col_fam_category = ColumnFamily(self.pool, 'Category')
#pycassaShell
#SYSTEM_MANAGER.create_keyspace('BlwData', strategy_options={"replication_factor": "1"});
#SYSTEM_MANAGER.create_column_family('BlwData', 'Page');
#SYSTEM_MANAGER.create_column_family('BlwData', 'Publication');
#SYSTEM_MANAGER.create_column_family('BlwData', 'Company');
#SYSTEM_MANAGER.create_column_family('BlwData', 'Location');
#SYSTEM_MANAGER.create_column_family('BlwData', 'Category');
def insertPage(self,page):
timestamp= self.col_fam_page.insert(page.getUrl(), page.toJson())
print "sizeof category " + page.category.name + " is " + str(self.col_fam_category.get_count(page.category.name))
self.col_fam_category.insert(page.category.name,{'url': page.getUrl()})
print "sizeof category " + page.category.name + " is " + str(self.col_fam_category.get_count(page.category.name))
return timestamp
# should raise an exception for an immutable sequence
#self.assertRaises(TypeError, random.shuffle, (1,2,3))
def getPages(self,url,column):
readData = self.col_fam_page .get(url,columns=[column])
return readData
def getCountCategory(self,category):
return self.col_fam_category.get_count(category)
示例2: _check_cassandra
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
def _check_cassandra(self, del_network_keyname, local_hostname, cassandra_listen_port):
    """Count how many times *del_network_keyname* appears in the
    'virtual_network' row of the obj_fq_name_table column family."""
    # Imports are kept local so pycassa is only required when this check runs.
    from pycassa.pool import ConnectionPool
    from pycassa.columnfamily import ColumnFamily
    server = local_hostname + ":" + cassandra_listen_port
    fq_name_cf = ColumnFamily(ConnectionPool('config_db_uuid', [server]), 'obj_fq_name_table')
    return fq_name_cf.get_count('virtual_network', columns=[del_network_keyname])
示例3: GetValueCount
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
def GetValueCount(pool, columnFamily, key, *args, **kwargs):
    """Return the column count for *key* in *columnFamily*, or None on failure.

    Extra positional/keyword arguments are forwarded to ColumnFamily.get_count().
    Bug fix: the original computed the count into ``d`` but never returned it,
    so the function always returned None.
    """
    d = None
    try:
        col_fam = ColumnFamily(pool, columnFamily)
        d = col_fam.get_count(key, *args, **kwargs)
    except Exception:
        # Best-effort: a missing row/column family simply yields None.
        #print('empty column '+key)
        pass
    return d
示例4: DailyTemporalBloomFilter
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
class DailyTemporalBloomFilter(DailyTemporalBase):
"""Long Range Temporal BloomFilter using a daily resolution.
For really high value of expiration (like 60 days) with low requirement on precision.
The actual error of this BF will the be native error of the BF + the error related
to the coarse aspect of the expiration, since we no longer expires information precisely.
Also, as opposed to a classic Bloom Filter, this one will aslo have false positive (reporting membership for a non-member)
AND false negative (reporting non-membership for a member).
The upper bound of the temporal_error can be theoricaly quite high. However, if the
items of the set are uniformly distributed over time, the avg error will be something like 1.0 / expiration
"""
def __new__(cls, capacity, error_rate, expiration, name, cassandra_session, snapshot_path='./'):
    # Allocation is delegated to DailyTemporalBase, which only needs
    # capacity and error_rate; the other arguments are handled in __init__.
    return super(DailyTemporalBloomFilter, cls).__new__(cls, capacity=capacity, error_rate=error_rate)
def __init__(self, capacity, error_rate, expiration, name, cassandra_session, snapshot_path='./'):
    """Build the daily-resolution Bloom filter and ensure its Cassandra archive CF exists."""
    filename = ""  # kept from the original code; not used afterwards
    super(DailyTemporalBloomFilter, self).__init__(capacity=capacity, error_rate=error_rate)
    self.bf_name = name
    self.expiration = expiration
    self.initialize_period()
    # Cassandra archiving state: keys are buffered and flushed in batches.
    self.cassandra_session = cassandra_session
    self.cassandra_columns_family = "temporal_bf"
    self.keyspace = 'parsely'
    self.uncommited_keys = []
    self.commit_batch = 1000
    self.columnfamily = None
    self.ensure_cassandra_cf()
    self.snapshot_path = snapshot_path
def ensure_cassandra_cf(self):
    """Create the keyspace and/or column family on demand, then bind a handle."""
    manager = SystemManager()
    if self.keyspace not in manager.list_keyspaces():
        manager.create_keyspace(self.keyspace, SIMPLE_STRATEGY, {'replication_factor': '1'})
    if self.cassandra_columns_family not in manager.get_keyspace_column_families(self.keyspace):
        manager.create_column_family(self.keyspace, self.cassandra_columns_family)
    self.columnfamily = ColumnFamily(self.cassandra_session, self.cassandra_columns_family)
def archive_bf_key(self, bf_key):
    """Buffer *bf_key*; flush the batch to Cassandra once commit_batch keys piled up."""
    self.uncommited_keys.append(bf_key)
    if len(self.uncommited_keys) < self.commit_batch:
        return
    # Rows are bucketed per hour: "<bf_name>_YYYY-mm-dd:HH".
    hour_bucket = dt.datetime.now().strftime('%Y-%m-%d:%H')
    row_key = '%s_%s' % (self.bf_name, hour_bucket)
    self.columnfamily.insert(row_key, dict((k, '') for k in self.uncommited_keys))
    self.uncommited_keys = []
def _hour_range(self, start, end, reverse=False, inclusive=True):
    """Yield every hour between *start* and *end* (datetimes), optionally
    inclusive of the last hour and optionally in reverse order."""
    def total_seconds(td):
        # Portable equivalent of timedelta.total_seconds().
        return (td.microseconds + (td.seconds + td.days * 24.0 * 3600.0) * 10.0**6) / 10.0**6
    span_hours = int(math.ceil(total_seconds(end - start) / (60.0 * 60.0)))
    if inclusive:
        span_hours += 1
    step = dt.timedelta(hours=1)
    current = end if reverse else start
    for _ in xrange(span_hours):
        yield current
        current = current - step if reverse else current + step
def _day_range(self, start, end, reverse=False, inclusive=True):
    """Yield every day between *start* and *end* (datetimes), optionally
    inclusive of the last day and optionally in reverse order."""
    span_days = (end - start).days
    if inclusive:
        span_days += 1
    for offset in xrange(span_days):
        delta = dt.timedelta(days=offset)
        yield (end - delta) if reverse else (start + delta)
def _drop_archive(self):
    """Best-effort removal of archived hourly rows inside the expiration window.

    A failure on one row is ignored so the purge loop keeps going.
    """
    last_period = self.current_period - dt.timedelta(days=self.expiration - 1)
    for hour in self._hour_range(last_period, dt.datetime.now()):
        row = "%s_%s" % (self.bf_name, hour.strftime('%Y-%m-%d:%H'))
        try:
            self.columnfamily.get_count(row)  # kept from the original; result was unused
            self.columnfamily.remove(row)
        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt propagate.
            pass
def rebuild_from_archive(self, rebuild_snapshot=True):
"""Rebuild the BF using the archived items"""
self.initialize_bitarray()
#if rebuild_snapshot:
# self.delete_snapshots()
def multi_rows_itr(rows):
for row in rows.values():
for k in row.keys():
yield k
last_period = self.current_period - dt.timedelta(days=self.expiration-1)
hours = self._hour_range(last_period, dt.datetime.now())
days = self._day_range(last_period, dt.datetime.now())
#.........这里部分代码省略.........
示例5: get_count
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
def get_count(columnFamily, uid):
"get number of columns in a row"
column = ColumnFamily(pool, columnFamily)
count = column.get_count(uid)
print uid, count
return count
示例6: ConnectionPool
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
from pycassa.pool import ConnectionPool
from pycassa.columnfamilymap import ColumnFamilyMap
from pycassa.columnfamily import ColumnFamily
if __name__ == '__main__':
#['10.15.62.100:9160','10.15.62.101:9160','10.15.62.102:9160']
pool = ConnectionPool('Cassandra_Test',['10.107.4.187:9160'])
print pool
# cf_map = ColumnFamilyMap(User, pool, 'Users')
col_fam = ColumnFamily(pool, 'Users')
print col_fam.get('author')
print col_fam.get_count('author')
col_fam.insert('row_key', {'col_name': 'col_val'})
col_fam.insert('row_key', {'col_name':'col_val', 'col_name2':'col_val2'})
col_fam.batch_insert({'row1': {'name1': 'val1', 'name2': 'val2'},'row2': {'foo': 'bar'}})
#col_fam.insert('super_key', {'key':{'col_name':'col_val', 'col_name2':'col_val2'}})
print col_fam.get_count('row_key', columns=['foo', 'bar'])
print col_fam.get_count('row_key', column_start='foo')
print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'])
print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'],columns=['col1', 'col2', 'col3'])
print col_fam.multiget_count(['fib0', 'fib1', 'fib2', 'fib3', 'fib4'],column_start='col1', column_finish='col3')
print col_fam.get_count('row_key')
print col_fam.get('row_key')
print col_fam.get('author')
print col_fam.get('row_key', columns=['col_name', 'col_name2'])
print col_fam.get('row_key', column_reversed=True, column_count=3)
print col_fam.multiget(['row1', 'row2'])
for i in range(1, 10):
col_fam.insert('row_key', {str(i): 'val'})
print col_fam.get('row_key', column_start='5', column_finish='7')
示例7: CassandraDemo
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
class CassandraDemo(object):
def __init__(self, database, table):
    """Remember the target keyspace (*database*) and column family (*table*)."""
    self.database = database
    self.table = table
def create_connections(self):
    """Open a connection pool on the keyspace and bind the column-family handle."""
    self.pool = ConnectionPool(self.database)
    self.cf = ColumnFamily(self.pool, self.table)
def create_database_and_table(self):
    """(Re)create the keyspace and column family for this demo.

    Drops the column family first when it already exists so each run starts
    from an empty table. Returns True on completion.
    """
    super_cf = False  # consider super columns to be deprecated
    s = SystemManager()
    # Bug fix: the original referenced bare names `database`/`table`, which
    # are undefined inside the method -- the instance attributes are meant.
    if self.database not in s.list_keyspaces():
        s.create_keyspace(self.database, SIMPLE_STRATEGY, {'replication_factor': '1'})
    # Delete the column family from the keyspace if it already exists.
    if self.table in s.get_keyspace_column_families(self.database):
        s.drop_column_family(self.database, self.table)
    # Create the column family in the keyspace.
    if self.table not in s.get_keyspace_column_families(self.database):
        print("table is creating...")
        s.create_column_family(self.database, self.table, super = super_cf, comparator_type = ASCII_TYPE)
    s.close()
    return True
def insert_data(self):
print '\nemployee data is inserting...'
self.cf.insert('1', {'fn':'yogesh', 'ln':'kumar', 'ct': 'Ajmer', 'em': '[email protected]'})
self.cf.insert('2', {'fn':'amit', 'ln':'pandita', 'ct': 'Delhi', 'em': '[email protected]'})
self.cf.insert('3', {'fn':'sandeep', 'ln':'tak', 'ct': 'Ajmer', 'em': '[email protected]', 'mb': '8890467032'})
def get_data(self):
print '\nemployee data is featching...'
data1 = self.cf.get('1')
data2 = self.cf.get('2', columns = ['fn', 'ln', 'em'])
data3 = self.cf.get('3', column_start = 'ct', column_finish = 'fn')
data4 = self.cf.get('1', column_reversed = False, column_count = 3)
data5 = self.cf.get('1', column_reversed = True, column_count = 3)
print data1
print data2
print data3
print data4
print data5
def get_multiple_data(self):
print '\ngetting multiple employees data...'
row_keys = ['1','2','3']
data = self.cf.multiget(row_keys)
print data
def get_data_by_range(self):
'''
if you get an error don't worry about this, it's a Cassandra limitation Issue
'''
print '\ngetting employees data by range...'
start_row_key = '1'
end_row_key = '3'
data = self.cf.get_range(start = start_row_key, finish = end_row_key)
for key, columns in data:
print key,coulmns
def get_count(self):
print '\nget employee row\'s colunm count'
print self.cf.get_count('1')
print self.cf.get_count('1', columns = ['fn', 'ln'])
print self.cf.get_count('1', column_start = 'em')
def get_multi_count(self):
print '\nget multiple employees row\'s colunm count'
row_keys = ['1','2','3']
columns = ['fn', 'ln', 'mb']
column_start = 'ct'
column_finish = 'fn'
print self.cf.multiget_count(row_keys)
print self.cf.multiget_count(row_keys, columns = columns)
print self.cf.multiget_count(row_keys, column_start = column_start, column_finish = column_finish)
def update_data(self):
print '\nemployee data is updating...'
self.cf.insert('1', {'pwd':'[email protected]', 'ct':'Noida'})
def delete_data(self):
print '\ndelete data from employee'
row = '2'
self.cf.remove(row)
def get_all_rows(self):
print '\ngetting rows name...'
print [v[0] for v in self.cf.get_range()]
def get_all_columns_of_row(self):
print '\ngetting columns name of a row'
row = '1'
data = self.cf.get(row)
#.........这里部分代码省略.........
示例8: ConnectionPool
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
import pycassa
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily
# Minimal pycassa walkthrough: connect, write one row, read it back, count it.
pool = ConnectionPool('Keyspace1')
col_fam = ColumnFamily(pool, 'ColumnFamily1')

# Insert two columns under one row key.
col_fam.insert('row_key', {'col_name':'col_val', 'col_name2':'col_val2'})

# Read the row back -> {'col_name': 'col_val', 'col_name2': 'col_val2'}
col_fam.get('row_key')

# Count the columns stored in the row.
col_fam.get_count('row_key')
示例9: ConnectionPool
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
#!/usr/bin/env python
import pycassa
from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily
pool = ConnectionPool('Keyspace1',server_list=['localhost:9160'])
col_fam = ColumnFamily(pool, 'ColumnFamily1')
check1 = col_fam.insert('row_key', {'col_name': 'col_val'})
print check1
print " is the added row into a column\n"
check2 = col_fam.insert('row_key', {'col_name':'col_val', 'col_name2':'col_val2'})
print check2
print " Multiple columns are added \n"
get_data1 = col_fam.get('row_key')
print get_data1
get_data2 = col_fam.get('row_key', columns=['col_name', 'col_name2'])
print get_data2
print "\n"
print "Slicing\n"
for i in range(1, 10):
col_fam.insert('row_key', {str(i): 'val'})
print col_fam.get('row_key', column_start='5', column_finish='7')
print "\n"
print "Counting rows: "
print col_fam.get_count('row_key')
示例10: float
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
dinvCost = float(e1['invCost']) - float(e2['invCost'])
if (abs(dinvCost) > epson):
fl.write("+invCost:" + str(dinvCost) + '.' + e1['invCost'] + ',' + e2['invCost'] + ':' + str(e1) + ',' + str(e2) + '\n')
return
dbillingInvoice = float(e1['billingInvoice']) - float(e2['billingInvoice'])
if (abs(dbillingInvoice) > epson):
fl.write("+billingInvoice:" + str(dbillingInvoice) + ',' + e1['billingInvoice'] + ',' + e2['billingInvoice'] + ':' + str(e1) + ',' + str(e2) + '\n')
return
c.write(str(e1) + '\n')
servers = ['pb036:9160', 'pb037:9160', 'pb038:9160']
pool = ConnectionPool('RSS', server_list = servers, timeout = 1, pool_size=20)
meta = ColumnFamily(pool, 'MetaData')
counter = ColumnFamily(pool, 'Counter')
oid_count = meta.get_count('rss.All')
print 'Total oids => ', oid_count
oids_gen = meta.xget('rss.All', column_reversed=True, include_timestamp=True)
oids = dict(oids_gen)
#oids = meta.get('rss.All', column_reversed=True)
#print oids
home = os.path.expanduser("~")
fi = open(home + "/rss/diff_cas.txt", "w")
fj = open(home + "/rss/diff_rss.txt", "w")
difi = open(home + "/rss/difi.txt", "w")
difl = open(home + "/rss/difl.txt", "w")
f = open(home + "/rss/good.txt", "w")
示例11: DailyTemporalBloomFilter
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
class DailyTemporalBloomFilter(object):
"""Long Range Temporal BloomFilter using a daily resolution.
For really high value of expiration (like 60 days) with low requirement on precision.
The actual error of this BF will the be native error of the BF + the error related
to the coarse aspect of the expiration, since we no longer expires information precisely.
Also, as opposed to a classic Bloom Filter, this one will aslo have false positive (reporting membership for a non-member)
AND false negative (reporting non-membership for a member).
The upper bound of the temporal_error can be theoricaly quite high. However, if the
items of the set are uniformly distributed over time, the avg error will be something like 1.0 / expiration
"""
def __init__(self, capacity, error_rate, expiration, name, snapshot_path, cassandra_session):
    """Set up BF parameters, daily-expiration state, and the Cassandra archive."""
    # Core Bloom filter parameters.
    self.error_rate = error_rate
    self.capacity = capacity
    self._initialize_parameters()
    self.initialize_bitarray()
    self.count = 0
    self.hashed_values = []
    self.name = name
    self.snapshot_path = snapshot_path
    # Daily expiration / snapshot bookkeeping.
    self.expiration = expiration
    self.initialize_period()
    self.snapshot_to_load = None
    self.ready = False
    self.warm_period = None
    self.next_snapshot_load = time.time()
    # Cassandra archiving: keys are buffered and flushed in batches.
    self.cassandra_session = cassandra_session
    self.cassandra_columns_family = "temporal_bf"
    self.keyspace = 'parsely'
    self.uncommited_keys = []
    self.commit_batch = 1000
    self.columnfamily = None
    self.ensure_cassandra_cf()
def _initialize_parameters(self):
    """Derive slice count, bits per slice, and hash functions from capacity/error_rate."""
    self.nbr_slices = int(np.ceil(np.log2(1.0 / self.error_rate)))
    numerator = self.capacity * abs(np.log(self.error_rate))
    denominator = self.nbr_slices * (np.log(2) ** 2)
    self.bits_per_slice = int(np.ceil(numerator / denominator))
    self.nbr_bits = self.nbr_slices * self.bits_per_slice
    self.hashes = generate_hashfunctions(self.bits_per_slice, self.nbr_slices)
def ensure_cassandra_cf(self):
    """Create keyspace/column family if absent, then bind self.columnfamily."""
    sys_mgr = SystemManager()
    if self.keyspace not in sys_mgr.list_keyspaces():
        sys_mgr.create_keyspace(self.keyspace, SIMPLE_STRATEGY, {'replication_factor': '1'})
    if self.cassandra_columns_family not in sys_mgr.get_keyspace_column_families(self.keyspace):
        sys_mgr.create_column_family(self.keyspace, self.cassandra_columns_family)
    self.columnfamily = ColumnFamily(self.cassandra_session, self.cassandra_columns_family)
def archive_bf_key(self, bf_key):
    """Queue *bf_key*; write the whole batch to the current hourly row when full."""
    self.uncommited_keys.append(bf_key)
    if len(self.uncommited_keys) >= self.commit_batch:
        hour_bucket = dt.datetime.now().strftime('%Y-%m-%d:%H')
        batch = dict((key, '') for key in self.uncommited_keys)
        self.columnfamily.insert('%s_%s' % (self.name, hour_bucket), batch)
        self.uncommited_keys = []
def _hour_range(self, start, end, reverse=False, inclusive=True):
    """Generate each hour from *start* to *end*; *inclusive* adds the final hour."""
    def total_seconds(td):
        # Portable equivalent of timedelta.total_seconds().
        return (td.microseconds + (td.seconds + td.days * 24.0 * 3600.0) * 10.0**6) / 10.0**6
    nbr_hours = int(math.ceil(total_seconds(end - start) / (60.0 * 60.0)))
    if inclusive:
        nbr_hours += 1
    if reverse:
        for i in xrange(nbr_hours):
            yield end - dt.timedelta(hours=i)
    else:
        for i in xrange(nbr_hours):
            yield start + dt.timedelta(hours=i)
def resize(self, new_capacity):
    """Change capacity to *new_capacity*, then rebuild the filter from the archive."""
    self.capacity = new_capacity
    self._initialize_parameters()
    self.rebuild_from_archive()
def _drop_archive(self):
    """Best-effort removal of archived hourly rows inside the expiration window.

    A failure on one row is ignored so the purge loop keeps going.
    """
    last_period = self.current_period - dt.timedelta(days=self.expiration - 1)
    for hour in self._hour_range(last_period, dt.datetime.now()):
        row = "%s_%s" % (self.name, hour.strftime('%Y-%m-%d:%H'))
        try:
            self.columnfamily.get_count(row)  # kept from the original; result was unused
            self.columnfamily.remove(row)
        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt propagate.
            pass
def rebuild_from_archive(self):
"""Rebuild the BF using the archived items"""
self.initialize_bitarray()
last_period = self.current_period - dt.timedelta(days=self.expiration-1)
hours = self._hour_range(last_period, dt.datetime.now())
rows = []
for i,hour in enumerate(hours):
row = "%s_%s" % (self.name, hour.strftime('%Y-%m-%d:%H'))
rows.append(row)
rows_content = self.columnfamily.multiget(rows, column_count=1E6)
#.........这里部分代码省略.........
示例12: get_count
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
def get_count(self, *args, **kwargs):
    """Forward all arguments to ColumnFamily.get_count() on this model's column family."""
    handle = ColumnFamily(self.pool, self.__column_family__)
    return handle.get_count(*args, **kwargs)
示例13: ColumnFamily
# 需要导入模块: from pycassa.columnfamily import ColumnFamily [as 别名]
# 或者: from pycassa.columnfamily.ColumnFamily import get_count [as 别名]
name_cf = ColumnFamily(pool, 'myname')
x = ['acharya1', 'acharya2']
name_cf.insert('sacharya3', {'last_name': x})
names3 = name_cf.get('sacharya3')
print "List as a value"
print names3
attrs = dict([(attr_name, set([attr_values])) for attr_name, attr_values in
names3.iteritems()])
name_cf.insert("sacharya3", {'last_name':
attrs['last_name'].append("acharya3")})
print name_cf.get('sacharya3')
################################# COUNT #######################################
# Count the number of columns for the row key
count=author_cf.get_count("sacharya1")
print count
count=author_cf.multiget_count(["sacharya1","sacharya2"])
print count
################################## REMOVE #####################################
# Remove the column for the row key and column key
print "Removing the column last_name for row key sacharya1"
author_cf.remove('sacharya1', columns=['last_name'])
time.sleep(5)
authors = author_cf.get('sacharya')
print authors
# REMOVE the entire row