本文整理汇总了Python中redis.StrictRedis.scard方法的典型用法代码示例。如果您正苦于以下问题:Python StrictRedis.scard方法的具体用法?Python StrictRedis.scard怎么用?Python StrictRedis.scard使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类redis.StrictRedis
的用法示例。
在下文中一共展示了StrictRedis.scard方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ProxyCheckSpider
# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
class ProxyCheckSpider(Spider):
    """Spider that re-validates every proxy currently stored in the Redis
    proxy pool, dropping the ones that fail a validator request.
    """
    name = 'proxy_check'

    def __init__(self, mode='prod', *args, **kwargs):
        """Load the YAML config for *mode* and connect to the Redis pool.

        :param mode: 'prod' or 'test', selects the config file.
        :raises ValueError: if *mode* is not a known profile (previously
            this crashed later with an opaque NameError).
        """
        super().__init__(*args, **kwargs)  # preserve Scrapy's Spider init contract
        if mode == 'prod':
            LOCAL_CONFIG_YAML = './hq-proxies.yml'
        elif mode == 'test':
            LOCAL_CONFIG_YAML = './hq-proxies.test.yml'
        else:
            raise ValueError('unknown mode: %r' % mode)
        with open(LOCAL_CONFIG_YAML, 'r', encoding='utf-8') as f:
            # safe_load: the config is plain data; yaml.load without an
            # explicit Loader is unsafe and deprecated since PyYAML 5.1
            LOCAL_CONFIG = yaml.safe_load(f)
        self.redis_db = StrictRedis(
            host=LOCAL_CONFIG['REDIS_HOST'],
            port=LOCAL_CONFIG['REDIS_PORT'],
            password=LOCAL_CONFIG['REDIS_PASSWORD'],
            db=LOCAL_CONFIG['REDIS_DB']
        )
        # set of (validator_url, expected_response_prefix) pairs
        self.validator_pool = set()
        for validator in LOCAL_CONFIG['PROXY_VALIDATORS']:
            self.validator_pool.add((validator['url'], validator['startstring']))
        self.PROXY_COUNT = LOCAL_CONFIG['PROXY_COUNT']  # redis key: pool size
        self.PROXY_SET = LOCAL_CONFIG['PROXY_SET']      # redis key: proxy set

    def start_requests(self):
        """Issue one validator request through every proxy in the pool."""
        logger.info('测试代理池内代理质量...')
        self.redis_db.set(self.PROXY_COUNT, self.redis_db.scard(self.PROXY_SET))
        for proxy in self.redis_db.smembers(self.PROXY_SET):
            proxy = proxy.decode('utf-8')
            vaurl, vastart = random.choice(list(self.validator_pool))
            yield Request(url=vaurl, meta={'proxy': proxy, 'startstring': vastart}, callback=self.checkin, dont_filter=True)

    def checkin(self, response):
        """Keep the proxy if the validator page starts with the expected
        prefix; otherwise remove it from the pool."""
        res = response.body_as_unicode()
        if 'startstring' in response.meta and res.startswith(response.meta['startstring']):
            proxy = response.meta['proxy']
            self.redis_db.sadd(self.PROXY_SET, proxy)
            logger.info('可用代理+1 %s' % proxy)
            yield None
        else:
            # on failure the proxy may be missing from meta; fall back to the URL
            proxy = response.url if 'proxy' not in response.meta else response.meta['proxy']
            self.redis_db.srem(self.PROXY_SET, proxy)
            logger.info('无效代理 %s' % proxy)
            yield None

    def closed(self, reason):
        """Persist the final pool size when the spider finishes."""
        pcount = self.redis_db.scard(self.PROXY_SET)
        logger.info('代理池测试完成,有效代理数: %s' % pcount)
        self.redis_db.set(self.PROXY_COUNT, pcount)
示例2: Redis
# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
class Redis(BaseDB):
    """Redis-backed waiting queue of users looking for an interlocutor.

    Membership lives in the redis set 'available'; `self.available` caches
    whether a pairing is currently possible.
    """

    def __init__(self, host='localhost', port=6379, db=0):
        super().__init__()
        self.db = StrictRedis(host=host, port=port, db=db)
        # True when at least two users (i.e. a possible pair) are queued
        self.available = False

    def push_to_available(self, user_id):
        """Add *user_id* to the waiting queue and refresh availability."""
        self.db.sadd('available', user_id)
        self.check_available()

    def pop_first_available(self, self_user_id):
        """Block until another user is queued, then return a random one.

        :param self_user_id: caller's id, excluded from the candidates.
        """
        while not self.available:
            # BUG FIX: Event().wait() with no timeout blocks forever because
            # nothing ever sets the event; poll with a short timeout so this
            # loop can observe `self.available` flipped by another thread.
            threading.Event().wait(0.1)
        res = [int(x) for x in self.db.smembers('available')]  # all candidates
        shuffle(res)               # randomize pairing order
        if self_user_id in res:    # the caller may be queued as well
            res.remove(self_user_id)
        res = res.pop()            # take one random interlocutor
        self.check_available()
        return res

    def remove(self, user_id):
        """Drop *user_id* from the waiting queue and refresh availability."""
        self.db.srem('available', user_id)
        self.check_available()

    def check_available(self):
        # more than one queued user means someone besides the caller exists
        self.available = self.db.scard('available') - 1 > 0
示例3: ganswer_post
# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
def ganswer_post(request):
'''
i am assuming that the above condition noofusers==totusers would have already initialized
the wordpks in redis for the first time, so you get that and put it in the user session after updating it!
remove the first pk from the wordpks and update wordpks in user session to wordpks[1:]
continue till wordpks is empty which will signify that all the words have been given to user
'''
username = str(request.user)
print "inside ganswer_post for user:"+username
pref = settings.MY_PREFIX
facility = request.POST.get('facility')
prefg = pref+":"+facility
response_dict = {'done':False}
wordpks = request.session.get('wordpks', 0)
print 'initial value of wordpks:',wordpks
if wordpks == 0:
# just adds wordpks,totwords to user session
print "wordpks not in session!"
rd = StrictRedis()
d = rd.hgetall(prefg+":hash")
# store this dictionary in the session first
# there is no need to do that!
# request.session['hash'] = d
# print "stored the hash dictionary in session"
# print "wordpks: ",request.session['hash']['wordpks']
wordpks = d['wordpks']
# our first word, there won't be any user input here!
wordpks = wordpks.split('-')
request.session['totwords'] = len(wordpks)
print "wordpks:"+str(wordpks)
print 'now removing',request.user,'from pref:groupname'
print rd.keys(pref+"*")
print 'cardinality changed from',rd.scard(prefg),'to',
rd.srem(prefg, str(request.user))
print rd.scard(prefg)
if rd.scard(prefg) == 0:
# delete this key and hash
print "now deleting the group hash"
rd.delete(prefg)
rd.delete(prefg+":hash")
print rd.keys(pref+"*")
else:
# get user_input and send message that user gave the answer
print "wordpks in session"
print "wordpks:"+wordpks
#take and store the user input here
msgword = "the user "+username+' entered the word '+request.POST.get('input_word')
print msgword
if not (wordpks == [] or wordpks == ['']):
#print "adhicha word"
correct_ans=request.session.get('prev_word').strip()
ans=request.POST.get('input_word').strip()
if(str(correct_ans)==str(ans)):
print "correct"
marks=1
else:
print "wrong"
marks=0
obj1=GroupFinalResult.objects.filter(re_user=request.user).order_by('-starttime')[:1]
currentobj=obj1[0]
#print str(currentobj.starttime)
print str(request.session.get('prev_word'))
print str(request.POST.get('input_word'))
ans=request.POST.get('input_word')
currentobj.marks+=marks
currentobj.save()
print str(currentobj.marks)
obj2=GroupResultTable(usertest=currentobj,correct_ans=correct_ans,ans=ans,marks=marks) #to store result of each word
obj2.save()
print "zala"+str(obj2.marks)
# calculate the question no. for which this answer was received
x = int(request.session['totwords'])
wordpks = wordpks.split('-')
print 'now splitting wordpks...'
print wordpks
lenwordpks = 0 if wordpks == [''] else len(wordpks)
currentqno = x - lenwordpks
# currentqno != 0 because lenwordpks < x always in this else block
# and it is equal in the if block above this else block
# let's publish this message, shall we?
redis_publisher = RedisPublisher(facility = facility, broadcast = True)
# TODO make this json for consistency
msgword = username+", gave the answer for question no. "+str(currentqno);
message = RedisMessage(msgword)
redis_publisher.publish_message(message)
if wordpks == [] or wordpks == ['']:
print "wordpks is empty"
# no more words to dispatch redirect to result page
# delete the session variables here
del request.session['wordpks']
del request.session['totwords']
# del request.session['hash']
'''
so the thing is that we should display the results only
#.........这里部分代码省略.........
示例4: StrictRedis
# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
# Drain the temporary Tmall shop-info set from Redis and export it as CSV.
# NOTE: Python 2 script (print statement); depends on the project-local
# MyCsv module, assumed imported elsewhere in the original file.
import json
from redis import StrictRedis
# redis server config
rs = StrictRedis('10.118.187.21')
rs_sname_result = 'tmall_shopinfo_temp'
# CSV column names; NOTE(review): 'brnad' looks like a typo for 'brand',
# kept as-is because it is a column name downstream consumers may rely on.
title = 'name,href,addr,brnad,monthsale,productsum,dsr_desc_mark,' \
        'dsr_desc_avg,dsr_service_mark,dsr_service_avg,dsr_sending_mark,' \
        'dsr_sending_avg,sgr,srn,encryptedUserId,productDataNid_1,' \
        'product_link_1,price_1,productDataNid_2,product_link_2,price_2,' \
        'productDataNid_3,product_link_3,price_3,' \
        'productDataNid_4,product_link_4,price_4,shopDataUid'.split(',')
result = list()
n = 0
# SPOP removes members one by one until the set is empty; each member is a
# JSON-encoded shop record.
while rs.scard(rs_sname_result):
    result.append(json.loads(rs.spop(rs_sname_result)))
    n += 1
    # progress counter; NOTE(review): source indentation was lost — this
    # print is assumed to sit inside the loop, confirm against the original
    print n
# file_path = 'd:/spider/tmall/baseInfo'
# CentOS
file_path = '/app/tmall_temp'
csv_writer = MyCsv.Write_Csv(path=file_path,
                             name='shopInfo',
                             title=title,
                             result=result
                             )
csv_writer.add_title_data()
# drop the (now empty) temp key
rs.delete(rs_sname_result)
示例5: RedisConnector
# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
class RedisConnector (Connector):
def __init__ (self, *args, **kw):
self.handler = StrictRedis(*args, **kw)
def getkey (self, model):
    """Return the Redis hash key for *model*: '<prefix>:<id>'."""
    return '{}:{}'.format(model.getprefix(), model.getid())
def getpipe (self, pipe=None):
    """Return *pipe* if given, else a fresh transactional pipeline."""
    if pipe is not None:
        return pipe
    return self.handler.pipeline(transaction=True)
def save (self, model, pipe=None):
    """Persist *model*'s pending changes (hash fields + secondary indexes).

    Relies on the model's change-tracking protocol: `_dels` (fields to
    delete), `_diff` (changed fields), `_exists` (tri-state: True/False/
    unknown). When *pipe* is None, a local pipeline is created and
    executed; otherwise commands are queued on the caller's pipe.
    """
    _pipe = self.getpipe(pipe)
    for field in model.getfields().values():
        # only indexed/unique fields maintain secondary index keys
        if not field.index and not field.unique:
            continue
        if field.name in model._dels:
            self._del_idx(
                model=model,
                field=field,
                pipe=_pipe,
            )
        elif field.name in model._diff:
            self._save_idx(field, model, _pipe)
    # drop deleted fields from the hash (only if it may already exist)
    if model._exists is not False and len(model._dels):
        _pipe.hdel(self.getkey(model), *list(model._dels))
    if len(model._diff):
        _pipe.hmset(self.getkey(model), model._diff)
    # register the id in the prefix set unless we know it is already there
    if model._exists is not True:
        _pipe.sadd(model.getprefix(), model.getid())
    # execute only when we own the pipeline and queued something
    if pipe is None and len(_pipe):
        _pipe.execute()
def delete (self, model, pipe=None):
    """Delete the model hash, its index entries, and its id from the
    prefix set, queuing on *pipe* when supplied."""
    queue = self.getpipe(pipe)
    for field in model.getfields().values():
        # indexed/unique fields carry secondary index keys to clean up
        if field.index or field.unique:
            self._del_idx(field, model, queue)
    queue.delete(self.getkey(model))
    if model._exists is not False:
        queue.srem(model.getprefix(), model.getid())
    # flush only when this call created the pipeline itself
    if pipe is None and len(queue):
        queue.execute()
def exists (self, model):
    """Return whether the model's hash key is present in Redis."""
    key = self.getkey(model)
    return self.handler.exists(key)
def all (self, model_cls):
    """Return the ids of every stored *model_cls* instance."""
    prefix = model_cls.getprefix()
    return self.handler.smembers(prefix)
def count_all (self, model_cls):
    """Return how many *model_cls* instances are stored."""
    prefix = model_cls.getprefix()
    return self.handler.scard(prefix)
def get (self, model, name):
    """Return the value of hash field *name*, utf-8 decoded on Python 3."""
    val = self.handler.hget(self.getkey(model), name)
    if PY3K and val is not None:
        return val.decode('utf-8')
    return val
def getall (self, model):
    """Return the model's full hash as a str->str dict (utf-8 decoded)."""
    raw = self.handler.hgetall(self.getkey(model))
    return {
        key.decode(encoding='UTF-8'): value.decode(encoding='UTF-8')
        for key, value in raw.items()
    }
@staticmethod
def idx_key (prefix, field_name, val):
    """Key of the index set for *field_name* == *val*."""
    # py2 needs unicode() to match py3's str() behavior
    if PY3K:
        text = str(val)
    else:
        text = unicode(val)
    return ':'.join((prefix, field_name, text))
@staticmethod
def ridx_key (prefix, field_name):
    """Reverse-index key: '<prefix>:<field_name>'."""
    return '{}:{}'.format(prefix, field_name)
def find (self, expr):
assert isinstance(expr, BExpr)
if isinstance(expr.field, IndexField):
val = expr.field.to_db(expr.val)
key = self.idx_key(expr.model_cls.getprefix(), expr.field.name, val)
#.........这里部分代码省略.........
示例6: __init__
# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
#.........这里部分代码省略.........
def inc_coll_caches_get(self, coll, *fields):
    """Fetch the cached increment values for *fields* of collection *coll*.

    :ret: [] when *fields* is empty or nothing is cached; otherwise
        [value1, value2, ..., valueN].
    """
    if not fields:
        return []
    cache_key = self.inc_coll_cache_fmt.format(name=coll.name)
    raw = self.rdb.hmget(cache_key, *fields)
    # drop missing (None) entries and unpack the rest
    return [unpackb(item) for item in raw if item]
def inc_coll_caches_del(self, coll, *fields):
    """Drop the cached increment values for *fields* of *coll*."""
    cache_key = self.inc_coll_cache_fmt.format(name=coll.name)
    return self.rdb.hdel(cache_key, *fields)
def uniq_count_coll_cache_set(self, coll, ts, tagging, values):
    """Add *values* (any iterable of members) to the unique-count set
    keyed by (coll, tagging, ts)."""
    packed = {packb(v) for v in values}
    key = self.unique_count_coll_cache_fmt.format(
        name=coll.name, tagging=tagging, ts=ts)
    return self.rdb.sadd(key, *packed)
def uniq_count_coll_cache_get(self, coll, tagging, timestamps, count_only=False):
    """Per timestamp, return either the member set or (count_only) its size."""
    fmt = self.unique_count_coll_cache_fmt
    results = []
    for ts in timestamps:
        key = fmt.format(name=coll.name, tagging=tagging, ts=ts)
        if count_only:
            results.append(self.rdb.scard(key))
        else:
            results.append({unpackb(m) for m in self.rdb.smembers(key)})
    return results
def uniq_count_coll_cache_pop(self, coll, tagging, timestamps, number):
    """Pop up to *number* random members from each timestamp's set.

    :note: Redis `SPOP key [count]` command, The count argument will be
        available in a later version and is not available
        in 2.6, 2.8, 3.0.
        Now use SRANDMEMBER and SREM commands to mimic the effect of
        SPOP count.
    """
    key_fmt = self.unique_count_coll_cache_fmt
    rv = []
    for ts in timestamps:
        key = key_fmt.format(name=coll.name, tagging=tagging, ts=ts)
        # :: srandmember + srem == spop(key, number)
        members = self.rdb.srandmember(key, number)
        # BUG FIX: SREM with zero members is a redis protocol error
        # ("wrong number of arguments"); skip empty or missing sets.
        if members:
            self.rdb.srem(key, *members)
        rv.append({unpackb(m) for m in members})
    return rv
def uniq_count_coll_cache_del(self, coll, tagging, timestamps):
    """Delete the unique-count cache keys for all *timestamps*."""
    count_keys = self._gen_count_keys(coll.name, tagging,
                                      'unique_count', timestamps)
    return self.rdb.delete(*count_keys)
def sorted_count_coll_cache_set(self, coll, ts, tagging, values):
"""
:param values: should be a dict of <member: score> pair
示例7: SnapshotConnector
# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
#.........这里部分代码省略.........
def merge(self, events):
    '''
    Merge a list of events into one set.
    The key of the set is <event1>|<event2>|<event3>|...
    '''
    ordered = sorted(events, key=int)
    out_key = '|'.join(str(e) for e in ordered)
    if not self.r.exists(out_key):
        pipe = self.r.pipeline(False)
        source_keys = ['event_vals:{}'.format(eid) for eid in ordered]
        pipe.sunionstore(out_key, *source_keys)
        pipe.expire(out_key, 300)  # temporary key, auto-expires
        pipe.execute()
    return out_key
def intersection(self, events):
    '''
    Keeps only the values in *all* the sets
    The key of the set is <event1>&<event2>&<event3>&...
    '''
    ordered = sorted(events, key=int)
    out_key = '&'.join(str(e) for e in ordered)
    if not self.r.exists(out_key):
        pipe = self.r.pipeline(False)
        source_keys = ['event_vals:{}'.format(eid) for eid in ordered]
        pipe.sinterstore(out_key, *source_keys)
        pipe.expire(out_key, 300)  # temporary key, auto-expires
        pipe.execute()
    return out_key
def events_similarities(self, *events):
    '''
    Returns the intersection and the total amount of values in multiple events
    '''
    shared = self.r.scard(self.intersection(events))
    total = self.r.scard(self.merge(events))
    return shared, total
# ##### Group functions #####
def get_events_in_group(self, name):
    """Return every event id stored in group *name*."""
    return self.r.smembers(name)
def make_group(self, name, *events):
    '''
    Create a group of events
    '''
    # NOTE(review): exists+sadd is not atomic — two concurrent creators
    # could both pass the check; acceptable for single-writer use.
    if self.r.exists(name):
        # ValueError (a subclass of Exception, so existing handlers still
        # match) instead of the bare generic Exception raised before.
        raise ValueError('Group name already exists, maybe you want to update.')
    self.r.sadd(name, *events)
    self.r.sadd('groups', name)
def update_group(self, name, *events):
    """Add *events* to group *name* and (re)register the group."""
    self.r.sadd(name, *events)
    self.r.sadd('groups', name)
def delete_all_groups(self):
    """Delete every registered group."""
    for group_name in self.r.smembers('groups'):
        self.del_group(group_name)
def del_group(self, name):
    """Delete the event group *name*.

    Note: only the group key is removed; membership in the 'groups'
    registry is left to the caller (matches original behavior).
    """
    self.r.delete(name)
示例8: ProxyFetchSpider
# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
#.........这里部分代码省略.........
else:
logger.info('该代理已收录..')
def parse_66ip(self, response):
    '''
    @url http://www.66ip.cn/nmtq.php?getnum=100&isp=0&anonymoustype=3&start=&ports=&export=&ipaddress=&area=1&proxytype=0&api=66ip
    '''
    logger.info('开始爬取66ip')
    if 'proxy' in response.meta:
        logger.info('=>使用代理%s' % response.meta['proxy'])
    body = response.body_as_unicode()
    # one "ip:port" candidate per regex match
    for addr in re.findall('\d+\.\d+\.\d+\.\d+\:\d+', body):
        candidate = 'http://' + addr
        print(candidate)
        logger.info('验证: %s' % candidate)
        if self.redis_db.sismember(self.PROXY_SET, candidate):
            logger.info('该代理已收录..')
        else:
            checker_url, expected_prefix = random.choice(list(self.validator_pool))
            yield Request(url=checker_url, meta={'proxy': candidate, 'startstring': expected_prefix}, callback=self.checkin, dont_filter=True)
def parse_ip181(self, response):
    '''
    @url http://www.ip181.com/
    '''
    logger.info('开始爬取ip181')
    if 'proxy' in response.meta:
        logger.info('=>使用代理%s' % response.meta['proxy'])
    for row in response.css('table tbody tr'):
        cells = row.css('td::text').extract()
        ip, port, anonymity = cells[0], cells[1], cells[2]
        candidate = 'http://%s:%s' % (ip, port)
        # only keep high-anonymity ("高匿") proxies
        if anonymity != '高匿':
            logger.info('丢弃非高匿代理:%s' % candidate)
            continue
        logger.info('验证: %s' % candidate)
        if self.redis_db.sismember(self.PROXY_SET, candidate):
            logger.info('该代理已收录..')
        else:
            checker_url, expected_prefix = random.choice(list(self.validator_pool))
            yield Request(url=checker_url, meta={'proxy': candidate, 'startstring': expected_prefix}, callback=self.checkin, dont_filter=True)
def parse_kxdaili(self, response):
    '''
    @url http://www.kxdaili.com/dailiip/1/1.html#ip
    '''
    logger.info('开始爬取kxdaili')
    if 'proxy' in response.meta:
        logger.info('=>使用代理%s' % response.meta['proxy'])
    url_pattern = 'http://www.kxdaili.com/dailiip/1/%s.html#ip'
    # default stops pagination when the URL cannot be parsed; previously a
    # failed match left `page` undefined and crashed with NameError below
    page = 3
    try:
        # BUG FIX: the original pattern '(\d)+\.html' captured only the
        # LAST digit of a multi-digit page number; '(\d+)' captures all.
        page = int(re.search('(\d+)\.html', response.url).group(1))
    except Exception as e:
        logger.exception(e)
        logger.error(response.url)
    for tr in response.css('table.ui.table.segment tbody tr'):
        ip = tr.css('td::text').extract()[0]
        port = tr.css('td::text').extract()[1]
        proxy = 'http://%s:%s' % (ip, port)
        logger.info('验证: %s' % proxy)
        if not self.redis_db.sismember(self.PROXY_SET, proxy):
            vaurl, vastart = random.choice(list(self.validator_pool))
            yield Request(url=vaurl, meta={'proxy': proxy, 'startstring': vastart}, callback=self.checkin, dont_filter=True)
        else:
            logger.info('该代理已收录..')
    if page < 3:  # crawl the first 3 pages
        page += 1
        new_url = url_pattern % page
        new_meta = response.meta.copy()
        new_meta['page'] = page
        yield Request(url=new_url, meta=new_meta, callback=self.parse_kxdaili)
def parse_jiangxianli(self, response):
    '''
    @url = "http://ip.jiangxianli.com/api/proxy_ips?page=" + str(page)
    '''
    logger.info('开始爬取jiangxianli')
    if 'proxy' in response.meta:
        logger.info('=>使用代理%s' % response.meta['proxy'])
    # BUG FIX: `page` was hard-coded to 1 and the follow-up request reused
    # the page-1 URL, so pagination never advanced; carry the current page
    # in meta and request the NEXT page's URL instead.
    page = response.meta.get('page', 1)
    result = json.loads(response.body)
    for r in result['data']['data']:
        proxy = 'http://%s:%s' % (r['ip'], r['port'])
        if not self.redis_db.sismember(self.PROXY_SET, proxy):
            vaurl, vastart = random.choice(list(self.validator_pool))
            yield Request(url=vaurl, meta={'proxy': proxy, 'startstring': vastart}, callback=self.checkin, dont_filter=True)
        else:
            logger.info('该代理已收录..')
    if page <= result['data']['last_page']:
        page += 1
        api = 'http://ip.jiangxianli.com/api/proxy_ips?page={}'.format(page)
        new_meta = response.meta.copy()
        new_meta['page'] = page
        yield Request(url=api, meta=new_meta, callback=self.parse_jiangxianli)
def closed(self, reason):
    """Log the final proxy-pool size when the spider stops."""
    valid_count = self.redis_db.scard(self.PROXY_SET)
    logger.info('代理池更新完成,有效代理数: %s' % valid_count)
示例9: open
# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
# Firefox
"Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0",
"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
"Mozilla/5.0 (Windows NT 10.0; rv:41.0) Gecko/20100101 Firefox/41.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:43.0) Gecko/20100101 Firefox/43.0",
"Mozilla/5.0 (Windows NT 6.3; rv:41.0) Gecko/20100101 Firefox/41.0",
"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0",
# Safari
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11) AppleWebKit/601.1.56 (KHTML, like Gecko) Version/9.0 Safari/601.1.56",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/601.1.56 (KHTML, like Gecko) Version/9.0 Safari/601.1.56",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 (KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
]
print redis.scard( "csdn:blog_user")
raise
with open('/Users/lifeifei/Downloads/www.csdn.net.sql', 'r') as f:
for line in f:
datas=line.split("#")
url="http://blog.csdn.net/"+datas[0].strip()
print url
send_headers = {
'Host':'blog.csdn.net',
'User-Agent':'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
'Accept':random.choice(USER_AGENTS),
'Connection':'keep-alive'
}
try:
status= requests.get(url,headers=send_headers).status_code
except:
示例10: SSDC
# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
class SSDC(object):
    """Cluster samples by ssdeep (CTPH) similarity using Redis sets.

    Each ssdeep hash body is decomposed into 64-bit-encoded 7-character
    chunks; samples sharing a chunk become match candidates, scored with
    pydeep and grouped transitively.
    """

    def __init__(self):
        self.r = StrictRedis(unix_socket_path=redis_socket, decode_responses=True)

    def __get_all_7_char_chunks(self, h):
        # Every 7-char base64 window of the hash body, decoded and
        # zero-padded to a little-endian 64-bit integer.
        return set((unpack("<Q", base64.b64decode(h[i : i + 7] + "=") + b"\x00\x00\x00")[0] for i in range(len(h) - 6)))

    def __preprocess_hash(self, h):
        """Split an ssdeep hash and return (block_size, chunks, double_chunks)."""
        block_size, block_data, double_block_data = h.split(":")
        # Reduce any sequence of the same char greater than 3 to 3.
        # BUG FIX: re.sub returns the new string — the original discarded
        # the return value, so the reduction was never actually applied.
        block_data = re.sub(r"(\w)\1\1\1(\1+)", r"\1\1\1", block_data)
        double_block_data = re.sub(r"(\w)\1\1\1(\1+)", r"\1\1\1", double_block_data)
        return block_size, self.__get_all_7_char_chunks(block_data), self.__get_all_7_char_chunks(double_block_data)

    def __add_chunks_db(self, p, block_size, chunk, sha256):
        # Index sha256 under every chunk key, and register the chunk key.
        for i in chunk:
            chunk = "ssdeep:{}:{}".format(block_size, i)
            p.sadd(chunk, sha256)
            p.sadd("ssdeep:chunks", chunk)

    def update_chunks_db(self, sha256, deephash):
        """Index *sha256* under every chunk of its ssdeep hash."""
        block_size, chunk, double_chunk = self.__preprocess_hash(deephash)
        p = self.r.pipeline(False)
        self.__add_chunks_db(p, block_size, chunk, sha256)
        self.__add_chunks_db(p, block_size, double_chunk, sha256)
        p.execute()

    def generate_all_chunks(self):
        """(Re)build the chunk index for every known sample."""
        for sha256 in self.r.smembers("hashes_sha256"):
            self.update_chunks_db(sha256, self.r.hget(sha256, "ssdeep"))

    def find_matches(self, key):
        """Score one chunk-bucket's samples against each other with pydeep
        and record positive matches in per-sample sorted sets."""
        similar_hashes = self.r.smembers(key)
        if len(similar_hashes) > 1:
            cur_hash = similar_hashes.pop()
            cur_ssdeep = self.r.hget(cur_hash, "ssdeep")
            p = self.r.pipeline(False)
            for sha256 in similar_hashes:
                score = pydeep.compare(cur_ssdeep.encode("utf-8"), self.r.hget(sha256, "ssdeep").encode("utf-8"))
                if score > 0:
                    key1 = "ssdeep:matches_{}".format(cur_hash)
                    key2 = "ssdeep:matches_{}".format(sha256)
                    p.zadd(key1, score, sha256)
                    p.zadd(key2, score, cur_hash)
                    p.sadd("ssdeep:all_matches", key1)
                    p.sadd("ssdeep:all_matches", key2)
            p.execute()

    def compare_similar_chunks(self):
        """Run find_matches over every indexed chunk bucket."""
        for key in self.r.smembers("ssdeep:chunks"):
            self.find_matches(key)

    def make_groups(self):
        """Partition samples into groups via their recorded matches."""
        all_hashes = self.r.smembers("hashes_sha256")
        while all_hashes:
            cur_hash = all_hashes.pop()
            matches = self.r.zrange("ssdeep:matches_{}".format(cur_hash), 0, -1)
            if matches:
                if isinstance(matches, list):
                    matches = set(matches)
                else:
                    matches = set([matches])
                all_hashes -= matches  # each sample lands in one group only
                matches |= set([cur_hash])
            else:
                # NOTE: Should we make a group?
                # matches = set([cur_hash])
                self.r.sadd("ssdeep:no_matches", cur_hash)
                continue
            key = "ssdeep:group_{}".format(self.r.scard("ssdeep:groups"))
            self.r.sadd("ssdeep:groups", key)
            self.r.sadd(key, *matches)

    def clean_groups(self):
        """Drop all group/match bookkeeping keys."""
        self.r.delete(*self.r.smembers("ssdeep:groups"))
        self.r.delete(*self.r.smembers("ssdeep:all_matches"))
        self.r.delete("ssdeep:groups")
        self.r.delete("ssdeep:all_matches")
        self.r.delete("ssdeep:no_matches")

    # ########## Querying ##########
    def get_all_groups(self):
        """Return [(group_key, member_set), ...] for every group."""
        return [(g, self.r.smembers(g)) for g in self.r.smembers("ssdeep:groups")]

    def get_group_samples(self, group):
        """Return the member sha256 set of one group."""
        return self.r.smembers(group)