当前位置: 首页>>代码示例>>Python>>正文


Python StrictRedis.scard方法代码示例

本文整理汇总了Python中redis.StrictRedis.scard方法的典型用法代码示例。如果您正苦于以下问题:Python StrictRedis.scard方法的具体用法?Python StrictRedis.scard怎么用?Python StrictRedis.scard使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在redis.StrictRedis的用法示例。


在下文中一共展示了StrictRedis.scard方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: ProxyCheckSpider

# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
class ProxyCheckSpider(Spider):
    '''Spider that re-validates every proxy currently in the redis pool.

    Each stored proxy is tried against a randomly chosen validator URL;
    working proxies stay in the set, dead ones are removed.
    '''
    name = 'proxy_check'

    def __init__(self, mode='prod', *args, **kwargs):
        # Select the config file for the requested environment.
        if mode == 'prod':
            LOCAL_CONFIG_YAML = './hq-proxies.yml'
        elif mode == 'test':
            LOCAL_CONFIG_YAML = './hq-proxies.test.yml'
        else:
            # The original fell through to a NameError on an unknown mode;
            # fail with an explicit message instead.
            raise ValueError('unknown mode: %r (expected "prod" or "test")' % mode)
        with open(LOCAL_CONFIG_YAML, 'r', encoding='utf-8') as f:
            # safe_load avoids arbitrary object construction from the YAML
            # file (yaml.load without a Loader is unsafe and deprecated).
            LOCAL_CONFIG = yaml.safe_load(f)

        self.redis_db = StrictRedis(
            host=LOCAL_CONFIG['REDIS_HOST'],
            port=LOCAL_CONFIG['REDIS_PORT'],
            password=LOCAL_CONFIG['REDIS_PASSWORD'],
            db=LOCAL_CONFIG['REDIS_DB']
        )

        # Pool of (url, expected_response_prefix) pairs used for validation.
        self.validator_pool = set()
        for validator in LOCAL_CONFIG['PROXY_VALIDATORS']:
            self.validator_pool.add((validator['url'], validator['startstring']))
        self.PROXY_COUNT = LOCAL_CONFIG['PROXY_COUNT']
        self.PROXY_SET = LOCAL_CONFIG['PROXY_SET']

    def start_requests(self):
        """Yield one validation request per proxy currently in the pool."""
        logger.info('测试代理池内代理质量...')
        self.redis_db.set(self.PROXY_COUNT, self.redis_db.scard(self.PROXY_SET))
        for proxy in self.redis_db.smembers(self.PROXY_SET):
            proxy = proxy.decode('utf-8')
            vaurl, vastart = random.choice(list(self.validator_pool))
            yield Request(url=vaurl, meta={'proxy': proxy, 'startstring': vastart}, callback=self.checkin, dont_filter=True)

    def checkin(self, response):
        """Keep the proxy if the validator page starts with the expected string,
        drop it from the pool otherwise."""
        res = response.body_as_unicode()
        if 'startstring' in response.meta and res.startswith(response.meta['startstring']):
            proxy = response.meta['proxy']
            self.redis_db.sadd(self.PROXY_SET, proxy)
            logger.info('可用代理+1  %s' % proxy)
        else:
            proxy = response.url if 'proxy' not in response.meta else response.meta['proxy']
            self.redis_db.srem(self.PROXY_SET, proxy)
            logger.info('无效代理  %s' % proxy)
        # The original ended both branches with `yield None`, which scrapy
        # simply ignores; returning nothing is equivalent and clearer.

    def closed(self, reason):
        """Persist the final pool size when the spider finishes."""
        pcount = self.redis_db.scard(self.PROXY_SET)
        logger.info('代理池测试完成,有效代理数: %s' % pcount)
        self.redis_db.set(self.PROXY_COUNT, pcount)
开发者ID:BeanWei,项目名称:Scrapy-proxies,代码行数:54,代码来源:proxy_spider.py

示例2: Redis

# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
class Redis(BaseDB):
    """Redis-backed queue of users waiting for a chat partner."""

    def __init__(self, host='localhost', port=6379, db=0):
        super().__init__()
        self.db = StrictRedis(host=host, port=port, db=db)
        # True when more than one user is queued (i.e. a pair is possible).
        self.available = False

    def push_to_available(self, user_id):
        """Add *user_id* to the waiting queue and refresh the flag."""
        self.db.sadd('available', user_id)  # Add user to awaiting queue
        self.check_available()

    def pop_first_available(self, self_user_id):
        """Block until an interlocutor other than *self_user_id* may be
        available, then return one chosen at random."""
        while not self.available:
            # BUG FIX: the original called threading.Event().wait() with no
            # timeout on a fresh Event that is never set, so this loop
            # blocked forever.  Wait with a short timeout so updates made by
            # check_available() (from other threads) are actually observed.
            threading.Event().wait(0.1)

        res = [int(x) for x in self.db.smembers('available')]  # All possible interlocutors
        shuffle(res)  # Randomize queue
        if self_user_id in res:  # User may be listed too
            res.remove(self_user_id)  # If so, remove him
        res = res.pop()  # Get random interlocutor
        self.check_available()
        return res

    def remove(self, user_id):
        """Drop *user_id* from the waiting queue and refresh the flag."""
        self.db.srem('available', user_id)
        self.check_available()

    def check_available(self):
        """Refresh the flag: pairing requires more than one queued user."""
        self.available = self.db.scard('available') - 1 > 0
开发者ID:Elishanto,项目名称:chatbotscommunity-entry-bot,代码行数:30,代码来源:providers.py

示例3: ganswer_post

# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
def ganswer_post(request):
    '''
    i am assuming that the above condition noofusers==totusers would have already initialized
    the wordpks in redis for the first time, so you get that and put it in the user session after updating it!
    remove the first pk from the wordpks and update wordpks in user session to wordpks[1:]
    continue till wordpks is empty which will signify that all the words have been given to user
    '''
    username = str(request.user)
    print "inside ganswer_post for user:"+username
    pref = settings.MY_PREFIX
    facility = request.POST.get('facility')
    prefg = pref+":"+facility
    response_dict = {'done':False}
    
    wordpks = request.session.get('wordpks', 0)
    print 'initial value of wordpks:',wordpks

    if wordpks == 0:
        # just adds wordpks,totwords to user session
        print "wordpks not in session!"
        rd = StrictRedis()
        d = rd.hgetall(prefg+":hash")
        # store this dictionary in the session first
        # there is no need to do that!
        # request.session['hash'] = d
        # print "stored the hash dictionary in session"
        # print "wordpks: ",request.session['hash']['wordpks']
        wordpks = d['wordpks']


        # our first word, there won't be any user input here!
        wordpks = wordpks.split('-')
        request.session['totwords'] = len(wordpks)
        print "wordpks:"+str(wordpks)
        print 'now removing',request.user,'from pref:groupname'
        print rd.keys(pref+"*")
        print 'cardinality changed from',rd.scard(prefg),'to',
        rd.srem(prefg, str(request.user))
        print rd.scard(prefg)
        if rd.scard(prefg) == 0:
            # delete this key and hash
            print "now deleting the group hash"
            rd.delete(prefg)
            rd.delete(prefg+":hash")
            print rd.keys(pref+"*")
    else:
        # get user_input and send message that user gave the answer
        print "wordpks in session"
        print "wordpks:"+wordpks
        #take and store the user input here
        msgword = "the user "+username+' entered the word '+request.POST.get('input_word')
        print msgword
        if not (wordpks == [] or wordpks == ['']):
            #print "adhicha word"
            correct_ans=request.session.get('prev_word').strip()
            ans=request.POST.get('input_word').strip()
            if(str(correct_ans)==str(ans)):
                print "correct"
                marks=1
            else:
                print "wrong"
                marks=0
            obj1=GroupFinalResult.objects.filter(re_user=request.user).order_by('-starttime')[:1]
            currentobj=obj1[0]
            #print str(currentobj.starttime)
            print str(request.session.get('prev_word'))
            print str(request.POST.get('input_word'))
            ans=request.POST.get('input_word')
            currentobj.marks+=marks
            currentobj.save()
            print str(currentobj.marks)
            obj2=GroupResultTable(usertest=currentobj,correct_ans=correct_ans,ans=ans,marks=marks)   #to store result of each word
            obj2.save()
            print "zala"+str(obj2.marks)

        # calculate the question no. for which this answer was received
        x = int(request.session['totwords'])
        wordpks = wordpks.split('-')
        print 'now splitting wordpks...'
        print wordpks
        lenwordpks = 0 if wordpks == [''] else len(wordpks)
        currentqno = x - lenwordpks
        # currentqno != 0 because lenwordpks < x always in this else block
        # and it is equal in the if block above this else block
        # let's publish this message, shall we?
        redis_publisher = RedisPublisher(facility = facility, broadcast = True)
        # TODO make this json for consistency
        msgword = username+", gave the answer for question no. "+str(currentqno);
        message = RedisMessage(msgword)
        redis_publisher.publish_message(message)

    if wordpks == [] or wordpks == ['']:
        print "wordpks is empty"
        # no more words to dispatch redirect to result page
        # delete the session variables here
        del request.session['wordpks']
        del request.session['totwords']
        # del request.session['hash']
        '''
            so the thing is that we should display the results only 
#.........这里部分代码省略.........
开发者ID:murtraja,项目名称:wordify,代码行数:103,代码来源:views.py

示例4: StrictRedis

# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
import json
from redis import StrictRedis

# redis server config
rs = StrictRedis('10.118.187.21')
# Name of the redis set holding the scraped shop-info JSON blobs.
rs_sname_result = 'tmall_shopinfo_temp'

# CSV column headers for the export.
# NOTE(review): 'brnad' looks like a typo for 'brand', but the header is
# kept as-is because downstream consumers may rely on the exact name.
title = 'name,href,addr,brnad,monthsale,productsum,dsr_desc_mark,' \
        'dsr_desc_avg,dsr_service_mark,dsr_service_avg,dsr_sending_mark,' \
        'dsr_sending_avg,sgr,srn,encryptedUserId,productDataNid_1,' \
        'product_link_1,price_1,productDataNid_2,product_link_2,price_2,' \
        'productDataNid_3,product_link_3,price_3,' \
        'productDataNid_4,product_link_4,price_4,shopDataUid'.split(',')

# Drain the set: SPOP removes one member per iteration until SCARD hits 0.
# This is destructive -- after the loop the redis set is empty.
result = list()
n = 0
while rs.scard(rs_sname_result):
    result.append(json.loads(rs.spop(rs_sname_result)))
    n += 1
    print n  # Python 2 print statement: progress counter
# file_path = 'd:/spider/tmall/baseInfo'
# CentOS
file_path = '/app/tmall_temp'
# MyCsv is a project-local helper imported outside this view -- presumably
# writes the rows to <file_path>/shopInfo with the header above; confirm.
csv_writer = MyCsv.Write_Csv(path=file_path,
                             name='shopInfo',
                             title=title,
                             result=result
                             )
csv_writer.add_title_data()
rs.delete(rs_sname_result)
开发者ID:yangmingsong,项目名称:python,代码行数:32,代码来源:temp.py

示例5: RedisConnector

# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
class RedisConnector (Connector):
	def __init__ (self, *args, **kw):
		""" Wrap a StrictRedis client; all arguments are forwarded to it. """
		self.handler = StrictRedis(*args, **kw)

	def getkey (self, model):
		""" Build the redis hash key '<prefix>:<id>' for *model*. """
		prefix = model.getprefix()
		model_id = model.getid()
		return prefix + ':' + model_id

	def getpipe (self, pipe=None):
		""" Return *pipe* when given, else a fresh transactional pipeline. """
		if pipe is not None:
			return pipe
		return self.handler.pipeline(transaction=True)

	def save (self, model, pipe=None):
		""" Persist pending model changes (and index updates) to redis.

		When *pipe* is given, commands are only queued on it and the caller
		is responsible for executing; otherwise a local pipeline is executed
		here.
		"""
		_pipe = self.getpipe(pipe)

		# Maintain secondary indexes for indexed/unique fields only.
		for field in model.getfields().values():
			if not field.index and not field.unique:
				continue

			if field.name in model._dels:
				# Field was deleted -> drop its index entry.
				self._del_idx(
					model=model,
					field=field,
					pipe=_pipe,
				)

			elif field.name in model._diff:
				# Field changed -> (re)write its index entry.
				self._save_idx(field, model, _pipe)

		# Remove deleted hash fields, unless the model is known to be new.
		if model._exists is not False and len(model._dels):
			_pipe.hdel(self.getkey(model), *list(model._dels))

		# Write the changed hash fields.
		if len(model._diff):
			_pipe.hmset(self.getkey(model), model._diff)

		# Register the id in the prefix set if the model may be new.
		if model._exists is not True:
			_pipe.sadd(model.getprefix(), model.getid())

		# Execute only if we own the pipeline and queued something.
		if pipe is None and len(_pipe):
			_pipe.execute()

	def delete (self, model, pipe=None):
		""" Delete model within optionally given pipe. """

		_pipe = self.getpipe(pipe)

		# Drop every secondary index entry first.
		for field in model.getfields().values():
			if field.index or field.unique:
				self._del_idx(field, model, _pipe)

		# Remove the model hash itself.
		_pipe.delete(self.getkey(model))

		# Deregister the id unless the model is known to not exist.
		if model._exists is not False:
			_pipe.srem(model.getprefix(), model.getid())

		# Execute only if we own the pipeline and queued something.
		if pipe is None and len(_pipe):
			_pipe.execute()

	def exists (self, model):
		""" Check whether the model's hash key exists in redis. """
		key = self.getkey(model)
		return self.handler.exists(key)

	def all (self, model_cls):
		""" Return all model instances id's (members of the prefix set). """
		return self.handler.smembers(model_cls.getprefix())

	def count_all (self, model_cls):
		""" Return the number of stored instances of *model_cls* (SCARD). """
		prefix = model_cls.getprefix()
		return self.handler.scard(prefix)

	def get (self, model, name):
		""" Return value of model hash key.

		PY3K is a module-level flag defined outside this block -- presumably
		true on Python 3, where redis returns bytes that must be decoded;
		confirm against the module header.
		"""

		val = self.handler.hget(self.getkey(model), name)
		return val.decode('utf-8') if PY3K and val is not None else val

	def getall (self, model):
		""" Return model data (all hash keys) decoded to text. """
		raw = self.handler.hgetall(self.getkey(model))
		return {
			key.decode(encoding='UTF-8'): val.decode(encoding='UTF-8')
			for key, val in raw.items()
		}

	@staticmethod
	def idx_key (prefix, field_name, val):
		""" Build the index key '<prefix>:<field_name>:<val>'.

		NOTE(review): the `unicode` branch implies this module also targets
		Python 2 when PY3K is false; PY3K is defined outside this block.
		"""
		val = str(val) if PY3K else unicode(val)
		return ':'.join((prefix, field_name, val))

	@staticmethod
	def ridx_key (prefix, field_name):
		""" Build the reverse-index key '<prefix>:<field_name>'. """
		return '{0}:{1}'.format(prefix, field_name)

	def find (self, expr):
		assert isinstance(expr, BExpr)

		if isinstance(expr.field, IndexField):
			val = expr.field.to_db(expr.val)
			key = self.idx_key(expr.model_cls.getprefix(), expr.field.name, val)
#.........这里部分代码省略.........
开发者ID:jg9lt,项目名称:redisca2,代码行数:103,代码来源:redis.py

示例6: __init__

# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]

#.........这里部分代码省略.........
    def inc_coll_caches_get(self, coll, *fields):
        """Fetch cached increment values for *fields* of collection *coll*.

        :param coll: collection object; only ``coll.name`` is used here.
        :param fields: hash fields to read.
        :ret: return [] if no data exists. Normal structure is:
                [value1, value2, ..., valueN]
        """
        if not fields:
            return []

        key = self.inc_coll_cache_fmt.format(name=coll.name)
        rv = self.rdb.hmget(key, *fields)
        # HMGET yields None for missing fields; drop them before unpacking.
        # (Commented-out debug prints from the original were removed.)
        return [unpackb(r) for r in rv if r]

    def inc_coll_caches_del(self, coll, *fields):
        """Delete the cached increment *fields* for collection *coll*.

        Returns the number of hash fields actually removed (redis HDEL).
        """
        key = self.inc_coll_cache_fmt.format(name=coll.name)
        return self.rdb.hdel(key, *fields)

    def uniq_count_coll_cache_set(self, coll, ts, tagging, values):
        """
        Add *values* to the unique-count set of (coll, tagging, ts).

        :param values: should be an iterable object containing members
        """
        # Pack members so arbitrary python objects can be stored in the set.
        # NOTE(review): an empty *values* would make sadd raise -- confirm
        # callers never pass an empty iterable.
        values = {packb(v) for v in values}
        key_fmt = self.unique_count_coll_cache_fmt
        key = key_fmt.format(name=coll.name, tagging=tagging, ts=ts)
        return self.rdb.sadd(key, *values)

    def uniq_count_coll_cache_get(self, coll, tagging, timestamps, count_only=False):
        """Read the unique-count sets for each ts in *timestamps*.

        :param count_only: when True, return only the cardinality (SCARD)
            per timestamp; otherwise return the unpacked member sets.
        :ret: list with one entry per timestamp, in input order.
        """
        key_fmt = self.unique_count_coll_cache_fmt
        rv = []
        for ts in timestamps:
            key = key_fmt.format(name=coll.name, tagging=tagging, ts=ts)
            if count_only:
                count = self.rdb.scard(key)
                rv.append(count)
            else:
                members = self.rdb.smembers(key)
                rv.append({unpackb(m) for m in members})
        return rv

    def uniq_count_coll_cache_pop(self, coll, tagging, timestamps, number):
        """Pop up to *number* members from each (coll, tagging, ts) set.

        :ret: list of unpacked member sets, one per timestamp.
        :note: Redis `SPOP key [count]` command, The count argument will be
               available in a later version and is not available
               in 2.6, 2.8, 3.0.
               Now use SRANDMEMBER and SREM commands to mimic the effect of
               SPOP count.
        """
        key_fmt = self.unique_count_coll_cache_fmt
        rv = []
        for ts in timestamps:
            key = key_fmt.format(name=coll.name, tagging=tagging, ts=ts)
            # :: srandmember + srem == spop(key, number)
            members = self.rdb.srandmember(key, number)
            if members:
                # BUG FIX: SREM with zero members raises a redis error, so
                # only call it when something was actually drawn (the key
                # may be empty or missing).
                self.rdb.srem(key, *members)
            rv.append({unpackb(m) for m in members})
        return rv

    def uniq_count_coll_cache_del(self, coll, tagging, timestamps):
        """Delete the unique-count sets of (coll, tagging) for *timestamps*.

        Key construction is delegated to self._gen_count_keys (defined
        outside this view); returns the number of keys redis removed.
        """
        keys = self._gen_count_keys(coll.name, tagging,
                                    'unique_count', timestamps)
        return self.rdb.delete(*keys)

    def sorted_count_coll_cache_set(self, coll, ts, tagging, values):
        """
        :param values: should be a dict of <member: score> pair
开发者ID:JasonLai256,项目名称:plumbca,代码行数:70,代码来源:backend.py

示例7: SnapshotConnector

# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]

#.........这里部分代码省略.........

    def merge(self, events):
        '''
            Merge a list of events into one set.
            The key of the set is <event1>|<event2>|<event3>|...

            The union (SUNIONSTORE) is cached in redis for 300 seconds so
            repeated comparisons over the same events can reuse it.
        '''
        events = sorted(events, key=int)
        out_key = '|'.join(map(str, events))
        if not self.r.exists(out_key):
            p = self.r.pipeline(False)
            p.sunionstore(out_key, *['event_vals:{}'.format(eid) for eid in events])
            p.expire(out_key, 300)
            p.execute()
        return out_key

    def intersection(self, events):
        '''
            Keeps only the values in *all* the sets
            The key of the set is <event1>&<event2>&<event3>&...
        '''
        # Sort numerically so the cache key is canonical for any ordering.
        sorted_ids = sorted(events, key=int)
        out_key = '&'.join(map(str, sorted_ids))
        if not self.r.exists(out_key):
            pipe = self.r.pipeline(False)
            source_keys = ['event_vals:{}'.format(eid) for eid in sorted_ids]
            pipe.sinterstore(out_key, *source_keys)
            pipe.expire(out_key, 300)
            pipe.execute()
        return out_key

    def events_similarities(self, *events):
        '''
            Returns the intersection and the total amount of values in multiple events
        '''
        common = self.r.scard(self.intersection(events))
        total = self.r.scard(self.merge(events))
        return common, total

    # ##### Group functions #####

    def get_events_in_group(self, name):
        """Return the event ids stored in group *name* (redis SMEMBERS)."""
        return self.r.smembers(name)

    def make_group(self, name, *events):
        '''
            Create a group of events.

            Raises ValueError if a set named *name* already exists.
        '''
        if not self.r.exists(name):
            self.r.sadd(name, *events)
            self.r.sadd('groups', name)
        else:
            # ValueError is more precise than the original bare Exception
            # and stays backward compatible for callers catching Exception.
            raise ValueError('Group name already exists, maybe you want to update.')

    def update_group(self, name, *events):
        '''
            Update a group of events
        '''
        # Unlike make_group, this silently extends an existing group (SADD
        # is idempotent) and (re)registers it in the global 'groups' set.
        self.r.sadd(name, *events)
        self.r.sadd('groups', name)

    def delete_all_groups(self):
        """Delete every group currently registered in the 'groups' set."""
        group_names = self.r.smembers('groups')
        for group_name in group_names:
            self.del_group(group_name)

    def del_group(self, name):
        '''
            Delete a group of events
        '''
        # NOTE(review): the group key is deleted but *name* is not removed
        # from the 'groups' registry set, leaving a stale entry -- confirm
        # whether that is intended (delete_all_groups relies on 'groups').
        self.r.delete(name)
开发者ID:FloatingGhost,项目名称:misp-workbench,代码行数:70,代码来源:connector.py

示例8: ProxyFetchSpider

# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]

#.........这里部分代码省略.........
            else:
                logger.info('该代理已收录..')
    
    def parse_66ip(self, response):
        ''' 
        @url http://www.66ip.cn/nmtq.php?getnum=100&isp=0&anonymoustype=3&start=&ports=&export=&ipaddress=&area=1&proxytype=0&api=66ip
        '''
        logger.info('开始爬取66ip')
        if 'proxy' in response.meta:
            logger.info('=>使用代理%s' % response.meta['proxy'])
        res = response.body_as_unicode()
        # Match every "ip:port" pair in the plain-text response
        # (raw string fixes the non-raw regex; stray debug print removed).
        for addr in re.findall(r'\d+\.\d+\.\d+\.\d+\:\d+', res):
            proxy = 'http://' + addr
            logger.info('验证: %s' % proxy)
            if not self.redis_db.sismember(self.PROXY_SET, proxy):
                vaurl, vastart = random.choice(list(self.validator_pool))
                yield Request(url=vaurl, meta={'proxy': proxy, 'startstring': vastart}, callback=self.checkin, dont_filter=True)
            else:
                logger.info('该代理已收录..')
    
    def parse_ip181(self, response):
        ''' 
        @url http://www.ip181.com/
        '''
        logger.info('开始爬取ip181')
        if 'proxy' in response.meta:
            logger.info('=>使用代理%s' % response.meta['proxy'])
        for tr in response.css('table tbody tr'):
            # Extract the row once instead of re-parsing it per column;
            # 'anonymity' also avoids shadowing the builtin `type`.
            cells = tr.css('td::text').extract()
            ip = cells[0]
            port = cells[1]
            anonymity = cells[2]
            proxy = 'http://%s:%s' % (ip, port)
            if anonymity != '高匿':
                logger.info('丢弃非高匿代理:%s' % proxy)
                continue
            logger.info('验证: %s' % proxy)
            if not self.redis_db.sismember(self.PROXY_SET, proxy):
                vaurl, vastart = random.choice(list(self.validator_pool))
                yield Request(url=vaurl, meta={'proxy': proxy, 'startstring': vastart}, callback=self.checkin, dont_filter=True)
            else:
                logger.info('该代理已收录..')
    
    def parse_kxdaili(self, response):
        ''' 
        @url http://www.kxdaili.com/dailiip/1/1.html#ip
        '''
        logger.info('开始爬取kxdaili')
        if 'proxy' in response.meta:
            logger.info('=>使用代理%s' % response.meta['proxy'])
        url_pattern = 'http://www.kxdaili.com/dailiip/1/%s.html#ip'
        try:
            # BUG FIX: r'(\d+)\.html' captures the full page number; the
            # original '(\d)+\.html' only captured the last digit.
            page = re.search(r'(\d+)\.html', response.url).group(1)
            page = int(page)
        except Exception as e:
            logger.exception(e)
            logger.error(response.url)
            # Without a page number the pagination check below would raise
            # NameError; stop this callback after logging instead.
            return
        for tr in response.css('table.ui.table.segment tbody tr'):
            ip = tr.css('td::text').extract()[0]
            port = tr.css('td::text').extract()[1]
            proxy = 'http://%s:%s' % (ip, port)
            logger.info('验证: %s' % proxy)
            if not self.redis_db.sismember(self.PROXY_SET, proxy):
                vaurl, vastart = random.choice(list(self.validator_pool))
                yield Request(url=vaurl, meta={'proxy': proxy, 'startstring': vastart}, callback=self.checkin, dont_filter=True)
            else:
                logger.info('该代理已收录..')
        if page < 3: # 爬取前3页
            page += 1
            new_url = url_pattern % page
            new_meta = response.meta.copy()
            new_meta['page'] = page
            yield Request(url=new_url, meta=new_meta, callback=self.parse_kxdaili)

    def parse_jiangxianli(self, response):
        '''
        @url = "http://ip.jiangxianli.com/api/proxy_ips?page=" + str(page)
        '''
        logger.info('开始爬取jiangxianli')
        if 'proxy' in response.meta:
            logger.info('=>使用代理%s' % response.meta['proxy'])
        # BUG FIX: the original hard-coded page = 1 and re-yielded the same
        # API URL, so pagination never advanced.  Carry the current page
        # through response.meta and request the *next* page.
        page = response.meta.get('page', 1)
        result = json.loads(response.body)
        for r in result['data']['data']:
            proxy = 'http://%s:%s' % (r['ip'], r['port'])
            if not self.redis_db.sismember(self.PROXY_SET, proxy):
                vaurl, vastart = random.choice(list(self.validator_pool))
                yield Request(url=vaurl, meta={'proxy': proxy, 'startstring': vastart}, callback=self.checkin, dont_filter=True)
            else:
                logger.info('该代理已收录..')
        if page < result['data']['last_page']:
            page += 1
            next_api = 'http://ip.jiangxianli.com/api/proxy_ips?page={}'.format(page)
            new_meta = response.meta.copy()
            new_meta['page'] = page
            yield Request(url=next_api, meta=new_meta, callback=self.parse_jiangxianli)
            
    
    def closed(self, reason):
        """Log the final pool size when the spider shuts down."""
        valid_count = self.redis_db.scard(self.PROXY_SET)
        logger.info('代理池更新完成,有效代理数: %s' % valid_count)
开发者ID:BeanWei,项目名称:Scrapy-proxies,代码行数:104,代码来源:proxy_spider.py

示例9: open

# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
    # Firefox
    "Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
    "Mozilla/5.0 (Windows NT 10.0; rv:41.0) Gecko/20100101 Firefox/41.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:43.0) Gecko/20100101 Firefox/43.0",
    "Mozilla/5.0 (Windows NT 6.3; rv:41.0) Gecko/20100101 Firefox/41.0",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0",

    # Safari
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11) AppleWebKit/601.1.56 (KHTML, like Gecko) Version/9.0 Safari/601.1.56",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/601.1.56 (KHTML, like Gecko) Version/9.0 Safari/601.1.56",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 (KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
]
print redis.scard( "csdn:blog_user")
raise
with open('/Users/lifeifei/Downloads/www.csdn.net.sql', 'r') as f:
    for line in f:
        datas=line.split("#")
        url="http://blog.csdn.net/"+datas[0].strip()
        print url
        send_headers = {
             'Host':'blog.csdn.net',
             'User-Agent':'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
             'Accept':random.choice(USER_AGENTS),
             'Connection':'keep-alive'
            }
        try:
            status= requests.get(url,headers=send_headers).status_code
        except:
开发者ID:openslack,项目名称:openslack-crawler,代码行数:32,代码来源:csdn.py

示例10: SSDC

# 需要导入模块: from redis import StrictRedis [as 别名]
# 或者: from redis.StrictRedis import scard [as 别名]
class SSDC(object):
    """Group samples by ssdeep (CTPH) similarity, using redis as backend.

    Fuzzy hashes are split into 7-character chunks; samples sharing a chunk
    are candidate matches and get scored with pydeep.compare.
    """

    def __init__(self):
        # redis_socket is a module-level setting defined outside this block.
        self.r = StrictRedis(unix_socket_path=redis_socket, decode_responses=True)

    def __get_all_7_char_chunks(self, h):
        """Return the set of 64-bit ints encoded by each 7-char window of *h*."""
        return set((unpack("<Q", base64.b64decode(h[i : i + 7] + "=") + b"\x00\x00\x00")[0] for i in range(len(h) - 6)))

    def __preprocess_hash(self, h):
        """Split an ssdeep hash 'blocksize:data:double_data' and chunk both parts.

        Returns (block_size, chunks, double_chunks).
        """
        block_size, block_data, double_block_data = h.split(":")

        # Reduce any sequence of the same char greater than 3 to 3.
        # BUG FIX: re.sub returns a new string; the original discarded the
        # result, so the reduction never actually took effect.
        block_data = re.sub(r"(\w)\1\1\1(\1+)", r"\1\1\1", block_data)
        double_block_data = re.sub(r"(\w)\1\1\1(\1+)", r"\1\1\1", double_block_data)

        return block_size, self.__get_all_7_char_chunks(block_data), self.__get_all_7_char_chunks(double_block_data)

    def __add_chunks_db(self, p, block_size, chunk, sha256):
        """Queue SADDs mapping every chunk value to *sha256* on pipeline *p*."""
        for value in chunk:
            # Fresh name instead of rebinding the 'chunk' parameter (shadowing).
            chunk_key = "ssdeep:{}:{}".format(block_size, value)
            p.sadd(chunk_key, sha256)
            p.sadd("ssdeep:chunks", chunk_key)

    def update_chunks_db(self, sha256, deephash):
        """Index one sample's ssdeep hash into the chunk database."""
        block_size, chunk, double_chunk = self.__preprocess_hash(deephash)
        p = self.r.pipeline(False)
        self.__add_chunks_db(p, block_size, chunk, sha256)
        self.__add_chunks_db(p, block_size, double_chunk, sha256)
        p.execute()

    def generate_all_chunks(self):
        """Index every known sample (hashes_sha256 set) into the chunk db."""
        for sha256 in self.r.smembers("hashes_sha256"):
            self.update_chunks_db(sha256, self.r.hget(sha256, "ssdeep"))

    def find_matches(self, key):
        """Score all samples sharing chunk *key* against one of them."""
        similar_hashes = self.r.smembers(key)
        if len(similar_hashes) > 1:
            cur_hash = similar_hashes.pop()
            cur_ssdeep = self.r.hget(cur_hash, "ssdeep")
            p = self.r.pipeline(False)
            for sha256 in similar_hashes:
                score = pydeep.compare(cur_ssdeep.encode("utf-8"), self.r.hget(sha256, "ssdeep").encode("utf-8"))
                if score > 0:
                    key1 = "ssdeep:matches_{}".format(cur_hash)
                    key2 = "ssdeep:matches_{}".format(sha256)
                    # NOTE: positional zadd(key, score, member) matches the
                    # pre-3.0 redis-py API this project targets.
                    p.zadd(key1, score, sha256)
                    p.zadd(key2, score, cur_hash)
                    p.sadd("ssdeep:all_matches", key1)
                    p.sadd("ssdeep:all_matches", key2)
            p.execute()

    def compare_similar_chunks(self):
        """Run find_matches over every chunk recorded so far."""
        for key in self.r.smembers("ssdeep:chunks"):
            self.find_matches(key)

    def make_groups(self):
        """Partition samples into groups of mutually matching hashes."""
        all_hashes = self.r.smembers("hashes_sha256")
        while all_hashes:
            cur_hash = all_hashes.pop()
            matches = self.r.zrange("ssdeep:matches_{}".format(cur_hash), 0, -1)
            if matches:
                if isinstance(matches, list):
                    matches = set(matches)
                else:
                    matches = set([matches])
                all_hashes -= matches
                matches |= set([cur_hash])
            else:
                # NOTE: Should we make a group?
                # matches = set([cur_hash])
                self.r.sadd("ssdeep:no_matches", cur_hash)
                continue
            # Group ids are sequential: the current group count is the next id.
            key = "ssdeep:group_{}".format(self.r.scard("ssdeep:groups"))
            self.r.sadd("ssdeep:groups", key)
            self.r.sadd(key, *matches)

    def clean_groups(self):
        """Drop all grouping/matching state (the chunk index is kept)."""
        self.r.delete(*self.r.smembers("ssdeep:groups"))
        self.r.delete(*self.r.smembers("ssdeep:all_matches"))
        self.r.delete("ssdeep:groups")
        self.r.delete("ssdeep:all_matches")
        self.r.delete("ssdeep:no_matches")

    # ########## Querying ##########

    def get_all_groups(self):
        """Return [(group_key, member_set), ...] for every stored group."""
        return [(g, self.r.smembers(g)) for g in self.r.smembers("ssdeep:groups")]

    def get_group_samples(self, group):
        """Return the sha256 members of one *group* key."""
        return self.r.smembers(group)


注:本文中的redis.StrictRedis.scard方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。