This article collects typical usage examples of the Python method Helper.Process.populate_set_out. If you are wondering what Process.populate_set_out does, how to call it, or where to find working examples, the curated code samples below should help. You can also read more about the enclosing class, Helper.Process, for further context.
The listing below shows 15 code examples of Process.populate_set_out, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
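All of the examples follow the same pattern: a Process object is created from a section of bin/packages/modules.cfg, messages are popped with get_from_set(), and results are pushed downstream with populate_set_out(). Judging from the calls below, the method takes the message plus an optional destination queue name. Here is a minimal sketch of the assumed API (inferred from the examples, not from Helper.py itself; verify the real signature there):
# Minimal usage sketch; 'MyModule' is a hypothetical config section name
from Helper import Process

p = Process('MyModule')
p.populate_set_out('some message')           # push to this module's default output queue
p.populate_set_out('some message', 'Tags')   # or push to the queue of a named module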
Example 1: len
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
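# Context: likely AIL's feed ingester (Global); it receives "name base64(gzip)" messages, writes each paste to disk, then forwards the filename.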
# LOGGING #
publisher.info("Feed Script started to receive & publish.")
while True:
message = p.get_from_set()
# Recover the streamed message information.
if message is not None:
splitted = message.split()
if len(splitted) == 2:
paste, gzip64encoded = splitted
else:
# TODO Store the name of the empty paste inside a Redis-list.
print "Empty Paste: not processed"
publisher.debug("Empty Paste: {0} not processed".format(paste))
continue
else:
print "Empty Queues: Waiting..."
time.sleep(1)
continue
# Creating the full filepath
filename = os.path.join(os.environ["AIL_HOME"], p.config.get("Directories", "pastes"), paste)
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(filename, "wb") as f:
f.write(base64.standard_b64decode(gzip64encoded))
p.populate_set_out(filename)
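For reference, the gzip64encoded payload decoded above is produced on the feeder side by gzip-compressing the raw content and then base64-encoding it (Example 12's save_crawled_paste does exactly this). A self-contained sketch of the round trip:
import base64
import gzip

raw = b'some paste content'
# feeder side (cf. save_crawled_paste in Example 12)
gzip64encoded = base64.standard_b64encode(gzip.compress(raw)).decode()
# consumer side: Example 1 only base64-decodes and stores the still-gzipped bytes
stored = base64.standard_b64decode(gzip64encoded)
assert gzip.decompress(stored) == raw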
Example 2:
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
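# Context: likely AIL's Mixer; it deduplicates incoming pastes by SHA1 digest and routes them to a queue chosen per feeder.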
relay_message = "{0} {1}".format(paste_name, gzip64encoded)
#relay_message = b" ".join( [paste_name, gzip64encoded] )
digest = hashlib.sha1(gzip64encoded.encode('utf8')).hexdigest()
# Avoid any duplicate coming from any sources
if operation_mode == 1:
if server.exists(digest): # Content already exists
#STATS
duplicated_paste_per_feeder[feeder_name] += 1
else: # New content
# populate Global OR populate another set based on the feeder_name
if feeder_name in FEED_QUEUE_MAPPING:
p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name])
else:
p.populate_set_out(relay_message, 'Mixer')
server.sadd(digest, feeder_name)
server.expire(digest, ttl_key)
# Keep duplicates coming from different sources
elif operation_mode == 2:
# Filter to avoid duplicate
content = server.get('HASH_'+paste_name)
if content is None:
# New content
# Store in redis for filtering
server.set('HASH_'+paste_name, digest)
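FEED_QUEUE_MAPPING is defined outside this excerpt; in the Mixer it is a plain dict from feeder name to destination queue, along these lines (hypothetical values):
# Hypothetical mapping; actual feeder names and queues depend on the deployment
FEED_QUEUE_MAPPING = {"feeder2": "Core"}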
Example 3: len
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
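# Context: likely AIL's Onion module; it queues unknown .onion domains for the crawler and tags onion-related pastes.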
domain = re.findall(url_regex, url)
if len(domain) > 0:
domain = domain[0][4]
else:
continue
if not r_onion.sismember('month_onion_up:{}'.format(date_month), domain) and not r_onion.sismember('onion_down:' + date, domain):
if not r_onion.sismember('onion_domain_crawler_queue', domain):
print('send to onion crawler')
r_onion.sadd('onion_domain_crawler_queue', domain)
msg = '{};{}'.format(url,PST.p_path)
r_onion.sadd('onion_crawler_queue', msg)
#p.populate_set_out(msg, 'Crawler')
else:
for url in fetch(p, r_cache, urls, domains_list, path):
publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_path))
p.populate_set_out('onion;{}'.format(PST.p_path), 'alertHandler')
msg = 'infoleak:automatic-detection="onion";{}'.format(PST.p_path)
p.populate_set_out(msg, 'Tags')
else:
publisher.info('{}Onion related;{}'.format(to_print, PST.p_path))
prec_filename = filename
else:
publisher.debug("Script url is Idling 10s")
#print('Sleeping')
time.sleep(10)
message = p.get_from_set()
Example 4: accordingly
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
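# Context: the generic AIL module template; the canonical get_from_set() / populate_set_out() loop every module builds on.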
if __name__ == '__main__':
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
# Port of the redis instance used by pubsublogger
publisher.port = 6380
# Script is the default channel used for the modules.
publisher.channel = 'Script'
# Section name in bin/packages/modules.cfg
config_section = '<section name>'
# Setup the I/O queues
p = Process(config_section)
# Send a description of the module to the logging system
publisher.info("<description of the module>")
# Endless loop getting messages from the input queue
while True:
# Get one message from the input queue
message = p.get_from_set()
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
time.sleep(1)
continue
# Do something with the message from the queue
something_has_been_done = do_something(message)
# (Optional) Send that thing to the next queue
p.populate_set_out(something_has_been_done)
Example 5: list
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
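# Context: likely AIL's Mail module; it counts e-mail addresses with valid MX records and raises an alert above a threshold.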
r_serv2, PST.get_regex(email_regex))
if MX_values[0] >= 1:
PST.__setattr__(channel, MX_values)
PST.save_attribute_redis(channel, (MX_values[0],
list(MX_values[1])))
pprint.pprint(MX_values)
to_print = 'Mails;{};{};{};Checked {} e-mail(s)'.\
format(PST.p_source, PST.p_date, PST.p_name,
MX_values[0])
if MX_values[0] > is_critical:
publisher.warning(to_print)
#Send to duplicate
p.populate_set_out(filename, 'Duplicate')
else:
publisher.info(to_print)
#Send to ModuleStats
for mail in MX_values[1]:
print('mail;{};{};{}'.format(1, mail, PST.p_date))
p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats')
p.populate_set_out('mail;{}'.format(filename), 'BrowseWarningPaste')
prec_filename = filename
else:
publisher.debug("Script Mails is Idling 10s")
print('Sleeping')
time.sleep(10)
Example 6: set
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
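# Context: likely AIL's Credential module; it alerts on and tags pastes containing many leaked credentials.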
sites_set = set(re.findall(regex_web, content))
message = 'Checked {} credentials found.'.format(len(creds))
if sites_set:
message += ' Related websites: {}'.format(', '.join(sites_set))
to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_rel_path)
print('\n '.join(creds))
# if the number of creds is above the threshold, publish an alert
if len(creds) > criticalNumberToAlert:
print("========> Found more than 10 credentials in this file : {}".format( filepath ))
publisher.warning(to_print)
#Send to duplicate
p.populate_set_out(filepath, 'Duplicate')
msg = 'infoleak:automatic-detection="credential";{}'.format(filepath)
p.populate_set_out(msg, 'Tags')
# Put in form, count occurrences, then send to moduleStats
creds_sites = {}
site_occurence = re.findall(regex_site_for_stats, content)
for site in site_occurence:
    site_domain = site[1:-1]
    creds_sites[site_domain] = creds_sites.get(site_domain, 0) + 1
for url in sites:
Example 7: set
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
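# Context: likely AIL's CreditCards module; it Luhn-validates candidate card numbers and tags matching pastes.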
creditcard_set = set([])
for card in all_cards:
clean_card = re.sub('[^0-9]', '', card)
if lib_refine.is_luhn_valid(clean_card):
print(clean_card, 'is valid')
creditcard_set.add(clean_card)
paste.__setattr__(channel, creditcard_set)
paste.save_attribute_redis(channel, creditcard_set)
pprint.pprint(creditcard_set)
to_print = 'CreditCard;{};{};{};'.format(
paste.p_source, paste.p_date, paste.p_name)
if len(creditcard_set) > 0:
publisher.warning('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_rel_path))
print('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_rel_path))
#Send to duplicate
p.populate_set_out(filename, 'Duplicate')
msg = 'infoleak:automatic-detection="credit-card";{}'.format(filename)
p.populate_set_out(msg, 'Tags')
else:
publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_rel_path))
else:
publisher.debug("Script creditcard is idling 1m")
time.sleep(10)
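lib_refine.is_luhn_valid is not shown in this excerpt; a standard Luhn mod-10 check, which it presumably implements, looks like this (reference sketch only):
def is_luhn_valid(card_number):
    # double every second digit from the right, subtract 9 on overflow,
    # sum everything and test divisibility by 10
    digits = [int(d) for d in str(card_number)]
    checksum = 0
    for i, d in enumerate(reversed(digits)):
        if i % 2 == 1:
            d = d * 2
            if d > 9:
                d -= 9
        checksum += d
    return checksum % 10 == 0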
Example 8: Process
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
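# Context: AIL's Tokenize module; it splits a paste into scored words and forwards them one by one.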
"""
import time
from packages import Paste
from pubsublogger import publisher
from Helper import Process
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'Tokenize'
p = Process(config_section)
# LOGGING #
publisher.info("Tokeniser started")
while True:
message = p.get_from_set()
print(message)
if message is not None:
paste = Paste.Paste(message)
for word, score in paste._get_top_words().items():
if len(word) >= 4:
msg = '{} {} {}'.format(paste.p_path, word, score)
p.populate_set_out(msg)
else:
publisher.debug("Tokeniser is idling 10s")
time.sleep(10)
print "sleepin"
Example 9: range
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
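# Context: a retro-tagging script; it replays items tagged pgp-message into the PgpDump queue.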
# (reconstructed header) the excerpt starts inside this date-range helper; its
# signature is inferred from the call site below: substract_date(first_seen, last_seen)
def substract_date(date_from, date_to):
    date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
    date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
    delta = date_to - date_from  # timedelta
    l_date = []
    for i in range(delta.days + 1):
        date = date_from + datetime.timedelta(i)
        l_date.append(date.strftime('%Y%m%d'))
    return l_date
config_section = 'Keys'
p = Process(config_section)
r_tags = redis.StrictRedis(
host=p.config.get("ARDB_Tags", "host"),
port=p.config.getint("ARDB_Tags", "port"),
db=p.config.getint("ARDB_Tags", "db"),
decode_responses=True)
tag = 'infoleak:automatic-detection="pgp-message"'
# get tag first/last seen
first_seen = r_tags.hget('tag_metadata:{}'.format(tag), 'first_seen')
last_seen = r_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')
l_dates = substract_date(first_seen, last_seen)
# get all tagged items
for date in l_dates:
daily_tagged_items = r_tags.smembers('{}:{}'.format(tag, date))
for item in daily_tagged_items:
p.populate_set_out(item, 'PgpDump')
Example 10: sendEmailNotification
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
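# Context: likely AIL's tracked-terms module; it e-mails subscribers and tags pastes when a tracked term set matches.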
# Send a notification only when the member is in the set
if dico_setname_to_redis[str(the_set)] in server_term.smembers(TrackedTermsNotificationEnabled_Name):
# create mail body
mail_body = ("AIL Framework,\n"
             "New occurrence for term: " + dico_setname_to_redis[str(the_set)] + "\n"
             + full_paste_url + filename)
# Send to every associated email address
for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + dico_setname_to_redis[str(the_set)]):
sendEmailNotification(email, 'Term', mail_body)
# tag paste
for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + dico_setname_to_redis[str(the_set)]):
msg = '{};{}'.format(tag, filename)
p.populate_set_out(msg, 'Tags')
print(the_set, "matched in", filename)
set_name = 'set_' + dico_setname_to_redis[the_set]
new_to_the_set = server_term.sadd(set_name, filename)
new_to_the_set = (new_to_the_set == 1)
# count the number of occurrences of this set
set_value = int(server_term.hincrby(timestamp, dico_setname_to_redis[the_set], int(1)))
# FIXME - avoid using per paste as a set is checked over the entire paste
#1 term per paste
if new_to_the_set:
set_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_setname_to_redis[the_set], int(1)))
server_term.zincrby("per_paste_" + curr_set, dico_setname_to_redis[the_set], float(1))
server_term.zincrby(curr_set, dico_setname_to_redis[the_set], float(1))
Example 11: set
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
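# Context: an older variant of the Credential module; same detection, routed to BrowseWarningPaste instead of Tags.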
sites = re.findall(regex_web, content)  # used to count occurrences
sites_set = set(re.findall(regex_web, content))
message = 'Checked {} credentials found.'.format(len(creds))
if sites_set:
message += ' Related websites: {}'.format(', '.join(sites_set))
to_print = 'Credential;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message)
print('\n '.join(creds))
if len(creds) > critical:
print("========> Found more than 10 credentials in this file : {}".format(filepath))
publisher.warning(to_print)
#Send to duplicate
p.populate_set_out(filepath, 'Duplicate')
#Send to BrowseWarningPaste
p.populate_set_out('credential;{}'.format(filepath), 'BrowseWarningPaste')
# Put in form, count occurrences, then send to moduleStats
creds_sites = {}
site_occurence = re.findall(regex_site_for_stats, content)
for site in site_occurence:
    site_domain = site[1:-1]
    creds_sites[site_domain] = creds_sites.get(site_domain, 0) + 1
for url in sites:
faup.decode(url)
Example 12: TorSplashSpider
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
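# Context: AIL's TorSplashCrawler spider; it crawls via Splash, saves pages and HAR files, and relays crawled pastes to the Mixer.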
# ......... part of the code omitted here .........
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(filename_har+'.json', 'wb') as f:
f.write(json.dumps(response.data['har']).encode())
# save external links in set
#lext = LinkExtractor(deny_domains=self.domains, unique=True)
#for link in lext.extract_links(response):
# self.r_serv_onion.sadd('domain_{}_external_links:{}'.format(self.type, self.domains[0]), link.url)
# self.r_serv_metadata.sadd('paste_{}_external_links:{}'.format(self.type, filename_paste), link.url)
le = LinkExtractor(allow_domains=self.domains, unique=True)
for link in le.extract_links(response):
yield SplashRequest(
link.url,
self.parse,
errback=self.errback_catcher,
endpoint='render.json',
meta={'father': relative_filename_paste, 'root_key': response.meta['root_key']},
args=self.arg_crawler
)
def errback_catcher(self, failure):
# catch all errback failures,
self.logger.error(repr(failure))
if failure.check(ResponseNeverReceived):
request = failure.request
url = request.meta['splash']['args']['url']
father = request.meta['father']
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
time.sleep(10)
# NOTE: 'response' is undefined in this errback; the root_key set in
# parse() travels on the request meta, so recover it from there instead
response_root_key = request.meta.get('root_key')
yield SplashRequest(
    url,
    self.parse,
    errback=self.errback_catcher,
    endpoint='render.json',
    meta={'father': father, 'root_key': response_root_key},
    args=self.arg_crawler
)
else:
print('failure')
#print(failure)
print(failure.type)
#print(failure.request.meta['item'])
'''
#if isinstance(failure.value, HttpError):
elif failure.check(HttpError):
# you can get the response
response = failure.value.response
print('HttpError')
self.logger.error('HttpError on %s', response.url)
#elif isinstance(failure.value, DNSLookupError):
elif failure.check(DNSLookupError):
# this is the original request
request = failure.request
print(DNSLookupError)
print('DNSLookupError')
self.logger.error('DNSLookupError on %s', request.url)
#elif isinstance(failure.value, TimeoutError):
elif failure.check(TimeoutError):
request = failure.request
print('TimeoutError')
print(TimeoutError)
self.logger.error('TimeoutError on %s', request.url)
'''
def save_crawled_paste(self, filename, content):
if os.path.isfile(filename):
print('File: {} already exist in submitted pastes'.format(filename))
return False
try:
gzipencoded = gzip.compress(content.encode())
gzip64encoded = base64.standard_b64encode(gzipencoded).decode()
except:
print("file error: {}".format(filename))
return False
# send paste to Global
relay_message = "{0} {1}".format(filename, gzip64encoded)
self.p.populate_set_out(relay_message, 'Mixer')
# increase nb of paste by feeder name
self.r_serv_log_submit.hincrby("mixer_cache:list_feeder", "crawler", 1)
# tag crawled paste
msg = 'infoleak:submission="crawler";{}'.format(filename)
self.p.populate_set_out(msg, 'Tags')
return True
Example 13: open
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
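# Context: a newer Global-style import loop; it stores the decoded paste, strips PASTES_FOLDER from the path, and forwards it.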
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
os.makedirs(dirname)
decoded = base64.standard_b64decode(gzip64encoded)
with open(filename, 'wb') as f:
f.write(decoded)
'''try:
decoded2 = gunzip_bytes_obj(decoded)
except:
decoded2 =''
type = magic.from_buffer(decoded2, mime=True)
if type!= 'text/x-c++' and type!= 'text/html' and type!= 'text/x-c' and type!= 'text/x-python' and type!= 'text/x-php' and type!= 'application/xml' and type!= 'text/x-shellscript' and type!= 'text/plain' and type!= 'text/x-diff' and type!= 'text/x-ruby':
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
print(filename)
print(type)
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
'''
# remove PASTES_FOLDER from item path (crawled item + submitted)
if PASTES_FOLDERS in paste:
paste = paste.replace(PASTES_FOLDERS, '', 1)
p.populate_set_out(paste)
processed_paste += 1
Example 14: ip2asn
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
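# Context: likely AIL's Web module; it extracts URLs, forwards them to the Url queue, and decomposes each with faup.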
url_regex = "("+uri_scheme+")\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"
while True:
if message is not None:
filename, score = message.split()
if prec_filename is None or filename != prec_filename:
domains_list = []
PST = Paste.Paste(filename)
client = ip2asn()
for x in PST.get_regex(url_regex):
matching_url = re.search(url_regex, PST.get_p_content())
url = matching_url.group(0)
to_send = "{} {} {}".format(url, PST._get_p_date(), filename)
p.populate_set_out(to_send, 'Url')
faup.decode(url)
domain = faup.get_domain()
subdomain = faup.get_subdomain()
f1 = None
domains_list.append(domain)
publisher.debug('{} Published'.format(url))
if f1 == "onion":
print(domain)
hostl = str(avoidNone(subdomain) + avoidNone(domain))
try:
Example 15: range
# Required import: from Helper import Process [as alias]
# Or: from Helper.Process import populate_set_out [as alias]
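# Context: a retro-tagging script; it replays items tagged bitcoin-address into the module's default output queue.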
# (reconstructed header) as in Example 9, the excerpt starts inside the same
# date-range helper; its signature is inferred from the call site below
def substract_date(date_from, date_to):
    date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
    date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
    delta = date_to - date_from  # timedelta
    l_date = []
    for i in range(delta.days + 1):
        date = date_from + datetime.timedelta(i)
        l_date.append(date.strftime('%Y%m%d'))
    return l_date
config_section = 'Global'
p = Process(config_section)
r_tags = redis.StrictRedis(
host=p.config.get("ARDB_Tags", "host"),
port=p.config.getint("ARDB_Tags", "port"),
db=p.config.getint("ARDB_Tags", "db"),
decode_responses=True)
tag = 'infoleak:automatic-detection="bitcoin-address"'
# get tag first/last seen
first_seen = r_tags.hget('tag_metadata:{}'.format(tag), 'first_seen')
last_seen = r_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')
l_dates = substract_date(first_seen, last_seen)
# get all tagged items
for date in l_dates:
daily_tagged_items = r_tags.smembers('{}:{}'.format(tag, date))
for item in daily_tagged_items:
p.populate_set_out(item)