This article collects typical usage examples of the Python Helper.Process class. If you are wondering what the Process class is for or how to use it, the curated class code examples below should help.
The following 15 code examples of the Process class are shown, ordered by popularity by default.
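Before looking at the individual examples, here is a minimal sketch of the consumer pattern most of them share: a module binds its input/output queues by passing a section name from bin/packages/modules.cfg to Process, then polls the input queue and forwards results. The section name 'MyModule' below is a placeholder, not a real AIL module.

import time

from pubsublogger import publisher
from Helper import Process

if __name__ == '__main__':
    # Port and channel of the redis instance used by pubsublogger
    publisher.port = 6380
    publisher.channel = 'Script'

    # 'MyModule' is a placeholder section name in bin/packages/modules.cfg
    p = Process('MyModule')

    while True:
        message = p.get_from_set()        # pop one message from the input queue
        if message is None:
            time.sleep(1)                 # idle when the queue is empty
            continue
        p.populate_set_out(message)       # push a result to the output queue(s)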
Example 1: main
def main():
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue

            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
        except IOError:
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Example 2: __init__
def __init__(self, type, crawler_options, date, requested_mode, url, domain, port, original_item, *args, **kwargs):
    self.type = type
    self.requested_mode = requested_mode
    self.original_item = original_item
    self.root_key = None
    self.start_urls = url
    self.domains = [domain]
    self.port = str(port)
    date_str = '{}/{}/{}'.format(date['date_day'][0:4], date['date_day'][4:6], date['date_day'][6:8])
    self.full_date = date['date_day']
    self.date_month = date['date_month']
    self.date_epoch = int(date['epoch'])

    self.arg_crawler = {'html': crawler_options['html'],
                        'wait': 10,
                        'render_all': 1,
                        'har': crawler_options['har'],
                        'png': crawler_options['png']}

    config_section = 'Crawler'
    self.p = Process(config_section)

    self.r_cache = redis.StrictRedis(
        host=self.p.config.get("Redis_Cache", "host"),
        port=self.p.config.getint("Redis_Cache", "port"),
        db=self.p.config.getint("Redis_Cache", "db"),
        decode_responses=True)

    self.r_serv_log_submit = redis.StrictRedis(
        host=self.p.config.get("Redis_Log_submit", "host"),
        port=self.p.config.getint("Redis_Log_submit", "port"),
        db=self.p.config.getint("Redis_Log_submit", "db"),
        decode_responses=True)

    self.r_serv_metadata = redis.StrictRedis(
        host=self.p.config.get("ARDB_Metadata", "host"),
        port=self.p.config.getint("ARDB_Metadata", "port"),
        db=self.p.config.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    self.r_serv_onion = redis.StrictRedis(
        host=self.p.config.get("ARDB_Onion", "host"),
        port=self.p.config.getint("ARDB_Onion", "port"),
        db=self.p.config.getint("ARDB_Onion", "db"),
        decode_responses=True)

    self.crawler_path = os.path.join(self.p.config.get("Directories", "crawled"), date_str)

    self.crawled_paste_filemame = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "pastes"),
                                               self.p.config.get("Directories", "crawled"), date_str)

    self.crawled_har = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date_str)
    self.crawled_screenshot = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"))
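The constructor above only documents its inputs implicitly, through the keys it reads. The short illustration below (values invented) shows the shapes of the date and crawler_options arguments that the code assumes:

# Illustration only: argument shapes inferred from the keys accessed above.
crawler_options = {'html': 1, 'har': 1, 'png': 1}   # splash capture options
date = {'date_day': '20190801',                     # sliced into YYYY/MM/DD for the crawl path
        'date_month': '201908',
        'epoch': 1564617600}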
Example 3: search_phone
def search_phone(message):
    paste = Paste.Paste(message)
    content = paste.get_p_content()
    # regex to find phone numbers; may raise many false positives (optimisation welcome)
    reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
    # list of the regex results in the Paste; may be empty
    results = reg_phone.findall(content)

    # if there are more than 4 results, we consider that the Paste may contain a list of phone numbers
    if len(results) > 4:
        print(results)
        publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))


if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Phone'

    # Setup the I/O queues
    p = Process(config_section)

    # Send a description of the module to the logging
    publisher.info("Run Phone module")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        search_phone(message)
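To see what the "more than 4 matches" threshold means in practice, here is a small, self-contained check of the same regex against an invented text (the numbers are made up; findall returns one tuple of groups per match):

import re

reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')

sample = "Call 0612345678, 0711223344, 0798765432, +33612345678 or 0655443322"
results = reg_phone.findall(sample)
print(len(results))   # 5 matches, above the threshold of 4, so a real run would log a warning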
Example 4: __init__
def __init__(self, type, url, domain, original_paste, super_father, *args, **kwargs):
    self.type = type
    self.original_paste = original_paste
    self.super_father = super_father
    self.start_urls = url
    self.domains = [domain]
    date = datetime.datetime.now().strftime("%Y/%m/%d")
    self.full_date = datetime.datetime.now().strftime("%Y%m%d")
    self.date_month = datetime.datetime.now().strftime("%Y%m")

    config_section = 'Crawler'
    self.p = Process(config_section)

    self.r_cache = redis.StrictRedis(
        host=self.p.config.get("Redis_Cache", "host"),
        port=self.p.config.getint("Redis_Cache", "port"),
        db=self.p.config.getint("Redis_Cache", "db"),
        decode_responses=True)

    self.r_serv_log_submit = redis.StrictRedis(
        host=self.p.config.get("Redis_Log_submit", "host"),
        port=self.p.config.getint("Redis_Log_submit", "port"),
        db=self.p.config.getint("Redis_Log_submit", "db"),
        decode_responses=True)

    self.r_serv_metadata = redis.StrictRedis(
        host=self.p.config.get("ARDB_Metadata", "host"),
        port=self.p.config.getint("ARDB_Metadata", "port"),
        db=self.p.config.getint("ARDB_Metadata", "db"),
        decode_responses=True)

    self.r_serv_onion = redis.StrictRedis(
        host=self.p.config.get("ARDB_Onion", "host"),
        port=self.p.config.getint("ARDB_Onion", "port"),
        db=self.p.config.getint("ARDB_Onion", "db"),
        decode_responses=True)

    self.crawler_path = os.path.join(self.p.config.get("Directories", "crawled"), date)

    self.crawled_paste_filemame = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "pastes"),
                                               self.p.config.get("Directories", "crawled"), date)

    self.crawled_screenshot = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date)
Example 5: Process
*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.

"""
import argparse
import time
from packages import Paste
from pubsublogger import publisher
from Helper import Process

if __name__ == '__main__':
    publisher.port = 6380
    publisher.channel = 'Script'

    config_section = 'Lines'
    p = Process(config_section)

    # SCRIPT PARSER #
    parser = argparse.ArgumentParser(
        description='This script is a part of the Analysis Information \
        Leak framework.')

    parser.add_argument(
        '-max', type=int, default=500,
        help='The limit between "short lines" and "long lines"',
        action='store')

    args = parser.parse_args()

    # FUNCTIONS #
    tmp_string = "Lines script Subscribed to channel {} and Start to publish \
Example 6: Process
"""
import base64
import os
import time
from pubsublogger import publisher
from Helper import Process
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = "Global"
p = Process(config_section)
# LOGGING #
publisher.info("Feed Script started to receive & publish.")
while True:
message = p.get_from_set()
# Recovering the streamed message informations.
if message is not None:
splitted = message.split()
if len(splitted) == 2:
paste, gzip64encoded = splitted
else:
# TODO Store the name of the empty paste inside a Redis-list.
print "Empty Paste: not processed"
Example 7: Process
import configparser

from Helper import Process

# CONFIG #
refresh_time = 30
FEED_QUEUE_MAPPING = {"feeder2": "preProcess1"}  # Map a feeder name to a pre-processing module

if __name__ == '__main__':
    publisher.port = 6380
    publisher.channel = 'Script'

    config_section = 'Mixer'

    p = Process(config_section)

    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. \
                        Did you set environment variables? \
                        Or activate the virtualenv.')
    cfg = configparser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    server = redis.StrictRedis(
        host=cfg.get("Redis_Mixer_Cache", "host"),
        port=cfg.getint("Redis_Mixer_Cache", "port"),
        db=cfg.getint("Redis_Mixer_Cache", "db"),
Example 8: Process
#!/usr/bin/env python2
# -*-coding:UTF-8 -*

import time
import sys
from packages import Paste
from pubsublogger import publisher
from Helper import Process
import re
from pyfaup.faup import Faup

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Credential"
    p = Process(config_section)
    publisher.info("Find credentials")

    faup = Faup()

    critical = 8

    regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    while True:
        message = p.get_from_set()
        if message is None:
            publisher.debug("Script Credential is Idling 10s")
            time.sleep(10)
            continue
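A quick, self-contained check of the credential regexes above on an invented "email:password" pair (example.com and hunter2 are placeholders; the patterns are the same as above, written here as raw strings):

import re

regex_cred = r"[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
regex_site_for_stats = r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

sample = "leak dump: user@example.com:hunter2"
print(re.findall(regex_cred, sample))            # ['user@example.com:hunter2']
print(re.findall(regex_site_for_stats, sample))  # ['@example.com:']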
Example 9: TimeoutException
import signal

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

signal.signal(signal.SIGALRM, timeout_handler)

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = 'Tokenize'
    p = Process(config_section)

    # LOGGING #
    publisher.info("Tokeniser started")

    while True:
        message = p.get_from_set()
        print(message)
        if message is not None:
            paste = Paste.Paste(message)
            signal.alarm(5)
            try:
                for word, score in paste._get_top_words().items():
                    if len(word) >= 4:
                        msg = '{} {} {}'.format(paste.p_rel_path, word, score)
                        p.populate_set_out(msg)
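The excerpt is cut off inside the try block, so the timeout handling is not visible. Below is a standalone sketch of the SIGALRM pattern the module sets up, under the assumption that the missing part catches TimeoutException and cancels the alarm; run_with_timeout is an invented helper name, not part of the original module.

import signal

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

signal.signal(signal.SIGALRM, timeout_handler)

def run_with_timeout(func, seconds=5):
    signal.alarm(seconds)       # deliver SIGALRM after `seconds` seconds
    try:
        return func()
    except TimeoutException:
        return None             # the work took too long; skip it
    finally:
        signal.alarm(0)         # cancel any pending alarm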
Example 10: Process
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
import time
from packages import Paste
from pubsublogger import publisher
from Helper import Process
import re

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Release"
    p = Process(config_section)
    publisher.info("Release scripts to find release names")

    movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
    tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
    xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"

    regexs = [movie, tv, xxx]

    regex = '|'.join(regexs)
    while True:
        filepath = p.get_from_set()
        if filepath is None:
            publisher.debug("Script Release is Idling 10s")
            print 'Sleeping'
            time.sleep(10)
            continue

        paste = Paste.Paste(filepath)
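A self-contained check of the combined release-name regex, using invented release strings (Python 3 syntax here, unlike the Python 2 excerpt above, and the patterns written as raw strings):

import re

movie = r"[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
tv = r"[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
xxx = r"[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
regex = '|'.join([movie, tv, xxx])

sample = "Some.Movie.2018.1080p.BluRay.x264-GRP and Some.Show.S01E02.720p.HDTV.x264-GRP"
print(re.findall(regex, sample))
# ['Some.Movie.2018.1080p.BluRay.x264-GRP', 'Some.Show.S01E02.720p.HDTV.x264-GRP']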
Example 11: Process
# Country and ASN lookup
from cymru.ip2asn.dns import DNSClient as ip2asn
import socket
import pycountry
import ipaddress

from Helper import Process

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = 'Web'
    p = Process(config_section)

    # REDIS #
    r_serv2 = redis.StrictRedis(
        host=p.config.get("Redis_Cache", "host"),
        port=p.config.getint("Redis_Cache", "port"),
        db=p.config.getint("Redis_Cache", "db"))

    # Country to log as critical
    cc_critical = p.config.get("Url", "cc_critical")

    # FUNCTIONS #
    publisher.info("Script URL subscribed to channel web_categ")

    # FIXME For retro compatibility
    channel = 'web_categ'
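As a side note on the imports above (an illustration, not part of the excerpt): pycountry can resolve a two-letter code such as the one stored in cc_critical into a readable country name.

import pycountry

country = pycountry.countries.get(alpha_2='DE')   # e.g. the value of cc_critical
print(country.name)                               # Germany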
Example 12: substract_date
sys.path.append(os.environ['AIL_BIN'])
from Helper import Process

def substract_date(date_from, date_to):
    date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
    date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
    delta = date_to - date_from  # timedelta
    l_date = []
    for i in range(delta.days + 1):
        date = date_from + datetime.timedelta(i)
        l_date.append(date.strftime('%Y%m%d'))
    return l_date

config_section = 'Keys'
p = Process(config_section)

r_tags = redis.StrictRedis(
    host=p.config.get("ARDB_Tags", "host"),
    port=p.config.getint("ARDB_Tags", "port"),
    db=p.config.getint("ARDB_Tags", "db"),
    decode_responses=True)

tag = 'infoleak:automatic-detection="pgp-message"'

# get tag first/last seen
first_seen = r_tags.hget('tag_metadata:{}'.format(tag), 'first_seen')
last_seen = r_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')

l_dates = substract_date(first_seen, last_seen)
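For clarity, a quick usage example of the substract_date helper defined above with hard-coded dates (the real script feeds it the first_seen/last_seen values read from ARDB):

print(substract_date('20190228', '20190302'))
# ['20190228', '20190301', '20190302']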
Example 13: Process
import pprint
import time
import dns.exception
from packages import Paste
from packages import lib_refine
from pubsublogger import publisher

from Helper import Process

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Mail'

    p = Process(config_section)

    # REDIS #
    r_serv2 = redis.StrictRedis(
        host=p.config.get("Redis_Cache", "host"),
        port=p.config.getint("Redis_Cache", "port"),
        db=p.config.getint("Redis_Cache", "db"))

    # FUNCTIONS #
    publisher.info("Subscribed to channel mails_categ")

    # FIXME For retro compatibility
    channel = 'mails_categ'

    message = p.get_from_set()
    prec_filename = None
Example 14: Process
from pubsublogger import publisher
from Helper import Process
from packages import Paste

if __name__ == '__main__':

    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Tags'

    # Setup the I/O queues
    p = Process(config_section)

    server = redis.StrictRedis(
        host=p.config.get("ARDB_Tags", "host"),
        port=p.config.get("ARDB_Tags", "port"),
        db=p.config.get("ARDB_Tags", "db"),
        decode_responses=True)

    server_metadata = redis.StrictRedis(
        host=p.config.get("ARDB_Metadata", "host"),
        port=p.config.get("ARDB_Metadata", "port"),
        db=p.config.get("ARDB_Metadata", "db"),
        decode_responses=True)

    serv_statistics = redis.StrictRedis(
        host=p.config.get('ARDB_Statistics', 'host'),
Example 15: TorSplashSpider
class TorSplashSpider(Spider):
    name = 'TorSplashSpider'

    def __init__(self, type, url, domain, original_paste, super_father, *args, **kwargs):
        self.type = type
        self.original_paste = original_paste
        self.super_father = super_father
        self.start_urls = url
        self.domains = [domain]
        date = datetime.datetime.now().strftime("%Y/%m/%d")
        self.full_date = datetime.datetime.now().strftime("%Y%m%d")
        self.date_month = datetime.datetime.now().strftime("%Y%m")

        config_section = 'Crawler'
        self.p = Process(config_section)

        self.r_cache = redis.StrictRedis(
            host=self.p.config.get("Redis_Cache", "host"),
            port=self.p.config.getint("Redis_Cache", "port"),
            db=self.p.config.getint("Redis_Cache", "db"),
            decode_responses=True)

        self.r_serv_log_submit = redis.StrictRedis(
            host=self.p.config.get("Redis_Log_submit", "host"),
            port=self.p.config.getint("Redis_Log_submit", "port"),
            db=self.p.config.getint("Redis_Log_submit", "db"),
            decode_responses=True)

        self.r_serv_metadata = redis.StrictRedis(
            host=self.p.config.get("ARDB_Metadata", "host"),
            port=self.p.config.getint("ARDB_Metadata", "port"),
            db=self.p.config.getint("ARDB_Metadata", "db"),
            decode_responses=True)

        self.r_serv_onion = redis.StrictRedis(
            host=self.p.config.get("ARDB_Onion", "host"),
            port=self.p.config.getint("ARDB_Onion", "port"),
            db=self.p.config.getint("ARDB_Onion", "db"),
            decode_responses=True)

        self.crawler_path = os.path.join(self.p.config.get("Directories", "crawled"), date)

        self.crawled_paste_filemame = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "pastes"),
                                                   self.p.config.get("Directories", "crawled"), date)

        self.crawled_screenshot = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date)

    def start_requests(self):
        yield SplashRequest(
            self.start_urls,
            self.parse,
            #errback=self.errback_catcher,
            endpoint='render.json',
            meta={'father': self.original_paste},
            args={'html': 1,
                  'wait': 10,
                  'render_all': 1,
                  'har': 1,
                  'png': 1}
        )

    def parse(self, response):
        #print(response.headers)
        #print(response.status)

        if response.status == 504:
            # down ?
            print('504 detected')

        elif response.status != 200:
            print('other response: {}'.format(response.status))
            #print(error_log)
            # detect connection to proxy refused
            error_log = (json.loads(response.body.decode()))
            if(error_log['info']['text'] == 'Connection to proxy refused'):
                print('Connection to proxy refused')

        else:
            UUID = self.domains[0]+str(uuid.uuid4())
            filename_paste = os.path.join(self.crawled_paste_filemame, UUID)
            relative_filename_paste = os.path.join(self.crawler_path, UUID)
            filename_screenshot = os.path.join(self.crawled_screenshot, UUID + '.png')

            # save new paste on disk
            if self.save_crawled_paste(filename_paste, response.data['html']):

                # add this paste to the domain crawled set # TODO: # FIXME: put this on cache ?
                #self.r_serv_onion.sadd('temp:crawled_domain_pastes:{}'.format(self.domains[0]), filename_paste)

                self.r_serv_onion.sadd('{}_up:{}'.format(self.type, self.full_date), self.domains[0])
                self.r_serv_onion.sadd('full_{}_up'.format(self.type), self.domains[0])
                self.r_serv_onion.sadd('month_{}_up:{}'.format(self.type, self.date_month), self.domains[0])

                # create onion metadata
                if not self.r_serv_onion.exists('{}_metadata:{}'.format(self.type, self.domains[0])):
                    self.r_serv_onion.hset('{}_metadata:{}'.format(self.type, self.domains[0]), 'first_seen', self.full_date)
                self.r_serv_onion.hset('{}_metadata:{}'.format(self.type, self.domains[0]), 'last_seen', self.full_date)

                # create paste metadata
                self.r_serv_metadata.hset('paste_metadata:'+filename_paste, 'super_father', self.super_father)
                self.r_serv_metadata.hset('paste_metadata:'+filename_paste, 'father', response.meta['father'])
                self.r_serv_metadata.hset('paste_metadata:'+filename_paste, 'domain', self.domains[0])
# ......... (the rest of this example is omitted) .........