

Python Helper.Process Class Code Examples

This article collects typical usage examples of the Helper.Process class in Python. If you have been wondering what the Process class does, how to use it, or what real-world code built on it looks like, the curated examples below should help.


A total of 15 Process class code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
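
All 15 examples share the same skeleton: configure pubsublogger, bind the module to a section of modules.cfg through Process, then poll the input queue. The following is a minimal sketch of that shared pattern, not code from AIL itself; the 'Demo' section name and the handle_message stub are illustrative assumptions.

import time

from pubsublogger import publisher
from Helper import Process

def handle_message(message):
    # Module-specific processing would go here (illustrative stub).
    print(message)

if __name__ == '__main__':
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used by the modules
    publisher.channel = 'Script'

    # 'Demo' is a hypothetical section name in bin/packages/modules.cfg
    p = Process('Demo')

    while True:
        message = p.get_from_set()   # pop one message from the input queue
        if message is None:
            time.sleep(1)            # idle when the queue is empty
            continue
        handle_message(message)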

Example 1: main

def main():
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
        except IOError:
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Author: mokaddem, Project: AIL-framework, Lines: 47, Source: DomClassifier.py

Example 2: __init__

        def __init__(self, type, crawler_options, date, requested_mode, url, domain, port, original_item, *args, **kwargs):
            self.type = type
            self.requested_mode = requested_mode
            self.original_item = original_item
            self.root_key = None
            self.start_urls = url
            self.domains = [domain]
            self.port = str(port)
            date_str = '{}/{}/{}'.format(date['date_day'][0:4], date['date_day'][4:6], date['date_day'][6:8])
            self.full_date = date['date_day']
            self.date_month = date['date_month']
            self.date_epoch = int(date['epoch'])

            self.arg_crawler = {  'html': crawler_options['html'],
                                  'wait': 10,
                                  'render_all': 1,
                                  'har': crawler_options['har'],
                                  'png': crawler_options['png']}

            config_section = 'Crawler'
            self.p = Process(config_section)

            self.r_cache = redis.StrictRedis(
                host=self.p.config.get("Redis_Cache", "host"),
                port=self.p.config.getint("Redis_Cache", "port"),
                db=self.p.config.getint("Redis_Cache", "db"),
                decode_responses=True)

            self.r_serv_log_submit = redis.StrictRedis(
                host=self.p.config.get("Redis_Log_submit", "host"),
                port=self.p.config.getint("Redis_Log_submit", "port"),
                db=self.p.config.getint("Redis_Log_submit", "db"),
                decode_responses=True)

            self.r_serv_metadata = redis.StrictRedis(
                host=self.p.config.get("ARDB_Metadata", "host"),
                port=self.p.config.getint("ARDB_Metadata", "port"),
                db=self.p.config.getint("ARDB_Metadata", "db"),
                decode_responses=True)

            self.r_serv_onion = redis.StrictRedis(
                host=self.p.config.get("ARDB_Onion", "host"),
                port=self.p.config.getint("ARDB_Onion", "port"),
                db=self.p.config.getint("ARDB_Onion", "db"),
                decode_responses=True)

            self.crawler_path = os.path.join(self.p.config.get("Directories", "crawled"), date_str )

            self.crawled_paste_filemame = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "pastes"),
                                            self.p.config.get("Directories", "crawled"), date_str )

            self.crawled_har = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date_str )
            self.crawled_screenshot = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot") )
Author: CIRCL, Project: AIL-framework, Lines: 53, Source: TorSplashCrawler.py

Example 3: search_phone

def search_phone(message):
    paste = Paste.Paste(message)
    content = paste.get_p_content()
    # regex to find phone numbers; intentionally loose, so it may raise many false positives (optimization and an upgrade are still needed)
    reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
    # list of the regex results in the Paste, may be null
    results = reg_phone.findall(content)

    # if more than 4 matches are found, the paste likely contains a list of phone numbers
    if len(results) > 4:
        print(results)
        publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))

if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Phone'

    # Setup the I/O queues
    p = Process(config_section)

    # Sent to the logging a description of the module
    publisher.info("Run Phone module")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        search_phone(message)
Author: Starow, Project: AIL-framework, Lines: 40, Source: Phone.py
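
For readers curious what the loose phone regex in Example 3 actually matches, here is a quick standalone check; the sample numbers are made up for illustration:

import re

# Same pattern as in search_phone above; the sample text is invented.
reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
sample = "office 01-23-45-67-89, mobile 0612345678, intl +3312345678"
print(reg_phone.findall(sample))
# findall returns one tuple of capture groups per match; the module only
# counts matches (len(results) > 4) rather than reading the groups.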

Example 4: __init__

        def __init__(self, type, url, domain, original_paste, super_father, *args, **kwargs):
            self.type = type
            self.original_paste = original_paste
            self.super_father = super_father
            self.start_urls = url
            self.domains = [domain]
            date = datetime.datetime.now().strftime("%Y/%m/%d")
            self.full_date = datetime.datetime.now().strftime("%Y%m%d")
            self.date_month = datetime.datetime.now().strftime("%Y%m")

            config_section = 'Crawler'
            self.p = Process(config_section)

            self.r_cache = redis.StrictRedis(
                host=self.p.config.get("Redis_Cache", "host"),
                port=self.p.config.getint("Redis_Cache", "port"),
                db=self.p.config.getint("Redis_Cache", "db"),
                decode_responses=True)

            self.r_serv_log_submit = redis.StrictRedis(
                host=self.p.config.get("Redis_Log_submit", "host"),
                port=self.p.config.getint("Redis_Log_submit", "port"),
                db=self.p.config.getint("Redis_Log_submit", "db"),
                decode_responses=True)

            self.r_serv_metadata = redis.StrictRedis(
                host=self.p.config.get("ARDB_Metadata", "host"),
                port=self.p.config.getint("ARDB_Metadata", "port"),
                db=self.p.config.getint("ARDB_Metadata", "db"),
                decode_responses=True)

            self.r_serv_onion = redis.StrictRedis(
                host=self.p.config.get("ARDB_Onion", "host"),
                port=self.p.config.getint("ARDB_Onion", "port"),
                db=self.p.config.getint("ARDB_Onion", "db"),
                decode_responses=True)

            self.crawler_path = os.path.join(self.p.config.get("Directories", "crawled"), date )

            self.crawled_paste_filemame = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "pastes"),
                                            self.p.config.get("Directories", "crawled"), date )

            self.crawled_screenshot = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date )
Author: mokaddem, Project: AIL-framework, Lines: 43, Source: TorSplashCrawler.py

Example 5: Process

*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.

"""
import argparse
import time
from packages import Paste
from pubsublogger import publisher

from Helper import Process

if __name__ == '__main__':
    publisher.port = 6380
    publisher.channel = 'Script'

    config_section = 'Lines'
    p = Process(config_section)

    # SCRIPT PARSER #
    parser = argparse.ArgumentParser(
        description='This script is a part of the Analysis Information \
                Leak framework.')

    parser.add_argument(
        '-max', type=int, default=500,
        help='The limit between "short lines" and "long lines"',
        action='store')

    args = parser.parse_args()

    # FUNCTIONS #
    tmp_string = "Lines script Subscribed to channel {} and Start to publish \
Author: CIRCL, Project: AIL-framework, Lines: 31, Source: Lines.py

Example 6: Process

"""
import base64
import os
import time
from pubsublogger import publisher

from Helper import Process


if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = "Global"

    p = Process(config_section)

    # LOGGING #
    publisher.info("Feed Script started to receive & publish.")

    while True:

        message = p.get_from_set()
        # Recovering the streamed message informations.
        if message is not None:
            splitted = message.split()
            if len(splitted) == 2:
                paste, gzip64encoded = splitted
            else:
                # TODO Store the name of the empty paste inside a Redis-list.
                print "Empty Paste: not processed"
Author: tonirss, Project: AIL-framework, Lines: 31, Source: Global.py

Example 7: Process

import configparser

from Helper import Process


# CONFIG #
refresh_time = 30
FEED_QUEUE_MAPPING = { "feeder2": "preProcess1" } # Map a feeder name to a pre-processing module

if __name__ == '__main__':
    publisher.port = 6380
    publisher.channel = 'Script'

    config_section = 'Mixer'

    p = Process(config_section)

    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. \
                        Did you set environment variables? \
                        Or activate the virtualenv.')

    cfg = configparser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    server = redis.StrictRedis(
        host=cfg.get("Redis_Mixer_Cache", "host"),
        port=cfg.getint("Redis_Mixer_Cache", "port"),
        db=cfg.getint("Redis_Mixer_Cache", "db"),
Author: CIRCL, Project: AIL-framework, Lines: 31, Source: Mixer.py

Example 8: Process

#!/usr/bin/env python2
# -*-coding:UTF-8 -*
import time
import sys
from packages import Paste
from pubsublogger import publisher
from Helper import Process
import re
from pyfaup.faup import Faup

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Credential"
    p = Process(config_section)
    publisher.info("Find credentials")

    faup = Faup()

    critical = 8

    regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
    while True:
        message = p.get_from_set()
        if message is None:
            publisher.debug("Script Credential is Idling 10s")
            time.sleep(10)
            continue
Author: Rafiot, Project: AIL-framework, Lines: 30, Source: Credential.py
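
A quick sanity check of regex_cred from Example 8; the credential below is fabricated for illustration:

import re

# Same email:password pattern as in Example 8 above.
regex_cred = r"[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
print(re.findall(regex_cred, "dump: alice.smith@example.com:hunter_2"))
# ['alice.smith@example.com:hunter_2']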

Example 9: TimeoutException

import signal

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

signal.signal(signal.SIGALRM, timeout_handler)

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Tokenize'
    p = Process(config_section)

    # LOGGING #
    publisher.info("Tokeniser started")

    while True:
        message = p.get_from_set()
        print(message)
        if message is not None:
            paste = Paste.Paste(message)
            signal.alarm(5)
            try:
                for word, score in paste._get_top_words().items():
                    if len(word) >= 4:
                        msg = '{} {} {}'.format(paste.p_rel_path, word, score)
                        p.populate_set_out(msg)
Author: CIRCL, Project: AIL-framework, Lines: 31, Source: Tokenize.py
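
The SIGALRM pattern in Example 9 is a general way to bound the runtime of a blocking call. Here is a minimal standalone sketch of the same technique (POSIX-only, since signal.alarm is not available on Windows); the 2-second limit and the sleep stand-in are illustrative:

import signal
import time

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

signal.signal(signal.SIGALRM, timeout_handler)

signal.alarm(2)          # deliver SIGALRM (raising TimeoutException) after 2 seconds
try:
    time.sleep(10)       # stand-in for a slow call such as _get_top_words()
except TimeoutException:
    print("work timed out")
finally:
    signal.alarm(0)      # always cancel any pending alarm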

Example 10: Process

#!/usr/bin/env python2
# -*-coding:UTF-8 -*
import time
from packages import Paste
from pubsublogger import publisher
from Helper import Process
import re

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Release"
    p = Process(config_section)
    publisher.info("Release scripts to find release names")

    movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
    tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
    xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"

    regexs = [movie, tv, xxx]

    regex = '|'.join(regexs)
    while True:
        filepath = p.get_from_set()
        if filepath is None:
            publisher.debug("Script Release is Idling 10s")
            print 'Sleeping'
            time.sleep(10)
            continue

        paste = Paste.Paste(filepath)
Author: Rafiot, Project: AIL-framework, Lines: 31, Source: Release.py

Example 11: Process

# Country and ASN lookup
from cymru.ip2asn.dns import DNSClient as ip2asn
import socket
import pycountry
import ipaddress

from Helper import Process

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Web'

    p = Process(config_section)

    # REDIS #
    r_serv2 = redis.StrictRedis(
        host=p.config.get("Redis_Cache", "host"),
        port=p.config.getint("Redis_Cache", "port"),
        db=p.config.getint("Redis_Cache", "db"))

    # Country to log as critical
    cc_critical = p.config.get("Url", "cc_critical")

    # FUNCTIONS #
    publisher.info("Script URL subscribed to channel web_categ")

    # FIXME For retro compatibility
    channel = 'web_categ'
Author: MaximeStor, Project: AIL-framework, Lines: 30, Source: Url.py

Example 12: substract_date

sys.path.append(os.environ['AIL_BIN'])
from Helper import Process

def substract_date(date_from, date_to):
    date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
    date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
    delta = date_to - date_from # timedelta
    l_date = []
    for i in range(delta.days + 1):
        date = date_from + datetime.timedelta(i)
        l_date.append( date.strftime('%Y%m%d') )
    return l_date

config_section = 'Keys'
p = Process(config_section)

r_tags = redis.StrictRedis(
    host=p.config.get("ARDB_Tags", "host"),
    port=p.config.getint("ARDB_Tags", "port"),
    db=p.config.getint("ARDB_Tags", "db"),
    decode_responses=True)

tag = 'infoleak:automatic-detection="pgp-message"'

# get tag first/last seen
first_seen = r_tags.hget('tag_metadata:{}'.format(tag), 'first_seen')
last_seen = r_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')

l_dates = substract_date(first_seen, last_seen)
Author: CIRCL, Project: AIL-framework, Lines: 29, Source: reprocess_pgp_message.py
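
A short worked example of substract_date from Example 12, with illustrative dates (run with the datetime import and the definition above in scope):

# Both endpoints are included in the returned range.
print(substract_date('20190101', '20190103'))
# ['20190101', '20190102', '20190103']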

Example 13: Process

import pprint
import time
import dns.exception
from packages import Paste
from packages import lib_refine
from pubsublogger import publisher

from Helper import Process

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Mail'

    p = Process(config_section)

    # REDIS #
    r_serv2 = redis.StrictRedis(
        host=p.config.get("Redis_Cache", "host"),
        port=p.config.getint("Redis_Cache", "port"),
        db=p.config.getint("Redis_Cache", "db"))

    # FUNCTIONS #
    publisher.info("Suscribed to channel mails_categ")

    # FIXME For retro compatibility
    channel = 'mails_categ'

    message = p.get_from_set()
    prec_filename = None
Author: Rafiot, Project: AIL-framework, Lines: 31, Source: Mail.py

Example 14: Process

from pubsublogger import publisher
from Helper import Process
from packages import Paste

if __name__ == '__main__':

    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Tags'

    # Setup the I/O queues
    p = Process(config_section)

    server = redis.StrictRedis(
                host=p.config.get("ARDB_Tags", "host"),
                port=p.config.get("ARDB_Tags", "port"),
                db=p.config.get("ARDB_Tags", "db"),
                decode_responses=True)

    server_metadata = redis.StrictRedis(
                host=p.config.get("ARDB_Metadata", "host"),
                port=p.config.get("ARDB_Metadata", "port"),
                db=p.config.get("ARDB_Metadata", "db"),
                decode_responses=True)

    serv_statistics = redis.StrictRedis(
        host=p.config.get('ARDB_Statistics', 'host'),
Author: mokaddem, Project: AIL-framework, Lines: 31, Source: Tags.py

Example 15: TorSplashSpider

    class TorSplashSpider(Spider):
        name = 'TorSplashSpider'

        def __init__(self, type, url, domain, original_paste, super_father, *args, **kwargs):
            self.type = type
            self.original_paste = original_paste
            self.super_father = super_father
            self.start_urls = url
            self.domains = [domain]
            date = datetime.datetime.now().strftime("%Y/%m/%d")
            self.full_date = datetime.datetime.now().strftime("%Y%m%d")
            self.date_month = datetime.datetime.now().strftime("%Y%m")

            config_section = 'Crawler'
            self.p = Process(config_section)

            self.r_cache = redis.StrictRedis(
                host=self.p.config.get("Redis_Cache", "host"),
                port=self.p.config.getint("Redis_Cache", "port"),
                db=self.p.config.getint("Redis_Cache", "db"),
                decode_responses=True)

            self.r_serv_log_submit = redis.StrictRedis(
                host=self.p.config.get("Redis_Log_submit", "host"),
                port=self.p.config.getint("Redis_Log_submit", "port"),
                db=self.p.config.getint("Redis_Log_submit", "db"),
                decode_responses=True)

            self.r_serv_metadata = redis.StrictRedis(
                host=self.p.config.get("ARDB_Metadata", "host"),
                port=self.p.config.getint("ARDB_Metadata", "port"),
                db=self.p.config.getint("ARDB_Metadata", "db"),
                decode_responses=True)

            self.r_serv_onion = redis.StrictRedis(
                host=self.p.config.get("ARDB_Onion", "host"),
                port=self.p.config.getint("ARDB_Onion", "port"),
                db=self.p.config.getint("ARDB_Onion", "db"),
                decode_responses=True)

            self.crawler_path = os.path.join(self.p.config.get("Directories", "crawled"), date )

            self.crawled_paste_filemame = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "pastes"),
                                            self.p.config.get("Directories", "crawled"), date )

            self.crawled_screenshot = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date )

        def start_requests(self):
            yield SplashRequest(
                self.start_urls,
                self.parse,
                #errback=self.errback_catcher,
                endpoint='render.json',
                meta={'father': self.original_paste},
                args={  'html': 1,
                        'wait': 10,
                        'render_all': 1,
                        'har': 1,
                        'png': 1}
            )

        def parse(self,response):
            #print(response.headers)
            #print(response.status)
            if response.status == 504:
                # down ?
                print('504 detected')
            elif response.status != 200:
                print('other response: {}'.format(response.status))
                #print(error_log)
                #detect connection to proxy refused
                error_log = (json.loads(response.body.decode()))
                if(error_log['info']['text'] == 'Connection to proxy refused'):
                    print('Connection to proxy refused')
            else:

                UUID = self.domains[0]+str(uuid.uuid4())
                filename_paste = os.path.join(self.crawled_paste_filemame, UUID)
                relative_filename_paste = os.path.join(self.crawler_path, UUID)
                filename_screenshot = os.path.join(self.crawled_screenshot, UUID +'.png')

                # save new paste on disk
                if self.save_crawled_paste(filename_paste, response.data['html']):

                    # add this paste to the domain crawled set # TODO: # FIXME:  put this on cache ?
                    #self.r_serv_onion.sadd('temp:crawled_domain_pastes:{}'.format(self.domains[0]), filename_paste)

                    self.r_serv_onion.sadd('{}_up:{}'.format(self.type, self.full_date), self.domains[0])
                    self.r_serv_onion.sadd('full_{}_up'.format(self.type), self.domains[0])
                    self.r_serv_onion.sadd('month_{}_up:{}'.format(self.type, self.date_month), self.domains[0])

                    # create onion metadata
                    if not self.r_serv_onion.exists('{}_metadata:{}'.format(self.type, self.domains[0])):
                        self.r_serv_onion.hset('{}_metadata:{}'.format(self.type, self.domains[0]), 'first_seen', self.full_date)
                    self.r_serv_onion.hset('{}_metadata:{}'.format(self.type, self.domains[0]), 'last_seen', self.full_date)

                    #create paste metadata
                    self.r_serv_metadata.hset('paste_metadata:'+filename_paste, 'super_father', self.super_father)
                    self.r_serv_metadata.hset('paste_metadata:'+filename_paste, 'father', response.meta['father'])
                    self.r_serv_metadata.hset('paste_metadata:'+filename_paste, 'domain', self.domains[0])
#......... part of the code omitted here .........
Author: mokaddem, Project: AIL-framework, Lines: 101, Source: TorSplashCrawler.py


Note: The Helper.Process class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by their respective developers; copyright of the source code belongs to the original authors. Please consult each project's License before distributing or using the code, and do not reproduce without permission.