當前位置: 首頁>>代碼示例>>Python>>正文


Python JoinableQueue.empty方法代碼示例

本文整理匯總了Python中gevent.queue.JoinableQueue.empty方法的典型用法代碼示例。如果您正苦於以下問題:Python JoinableQueue.empty方法的具體用法?Python JoinableQueue.empty怎麽用?Python JoinableQueue.empty使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在gevent.queue.JoinableQueue的用法示例。


在下文中一共展示了JoinableQueue.empty方法的3個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: HttpScanner

# 需要導入模塊: from gevent.queue import JoinableQueue [as 別名]
# 或者: from gevent.queue.JoinableQueue import empty [as 別名]
class HttpScanner(object):
    def __init__(self, args):
        """
        Initialise HTTP scanner.

        :param args: parsed command-line arguments (hosts/urls files, ports, syn, ...)
        :return:
        """
        self.args = args
        self.output = HttpScannerOutput(args)
        self._init_scan_options()

        # Load host and URL lists from the input files (deduplicated).
        self.output.write_log("Reading files and deduplicating.", logging.INFO)
        self.hosts = self._file_to_list(args.hosts)
        self.urls = self._file_to_list(args.urls)

        # Report how much input was loaded.
        self._calc_urls()
        summary = 'Loaded %i hosts %i urls' % (self.hosts_count, self.urls_count)
        if self.args.ports is not None:
            summary += ' %i ports' % len(self.args.ports)
        self.output.print_and_log(summary)

        # Without SYN scanning, expand every host across all requested ports.
        if self.args.ports is not None and not self.args.syn:
            self.hosts = [helper.generate_url(host, port)
                          for host in self.hosts
                          for port in self.args.ports]

        # Recompute the totals now that the host list may have grown.
        self._calc_urls()
        self.output.print_and_log('%i full urls to scan' % self.full_urls_count)

        # Work queue plus the worker greenlets that will consume it.
        self.hosts_queue = JoinableQueue()
        self.workers = []

    def _file_to_list(self, filename, dedup=True):
        """
        Get list from file
        :param filename: file to read
        :return: list of lines
        """
        if not path.exists(filename) or not path.isfile(filename):
            self.output.print_and_log('File %s not found!' % filename, logging.ERROR)
            exit(-1)

        # Preparing lines list
        lines = filter(lambda line: line is not None and len(line) > 0, open(filename).read().split('\n'))
        if len(lines) == 0:
            self.output.print_and_log('File %s is empty!' % filename, logging.ERROR)
            exit(-1)

        return helper.deduplicate(lines) if dedup else lines

    def _init_scan_options(self):
        """Prepare the shared requests session: timeout, retries and optional TOR proxying."""
        # Session
        self.session = session()
        self.session.timeout = self.args.timeout
        # Certificate validation is intentionally disabled for scanning targets.
        self.session.verify = False

        # TODO: debug and check
        # self.session.mount("http://", HTTPAdapter(max_retries=self.args.max_retries))
        # self.session.mount("https://", HTTPAdapter(max_retries=self.args.max_retries))
        # http://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request
        # Max retries (module-wide default, affects every request made via requests)
        adapters.DEFAULT_RETRIES = self.args.max_retries

        # TOR: route all traffic through the local SOCKS proxy and verify it works
        if self.args.tor:
            self.output.write_log("TOR usage detected. Making some checks.")
            self.session.proxies = {
                'http': 'socks5://127.0.0.1:9050',
                'https': 'socks5://127.0.0.1:9050'
            }

            url = 'http://ifconfig.me/ip'
            real_ip, tor_ip = None, None

            # Get real IP address (direct request, bypassing the proxied session)
            try:
                real_ip = get(url).text.strip()
            except Exception as exception:
                self.output.print_and_log("Couldn't get real IP address. Check yout internet connection.",
                                          logging.ERROR)
                self.output.write_log(str(exception), logging.ERROR)
                exit(-1)

            # Get TOR IP address (same URL through the SOCKS-proxied session)
            try:
                tor_ip = self.session.get(url).text.strip()
            except Exception as exception:
                self.output.print_and_log("TOR socks proxy doesn't seem to be working.", logging.ERROR)
                self.output.write_log(str(exception), logging.ERROR)
                exit(-1)

            # Show IP addresses
            self.output.print_and_log('Real IP: %s TOR IP: %s' % (real_ip, tor_ip))
#.........這裏部分代碼省略.........
開發者ID:Badikov,項目名稱:httpscan,代碼行數:103,代碼來源:httpscan.py

示例2: RequestBase

# 需要導入模塊: from gevent.queue import JoinableQueue [as 別名]
# 或者: from gevent.queue.JoinableQueue import empty [as 別名]
class RequestBase(object):
    def __init__(self,url,parameter,HTTPClients,ClientConnectionPool,task=None):
        """Set up the HTTP client, greenlet pool and bookkeeping for a request run.

        :param url: base URL the pooled HTTP client connects to
        :param parameter: caller-defined parameter kept for subclasses
        :param HTTPClients: number of concurrent worker greenlets
        :param ClientConnectionPool: connection-pool size of the HTTP client
        :param task: optional celery task used for progress reporting
        """
        # Celery bookkeeping — the task id is only stored when a task is given.
        if task is None:
            self.celeryTask = None
        else:
            self.celeryTask = task
            self.celeryTaskId = task.request.id

        self.url = url
        self.parameter = parameter
        self.numberHTTPClients = HTTPClients
        self.numberClientConnectionPool = ClientConnectionPool

        # One shared HTTP client with a connection pool, a greenlet pool for
        # the workers, and a joinable queue feeding them.
        self.http = HTTPClient.from_url(URL(url),concurrency=self.numberClientConnectionPool)
        self.clientPool = gevent.pool.Pool(self.numberHTTPClients)
        self.workQueue = JoinableQueue()

        # Counters and result containers.
        self.resultList = {}
        self.workQueueMax = 0
        self.workQueueDone = 0
        self.countRequests = 0
        self.status_codes = {}
        self.status_codes_count = {}
        self.meta = {}
        self.greenletList = {}

        # Subclass hook for extra per-run structures.
        self.initAdditionalStructures()

        self.progressMeta = None
        self.exitFlag = False
        self.pauseRequests = False


    def destroy(self):
        """Release the underlying HTTP client and its pooled connections."""
        client = self.http
        client.close()

    def initAdditionalStructures(self):
        """Subclass hook called from __init__ to set up extra state; default is a no-op."""
        pass

    def destroyAdditionstrucutres(self):
        """Subclass hook to tear down whatever initAdditionalStructures built.

        NOTE(review): the misspelled name is kept — callers elsewhere may use it.
        """
        pass

    def getProgress(self):
        """Return the progress snapshot last assembled by updateProgress()."""
        snapshot = self.meta
        return snapshot

    def updateProgress(self,state="PROGRESS"):
        '''Rebuild the progress dict in self.meta and push it to celery (if attached).

        :param state: celery-style state label, defaults to "PROGRESS"
        '''
        self.meta = {'state':state,'workQueueDone': self.workQueueDone, 'workQueueMax': self.workQueueMax,'current':len(self.resultList),'workQueue':self.workQueue.qsize(),'requests':self.countRequests}

        # Snapshot the per-status-code queue sizes.  dict.items() replaces the
        # Python-2-only iteritems(): identical iteration behaviour in Python 2
        # and it also runs under Python 3.  (May not be the best solution from
        # a performance view, as the original comment noted.)
        for code,queue in self.status_codes.items():
            self.status_codes_count[code] = queue.qsize()
        self.meta['status_codes'] = self.status_codes_count
        if self.celeryTask is not None:
            self.celeryTask.update_state(task_id=self.celeryTaskId,state=state,meta=self.meta)

    def worker(self,http,clientId):
        """Greenlet body: pull work items and issue requests until the queue
        drains or stop() raises the exit flag.

        :param http: HTTP client used to issue the requests
        :param clientId: numeric id of this worker (kept for the interface)
        """
        # BUGFIX: the original condition was `... or self.exitFlag`, which made
        # stop() keep the loop alive (and block forever on an empty queue)
        # instead of terminating it.  The loop must run only while there is
        # work AND no shutdown was requested.
        while not self.workQueue.empty() and not self.exitFlag:
            try:
                code = self.makeRequest(http,self.getWorkQueueItem())
            finally:
                # Always acknowledge the item so workQueue.join() can return.
                self.workQueue.task_done()
      
    def stop(self):
        """Request shutdown: worker() re-checks exitFlag on every loop pass."""
        self.exitFlag=True

    def buildRequestURL(self,workQueueItem):
        '''Subclass hook: build and return the request URL for a work-queue item.'''
        # Default implementation does nothing; makeRequest() relies on the
        # subclass returning a URL string here.
        pass

    def handleRequestSuccess(self,workQueueItem, result):
        '''Required subclass hook, called after every successful request with the
        originating work item and the response object.'''
        pass

    def handleRequestFailure(self,result):
        '''Subclass hook called after a failed request. For example error code 404.'''
        pass

    def makeRequest(self,http,workQueueItem):
        '''Issue one GET request for workQueueItem, bucket the item by response
        status code, and return the integer status code on success.'''
        url_string = self.buildRequestURL(workQueueItem)

        self.countRequests += 1
        try:
            
            response = http.get(URL(url_string).request_uri)
            statusCode = response.status_code

            # Create a new queue if this status_code was not seen before,
            # then record the work item under it.
            if str(statusCode) not in self.status_codes:
                self.status_codes[str(statusCode)] = JoinableQueue()
            self.status_codes[str(statusCode)].put(workQueueItem)

            # SSL errors raised by the success handler are logged, not fatal.
            try:
                self.handleRequestSuccess(workQueueItem,response)
            except SSLError,e:
                print e

            return statusCode
#.........這裏部分代碼省略.........
開發者ID:hptk,項目名稱:YouTube-Video-and-Metadata-Fetcher,代碼行數:103,代碼來源:RequestBase.py

示例3: BaseCrawler

# 需要導入模塊: from gevent.queue import JoinableQueue [as 別名]
# 或者: from gevent.queue.JoinableQueue import empty [as 別名]
class BaseCrawler(object):
    def __init__(self, requestHandler=None,
                       parseHandler=None,
                       sheduler=None,
                       pipeline=None):
        """Wire up the crawler's collaborators and working queues.

        :param requestHandler: fetches a response for a task (default: fresh BaseRequestHandler)
        :param parseHandler: turns a response into (result, new_tasks) (default: fresh BaseParseHandler)
        :param sheduler: supplies the initial task generator (default: fresh BaseScheduler)
        :param pipeline: consumes batched results (default: fresh BasePipeline)
        """
        # BUGFIX: the defaults used to be instances created in the signature
        # (e.g. requestHandler=BaseRequestHandler()), which Python evaluates
        # once at definition time — every crawler then shared the same handler
        # objects.  None sentinels give each instance its own fresh defaults.
        self.requestHandler = requestHandler if requestHandler is not None else BaseRequestHandler()
        self.parseHandler = parseHandler if parseHandler is not None else BaseParseHandler()
        self.sheduler = sheduler if sheduler is not None else BaseScheduler()
        self.pipeline = pipeline if pipeline is not None else BasePipeline()
        self.task_queue = JoinableQueue()
        self.response_queue = JoinableQueue()
        self.tasks_cnt = 0                    # tasks produced but not yet finished
        self.result_queue = JoinableQueue()
        self.jobs_cnt = config.num_threads    # live worker greenlets
        self.start_time = time.time()
        self.stop = False                     # set True to abort the scheduler wait
    
    def doScheduler(self):
        """Single-threaded task producer.

        Seeds the task queue from the scheduler's generator, waits until the
        workers have drained every task (or self.stop is raised), then pushes
        one StopIteration sentinel per worker so they shut down.
        """
        logging.info('scheduler started!')
        for task in self.sheduler.init_generator():
            self.task_queue.put(task)
            self.tasks_cnt += 1

        # Workers decrement tasks_cnt as they finish; poll until all done.
        while self.tasks_cnt > 0 and not self.stop:
            gevent.sleep(config.new_task_check_time)

        logging.info('scheduler finished! All task done.')

        # One shutdown sentinel per worker.  range() replaces the
        # Python-2-only xrange(); iteration behaviour is identical here.
        for i in range(config.num_threads):
            self.task_queue.put(StopIteration)

    def worker(self):
        """Fetch url and parse; runs in each of config.num_threads greenlets.

        Pulls tasks from task_queue until the StopIteration sentinel arrives.
        Each task is fetched and parsed; the result (if any) goes to
        result_queue and newly discovered tasks are queued back.  Failures are
        retried up to config.error_retry_cnt times before the task is dropped.
        """
        task = self.task_queue.get()
        cnt = config.error_retry_cnt   # remaining retries for the current task
        while task != StopIteration:
            try:
                #timeout = gevent.Timeout(config.TASK_TIMEOUT)
                #timeout.start()
                response = self.requestHandler.handle(task)
                result, new_tasks = self.parseHandler.handle(response)
                #timeout.cancel()
                #if isinstance(result, collections.Iterable):
                #if isinstance(result, list):
                #    for ret in result:
                #        self.result_queue.put(ret)
                #else:
                if result:
                    self.result_queue.put(result)
                # NOTE: this loop reuses the `task` variable; the get() below
                # overwrites it again before the next iteration.
                for task in new_tasks:
                    self.task_queue.put(task)
                    self.tasks_cnt += 1
                #self.task_queue.task_done()
                # Current task finished: update the counter, fetch the next
                # task and reset the retry budget.
                self.tasks_cnt -= 1
                task = self.task_queue.get()
                cnt = config.error_retry_cnt
            except Exception as e:
                try:
                    #timeout.cancel()
                    cnt -= 1
                    logging.exception(e)
                    if cnt <= 0:
                        # Retries exhausted: give up on this task and move on.
                        #self.task_queue.task_done()
                        self.tasks_cnt -= 1
                        task = self.task_queue.get()
                        logging.error('task failed, try \033[31m%d\033[0m times! will not try' % (config.error_retry_cnt - cnt))
                        cnt = config.error_retry_cnt
                    #logging.exception('task failed!')
                    else:
                        # Keep the same task; the loop will retry it.
                        logging.error('task failed, try \033[31m%d\033[0m times!' % (config.error_retry_cnt - cnt))
                except Exception as e:
                    # Even error handling failed — count the task as gone and
                    # let the exception propagate to kill this worker.
                    self.tasks_cnt -= 1
                    #self.jobs_cnt -= 1
                    raise
            finally:
                #timeout.cancel()
                pass
        # Sentinel received: this worker is done.
        self.jobs_cnt -= 1

    def doPipeline(self):
        """Drain result_queue in batches and hand each batch to the pipeline.

        Runs until every worker has exited (jobs_cnt == 0) and the result
        queue is empty.
        """
        while self.jobs_cnt > 0 or not self.result_queue.empty():
            gevent.sleep(config.pipeline_sleeptime)
            results = []
            try:
                while 1:
                    results.append(self.result_queue.get_nowait())
                    # Cap the batch at ~100 items; raising Empty deliberately
                    # reuses the "queue drained" exit path below.
                    if len(results) > 100:
                        raise gevent.queue.Empty
            except gevent.queue.Empty:
                if results:
                    try:
                        self.pipeline.process(results)
                    except:
                        logging.exception('')
                #logging.exception('')
            except:
                logging.exception('')
#.........這裏部分代碼省略.........
開發者ID:atupal,項目名稱:ccrawler,代碼行數:103,代碼來源:stand_alone_run.py


注:本文中的gevent.queue.JoinableQueue.empty方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。