This article collects typical usage examples of the Python class core.fetcher.Fetcher. If you are wondering what the Fetcher class does or how to use it, the hand-picked examples below should help.
The following 13 code examples of the Fetcher class are sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
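As a quick orientation before the examples, here is a minimal sketch of the call pattern they all share. The fetch_url signature is inferred from the examples themselves (most call it as fetch_url(url, user_agent, timeout, limit_len=...), while Examples 7 and 9 pass an HTTP method as the second argument), so treat the exact parameters, and the config import path, as assumptions rather than documented API:

from core.fetcher import Fetcher
from core import conf  # hypothetical import path for the project's config module

fetcher = Fetcher()
# Every example below unpacks the same (response_code, content, headers) triple
response_code, content, headers = fetcher.fetch_url(
    "/robots.txt", conf.user_agent, conf.fetch_timeout_secs, limit_len=False
)
if response_code == 200:
    print(headers.get("content-type"), len(content))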
Example 1: execute
def execute():
    """ Fetch /.svn/entries and parse for target paths """
    textutils.output_info(" - Svn Plugin: Searching for /.svn/entries")
    target_url = conf.target_base_path + "/.svn/entries"

    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(
        target_url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False
    )

    if response_code in (200, 302):
        if conf.allow_download:
            textutils.output_info(" - Svn Plugin: /.svn/entries found! crawling... (will download files to output/)")
        else:
            textutils.output_info(
                " - Svn Plugin: /.svn/entries found! crawling... (use -a to download files instead of printing)"
            )
        # Parse entries
        parse_svn_entries(conf.target_base_path)

        # Clean up the display
        if conf.allow_download:
            textutils.output_info("")
    else:
        textutils.output_info(" - Svn Plugin: no /.svn/entries found")
Example 2: TestFileExistsWorker
class TestFileExistsWorker(Thread):
    """ This worker gets a URL from the work queue and calls the URL fetcher """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get("url")
                description = queued.get("description")
                match_string = queued.get("match_string")

                textutils.output_debug("Testing: " + url + " " + str(queued))
                stats.update_stats(url)

                # Fetch the target url
                start_time = datetime.now()
                if match_string:
                    response_code, content, headers = self.fetcher.fetch_url(
                        url, conf.user_agent, database.latest_successful_request_time, limit_len=False
                    )
                else:
                    response_code, content, headers = self.fetcher.fetch_url(
                        url, conf.user_agent, database.latest_successful_request_time
                    )
                end_time = datetime.now()

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code == 500:
                    textutils.output_found("ISE, " + description + " at: " + conf.target_host + url)
                elif response_code in conf.expected_file_responses:
                    # If the CRC mismatches and we have an expected code, we found a valid link
                    if match_string and re.search(re.escape(match_string), content, re.I):
                        textutils.output_found("String-Matched " + description + " at: " + conf.target_host + url)
                    elif test_valid_result(content):
                        textutils.output_found(description + " at: " + conf.target_host + url)
                elif response_code in conf.redirect_codes:
                    location = headers.get("location")
                    if location:
                        handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Mark item as processed
                database.fetch_queue.task_done()
            except Empty:
                continue
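The worker above only consumes database.fetch_queue; the project's driver code is not shown on this page. Below is a minimal sketch of how such a pool might be started and shut down, assuming the queue has already been populated. The thread count and the join/kill sequence are assumptions, not the project's actual runner:

workers = [TestFileExistsWorker(thread_id=i) for i in range(10)]
for worker in workers:
    worker.daemon = True
    worker.start()

# Blocks until task_done() has been called for every queued item
database.fetch_queue.join()

for worker in workers:
    worker.kill_received = True  # ask each run() loop to exit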
Example 3: execute
def execute():
    """ Fetch /.svn/entries and parse for target paths """
    current_template = dict(conf.path_template)
    current_template['description'] = '/.svn/entries found directory'

    target_url = urljoin(conf.target_base_path, "/.svn/entries")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False)
    if response_code in (200, 302) and content:
        added = 0
        try:
            tree = ElementTree.fromstring(content)
            entry_tags = tree.iter()
            if entry_tags:
                for entry in entry_tags:
                    kind = entry.attrib.get("kind")
                    if kind and kind == "dir":
                        current_template = current_template.copy()
                        current_template['url'] = '/' + entry.attrib["name"]
                        database.paths.append(current_template)
                        added += 1
        except Exception:
            textutils.output_info(' - Svn Plugin: no usable entries in /.svn/entries')
        else:
            if added > 0:
                textutils.output_info(' - Svn Plugin: added ' + str(added) + ' base paths using /.svn/entries')
            else:
                textutils.output_info(' - Svn Plugin: no usable entries in /.svn/entries')
    else:
        textutils.output_info(' - Svn Plugin: no /.svn/entries found')
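Unlike Example 6, this variant parses the XML flavor of the entries file that old SVN clients produced. As a self-contained illustration of the same extraction, here is the parsing logic run against a made-up sample; the XML body is illustrative only, not captured from a real working copy:

from xml.etree import ElementTree

sample = (
    '<wc-entries xmlns="svn:">'
    '<entry name="" kind="dir"/>'
    '<entry name="trunk" kind="dir"/>'
    '<entry name="README.txt" kind="file"/>'
    '</wc-entries>'
)
tree = ElementTree.fromstring(sample)
# Keep named directory entries, exactly as the plugin does
dirs = [e.attrib["name"] for e in tree.iter()
        if e.attrib.get("kind") == "dir" and e.attrib.get("name")]
print(dirs)  # ['trunk']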
Example 4: get_session_cookies
def get_session_cookies():
    """ Fetch initial session cookies """
    textutils.output_info('Fetching session cookie')
    path = conf.path_template.copy()
    path['url'] = '/'

    # We're not using the fetch cache for session cookie sampling
    fetcher = Fetcher()

    code, content, headers = fetcher.fetch_url('/', conf.user_agent, 10)
    if code == 200:
        cookies = headers.get('Set-Cookie')
        if cookies:
            database.session_cookie = cookies
Example 5: FetchCrafted404Worker
class FetchCrafted404Worker(Thread):
    """
    This worker fetches length-limited 404 footprints and stores them for Ratcliff-Obershelp comparison
    """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')

                textutils.output_debug("Fetching crafted 404: " + str(url))
                stats.update_stats(url)

                # Fetch the target url
                timeout = False
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, conf.fetch_timeout_secs)

                # Handle fetch timeouts by re-adding the url back to the global fetch queue
                # if the timeout count is under the max timeout count
                if response_code == 0 or response_code == 500:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                    # Increase throttle delay
                    throttle.increase_throttle_delay()
                    timeout = True
                elif response_code in conf.expected_file_responses:
                    # The server responded with anything but a 404 or invalid stuff (500), so we take a sample
                    if len(content) < conf.file_sample_len:
                        crafted_404 = content[0:len(content) - 1]
                    else:
                        crafted_404 = content[0:conf.file_sample_len - 1]

                    database.crafted_404s.append(crafted_404)

                    # Exception case for root 404, since it's used as a model for other directories
                    textutils.output_debug("Computed and saved a sample 404 for: " + str(queued) + ": " + crafted_404)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Decrease throttle delay if needed
                if not timeout:
                    throttle.decrease_throttle_delay()

                # Dequeue item
                stats.update_processed_items()
                database.fetch_queue.task_done()
            except Empty:
                continue

        textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
Example 6: parse_svn_entries
def parse_svn_entries(url):
    description_file = "SVN entries file"
    description_dir = "SVN entries dir"
    target_url = url + "/.svn/entries"

    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(
        target_url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False
    )

    if response_code in (200, 302) and content:
        tokens = content.split("\n")
        if "dir" in tokens:
            for pos, token in enumerate(tokens):
                if token == "dir":
                    # Fetch more entries recursively
                    if tokens[pos - 1] != "":
                        textutils.output_debug(" - Svn Plugin: Found dir: " + url + "/" + tokens[pos - 1])

                        if conf.allow_download:
                            textutils.output_info(" - Svn Plugin: Downloading: " + url + "/" + tokens[pos - 1] + "\r")
                        else:
                            textutils.output_found(description_dir + " at: " + url + "/" + tokens[pos - 1])

                        # Parse next
                        parse_svn_entries(url + "/" + tokens[pos - 1])
                elif token == "file":
                    textutils.output_debug(" - Svn Plugin: Found file: " + url + "/" + tokens[pos - 1])
                    if conf.allow_download:
                        textutils.output_info(" - Svn Plugin: Downloading: " + url + "/" + tokens[pos - 1] + "\r")
                        # Fetch the text-base copy of the file
                        path = url + "/.svn/text-base" + "/" + tokens[pos - 1] + ".svn-base"
                        fetcher = Fetcher()
                        response_code, content, headers = fetcher.fetch_url(
                            path, conf.user_agent, conf.fetch_timeout_secs, limit_len=False
                        )
                        save_file(url + "/" + tokens[pos - 1], content)
                    else:
                        textutils.output_found(description_file + " at: " + url + "/" + tokens[pos - 1])
Example 7: execute
def execute():
    """ Fetch /robots.txt and add the disallowed paths as targets """
    worker_template = {'url': '', 'expected_response': [200, 302], 'timeout_count': 0, 'description': 'Robots.txt entry'}

    target_url = urljoin(conf.target_host, "/robots.txt")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, 'GET', conf.user_agent, True, conf.fetch_timeout_secs)
    if response_code in (200, 302) and content:
        if conf.debug:
            utils.output_debug(content)

        matches = re.findall(r'Disallow:\s*[a-zA-Z0-9-/.]*', content)

        added = 0
        for match_obj in matches:
            if '?' not in match_obj and '.' not in match_obj:
                splitted = match_obj.split(':')
                if splitted[1]:
                    path = splitted[1].strip()
                    if path != '/' and path != '':
                        new_path = urljoin(conf.target_host, path)
                        current_template = dict(worker_template)
                        current_template['url'] = new_path
                        database.paths.append(current_template)
                        if conf.debug:
                            utils.output_debug(str(current_template))
                        added += 1

        if added > 0:
            utils.output_info('Robots plugin: added ' + str(added) + ' base paths using /robots.txt')
        else:
            utils.output_info('Robots plugin: no usable entries in /robots.txt')
    else:
        utils.output_info('Robots plugin: /robots.txt not found on target site')
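For clarity, here is what that Disallow pattern actually captures, run against a made-up robots.txt body. Note that the character class cannot match '?', so a query string simply truncates the match rather than triggering the "'?' not in match_obj" filter; the '.' filter is what discards file-like paths:

import re

content = "User-agent: *\nDisallow: /admin/\nDisallow: /search?q=\nDisallow: /logo.png\n"
print(re.findall(r'Disallow:\s*[a-zA-Z0-9-/.]*', content))
# ['Disallow: /admin/', 'Disallow: /search', 'Disallow: /logo.png']
# The filter above then keeps /admin/ and /search but drops /logo.png.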
Example 8: TestPathExistsWorker
class TestPathExistsWorker(Thread):
    """ This worker tests whether a path exists. Each path is matched against a fake generated path while scanning root. """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')
                description = queued.get('description')

                textutils.output_debug("Testing directory: " + url + " " + str(queued))
                stats.update_stats(url)

                # Throttle if needed
                # if throttle.get_throttle() > 0:
                #     sleep(throttle.get_throttle())

                # Add a trailing / for paths
                if not url.endswith('/') and url != '/':
                    url += '/'

                # Fetch directory
                timeout = False
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False)

                # Fetch '/' but don't submit it to more logging/existence tests
                if queued.get('url') == '/':
                    if queued not in database.valid_paths:
                        database.valid_paths.append(queued)
                    database.fetch_queue.task_done()
                    continue

                if response_code == 500:
                    textutils.output_debug("HIT 500 on: " + str(queued))

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                    # Increase throttle delay
                    throttle.increase_throttle_delay()
                    timeout = True
                elif response_code in conf.expected_path_responses:
                    # Compare content with generated 404 samples
                    is_valid_result = test_valid_result(content)

                    # Skip subfile testing if forbidden
                    if response_code == 401:
                        # Output the result, but don't keep the url since we can't poke inside a protected folder
                        textutils.output_found('Password Protected - ' + description + ' at: ' + conf.target_host + url)
                    elif is_valid_result:
                        # Add the path to valid_paths for future actions
                        database.valid_paths.append(queued)

                        if response_code == 500:
                            textutils.output_found('ISE, ' + description + ' at: ' + conf.target_host + url)
                        elif response_code == 403:
                            textutils.output_found('*Forbidden* ' + description + ' at: ' + conf.target_host + url)
                        else:
                            textutils.output_found(description + ' at: ' + conf.target_host + url)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Decrease throttle delay if needed
                if not timeout:
                    throttle.decrease_throttle_delay()

                # Mark item as processed
                stats.update_processed_items()
                database.fetch_queue.task_done()
            except Empty:
                continue
Example 9: FetchUrlWorker
class FetchUrlWorker(Thread):
    """ This worker gets a URL from the work queue and calls the URL fetcher """
    def __init__(self, thread_id, output):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            # Don't wait for any items if empty
            if not database.fetch_queue.empty():
                queued = database.fetch_queue.get()
                url = urljoin(conf.target_host, queued.get('url'))
                expected = queued.get('expected_response')
                description = queued.get('description')
                content_type_blacklist = queued.get('blacklist_content_types')

                if not content_type_blacklist:
                    content_type_blacklist = []

                if conf.use_get:
                    method = 'GET'
                else:
                    method = 'HEAD'

                response_code, content, headers = self.fetcher.fetch_url(url, method, conf.user_agent, False, conf.fetch_timeout_secs)

                if conf.debug:
                    utils.output_info("Thread #" + str(self.thread_id) + ": " + str(queued))

                if response_code == 0:  # timeout
                    if queued.get('timeout_count') < conf.max_timeout_count:
                        new_timeout_count = queued.get('timeout_count') + 1
                        queued['timeout_count'] = new_timeout_count

                        if conf.debug:
                            utils.output_info('Thread #' + str(self.thread_id) + ': re-queuing ' + str(queued))

                        # Add back the timed-out item
                        database.fetch_queue.put(queued)
                    else:
                        utils.output_timeout(url)
                elif response_code in expected:
                    # Response content type
                    content_type = headers.get('content-type')
                    if not content_type:
                        content_type = ''

                    # Fuse with the current url (/test becomes url.dom/test)
                    queued['url'] = urljoin(conf.target_host, queued['url'])

                    # If we don't blacklist, just show the result
                    if not conf.content_type_blacklist:
                        if self.output:
                            if response_code == 401:
                                utils.output_found('*Password Protected* ' + description + ' at: ' + url)
                            else:
                                utils.output_found(description + ' at: ' + url)
                        # Add to valid paths
                        database.valid_paths.append(queued)
                    # If we DO blacklist but the content type is not blacklisted, show the result
                    elif content_type not in content_type_blacklist:
                        if self.output:
                            if response_code == 401:
                                utils.output_found('*Password Protected* ' + description + ' at: ' + url)
                            else:
                                utils.output_found(description + ' at: ' + url)
                        # Add to valid paths
                        database.valid_paths.append(queued)

                # Mark item as processed
                database.fetch_queue.task_done()
Example 10: __init__
def __init__(self, thread_id, output=True):
    Thread.__init__(self)
    self.kill_received = False
    self.thread_id = thread_id
    self.fetcher = Fetcher()
    self.output = output
Example 11: TestPathExistsWorker
class TestPathExistsWorker(Thread):
    """ This worker tests whether a path exists. Each path is matched against a fake generated path while scanning root. """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get("url")
                description = queued.get("description")

                textutils.output_debug("Testing directory: " + url + " " + str(queued))
                stats.update_stats(url)

                # Add a trailing / for paths
                if not url.endswith("/") and url != "/":
                    url += "/"

                # Fetch directory
                start_time = datetime.now()
                response_code, content, headers = self.fetcher.fetch_url(
                    url, conf.user_agent, database.latest_successful_request_time, limit_len=False
                )
                end_time = datetime.now()

                # Fetch '/' but don't submit it to more logging/existence tests
                if queued.get("url") == "/":
                    if queued not in database.valid_paths:
                        database.valid_paths.append(queued)
                    database.fetch_queue.task_done()
                    continue

                if response_code == 500:
                    textutils.output_debug("HIT 500 on: " + str(queued))

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code == 404 and detect_tomcat_fake_404(content):
                    database.valid_paths.append(queued)
                    textutils.output_found("Tomcat redirect, " + description + " at: " + conf.target_host + url)
                elif response_code in conf.expected_path_responses:
                    # Compare content with generated 404 samples
                    is_valid_result = test_valid_result(content)

                    # Skip subfile testing if forbidden
                    if response_code == 401:
                        # Output the result, but don't keep the url since we can't poke inside a protected folder
                        textutils.output_found("Password Protected - " + description + " at: " + conf.target_host + url)
                    elif is_valid_result:
                        # Add the path to valid_paths for future actions
                        database.valid_paths.append(queued)

                        if response_code == 500:
                            textutils.output_found("ISE, " + description + " at: " + conf.target_host + url)
                        elif response_code == 403:
                            textutils.output_found("*Forbidden* " + description + " at: " + conf.target_host + url)
                        else:
                            textutils.output_found(description + " at: " + conf.target_host + url)
                elif response_code in conf.redirect_codes:
                    location = headers.get("location")
                    if location:
                        handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Mark item as processed
                database.fetch_queue.task_done()
            except Empty:
                continue
Example 12: FetchCrafted404Worker
class FetchCrafted404Worker(Thread):
    """
    This worker fetches length-limited 404 footprints and stores them for Ratcliff-Obershelp comparison
    """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')

                textutils.output_debug("Fetching crafted 404: " + str(url))
                stats.update_stats(url)

                # Fetch the target url
                start_time = datetime.now()
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, database.latest_successful_request_time)
                end_time = datetime.now()

                # Handle fetch timeouts by re-adding the url back to the global fetch queue
                # if the timeout count is under the max timeout count
                if response_code == 0 or response_code == 500:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code in conf.expected_file_responses:
                    # Encoding edge case:
                    # content must be a string to be compared to the 404 fingerprint
                    if not isinstance(content, str):
                        content = content.decode('utf-8', 'ignore')

                    # The server responded with anything but a 404 or invalid stuff (500), so we take a sample
                    if not len(content):
                        crafted_404 = ""  # Empty file, still a forged 404
                    elif len(content) < conf.file_sample_len:
                        crafted_404 = content[0:len(content) - 1]
                    else:
                        crafted_404 = content[0:conf.file_sample_len - 1]

                    crafted_404 = crafted_404.strip('\r\n ')
                    database.crafted_404s.append(crafted_404)

                    # Exception case for root 404, since it's used as a model for other directories
                    textutils.output_debug("Computed and saved a sample 404 for: " + str(queued) + ": " + crafted_404)
                elif response_code in conf.redirect_codes:
                    if queued.get('handle_redirect', True):
                        location = headers.get('location')
                        if location:
                            handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Dequeue item
                database.fetch_queue.task_done()
            except Empty:
                continue

        textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
Example 13: TestFileExistsWorker
class TestFileExistsWorker(Thread):
    """ This worker gets a URL from the work queue and calls the URL fetcher """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')
                description = queued.get('description')
                match_string = queued.get('match_string')

                textutils.output_debug("Testing: " + url + " " + str(queued))
                stats.update_stats(url)

                # Throttle if needed
                # if throttle.get_throttle() > 0:
                #     sleep(throttle.get_throttle())

                # Fetch the target url
                timeout = False
                if match_string:
                    response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False)
                else:
                    response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, conf.fetch_timeout_secs)

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                    throttle.increase_throttle_delay()
                    timeout = True
                elif response_code in conf.expected_file_responses:
                    # Compare content with generated 404 samples
                    is_valid_result = test_valid_result(content)

                    # If the CRC mismatches and we have an expected code, we found a valid link
                    if is_valid_result:
                        # Content test if a match_string was provided
                        if match_string and re.search(re.escape(match_string), content, re.I):
                            # Add the path to valid_paths for future actions
                            database.valid_paths.append(queued)
                            textutils.output_found("String-Matched " + description + ' at: ' + conf.target_host + url)
                        elif not match_string:
                            if response_code == 500:
                                textutils.output_found('ISE, ' + description + ' at: ' + conf.target_host + url)
                            else:
                                textutils.output_found(description + ' at: ' + conf.target_host + url)

                            # Add the path to valid_paths for future actions
                            database.valid_paths.append(queued)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Decrease throttle delay if needed
                if not timeout:
                    throttle.decrease_throttle_delay()

                # Mark item as processed
                stats.update_processed_items()
                database.fetch_queue.task_done()
            except Empty:
                continue