This article collects typical usage examples of the Python class core.fetcher.Fetcher. If you are wondering what the Fetcher class does or how to use it, the hand-picked examples below should help.
The following 13 code examples of the Fetcher class are sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
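As a quick orientation before the examples, here is a minimal sketch of the call pattern they all share. The fetch_url signature is inferred from the examples themselves (most call it as fetch_url(url, user_agent, timeout, limit_len=...), while Examples 7 and 9 pass an HTTP method as the second argument), so treat the exact parameters, and the config import path, as assumptions rather than documented API:

from core.fetcher import Fetcher
from core import conf  # hypothetical import path for the project's config module

fetcher = Fetcher()
# Every example below unpacks the same (response_code, content, headers) triple
response_code, content, headers = fetcher.fetch_url(
    "/robots.txt", conf.user_agent, conf.fetch_timeout_secs, limit_len=False
)
if response_code == 200:
    print(headers.get("content-type"), len(content))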
Example 1: execute
def execute():
    """ Fetch /.svn/entries and parse for target paths """
    textutils.output_info(" - Svn Plugin: Searching for /.svn/entries")
    target_url = conf.target_base_path + "/.svn/entries"

    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(
        target_url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False
    )

    if response_code in (200, 302):
        if conf.allow_download:
            textutils.output_info(" - Svn Plugin: /.svn/entries found! crawling... (will download files to output/)")
        else:
            textutils.output_info(
                " - Svn Plugin: /.svn/entries found! crawling... (use -a to download files instead of printing)"
            )
        # Parse entries
        parse_svn_entries(conf.target_base_path)

        # Clean up the display
        if conf.allow_download:
            textutils.output_info("")
    else:
        textutils.output_info(" - Svn Plugin: no /.svn/entries found")
Example 2: TestFileExistsWorker
class TestFileExistsWorker(Thread):
    """ This worker gets a URL from the work queue and calls the URL fetcher """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get("url")
                description = queued.get("description")
                match_string = queued.get("match_string")

                textutils.output_debug("Testing: " + url + " " + str(queued))
                stats.update_stats(url)

                # Fetch the target url
                start_time = datetime.now()
                if match_string:
                    response_code, content, headers = self.fetcher.fetch_url(
                        url, conf.user_agent, database.latest_successful_request_time, limit_len=False
                    )
                else:
                    response_code, content, headers = self.fetcher.fetch_url(
                        url, conf.user_agent, database.latest_successful_request_time
                    )
                end_time = datetime.now()

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code == 500:
                    textutils.output_found("ISE, " + description + " at: " + conf.target_host + url)
                elif response_code in conf.expected_file_responses:
                    # If the CRC mismatches and we have an expected code, we found a valid link
                    if match_string and re.search(re.escape(match_string), content, re.I):
                        textutils.output_found("String-Matched " + description + " at: " + conf.target_host + url)
                    elif test_valid_result(content):
                        textutils.output_found(description + " at: " + conf.target_host + url)
                elif response_code in conf.redirect_codes:
                    location = headers.get("location")
                    if location:
                        handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Mark item as processed
                database.fetch_queue.task_done()
            except Empty:
                continue
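The worker above only consumes database.fetch_queue; the project's driver code is not shown on this page. Below is a minimal sketch of how such a pool might be started and shut down, assuming the queue has already been populated. The thread count and the join/kill sequence are assumptions, not the project's actual runner:

workers = [TestFileExistsWorker(thread_id=i) for i in range(10)]
for worker in workers:
    worker.daemon = True
    worker.start()

# Blocks until task_done() has been called for every queued item
database.fetch_queue.join()

for worker in workers:
    worker.kill_received = True  # ask each run() loop to exit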
Example 3: execute
def execute():
    """ Fetch /.svn/entries and parse for target paths """
    current_template = dict(conf.path_template)
    current_template['description'] = '/.svn/entries found directory'

    target_url = urljoin(conf.target_base_path, "/.svn/entries")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False)
    if response_code in (200, 302) and content:
        added = 0
        try:
            tree = ElementTree.fromstring(content)
            entry_tags = tree.iter()
            if entry_tags:
                for entry in entry_tags:
                    kind = entry.attrib.get("kind")
                    if kind and kind == "dir":
                        current_template = current_template.copy()
                        current_template['url'] = '/' + entry.attrib["name"]
                        database.paths.append(current_template)
                        added += 1
        except Exception:
            textutils.output_info(' - Svn Plugin: no usable entries in /.svn/entries')
        else:
            if added > 0:
                textutils.output_info(' - Svn Plugin: added ' + str(added) + ' base paths using /.svn/entries')
            else:
                textutils.output_info(' - Svn Plugin: no usable entries in /.svn/entries')
    else:
        textutils.output_info(' - Svn Plugin: no /.svn/entries found')
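Unlike Example 6, this variant parses the XML flavor of the entries file that old SVN clients produced. As a self-contained illustration of the same extraction, here is the parsing logic run against a made-up sample; the XML body is illustrative only, not captured from a real working copy:

from xml.etree import ElementTree

sample = (
    '<wc-entries xmlns="svn:">'
    '<entry name="" kind="dir"/>'
    '<entry name="trunk" kind="dir"/>'
    '<entry name="README.txt" kind="file"/>'
    '</wc-entries>'
)
tree = ElementTree.fromstring(sample)
# Keep named directory entries, exactly as the plugin does
dirs = [e.attrib["name"] for e in tree.iter()
        if e.attrib.get("kind") == "dir" and e.attrib.get("name")]
print(dirs)  # ['trunk']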
Example 4: get_session_cookies
def get_session_cookies():
    """ Fetch initial session cookies """
    textutils.output_info('Fetching session cookie')
    path = conf.path_template.copy()
    path['url'] = '/'

    # We're not using the fetch cache for session cookie sampling
    fetcher = Fetcher()

    code, content, headers = fetcher.fetch_url('/', conf.user_agent, 10)
    if code == 200:
        cookies = headers.get('Set-Cookie')
        if cookies:
            database.session_cookie = cookies
Example 5: FetchCrafted404Worker
class FetchCrafted404Worker(Thread):
    """
    This worker fetches length-limited 404 footprints and stores them for Ratcliff-Obershelp comparison
    """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')

                textutils.output_debug("Fetching crafted 404: " + str(url))
                stats.update_stats(url)

                # Fetch the target url
                timeout = False
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, conf.fetch_timeout_secs)

                # Handle fetch timeouts by re-adding the url back to the global fetch queue
                # if the timeout count is under the max timeout count
                if response_code == 0 or response_code == 500:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                    # Increase throttle delay
                    throttle.increase_throttle_delay()
                    timeout = True
                elif response_code in conf.expected_file_responses:
                    # The server responded with anything but a 404 or invalid stuff (500), so we take a sample
                    if len(content) < conf.file_sample_len:
                        crafted_404 = content[0:len(content) - 1]
                    else:
                        crafted_404 = content[0:conf.file_sample_len - 1]

                    database.crafted_404s.append(crafted_404)

                    # Exception case for root 404, since it's used as a model for other directories
                    textutils.output_debug("Computed and saved a sample 404 for: " + str(queued) + ": " + crafted_404)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Decrease throttle delay if needed
                if not timeout:
                    throttle.decrease_throttle_delay()

                # Dequeue item
                stats.update_processed_items()
                database.fetch_queue.task_done()
            except Empty:
                continue

        textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
Example 6: parse_svn_entries
def parse_svn_entries(url):
    description_file = "SVN entries file"
    description_dir = "SVN entries dir"
    target_url = url + "/.svn/entries"

    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(
        target_url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False
    )

    if response_code in (200, 302) and content:
        tokens = content.split("\n")
        if "dir" in tokens:
            for pos, token in enumerate(tokens):
                if token == "dir":
                    # Fetch more entries recursively
                    if tokens[pos - 1] != "":
                        textutils.output_debug(" - Svn Plugin: Found dir: " + url + "/" + tokens[pos - 1])

                        if conf.allow_download:
                            textutils.output_info(" - Svn Plugin: Downloading: " + url + "/" + tokens[pos - 1] + "\r")
                        else:
                            textutils.output_found(description_dir + " at: " + url + "/" + tokens[pos - 1])

                        # Parse next
                        parse_svn_entries(url + "/" + tokens[pos - 1])
                elif token == "file":
                    textutils.output_debug(" - Svn Plugin: Found file: " + url + "/" + tokens[pos - 1])
                    if conf.allow_download:
                        textutils.output_info(" - Svn Plugin: Downloading: " + url + "/" + tokens[pos - 1] + "\r")
                        # Fetch the text-base copy of the file
                        path = url + "/.svn/text-base" + "/" + tokens[pos - 1] + ".svn-base"
                        fetcher = Fetcher()
                        response_code, content, headers = fetcher.fetch_url(
                            path, conf.user_agent, conf.fetch_timeout_secs, limit_len=False
                        )
                        save_file(url + "/" + tokens[pos - 1], content)
                    else:
                        textutils.output_found(description_file + " at: " + url + "/" + tokens[pos - 1])
Example 7: execute
def execute():
    """ Fetch /robots.txt and add the disallowed paths as targets """
    worker_template = {'url': '', 'expected_response': [200, 302], 'timeout_count': 0, 'description': 'Robots.txt entry'}

    target_url = urljoin(conf.target_host, "/robots.txt")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, 'GET', conf.user_agent, True, conf.fetch_timeout_secs)
    if response_code in (200, 302) and content:
        if conf.debug:
            utils.output_debug(content)

        matches = re.findall(r'Disallow:\s*[a-zA-Z0-9-/.]*', content)

        added = 0
        for match_obj in matches:
            if '?' not in match_obj and '.' not in match_obj:
                splitted = match_obj.split(':')
                if splitted[1]:
                    path = splitted[1].strip()
                    if path != '/' and path != '':
                        new_path = urljoin(conf.target_host, path)
                        current_template = dict(worker_template)
                        current_template['url'] = new_path
                        database.paths.append(current_template)
                        if conf.debug:
                            utils.output_debug(str(current_template))
                        added += 1

        if added > 0:
            utils.output_info('Robots plugin: added ' + str(added) + ' base paths using /robots.txt')
        else:
            utils.output_info('Robots plugin: no usable entries in /robots.txt')
    else:
        utils.output_info('Robots plugin: /robots.txt not found on target site')
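For clarity, here is what that Disallow pattern actually captures, run against a made-up robots.txt body. Note that the character class cannot match '?', so a query string simply truncates the match rather than triggering the "'?' not in match_obj" filter; the '.' filter is what discards file-like paths:

import re

content = "User-agent: *\nDisallow: /admin/\nDisallow: /search?q=\nDisallow: /logo.png\n"
print(re.findall(r'Disallow:\s*[a-zA-Z0-9-/.]*', content))
# ['Disallow: /admin/', 'Disallow: /search', 'Disallow: /logo.png']
# The filter above then keeps /admin/ and /search but drops /logo.png.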
Example 8: TestPathExistsWorker
class TestPathExistsWorker(Thread):
    """ This worker tests whether a path exists. Each path is matched against a fake generated path while scanning root. """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')
                description = queued.get('description')

                textutils.output_debug("Testing directory: " + url + " " + str(queued))
                stats.update_stats(url)

                # Throttle if needed
                # if throttle.get_throttle() > 0:
                #     sleep(throttle.get_throttle())

                # Add a trailing / for paths
                if not url.endswith('/') and url != '/':
                    url += '/'

                # Fetch directory
                timeout = False
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False)

                # Fetch '/' but don't submit it to more logging/existence tests
                if queued.get('url') == '/':
                    if queued not in database.valid_paths:
                        database.valid_paths.append(queued)
                    database.fetch_queue.task_done()
                    continue

                if response_code == 500:
                    textutils.output_debug("HIT 500 on: " + str(queued))

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                    # Increase throttle delay
                    throttle.increase_throttle_delay()
                    timeout = True
                elif response_code in conf.expected_path_responses:
                    # Compare content with generated 404 samples
                    is_valid_result = test_valid_result(content)

                    # Skip subfile testing if forbidden
                    if response_code == 401:
                        # Output the result, but don't keep the url since we can't poke inside a protected folder
                        textutils.output_found('Password Protected - ' + description + ' at: ' + conf.target_host + url)
                    elif is_valid_result:
                        # Add the path to valid_paths for future actions
                        database.valid_paths.append(queued)

                        if response_code == 500:
                            textutils.output_found('ISE, ' + description + ' at: ' + conf.target_host + url)
                        elif response_code == 403:
                            textutils.output_found('*Forbidden* ' + description + ' at: ' + conf.target_host + url)
                        else:
                            textutils.output_found(description + ' at: ' + conf.target_host + url)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Decrease throttle delay if needed
                if not timeout:
                    throttle.decrease_throttle_delay()

                # Mark item as processed
                stats.update_processed_items()
                database.fetch_queue.task_done()
            except Empty:
                continue
Example 9: FetchUrlWorker
class FetchUrlWorker(Thread):
    """ This worker gets a URL from the work queue and calls the URL fetcher """
    def __init__(self, thread_id, output):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            # Don't wait for any items if empty
            if not database.fetch_queue.empty():
                queued = database.fetch_queue.get()
                url = urljoin(conf.target_host, queued.get('url'))
                expected = queued.get('expected_response')
                description = queued.get('description')
                content_type_blacklist = queued.get('blacklist_content_types')

                if not content_type_blacklist:
                    content_type_blacklist = []

                if conf.use_get:
                    method = 'GET'
                else:
                    method = 'HEAD'

                response_code, content, headers = self.fetcher.fetch_url(url, method, conf.user_agent, False, conf.fetch_timeout_secs)

                if conf.debug:
                    utils.output_info("Thread #" + str(self.thread_id) + ": " + str(queued))

                if response_code == 0:  # timeout
                    if queued.get('timeout_count') < conf.max_timeout_count:
                        new_timeout_count = queued.get('timeout_count') + 1
                        queued['timeout_count'] = new_timeout_count

                        if conf.debug:
                            utils.output_info('Thread #' + str(self.thread_id) + ': re-queuing ' + str(queued))

                        # Add back the timed-out item
                        database.fetch_queue.put(queued)
                    else:
                        utils.output_timeout(url)
                elif response_code in expected:
                    # Response content type
                    content_type = headers.get('content-type')
                    if not content_type:
                        content_type = ''

                    # Fuse with the current url (/test becomes url.dom/test)
                    queued['url'] = urljoin(conf.target_host, queued['url'])

                    # If we don't blacklist, just show the result
                    if not conf.content_type_blacklist:
                        if self.output:
                            if response_code == 401:
                                utils.output_found('*Password Protected* ' + description + ' at: ' + url)
                            else:
                                utils.output_found(description + ' at: ' + url)
                        # Add to valid paths
                        database.valid_paths.append(queued)
                    # If we DO blacklist but the content type is not blacklisted, show the result
                    elif content_type not in content_type_blacklist:
                        if self.output:
                            if response_code == 401:
                                utils.output_found('*Password Protected* ' + description + ' at: ' + url)
                            else:
                                utils.output_found(description + ' at: ' + url)
                        # Add to valid paths
                        database.valid_paths.append(queued)

                # Mark item as processed
                database.fetch_queue.task_done()
Example 10: __init__
def __init__(self, thread_id, output=True):
    Thread.__init__(self)
    self.kill_received = False
    self.thread_id = thread_id
    self.fetcher = Fetcher()
    self.output = output
Example 11: TestPathExistsWorker
class TestPathExistsWorker(Thread):
    """ This worker tests whether a path exists. Each path is matched against a fake generated path while scanning root. """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get("url")
                description = queued.get("description")

                textutils.output_debug("Testing directory: " + url + " " + str(queued))
                stats.update_stats(url)

                # Add a trailing / for paths
                if not url.endswith("/") and url != "/":
                    url += "/"

                # Fetch directory
                start_time = datetime.now()
                response_code, content, headers = self.fetcher.fetch_url(
                    url, conf.user_agent, database.latest_successful_request_time, limit_len=False
                )
                end_time = datetime.now()

                # Fetch '/' but don't submit it to more logging/existence tests
                if queued.get("url") == "/":
                    if queued not in database.valid_paths:
                        database.valid_paths.append(queued)
                    database.fetch_queue.task_done()
                    continue

                if response_code == 500:
                    textutils.output_debug("HIT 500 on: " + str(queued))

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code == 404 and detect_tomcat_fake_404(content):
                    database.valid_paths.append(queued)
                    textutils.output_found("Tomcat redirect, " + description + " at: " + conf.target_host + url)
                elif response_code in conf.expected_path_responses:
                    # Compare content with generated 404 samples
                    is_valid_result = test_valid_result(content)

                    # Skip subfile testing if forbidden
                    if response_code == 401:
                        # Output the result, but don't keep the url since we can't poke inside a protected folder
                        textutils.output_found("Password Protected - " + description + " at: " + conf.target_host + url)
                    elif is_valid_result:
                        # Add the path to valid_paths for future actions
                        database.valid_paths.append(queued)

                        if response_code == 500:
                            textutils.output_found("ISE, " + description + " at: " + conf.target_host + url)
                        elif response_code == 403:
                            textutils.output_found("*Forbidden* " + description + " at: " + conf.target_host + url)
                        else:
                            textutils.output_found(description + " at: " + conf.target_host + url)
                elif response_code in conf.redirect_codes:
                    location = headers.get("location")
                    if location:
                        handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Mark item as processed
                database.fetch_queue.task_done()
            except Empty:
                continue
Example 12: FetchCrafted404Worker
class FetchCrafted404Worker(Thread):
    """
    This worker fetches length-limited 404 footprints and stores them for Ratcliff-Obershelp comparison
    """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')

                textutils.output_debug("Fetching crafted 404: " + str(url))
                stats.update_stats(url)

                # Fetch the target url
                start_time = datetime.now()
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, database.latest_successful_request_time)
                end_time = datetime.now()

                # Handle fetch timeouts by re-adding the url back to the global fetch queue
                # if the timeout count is under the max timeout count
                if response_code == 0 or response_code == 500:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                elif response_code in conf.expected_file_responses:
                    # Encoding edge case:
                    # content must be a string to be compared to the 404 fingerprint
                    if not isinstance(content, str):
                        content = content.decode('utf-8', 'ignore')

                    # The server responded with anything but a 404 or invalid stuff (500), so we take a sample
                    if not len(content):
                        crafted_404 = ""  # Empty file, still a forged 404
                    elif len(content) < conf.file_sample_len:
                        crafted_404 = content[0:len(content) - 1]
                    else:
                        crafted_404 = content[0:conf.file_sample_len - 1]

                    crafted_404 = crafted_404.strip('\r\n ')
                    database.crafted_404s.append(crafted_404)

                    # Exception case for root 404, since it's used as a model for other directories
                    textutils.output_debug("Computed and saved a sample 404 for: " + str(queued) + ": " + crafted_404)
                elif response_code in conf.redirect_codes:
                    if queued.get('handle_redirect', True):
                        location = headers.get('location')
                        if location:
                            handle_redirects(queued, location)

                # Stats
                if response_code not in conf.timeout_codes:
                    stats.update_processed_items()
                    compute_request_time(start_time, end_time)

                # Dequeue item
                database.fetch_queue.task_done()
            except Empty:
                continue

        textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
Example 13: TestFileExistsWorker
class TestFileExistsWorker(Thread):
    """ This worker gets a URL from the work queue and calls the URL fetcher """
    def __init__(self, thread_id, output=True):
        Thread.__init__(self)
        self.kill_received = False
        self.thread_id = thread_id
        self.fetcher = Fetcher()
        self.output = output

    def run(self):
        while not self.kill_received:
            try:
                # Non-blocking get since we use the queue as a ring buffer
                queued = database.fetch_queue.get(False)
                url = conf.target_base_path + queued.get('url')
                description = queued.get('description')
                match_string = queued.get('match_string')

                textutils.output_debug("Testing: " + url + " " + str(queued))
                stats.update_stats(url)

                # Throttle if needed
                # if throttle.get_throttle() > 0:
                #     sleep(throttle.get_throttle())

                # Fetch the target url
                timeout = False
                if match_string:
                    response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, conf.fetch_timeout_secs, limit_len=False)
                else:
                    response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, conf.fetch_timeout_secs)

                # Handle timeout
                if response_code in conf.timeout_codes:
                    handle_timeout(queued, url, self.thread_id, output=self.output)
                    throttle.increase_throttle_delay()
                    timeout = True
                elif response_code in conf.expected_file_responses:
                    # Compare content with generated 404 samples
                    is_valid_result = test_valid_result(content)

                    # If the CRC mismatches and we have an expected code, we found a valid link
                    if is_valid_result:
                        # Content test if a match_string was provided
                        if match_string and re.search(re.escape(match_string), content, re.I):
                            # Add the path to valid_paths for future actions
                            database.valid_paths.append(queued)
                            textutils.output_found("String-Matched " + description + ' at: ' + conf.target_host + url)
                        elif not match_string:
                            if response_code == 500:
                                textutils.output_found('ISE, ' + description + ' at: ' + conf.target_host + url)
                            else:
                                textutils.output_found(description + ' at: ' + conf.target_host + url)

                            # Add the path to valid_paths for future actions
                            database.valid_paths.append(queued)
                elif response_code in conf.redirect_codes:
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

                # Decrease throttle delay if needed
                if not timeout:
                    throttle.decrease_throttle_delay()

                # Mark item as processed
                stats.update_processed_items()
                database.fetch_queue.task_done()
            except Empty:
                continue