本文整理汇总了Python中golismero.api.text.wordlist.WordListLoader.get_wordlist方法的典型用法代码示例。如果您正苦于以下问题:Python WordListLoader.get_wordlist方法的具体用法?Python WordListLoader.get_wordlist怎么用?Python WordListLoader.get_wordlist使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类golismero.api.text.wordlist.WordListLoader的用法示例。
在下文中一共展示了WordListLoader.get_wordlist方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: recv_info
# 需要导入模块: from golismero.api.text.wordlist import WordListLoader [as 别名]
# 或者: from golismero.api.text.wordlist.WordListLoader import get_wordlist [as 别名]
def recv_info(self, info):
    """
    Analyze an URL resource: flag suspicious keywords found in the path,
    flag high Shannon entropy host names, and download the content to
    look for outbound links to malware sites.

    :param info: URL to analyze.
    """
    parsed = info.parsed_url
    results = []

    # ------------------------------------------------------------------
    # Find suspicious URLs by matching against known substrings.

    # Load the keyword wordlists from the plugin configuration.
    wl_middle     = WordListLoader.get_wordlist(Config.plugin_config['middle'])
    wl_extensions = WordListLoader.get_wordlist(Config.plugin_config['extensions'])

    # Flag keywords appearing at any position of the URL path.
    dir_parts = parsed.directory.split("/")
    results.extend(
        SuspiciousURLPath(info, keyword)
        for keyword in wl_middle
        if keyword in dir_parts
        or keyword == parsed.filebase
        or keyword == parsed.extension
    )

    # Flag suspicious file extensions.
    results.extend(
        SuspiciousURLPath(info, keyword)
        for keyword in wl_extensions
        if parsed.extension == keyword
    )

    # ------------------------------------------------------------------
    # Find suspicious URLs by calculating the Shannon entropy of the
    # hostname. Idea from:
    # https://github.com/stricaud/urlweirdos/blob/master/src/urlw/plugins/shannon/__init__.py
    # TODO: test with unicode enabled hostnames!

    # Check the entropy of the whole hostname first...
    hostname = info.parsed_url.hostname
    if calculate_shannon_entropy(hostname) > 4.0:
        results.append(SuspiciousURLPath(info, hostname))

    # ...then each subdomain on its own (very short labels are skipped).
    for subdomain in info.parsed_url.hostname.split('.'):
        if len(subdomain) > 3:
            if calculate_shannon_entropy(subdomain) > 4.0:
                results.append(SuspiciousURLPath(info, subdomain))

    # ------------------------------------------------------------------
    #
    # Get malware suspicious links
    #
    # ------------------------------------------------------------------
    p = None
    m_url = info.url
    Logger.log_more_verbose("Looking for output links to malware sites")
    try:
        follow = Config.audit_config.follow_redirects or (
            info.depth == 0 and Config.audit_config.follow_first_redirect)
        p = download(m_url, self.check_download, allow_redirects=follow)
    except NetworkException as e:
        Logger.log_more_verbose("Error while processing %r: %s" % (m_url, str(e)))
示例2: load_wordlists
# 需要导入模块: from golismero.api.text.wordlist import WordListLoader [as 别名]
# 或者: from golismero.api.text.wordlist.WordListLoader import get_wordlist [as 别名]
def load_wordlists(wordlists):
    """
    Load the wordlists whose names are passed as parameter.

    This function receives a list of wordlist names, as defined in the
    plugin configuration file, and returns a dict mapping each matched
    name to its loaded wordlist instances.

    :param wordlists: list with wordlist names.
    :type wordlists: list

    :returns: A dict with wordlists.
    :rtype: dict
    """
    # Select which wordlist families to load: a family matches a request
    # when the family name is a (case insensitive) substring of it.
    m_tmp_wordlist = {}
    for l_w in wordlists:
        # .items() instead of .iteritems(): identical iteration behavior
        # on Python 2 and also works on Python 3.
        for wordlist_family, l_wordlists in Config.plugin_extra_config.items():
            if wordlist_family.lower() in l_w.lower():
                m_tmp_wordlist[l_w] = l_wordlists

    # Load every wordlist file of each selected family.
    m_return = {}
    for k, w_paths in m_tmp_wordlist.items():
        m_return[k] = [WordListLoader.get_wordlist(w) for w in w_paths]
    return m_return
示例3: analyze_url
# 需要导入模块: from golismero.api.text.wordlist import WordListLoader [as 别名]
# 或者: from golismero.api.text.wordlist.WordListLoader import get_wordlist [as 别名]
def analyze_url(self, info):
    """
    Analyze an URL resource: flag suspicious keywords found in the path
    and flag host names with a high Shannon entropy.

    :param info: URL to analyze.
    :return: list of SuspiciousURLPath results.
    """
    parsed = info.parsed_url
    results = []
    Logger.log_more_verbose("Processing URL: %s" % parsed)

    # ----------------------------------------------------------------------
    # Find suspicious URLs by matching against known substrings.

    # Load the keyword wordlists from the plugin configuration.
    wl_middle     = WordListLoader.get_wordlist(Config.plugin_config['middle'])
    wl_extensions = WordListLoader.get_wordlist(Config.plugin_config['extensions'])

    # Flag keywords appearing at any position of the URL path.
    dir_parts = parsed.directory.split("/")
    results.extend(
        SuspiciousURLPath(info, keyword)
        for keyword in wl_middle
        if keyword in dir_parts
        or keyword == parsed.filebase
        or keyword == parsed.extension
    )

    # Flag suspicious file extensions.
    results.extend(
        SuspiciousURLPath(info, keyword)
        for keyword in wl_extensions
        if parsed.extension == keyword
    )

    # ----------------------------------------------------------------------
    # Find suspicious URLs by calculating the Shannon entropy of the
    # hostname. Idea from:
    # https://github.com/stricaud/urlweirdos/blob/master/src/urlw/plugins/shannon/__init__.py
    # TODO: test with unicode enabled hostnames!

    # Check the entropy of the whole hostname first...
    hostname = info.parsed_url.hostname
    if calculate_shannon_entropy(hostname) > 4.0:
        results.append(SuspiciousURLPath(info, hostname))

    # ...then each subdomain on its own (very short labels are skipped).
    for subdomain in info.parsed_url.hostname.split('.'):
        if len(subdomain) > 3:
            if calculate_shannon_entropy(subdomain) > 4.0:
                results.append(SuspiciousURLPath(info, subdomain))

    return results
示例4: __detect_wordpress_installation
# 需要导入模块: from golismero.api.text.wordlist import WordListLoader [as 别名]
# 或者: from golismero.api.text.wordlist.WordListLoader import get_wordlist [as 别名]
def __detect_wordpress_installation(self, url, wordpress_urls):
    """
    Try to detect a wordpress instalation in the current path.

    :param url: URL where try to find the WordPress installation.
    :type url: str

    :param wordpress_urls: string with wordlist name with WordPress URLs.
    :type wordpress_urls: str

    :return: True if wordpress installation found. False otherwise.
    :rtype: bool
    """
    Logger.log_more_verbose("Detecting Wordpress instalation in URI: '%s'." % url)
    total_urls = 0
    urls_found = 0

    # Baseline error page, used to detect non-default 404 pages below.
    error_page = get_error_page(url).raw_data

    # Probe every known WordPress URL and count the ones that exist.
    for u in WordListLoader.get_wordlist(wordpress_urls):
        total_urls += 1
        tmp_url = urljoin(url, u)

        r = HTTP.get_url(tmp_url, use_cache=False)
        if r.status == "200":
            # Try to detect non-default error pages.
            ratio = get_diff_ratio(r.raw_response, error_page)
            if ratio < 0.35:
                urls_found += 1
        discard_data(r)

    # If more than 85% of the known URLs responded, report a detection.
    # BUGFIX: the original fell off the end (returning None) in this case,
    # and divided by zero when the wordlist was empty.
    if total_urls and (urls_found / float(total_urls)) >= 0.85:
        return True

    # If all fails, make another last test: wp-admin/ should redirect
    # to the login page on a real WordPress installation.
    url_wp_admin = urljoin(url, "wp-admin/")
    p = None
    try:
        p = HTTP.get_url(url_wp_admin, use_cache=False, allow_redirects=False)
        if p:
            discard_data(p)
    except Exception:
        return False

    # BUGFIX: guard against p being None before dereferencing it.
    if p is not None and p.status == "302" \
            and "wp-login.php?redirect_to=" in p.headers.get("Location", ""):
        return True
    return False
示例5: analyze_html
# 需要导入模块: from golismero.api.text.wordlist import WordListLoader [as 别名]
# 或者: from golismero.api.text.wordlist.WordListLoader import get_wordlist [as 别名]
def analyze_html(self, info):
    """
    Extract links from HTML or plain text content and collect the ones
    pointing outside the audit scope, to be checked for malware sites.

    :param info: HTML or plain text information to analyze.
    """
    #----------------------------------------------------------------------
    # Get malware suspicious links.
    Logger.log_more_verbose("Processing HTML: %s" % info.identity)

    # Load the malware wordlist.
    wordlist_filename = Config.plugin_config["malware_sites"]
    try:
        wordlist = WordListLoader.get_advanced_wordlist_as_list(
            wordlist_filename)
    except WordlistNotFound:
        Logger.log_error("Wordlist '%s' not found.." % wordlist_filename)
        return
    except TypeError:
        Logger.log_error(
            "Wordlist '%s' is not a file." % wordlist_filename)
        return
    if not wordlist:
        Logger.log_error("Wordlist '%s' is empty." % wordlist_filename)
        # NOTE(review): execution deliberately continues with an empty
        # wordlist, as in the original code — confirm this is intended.

    # Get the links from the document.
    # BUGFIX: removed leftover debug output (Logger.log("1"/"2"/"3")).
    base_urls = set()
    for url in info.find_linked_data(Data.TYPE_RESOURCE,
                                     Resource.RESOURCE_URL):
        m_url = url.url
        base_urls.add(m_url)
        if info.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(info.raw_data, m_url)
            m_links.update(extract_from_text(info.raw_data, m_url))
        elif info.information_type == Information.INFORMATION_PLAIN_TEXT:
            m_links = extract_from_text(info.raw_data, m_url)
        else:
            raise Exception("Internal error!")
    m_links.difference_update(base_urls)

    # If we have no links, abort now.
    if not m_links:
        Logger.log_verbose("No output links found.")
        return

    # Do not follow URLs that contain certain keywords.
    m_forbidden = WordListLoader.get_wordlist(
        Config.plugin_config["wordlist_no_spider"])
    m_urls_allowed = {
        url for url in m_links
        if url and not any(x in url for x in m_forbidden)
    }

    # Keep only the links that point outside the audit scope.
    m_output_links = []
    for url in m_urls_allowed:
        try:
            if url not in Config.audit_scope:
                m_output_links.append(url)
        except Exception:
            # Malformed URLs can make the scope test blow up; log and skip.
            Logger.log_error_more_verbose(format_exc())
示例6: check_download
# 需要导入模块: from golismero.api.text.wordlist import WordListLoader [as 别名]
# 或者: from golismero.api.text.wordlist.WordListLoader import get_wordlist [as 别名]
def check_download(self, url, name, content_length, content_type):
    """
    Decide whether the content behind a URL should be downloaded,
    based only on the response headers and the URL itself.

    :param url: URL being considered.
    :type url: str

    :param name: Suggested file name (not used by this check).
    :type name: str

    :param content_length: Value of the Content-Length header, or None.
    :type content_length: int | None

    :param content_type: Value of the Content-Type header, or None.
    :type content_type: str | None

    :return: True to download the content, False to skip it.
    :rtype: bool
    """
    # Only accept content when the content type header is present.
    if not content_type:
        Logger.log_more_verbose(
            "Skipping URL, missing content type: %s" % url)
        return False

    # Is the content length present?
    if content_length is not None:

        # Check the file doesn't have 0 bytes.
        if content_length <= 0:
            Logger.log_more_verbose(
                "Skipping URL, empty content: %s" % url)
            return False

        # Check the file is not too big: text content gets a smaller
        # limit than binary content. (The two duplicated log branches
        # of the original are merged here.)
        if content_type.strip().lower().startswith("text/"):
            max_length = 100000
        else:
            max_length = 5000000
        if content_length > max_length:
            Logger.log_more_verbose(
                "Skipping URL, content too large (%d bytes): %s"
                % (content_length, url))
            return False

        # Approved!
        return True

    # Content length absent but likely points to a directory index.
    parsed_url = parse_url(url)
    if not parsed_url.filename:
        # Approved!
        return True

    # Extension absent.
    if not parsed_url.extension:
        # Approved!
        return True

    # Match against a known list of valid HTML extensions.
    # See: http://en.wikipedia.org/wiki/List_of_file_formats#Webpage
    # BUGFIX: the original tuple listed ".xml" three times.
    if parsed_url.extension in (
            ".xml", ".html", ".htm", ".xhtml", ".xht",
            ".mht", ".mhtml", ".maff", ".asp", ".aspx", ".bml",
            ".cfm", ".cgi", ".ihtml", ".jsp", ".las", ".lasso",
            ".lassoapp", ".pl", ".php", ".php3", ".phtml",
            ".rna", ".r", ".rnx", ".shtml", ".stm", ".atom",
            ".eml", ".jsonld", ".metalink", ".met",
            ".rss", ".markdown"):
        # Approved!
        return True

    # If the URL path matches the no-spider blacklist, reject it.
    # (No need to copy the wordlist into a list just to iterate it.)
    m_forbidden = WordListLoader.get_wordlist(
        Config.plugin_config["wordlist_no_spider"])
    if any(x in url for x in m_forbidden):
        return False

    # Success!
    return True