本文整理汇总了 Python 中 golismero.api.text.wordlist.WordListLoader.get_wordlist_as_raw 方法的典型用法代码示例。如果您正苦于以下问题:Python WordListLoader.get_wordlist_as_raw 方法的具体用法?Python WordListLoader.get_wordlist_as_raw 怎么用?Python WordListLoader.get_wordlist_as_raw 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 golismero.api.text.wordlist.WordListLoader 的用法示例。
在下文中一共展示了WordListLoader.get_wordlist_as_raw方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: analyze_url
# 需要导入模块: from golismero.api.text.wordlist import WordListLoader [as 别名]
# 或者: from golismero.api.text.wordlist.WordListLoader import get_wordlist_as_raw [as 别名]
def analyze_url(self, info):
    """
    Look for suspicious features in a URL.

    Two independent checks are made:

    - The directory segments, file base name and extension of the URL
      are compared against the wordlist named by the 'middle' plugin
      configuration key, and the extension alone against the wordlist
      named by the 'extensions' key.
    - The Shannon entropy of the whole hostname, and of every
      dot-separated part of it longer than 3 characters, is compared
      against a fixed threshold.

    :param info: Object exposing the URL to analyze as ``parsed_url``.
        NOTE(review): presumably a URL resource -- confirm with caller.

    :returns: One SuspiciousURLPath per match, possibly none.
    :rtype: list(SuspiciousURLPath)
    """
    m_parsed_url = info.parsed_url
    m_results = []

    Logger.log_more_verbose("Processing URL: %s" % m_parsed_url)

    #----------------------------------------------------------------------
    # Find suspicious URLs by matching against known substrings.

    # Load wordlists
    m_wordlist_middle = WordListLoader.get_wordlist_as_raw(
        Config.plugin_config['middle'])
    m_wordlist_extensions = WordListLoader.get_wordlist_as_raw(
        Config.plugin_config['extensions'])

    # Read the URL components once, instead of splitting the directory
    # again for every single wordlist entry.
    m_directories = m_parsed_url.directory.split("/")
    m_filebase = m_parsed_url.filebase
    m_extension = m_parsed_url.extension

    # Add keywords matching a whole directory segment, the file base name
    # or the extension.
    m_results.extend([SuspiciousURLPath(info, x)
                      for x in m_wordlist_middle
                      if x in m_directories or
                      x == m_filebase or
                      x == m_extension])

    # Add keywords matching the extension only.
    m_results.extend([SuspiciousURLPath(info, x)
                      for x in m_wordlist_extensions
                      if m_extension == x])

    #----------------------------------------------------------------------
    # Find suspicious URLs by calculating the Shannon entropy of the hostname.
    # Idea from: https://github.com/stricaud/urlweirdos/blob/master/src/urlw/plugins/shannon/__init__.py
    # TODO: test with unicode enabled hostnames!

    # Entropy strictly above this value is reported as suspicious.
    m_entropy_threshold = 4.0

    # Check the Shannon entropy for the hostname.
    hostname = m_parsed_url.hostname
    if calculate_shannon_entropy(hostname) > m_entropy_threshold:
        m_results.append(SuspiciousURLPath(info, hostname))

    # Check the Shannon entropy for the subdomains.
    # Parts of 3 characters or fewer are skipped.
    for subdomain in hostname.split('.'):
        if len(subdomain) > 3:
            if calculate_shannon_entropy(subdomain) > m_entropy_threshold:
                m_results.append(SuspiciousURLPath(info, subdomain))

    return m_results
示例2: analyze_html
# 需要导入模块: from golismero.api.text.wordlist import WordListLoader [as 别名]
# 或者: from golismero.api.text.wordlist.WordListLoader import get_wordlist_as_raw [as 别名]
def analyze_html(self, info):
#----------------------------------------------------------------------
# Get malware suspicious links.
Logger.log_more_verbose("Processing HTML: %s" % info.identity)
# Load the malware wordlist.
wordlist_filename = Config.plugin_config["malware_sites"]
try:
wordlist = WordListLoader.get_wordlist_as_list(
wordlist_filename)
except WordlistNotFound:
Logger.log_error("Wordlist '%s' not found.." % wordlist_filename)
return
except TypeError:
Logger.log_error(
"Wordlist '%s' is not a file." % wordlist_filename)
return
if not wordlist:
Logger.log_error("Wordlist '%s' is empty." % wordlist_filename)
Logger.log("1")
# Get links
base_urls = set()
for url in info.find_linked_data(Data.TYPE_RESOURCE,
Resource.RESOURCE_URL):
m_url = url.url
base_urls.add(m_url)
if info.information_type == Information.INFORMATION_HTML:
m_links = extract_from_html(info.raw_data, m_url)
m_links.update( extract_from_text(info.raw_data, m_url) )
elif info.information_type == Information.INFORMATION_PLAIN_TEXT:
m_links = extract_from_text(info.raw_data, m_url)
else:
raise Exception("Internal error!")
m_links.difference_update(base_urls)
Logger.log("2")
# If we have no links, abort now
if not m_links:
Logger.log_verbose("No output links found.")
return
# Do not follow URLs that contain certain keywords
m_forbidden = WordListLoader.get_wordlist_as_raw(
Config.plugin_config["wordlist_no_spider"])
m_urls_allowed = {
url for url in m_links
if url and not any(x in url for x in m_forbidden)
}
Logger.log("3")
# Get only output links
m_output_links = []
for url in m_urls_allowed:
try:
if url not in Config.audit_scope:
m_output_links.append(url)
except Exception, e:
Logger.log_error_more_verbose(format_exc())