本文整理汇总了Python中ssdeep.compare方法的典型用法代码示例。如果您正苦于以下问题:Python ssdeep.compare方法的具体用法?Python ssdeep.compare怎么用?Python ssdeep.compare使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块ssdeep的用法示例。
在下文中一共展示了ssdeep.compare方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_spam_level
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def get_spam_level(player, message_content):
    """
    Score how closely a message resembles the player's earlier messages.

    The message is fuzzy-hashed and compared against every non-None hash in
    ``player.last_message_hashes``; the spam level is the highest score found,
    or 0 when there is nothing to compare against. A level above 50% means
    the message is probably spam.

    Side effects: the new hash is appended to ``player.last_message_hashes``
    and ``player.spam_detections`` is incremented when the level exceeds
    ``SPAM_TOLERANCE``.

    :param player: object holding ``last_message_hashes`` and ``spam_detections``
    :param message_content: the message text to score
    :return: the highest similarity score against the prior hashes, or 0
    """
    current_hash = ssdeep.hash(message_content)
    known_hashes = (h for h in player.last_message_hashes if h is not None)
    spam_level = max(
        (ssdeep.compare(current_hash, h) for h in known_hashes),
        default=0,
    )
    # Record the hash only after scoring so the message is not compared
    # against itself.
    player.last_message_hashes.append(current_hash)
    if spam_level > SPAM_TOLERANCE:
        player.spam_detections += 1
    return spam_level
示例2: searchFuzzy
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def searchFuzzy(fuzz, limit, thresh):
    """
    Compare ``fuzz`` against the fuzzy hashes stored in the metadata database.

    :param fuzz: ssdeep hash to compare the stored hashes against
    :param limit: maximum number of documents to fetch (passed to ``limit()``)
    :param thresh: minimum score a document needs to be reported
    :return: dict mapping ``file_id`` to its score, for every fetched
        document whose score is ``>= thresh``
    """
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    try:
        db = client[envget('db_metadata_name')]
        # NOTE(review): the collection is looked up by the literal name
        # "db_metadata_collection", not envget(...) -- confirm this is intended.
        coll_meta = db["db_metadata_collection"]
        cursor = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1}).limit(limit)
        ret = {}
        for doc in cursor:
            res = -1
            try:
                res = ssdeep.compare(doc["fuzzy_hash"], fuzz)
            except InternalError:
                # Report the hash that could not be compared and move on.
                print(str(res) + "------" +
                      str(doc["fuzzy_hash"]) + "-----" + str(doc["file_id"]))
                continue
            if res >= thresh:
                ret[doc["file_id"]] = res
        return ret
    finally:
        # The client was previously never closed, leaking its connections
        # on every call.
        client.close()
示例3: fuzz_search_fast
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def fuzz_search_fast(id, p, fuzz):
    """
    Collect ssdeep scores for stored hashes whose block size is near ``fuzz``'s.

    Only documents whose block size (the integer before the first ``:`` of
    the hash) lies within ``lap`` of the block size of ``fuzz`` are compared;
    positive scores are stored in ``dic`` keyed by ``file_id``.

    :param id: identifier used only in the error log message
    :param p: name of the document field that holds the fuzzy hash
    :param fuzz: ssdeep hash to compare against
    """
    block = int(fuzz.split(':')[0])
    lap = 500
    coll_meta = db[envget("db_metadata_collection")]
    dic = {}
    for doc in coll_meta.find({}, {"file_id": 1, p: 1}):
        try:
            f_comp = doc[p]
            block_comp = int(f_comp.split(':')[0])
            if block - lap <= block_comp <= block + lap:
                res = ssdeep.compare(f_comp, fuzz)
                if res > 0:
                    dic[doc["file_id"]] = res
        except Exception:
            # `except Exception, e` was Python 2-only syntax and `e` was
            # unused; this form works on both Python 2 and 3.
            logging.exception(
                "fuzz_search_fast(id=" + str(id) + ",p=" + str(p) + ",fuzz=" + str(fuzz))
            continue
    # NOTE(review): `dic` is built but never returned in the code shown here;
    # the snippet looks truncated -- confirm against the full function.
示例4: main
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def main():
    """
    Command-line entry point for the ssdeep analytics.

    Exactly one of ``-c/--compare`` or ``-g/--group`` must be given (they form
    a required, mutually exclusive group); ``-v/--verbose`` is forwarded to
    ``SSDeepAnalytic`` as its ``debug`` flag.
    """
    description = (
        "Script to interact with "
        "Multiscanner's Elasticsearch datastore to run analytics based on "
        "ssdeep hash."
    )
    parser = argparse.ArgumentParser(description=description)
    mode = parser.add_mutually_exclusive_group(required=True)
    parser.add_argument(
        '-v', '--verbose',
        dest='verbose',
        action='store_true',
        help='Increase output to stdout',
    )
    mode.add_argument(
        '-c', '--compare',
        dest='compare',
        action='store_true',
        help='Run ssdeep.compare using a few optimizations based on ssdeep hash structure.',
    )
    mode.add_argument(
        '-g', '--group',
        dest='group',
        action='store_true',
        help='Returns group of samples based on ssdeep hash.',
    )
    options = parser.parse_args()

    analytic = SSDeepAnalytic(debug=options.verbose)
    if options.compare:
        analytic.ssdeep_compare()
        print('[*] Success')
    elif options.group:
        grouped = analytic.ssdeep_group()
        pprint(grouped)
        print('[*] Success')
示例5: should_parse
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def should_parse(repo, state, is_gist=False):
    """
    Decide whether a search result should be processed.

    A result is rejected (a coloured reason is printed and False returned)
    when its owner is in ``state.bad_users``, when -- for non-gists -- its
    repository or file name is in ``state.bad_repos`` / ``state.bad_files``,
    when its content is more than ``SIMILARITY_THRESHOLD`` similar to any
    signature in ``state.bad_signatures``, or when GitHub reports that the
    object no longer exists.

    :param repo: the search result (a gist when ``is_gist`` is true)
    :param state: object holding the ignore lists and bad signatures
    :param is_gist: whether ``repo`` is a gist
    :return: True when every check passes, otherwise False
    """
    def _reject(reason):
        # Print the reason in the failure colour and signal "skip".
        print(bcolors.FAIL + reason + bcolors.ENDC)
        return False

    if is_gist:
        owner_login = repo.owner.login
    else:
        owner_login = repo.repository.owner.login

    if owner_login in state.bad_users:
        return _reject("Failed check: Ignore User")
    if not is_gist:
        if repo.repository.name in state.bad_repos:
            return _reject("Failed check: Ignore Repo")
        if repo.name in state.bad_files:
            return _reject("Failed check: Ignore File")

    # Fuzzy hash comparison against the known-bad signatures.
    try:
        if not is_gist:
            # Temporary fix for PyGithub until fixed upstream (PyGithub#1178):
            # URL-quote the path component of the content URL.
            raw_path = repo._path.value
            repo._url.value = repo._url.value.replace(
                raw_path, urllib.parse.quote(raw_path))
        candidate_sig = ssdeep.hash(repo.decoded_content)
        for known_bad in state.bad_signatures:
            similarity = ssdeep.compare(candidate_sig, known_bad)
            if similarity > SIMILARITY_THRESHOLD:
                return _reject(
                    "Failed check: Ignore Fuzzy Signature on Contents "
                    "({}% Similarity)".format(similarity))
    except github.UnknownObjectException:
        return _reject("API Error: File no longer exists on github.com")
    return True
示例6: vectorize
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def vectorize(feature_set, c2_data):
    """
    Build a dense feature vector for one sample.

    For each ``(offset, code, ssdeep_hash)`` feature, the matching slot is the
    ssdeep score between the sample's ``content_ssdeep`` at that offset and
    the feature hash, divided by 100. The slot stays 0.0 when the offset is
    absent from ``c2_data`` or the stored ``code`` differs.

    :param feature_set: sequence of ``(offset, code, ssdeep_hash)`` tuples
    :param c2_data: mapping of offset to a dict with ``"code"`` and
        ``"content_ssdeep"`` entries
    :return: 1-D float64 array with one entry per feature
    """
    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    vector = np.zeros((len(feature_set),), dtype=np.float64)
    for index, (offset, code, ssdeep_hash) in enumerate(feature_set):
        if offset not in c2_data:
            continue
        entry = c2_data[offset]
        if entry["code"] == code:
            score = ssdeep.compare(entry["content_ssdeep"], ssdeep_hash)
            vector[index] = float(score) / 100.0
    return vector
示例7: vectorize_with_sparse_features
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def vectorize_with_sparse_features(sparse_feature_set, feature_count, c2_data):
    """
    Build a sparse ``1 x feature_count`` feature vector for one sample.

    For each ``(index, (offset, code, ssdeep_hash))`` item, column ``index``
    is the ssdeep score between the sample's ``content_ssdeep`` at that offset
    and the feature hash, divided by 100. Columns are left unset when the
    offset is absent from ``c2_data`` or the stored ``code`` differs.

    :param sparse_feature_set: iterable of ``(index, (offset, code, ssdeep_hash))``
    :param feature_count: total number of columns in the vector
    :param c2_data: mapping of offset to a dict with ``"code"`` and
        ``"content_ssdeep"`` entries
    :return: ``lil_matrix`` of shape ``(1, feature_count)`` and dtype float64
    """
    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    vector = lil_matrix((1, feature_count), dtype=np.float64)
    for index, (offset, code, ssdeep_hash) in sparse_feature_set:
        if offset not in c2_data:
            continue
        entry = c2_data[offset]
        if entry["code"] == code:
            score = ssdeep.compare(entry["content_ssdeep"], ssdeep_hash)
            vector[0, index] = float(score) / 100.0
    return vector
示例8: ssdeepcompare
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def ssdeepcompare(target, IP):
    """
    Print how similar the page served by ``IP`` is to the page of ``target``.

    Fetches ``http://<target>/`` and ``https://<IP>`` (the latter with the
    ``Host`` header set to ``target``), fuzzy-hashes both response bodies and
    prints each hash followed by the ssdeep score between them. A connection
    error is reported on stdout instead of being raised.

    :param target: host name of the reference site
    :param IP: address of the candidate server
    """
    try:
        # NOTE(review): this request has no timeout, unlike the one below.
        ss_target = requests.get('http://{}/'.format(target))
        ssdeep_target_fuzz = ssdeep.hash(ss_target.text)
        # Single-argument print() calls produce the same text on Python 2
        # and 3; the old print statements were a SyntaxError on Python 3.
        print("{} {}".format(target, ssdeep_target_fuzz))
        # Certificate verification is disabled because the server is
        # addressed by IP while the Host header names `target`.
        content = requests.get(
            'https://{}'.format(IP),
            verify=False,
            timeout=5,
            headers={'Host': target},
        )
        ssdeep_fuzz = ssdeep.hash(content.text)
        print("{} {}".format(IP, ssdeep_fuzz))
        print("ssdeep score for {} is {}".format(
            IP, ssdeep.compare(ssdeep_target_fuzz, ssdeep_fuzz)))
    except requests.exceptions.ConnectionError:
        print("cant connect to {}".format(IP))
示例9: get_ssdeep_comparison
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def get_ssdeep_comparison(first, second):
    """
    Return the ssdeep comparison result for two fuzzy hashes.

    Thin wrapper that forwards both arguments unchanged to ``ssdeep.compare``.
    """
    similarity = ssdeep.compare(first, second)
    return similarity
示例10: _find_similar_file_for
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def _find_similar_file_for(self, file_uid: str, parent_uid: str, comparison_fo: FileObject):
    """
    Yield files included in ``comparison_fo`` that are similar to ``file_uid``.

    Nothing is yielded when ``file_uid`` has no ssdeep hash in the database.
    Candidates without a stored hash are skipped.

    :param file_uid: uid of the file to find matches for
    :param parent_uid: uid of the object that contains ``file_uid``
    :param comparison_fo: object whose ``files_included`` are the candidates
    :return: generator of ``((id1, id2), similarity)`` for every candidate
        whose similarity exceeds ``self.ssdeep_ignore_threshold``
    """
    hash_one = self.database.get_ssdeep_hash(file_uid)
    if not hash_one:
        return
    id1 = self._get_similar_file_id(file_uid, parent_uid)
    for potential_match in comparison_fo.files_included:
        id2 = self._get_similar_file_id(potential_match, comparison_fo.uid)
        hash_two = self.database.get_ssdeep_hash(potential_match)
        if not hash_two:
            # Check before comparing: the comparison used to run first, so a
            # missing hash reached ssdeep.compare instead of being skipped.
            continue
        ssdeep_similarity = ssdeep.compare(hash_one, hash_two)
        if ssdeep_similarity > self.ssdeep_ignore_threshold:
            yield (id1, id2), ssdeep_similarity
示例11: get_matching_items_by_ssdeep
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def get_matching_items_by_ssdeep(ssdeep_value, threshold_grade):
    """
    Find indexed items whose ssdeep hash is similar to ``ssdeep_value``.

    Elasticsearch pre-filters the candidates: a record must have a chunk size
    equal to, double or half that of ``ssdeep_value`` and must match on either
    its ``chunk`` or its ``double_chunk`` field. Each returned hit is then
    graded with ``ssdeep.compare``.

    :param ssdeep_value: the ssdeep hash value of the item
    :param threshold_grade: minimum grade a hit needs in order to be returned
    :return: list of sha256 values of the hits whose grade is
        ``>= threshold_grade``
    """
    chunksize, chunk, double_chunk = ssdeep_value.split(':')
    chunksize = int(chunksize)

    es = elasticsearch.Elasticsearch(['localhost:9200'])

    chunksize_clause = {
        'terms': {
            'chunksize': [chunksize, chunksize * 2, int(chunksize / 2)],
        },
    }
    chunk_clause = {'match': {'chunk': {'query': chunk}}}
    double_chunk_clause = {'match': {'double_chunk': {'query': double_chunk}}}
    either_chunk_clause = {
        'bool': {
            'should': [chunk_clause, double_chunk_clause],
            'minimum_should_match': 1,
        },
    }
    query = {
        'query': {
            'bool': {
                'must': [chunksize_clause, either_chunk_clause],
            },
        },
    }

    results = es.search('ssdeep-index', body=query)
    return [
        hit['_source']['sha256']
        for hit in results['hits']['hits']
        if ssdeep.compare(hit['_source']['ssdeep'], ssdeep_value) >= threshold_grade
    ]
示例12: main
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def main():
    """
    Build per-app similarity bucket vectors and write them to a JSON file.

    Reads ``benign_apk/benign_apk_ssdeep.csv`` and
    ``malicious_apk/malicious_apk_ssdeep.csv`` (relative to the current
    directory). For every app it averages the ssdeep score against 1000
    randomly chosen hashes of the same category and one-hot encodes that
    average into ``similarity_buckets``. The result goes to
    ``app_hash_vectors.json`` and a summary is printed.
    """
    all_hashes = {'malicious': [], 'benign': []}
    app_malicious_map = {}  # app name -> [1, 0] for malware, [0, 1] for goodware
    similarity_buckets = ['similarity_limit_0', 'similarity_limit_0.2', 'similarity_limit_0.4',
                          'similarity_limit_0.6', 'similarity_limit_0.8', 'similarity_limit_1.0']
    bucket_limits = [float(name.split('_')[-1]) for name in similarity_buckets]

    for is_malicious, directory in enumerate(['benign_apk', 'malicious_apk']):
        category = 'malicious' if is_malicious else 'benign'
        # Open through a joined path instead of os.chdir() so the working
        # directory is never left changed if reading fails.
        csv_path = os.path.join(directory, directory.split('_')[0] + '_apk_ssdeep.csv')
        with open(csv_path) as hashes:
            for line_number, line in enumerate(hashes):
                if line_number == 0:
                    continue  # first line is skipped (presumably a CSV header)
                fields = line.split(',')
                b64hash = fields[0]
                # Last path component of the final column, minus its last
                # two characters.
                app_name = fields[-1].split('/')[-1][:-2]
                app_malicious_map[app_name] = [1, 0] if is_malicious else [0, 1]
                all_hashes[category].append((app_name, b64hash))

    all_apps = {}  # app name -> its bucket vector and classification
    num_zero = {}
    num_each = {}
    for category in all_hashes:
        num_zero[category] = 0
        num_each[category] = 0
        for app_name, this_hash in all_hashes[category]:
            similarity_scores = []
            for _ in range(1000):
                other_hash = random.choice(all_hashes[category])[1]
                similarity_scores.append(ssdeep.compare(this_hash, other_hash))
            score = numpy.mean(similarity_scores)
            num_each[category] += 1
            if score == 0:
                num_zero[category] += 1

            # One-hot encode: bucket k is set when last_limit < score <= limit.
            # NOTE(review): limits top out at 1.0 -- confirm this matches the
            # scale of the ssdeep.compare scores being averaged.
            bit_vector = []
            last_limit = -0.01
            for float_limit in bucket_limits:
                bit_vector.append(1 if last_limit < score <= float_limit else 0)
                last_limit = float_limit
            if not any(bit_vector):  # score is above the last limit
                bit_vector[-1] = 1
            all_apps[app_name] = {'vector': bit_vector,
                                  'malicious': app_malicious_map[app_name]}

    with open('app_hash_vectors.json', 'w') as outfile:
        json.dump({'features': similarity_buckets, 'apps': all_apps}, outfile)
    # The benign total used to print num_zero['benign'], which always
    # produced "N of N benign apps".
    print('{} of {} malicious apps and {} of {} benign apps had zero similarity found'.format(
        num_zero['malicious'], num_each['malicious'], num_zero['benign'], num_each['benign']))
    print('Wrote data on ' + str(len(all_apps)) + ' apps to a file.')
示例13: main
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def main(known_file, comparison, output_type):
    """
    The main function handles the main operations of the script

    Hashes ``known_file`` and compares it with ``comparison`` -- either a
    single file or every file found while walking a directory -- emitting one
    ``output()`` record per comparison.

    :param known_file: path to known file
    :param comparison: path to look for similar files
    :param output_type: type of output to provide
    :return: None

    Exits with status 2 for an unsupported ``output_type`` and with status 1
    when ``known_file`` or ``comparison`` does not exist.
    """
    # Check output formats
    if output_type not in OUTPUT_OPTS:
        logger.error(
            "Unsupported output format '{}' selected. Please "
            "use one of {}".format(
                output_type, ", ".join(OUTPUT_OPTS)))
        sys.exit(2)
    elif output_type == 'csv':
        # Special handling for CSV headers
        print('"similarity","known_file","known_hash",'
              '"comp_file","comp_hash"')

    # Check provided file paths
    known_file = os.path.abspath(known_file)
    comparison = os.path.abspath(comparison)

    # Generate ssdeep signature for known file
    if not os.path.exists(known_file):
        # Report the path that is actually missing; the message used to
        # name `comparison`.
        logger.error("Error - path {} not found".format(
            known_file))
        sys.exit(1)
    known_hash = ssdeep.hash_from_file(known_file)

    # Generate and test ssdeep signature for comparison file(s)
    if os.path.isdir(comparison):
        # Process files in folders
        for root, _, files in os.walk(comparison):
            for f in files:
                file_entry = os.path.join(root, f)
                comp_hash = ssdeep.hash_from_file(file_entry)
                comp_val = ssdeep.compare(known_hash, comp_hash)
                output(known_file, known_hash,
                       file_entry, comp_hash,
                       comp_val, output_type)
    elif os.path.isfile(comparison):
        # Process a single file. `file_entry` only exists in the directory
        # branch, so passing it here raised NameError.
        comp_hash = ssdeep.hash_from_file(comparison)
        comp_val = ssdeep.compare(known_hash, comp_hash)
        output(known_file, known_hash, comparison, comp_hash,
               comp_val, output_type)
    else:
        logger.error("Error - path {} not found".format(
            comparison))
        sys.exit(1)
开发者ID:PacktPublishing,项目名称:Learning-Python-for-Forensics-Second-Edition,代码行数:57,代码来源:ssdeep_python.py
示例14: process_sigs
# 需要导入模块: import ssdeep [as 别名]
# 或者: from ssdeep import compare [as 别名]
def process_sigs(cursor, sig_list, bin_list):
    """ Process the function signatures

    Go through every function and compare it to functions in every binary
    Get the highest score per function and add it to a score_list

    cursor - the database cursor
    sig_list - the list of function signatures for analysis
    bin_list - the list of binaries in the current database to compare to
               (the first element of each row is used as the binary id)

    Returns one list per binary in bin_list, each holding the highest score
    found for every signature in sig_list (0 when nothing scored higher)."""
    score_list = []

    # Total number of comparisons, used to size the progress display.
    maxval = 0
    for row in bin_list:
        cursor.execute("SELECT count(hash) FROM functions WHERE binaryid=?", (row[0],))
        maxval += int(cursor.fetchone()[0])
    maxval = maxval * len(sig_list)

    # Fall back to plain prints when `progressbar` is falsy
    # (presumably set to None when the module is not installed).
    if progressbar:
        widgets = [" ", progressbar.Bar(marker="#"), " ", progressbar.Percentage(), " ", progressbar.ETA()]
        pbar = progressbar.ProgressBar(widgets=widgets,
                                       maxval=maxval).start()
    else:
        pbar = None

    i = 0
    for row in bin_list:
        # The hashes of a binary do not depend on the signature, so fetch
        # them once per binary instead of re-running the query per signature.
        cursor.execute("SELECT hash FROM functions WHERE binaryid=?",
                       (row[0], ))
        binary_hashes = [record[0] for record in cursor.fetchall()]

        function_score_list = []
        for sig in sig_list:
            highest_score = 0
            # named h because `hash` is a Python builtin we should not shadow
            for h in binary_hashes:
                strength = ssdeep.compare(sig, h)
                if strength > highest_score:
                    highest_score = strength
                i += 1
                if pbar:
                    pbar.update(i)
                elif i % 10000 == 0 or i == maxval:
                    print("%d / %d Done" % (i, maxval))
            function_score_list.append(highest_score)
        score_list.append(function_score_list)

    if pbar:
        pbar.finish()
    return score_list