本文整理汇总了Python中priorityQueue.PriorityQueue.createOutput方法的典型用法代码示例。如果您正苦于以下问题：Python PriorityQueue.createOutput方法的具体用法？Python PriorityQueue.createOutput怎么用？Python PriorityQueue.createOutput使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类priorityQueue.PriorityQueue的用法示例。
在下文中一共展示了PriorityQueue.createOutput方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_and_report
# 需要导入模块: from priorityQueue import PriorityQueue [as 别名]
# 或者: from priorityQueue.PriorityQueue import createOutput [as 别名]
def test_and_report(self):
    """Score every ``test.mat`` under ``self.email_path`` and write reports.

    Walks ``self.email_path``. For each directory that contains a
    ``test.mat`` file with at least one row, the matrix is loaded and
    passed to ``self.output_phish_probabilities``. For every result row
    returned, this method:

    - counts the email, and counts it as phish when its probability
      is above 0.5;
    - pushes the row onto the low-volume or high-volume priority queue,
      depending on whether the line count of the file at the row's path
      is below ``self.bucket_thres``;
    - appends ``<message id>,<probability>`` to
      ``output/email_probabilities.txt``.

    Afterwards the totals are stored in ``self.num_phish`` and
    ``self.test_size``, and the two queue outputs are handed to
    ``self.pretty_print`` and ``self.write_summary_output``.

    Assumptions:
    - test.mat exists in the directory structure and the classifier was
      trained on all data matrices.
    - test.mat has data['test_data'], data['email_index'] and
      data['message_id'].
    - Each result row is read at index 0 (path), 2 (probability) and
      4 (message id).
    """
    self.clean_all()
    if not os.path.exists(self.results_dir):
        os.makedirs(self.results_dir)

    low_volume_top_10 = PriorityQueue()
    high_volume_top_10 = PriorityQueue()
    numPhish, testSize = 0, 0
    # Cache of sender -> line count, so each sender's file is read only once.
    numEmails4Sender = {}
    logging_interval = 60  # TODO(matthew): Move to config.yaml
    num_senders_completed = 0

    # The path is relative to the current working directory (the original
    # comment says this ends up in common/output). The ``with`` block
    # guarantees the file is closed even if scoring raises part-way through.
    probabilities_path = os.path.join("output", "email_probabilities.txt")
    with open(probabilities_path, "w") as email_probabilities:
        progress_logger.info("Starting to test on data.")
        start_time = time.time()
        last_logged_time = start_time
        end_of_last_memory_track = dt.datetime.now()

        for root, _dirs, files in os.walk(self.email_path):
            # Periodic progress report (logging_interval is in minutes).
            curr_time = time.time()
            if (curr_time - last_logged_time) > logging_interval * 60:
                progress_logger.info('Exploring directory #{}'.format(num_senders_completed))
                progress_logger.info('Testing has run for {} minutes'.format(int((curr_time - start_time) / 60)))
                last_logged_time = curr_time

            # Periodic memory report; a negative memlog_freq disables it.
            if self.memlog_freq >= 0:
                time_elapsed = dt.datetime.now() - end_of_last_memory_track
                # total_seconds() includes the days component, which the
                # ``.seconds`` attribute silently drops.
                minutes_elapsed = time_elapsed.total_seconds() / 60.0
                if minutes_elapsed > self.memlog_freq:
                    MemTracker.logMemory("After completing " + str(num_senders_completed) + " iterations in test_and_report")
                    end_of_last_memory_track = dt.datetime.now()

            if 'test.mat' not in files:
                continue

            path = os.path.join(root, "test.mat")
            data = sio.loadmat(path)
            test_X = data['test_data']
            sample_size = test_X.shape[0]
            if sample_size == 0:
                continue

            test_indx = np.arange(sample_size).reshape(sample_size, 1)
            indx = data['email_index'].reshape(sample_size, 1)
            test_mess_id = data['message_id'].reshape(sample_size, 1).astype("S200")
            test_res = self.output_phish_probabilities(test_X, indx, root, test_indx, test_mess_id)

            if test_res is not None:
                for email in test_res:
                    testSize += 1
                    sender = self.get_sender(email[0])
                    emailPath = email[0]
                    probability = float(email[2])
                    message_ID = email[4].strip(" ")
                    if probability > 0.5:
                        numPhish += 1

                    # caches the num_emails value for each sender
                    # (presumably one line per email in that file -- TODO confirm)
                    if sender not in numEmails4Sender:
                        with open(emailPath) as sender_file:
                            num_emails = sum(1 for _ in sender_file)
                        numEmails4Sender[sender] = num_emails
                    else:
                        num_emails = numEmails4Sender[sender]

                    # checks which priority queue to add item to
                    if num_emails < self.bucket_thres:
                        low_volume_top_10.push(email, probability)
                    else:
                        high_volume_top_10.push(email, probability)

                    # writes an email's message ID and phish probability to a file
                    email_probabilities.write(message_ID + "," + str(probability) + "\n")

            # One more directory scored. Without this increment every
            # progress message above and below would report 0.
            num_senders_completed += 1

    self.num_phish, self.test_size = numPhish, testSize
    low_volume_output = low_volume_top_10.createOutput()
    high_volume_output = high_volume_top_10.createOutput()
    output = [low_volume_output, high_volume_output]
    # DEBUG information - don't print to main log
    # debug_logger.info(pp.pformat(output))
    self.d_name_per_feat = self.parse_feature_names()
    self.pretty_print(low_volume_output, "low_volume")
    self.pretty_print(high_volume_output, "high_volume")
    self.write_summary_output(output)

    end_time = time.time()
    total_elapsed = end_time - start_time
    min_elapsed, sec_elapsed = int(total_elapsed / 60), int(total_elapsed % 60)
    progress_logger.info("Finished testing on data in {} minutes, {} seconds. {} directories tested.".format(min_elapsed, sec_elapsed, num_senders_completed))