本文整理汇总了Python中cluster.Cluster.add_leaf方法的典型用法代码示例。如果您正苦于以下问题：Python Cluster.add_leaf方法的具体用法？Python Cluster.add_leaf怎么用？Python Cluster.add_leaf使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cluster.Cluster的用法示例。
在下文中一共展示了Cluster.add_leaf方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: split_leaf
# 需要导入模块: from cluster import Cluster [as 别名]
# 或者: from cluster.Cluster import add_leaf [as 别名]
def split_leaf(self, min_items_for_split, skip_count, min_word_pos_entropy, min_percent):
    """
    Split this leaf into multiple leaves grouped under a new Cluster.

    The lines in ``self.log_lines`` are tokenized on whitespace, the first
    ``skip_count`` tokens of each line are dropped, and the word position
    with the smallest entropy that is still ``>= min_word_pos_entropy`` is
    chosen as the split position.  Every word at that position occurring
    more than ``floor(min_percent * len(self.log_lines))`` times gets its
    own leaf; all remaining lines go to a catch-all leaf keyed by ``None``.

    Parameters:
        min_items_for_split: the leaf is not split when
            ``self.get_num_lines()`` is below this value.
        skip_count: number of leading tokens ignored on every line.
        min_word_pos_entropy: inclusive lower bound on the entropy of a
            position for it to be eligible as the split position.
        min_percent: fraction of the line count a word must exceed to be
            given its own leaf.

    Returns:
        A Cluster holding the new leaves, or None when the leaf is too
        small, has no lines, has no eligible position, or has no word that
        is frequent enough at the chosen position.
    """
    if self.get_num_lines() < min_items_for_split:
        return None
    num_lines = len(self.log_lines)
    if num_lines == 0:
        # Nothing to split; also keeps max() below from raising on an
        # empty sequence.
        return None

    # Tokenize every line once and reuse the result in both passes.
    tokenized = [line.split() for line in self.log_lines]
    # The longest line (counted before skipping) bounds the number of
    # word positions we need counters for.
    line_length = max(len(tokens) for tokens in tokenized)
    fields_per_line = [tokens[skip_count:] for tokens in tokenized]

    # Build position dependent word counters
    word_counts = [defaultdict(int) for _ in range(line_length)]
    for fields in fields_per_line:
        for position, word in enumerate(fields):
            word_counts[position][word] += 1

    # Calculate entropies for each word position
    entropies = get_entropy_of_word_positions(word_counts, num_lines)

    # Pick the first position with the smallest entropy that is still at
    # least min_word_pos_entropy (strict '<' keeps the earliest on ties).
    min_entropy = 1e9
    min_entropy_index = None
    for position, entropy in enumerate(entropies):
        if min_word_pos_entropy <= entropy < min_entropy:
            min_entropy_index = position
            min_entropy = entropy
    if min_entropy_index is None:
        # No position has an eligible entropy, can't split
        return None

    # Keep only the words at the chosen position that occur often enough.
    min_word_occurrence = floor(min_percent * num_lines)
    split_words = set(
        word
        for word, count in word_counts[min_entropy_index].items()
        if count > min_word_occurrence
    )
    if not split_words:
        # Cluster not splittable
        return None  # TODO: Think about how to avoid checking this leaf every time?

    # Add log lines to leaves
    leafs = {}
    for line, fields in zip(self.log_lines, fields_per_line):
        if min_entropy_index < len(fields):
            split_word = fields[min_entropy_index]
        else:
            # The chosen position lies beyond the end of this line.
            split_word = None
        if split_word not in split_words:
            # Infrequent words share the catch-all leaf.
            split_word = None
        if split_word not in leafs:
            # First line for this word: create its leaf
            leafs[split_word] = Leaf(line, index=min_entropy_index, index_val=split_word)
        else:
            # Threshold and skip count are passed as None here, as before.
            leafs[split_word].add_to_leaf(line, None, None)

    # Make sure the catch-all leaf always exists to avoid trouble later.
    # NOTE(review): it is seeded with the last line processed above, which
    # already belongs to another leaf -- confirm this is intended.
    if None not in leafs:
        leafs[None] = Leaf(line, index=min_entropy_index, index_val=None)

    # Same text as the former Python 2 print statement, but valid syntax on
    # both Python 2 and Python 3.
    print("Splitting Leaf:  %s" % (split_words,))

    cluster = None
    for leaf in leafs.values():
        if cluster is None:
            cluster = Cluster(leaf, index=min_entropy_index, index_val=None)
        else:
            cluster.add_leaf(leaf)
    return cluster