本文整理汇总了Python中editdistance.eval方法的典型用法代码示例。如果您正苦于以下问题:Python editdistance.eval方法的具体用法?Python editdistance.eval怎么用?Python editdistance.eval使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块editdistance的用法示例。
在下文中一共展示了editdistance.eval方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __is_wrong_permission
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def __is_wrong_permission(self, permission):
    """
    Tell whether ``permission`` looks like a misspelled standard permission.

    A permission counts as a typo when it is absent from
    ``YaraRule.PERMISSION_SET`` but lies within an edit distance of 1 to 3
    of one of its entries.  Positive results are remembered in
    ``YaraRule.WRONG_PERMISSION_DICT`` (typo -> matched standard
    permission); negative results are remembered in
    ``YaraRule.CACHE_NNSTD_PERMISSION_DICT`` (used through ``.add()``,
    i.e. as a set) so the scan is not repeated for the same string.
    """
    if permission in YaraRule.PERMISSION_SET:
        return False
    # Verdicts from earlier calls short-circuit the scan below.
    if permission in YaraRule.WRONG_PERMISSION_DICT:
        return True
    if permission in YaraRule.CACHE_NNSTD_PERMISSION_DICT:
        return False
    for candidate in YaraRule.PERMISSION_SET:
        if 0 < editdistance.eval(permission, candidate) <= 3:
            YaraRule.WRONG_PERMISSION_DICT[permission] = candidate
            return True
    # The scan finished without a close match: cache the negative verdict.
    YaraRule.CACHE_NNSTD_PERMISSION_DICT.add(permission)
    return False
示例2: add_iter
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def add_iter(self, output, out_length, label_length, labels):
    """Accumulate character- and word-level error statistics for one batch.

    ``output`` and ``out_length`` are handed to ``self.de.decode`` to obtain
    the predicted texts.  Each prediction is compared with the label at the
    same index; words are the ``'|'``-separated pieces of each string.

    Updates ``self.total_samples``, ``self.distance_C``, ``self.distance_W``,
    ``self.total_C``, ``self.total_W`` and ``self.correct`` in place.

    Unlike earlier revisions, ``labels`` is not modified: the lower-cased
    text used for case-insensitive scoring is kept in a local variable, so
    immutable sequences such as tuples are accepted as well.
    """
    self.total_samples += label_length.size()[0]
    prdt_texts, _ = self.de.decode(output, out_length)
    for i, prdt_text in enumerate(prdt_texts):
        label = labels[i]
        if not self.case_sensitive:
            prdt_text = prdt_text.lower()
            label = label.lower()

        # Map every distinct word to a small integer in first-seen order
        # (label words first), so the word-level distance can be computed
        # on integer sequences.
        word_ids = {}
        l_words = [word_ids.setdefault(w, len(word_ids)) for w in label.split('|')]
        p_words = [word_ids.setdefault(w, len(word_ids)) for w in prdt_text.split('|')]

        self.distance_C += ed.eval(label, prdt_text)
        self.distance_W += ed.eval(l_words, p_words)
        self.total_C += len(label)
        self.total_W += len(l_words)
        if label == prdt_text:
            self.correct += 1
示例3: _query
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def _query(self, node, txt, tolerance):
# handle empty root node
if node is None:
return []
# distance between query and current node
d = ed.eval(node[0], txt)
# add current node to result if within tolerance
res = []
if d <= tolerance:
res.append(node[0])
# iterate over children
for (edge, child) in node[1].items():
if d - tolerance <= edge and edge <= d + tolerance:
res += self._query(child, txt, tolerance)
return res
示例4: transform
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def transform(self, X: dt.Frame):
    """Compute the word-level edit distance between the first two columns.

    Each cell is converted with ``str``, lower-cased and split on
    whitespace; the distance is taken between the two resulting word lists.

    :param X: frame whose first two columns hold the texts to compare
    :return: ``numpy`` array with one distance per row; ``-1`` marks a row
        whose distance could not be computed
    """
    import editdistance

    X = X.to_pandas()
    text1_arr = X.iloc[:, 0].values
    text2_arr = X.iloc[:, 1].values

    output = []
    for text1, text2 in zip(text1_arr, text2_arr):
        try:
            words1 = str(text1).lower().split()
            words2 = str(text2).lower().split()
            output.append(editdistance.eval(words1, words2))
        except Exception:
            # Best-effort per row: keep the -1 sentinel, but no longer
            # swallow KeyboardInterrupt / SystemExit as a bare except did.
            output.append(-1)
    return np.array(output)
示例5: edit_distance_batch
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def edit_distance_batch(hyp, tar, tar_len, eos_idx):
    """Return the mean length-normalised edit distance over a batch.

    Row ``i`` of ``hyp`` is cut at its first ``eos_idx`` (the whole row is
    used when it contains none).  Row ``i`` of ``tar`` is cut at its first
    ``eos_idx`` when present, otherwise at the supplied ``tar_len[i]``.

    ``np.argmax`` cannot be used to find the EOS position: it returns a
    scalar 0 both when EOS is at index 0 and when EOS is absent, so a
    ``.size`` test on its result never detects the "not found" case.
    ``np.flatnonzero`` is used instead.

    :param hyp: 2-D integer array of hypotheses, one row per sample
    :param tar: 2-D integer array of targets, one row per sample
    :param tar_len: per-sample target lengths; entry ``i`` is overwritten
        with the EOS position when the target row contains EOS (same side
        effect as before)
    :param eos_idx: token id marking end of sequence
    :return: ``np.float32`` mean of ``distance / reference_length``; an
        empty reference is normalised by 1, an empty batch yields 0
    """
    bs = hyp.shape[0]
    if bs == 0:
        return np.float32(0.0)

    cer = 0.
    for i in range(bs):
        # filter hyp for eos
        hyp_eos = np.flatnonzero(hyp[i] == eos_idx)
        hyp_len = hyp_eos[0] if hyp_eos.size > 0 else hyp[i].size

        # filter tar for eos
        tar_eos = np.flatnonzero(tar[i] == eos_idx)
        if tar_eos.size > 0:
            tar_len[i] = tar_eos[0]
        ref_len = int(tar_len[i])

        distance = editdistance.eval(hyp[i, :hyp_len], tar[i, :ref_len])
        # guard against a zero-length reference (e.g. EOS at position 0)
        cer += distance / float(max(ref_len, 1))
    return np.float32(cer / bs)
示例6: enhance
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def enhance(self, xs):
    """Forward only in the frontend stage.

    The module is switched to eval mode for the duration of the call and
    its previous training mode is restored afterwards, even when the
    frontend raises.

    :param xs: iterable of input acoustic features; each item is sliced
        along its first axis with ``self.subsample[0]`` (the original
        docstring describes the feature layout as (T, C, F))
    :return: enhanced feature, mask, and the per-item lengths measured
        before subsampling
    :rtype: tuple of (ndarray, ndarray, ndarray)
    :raises RuntimeError: if the model has no frontend
    """
    if self.frontend is None:
        raise RuntimeError("Frontend does't exist")
    prev = self.training
    self.eval()
    try:
        ilens = np.fromiter((xx.shape[0] for xx in xs), dtype=np.int64)

        # subsample frame
        xs = [xx[:: self.subsample[0], :] for xx in xs]
        xs = [to_device(self, to_torch_tensor(xx).float()) for xx in xs]
        xs_pad = pad_list(xs, 0.0)
        enhanced, _, mask = self.frontend(xs_pad, ilens)
    finally:
        # restore training mode no matter how the forward pass ended
        if prev:
            self.train()
    return enhanced.cpu().numpy(), mask.cpu().numpy(), ilens
示例7: encode
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def encode(self, x):
    """Run the encoder on a single utterance.

    The module is put in eval mode, the input is subsampled along its first
    axis with step ``self.subsample[0]``, converted to a tensor matching
    the device and dtype of the model's first parameter, and passed through
    ``self.enc`` as a batch of one.

    :param ndarray x: input acoustic feature (T, D)
    :return: encoder outputs
    :rtype: torch.Tensor
    """
    self.eval()

    # the length handed to the encoder is taken before subsampling
    n_frames = x.shape[0]
    step = self.subsample[0]

    reference = next(self.parameters())
    feats = torch.as_tensor(
        x[::step, :], device=reference.device, dtype=reference.dtype
    )

    # add a leading batch axis so the encoder sees a one-utterance batch
    batch = feats.contiguous().unsqueeze(0)
    encoded, _, _ = self.enc(batch, [n_frames])
    return encoded.squeeze(0)
示例8: calculate_cer
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def calculate_cer(self, seqs_hat, seqs_true):
    """Calculate the character error rate over a set of sentences.

    Spaces are stripped from both texts before comparison.  The result is
    the total character edit distance divided by the total number of
    reference characters, summed over all sentences.

    :param list seqs_hat: prediction
    :param list seqs_true: reference
    :return: total edit distance / total reference length
    :rtype float
    """
    total_edits = 0
    total_ref_chars = 0
    for idx, hyp_text in enumerate(seqs_hat):
        ref_text = seqs_true[idx]
        hyp = hyp_text.replace(" ", "")
        ref = ref_text.replace(" ", "")
        total_edits += editdistance.eval(hyp, ref)
        total_ref_chars += len(ref)
    return float(total_edits) / total_ref_chars
示例9: calculate_wer
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def calculate_wer(self, seqs_hat, seqs_true):
    """Calculate the word error rate over a set of sentences.

    Both texts are split on whitespace.  The result is the total word edit
    distance divided by the total number of reference words, summed over
    all sentences.

    :param list seqs_hat: prediction
    :param list seqs_true: reference
    :return: total edit distance / total reference length
    :rtype float
    """
    total_edits = 0
    total_ref_words = 0
    for idx, hyp_text in enumerate(seqs_hat):
        ref_text = seqs_true[idx]
        hyp = hyp_text.split()
        ref = ref_text.split()
        total_edits += editdistance.eval(hyp, ref)
        total_ref_words += len(ref)
    return float(total_edits) / total_ref_words
示例10: show_edit_distance
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def show_edit_distance(self, num):
    """Print the mean (and mean length-normalised) edit distance.

    Batches are pulled from ``self.text_img_gen`` until ``num`` samples
    have been decoded with ``decode_batch(self.test_func, ...)`` and
    compared against each batch's ``'source_str'`` entries.

    :param num: number of samples to evaluate; nothing is evaluated or
        printed when it is zero or negative (this used to end in a
        ``ZeroDivisionError`` when computing the averages)
    """
    if num <= 0:
        return

    num_left = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while num_left > 0:
        word_batch = next(self.text_img_gen)[0]
        inputs = word_batch['the_input']
        sources = word_batch['source_str']
        # never consume more of the batch than is still required
        num_proc = min(inputs.shape[0], num_left)
        decoded_res = decode_batch(self.test_func, inputs[0:num_proc])
        for j in range(num_proc):
            edit_dist = float(editdistance.eval(decoded_res[j], sources[j]))
            total_ed += edit_dist
            total_norm_ed += edit_dist / len(sources[j])
        num_left -= num_proc

    mean_norm_ed = total_norm_ed / num
    mean_ed = total_ed / num
    print('\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
          % (num, mean_ed, mean_norm_ed))
示例11: get_frames
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def get_frames(self, lemma):
    """
    Return the candidate frames for ``lemma`` in ranked order.

    ``[lemma]`` is returned when the lemma is listed in
    ``self.frequent_senseless_nodes`` or has no entry in
    ``self.lemma_frame_map``.  Otherwise the lemma's frames are sorted in
    descending order of the pair (edit distance between the lemma and the
    frame without its trailing ``-NN`` suffix, negated ``NN``), where a
    frame without such a suffix contributes 0 as the second item.
    """
    if lemma in self.frequent_senseless_nodes or lemma not in self.lemma_frame_map:
        return [lemma]

    def rank(frame):
        stem = re.sub(r'-\d\d$', '', frame)
        if re.search(r'-\d\d$', frame):
            sense = -int(frame[-2:])
        else:
            sense = 0
        return editdistance.eval(stem, lemma), sense

    return sorted(self.lemma_frame_map[lemma], key=rank, reverse=True)
示例12: show_edit_distance
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def show_edit_distance(self, num):
    """Report edit-distance statistics over ``num`` generated samples.

    Samples come from successive ``next(self.text_img_gen)[0]`` batches;
    each batch's ``'the_input'`` rows are decoded through
    ``decode_batch(self.test_func, ...)`` and compared with the matching
    ``'source_str'`` entries.  The mean edit distance and the mean of
    ``distance / len(source)`` are printed.

    :param num: how many samples to score; a value of zero or less returns
        immediately without printing, instead of failing with a
        ``ZeroDivisionError`` when the averages are taken
    """
    if num <= 0:
        return

    remaining = num
    ed_sum = 0.0
    norm_ed_sum = 0.0
    while remaining > 0:
        word_batch = next(self.text_img_gen)[0]
        # cap the batch at the number of samples still needed
        num_proc = min(word_batch['the_input'].shape[0], remaining)
        decoded_res = decode_batch(self.test_func, word_batch['the_input'][0:num_proc])
        truths = word_batch['source_str']
        for j in range(num_proc):
            dist = float(editdistance.eval(decoded_res[j], truths[j]))
            ed_sum += dist
            norm_ed_sum += dist / len(truths[j])
        remaining -= num_proc

    mean_norm_ed = norm_ed_sum / num
    mean_ed = ed_sum / num
    print('\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
          % (num, mean_ed, mean_norm_ed))
示例13: editDistance
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def editDistance(s1, s2):
    """Return the Levenshtein distance between sequences ``s1`` and ``s2``.

    The ``editdistance`` module is used when
    ``EDIT_DISTANCE_MODULE_EXISTS`` is true, else the ctypes helper when
    ``EDIT_DISTANCE_CTYPES_LOADED`` is true.  Otherwise a pure-Python
    dynamic-programming fallback runs; it uses ``range`` (available on both
    Python 2 and 3, unlike ``xrange``) and keeps only two rows of the
    table.
    """
    # check if editdistance module loaded
    if EDIT_DISTANCE_MODULE_EXISTS:
        return editdistance.eval(s1, s2)
    elif EDIT_DISTANCE_CTYPES_LOADED:
        return ed_ctypes.edit_distance(s1, len(s1), s2, len(s2))

    # previous[j] is the distance between the s1 prefix handled so far and
    # s2[:j]; the initial row is the cost of inserting j items.
    previous = list(range(len(s2) + 1))
    for i, item1 in enumerate(s1, 1):
        current = [i]  # deleting i items turns s1[:i] into the empty prefix
        for j, item2 in enumerate(s2, 1):
            cost = 0 if item1 == item2 else 1
            current.append(min(current[j - 1] + 1,      # insertion
                               previous[j] + 1,         # deletion
                               previous[j - 1] + cost)) # substitution / match
        previous = current
    return previous[-1]
示例14: inference_metrics
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def inference_metrics(model, src, tgt, config):
    """Decode the dataset, score the predictions, and return joined texts.

    ``decode_dataset`` is called with ``k`` taken from
    ``config['eval']['precision_recall_k']``; a classifier is loaded via
    ``models.TextClassifier.from_pickle`` from
    ``config['eval']['classifier_path']`` and passed to ``get_metrics``
    together with the raw sources, predictions, ground truths and top-k
    predictions.

    Returns ``(metrics, inputs, preds, ground_truths, auxs)`` where every
    item of the last four lists is the corresponding token sequence joined
    with single spaces.
    """
    eval_cfg = config['eval']

    inputs, preds, top_k_preds, ground_truths, auxs, raw_srcs = decode_dataset(
        model, src, tgt, config, k=eval_cfg['precision_recall_k'])

    classifier = models.TextClassifier.from_pickle(eval_cfg['classifier_path'])

    # metrics are computed on the token sequences, before they are joined
    metrics = get_metrics(
        raw_srcs, preds, ground_truths,
        top_k_preds=top_k_preds, classifier=classifier)

    def join_all(seqs):
        return [' '.join(seq) for seq in seqs]

    return (
        metrics,
        join_all(inputs),
        join_all(preds),
        join_all(ground_truths),
        join_all(auxs),
    )
示例15: edit_distance
# 需要导入模块: import editdistance [as 别名]
# 或者: from editdistance import eval [as 别名]
def edit_distance(x, y):
    """Levenshtein Distance

    The "feature" dimension is along the columns and the "time" dimension
    along the lines of arrays x and y

    Returns 0 when both arrays have no rows, ``np.inf`` when exactly one of
    them has no rows, and otherwise the edit distance between the two
    arrays after conversion to nested tuples.
    """
    n_x, n_y = x.shape[0], y.shape[0]

    # both x and y are empty
    if n_x == 0 and n_y == 0:
        return 0
    # x or y is empty
    if n_x == 0 or n_y == 0:
        return np.inf

    def as_nested_tuple(item):
        # arrays become tuples recursively; anything that cannot be
        # iterated is kept as it is
        try:
            return tuple(as_nested_tuple(part) for part in item)
        except TypeError:
            return item

    # x and y are not empty
    return editdistance.eval(as_nested_tuple(x), as_nested_tuple(y))