本文整理汇总了Python中difflib.SequenceMatcher方法的典型用法代码示例。如果您正苦于以下问题：Python difflib.SequenceMatcher方法的具体用法？Python difflib.SequenceMatcher怎么用？Python difflib.SequenceMatcher使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类difflib的用法示例。
在下文中一共展示了difflib.SequenceMatcher方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: structural_similarity
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def structural_similarity(document_1, document_2):
    """
    Score how alike the tag structure of two HTML documents is.

    Both inputs are parsed with lxml, turned into tag sequences by
    ``get_tags`` and compared with ``difflib.SequenceMatcher``.

    :param document_1: html string
    :param document_2: html string
    :return: the matcher's similarity ratio; 0 when parsing either
        document raises (the exception is printed)
    """
    try:
        tree_1 = lxml.html.parse(StringIO(document_1))
        tree_2 = lxml.html.parse(StringIO(document_2))
    except Exception as e:
        print(e)
        return 0

    matcher = difflib.SequenceMatcher(None, get_tags(tree_1), get_tags(tree_2))
    return matcher.ratio()
示例2: _sortKeywords
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def _sortKeywords(keyword, kwds):
    """Order keywords from the most to the least similar to the searched one."""
    matcher = SequenceMatcher()
    matcher.set_seq1(keyword.lower())
    # A plain substring hit only earns a bonus for keywords longer than 4 chars.
    reward_contained = len(keyword) > 4

    scored = [(ratcliff(keyword, candidate, matcher), candidate)
              for candidate in kwds]
    for position, (score, candidate) in enumerate(scored):
        if candidate.startswith(keyword):
            scored[position] = (score + 0.5, candidate)
        elif reward_contained and keyword in candidate:
            scored[position] = (score + 0.3, candidate)

    scored.sort()
    return [candidate for _, candidate in reversed(scored)]
示例3: getAKAsInLanguage
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def getAKAsInLanguage(movie, lang, _searchedTitle=None):
    """Return a list of AKAs of a movie, in the specified language.

    If _searchedTitle is given, the AKAs are sorted by their similarity
    to it, most similar first. Similarity is the ``SequenceMatcher``
    ratio of the lower-cased titles; AKAs with the same ratio keep the
    order in which ``akasLanguages`` produced them.

    :param movie: movie object handed to ``akasLanguages``
    :param lang: language an AKA must be tagged with to be returned
    :param _searchedTitle: optional title (text or UTF-8 bytes) to rank by
    :return: list of the matching AKAs, unmodified
    """
    akas = [aka for language, aka in akasLanguages(movie) if lang == language]
    if not _searchedTitle:
        return akas

    def _as_text(value):
        # Bring byte strings and text to one common type so they can be
        # compared character by character on both Python 2 and Python 3.
        if isinstance(value, bytes):
            return value.decode('utf8', 'replace')
        return value

    wanted = _as_text(_searchedTitle).lower()

    def _similarity(aka):
        candidate = _as_text(aka).lower()
        return difflib.SequenceMatcher(None, candidate, wanted).ratio()

    # Sorting on the ratio alone avoids comparing the AKAs themselves.
    return sorted(akas, key=_similarity, reverse=True)
示例4: test_hoeffding_tree_regressor_model_description
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def test_hoeffding_tree_regressor_model_description():
    """Fit a mean-leaf Hoeffding tree regressor and check its description.

    The description is compared fuzzily: it passes when the
    ``SequenceMatcher`` ratio against the expected text exceeds 0.9.
    """
    max_samples = 500
    stream = RegressionGenerator(
        n_samples=500, n_features=20, n_informative=15, random_state=1
    )
    learner = HoeffdingTreeRegressor(leaf_prediction='mean')

    X, y = stream.next_sample(max_samples)
    learner.partial_fit(X, y)

    expected_description = (
        "if Attribute 6 <= 0.1394515530995348:\n"
        " Leaf = Statistics {0: 276.0000, 1: -21537.4157, 2: 11399392.2187}\n"
        "if Attribute 6 > 0.1394515530995348:\n"
        " Leaf = Statistics {0: 224.0000, 1: 22964.8868, 2: 10433581.2534}\n"
    )

    matcher = SequenceMatcher(
        None, expected_description, learner.get_model_description()
    )
    assert matcher.ratio() > 0.9
示例5: __java_final
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def __java_final(self, home):
    """Build the ``-I`` include flags for a Java home directory.

    The result always contains ``home`` itself. When ``home`` has
    sub-directories, the one whose name is most similar to
    ``sys.platform`` is appended as well. Every path is passed through
    ``self.pathtext`` before it is formatted.

    :param home: directory to scan for a platform specific sub-directory
    :return: space separated ``-I<path>`` flags
    """
    import difflib

    try:
        entries = os.listdir(home)
    except OSError:
        # Best effort: a missing or unreadable directory simply contributes
        # no sub-directory instead of aborting.
        entries = []

    candidates = [
        (difflib.SequenceMatcher(None, sys.platform, sub).ratio(), sub)
        for sub in entries
        if os.path.isdir(os.path.join(home, sub))
    ]

    path = [home]
    if candidates:
        # Highest ratio wins; equal ratios fall back to the greater name.
        path.append(os.path.join(home, max(candidates)[1]))
    return ' '.join('-I%s' % self.pathtext(n) for n in path)
# 取得 java配置
示例6: validate
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def validate(self, password, user=None):
    """Reject a password that closely resembles one of the user's attributes.

    For every name in ``self.user_attributes`` whose value on ``user`` is a
    non-empty string, the lower-cased password is compared with the whole
    value and with each of its ``\\W+``-separated parts. Nothing is checked
    when no user is given.

    :param password: candidate password
    :param user: object carrying the attributes to compare against
    :raises ValidationError: with code ``password_too_similar`` when a
        ``quick_ratio()`` reaches ``self.max_similarity``
    """
    if not user:
        return

    for attribute_name in self.user_attributes:
        value = getattr(user, attribute_name, None)
        if not (value and isinstance(value, str)):
            continue

        candidates = re.split(r'\W+', value)
        candidates.append(value)
        for candidate in candidates:
            matcher = SequenceMatcher(a=password.lower(), b=candidate.lower())
            if matcher.quick_ratio() >= self.max_similarity:
                # Prefer the field's verbose name; fall back to the raw
                # attribute name when the model has no such field.
                try:
                    field = user._meta.get_field(attribute_name)
                    verbose_name = str(field.verbose_name)
                except FieldDoesNotExist:
                    verbose_name = attribute_name
                raise ValidationError(
                    _("The password is too similar to the %(verbose_name)s."),
                    code='password_too_similar',
                    params={'verbose_name': verbose_name},
                )
示例7: __show_diff
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def __show_diff(expected, actual):
    """Render a coloured diff between ``expected`` and ``actual``.

    Equal runs are emitted unchanged, insertions in green, deletions in
    red and replacements (showing the ``actual`` text) in blue.

    :return: the concatenated, colour-coded string
    :raises RuntimeError: if the matcher reports an unknown opcode
    """
    matcher = difflib.SequenceMatcher(None, expected, actual)
    pieces = [Style.RESET_ALL]
    for tag, a_lo, a_hi, b_lo, b_hi in matcher.get_opcodes():
        if tag == "equal":
            pieces.append(matcher.a[a_lo:a_hi])
            continue
        if tag == "insert":
            colour, text = Fore.GREEN, matcher.b[b_lo:b_hi]
        elif tag == "delete":
            colour, text = Fore.RED, matcher.a[a_lo:a_hi]
        elif tag == "replace":
            colour, text = Fore.BLUE, matcher.b[b_lo:b_hi]
        else:
            raise RuntimeError("unexpected opcode")
        pieces.append(colour + text + Style.RESET_ALL)
    return "".join(pieces)
示例8: is_needle_in_hay
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def is_needle_in_hay(cls, needle, hay):
    """Return the best similarity between ``needle`` and any word window of ``hay``.

    ``hay`` is split on whitespace and scanned with n-grams that are 20%
    (rounded down) longer than the needle's word count; each n-gram is
    re-joined with single spaces and compared with ``SequenceMatcher``.
    The result is 0 when no n-gram is produced.
    """
    needle_words = len(needle.split())
    window = needle_words + int(.2 * needle_words)
    best = 0
    for gram in ngrams(hay.split(), window):
        candidate = u" ".join(gram)
        best = max(best, SequenceMatcher(None, candidate, needle).ratio())
    return best  # how confident are we that needle was found in hay
# https://stackoverflow.com/a/31505798
# given a string paragraph, return a list of sentences
示例9: compare_truth
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def compare_truth(pred_dict, truth_dict):
    """Return the mean similarity between predictions and ground truth.

    For every key of ``truth_dict`` the true value is compared with the
    predicted value under the same key using ``SequenceMatcher.ratio()``;
    the ratios are averaged over the number of truth entries.

    :param pred_dict: mapping of key -> predicted sequence
    :param truth_dict: mapping of key -> expected sequence
    :return: average ratio; 0.0 when ``truth_dict`` is empty
    :raises KeyError: if a key of ``truth_dict`` is missing in ``pred_dict``
    """
    if not truth_dict:
        # Nothing to compare: avoid dividing by zero.
        return 0.0
    total = sum(
        SequenceMatcher(None, truth, pred_dict[key]).ratio()
        for key, truth in truth_dict.items()
    )
    return total / len(truth_dict)
示例10: process_tree
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def process_tree(self, tree):
    """Compare ``tree`` with its gold counterpart and accumulate the counts.

    The gold tree is fetched from the same bundle via ``self.gold_zone``;
    the gold tree itself is skipped. Nodes are aligned on their lower-cased
    forms with ``difflib.SequenceMatcher`` and only nodes inside 'equal'
    runs count as aligned. The per-tree counts are added to
    ``self.total_count`` and, when ``self.print_raw`` is set, the
    pred/gold/Words/LAS numbers are printed on one line.
    """
    gold_tree = tree.bundle.get_tree(self.gold_zone)
    if tree == gold_tree:
        return

    pred_nodes = tree.descendants
    gold_nodes = gold_tree.descendants
    matcher = difflib.SequenceMatcher(
        None,
        [node.form.lower() for node in pred_nodes],
        [node.form.lower() for node in gold_nodes],
        autojunk=False,
    )

    aligned = []
    for tag, p_lo, p_hi, g_lo, g_hi in matcher.get_opcodes():
        if tag == 'equal':
            aligned.extend(zip(pred_nodes[p_lo:p_hi], gold_nodes[g_lo:g_hi]))

    # The trees map onto each other so that parents of top-level nodes
    # can be looked up like any other aligned parent.
    align_map = {tree: gold_tree}
    align_map.update(aligned)

    def _content_count(nodes):
        # Number of nodes whose udeprel is not listed in CLAS_IGNORE.
        return len([n for n in nodes if n.udeprel not in CLAS_IGNORE])

    count = Counter()
    count['pred'] = len(pred_nodes)
    count['gold'] = len(gold_nodes)
    count['Words'] = len(aligned)
    count['pred_clas'] = _content_count(pred_nodes)
    count['gold_clas'] = _content_count(gold_nodes)
    count['alig_clas'] = _content_count([g_node for _, g_node in aligned])

    for p_node, g_node in aligned:
        for attr in ('UPOS', 'XPOS', 'Feats', 'Lemma'):
            field = attr.lower()
            if p_node.get_attrs([field]) == g_node.get_attrs([field]):
                count[attr] += 1
        # LAS is only counted on top of a correct head, CLAS on top of LAS.
        if align_map.get(p_node.parent) == g_node.parent:
            count['UAS'] += 1
            if p_node.udeprel == g_node.udeprel:
                count['LAS'] += 1
                if g_node.udeprel not in CLAS_IGNORE:
                    count['CLAS'] += 1

    self.total_count.update(count)

    if self.print_raw:
        print(' '.join(str(count[s]) for s in ('pred', 'gold', 'Words', 'LAS')))
示例11: process_tree
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def process_tree(self, tree):
    """Mark the nodes in which ``tree`` and its gold counterpart differ.

    The gold tree is fetched from the same bundle via ``self.gold_zone``;
    the gold tree itself is skipped. Nodes are compared on the
    '_'-joined values of ``self.attrs``. Nodes outside 'equal' runs are
    marked, as are aligned nodes whose parents do not correspond. A mark
    is ``misc['Mark'] = self.mark``. Unless ``self.add`` is set, existing
    Mark/ToDo/Bug entries are deleted first.
    """
    gold_tree = tree.bundle.get_tree(self.gold_zone)
    if tree == gold_tree:
        return

    if not self.add:
        for node in tree.descendants + gold_tree.descendants:
            for key in ('Mark', 'ToDo', 'Bug'):
                del node.misc[key]

    pred_nodes = tree.descendants
    gold_nodes = gold_tree.descendants
    # Make sure both pred and gold trees are marked, even if one has just deleted nodes.
    if len(pred_nodes) != len(gold_nodes):
        for differing_tree in (tree, gold_tree):
            differing_tree.add_comment('Mark = %s' % self.mark)

    matcher = difflib.SequenceMatcher(
        None,
        ['_'.join(node.get_attrs(self.attrs)) for node in pred_nodes],
        ['_'.join(node.get_attrs(self.attrs)) for node in gold_nodes],
        autojunk=False,
    )
    opcodes = list(matcher.get_opcodes())

    # Position (ord - 1) of a pred node -> position of its gold partner.
    # -1 maps to -1, presumably so a parent with ord 0 (the root) matches.
    alignment = {-1: -1}
    for tag, p_lo, p_hi, g_lo, _ in opcodes:
        if tag in {'equal', 'replace'}:
            alignment.update((i, i - p_lo + g_lo) for i in range(p_lo, p_hi))

    for tag, p_lo, p_hi, g_lo, g_hi in opcodes:
        pred_span = pred_nodes[p_lo:p_hi]
        gold_span = gold_nodes[g_lo:g_hi]
        if tag == 'equal':
            for p_node, g_node in zip(pred_span, gold_span):
                if alignment.get(p_node.parent.ord - 1) != g_node.parent.ord - 1:
                    p_node.misc['Mark'] = self.mark
                    g_node.misc['Mark'] = self.mark
        else:
            for node in pred_span + gold_span:
                node.misc['Mark'] = self.mark
示例12: str_similar
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def str_similar(a, b):
    """Return the ``SequenceMatcher`` similarity ratio of ``a`` and ``b``."""
    matcher = SequenceMatcher(a=a, b=b)
    return matcher.ratio()
示例13: ratio
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def ratio(search_string: str, possible_match: str) -> float:
    """Return the best case-insensitive similarity of a search string inside a candidate.

    When the candidate is longer than the search string, every substring
    of the candidate that is as long as the search string is compared and
    the highest ``SequenceMatcher`` ratio wins; otherwise the two strings
    are compared as a whole.
    """
    needle = search_string.lower()
    haystack = possible_match.lower()
    width = len(needle)

    if width >= len(haystack):
        windows = [haystack]
    else:
        last_start = len(haystack) - width
        windows = [haystack[start:start + width] for start in range(last_start + 1)]

    return max(SequenceMatcher(None, needle, window).ratio() for window in windows)
示例14: changes
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def changes(self, other):
    """
    Changes to get from the get_wikitext() of self to that of other.

    Both texts are split into lines and diffed. The result is a list of
    tuples, one per non-equal opcode, using line indices:
    ("I", position, inserted lines),
    ("D", start, end) and
    ("R", start, end, replacement lines).

    :raises ValueError: if the matcher reports an unknown opcode
    """
    old_lines = self.get_wikitext().split("\n")
    new_lines = other.get_wikitext().split("\n")
    matcher = difflib.SequenceMatcher(None, old_lines, new_lines)

    result = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            # ignore no-change blocks
            continue
        if tag == 'insert':
            change = ("I", i1, new_lines[j1:j2])
        elif tag == 'delete':
            change = ("D", i1, i2)
        elif tag == 'replace':
            change = ("R", i1, i2, new_lines[j1:j2])
        else:
            raise ValueError
        result.append(change)
    return result
示例15: closest_rule
# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def closest_rule(self, adapter):
    """Return the rule of ``adapter`` that best matches this object, if any.

    A rule's score is dominated (98%) by how similar its endpoint is to
    ``self.endpoint``. One percent each is added when ``self.values`` is a
    subset of the rule's arguments and when ``self.method`` is among the
    rule's methods. ``None`` is returned when there is no adapter or the
    adapter's map has no rules.
    """
    def _score_rule(rule):
        endpoint_part = 0.98 * difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        values_part = 0.01 * bool(set(self.values or ()).issubset(rule.arguments))
        method_part = 0.01 * bool(rule.methods and self.method in rule.methods)
        return sum([endpoint_part, values_part, method_part])

    if not (adapter and adapter.map._rules):
        return None
    return max(adapter.map._rules, key=_score_rule)