Python difflib.SequenceMatcher方法代码示例

本文整理汇总了Python中difflib.SequenceMatcher方法的典型用法代码示例。如果您正苦于以下问题：Python difflib.SequenceMatcher方法的具体用法？Python difflib.SequenceMatcher怎么用？Python difflib.SequenceMatcher使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块difflib的用法示例。

在下文中一共展示了difflib.SequenceMatcher方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: structural_similarity

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def structural_similarity(document_1, document_2):
    """
    Measure how similar the structure of two HTML documents is.

    Both documents are parsed with lxml, handed to ``get_tags`` (defined
    elsewhere; presumably it yields each document's tag sequence) and the
    two results are compared with ``difflib.SequenceMatcher``.

    :param document_1: html string
    :param document_2: html string
    :return: the matcher's similarity ratio (a float between 0 and 1), or 0
        when parsing either document raises
    """
    try:
        tree_1 = lxml.html.parse(StringIO(document_1))
        tree_2 = lxml.html.parse(StringIO(document_2))
    except Exception as error:
        # Best effort: report the failure and score the pair as dissimilar.
        print(error)
        return 0

    matcher = difflib.SequenceMatcher(None, get_tags(tree_1), get_tags(tree_2))
    return matcher.ratio()

开发者ID:matiskay，项目名称:html-similarity，代码行数:23，代码来源:structural_similarity.py

示例2: _sortKeywords

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def _sortKeywords(keyword, kwds):
    """Return the keywords ordered from best to worst match for ``keyword``.

    Every candidate is scored with ``ratcliff`` against the searched keyword.
    A candidate that starts with the keyword gets a 0.5 bonus; otherwise, if
    the keyword is longer than four characters and is contained in the
    candidate, the bonus is 0.3.
    """
    matcher = SequenceMatcher()
    matcher.set_seq1(keyword.lower())
    scored = [(ratcliff(keyword, candidate, matcher), candidate)
              for candidate in kwds]
    # Containment is only rewarded for keywords longer than four characters.
    reward_contained = len(keyword) > 4
    for position, (score, candidate) in enumerate(scored):
        if candidate.startswith(keyword):
            scored[position] = (score + 0.5, candidate)
        elif reward_contained and keyword in candidate:
            scored[position] = (score + 0.3, candidate)
    # Ascending sort followed by a reversal puts the highest score first.
    ranked = sorted(scored)[::-1]
    return [candidate for _, candidate in ranked]

开发者ID:skarlekar，项目名称:faces，代码行数:19，代码来源:__init__.py

示例3: getAKAsInLanguage

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def getAKAsInLanguage(movie, lang, _searchedTitle=None):
    """Return a list of AKAs of a movie, in the specified language.

    If _searchedTitle is given, the AKAs are sorted by their similarity
    to it, most similar first.

    :param movie: movie object handed to ``akasLanguages``.
    :param lang: language the returned AKAs must be tagged with.
    :param _searchedTitle: optional title used to rank the AKAs.
    :return: list of the matching AKAs.
    """
    akas = [aka for language, aka in akasLanguages(movie) if lang == language]
    if not _searchedTitle:
        return akas

    def _comparable(title):
        # Compare like with like: byte strings are decoded so the matcher
        # always works on lower-cased text, on both Python 2 and Python 3.
        if isinstance(title, bytes):
            title = title.decode('utf8', 'replace')
        return title.lower()

    searched = _comparable(_searchedTitle)
    # Each entry pairs the similarity *ratio* with the untouched AKA, so the
    # sort orders by score and the original values are what gets returned.
    scores = [(difflib.SequenceMatcher(None, _comparable(aka), searched).ratio(), aka)
              for aka in akas]
    scores.sort(reverse=True)
    return [aka for _, aka in scores]

开发者ID:skarlekar，项目名称:faces，代码行数:23，代码来源:helpers.py

示例4: test_hoeffding_tree_regressor_model_description

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def test_hoeffding_tree_regressor_model_description():
    """Check the textual description of a fitted Hoeffding tree regressor.

    The learner (mean leaf prediction) is trained on 500 generated samples
    and its description must be more than 90% similar to the expected text.
    """
    n_samples = 500
    stream = RegressionGenerator(
        n_samples=n_samples, n_features=20, n_informative=15, random_state=1
    )
    learner = HoeffdingTreeRegressor(leaf_prediction='mean')

    X, y = stream.next_sample(n_samples)
    learner.partial_fit(X, y)

    expected_description = (
        "if Attribute 6 <= 0.1394515530995348:\n"
        "  Leaf = Statistics {0: 276.0000, 1: -21537.4157, 2: 11399392.2187}\n"
        "if Attribute 6 > 0.1394515530995348:\n"
        "  Leaf = Statistics {0: 224.0000, 1: 22964.8868, 2: 10433581.2534}\n"
    )

    # A similarity threshold is used instead of strict string equality.
    matcher = SequenceMatcher(
        None, expected_description, learner.get_model_description()
    )
    assert matcher.ratio() > 0.9

开发者ID:scikit-multiflow，项目名称:scikit-multiflow，代码行数:21，代码来源:test_hoeffding_tree_regressor.py

示例5: __java_final

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def __java_final (self, home):
		"""Build the ``-I`` flags for ``home`` and its best platform sub-directory.

		``home`` is always included. If it contains sub-directories, the one
		whose name is most similar to ``sys.platform`` is appended as well.

		:param home: directory to scan.
		:return: space-separated ``-I<path>`` flags, each path passed through
			``self.pathtext``.
		"""
		import difflib
		path = [ home ]
		subdir = []
		try:
			entries = os.listdir(home)
		except OSError:
			# Best effort: a missing or unreadable home simply contributes
			# no platform sub-directory.
			entries = []
		for sub in entries:
			if os.path.isdir(os.path.join(home, sub)):
				m = difflib.SequenceMatcher(None, sys.platform, sub)
				subdir.append((m.ratio(), sub))
		if subdir:
			# Highest ratio wins; ties fall back to the greater name, which
			# is the same entry an ascending sort would place last.
			path.append(os.path.join(home, max(subdir)[1]))
		return ' '.join([ '-I%s'%self.pathtext(n) for n in path ])

	# 取得 java配置

开发者ID:skywind3000，项目名称:emake，代码行数:20，代码来源:emake.py

示例6: validate

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def validate(self, password, user=None):
        """Reject a password that is too similar to one of the user's attributes.

        For every name in ``self.user_attributes`` whose value on ``user`` is a
        non-empty string, the lower-cased password is compared with each
        ``\\W+``-separated fragment of the value and with the whole value.

        :param password: candidate password.
        :param user: object carrying the attributes; nothing is checked when
            it is falsy.
        :raises ValidationError: when a comparison's ``quick_ratio()`` reaches
            ``self.max_similarity``.
        """
        if not user:
            return

        for attribute_name in self.user_attributes:
            attribute_value = getattr(user, attribute_name, None)
            if not (attribute_value and isinstance(attribute_value, str)):
                continue

            fragments = re.split(r'\W+', attribute_value)
            fragments.append(attribute_value)
            for fragment in fragments:
                matcher = SequenceMatcher(a=password.lower(), b=fragment.lower())
                too_similar = matcher.quick_ratio() >= self.max_similarity
                if not too_similar:
                    continue
                # Prefer the field's verbose name in the error message and
                # fall back to the raw attribute name.
                try:
                    verbose_name = str(user._meta.get_field(attribute_name).verbose_name)
                except FieldDoesNotExist:
                    verbose_name = attribute_name
                raise ValidationError(
                    _("The password is too similar to the %(verbose_name)s."),
                    code='password_too_similar',
                    params={'verbose_name': verbose_name},
                )

开发者ID:reBiocoder，项目名称:bioforum，代码行数:22，代码来源:password_validation.py

示例7: __show_diff

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def __show_diff(expected, actual):
    """Render the difference between two strings as one colored string.

    Unchanged text is copied from ``expected`` as is. Text only present in
    ``actual`` is wrapped in ``Fore.GREEN``, text only present in
    ``expected`` in ``Fore.RED``, and replaced spans show the ``actual``
    side in ``Fore.BLUE``; every colored span ends with ``Style.RESET_ALL``.
    """
    matcher = difflib.SequenceMatcher(None, expected, actual)
    pieces = [Style.RESET_ALL]

    for tag, exp_lo, exp_hi, act_lo, act_hi in matcher.get_opcodes():
        if tag == "equal":
            pieces.append(matcher.a[exp_lo:exp_hi])
            continue

        if tag == "insert":
            color, text = Fore.GREEN, matcher.b[act_lo:act_hi]
        elif tag == "delete":
            color, text = Fore.RED, matcher.a[exp_lo:exp_hi]
        elif tag == "replace":
            color, text = Fore.BLUE, matcher.b[act_lo:act_hi]
        else:
            raise RuntimeError("unexpected opcode")
        pieces.append(color + text + Style.RESET_ALL)

    return "".join(pieces)

开发者ID:fastlane-queue，项目名称:fastlane，代码行数:19，代码来源:func.py

示例8: is_needle_in_hay

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def is_needle_in_hay(cls, needle, hay):
        """Return the best similarity between ``needle`` and any word window of ``hay``.

        ``hay`` is split on whitespace and scanned with ``ngrams`` using a
        window 20% (rounded down) longer than the needle's word count. Each
        window is joined with single spaces and compared with the needle.

        :param needle: text searched for.
        :param hay: text searched in.
        :return: highest ``SequenceMatcher`` ratio found, or 0 when no window
            scores above 0.
        """
        needle_length = len(needle.split())
        # Loop-invariant window size: the needle's word count plus 20% slack.
        window = needle_length + int(.2 * needle_length)
        max_sim_val = 0

        for ngram in ngrams(hay.split(), window):
            similarity = SequenceMatcher(None, u" ".join(ngram), needle).ratio()
            max_sim_val = max(max_sim_val, similarity)

        return max_sim_val  # how confident are we that needle was found in hay

    # https://stackoverflow.com/a/31505798
    # given a string paragraph, return a list of sentences

开发者ID:fterh，项目名称:sneakpeek，代码行数:18，代码来源:__init__.py

示例9: compare_truth

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def compare_truth(pred_dict, truth_dict):
    """Return the mean similarity between ground-truth and predicted values.

    :param pred_dict: mapping of key -> predicted sequence; must contain
        every key of ``truth_dict``.
    :param truth_dict: mapping of key -> ground-truth sequence.
    :return: average ``SequenceMatcher`` ratio over the keys of
        ``truth_dict``; 0.0 when ``truth_dict`` is empty.
    :raises KeyError: if a ground-truth key is missing from ``pred_dict``.
    """
    if not truth_dict:
        # Nothing to compare: avoid dividing by zero.
        return 0.0
    total = sum(
        SequenceMatcher(None, truth, pred_dict[key]).ratio()
        for key, truth in truth_dict.items()
    )
    return total / len(truth_dict)

开发者ID:zzzDavid，项目名称:ICDAR-2019-SROIE，代码行数:8，代码来源:my_utils.py

示例10: process_tree

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def process_tree(self, tree):
        """Align ``tree`` with the gold tree of its bundle and tally the matches.

        Nodes are aligned on runs of identical lower-cased word forms. The
        counts are added to ``self.total_count``; when ``self.print_raw`` is
        set, the ``pred``, ``gold``, ``Words`` and ``LAS`` counts are printed.
        Nothing happens when ``tree`` is itself the gold tree.
        """
        gold_tree = tree.bundle.get_tree(self.gold_zone)
        if tree == gold_tree:
            return

        pred_nodes = tree.descendants
        gold_nodes = gold_tree.descendants
        matcher = difflib.SequenceMatcher(
            None,
            [node.form.lower() for node in pred_nodes],
            [node.form.lower() for node in gold_nodes],
            autojunk=False,
        )

        # Only 'equal' opcodes contribute aligned (predicted, gold) pairs.
        aligned = [
            pair
            for tag, pred_lo, pred_hi, gold_lo, gold_hi in matcher.get_opcodes()
            if tag == 'equal'
            for pair in zip(pred_nodes[pred_lo:pred_hi], gold_nodes[gold_lo:gold_hi])
        ]
        # The trees map to each other so that their direct children can match.
        align_map = {tree: gold_tree}
        align_map.update(aligned)

        def outside_ignored(nodes):
            """Count the nodes whose udeprel is not in CLAS_IGNORE."""
            return sum(1 for node in nodes if node.udeprel not in CLAS_IGNORE)

        count = Counter()
        count['pred'] = len(pred_nodes)
        count['gold'] = len(gold_nodes)
        count['Words'] = len(aligned)
        count['pred_clas'] = outside_ignored(pred_nodes)
        count['gold_clas'] = outside_ignored(gold_nodes)
        count['alig_clas'] = outside_ignored(gold for _, gold in aligned)

        for predicted, gold in aligned:
            for label in ('UPOS', 'XPOS', 'Feats', 'Lemma'):
                attr_name = label.lower()
                if predicted.get_attrs([attr_name]) == gold.get_attrs([attr_name]):
                    count[label] += 1
            # UAS: the aligned counterpart of the predicted parent is the gold
            # parent. LAS also needs the same udeprel, and CLAS additionally a
            # gold udeprel outside CLAS_IGNORE.
            if align_map.get(predicted.parent) == gold.parent:
                count['UAS'] += 1
                if predicted.udeprel == gold.udeprel:
                    count['LAS'] += 1
                    if gold.udeprel not in CLAS_IGNORE:
                        count['CLAS'] += 1
        self.total_count.update(count)

        if self.print_raw:
            print(' '.join(str(count[key]) for key in ('pred', 'gold', 'Words', 'LAS')))

开发者ID:udapi，项目名称:udapi-python，代码行数:43，代码来源:conll17.py

示例11: process_tree

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def process_tree(self, tree):
        """Mark the nodes that differ between ``tree`` and its bundle's gold tree.

        Nodes are compared through their ``self.attrs`` values joined with
        ``'_'``. Nodes in differing spans, and matching nodes whose parents do
        not align, get ``misc['Mark'] = self.mark``. Nothing happens when
        ``tree`` is itself the gold tree.
        """
        gold_tree = tree.bundle.get_tree(self.gold_zone)
        if tree == gold_tree:
            return
        if not self.add:
            # Start from a clean slate: drop earlier Mark/ToDo/Bug entries on
            # both trees.
            # NOTE(review): presumably ``misc`` tolerates deleting absent keys
            # -- confirm.
            for node in tree.descendants + gold_tree.descendants:
                del node.misc['Mark']
                del node.misc['ToDo']
                del node.misc['Bug']

        pred_nodes, gold_nodes = tree.descendants, gold_tree.descendants
        # Make sure both pred and gold trees are marked, even if one has just deleted nodes.
        if len(pred_nodes) != len(gold_nodes):
            tree.add_comment('Mark = %s' % self.mark)
            gold_tree.add_comment('Mark = %s' % self.mark)
        # One token per node: the selected attribute values joined with '_'.
        pred_tokens = ['_'.join(n.get_attrs(self.attrs)) for n in pred_nodes]
        gold_tokens = ['_'.join(n.get_attrs(self.attrs)) for n in gold_nodes]
        matcher = difflib.SequenceMatcher(None, pred_tokens, gold_tokens, autojunk=False)
        diffs = list(matcher.get_opcodes())

        # Maps a predicted node index to a gold node index. The -1 entry covers
        # ``parent.ord - 1`` for a parent whose ord is 0 -- presumably the
        # technical root; confirm.
        alignment = {-1: -1}
        for diff in diffs:
            edit, pred_lo, pred_hi, gold_lo, gold_hi = diff
            if edit in {'equal', 'replace'}:
                # Offset-based mapping inside the span; for a 'replace' span
                # that is longer on the predicted side, the mapped index can
                # run past gold_hi.
                for i in range(pred_lo, pred_hi):
                    alignment[i] = i - pred_lo + gold_lo

        for diff in diffs:
            edit, pred_lo, pred_hi, gold_lo, gold_hi = diff
            if edit == 'equal':
                # Tokens match: mark the pair only when their parents are not
                # aligned with each other.
                for p_node, g_node in zip(pred_nodes[pred_lo:pred_hi], gold_nodes[gold_lo:gold_hi]):
                    if alignment.get(p_node.parent.ord - 1) != g_node.parent.ord - 1:
                        p_node.misc['Mark'] = self.mark
                        g_node.misc['Mark'] = self.mark
            else:
                # Tokens differ (any non-'equal' opcode): mark every node in
                # both spans.
                for node in pred_nodes[pred_lo:pred_hi] + gold_nodes[gold_lo:gold_hi]:
                    node.misc['Mark'] = self.mark

开发者ID:udapi，项目名称:udapi-python，代码行数:39，代码来源:markdiff.py

示例12: str_similar

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def str_similar(a, b):
    """Return the ``SequenceMatcher`` similarity ratio of ``a`` and ``b``.

    The ratio is a float between 0 (nothing in common) and 1 (identical).
    """
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()

开发者ID:ConvLab，项目名称:ConvLab，代码行数:4，代码来源:dst_util.py

示例13: ratio

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def ratio(search_string: str, possible_match: str) -> float:
    """Return the best case-insensitive similarity of the search string.

    When the candidate is longer than the search string, every substring of
    the candidate with the search string's length is compared and the best
    ratio wins; otherwise the whole candidate is compared once.
    """
    needle = search_string.lower()
    haystack = possible_match.lower()
    width = len(needle)
    if width >= len(haystack):
        windows = [haystack]
    else:
        # Slide a needle-sized window over the longer candidate.
        last_start = len(haystack) - width
        windows = [haystack[start:start + width] for start in range(last_start + 1)]
    return max(SequenceMatcher(None, needle, window).ratio() for window in windows)

开发者ID:DimaKudosh，项目名称:pydfs-lineup-optimizer，代码行数:12，代码来源:utils.py

示例14: changes

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def changes(self, other):
        """
        Changes to get from the get_wikitext() of self to that of other.

        The two texts are compared line by line. The result is a list of
        tuples, one per changed block:
          ("I", position, inserted lines)
          ("D", start, end)
          ("R", start, end, replacement lines)
        Positions refer to lines of self's text; unchanged blocks are omitted.
        """
        old_lines = self.get_wikitext().split("\n")
        new_lines = other.get_wikitext().split("\n")
        matcher = difflib.SequenceMatcher(None, old_lines, new_lines)

        result = []
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag == 'equal':
                # unchanged block: nothing to record
                continue
            if tag == 'insert':
                entry = ("I", i1, new_lines[j1:j2])
            elif tag == 'delete':
                entry = ("D", i1, i2)
            elif tag == 'replace':
                entry = ("R", i1, i2, new_lines[j1:j2])
            else:
                raise ValueError
            result.append(entry)

        return result

开发者ID:sfu-fas，项目名称:coursys，代码行数:31，代码来源:models.py

示例15: closest_rule

# 需要导入模块: import difflib [as 别名]
# 或者: from difflib import SequenceMatcher [as 别名]
def closest_rule(self, adapter):
        """Return the rule of ``adapter.map`` that best matches this object.

        A rule's score is 0.98 times the similarity of its endpoint to
        ``self.endpoint``, plus 0.01 when the names in ``self.values`` are all
        among the rule's arguments, plus 0.01 when ``self.method`` is one of
        the rule's methods. ``None`` is returned when there is no adapter or
        the map has no rules.
        """
        if not (adapter and adapter.map._rules):
            return None

        def _score_rule(rule):
            endpoint_score = 0.98 * difflib.SequenceMatcher(
                None, rule.endpoint, self.endpoint
            ).ratio()
            values_score = 0.01 * bool(set(self.values or ()).issubset(rule.arguments))
            method_score = 0.01 * bool(rule.methods and self.method in rule.methods)
            return endpoint_score + values_score + method_score

        return max(adapter.map._rules, key=_score_rule)

开发者ID:Frank-qlu，项目名称:recruit，代码行数:17，代码来源:routing.py

注：本文中的difflib.SequenceMatcher方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。