

Python regex.match Method Code Examples

This article collects typical usage examples of the regex.match method in Python. If you have been wondering what exactly Python's regex.match does, how to use it, or what it looks like in practice, the curated code examples below may help. You can also explore further usage examples from the regex module to which this method belongs.


The following presents 15 code examples of the regex.match method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.

Example 1: validate_left_to_right_relations

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def validate_left_to_right_relations(id, tree):
    """
    Certain UD relations must always go left-to-right.
    Here we currently check the rule for the basic dependencies.
    The same should also be tested for the enhanced dependencies!
    """
    testlevel = 3
    testclass = 'Syntax'
    cols = tree['nodes'][id]
    if is_multiword_token(cols):
        return
    if DEPREL >= len(cols):
        return # this has already been reported in trees()
    # According to the v2 guidelines, apposition should also be left-headed, although the definition of apposition may need to be improved.
    if re.match(r"^(conj|fixed|flat|goeswith|appos)", cols[DEPREL]):
        ichild = int(cols[ID])
        iparent = int(cols[HEAD])
        if ichild < iparent:
            # We must recognize the relation type in the test id so we can manage exceptions for legacy treebanks.
            # For conj, flat, and fixed the requirement was introduced already before UD 2.2, and all treebanks in UD 2.3 passed it.
            # For appos and goeswith the requirement was introduced before UD 2.4 and legacy treebanks are allowed to fail it.
            testid = "right-to-left-%s" % lspec2ud(cols[DEPREL])
            testmessage = "Relation '%s' must go left-to-right." % cols[DEPREL]
            warn(testmessage, testclass, testlevel=testlevel, testid=testid, nodeid=id, nodelineno=tree['linenos'][id]) 
Developer: UniversalDependencies, Project: tools, Lines: 26, Source: validate.py
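
To try the core check outside the validator, here is a minimal, self-contained sketch; the warn, lspec2ud, and is_multiword_token helpers and the column-index globals from the original are replaced with a simplified (ID, HEAD, DEPREL) tuple, so this is an illustration rather than the project's actual API:

import regex as re

def goes_right_to_left(row):
    node_id, head, deprel = row
    # The alternation mirrors the relations checked above.
    if re.match(r"^(conj|fixed|flat|goeswith|appos)", deprel):
        return int(node_id) < int(head)
    return False

print(goes_right_to_left(("2", "5", "conj")))  # True: violates the guideline
print(goes_right_to_left(("5", "2", "conj")))  # False: left-to-right is fine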

Example 2: validate_sent_id

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def validate_sent_id(comments,known_ids,lcode):
    matched=[]
    for c in comments:
        match=sentid_re.match(c)
        if match:
            matched.append(match)
        else:
            if c.startswith(u"# sent_id") or c.startswith(u"#sent_id"):
                warn(u"Spurious sent_id line: '%s' Should look like '# sent_id = xxxxxx' where xxxx is not whitespace. Forward slash reserved for special purposes." %c,u"Metadata")
    if not matched:
        warn(u"Missing the sent_id attribute.",u"Metadata")
    elif len(matched)>1:
        warn(u"Multiple sent_id attribute.",u"Metadata")
    else:
        sid=matched[0].group(1)
        if sid in known_ids:
            warn(u"Non-unique sent_id the sent_id attribute: "+sid,u"Metadata")
        if sid.count(u"/")>1 or (sid.count(u"/")==1 and lcode!=u"ud" and lcode!=u"shopen"):
            warn(u"The forward slash is reserved for special use in parallel treebanks: "+sid,u"Metadata")
        known_ids.add(sid) 
Developer: UniversalDependencies, Project: tools, Lines: 22, Source: validate-python2-obsolete.py
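
The sentid_re pattern is defined elsewhere in the validator. A hedged approximation, just to show the shape of a well-formed line (the upstream pattern may differ in detail):

import regex as re

# Assumed approximation of sentid_re; the real definition lives elsewhere
# in validate-python2-obsolete.py.
sentid_re = re.compile(r"^# sent_id\s*=\s*(\S+)$")

m = sentid_re.match("# sent_id = en_ewt-ud-train-0001")
print(m.group(1) if m else None)  # en_ewt-ud-train-0001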

Example 3: validate_left_to_right_relations

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def validate_left_to_right_relations(cols):
    """
    Certain UD relations must always go left-to-right.
    Here we currently check the rule for the basic dependencies.
    The same should also be tested for the enhanced dependencies!
    """
    if is_multiword_token(cols):
        return
    if DEPREL >= len(cols):
        return # this has already been reported in trees()
    #if cols[DEPREL] == u"conj":
    if re.match(r"^(conj|fixed|flat)", cols[DEPREL]):
        ichild = int(cols[ID])
        iparent = int(cols[HEAD])
        if ichild < iparent:
            warn(u"Violation of guidelines: relation %s must go left-to-right" % cols[DEPREL], u"Syntax")


##### Tests applicable to the whole tree 
Developer: UniversalDependencies, Project: tools, Lines: 21, Source: validate-python2-obsolete.py

Example 4: preprocess

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def preprocess(article):
    # Take out HTML escaping WikiExtractor didn't clean
    for k, v in article.items():
        article[k] = PARSER.unescape(v)

    # Filter some disambiguation pages not caught by the WikiExtractor
    if article['id'] in BLACKLIST:
        return None
    if '(disambiguation)' in article['title'].lower():
        return None
    if '(disambiguation page)' in article['title'].lower():
        return None

    # Take out List/Index/Outline pages (mostly links)
    if re.match(r'(List of .+)|(Index of .+)|(Outline of .+)',
                article['title']):
        return None

    # Return doc with `id` set to `title`
    return {'id': article['title'], 'text': article['text']} 
Developer: ailabstw, Project: justcopy-backend, Lines: 22, Source: prep_wikipedia.py
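
The title filter is easy to exercise on its own; this sketch isolates the regex.match call (HTML unescaping via PARSER and the BLACKLIST lookup from the original script are omitted):

import regex as re

def is_list_like_title(title):
    # Same pattern as the original: prefix-match List/Index/Outline pages.
    return re.match(r'(List of .+)|(Index of .+)|(Outline of .+)', title) is not None

print(is_list_like_title('List of sovereign states'))       # True
print(is_list_like_title('Python (programming language)'))  # False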

Example 5: list_tables

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def list_tables(self, like=None, database=None):
        """
        List tables in the current (or indicated) database. Like the SHOW
        TABLES command.

        Parameters
        ----------
        like : string, default None
          A regular expression applied with re.match, e.g. 'foo' to match
          all tables whose names start with 'foo'
        database : string, default None
          If not passed, uses the current/default database

        Returns
        -------
        results : list of strings
        """
        results = [t.name for t in self._catalog.listTables(dbName=database)]
        if like:
            results = [
                table_name
                for table_name in results
                if re.match(like, table_name) is not None
            ]

        return results 
Developer: ibis-project, Project: ibis, Lines: 27, Source: client.py
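
Note that `like` is fed straight to re.match, so it is interpreted as a regular expression anchored at the start of the name, not as a shell glob. A standalone illustration of the filtering line:

import regex as re

tables = ['foo_a', 'foo_b', 'bar']
like = 'foo'  # regex prefix, not a glob
print([t for t in tables if re.match(like, t) is not None])  # ['foo_a', 'foo_b']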

Example 6: _extract_encoding

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def _extract_encoding(self, source):
        if isinstance(source, bytes):
            # Note: the local name `re` is bound to a compiled pattern here,
            # shadowing any imported `re`/`regex` module within this method.
            re = self._encoding_bytes_re
            nl = b"\n"
        else:
            re = self._encoding_re
            nl = "\n"
        match = re.match(source)
        if not match:
            index = source.find(nl)
            if index != -1:
                match = re.match(source[index + 1:])
        if match:
            encoding = match.group(1)
            if isinstance(encoding, bytes):
                return encoding.decode("ascii")
            return encoding
        return "ascii" 
Developer: m-labs, Project: pythonparser, Lines: 20, Source: source.py
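
The _encoding_re and _encoding_bytes_re attributes are compiled elsewhere in pythonparser. A hedged sketch using the conventional PEP 263 coding-cookie pattern, which may differ from the upstream definition:

import regex

# Assumed PEP 263-style pattern; pythonparser compiles its own variant.
encoding_re = regex.compile(r'^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)')

print(encoding_re.match('# -*- coding: utf-8 -*-').group(1))  # utf-8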

Example 7: __init__

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def __init__(self, words, tags, rels):
        self.pad_index = 0
        self.unk_index = 1

        self.words = [self.PAD, self.UNK] + sorted(words)
        self.tags = [self.PAD, self.UNK] + sorted(tags)
        self.rels = sorted(rels)

        self.word_dict = {word: i for i, word in enumerate(self.words)}
        self.tag_dict = {tag: i for i, tag in enumerate(self.tags)}
        self.rel_dict = {rel: i for i, rel in enumerate(self.rels)}

        # ids of punctuation that appear in words
        self.puncts = sorted(i for word, i in self.word_dict.items()
                             if regex.match(r'\p{P}+$', word))

        self.n_words = len(self.words)
        self.n_tags = len(self.tags)
        self.n_rels = len(self.rels)
        self.n_train_words = self.n_words 
Developer: smilelight, Project: lightNLP, Lines: 22, Source: vocab.py
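
The \p{P} Unicode-property class used here is one reason to choose the third-party regex package: the standard-library re module does not support \p{...} escapes. A quick demonstration of the punctuation test:

import regex

words = ['hello', ',', '...', '!?', "isn't"]
# match() anchors at the start and $ anchors the end, so the whole
# token must consist of punctuation characters.
print([w for w in words if regex.match(r'\p{P}+$', w)])  # [',', '...', '!?']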

Example 8: _get_epoch

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def _get_epoch(_str):
    """Take incoming string, return its epoch."""
    _return = None
    if _str.startswith('A.D. '):
        _return = 'ad'
    elif _str.startswith('a. A.D. '):
        _return = None #?
    elif _str.startswith('p. A.D. '):
        _return = 'ad'
    elif regex.match(r'^[0-9]+ B\.C\. *', _str):
        _return = 'bc'
    elif regex.match(r'^a\. *[0-9]+ B\.C\. *', _str):
        _return = 'bc'
    elif regex.match(r'^p\. *[0-9]+ B\.C\. *', _str):
        _return = None  #?
    elif _str == 'Incertum' or _str == 'Varia':
        _return = _str
    return _return 
Developer: cltk, Project: cltk, Lines: 20, Source: parse_tlg_indices.py
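
Assuming the function above is in scope, a few illustrative calls:

print(_get_epoch('A.D. 14'))   # 'ad'
print(_get_epoch('100 B.C.'))  # 'bc'
print(_get_epoch('Incertum'))  # 'Incertum'
print(_get_epoch('undated'))   # None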

Example 9: _window_match

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def _window_match(match, window=100):
    """Take incoming match and highlight in context.
    :param match: Regex match.
    :param window: Characters on each side of match to return.
    :type window: int
    :rtype : str
    """
    window = int(window)
    start = match.start()
    end = match.end()
    # Clamp the left edge at 0 so a match near the start of the string
    # does not wrap around to a negative slice index.
    snippet_left = match.string[max(start - window, 0):start]
    snippet_match = match.string[start:end]
    snippet_right = match.string[end:end + window]

    snippet = snippet_left + '*' + snippet_match + '*' + snippet_right

    return snippet 
Developer: cltk, Project: cltk, Lines: 19, Source: query.py
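
A short usage example with a narrow window (the default is 100 characters on each side):

import regex

text = 'in the beginning was the word and the word was'
m = regex.search('word', text)
print(_window_match(m, window=10))  # g was the *word* and the w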

Example 10: match_regex

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def match_regex(input_str, pattern, language, context, case_insensitive=True):
    """Take input string and a regex pattern, then yield generator of matches
     in desired format.

     TODO: Rename this `match_pattern` and incorporate the keyword expansion
      code currently in search_corpus.

    :param input_str: Text to search.
    :param pattern: Regex pattern to match against input_str.
    :param language: Language of the text, passed to the sentence-context helper.
    :param context: Integer window size, or 'sentence' or 'paragraph'.
    :rtype : str
    """
    if type(context) is str:
        contexts = ['sentence', 'paragraph']
        assert context in contexts, 'Available contexts: {}'.format(contexts)
    else:
        context = int(context)
    for match in _regex_span(pattern, input_str, case_insensitive=case_insensitive):
        if context == 'sentence':
            yield _sentence_context(match, language)
        elif context == 'paragraph':
            yield _paragraph_context(match)
        else:
            yield _window_match(match, context) 
Developer: cltk, Project: cltk, Lines: 27, Source: query.py
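
The helpers _regex_span, _sentence_context, and _paragraph_context live elsewhere in cltk's query module. As a rough sketch of what the span helper plausibly does (the upstream implementation may differ), something like:

import regex

def _regex_span(pattern, string, case_insensitive=True):
    # Assumed behavior: yield a match object for every occurrence.
    flags = regex.IGNORECASE if case_insensitive else 0
    for match in regex.finditer(pattern, string, flags=flags):
        yield match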

Example 11: test_partial_parse

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
# Imports needed to run this test as a standalone excerpt; the module
# paths follow the ctparse package layout.
import datetime

import pytest
import regex

from ctparse.partial_parse import PartialParse
from ctparse.types import RegexMatch, Time


def test_partial_parse() -> None:
    match_a = regex.match("(?<R1>a)", "ab")
    match_b = next(regex.finditer("(?<R2>b)", "ab"))

    pp = PartialParse.from_regex_matches(
        (RegexMatch(1, match_a), RegexMatch(2, match_b))
    )

    assert len(pp.prod) == 2
    assert len(pp.rules) == 2

    assert isinstance(pp.score, float)

    def mock_rule(ts: datetime.datetime, a: Time) -> Time:
        return Time()

    pp2 = pp.apply_rule(
        datetime.datetime(day=1, month=1, year=2015), mock_rule, "mock_rule", (0, 1)
    )

    assert pp != pp2

    with pytest.raises(ValueError):
        PartialParse((), ()) 
Developer: comtravo, Project: ctparse, Lines: 26, Source: test_partialparse.py
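
The (?<name>...) syntax in these patterns is a regex-module extension: the standard library only accepts (?P<name>...), which regex supports as well. For instance:

import regex

m = regex.match("(?<R1>a)", "ab")
print(m.group("R1"))  # a
print(m.span("R1"))   # (0, 1)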

Example 12: _read_rule

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def _read_rule(self, i, line):
        line = line.strip()
        if line:
            line = unicodedata.normalize('NFD', line)
            s = re.match(r'(?P<symbol>::\w+::)\s*=\s*(?P<value>.+)', line)
            if s:
                self.symbols[s.group('symbol')] = s.group('value')
            else:
                line = self._sub_symbols(line)
                r = re.match(r'(\S+)\s*->\s*(\S+)\s*/\s*(\S*)\s*[_]\s*(\S*)', line)
                try:
                    a, b, X, Y = r.groups()
                except AttributeError:
                    raise DatafileError('Line {}: "{}" cannot be parsed.'.format(i + 1, line))
                X, Y = X.replace('#', '^'), Y.replace('#', '$')
                a, b = a.replace('0', ''), b.replace('0', '')
                try:
                    if re.search(r'[?]P[<]sw1[>].+[?]P[<]sw2[>]', a):
                        return self._fields_to_function_metathesis(a, X, Y)
                    else:
                        return self._fields_to_function(a, b, X, Y)
                except Exception as e:
                    raise DatafileError('Line {}: "{}" cannot be compiled as regex: ̪{}'.format(i + 1, line, e)) 
Developer: dmort27, Project: epitran, Lines: 25, Source: rules.py
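
To see the two match calls in action outside the class, here is a standalone illustration of the rule grammar they recognize (the sample lines are made up for the demo):

import regex as re

# A symbol definition line.
s = re.match(r'(?P<symbol>::\w+::)\s*=\s*(?P<value>.+)', '::vowel:: = a|e|i|o|u')
print(s.group('symbol'), s.group('value'))  # ::vowel:: a|e|i|o|u

# A rewrite rule line of the form "a -> b / X _ Y".
r = re.match(r'(\S+)\s*->\s*(\S+)\s*/\s*(\S*)\s*[_]\s*(\S*)', 'a -> b / c _ d')
print(r.groups())  # ('a', 'b', 'c', 'd')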

Example 13: transliterate

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def transliterate(self, text, normpunc=False, ligatures=False):
        """Convert English text to IPA transcription

        Args:
            text (unicode): English text
            normpunc (bool): if True, normalize punctuation downward
            ligatures (bool): if True, use non-standard ligatures instead of
                              standard IPA
        """
        text = unicodedata.normalize('NFC', text)
        acc = []
        for chunk in self.chunk_re.findall(text):
            if self.letter_re.match(chunk):
                acc.append(self.english_g2p(chunk))
            else:
                acc.append(chunk)
        text = ''.join(acc)
        text = self.puncnorm.norm(text) if normpunc else text
        text = ligaturize(text) if (ligatures or self.ligatures) else text
        return text 
Developer: dmort27, Project: epitran, Lines: 22, Source: flite.py
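
The chunk_re and letter_re patterns are built in the class constructor. A hedged guess at their shape, only to show how the loop splits letter runs from everything else (epitran's actual patterns may differ):

import regex

chunk_re = regex.compile(r'[A-Za-z]+|[^A-Za-z]+')  # assumed shape
letter_re = regex.compile(r'[A-Za-z]')             # assumed shape

for chunk in chunk_re.findall('hello, world'):
    print(repr(chunk), 'letters' if letter_re.match(chunk) else 'other')
# 'hello' letters / ', ' other / 'world' letters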

Example 14: _check_path

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def _check_path(self, config_path, node):
        # A path like "ns=2;i=1234" is already a fully qualified OPC UA
        # node identifier, so it is returned unchanged.
        if regex.match(r"ns=\d*;[isgb]=.*", config_path, regex.IGNORECASE):
            return config_path
        # Otherwise build the full browse path, unless the configured path
        # is already anchored at the Root folder.
        if re.search(r"^root", config_path.lower()) is None:
            node_path = '\\\\.'.join(
                char.split(":")[1] for char in node.get_path(200000, True))
            if config_path[-3:] != '\\.':
                information_path = node_path + '\\\\.' + config_path.replace('\\', '\\\\')
            else:
                information_path = node_path + config_path.replace('\\', '\\\\')
        else:
            information_path = config_path
        result = information_path[:]
        return result 
Developer: thingsboard, Project: thingsboard-gateway, Lines: 16, Source: opcua_connector.py
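
The first match call recognizes fully qualified OPC UA node identifiers; a quick check of the pattern in isolation:

import regex

pattern = r"ns=\d*;[isgb]=.*"
print(bool(regex.match(pattern, "ns=2;i=1234", regex.IGNORECASE)))         # True
print(bool(regex.match(pattern, "Root.Objects.Device", regex.IGNORECASE)))  # False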

Example 15: filter_word

# Required import: import regex [as an alias]
# Or: from regex import match [as an alias]
def filter_word(text):
    """Take out english stopwords, punctuation, and compound endings."""
    text = normalize(text)
    if regex.match(r'^\p{P}+$', text):
        return True
    if text.lower() in STOPWORDS:
        return True
    return False 
Developer: thunlp, Project: OpenQA, Lines: 10, Source: utils.py
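
The normalize helper and the STOPWORDS set are defined elsewhere in OpenQA's utils module, but the punctuation test itself is easy to try standalone:

import regex

for token in ['the', '!!', 'Paris', '...']:
    print(token, bool(regex.match(r'^\p{P}+$', token)))
# the False / !! True / Paris False / ... True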


Note: The regex.match examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by their respective developers, and copyright remains with the original authors. For distribution and use, please refer to each project's license; do not reproduce without permission.