本文整理汇总了Python中regex.match方法的典型用法代码示例。如果您正苦于以下问题:Python regex.match方法的具体用法?Python regex.match怎么用?Python regex.match使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类regex
的用法示例。
在下文中一共展示了regex.match方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: validate_left_to_right_relations
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def validate_left_to_right_relations(id, tree):
    """Check that relations which must be left-headed really are.

    Certain UD relations must always go left-to-right.
    Here we currently check the rule for the basic dependencies.
    The same should also be tested for the enhanced dependencies!
    """
    testlevel = 3
    testclass = 'Syntax'
    cols = tree['nodes'][id]
    # Multiword token ranges carry no relation; short lines were already
    # reported in trees().
    if is_multiword_token(cols) or DEPREL >= len(cols):
        return
    # According to the v2 guidelines, apposition should also be left-headed,
    # although the definition of apposition may need to be improved.
    if not re.match(r"^(conj|fixed|flat|goeswith|appos)", cols[DEPREL]):
        return
    child_index = int(cols[ID])
    parent_index = int(cols[HEAD])
    if child_index >= parent_index:
        return
    # We must recognize the relation type in the test id so we can manage
    # exceptions for legacy treebanks. For conj, flat, and fixed the
    # requirement was introduced already before UD 2.2, and all treebanks
    # in UD 2.3 passed it. For appos and goeswith the requirement was
    # introduced before UD 2.4 and legacy treebanks are allowed to fail it.
    testid = "right-to-left-%s" % lspec2ud(cols[DEPREL])
    testmessage = "Relation '%s' must go left-to-right." % cols[DEPREL]
    warn(testmessage, testclass, testlevel=testlevel, testid=testid, nodeid=id, nodelineno=tree['linenos'][id])
示例2: validate_sent_id
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def validate_sent_id(comments, known_ids, lcode):
    """Check that exactly one well-formed, unique sent_id comment exists."""
    matched = []
    for comment in comments:
        m = sentid_re.match(comment)
        if m is not None:
            matched.append(m)
        elif comment.startswith(u"# sent_id") or comment.startswith(u"#sent_id"):
            # Looks like a sent_id line but does not parse.
            warn(u"Spurious sent_id line: '%s' Should look like '# sent_id = xxxxxx' where xxxx is not whitespace. Forward slash reserved for special purposes." % comment, u"Metadata")
    if not matched:
        warn(u"Missing the sent_id attribute.", u"Metadata")
        return
    if len(matched) > 1:
        warn(u"Multiple sent_id attribute.", u"Metadata")
        return
    # Exactly one sent_id line: validate uniqueness and the slash convention.
    sid = matched[0].group(1)
    if sid in known_ids:
        warn(u"Non-unique sent_id the sent_id attribute: " + sid, u"Metadata")
    if sid.count(u"/") > 1 or (sid.count(u"/") == 1 and lcode != u"ud" and lcode != u"shopen"):
        warn(u"The forward slash is reserved for special use in parallel treebanks: " + sid, u"Metadata")
    known_ids.add(sid)
示例3: validate_left_to_right_relations
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def validate_left_to_right_relations(cols):
    """
    Certain UD relations must always go left-to-right.
    Here we currently check the rule for the basic dependencies.
    The same should also be tested for the enhanced dependencies!
    """
    if is_multiword_token(cols):
        return
    if DEPREL >= len(cols):
        # Short line; this has been already reported in trees().
        return
    if re.match(r"^(conj|fixed|flat)", cols[DEPREL]):
        # These relations must have the head to the left of the dependent.
        if int(cols[ID]) < int(cols[HEAD]):
            warn(u"Violation of guidelines: relation %s must go left-to-right" % cols[DEPREL], u"Syntax")
##### Tests applicable to the whole tree
示例4: preprocess
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def preprocess(article):
    """Clean one WikiExtractor article dict; return None for pages to drop."""
    # Take out HTML escaping WikiExtractor didn't clean
    for key in article:
        article[key] = PARSER.unescape(article[key])
    # Filter some disambiguation pages not caught by the WikiExtractor
    if article['id'] in BLACKLIST:
        return None
    lowered_title = article['title'].lower()
    if '(disambiguation)' in lowered_title or '(disambiguation page)' in lowered_title:
        return None
    # Take out List/Index/Outline pages (mostly links)
    if re.match(r'(List of .+)|(Index of .+)|(Outline of .+)', article['title']):
        return None
    # Return doc with `id` set to `title`
    return {'id': article['title'], 'text': article['text']}
示例5: list_tables
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def list_tables(self, like=None, database=None):
    """
    List tables in the current (or indicated) database. Like the SHOW
    TABLES command.
    Parameters
    ----------
    like : string, default None
      e.g. 'foo*' to match all tables starting with 'foo'
    database : string, default None
      If not passed, uses the current/default database
    Returns
    -------
    results : list of strings
    """
    names = [table.name for table in self._catalog.listTables(dbName=database)]
    if not like:
        return names
    # Keep only names matching the pattern (anchored at the start).
    return [name for name in names if re.match(like, name)]
示例6: _extract_encoding
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def _extract_encoding(self, source):
if isinstance(source, bytes):
re = self._encoding_bytes_re
nl = b"\n"
else:
re = self._encoding_re
nl = "\n"
match = re.match(source)
if not match:
index = source.find(nl)
if index != -1:
match = re.match(source[index + 1:])
if match:
encoding = match.group(1)
if isinstance(encoding, bytes):
return encoding.decode("ascii")
return encoding
return "ascii"
示例7: __init__
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def __init__(self, words, tags, rels):
    """Build vocabulary lookup tables for words, POS tags and relations.

    Index 0 is reserved for padding and index 1 for unknown tokens in the
    word and tag vocabularies; relations get no special entries.
    """
    self.pad_index = 0
    self.unk_index = 1
    self.words = [self.PAD, self.UNK] + sorted(words)
    self.tags = [self.PAD, self.UNK] + sorted(tags)
    self.rels = sorted(rels)
    self.word_dict = {token: index for index, token in enumerate(self.words)}
    self.tag_dict = {token: index for index, token in enumerate(self.tags)}
    self.rel_dict = {token: index for index, token in enumerate(self.rels)}
    # ids of punctuation that appear in words
    is_punct = regex.compile(r'\p{P}+$').match
    self.puncts = sorted(index for token, index in self.word_dict.items()
                         if is_punct(token))
    self.n_words = len(self.words)
    self.n_tags = len(self.tags)
    self.n_rels = len(self.rels)
    self.n_train_words = self.n_words
示例8: _get_epoch
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def _get_epoch(_str):
"""Take incoming string, return its epoch."""
_return = None
if _str.startswith('A.D. '):
_return = 'ad'
elif _str.startswith('a. A.D. '):
_return = None #?
elif _str.startswith('p. A.D. '):
_return = 'ad'
elif regex.match(r'^[0-9]+ B\.C\. *', _str):
_return = 'bc'
elif regex.match(r'^a\. *[0-9]+ B\.C\. *', _str):
_return = 'bc'
elif regex.match(r'^p\. *[0-9]+ B\.C\. *', _str):
_return = None #?
elif _str == 'Incertum' or _str == 'Varia':
_return = _str
return _return
示例9: _window_match
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def _window_match(match, window=100):
"""Take incoming match and highlight in context.
:rtype : str
:param match: Regex match.
:param window: Characters on each side of match to return.
:type window: int
"""
window = int(window)
start = match.start()
end = match.end()
snippet_left = match.string[start - window:start]
snippet_match = match.string[match.start():match.end()]
snippet_right = match.string[end:end + window]
snippet = snippet_left + '*' + snippet_match + '*' + snippet_right
return snippet
示例10: match_regex
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def match_regex(input_str, pattern, language, context, case_insensitive=True):
    """Take input string and a regex pattern, then yield generator of matches
    in desired format.
    TODO: Rename this `match_pattern` and incorporate the keyword expansion
    code currently in search_corpus.
    :param input_str:
    :param pattern:
    :param language:
    :param context: Integer or 'sentence' 'paragraph'
    :rtype : str
    """
    if type(context) is str:
        valid_contexts = ['sentence', 'paragraph']
        assert context in valid_contexts or type(context) is int, 'Available contexts: {}'.format(valid_contexts)
    else:
        context = int(context)
    for found in _regex_span(pattern, input_str, case_insensitive=case_insensitive):
        # String contexts dispatch to the structural helpers; numeric
        # contexts fall through to a character window around the match.
        if context == 'sentence':
            yield _sentence_context(found, language)
        elif context == 'paragraph':
            yield _paragraph_context(found)
        else:
            yield _window_match(found, context)
示例11: test_partial_parse
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def test_partial_parse() -> None:
    """Exercise PartialParse construction, rule application and validation."""
    first = regex.match("(?<R1>a)", "ab")
    second = next(regex.finditer("(?<R2>b)", "ab"))
    partial = PartialParse.from_regex_matches(
        (RegexMatch(1, first), RegexMatch(2, second))
    )
    assert len(partial.prod) == 2
    assert len(partial.rules) == 2
    assert isinstance(partial.score, float)

    def mock_rule(ts: datetime.datetime, a: Time) -> Time:
        return Time()

    applied = partial.apply_rule(
        datetime.datetime(day=1, month=1, year=2015), mock_rule, "mock_rule", (0, 1)
    )
    assert partial != applied
    # Empty production/rule tuples must be rejected.
    with pytest.raises(ValueError):
        PartialParse((), ())
示例12: _read_rule
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def _read_rule(self, i, line):
line = line.strip()
if line:
line = unicodedata.normalize('NFD', line)
s = re.match(r'(?P<symbol>::\w+::)\s*=\s*(?P<value>.+)', line)
if s:
self.symbols[s.group('symbol')] = s.group('value')
else:
line = self._sub_symbols(line)
r = re.match(r'(\S+)\s*->\s*(\S+)\s*/\s*(\S*)\s*[_]\s*(\S*)', line)
try:
a, b, X, Y = r.groups()
except AttributeError:
raise DatafileError('Line {}: "{}" cannot be parsed.'.format(i + 1, line))
X, Y = X.replace('#', '^'), Y.replace('#', '$')
a, b = a.replace('0', ''), b.replace('0', '')
try:
if re.search(r'[?]P[<]sw1[>].+[?]P[<]sw2[>]', a):
return self._fields_to_function_metathesis(a, X, Y)
else:
return self._fields_to_function(a, b, X, Y)
except Exception as e:
raise DatafileError('Line {}: "{}" cannot be compiled as regex: ̪{}'.format(i + 1, line, e))
示例13: transliterate
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def transliterate(self, text, normpunc=False, ligatures=False):
    """Convert English text to IPA transcription

    Args:
        text (unicode): English text
        normpunc (bool): if True, normalize punctuation downward
        ligatures (bool): if True, use non-standard ligatures instead of
                          standard IPA
    """
    text = unicodedata.normalize('NFC', text)
    pieces = []
    for chunk in self.chunk_re.findall(text):
        # Letter chunks go through G2P; everything else passes through.
        if self.letter_re.match(chunk):
            pieces.append(self.english_g2p(chunk))
        else:
            pieces.append(chunk)
    result = ''.join(pieces)
    if normpunc:
        result = self.puncnorm.norm(result)
    if ligatures or self.ligatures:
        result = ligaturize(result)
    return result
示例14: _check_path
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def _check_path(self, config_path, node):
    """Resolve *config_path* into a full information path for *node*.

    A NodeId-style address (``ns=<idx>;<i|s|g|b>=<value>``) is returned
    unchanged. A path not anchored at "root" gets the node's browse path
    prepended; backslashes in the config path are doubled.
    NOTE(review): the exact escaping contract is not visible here - confirm
    against the consumer of the returned path.
    """
    # NodeId-style address: use verbatim (case-insensitive).
    if regex.match(r"ns=\d*;[isgb]=.*", config_path, regex.IGNORECASE):
        return config_path
    if re.search(r"^root", config_path.lower()) is None:
        # Build the node's browse path, dropping the namespace prefix of
        # each element ("<ns>:<name>" -> "<name>") and joining with '\\.'.
        node_path = '\\\\.'.join(
            char.split(":")[1] for char in node.get_path(200000, True))
        # NOTE(review): config_path[-3:] is a 3-character slice compared to
        # the 2-character literal '\.', so this condition is True for any
        # path longer than 2 characters - looks like it was meant to test
        # the last two characters; confirm before changing behavior.
        if config_path[-3:] != '\\.':
            information_path = node_path + '\\\\.' + config_path.replace('\\', '\\\\')
        else:
            information_path = node_path + config_path.replace('\\', '\\\\')
    else:
        # Already anchored at "root": use as-is.
        information_path = config_path
    result = information_path[:]
    return result
示例15: filter_word
# 需要导入模块: import regex [as 别名]
# 或者: from regex import match [as 别名]
def filter_word(text):
"""Take out english stopwords, punctuation, and compound endings."""
text = normalize(text)
if regex.match(r'^\p{P}+$', text):
return True
if text.lower() in STOPWORDS:
return True
return False