This article collects typical usage examples of the regex.IGNORECASE attribute in Python. If you have been wondering what exactly regex.IGNORECASE does and how to use it, the curated examples below may help. You can also explore further usage examples of the regex module, where this attribute is defined.
The following presents 15 code examples of the regex.IGNORECASE attribute, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
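As a quick orientation before the examples, here is a minimal, self-contained sketch (not taken from the examples below) of what the flag does: a pattern compiled with regex.IGNORECASE matches regardless of letter case.

import regex

pattern = regex.compile(r"hello", flags=regex.IGNORECASE)
print(bool(pattern.search("Say HELLO")))          # True: case is ignored
print(bool(regex.search(r"hello", "Say HELLO")))  # False without the flag
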
Example 1: __init__

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, **kwargs):
    """
    Args:
        annotators: None or empty set (only tokenizes).
        substitutions: if true, normalizes some token types (e.g. quotes).
    """
    self._regexp = regex.compile(
        '(?P<digit>%s)|(?P<title>%s)|(?P<abbr>%s)|(?P<neg>%s)|(?P<hyph>%s)|'
        '(?P<contr1>%s)|(?P<alphanum>%s)|(?P<contr2>%s)|(?P<sdquote>%s)|'
        '(?P<edquote>%s)|(?P<ssquote>%s)|(?P<esquote>%s)|(?P<dash>%s)|'
        '(?P<ellipses>%s)|(?P<punct>%s)|(?P<nonws>%s)' %
        (self.DIGIT, self.TITLE, self.ABBRV, self.NEGATION, self.HYPHEN,
         self.CONTRACTION1, self.ALPHA_NUM, self.CONTRACTION2,
         self.START_DQUOTE, self.END_DQUOTE, self.START_SQUOTE,
         self.END_SQUOTE, self.DASH, self.ELLIPSES, self.PUNCT,
         self.NON_WS),
        flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
    )
    if len(kwargs.get('annotators', {})) > 0:
        logger.warning('%s only tokenizes! Skipping annotators: %s' %
                       (type(self).__name__, kwargs.get('annotators')))
    self.annotators = set()
    self.substitutions = kwargs.get('substitutions', True)

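The pattern above relies on class constants (DIGIT, TITLE, ABBRV, ...) that are not shown in this snippet. The following simplified, hypothetical analogue illustrates the same idea: one alternation of named groups compiled with regex.IGNORECASE + regex.UNICODE + regex.MULTILINE, where match.lastgroup reports which token type fired.

import regex

pat = regex.compile(
    r"(?P<digit>\d+)|(?P<alphanum>[\p{L}\p{N}]+)|(?P<punct>\p{P}+)",
    flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE,
)
for m in pat.finditer("Hello, WORLD 42"):
    print(m.lastgroup, repr(m.group()))
# alphanum 'Hello'
# punct ','
# alphanum 'WORLD'
# digit '42'
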
Example 2: __init__

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, encoder, bpe_merges, errors='replace'):
    self.encoder = encoder
    self.decoder = {v: k for k, v in self.encoder.items()}
    self.errors = errors  # how to handle errors in decoding
    self.byte_encoder = bytes_to_unicode()
    self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
    self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
    self.cache = {}
    try:
        import regex as re
        self.re = re
    except ImportError:
        raise ImportError('Please install regex with: pip install regex')
    # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
    self.pat = self.re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")

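To see why the comment wishes re.IGNORECASE had been used: the pre-tokenization pattern lists only lowercase contraction suffixes, so upper-case contractions are split differently. A quick check (assuming the regex package is installed; the sample sentence is illustrative):

import regex as re

pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")
print(pat.findall("He's here, ISN'T he?"))
# ["He", "'s", " here", ",", " ISN", "'", "T", " he", "?"]
# "'s" is kept as one piece, but "'T" is split into "'" and "T"
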
Example 3: __init__

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, vocab_file, merges_file, errors='replace', special_tokens=None, max_len=None):
    self.max_len = max_len if max_len is not None else int(1e12)
    self.encoder = json.load(open(vocab_file))
    self.decoder = {v: k for k, v in self.encoder.items()}
    self.errors = errors  # how to handle errors in decoding
    self.byte_encoder = bytes_to_unicode()
    self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
    bpe_data = open(merges_file, encoding='utf-8').read().split('\n')[1:-1]
    bpe_merges = [tuple(merge.split()) for merge in bpe_data]
    self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
    self.cache = {}
    # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
    self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")
    self.special_tokens = {}
    self.special_tokens_decoder = {}
    self.set_special_tokens(special_tokens)

Example 4: __init__

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, vocab_file, merges_file, errors='replace', special_tokens=None, max_len=None):
    self.max_len = max_len if max_len is not None else int(1e12)
    self.encoder = json.load(open(vocab_file))
    self.decoder = {v: k for k, v in self.encoder.items()}
    self.errors = errors  # how to handle errors in decoding
    bpe_data = open(merges_file, encoding='utf-8').read().split('\n')[1:-1]
    bpe_merges = [tuple(merge.split()) for merge in bpe_data]
    self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
    self.cache = {}
    # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
    self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")
    self.special_tokens = {}
    self.special_tokens_decoder = {}
    self.set_special_tokens(special_tokens)

Example 5: __init__

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, vocab_file, merges_file, errors='replace', bos_token="<s>", eos_token="</s>", sep_token="</s>",
             cls_token="<s>", unk_token="<unk>", pad_token='<pad>', mask_token='<mask>', **kwargs):
    super(RobertaTokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token,
                                           sep_token=sep_token, cls_token=cls_token, pad_token=pad_token,
                                           mask_token=mask_token, **kwargs)
    self.encoder = json.load(open(vocab_file, encoding="utf-8"))
    self.decoder = {v: k for k, v in self.encoder.items()}
    self.errors = errors  # how to handle errors in decoding
    self.byte_encoder = bytes_to_unicode()
    self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
    bpe_data = open(merges_file, encoding='utf-8').read().split('\n')[1:-1]
    bpe_merges = [tuple(merge.split()) for merge in bpe_data]
    self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
    self.cache = {}
    # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
    self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")

Example 6: __init__

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, vocab_file, merges_file, errors='replace', unk_token="<|endoftext|>",
             bos_token="<|endoftext|>", eos_token="<|endoftext|>", **kwargs):
    super(GPT2Tokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
    self.encoder = json.load(open(vocab_file))
    self.decoder = {v: k for k, v in self.encoder.items()}
    self.errors = errors  # how to handle errors in decoding
    self.byte_encoder = bytes_to_unicode()
    self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
    bpe_data = open(merges_file, encoding='utf-8').read().split('\n')[1:-1]
    bpe_merges = [tuple(merge.split()) for merge in bpe_data]
    self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
    self.cache = {}
    # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
    self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")

Example 7: __init__

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, vocab_file, merges_file, errors='replace', unk_token="<|endoftext|>",
             bos_token="<|endoftext|>", eos_token="<|endoftext|>", **kwargs):
    super(GPT2Tokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
    self.max_len_single_sentence = self.max_len  # no default special tokens - you can update this value if you add special tokens
    self.max_len_sentences_pair = self.max_len   # no default special tokens - you can update this value if you add special tokens
    self.encoder = json.load(open(vocab_file, encoding="utf-8"))
    self.decoder = {v: k for k, v in self.encoder.items()}
    self.errors = errors  # how to handle errors in decoding
    self.byte_encoder = bytes_to_unicode()
    self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
    bpe_data = open(merges_file, encoding='utf-8').read().split('\n')[1:-1]
    bpe_merges = [tuple(merge.split()) for merge in bpe_data]
    self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
    self.cache = {}
    # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
    self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")

Example 8: count_occurances

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def count_occurances(self, regex: str, case_sensitive: bool = False):
    """Count occurrences of the regex.

    Counts the number of times the provided pattern occurs in the state.

    Args:
        regex (str): Required. Regex string to search for.
        case_sensitive (bool, optional): If True, the search is case sensitive. Defaults to False.

    Returns:
        Chepy: The Chepy object.

    Examples:
        >>> Chepy("AABCDADJAKDJHKSDAJSDdaskjdhaskdjhasdkja").count_occurances("ja").output
        2
    """
    if case_sensitive:
        r = re.compile(regex)
    else:
        r = re.compile(regex, re.IGNORECASE)
    self.state = len(r.findall(self._convert_to_str()))
    return self

Example 9: find_replace

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def find_replace(self, pattern: str, repl: str, ignore_case=True):
    """Replace matched pattern with repl.

    Args:
        pattern (str): Required. Pattern to search for.
        repl (str): Required. String to replace matches with.
        ignore_case (bool, optional): Case insensitive search. Defaults to True.

    Returns:
        Chepy: The Chepy object.

    Examples:
        >>> Chepy("some some data").find_replace(r"some\s", "data").o
        "datadatadata"
    """
    flags = 0
    if ignore_case:
        flags = re.IGNORECASE
    self.state = re.sub(pattern, repl, self._convert_to_str(), flags=flags)
    return self

Example 10: search_ctf_flags

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def search_ctf_flags(self, prefix: str, postfix: str = ".+?\{*\}"):
    """Search CTF style flags.

    This by default assumes that the flag format is similar
    to something like picoCTF{some_flag} as an example.

    Args:
        prefix (str): Prefix of the flag. Like `picoCTF`.
        postfix (str, optional): Regex for the remainder of the flag.
            Defaults to `.+?\{*\}`.

    Returns:
        Chepy: The Chepy object.

    Examples:
        >>> Chepy("tests/files/flags").read_file().search_ctf_flags("pico").get_by_index(0)
        picoCTF{r3source_pag3_f1ag}
    """
    self.state = re.findall(prefix + postfix, self._convert_to_str(), re.IGNORECASE)
    return self

Example 11: test_regex_recognizes_source

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def test_regex_recognizes_source(self):
    src_list = [
        ('Retail Store A', 1,),
        ('Big Bank AB', 1,),
        ('Acme Capital, Inc.', 1,),
        ('Lowe & Swayze', 1,),
        ('Big Bank & Company (004578)', 1,),
        ('Family Name Limited (173437)', 1,),
        ('Financial Services & Co. (015607)', 1,),
        ('Food Wholsale, Inc. (056230)', 1,),
        ('All Eyes Communications (018951)', 1,),
        ('Joe Smith Archives, LLC d/b/a Foxtrot (085292)', 2,),
    ]
    importer = self.make_importer()
    importer.wrap_in_wordbreaks = True
    for phrase, target_ct in src_list:
        ptrns = importer.pre_process_regexp_option([phrase])
        self.assertEqual(target_ct, len(ptrns),
                         f'"{phrase}" produced {len(ptrns)} patterns, expected {target_ct}')
        matches = 0
        for ptrn in ptrns:
            rg = re.compile(ptrn, re.IGNORECASE)
            for _ in rg.finditer(phrase):
                matches += 1
        self.assertEqual(target_ct, matches,
                         f'"{phrase}" gives {matches} matches, expected {target_ct}')

Example 12: build_tz_offsets

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def build_tz_offsets(search_regex_parts):

    def get_offset(tz_obj, regex, repl='', replw=''):
        return (
            tz_obj[0],
            {
                'regex': re.compile(re.sub(repl, replw, regex % tz_obj[0]), re.IGNORECASE),
                'offset': timedelta(seconds=tz_obj[1])
            }
        )

    for tz_info in timezone_info_list:
        for regex in tz_info['regex_patterns']:
            for tz_obj in tz_info['timezones']:
                search_regex_parts.append(tz_obj[0])
                yield get_offset(tz_obj, regex)

            # alternate patterns
            for replace, replacewith in tz_info.get('replace', []):
                for tz_obj in tz_info['timezones']:
                    search_regex_parts.append(re.sub(replace, replacewith, tz_obj[0]))
                    yield get_offset(tz_obj, regex, repl=replace, replw=replacewith)

Example 13: regex_match_score

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None

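A hedged usage sketch of the helper above (the strings are illustrative, not from the source): because the pattern is compiled with re.IGNORECASE the prediction's casing does not matter, and because compiled.match is used the check is anchored at the start of the prediction.

print(regex_match_score("April 1961", r"april\s+1961"))     # True: case is ignored
print(regex_match_score("in April 1961", r"april\s+1961"))  # False: match() anchors at the start
print(regex_match_score("March 1961", r"april\s+1961"))     # False
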
Example 14: _check_path

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def _check_path(self, config_path, node):
    if regex.match(r"ns=\d*;[isgb]=.*", config_path, regex.IGNORECASE):
        return config_path
    if re.search(r"^root", config_path.lower()) is None:
        node_path = '\\\\.'.join(
            char.split(":")[1] for char in node.get_path(200000, True))
        if config_path[-3:] != '\\.':
            information_path = node_path + '\\\\.' + config_path.replace('\\', '\\\\')
        else:
            information_path = node_path + config_path.replace('\\', '\\\\')
    else:
        information_path = config_path
    result = information_path[:]
    return result

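The first branch above recognizes OPC UA style node identifiers (for example ns=<namespace>;i=<numeric id>) case-insensitively and returns them unchanged. A small illustration with hypothetical paths:

import regex

node_id = r"ns=\d*;[isgb]=.*"
print(bool(regex.match(node_id, "ns=2;i=1234", regex.IGNORECASE)))               # True
print(bool(regex.match(node_id, "NS=2;S=Temperature", regex.IGNORECASE)))        # True: case ignored
print(bool(regex.match(node_id, "Root\\.Objects\\.Device", regex.IGNORECASE)))   # False: falls through to path handling
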
Example 15: __init__

# Required module: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, encoder, bpe_merges, errors='replace'):
    self.encoder = encoder
    self.decoder = {v: k for k, v in self.encoder.items()}
    self.errors = errors  # how to handle errors in decoding
    self.byte_encoder = bytes_to_unicode()
    self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
    self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
    self.cache = {}
    # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
    self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")