Python regex.IGNORECASE Attribute Code Examples

This article collects and summarizes typical code examples of the regex.IGNORECASE attribute in Python. If you are wondering what exactly Python's regex.IGNORECASE does, how to use it, or what it looks like in practice, the curated attribute code examples below may help. You can also explore further usage examples from the regex module it belongs to.


The sections below present 15 code examples of the regex.IGNORECASE attribute, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
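As a minimal sketch of what the flag does (assuming the third-party regex package is installed, e.g. via pip install regex): compiling with regex.IGNORECASE makes matching case-insensitive.

import regex

pattern = regex.compile(r'python', flags=regex.IGNORECASE)
print(bool(pattern.search('I love Python')))  # True: the match ignores letter case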

Example 1: __init__

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, **kwargs):
        """
        Args:
            annotators: None or empty set (only tokenizes).
            substitutions: if true, normalizes some token types (e.g. quotes).
        """
        self._regexp = regex.compile(
            '(?P<digit>%s)|(?P<title>%s)|(?P<abbr>%s)|(?P<neg>%s)|(?P<hyph>%s)|'
            '(?P<contr1>%s)|(?P<alphanum>%s)|(?P<contr2>%s)|(?P<sdquote>%s)|'
            '(?P<edquote>%s)|(?P<ssquote>%s)|(?P<esquote>%s)|(?P<dash>%s)|'
            '(?P<ellipses>%s)|(?P<punct>%s)|(?P<nonws>%s)' %
            (self.DIGIT, self.TITLE, self.ABBRV, self.NEGATION, self.HYPHEN,
             self.CONTRACTION1, self.ALPHA_NUM, self.CONTRACTION2,
             self.START_DQUOTE, self.END_DQUOTE, self.START_SQUOTE,
             self.END_SQUOTE, self.DASH, self.ELLIPSES, self.PUNCT,
             self.NON_WS),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
        )
        if len(kwargs.get('annotators', {})) > 0:
            logger.warning('%s only tokenizes! Skipping annotators: %s' %
                           (type(self).__name__, kwargs.get('annotators')))
        self.annotators = set()
        self.substitutions = kwargs.get('substitutions', True) 
Author: thunlp, Project: OpenQA, Lines: 25, Source: regexp_tokenizer.py

Example 2: __init__

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, encoder, bpe_merges, errors='replace'):
        self.encoder = encoder
        self.decoder = {v:k for k,v in self.encoder.items()}
        self.errors = errors # how to handle errors in decoding
        self.byte_encoder = bytes_to_unicode()
        self.byte_decoder = {v:k for k, v in self.byte_encoder.items()}
        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
        self.cache = {}

        try:
            import regex as re
            self.re = re
        except ImportError:
            raise ImportError('Please install regex with: pip install regex')

        # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
        self.pat = self.re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""") 
Author: pytorch, Project: fairseq, Lines: 19, Source: gpt2_bpe_utils.py
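
The comment above points at a real quirk: without re.IGNORECASE, the lowercase contraction alternatives ('s, 't, 're, ...) do not match capitalized text, so a token like "'S" breaks apart. A minimal sketch of the difference (using the regex package as re, just as this example does):

import regex as re

pat = r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
print(re.findall(pat, "HE'S"))                 # ['HE', "'", 'S'] -- the contraction splits
print(re.findall(pat, "HE'S", re.IGNORECASE))  # ['HE', "'S"]    -- the contraction holds together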

Example 3: __init__

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, vocab_file, merges_file, errors='replace', special_tokens=None, max_len=None):
        self.max_len = max_len if max_len is not None else int(1e12)
        self.encoder = json.load(open(vocab_file))
        self.decoder = {v:k for k,v in self.encoder.items()}
        self.errors = errors # how to handle errors in decoding
        self.byte_encoder = bytes_to_unicode()
        self.byte_decoder = {v:k for k, v in self.byte_encoder.items()}
        bpe_data = open(merges_file, encoding='utf-8').read().split('\n')[1:-1]
        bpe_merges = [tuple(merge.split()) for merge in bpe_data]
        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
        self.cache = {}

        # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
        self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")

        self.special_tokens = {}
        self.special_tokens_decoder = {}
        self.set_special_tokens(special_tokens) 
Author: 649453932, Project: Bert-Chinese-Text-Classification-Pytorch, Lines: 20, Source: tokenization_gpt2.py

Example 4: __init__

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, vocab_file, merges_file, errors='replace', special_tokens=None, max_len=None):
        self.max_len = max_len if max_len is not None else int(1e12)
        self.encoder = json.load(open(vocab_file))
        self.decoder = {v: k for k, v in self.encoder.items()}
        self.errors = errors  # how to handle errors in decoding
        bpe_data = open(merges_file, encoding='utf-8').read().split('\n')[1:-1]
        bpe_merges = [tuple(merge.split()) for merge in bpe_data]
        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
        self.cache = {}

        # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
        self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")

        self.special_tokens = {}
        self.special_tokens_decoder = {}
        self.set_special_tokens(special_tokens) 
Author: ftarlaci, Project: GPT2sQA, Lines: 18, Source: tokenization.py

Example 5: __init__

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, vocab_file, merges_file, errors='replace', bos_token="<s>", eos_token="</s>", sep_token="</s>",
                 cls_token="<s>", unk_token="<unk>", pad_token='<pad>', mask_token='<mask>', **kwargs):
        super(RobertaTokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token,
                                               sep_token=sep_token, cls_token=cls_token, pad_token=pad_token,
                                               mask_token=mask_token, **kwargs)

        self.encoder = json.load(open(vocab_file, encoding="utf-8"))
        self.decoder = {v: k for k, v in self.encoder.items()}
        self.errors = errors  # how to handle errors in decoding
        self.byte_encoder = bytes_to_unicode()
        self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
        bpe_data = open(merges_file, encoding='utf-8').read().split('\n')[1:-1]
        bpe_merges = [tuple(merge.split()) for merge in bpe_data]
        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
        self.cache = {}

        # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
        self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""") 
Author: linhaow, Project: TextClassify, Lines: 20, Source: tokenization_roberta.py

Example 6: __init__

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, vocab_file, merges_file, errors='replace', unk_token="<|endoftext|>",
                 bos_token="<|endoftext|>", eos_token="<|endoftext|>", **kwargs):
        super(GPT2Tokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)

        self.encoder = json.load(open(vocab_file))
        self.decoder = {v:k for k,v in self.encoder.items()}
        self.errors = errors # how to handle errors in decoding
        self.byte_encoder = bytes_to_unicode()
        self.byte_decoder = {v:k for k, v in self.byte_encoder.items()}
        bpe_data = open(merges_file, encoding='utf-8').read().split('\n')[1:-1]
        bpe_merges = [tuple(merge.split()) for merge in bpe_data]
        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
        self.cache = {}

        # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
        self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""") 
Author: linhaow, Project: TextClassify, Lines: 18, Source: tokenization_gpt2.py

Example 7: __init__

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, vocab_file, merges_file, errors='replace', unk_token="<|endoftext|>",
                 bos_token="<|endoftext|>", eos_token="<|endoftext|>", **kwargs):
        super(GPT2Tokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
        self.max_len_single_sentence = self.max_len # no default special tokens - you can update this value if you add special tokens
        self.max_len_sentences_pair = self.max_len # no default special tokens - you can update this value if you add special tokens

        self.encoder = json.load(open(vocab_file, encoding="utf-8"))
        self.decoder = {v: k for k, v in self.encoder.items()}
        self.errors = errors  # how to handle errors in decoding
        self.byte_encoder = bytes_to_unicode()
        self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
        bpe_data = open(merges_file, encoding='utf-8').read().split('\n')[1:-1]
        bpe_merges = [tuple(merge.split()) for merge in bpe_data]
        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
        self.cache = {}

        # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
        self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""") 
Author: plkmo, Project: NLP_Toolkit, Lines: 20, Source: tokenization_gpt2.py

Example 8: count_occurances

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def count_occurances(self, regex: str, case_sensitive: bool = False):
        """Counts occurances of the regex.

        Counts the number of times the provided string occurs.

        Args:
            regex (str): Required. Regex string to search for
            case_sensitive (bool, optional): If True, the search is case sensitive. Defaults to False.

        Returns:
            Chepy: The Chepy object.

        Examples:
            >>> Chepy("AABCDADJAKDJHKSDAJSDdaskjdhaskdjhasdkja").count_occurances("ja").output
            2
        """
        if case_sensitive:
            r = re.compile(regex)
        else:
            r = re.compile(regex, re.IGNORECASE)
        self.state = len(r.findall(self._convert_to_str()))
        return self 
Author: securisec, Project: chepy, Lines: 24, Source: utils.py

Example 9: find_replace

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def find_replace(self, pattern: str, repl: str, ignore_case=True):
        """Replace matched pattern with repln
        
        Args:
            pattern (str): Required. Pattern to search
            repl (str): Required. Replacement string
            ignore_case (bool, optional): Case insensitive. Defaults to True.
        
        Returns:
            Chepy: The Chepy object.

        Examples:
            >>> Chepy("some some data").find_replace(r"some\s", "data").o
            "datadatadata"
        """
        flags = 0
        if ignore_case:
            flags = re.IGNORECASE
        self.state = re.sub(pattern, repl, self._convert_to_str(), flags=flags)
        return self 
Author: securisec, Project: chepy, Lines: 22, Source: utils.py

Example 10: search_ctf_flags

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def search_ctf_flags(self, prefix: str, postfix: str = ".+?\{*\}"):
        """Search CTF style flags. 

        By default, this assumes the flag format looks something like picoCTF{some_flag}.
        
        Args:
            prefix (str): Prefix of the flag. Like `picoCTF`
            postfix (str, optional): Regex for the remainder of the flag.
                Defaults to '.+?\{*\}'.
        
        Returns:
            Chepy: The Chepy object. 

        Examples:
            >>> Chepy("tests/files/flags").read_file().search_ctf_flags("pico").get_by_index(0)
            picoCTF{r3source_pag3_f1ag}
        """
        self.state = re.findall(prefix + postfix, self._convert_to_str(), re.IGNORECASE)
        return self 
Author: securisec, Project: chepy, Lines: 22, Source: search.py

Example 11: test_regex_recognizes_source

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def test_regex_recognizes_source(self):
        src_list = [
            ('Retail Store A', 1,),
            ('Big Bank AB', 1,),
            ('Acme Capital, Inc.', 1,),
            ('Lowe & Swayze', 1,),
            ('Big Bank & Company (004578)', 1,),
            ('Family Name Limited (173437)', 1,),
            ('Financial Services & Co. (015607)', 1,),
            ('Food Wholsale, Inc. (056230)', 1,),
            ('All Eyes Communications (018951)', 1,),
            ('Joe Smith Archives, LLC d/b/a Foxtrot (085292)', 2,),
        ]
        importer = self.make_importer()
        importer.wrap_in_wordbreaks = True

        for phrase, target_ct in src_list:
            ptrns = importer.pre_process_regexp_option([phrase])
            self.assertEqual(target_ct, len(ptrns), f'"{phrase}" produced {len(ptrns)} patterns, expected {target_ct}')
            matches = 0
            for ptrn in ptrns:
                rg = re.compile(ptrn, re.IGNORECASE)
                for _ in rg.finditer(phrase):
                    matches += 1
            self.assertEqual(target_ct, matches, f'"{phrase}" gives {matches} matches, expected {target_ct}') 
Author: LexPredict, Project: lexpredict-contraxsuite, Lines: 27, Source: test_pre_process_regexp_option.py

Example 12: build_tz_offsets

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def build_tz_offsets(search_regex_parts):

    def get_offset(tz_obj, regex, repl='', replw=''):
        return (
            tz_obj[0],
            {
                'regex': re.compile(re.sub(repl, replw, regex % tz_obj[0]), re.IGNORECASE),
                'offset': timedelta(seconds=tz_obj[1])
            }
        )

    for tz_info in timezone_info_list:
        for regex in tz_info['regex_patterns']:
            for tz_obj in tz_info['timezones']:
                search_regex_parts.append(tz_obj[0])
                yield get_offset(tz_obj, regex)

            # alternate patterns
            for replace, replacewith in tz_info.get('replace', []):
                for tz_obj in tz_info['timezones']:
                    search_regex_parts.append(re.sub(replace, replacewith, tz_obj[0]))
                    yield get_offset(tz_obj, regex, repl=replace, replw=replacewith) 
Author: scrapinghub, Project: dateparser, Lines: 24, Source: timezone_parser.py
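
A hedged usage sketch, assuming timezone_info_list from dateparser's timezones module is in scope (each entry holds 'regex_patterns' and a 'timezones' list of (name, offset-in-seconds) tuples): the generator appends timezone names to search_regex_parts and yields (name, info) pairs.

search_regex_parts = []
tz_offsets = list(build_tz_offsets(search_regex_parts))  # dateparser consumes the generator the same way
name, info = tz_offsets[0]
# info['regex'] is a compiled case-insensitive pattern; info['offset'] is a timedelta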

Example 13: regex_match_score

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None 
Author: HKUST-KnowComp, Project: MnemonicReader, Lines: 13, Source: utils.py
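
A brief usage note with hypothetical inputs: since re.match anchors at the beginning of the string, the pattern must match from the prediction's first character.

regex_match_score("forty two", r"forty[- ]two")        # True
regex_match_score("it is forty two", r"forty[- ]two")  # False: no match at the start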

Example 14: _check_path

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def _check_path(self, config_path, node):
        if regex.match(r"ns=\d*;[isgb]=.*", config_path, regex.IGNORECASE):
            return config_path
        if re.search(r"^root", config_path.lower()) is None:
            node_path = '\\\\.'.join(
                char.split(":")[1] for char in node.get_path(200000, True))
            if config_path[-3:] != '\\.':
                information_path = node_path + '\\\\.' + config_path.replace('\\', '\\\\')
            else:
                information_path = node_path + config_path.replace('\\', '\\\\')
        else:
            information_path = config_path
        result = information_path[:]
        return result 
Author: thingsboard, Project: thingsboard-gateway, Lines: 16, Source: opcua_connector.py

Example 15: __init__

# Required import: import regex [as alias]
# Or: from regex import IGNORECASE [as alias]
def __init__(self, encoder, bpe_merges, errors='replace'):
        self.encoder = encoder
        self.decoder = {v:k for k,v in self.encoder.items()}
        self.errors = errors # how to handle errors in decoding
        self.byte_encoder = bytes_to_unicode()
        self.byte_decoder = {v:k for k, v in self.byte_encoder.items()}
        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
        self.cache = {}

        # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions
        self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""") 
Author: qkaren, Project: Counterfactual-StoryRW, Lines: 13, Source: processor.py


Note: The regex.IGNORECASE attribute examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright in the source code belongs to the original authors. Refer to each project's License for distribution and use; do not reproduce without permission.