当前位置: 首页>>代码示例>>Python>>正文


Python regex.findall方法代码示例

本文整理汇总了Python中regex.findall方法的典型用法代码示例。如果您正苦于以下问题:Python regex.findall方法的具体用法?Python regex.findall怎么用?Python regex.findall使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在regex的用法示例。


在下文中一共展示了regex.findall方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _load_search_pattern

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def _load_search_pattern(self):
        self.type_mapper = {}
        py_regex_pattern = self.pattern
        while True:
            # Finding all types specified in the groks
            m = re.findall(r'%{(\w+):(\w+):(\w+)}', py_regex_pattern)
            for n in m:
                self.type_mapper[n[1]] = n[2]
            #replace %{pattern_name:custom_name} (or %{pattern_name:custom_name:type}
            # with regex and regex group name

            py_regex_pattern = re.sub(r'%{(\w+):(\w+)(?::\w+)?}',
                lambda m: "(?P<" + m.group(2) + ">" + self.predefined_patterns[m.group(1)].regex_str + ")",
                py_regex_pattern)

            #replace %{pattern_name} with regex
            py_regex_pattern = re.sub(r'%{(\w+)}',
                lambda m: "(" + self.predefined_patterns[m.group(1)].regex_str + ")",
                py_regex_pattern)

            if re.search('%{\w+(:\w+)?}', py_regex_pattern) is None:
                break

        self.regex_obj = re.compile(py_regex_pattern) 
开发者ID:garyelephant,项目名称:pygrok,代码行数:26,代码来源:pygrok.py

示例2: _test_display_output

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def _test_display_output(self, image_mode):
    """
    Test display data output with given image_mode.
    """
    with testing_utils.tempdir() as data_path:
        # The ImageTeacher expects its data directory to already exist.
        os.makedirs(os.path.join(data_path, 'ImageTeacher'))

        output = testing_utils.display_data(
            {
                'task': 'integration_tests:ImageTeacher',
                'datapath': data_path,
                'image_mode': image_mode,
                'display_verbose': True,
            }
        )

        # Pull the label lines out of the train / valid / test output streams.
        label_patterns = (r"\[labels\].*\n", r"\[eval_labels\].*\n", r"\[eval_labels\].*\n")
        split_labels = [re.findall(p, out) for p, out in zip(label_patterns, output)]

        for i, lbls in enumerate(split_labels):
            self.assertGreater(len(lbls), 0, 'DisplayData failed')
            # Every example's label line should be unique within its split.
            self.assertEqual(len(lbls), len(set(lbls)), output[i])
开发者ID:facebookresearch,项目名称:ParlAI,代码行数:24,代码来源:test_teachers.py

示例3: ascii_emoticons

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def ascii_emoticons(index, question, answer):
    """Score ASCII emoticons found in *answer*.

    Sets the module-level ``valid_emoticon`` flag when at least one
    emoticon-like token is counted, and returns the score (count of
    non-adjacent emoticon tokens times the configured modifier).
    """
    global valid_emoticon

    valid_emoticon = False

    # Disabled via configuration.
    if score_settings['ascii_emoticon_modifier_value'] is None:
        return 0

    # Split answer into whitespace-delimited tokens.
    tokens = answer.split()

    ratio_threshold = score_settings['ascii_emoticon_non_char_to_all_chars_ratio']

    # Flag tokens (len > 1) whose non-alphanumeric ratio exceeds the threshold.
    flags = []
    for token in tokens:
        non_alnum = len(re.findall('[^a-zA-Z0-9]', token))
        flags.append(1 if len(token) > 1 and non_alnum / len(token) > ratio_threshold else 0)

    # Count flagged tokens that are not immediately preceded by another
    # flagged token (runs of emoticon-like tokens count once).
    count = 0
    for pos, flag in enumerate(flags):
        if flag == 1 and (pos == 0 or flags[pos - 1] == 0):
            count += 1
    score = count * score_settings['ascii_emoticon_modifier_value']

    if score:
        valid_emoticon = True

    return score

# Check if sentence includes 'unk' token

# Check if sentence includes 'unk' token 
开发者ID:daniel-kukiela,项目名称:nmt-chatbot,代码行数:24,代码来源:scorer.py

示例4: tokenize_field

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def tokenize_field(value):
    """
    Extract normalized tokens from a field.

    Args:
        value (str): The field value.

    Returns:
        list: The cleaned tokens (lowercase words of two or more letters,
        with common English articles removed).
    """

    # Extract tokens: runs of 2+ Unicode letters.  Raw string fixes the
    # invalid '\p' escape in the original literal; \p{L} is a Unicode
    # property class supported by the third-party `regex` module.
    tokens = regex.findall(r'\p{L}{2,}', value.lower())

    # Remove articles / conjunction.
    tokens = [t for t in tokens if t not in [
        'a', 'an', 'the', 'and',
    ]]

    return tokens
开发者ID:davidmcclure,项目名称:open-syllabus-project,代码行数:23,代码来源:utils.py

示例5: unicode_chrs_by_lang

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def unicode_chrs_by_lang(self, lang: str):
    """Detect language specific characters.

    Finds every character in the state that belongs to the Unicode
    script named by ``lang`` (e.g. Common, Arabic, Armenian, Bengali,
    Cyrillic, Devanagari, Greek, Han, Hangul, Hebrew, Hiragana,
    Katakana, Latin, Tamil, Thai, ...) using the ``regex`` module's
    Unicode script property syntax. Other code point categories should
    work as well.

    Args:
        lang (str): Required. A string value identifying the language.

    Returns:
        Chepy: The Chepy object.
    """
    script_pattern = r"\p{" + lang + "}"
    self.state = re.findall(script_pattern, self._convert_to_str())
    return self
开发者ID:securisec,项目名称:chepy,代码行数:22,代码来源:language.py

示例6: count_occurances

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def count_occurances(self, regex: str, case_sensitive: bool = False):
    """Counts occurances of the regex.

    Counts the number of times the provided string occurs in the state.

    Args:
        regex (str): Required. Regex string to search for
        case_sensitive (bool, optional): If search should be case insensitive, by default False

    Returns:
        Chepy: The Chepy object.

    Examples:
        >>> Chepy("AABCDADJAKDJHKSDAJSDdaskjdhaskdjhasdkja").count_occurances("ja").output
        2
    """
    # flags=0 is the default (case sensitive); IGNORECASE otherwise.
    flags = 0 if case_sensitive else re.IGNORECASE
    matches = re.compile(regex, flags).findall(self._convert_to_str())
    self.state = len(matches)
    return self
开发者ID:securisec,项目名称:chepy,代码行数:24,代码来源:utils.py

示例7: extract_strings

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def extract_strings(self, length: int = 4):
    """Extract printable strings from the state (like the `strings` tool).

    Args:
        length (int, optional): Min length of string. Defaults to 4.

    Returns:
        Chepy: The Chepy object.

    Examples:
        >>> Chepy("tests/files/hello").load_file().extract_strings().o
        [
            b'__PAGEZERO',
            b'__TEXT',
            b'__text',
            ...
        ]
    """
    # Match runs of `length` or more bytes outside the control-character
    # and high-bit ranges.
    min_run = str(length).encode()
    printable_run = b"[^\x00-\x1F\x7F-\xFF]{" + min_run + b",}"
    self.state = re.findall(printable_run, self._convert_to_bytes())
    return self
开发者ID:securisec,项目名称:chepy,代码行数:26,代码来源:extractors.py

示例8: search_ctf_flags

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def search_ctf_flags(self, prefix: str, postfix: str = r".+?\{*\}"):
    r"""Search CTF style flags.

    This by default assumes that the flag format is similar
    to something like picoCTF{some_flag} as an example.
    The search is case insensitive.

    Args:
        prefix (str): Prefix of the flag. Like `picoCTF`
        postfix (str, optional): Regex for the remainder of the flag.
            Defaults to r".+?\{*\}".  (The original docstring stated a
            different default, '.+\{.+}', than the actual signature;
            corrected here. The default is also now a raw string to
            avoid the invalid '\{' escape warning.)

    Returns:
        Chepy: The Chepy object.

    Examples:
        >>> Chepy("tests/files/flags").read_file().search_ctf_flags("pico").get_by_index(0)
        picoCTF{r3source_pag3_f1ag}
    """
    self.state = re.findall(prefix + postfix, self._convert_to_str(), re.IGNORECASE)
    return self
开发者ID:securisec,项目名称:chepy,代码行数:22,代码来源:search.py

示例9: get_nested_select

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def get_nested_select(self):
    """Get tuples groups in query like ::

            Select {
                ([Time].[Time].[Day].[2010].[Q2 2010].[May 2010].[May 19,2010],
                [Geography].[Geography].[Continent].[Europe],
                [Measures].[Amount]),

                ([Time].[Time].[Day].[2010].[Q2 2010].[May 2010].[May 17,2010],
                [Geography].[Geography].[Continent].[Europe],
                [Measures].[Amount])
                }

            out :
                ['[Time].[Time].[Day].[2010].[Q2 2010].[May 2010].[May 19,2010],\
                [Geography].[Geography].[Continent].[Europe],[Measures].[Amount]',

                '[Time].[Time].[Day].[2010].[Q2 2010].[May 2010].[May 17,2010],\
                [Geography].[Geography].[Continent].[Europe],[Measures].[Amount]']

    :return: All groups as list of strings.
    """
    # Each parenthesized group that contains no nested parentheses.
    tuple_group = r"\(([^()]+)\)"
    return regex.findall(tuple_group, self.mdx_query)
开发者ID:abilian,项目名称:olapy,代码行数:25,代码来源:parse.py

示例10: compile_regex_from_str

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def compile_regex_from_str(self, pat):
    """Given a string describing features masks for a sequence of segments,
    return a compiled regex matching the corresponding strings.

    Args:
        pat (str): feature masks, each enclosed in square brackets, in
            which the features are delimited by any standard delimiter.

    Returns:
       Pattern: regular expression pattern equivalent to `pat`
    """
    sign_to_num = {'-': -1, '0': 0, '+': 1}
    alternations = []
    # One bracketed mask per segment position.
    for bracketed in re.findall(r'\[[^]]+\]+', pat):
        # Parse "+feat" / "-feat" pairs into a feature-value mask.
        mask = {
            name: sign_to_num[sign]
            for (sign, name) in re.findall(r'([+-])(\w+)', bracketed)
        }
        segments = self.all_segs_matching_fts(mask)
        alternations.append('({})'.format('|'.join(segments)))
    return re.compile(''.join(alternations))
开发者ID:dmort27,项目名称:panphon,代码行数:21,代码来源:featuretable.py

示例11: tokenize

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def tokenize(self, text):
    """Tokenize `text` into byte-level BPE sub-tokens.

    `text` may be a single string or a list of basic tokens; when it is
    a list, every item after the first gets a leading space before
    encoding (GPT-2 convention).

    Returns:
        (bpe_tokens, word_starts): the sub-token strings, and a parallel
        boolean list marking the first sub-token of each list item (list
        input) or of each regex-matched token (string input).
    """
    is_list_input = isinstance(text, list)
    basic_tokens = text if is_list_input else [text]

    bpe_tokens = []
    list_starts, str_starts = [], []
    for idx, basic_token in enumerate(basic_tokens):
        if idx > 0 and is_list_input:
            basic_token = ' ' + basic_token
        subtoken_count = 0
        for token in re.findall(self.bpe.pat, basic_token):
            mapped = ''.join(self.bpe.byte_encoder[b] for b in token.encode('utf-8'))
            sub_tokens = self.bpe.bpe(mapped).split(' ')
            bpe_tokens.extend(sub_tokens)
            str_starts += [True] + [False] * (len(sub_tokens) - 1)
            subtoken_count += len(sub_tokens)
        list_starts += [True] + [False] * (subtoken_count - 1)

    word_starts = list_starts if is_list_input else str_starts
    assert len(bpe_tokens) == len(word_starts)
    return bpe_tokens, word_starts
开发者ID:google-research,项目名称:language,代码行数:20,代码来源:tokenization.py

示例12: encode

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def encode(self, text):
    """Encode `text` into BPE token ids.

    Out-of-vocabulary BPE pieces fall back to the id of the "empty"
    entry in the encoder.

    Returns:
        (bpe_tokens, bpe_token_original): parallel lists of ids and of
        the BPE piece strings.
    """
    bpe_tokens = []
    bpe_token_original = []
    for token in re.findall(self.pat, text):
        mapped = ''.join(self.byte_encoder[b] for b in token.encode('utf-8'))
        pieces = self.bpe(mapped).split(' ')
        for piece in pieces:
            # Handle OOV pieces by mapping them to the "empty" id.
            if piece in self.encoder:
                bpe_tokens.append(self.encoder[piece])
            else:
                bpe_tokens.append(self.encoder["empty"])
        bpe_token_original.extend(pieces)
    return bpe_tokens, bpe_token_original
开发者ID:czyssrs,项目名称:Few-Shot-NLG,代码行数:19,代码来源:encoder.py

示例13: encode

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def encode(self, text):
    """Map `text` to a flat list of BPE token indices."""
    indices = []
    for raw_token in re.findall(self.token_pattern, text):
        # Byte-level remap of the token's UTF-8 bytes.
        token_bytes = bytearray(raw_token.encode('utf-8'))
        chars = ''.join(self.byte_encoder[code] for code in token_bytes)
        for piece in self.get_bpe(chars):
            indices.append(self.token_dict[piece])
    return indices
开发者ID:CyberZHG,项目名称:keras-gpt-2,代码行数:9,代码来源:bpe.py

示例14: encode

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def encode(self, text):
    """Encode `text` as a list of BPE token ids."""
    token_ids = []
    for token in re.findall(self.pat, text):
        # Byte-level encode, BPE-merge, then look up each piece's id.
        mapped = ''.join(self.byte_encoder[b] for b in token.encode('utf-8'))
        for piece in self.bpe(mapped).split(' '):
            token_ids.append(self.encoder[piece])
    return token_ids
开发者ID:qkaren,项目名称:Counterfactual-StoryRW,代码行数:8,代码来源:processor.py

示例15: encode

# 需要导入模块: import regex [as 别名]
# 或者: from regex import findall [as 别名]
def encode(self, text):
    """Encode `text` as a flat list of byte-level BPE token ids."""
    def to_byte_alphabet(tok):
        # Map raw UTF-8 bytes onto the byte-encoder alphabet.
        return "".join(self.byte_encoder[b] for b in tok.encode("utf-8"))

    pieces_per_token = (
        self.bpe(to_byte_alphabet(tok)).split(" ")
        for tok in re.findall(self.pat, text)
    )
    return [self.encoder[piece] for pieces in pieces_per_token for piece in pieces]
开发者ID:openai,项目名称:lm-human-preferences,代码行数:8,代码来源:encodings.py


注:本文中的regex.findall方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。