当前位置: 首页>>代码示例>>Python>>正文


Python ahocorasick.Automaton方法代码示例

本文整理汇总了Python中ahocorasick.Automaton方法的典型用法代码示例。如果您正苦于以下问题:Python ahocorasick.Automaton方法的具体用法?Python ahocorasick.Automaton怎么用?Python ahocorasick.Automaton使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在ahocorasick的用法示例。


在下文中一共展示了ahocorasick.Automaton方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: convert

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def convert(content: str,
            realm_alert_words_automaton: Optional[ahocorasick.Automaton] = None,
            message: Optional[Message]=None,
            message_realm: Optional[Realm]=None,
            sent_by_bot: bool=False,
            translate_emoticons: bool=False,
            mention_data: Optional[MentionData]=None,
            email_gateway: bool=False,
            no_previews: bool=False) -> str:
    """Run ``do_convert`` on ``content`` between the markdown stats calls.

    All arguments are forwarded to ``do_convert`` unchanged (positionally,
    except ``no_previews`` which is passed by keyword), and its return value
    is returned as-is.
    """
    markdown_stats_start()
    rendered = do_convert(
        content,
        realm_alert_words_automaton,
        message,
        message_realm,
        sent_by_bot,
        translate_emoticons,
        mention_data,
        email_gateway,
        no_previews=no_previews,
    )
    # Only reached when do_convert returns normally; an exception skips it.
    markdown_stats_finish()
    return rendered
开发者ID:zulip,项目名称:zulip,代码行数:18,代码来源:__init__.py

示例2: get_alert_word_automaton

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def get_alert_word_automaton(realm: Realm) -> ahocorasick.Automaton:
    """Build an automaton over the lower-cased alert words of ``realm``.

    Each key is a lower-cased alert word and its stored value is a
    ``(word, user_ids)`` tuple, where ``user_ids`` is the set of users that
    registered that word.

    Returns ``None`` when the realm has no alert words at all.
    NOTE(review): the return annotation does not mention ``None`` -- consider
    widening it to an optional type.
    """
    words_by_user = alert_words_in_realm(realm)
    automaton = ahocorasick.Automaton()
    for user_id, words in words_by_user.items():
        for word in words:
            lowered = word.lower()
            if not automaton.exists(lowered):
                automaton.add_word(lowered, (lowered, {user_id}))
                continue
            # The stored set is mutated in place, so no re-insert is needed.
            _, known_user_ids = automaton.get(lowered)
            known_user_ids.add(user_id)
    automaton.make_automaton()
    # If the kind is still not AHOCORASICK after make_automaton(), no key was
    # ever added and the automaton cannot be queried yet; callers get None
    # for such realms instead.
    # https://pyahocorasick.readthedocs.io/en/latest/index.html?highlight=Automaton.kind#module-constants
    if automaton.kind == ahocorasick.AHOCORASICK:
        return automaton
    return None
开发者ID:zulip,项目名称:zulip,代码行数:21,代码来源:alert_words.py

示例3: __init__

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def __init__(self, ignore_case=True, titles=None, extra_titles=None):
        """
        Build the automaton used to search for job titles.

        :param ignore_case: if True, the lower-cased form of each title is added too
        :param titles: if given (and non-empty), used instead of `load_titles()`
        :param extra_titles: if given, added on top of titles
        """
        if not titles:
            titles = load_titles()
        logging.info('building job title searcher')
        searcher = ahocorasick.Automaton()
        # Base titles first, then the extras: same insertion order as two
        # separate loops would give.
        for source in (titles, extra_titles or ()):
            for title in source:
                searcher.add_word(title, title)
                if ignore_case:
                    lowered = title.lower()
                    searcher.add_word(lowered, lowered)

        searcher.make_automaton()
        self.autom = searcher
        logging.info('building done')
开发者ID:fluquid,项目名称:find_job_titles,代码行数:25,代码来源:__init__.py

示例4: build_actree

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def build_actree(self, wordlist):
        """
        Build an Aho-Corasick tree over the word list to speed up filtering.

        :param wordlist: iterable of words to index
        :return: the finalized automaton; each word maps to ``(index, word)``,
            where ``index`` is the word's position in ``wordlist``
        """
        tree = ahocorasick.Automaton()
        for position, term in enumerate(wordlist):
            tree.add_word(term, (position, term))
        tree.make_automaton()
        return tree
开发者ID:shibing624,项目名称:dialogbot,代码行数:13,代码来源:question_classifier.py

示例5: build_dic_search

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def build_dic_search(dic: dict):
    """Index the keys of ``dic`` in an Aho-Corasick automaton.

    Every key is stored with the value ``(key, dic[key])``. Progress over the
    dictionary items is reported through ``tqdm`` and two status lines are
    printed. Returns the finalized automaton.
    """
    automaton = ahocorasick.Automaton()
    print("adding words to automaton")
    for key, value in tqdm.tqdm(dic.items()):
        automaton.add_word(key, (key, value))
    print("finalize automaton")
    automaton.make_automaton()
    return automaton
开发者ID:fabiencro,项目名称:knmt,代码行数:10,代码来源:dictionnary_handling.py

示例6: find_match_in_list

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def find_match_in_list(line, lst):
    """Find the words of ``lst`` that occur in ``line``, keyed by end index.

    For every end index only the first match reported by the automaton is
    kept. When a match is recorded, any previously recorded match ending at
    one of the ``len(word) - 1`` positions just before it is discarded.

    :param line: text to search
    :param lst: iterable of words to look for
    :return: dict mapping the end index reported by the automaton to the
        matched word; empty when ``lst`` contributes no searchable word
    """
    automaton = ahocorasick.Automaton()
    for word in lst:
        automaton.add_word(word, word)
    automaton.make_automaton()
    # With no keys the trie is never promoted to an automaton and iter()
    # cannot be used on it, so there is nothing to match.
    if automaton.kind != ahocorasick.AHOCORASICK:
        return {}

    matches = {}
    for end, word in automaton.iter(line):
        if end in matches:
            continue
        matches[end] = word
        # Drop earlier matches that end inside the span of this word.
        for offset in range(1, len(word)):
            matches.pop(end - offset, None)
    return matches
开发者ID:fabiencro,项目名称:knmt,代码行数:15,代码来源:dictionnary_handling.py

示例7: render_markdown

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def render_markdown(message: Message,
                    content: str,
                    realm: Optional[Realm]=None,
                    realm_alert_words_automaton: Optional[ahocorasick.Automaton]=None,
                    mention_data: Optional[MentionData]=None,
                    email_gateway: bool=False) -> str:
    '''
    Thin wrapper around do_render_markdown.

    Falls back to message.get_realm() when no realm is given, and takes the
    sent_by_bot / translate_emoticons flags from the sender's user profile.
    '''
    effective_realm = message.get_realm() if realm is None else realm

    sender = get_user_profile_by_id(message.sender_id)

    return do_render_markdown(
        message=message,
        content=content,
        realm=effective_realm,
        realm_alert_words_automaton=realm_alert_words_automaton,
        sent_by_bot=sender.is_bot,
        translate_emoticons=sender.translate_emoticons,
        mention_data=mention_data,
        email_gateway=email_gateway,
    )
开发者ID:zulip,项目名称:zulip,代码行数:31,代码来源:message.py

示例8: do_render_markdown

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def do_render_markdown(message: Message,
                       content: str,
                       realm: Realm,
                       sent_by_bot: bool,
                       translate_emoticons: bool,
                       realm_alert_words_automaton: Optional[ahocorasick.Automaton]=None,
                       mention_data: Optional[MentionData]=None,
                       email_gateway: bool=False) -> str:
    """Render ``content`` to HTML and return it.

    Before rendering, the per-render attributes on ``message`` are reset
    (``mentions_wildcard`` to False, the others to fresh empty sets); the
    markdown pass may then fill them in. These attributes live only on this
    Django object and are not saved in the database.
    """

    # Start from a clean slate so nothing leaks over from an earlier render.
    message.mentions_wildcard = False
    message.mentions_user_ids = set()
    message.mentions_user_group_ids = set()
    message.alert_words = set()
    message.links_for_preview = set()
    message.user_ids_with_alert_words = set()

    # The actual conversion is delegated to markdown_convert.
    return markdown_convert(
        content,
        realm_alert_words_automaton=realm_alert_words_automaton,
        message=message,
        message_realm=realm,
        sent_by_bot=sent_by_bot,
        translate_emoticons=translate_emoticons,
        mention_data=mention_data,
        email_gateway=email_gateway,
    )
开发者ID:zulip,项目名称:zulip,代码行数:35,代码来源:message.py

示例9: make_trie

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def make_trie(names):
        """Index ``names`` with their spaces removed.

        Each space-stripped name is stored with the value
        ``(length of the stripped name, original name)``.
        Returns the finalized automaton.
        """
        automaton = ahocorasick.Automaton()
        for original in names:
            compact = original.replace(" ", "")
            automaton.add_word(compact, (len(compact), original))
        automaton.make_automaton()
        return automaton
开发者ID:paperswithcode,项目名称:axcell,代码行数:9,代码来源:context_search.py

示例10: _find_keywords_ahocorasick

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def _find_keywords_ahocorasick(self, text):
        """Locate every occurrence of the configured keywords in ``text``.

        The automaton is built lazily from ``self.keyword_sequence`` on the
        first call and cached in ``self.ahocorasick_automaton``. It is created
        with ``STORE_LENGTH``, so each match is unpacked as ``(end, length)``.

        :param text: text to search
        :return: list of dicts with ``START`` and ``END`` keys, where
            ``START = end - length + 1`` and ``END = end + 1``
        """
        if self.ahocorasick_automaton is None:
            automaton = ahocorasick.Automaton(ahocorasick.STORE_LENGTH)
            for entry in self.keyword_sequence:
                automaton.add_word(entry)
            automaton.make_automaton()
            # Cache only once fully built, so a failure above cannot leave a
            # half-populated automaton behind for later calls.
            self.ahocorasick_automaton = automaton
        return [
            {START: end - length + 1, END: end + 1}
            for end, length in self.ahocorasick_automaton.iter(text)
        ]
开发者ID:estnltk,项目名称:estnltk,代码行数:14,代码来源:event_tagger.py

示例11: _find_events_ahocorasick

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def _find_events_ahocorasick(self, text):
        """Locate every occurrence of the vocabulary terms in ``text``.

        The automaton is built lazily from ``self.event_vocabulary`` on the
        first call and cached in ``self.ahocorasick_automaton``. Unless
        ``self.case_sensitive`` is set, both the indexed terms and the
        searched text are lower-cased.

        :param text: text to search
        :return: list of copies of the matching vocabulary entries, each
            extended with ``START`` and ``END`` keys computed from the match
            end index and the length of the entry's ``TERM``
        """
        if self.ahocorasick_automaton is None:
            automaton = ahocorasick.Automaton()
            for entry in self.event_vocabulary:
                term = entry[TERM] if self.case_sensitive else entry[TERM].lower()
                automaton.add_word(term, entry)
            automaton.make_automaton()
            # Cache only once fully built, so a failure above cannot leave a
            # half-populated automaton behind for later calls.
            self.ahocorasick_automaton = automaton
        haystack = text if self.case_sensitive else text.lower()
        events = []
        for end, entry in self.ahocorasick_automaton.iter(haystack):
            # Copy so the shared vocabulary entry is never mutated.
            event = entry.copy()
            event.update({START: end + 1 - len(entry[TERM]), END: end + 1})
            events.append(event)
        return events
开发者ID:estnltk,项目名称:estnltk,代码行数:15,代码来源:event_tagger.py

示例12: init_ahocorasick

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def init_ahocorasick():
    """Return a finalized automaton over the module-level ``keyword_list``.

    Every keyword is stored with itself as its value.
    """
    automaton = ahocorasick.Automaton()
    for word in keyword_list:
        automaton.add_word(word, word)
    automaton.make_automaton()
    return automaton
开发者ID:abusix,项目名称:ahocorapy,代码行数:8,代码来源:ahocorapy_performance_test.py

示例13: init_py_aho_corasick

# 需要导入模块: import py_aho_corasick [as 别名]
# 或者: from py_aho_corasick import Automaton [as 别名]
def init_py_aho_corasick():
    """Return a ``py_aho_corasick.Automaton`` built from the module-level ``keyword_list``."""
    automaton = py_aho_corasick.Automaton(keyword_list)
    return automaton
开发者ID:abusix,项目名称:ahocorapy,代码行数:4,代码来源:ahocorapy_performance_test.py

示例14: build_automaton

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def build_automaton(word_list):
    """
    Build a case-insensitive automaton from a word list file.

    Each line is lower-cased and stripped; lines of three characters or
    fewer are skipped. Every kept word is added to the automaton and fed
    into the running sha1 hash, in file order.

    :type word_list: str
    :param word_list: optional word list file for ignoring certain words.

    :rtype: (ahocorasick.Automaton, str)
    :returns: an automaton, and an iterated sha1 hash of the words in the word list.

    :raises ImportError: if the `pyahocorasick` package is not installed
        (a hint is printed before re-raising).
    """
    # Dynamic import due to optional-dependency
    try:
        import ahocorasick
    except ImportError:  # pragma: no cover
        print('Please install the `pyahocorasick` package to use --word-list')
        raise

    # See https://pyahocorasick.readthedocs.io/en/latest/
    # for more information.
    automaton = ahocorasick.Automaton()
    word_list_hash = hashlib.sha1()

    with open(word_list) as f:
        # Iterate the file lazily instead of loading every line up front.
        for line in f:
            # .lower() to make everything case-insensitive
            line = line.lower().strip()
            if len(line) > 3:
                word_list_hash.update(line.encode('utf-8'))
                automaton.add_word(line, line)

    automaton.make_automaton()

    return (
        automaton,
        word_list_hash.hexdigest(),
    )
开发者ID:Yelp,项目名称:detect-secrets,代码行数:36,代码来源:util.py

示例15: test_analyze_standard_positives_with_automaton

# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def test_analyze_standard_positives_with_automaton(self, file_content):
        """Expect no output when the detector is given an automaton holding 'thisone'."""
        ignored_word = 'thisone'
        word_automaton = ahocorasick.Automaton()
        word_automaton.add_word(ignored_word, ignored_word)
        word_automaton.make_automaton()

        detector = KeywordDetector(automaton=word_automaton)

        mock_file = mock_file_object(file_content)
        findings = detector.analyze(mock_file, 'mock_filename')
        # All skipped due to automaton
        assert len(findings) == 0
开发者ID:Yelp,项目名称:detect-secrets,代码行数:16,代码来源:keyword_test.py


注:本文中的ahocorasick.Automaton方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。