本文整理汇总了Python中ahocorasick.Automaton的典型用法代码示例。如果您正苦于以下问题:Python ahocorasick.Automaton的具体用法?Python ahocorasick.Automaton怎么用?Python ahocorasick.Automaton使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块ahocorasick的用法示例。
在下文中一共展示了ahocorasick.Automaton方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: convert
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def convert(content: str,
            realm_alert_words_automaton: Optional[ahocorasick.Automaton] = None,
            message: Optional[Message]=None,
            message_realm: Optional[Realm]=None,
            sent_by_bot: bool=False,
            translate_emoticons: bool=False,
            mention_data: Optional[MentionData]=None,
            email_gateway: bool=False,
            no_previews: bool=False) -> str:
    """Call ``do_convert`` on ``content`` between the markdown stats hooks.

    Every argument is forwarded unchanged to ``do_convert`` and its return
    value is handed back to the caller. ``markdown_stats_start()`` runs
    before the conversion and ``markdown_stats_finish()`` after it.
    """
    markdown_stats_start()
    rendered = do_convert(
        content,
        realm_alert_words_automaton,
        message,
        message_realm,
        sent_by_bot,
        translate_emoticons,
        mention_data,
        email_gateway,
        no_previews=no_previews,
    )
    # Only reached when do_convert returned without raising.
    markdown_stats_finish()
    return rendered
示例2: get_alert_word_automaton
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def get_alert_word_automaton(realm: Realm) -> ahocorasick.Automaton:
    """Build an automaton keyed by the lower-cased alert words of ``realm``.

    The words come from ``alert_words_in_realm(realm)``. Each key stores a
    ``(alert_word_lower, user_ids)`` tuple, where ``user_ids`` is the set of
    user ids that registered that word.

    Returns the finalised automaton, or ``None`` when the automaton kind is
    not ``AHOCORASICK`` after ``make_automaton()``.

    NOTE(review): the declared return type does not include ``None`` even
    though ``None`` can be returned -- confirm against callers.
    """
    automaton = ahocorasick.Automaton()
    for user_id, words in alert_words_in_realm(realm).items():
        for word in words:
            lowered = word.lower()
            if not automaton.exists(lowered):
                automaton.add_word(lowered, (lowered, {user_id}))
                continue
            # The stored set is mutated in place, so the key does not have
            # to be re-added.
            _, known_user_ids = automaton.get(lowered)
            known_user_ids.add(user_id)
    automaton.make_automaton()
    # If the kind is not AHOCORASICK after calling make_automaton, no key is
    # present and items cannot be called on the automaton yet. None is
    # returned in that case, i.e. when the realm has no alert words.
    # https://pyahocorasick.readthedocs.io/en/latest/index.html?highlight=Automaton.kind#module-constants
    if automaton.kind == ahocorasick.AHOCORASICK:
        return automaton
    return None
示例3: __init__
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def __init__(self, ignore_case=True, titles=None, extra_titles=None):
    """
    Build the job title automaton and store it on ``self.autom``.

    :param ignore_case: if True, lower case job titles are also added
    :param titles: if given, overrides default `load_titles()` values
    :param extra_titles: if given, add to titles
    """
    if not titles:
        titles = load_titles()
    logging.info('building job title searcher')
    automaton = ahocorasick.Automaton()

    def register(title):
        # Each title is stored as its own value; the lower-cased variant is
        # added as a separate key when case is ignored.
        automaton.add_word(title, title)
        if ignore_case:
            lowered = title.lower()
            automaton.add_word(lowered, lowered)

    for title in titles:
        register(title)
    if extra_titles:
        for title in extra_titles:
            register(title)

    automaton.make_automaton()
    self.autom = automaton
    logging.info('building done')
示例4: build_actree
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def build_actree(self, wordlist):
    """
    Build an Aho-Corasick tree from the word list to speed up filtering.

    :param wordlist: the words to add; each one is stored with an
        ``(index, word)`` value, where ``index`` is its position in the list
    :return: the automaton after ``make_automaton()`` has been called
    """
    tree = ahocorasick.Automaton()
    # enumerate() already yields the (index, word) tuple stored as the value.
    for indexed_word in enumerate(wordlist):
        tree.add_word(indexed_word[1], indexed_word)
    tree.make_automaton()
    return tree
示例5: build_dic_search
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def build_dic_search(dic: dict):
    """Build an automaton whose keys are the keys of ``dic``.

    Each key stores the ``(key, value)`` pair from ``dic``. Progress over the
    items is reported through ``tqdm`` and two status lines are printed.
    """
    automaton = ahocorasick.Automaton()
    print("adding words to automaton")
    # dict.items() already yields the (key, value) tuple stored as the value.
    for pair in tqdm.tqdm(dic.items()):
        automaton.add_word(pair[0], pair)
    print("finalize automaton")
    automaton.make_automaton()
    return automaton
示例6: find_match_in_list
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def find_match_in_list(line, lst):
    """Return the words of ``lst`` found in ``line``, keyed by end index.

    For every end index only the first match reported by the automaton is
    kept. When a match is recorded, entries whose end index falls within the
    preceding ``len(word) - 1`` positions are removed.

    NOTE(review): the source lost its indentation; the pruning loop is
    assumed to run only when a new end index is recorded -- confirm against
    the original file.
    """
    automaton = ahocorasick.Automaton()
    for word in lst:
        automaton.add_word(word, word)
    automaton.make_automaton()

    matches = {}
    for end, word in automaton.iter(line):
        if end in matches:
            continue
        matches[end] = word
        # Drop earlier matches that end inside the span of this word.
        for offset in range(1, len(word)):
            matches.pop(end - offset, None)
    return matches
示例7: render_markdown
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def render_markdown(message: Message,
                    content: str,
                    realm: Optional[Realm]=None,
                    realm_alert_words_automaton: Optional[ahocorasick.Automaton]=None,
                    mention_data: Optional[MentionData]=None,
                    email_gateway: bool=False) -> str:
    '''
    Thin wrapper around do_render_markdown.

    Falls back to ``message.get_realm()`` when no realm is passed, looks up
    the sender via ``get_user_profile_by_id(message.sender_id)`` and forwards
    the sender's ``is_bot`` and ``translate_emoticons`` values.
    '''
    if realm is None:
        realm = message.get_realm()

    sender = get_user_profile_by_id(message.sender_id)
    return do_render_markdown(
        message=message,
        content=content,
        realm=realm,
        realm_alert_words_automaton=realm_alert_words_automaton,
        sent_by_bot=sender.is_bot,
        translate_emoticons=sender.translate_emoticons,
        mention_data=mention_data,
        email_gateway=email_gateway,
    )
示例8: do_render_markdown
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def do_render_markdown(message: Message,
                       content: str,
                       realm: Realm,
                       sent_by_bot: bool,
                       translate_emoticons: bool,
                       realm_alert_words_automaton: Optional[ahocorasick.Automaton]=None,
                       mention_data: Optional[MentionData]=None,
                       email_gateway: bool=False) -> str:
    """Return HTML for given markdown. Markdown may add properties to the
    message object such as `mentions_user_ids`, `mentions_user_group_ids`, and
    `mentions_wildcard`. These are only on this Django object and are not
    saved in the database.

    The per-message attributes are reset before ``markdown_convert`` is
    called.
    """
    # Start from a clean slate.
    message.mentions_wildcard = False
    message.mentions_user_ids = set()
    message.mentions_user_group_ids = set()
    message.alert_words = set()
    message.links_for_preview = set()
    message.user_ids_with_alert_words = set()

    # The actual work: hand everything to the markdown converter.
    return markdown_convert(
        content,
        realm_alert_words_automaton=realm_alert_words_automaton,
        message=message,
        message_realm=realm,
        sent_by_bot=sent_by_bot,
        translate_emoticons=translate_emoticons,
        mention_data=mention_data,
        email_gateway=email_gateway,
    )
示例9: make_trie
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def make_trie(names):
    """Build an automaton keyed by each name with its spaces removed.

    Every key stores ``(len(key), original_name)``, so a match can be mapped
    back to the name as it was given.
    """
    automaton = ahocorasick.Automaton()
    for original in names:
        # Splitting on " " and re-joining removes every space character.
        compact = "".join(original.split(" "))
        automaton.add_word(compact, (len(compact), original))
    automaton.make_automaton()
    return automaton
示例10: _find_keywords_ahocorasick
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def _find_keywords_ahocorasick(self, text):
    """Return the span of every keyword occurrence found in ``text``.

    The automaton is built from ``self.keyword_sequence`` on the first call
    and cached on ``self.ahocorasick_automaton``. It is created with
    ``STORE_LENGTH``, so every match yields ``(end_index, keyword_length)``.

    :param text: the string to scan
    :return: a list of ``{START: ..., END: ...}`` dicts, where ``START`` is
        the index of the first matched character and ``END`` is one past the
        last; an empty list when there are no keywords
    """
    if self.ahocorasick_automaton is None:
        automaton = ahocorasick.Automaton(ahocorasick.STORE_LENGTH)
        for entry in self.keyword_sequence:
            automaton.add_word(entry)
        automaton.make_automaton()
        # Cache only after the build finished, so a failure part-way through
        # does not leave a half-built automaton behind.
        self.ahocorasick_automaton = automaton

    # Without any keyword the automaton is never promoted to AHOCORASICK and
    # iter() would raise; there is simply nothing to find.
    if self.ahocorasick_automaton.kind != ahocorasick.AHOCORASICK:
        return []

    return [
        {START: end - length + 1, END: end + 1}
        for end, length in self.ahocorasick_automaton.iter(text)
    ]
示例11: _find_events_ahocorasick
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def _find_events_ahocorasick(self, text):
    """Return a copy of each vocabulary entry whose term occurs in ``text``.

    The automaton is built from ``self.event_vocabulary`` on the first call
    and cached on ``self.ahocorasick_automaton``. Unless
    ``self.case_sensitive`` is set, both the terms and the text are
    lower-cased before matching.

    :param text: the string to scan
    :return: a list of dicts, each a copy of the matched vocabulary entry
        updated with ``START`` and ``END`` keys; ``END`` is one past the
        index of the last matched character and ``START`` is ``END`` minus
        the length of the entry's ``TERM``. An empty list is returned when
        the vocabulary is empty.
    """
    if self.ahocorasick_automaton is None:
        automaton = ahocorasick.Automaton()
        for entry in self.event_vocabulary:
            term = entry[TERM] if self.case_sensitive else entry[TERM].lower()
            automaton.add_word(term, entry)
        automaton.make_automaton()
        # Cache only after the build finished, so a failure part-way through
        # does not leave a half-built automaton behind.
        self.ahocorasick_automaton = automaton

    # Without any term the automaton is never promoted to AHOCORASICK and
    # iter() would raise; there is simply nothing to find.
    if self.ahocorasick_automaton.kind != ahocorasick.AHOCORASICK:
        return []

    haystack = text if self.case_sensitive else text.lower()
    events = []
    for end, entry in self.ahocorasick_automaton.iter(haystack):
        # Copy so the shared vocabulary entry is not mutated.
        event = entry.copy()
        event.update({START: end + 1 - len(entry[TERM]), END: end + 1})
        events.append(event)
    return events
示例12: init_ahocorasick
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def init_ahocorasick():
    """Build an automaton from ``keyword_list``; each keyword is its own value."""
    automaton = ahocorasick.Automaton()
    for word in keyword_list:
        automaton.add_word(word, word)
    automaton.make_automaton()
    return automaton
示例13: init_py_aho_corasick
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def init_py_aho_corasick():
    """Create a ``py_aho_corasick.Automaton`` from ``keyword_list``."""
    automaton = py_aho_corasick.Automaton(keyword_list)
    return automaton
示例14: build_automaton
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def build_automaton(word_list):
    """
    Build an automaton from the words listed in a file, one per line.

    Each line is lower-cased and stripped; lines of three characters or
    fewer are skipped.

    :type word_list: str
    :param word_list: optional word list file for ignoring certain words.

    :rtype: (ahocorasick.Automaton, str)
    :returns: an automaton, and an iterated sha1 hash of the words in the word list.
    """
    # Imported here because pyahocorasick is an optional dependency.
    try:
        import ahocorasick
    except ImportError:  # pragma: no cover
        print('Please install the `pyahocorasick` package to use --word-list')
        raise

    # See https://pyahocorasick.readthedocs.io/en/latest/
    # for more information.
    automaton = ahocorasick.Automaton()
    word_list_hash = hashlib.sha1()

    with open(word_list) as f:
        for raw_line in f:
            # Lower-casing makes everything case-insensitive.
            word = raw_line.lower().strip()
            if len(word) <= 3:
                continue
            word_list_hash.update(word.encode('utf-8'))
            automaton.add_word(word, word)

    automaton.make_automaton()
    return automaton, word_list_hash.hexdigest()
示例15: test_analyze_standard_positives_with_automaton
# 需要导入模块: import ahocorasick [as 别名]
# 或者: from ahocorasick import Automaton [as 别名]
def test_analyze_standard_positives_with_automaton(self, file_content):
    """With 'thisone' in the automaton, analysing the file yields no output."""
    ignored_word = 'thisone'
    word_automaton = ahocorasick.Automaton()
    word_automaton.add_word(ignored_word, ignored_word)
    word_automaton.make_automaton()

    detector = KeywordDetector(automaton=word_automaton)
    mock_file = mock_file_object(file_content)
    results = detector.analyze(mock_file, 'mock_filename')

    # All skipped due to automaton
    assert len(results) == 0