This article collects typical usage examples of the elasticsearch_dsl.token_filter method in Python. If you are unsure how elasticsearch_dsl.token_filter is used in practice, the curated examples below may help; you can also look further into usage examples of the elasticsearch_dsl module it belongs to.
The following shows 5 code examples of the elasticsearch_dsl.token_filter method, sorted by popularity by default.
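Before the examples, a minimal sketch of how token_filter is typically used may help: it declares a named custom token filter, which can then be combined with built-in filters (referenced by name) inside a custom analyzer. The filter name, type and stopword list below are illustrative, not taken from the examples:

from elasticsearch_dsl import analyzer, token_filter

# Declare a custom 'stop' token filter; the name and stopwords are illustrative.
my_stop = token_filter('my_stop', type='stop', stopwords=['a', 'an', 'the'])

# Combine it with built-in filters inside a custom analyzer.
my_analyzer = analyzer('my_analyzer', tokenizer='standard', filter=['lowercase', my_stop])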
Example 1: add_analyzer
# Required module: import elasticsearch_dsl [as alias]
# or: from elasticsearch_dsl import token_filter [as alias]
def add_analyzer(index: Index):
    """Adds a new analyzer to the index, available for use on all of its
    fields. The analyzer applies lower case + ASCII folding: it removes
    accents and the use of 'ñ', among other things, to allow searching
    Spanish text.
    """
    # Synonym (a Django model) and the constants module come from the source project.
    synonyms = list(Synonym.objects.values_list('terms', flat=True))
    filters = ['lowercase', 'asciifolding']
    if synonyms:
        filters.append(token_filter(constants.SYNONYM_FILTER,
                                    type='synonym',
                                    synonyms=synonyms))

    index.analyzer(
        analyzer(constants.ANALYZER,
                 tokenizer='standard',
                 filter=filters)
    )
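A hedged usage sketch, assuming the Django Synonym model and the constants module from the originating project are available; the index name is illustrative:

from elasticsearch_dsl import Index

idx = Index('catalog')   # index name is illustrative
add_analyzer(idx)        # attaches the analyzer (with synonyms, if any) to the index settings
idx.create()             # the analyzer is then available to all fields of the index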
Example 2: test_simulate_complex
# Required module: import elasticsearch_dsl [as alias]
# or: from elasticsearch_dsl import token_filter [as alias]
def test_simulate_complex(client):
    a = analyzer('my-analyzer',
                 tokenizer=tokenizer('split_words', 'simple_pattern_split', pattern=':'),
                 filter=['lowercase', token_filter('no-ifs', 'stop', stopwords=['if'])])

    tokens = a.simulate('if:this:works', using=client).tokens

    assert len(tokens) == 2
    assert ['this', 'works'] == [t.token for t in tokens]
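Analyzer.simulate sends the analyzer definition to the Elasticsearch _analyze API, so it needs a live client connection. A minimal sketch of running the same analysis outside of a test fixture, assuming a recent elasticsearch_dsl version; the host URL is illustrative:

from elasticsearch import Elasticsearch
from elasticsearch_dsl import analyzer, tokenizer, token_filter

client = Elasticsearch('http://localhost:9200')  # illustrative host

a = analyzer('my-analyzer',
             tokenizer=tokenizer('split_words', 'simple_pattern_split', pattern=':'),
             filter=['lowercase', token_filter('no-ifs', 'stop', stopwords=['if'])])

for t in a.simulate('if:this:works', using=client).tokens:
    print(t.token)  # prints 'this' and 'works'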
Example 3: gen_name_analyzer_synonyms
# Required module: import elasticsearch_dsl [as alias]
# or: from elasticsearch_dsl import token_filter [as alias]
def gen_name_analyzer_synonyms(synonyms):
    """Creates an analyzer for names with synonyms.

    Args:
        synonyms (list): List of synonyms to use, in Solr format.

    Returns:
        elasticsearch_dsl.analysis.Analyzer: text analyzer named
            'name_analyzer_synonyms'.

    """
    name_synonyms_filter = token_filter(
        'name_synonyms_filter',
        type='synonym',
        synonyms=synonyms
    )

    # name_analyzer_synonyms (the analyzer's name string) and
    # spanish_stopwords_filter are module-level definitions in the source project.
    return analyzer(
        name_analyzer_synonyms,
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            name_synonyms_filter,
            spanish_stopwords_filter
        ]
    )
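A hedged usage sketch; the synonym rules below are illustrative and follow the Solr synonym format expected by the 'synonym' token filter:

# Each string is one Solr-format synonym rule.
synonyms = ['santa, salta, santo', 'caba, cba']
name_analyzer = gen_name_analyzer_synonyms(synonyms)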
Example 4: gen_name_analyzer_excluding_terms
# Required module: import elasticsearch_dsl [as alias]
# or: from elasticsearch_dsl import token_filter [as alias]
def gen_name_analyzer_excluding_terms(excluding_terms):
    """Creates an analyzer for names that only returns ET (excluding terms).

    For example, if the ET configuration file contains the following rules:

        santa, salta, santo
        caba, cba

    Then applying the analyzer to the query 'salta' should return 'santa'
    and 'santo', while searching for 'caba' should return 'cba'.

    The analyzer is used to exclude results from specific searches.

    Args:
        excluding_terms (list): List of ET to use, specified as Solr synonyms.

    Returns:
        elasticsearch_dsl.analysis.Analyzer: text analyzer named
            'name_analyzer_excluding_terms'.

    """
    name_excluding_terms_filter = token_filter(
        'name_excluding_terms_filter',
        type='synonym',
        synonyms=excluding_terms
    )

    # name_analyzer_excluding_terms (the analyzer's name string), synonyms_only_filter
    # and spanish_stopwords_filter are module-level definitions in the source project.
    return analyzer(
        name_analyzer_excluding_terms,
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            name_excluding_terms_filter,
            synonyms_only_filter,
            spanish_stopwords_filter
        ]
    )
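A hedged sketch of how the behavior described in the docstring could be checked with Analyzer.simulate (client setup omitted; see Example 2). The rules are the ones from the docstring:

excluding_terms = ['santa, salta, santo', 'caba, cba']
a = gen_name_analyzer_excluding_terms(excluding_terms)

tokens = a.simulate('salta', using=client).tokens
# Per the docstring, the output should contain the other terms of the
# matching rule ('santa', 'santo') but not 'salta' itself.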
Example 5: configure_index
# Required module: import elasticsearch_dsl [as alias]
# or: from elasticsearch_dsl import token_filter [as alias]
def configure_index(idx):
    """Configure ES index settings.

    NOTE: This is unused at the moment. Current issues:

    1. The index needs to be created (index.create() or search_index --create)
       setting update_all_types=True because the attribute name is the same
       in Person and Company.
       https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.client.IndicesClient.create

       name = fields.TextField(attr="fullname", analyzer=lb_analyzer)

    2. How to specify a token filter for an attribute?

    Therefore the index needs to be configured outside Django.
    """
    idx.settings(number_of_shards=1, number_of_replicas=0)
    lb_filter = token_filter(
        "lb_filter",
        "stop",
        stopwords=["i"]
    )
    # Note: the 'standard' token filter was removed in Elasticsearch 7.x, and
    # referencing lb_filter by its string name means elasticsearch_dsl will not
    # include the custom filter's definition in the generated analysis settings;
    # passing the lb_filter object instead would include it.
    lb_analyzer = analyzer(
        "lb_analyzer",
        tokenizer="standard",
        filter=["standard", "lb_filter", "asciifolding", "lowercase"]
    )
    return lb_analyzer, lb_filter
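A hedged sketch of how the returned analyzer might then be registered on an index; the index name is illustrative, and (as noted in the comment above) lb_filter would also need to be passed as an object inside the analyzer for its definition to be emitted:

from elasticsearch_dsl import Index

idx = Index('leaderboard')                    # index name is illustrative
lb_analyzer, lb_filter = configure_index(idx)
idx.analyzer(lb_analyzer)                     # record the custom analyzer in the index settings
idx.create()                                  # create the index with the shard, replica and analysis settings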