本文整理汇总了 Python 中 jieba.analyse.ChineseAnalyzer 的典型用法代码示例。如果您正苦于以下问题:Python analyse.ChineseAnalyzer 的具体用法是什么?Python analyse.ChineseAnalyzer 怎么用?有哪些使用 Python analyse.ChineseAnalyzer 的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解它所在的模块 jieba.analyse 的用法示例。
在下文中一共展示了 analyse.ChineseAnalyzer 的 8 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: __init__
# 需要导入模块: from jieba import analyse [as 别名]
# 或者: from jieba.analyse import ChineseAnalyzer [as 别名]
def __init__(self, app=None, db=None, analyzer=None):
    """
    Store the search settings and optionally bind to an application.

    ``db`` and ``analyzer`` are kept on the instance unchanged, and the
    internal ``_signal`` / ``_indexs`` slots start out empty. When ``app``
    is given, ``self.init_app(app)`` is called immediately.

    A jieba analyzer can be supplied like this::

        from jieba.analyse import ChineseAnalyzer
        search = Search(analyzer=ChineseAnalyzer())
    """
    self._signal = None
    self._indexs = {}
    self.db, self.analyzer = db, analyzer
    if app is None:
        # Nothing to bind yet.
        return
    self.init_app(app)
示例2: __init__
# 需要导入模块: from jieba import analyse [as 别名]
# 或者: from jieba.analyse import ChineseAnalyzer [as 别名]
def __init__(self, app=None, db=None, analyzer=None):
    """
    Remember the database handle and analyzer, then bind ``app`` if given.

    ``self.init_app(app)`` is only called when ``app`` is not ``None``.

    To use jieba for tokenizing, pass an analyzer instance::

        from jieba.analyse import ChineseAnalyzer
        search = Search(analyzer=ChineseAnalyzer())
    """
    self.db, self.analyzer = db, analyzer
    if app is not None:
        self.init_app(app)
示例3: setUp
# 需要导入模块: from jieba import analyse [as 别名]
# 或者: from jieba.analyse import ChineseAnalyzer [as 别名]
def setUp(self):
    """
    Build the per-test fixture: app, database, search extension and posts.

    Creates a Flask app configured from ``TestConfig``, wires SQLAlchemy and
    ``Search`` (with a ``ChineseAnalyzer`` instance) to it, declares a
    searchable ``Post`` model, creates the tables and saves one post per
    entry in ``titles``.
    """
    class TestConfig(object):
        SQLALCHEMY_TRACK_MODIFICATIONS = True
        SQLALCHEMY_DATABASE_URI = 'sqlite://'
        DEBUG = True
        TESTING = True
        # A new directory from mkdtemp() is used as the index name.
        MSEARCH_INDEX_NAME = mkdtemp()
        MSEARCH_BACKEND = 'whoosh'

    self.app = app = Flask(__name__)
    app.config.from_object(TestConfig())
    self.db = database = SQLAlchemy(app)
    self.search = Search(app, db=database, analyzer=ChineseAnalyzer())

    class Post(database.Model, ModelSaveMixin):
        __tablename__ = 'basic_posts'
        # Columns listed here are the ones declared searchable.
        __searchable__ = ['title', 'content']

        id = database.Column(database.Integer, primary_key=True)
        title = database.Column(database.String(49))
        content = database.Column(database.Text)

        def __repr__(self):
            return '<Post:{}>'.format(self.title)

    self.Post = Post

    with app.test_request_context():
        database.create_all()
        # Content is numbered from 1 to match the position of each title.
        for number, title in enumerate(titles, start=1):
            Post(title=title, content='content%d' % number).save(database)
示例4: __init__
# 需要导入模块: from jieba import analyse [as 别名]
# 或者: from jieba.analyse import ChineseAnalyzer [as 别名]
def __init__(self, *args, **kwargs):
    """
    Initialise the parent with ``analyzer`` forced to ``ChineseAnalyzer()``.

    Any ``analyzer`` keyword supplied by the caller is overwritten; all other
    positional and keyword arguments are forwarded unchanged.
    """
    # Swap the default tokenizer for jieba so Chinese text is segmented.
    kwargs.update(analyzer=ChineseAnalyzer())
    super().__init__(*args, **kwargs)
示例5: build_schema
# 需要导入模块: from jieba import analyse [as 别名]
# 或者: from jieba.analyse import ChineseAnalyzer [as 别名]
def build_schema(self, fields):
    """
    Build the search schema from a mapping of index fields.

    Each value in ``fields`` is mapped to a schema field type according to
    its ``is_multivalued`` / ``indexed`` flags and its ``field_type``; any
    type without a dedicated branch becomes a ``TEXT`` field analysed with
    ``ChineseAnalyzer``.

    Returns a ``(content_field_name, Schema)`` tuple, where
    ``content_field_name`` is the ``index_fieldname`` of the last field whose
    ``document`` attribute is ``True`` (``''`` if there is none).

    Raises ``SearchBackendError`` if the schema holds no more entries than
    the three built-in keys.
    """
    schema_fields = {
        ID: WHOOSH_ID(stored=True, unique=True),
        DJANGO_CT: WHOOSH_ID(stored=True),
        DJANGO_ID: WHOOSH_ID(stored=True),
    }
    # Remember how many keys are built in so an empty result can be detected.
    builtin_count = len(schema_fields)
    content_field_name = ''

    for _name, field in fields.items():
        if field.is_multivalued:
            if field.indexed is False:
                column = IDLIST(stored=True, field_boost=field.boost)
            else:
                column = KEYWORD(stored=True, commas=True, scorable=True, field_boost=field.boost)
        elif field.field_type in ('date', 'datetime'):
            column = DATETIME(stored=field.stored, sortable=True)
        elif field.field_type == 'integer':
            column = NUMERIC(stored=field.stored, numtype=int, field_boost=field.boost)
        elif field.field_type == 'float':
            column = NUMERIC(stored=field.stored, numtype=float, field_boost=field.boost)
        elif field.field_type == 'boolean':
            # No field_boost here: BOOLEAN does not support it as of 1.8.2.
            column = BOOLEAN(stored=field.stored)
        elif field.field_type == 'ngram':
            column = NGRAM(minsize=3, maxsize=15, stored=field.stored, field_boost=field.boost)
        elif field.field_type == 'edge_ngram':
            column = NGRAMWORDS(minsize=2, maxsize=15, at='start', stored=field.stored, field_boost=field.boost)
        else:
            # Free text goes through jieba's ChineseAnalyzer (this snippet
            # previously used StemmingAnalyzer here).
            column = TEXT(
                stored=True,
                analyzer=ChineseAnalyzer(),
                field_boost=field.boost,
                sortable=True,
            )

        key = field.index_fieldname
        schema_fields[key] = column

        if field.document is True:
            content_field_name = key
            column.spelling = True

    # Raise a clear error instead of letting the backend die on an empty schema.
    if len(schema_fields) <= builtin_count:
        raise SearchBackendError("No fields were found in any search_indexes. Please correct this before attempting to search.")

    return (content_field_name, Schema(**schema_fields))
示例6: build_schema
# 需要导入模块: from jieba import analyse [as 别名]
# 或者: from jieba.analyse import ChineseAnalyzer [as 别名]
def build_schema(self, fields):
    """
    Translate index field definitions into a schema.

    Iterates over ``fields.items()`` and picks a schema field type for each
    value from its ``is_multivalued``, ``indexed`` and ``field_type``
    attributes. Field types with no dedicated branch are stored as ``TEXT``
    analysed by ``ChineseAnalyzer``.

    Returns ``(content_field_name, Schema(**schema_fields))``; the name is
    taken from the last field whose ``document`` is ``True`` and is ``''``
    when no such field exists.

    Raises ``SearchBackendError`` when the schema has no entries besides the
    three built-in ones.
    """
    schema_fields = {
        ID: WHOOSH_ID(stored=True, unique=True),
        DJANGO_CT: WHOOSH_ID(stored=True),
        DJANGO_ID: WHOOSH_ID(stored=True),
    }
    # Size of the built-in part, used below to notice that nothing was added.
    baseline = len(schema_fields)
    content_field_name = ''

    for _unused, definition in fields.items():
        boost = None
        if definition.is_multivalued:
            boost = definition.boost
            if definition.indexed is False:
                schema_field = IDLIST(stored=True, field_boost=boost)
            else:
                schema_field = KEYWORD(stored=True, commas=True, scorable=True, field_boost=boost)
        else:
            kind = definition.field_type
            if kind in ('date', 'datetime'):
                schema_field = DATETIME(stored=definition.stored, sortable=True)
            elif kind == 'integer':
                schema_field = NUMERIC(stored=definition.stored, numtype=int,
                                       field_boost=definition.boost)
            elif kind == 'float':
                schema_field = NUMERIC(stored=definition.stored, numtype=float,
                                       field_boost=definition.boost)
            elif kind == 'boolean':
                # BOOLEAN is built without a boost (unsupported as of 1.8.2).
                schema_field = BOOLEAN(stored=definition.stored)
            elif kind == 'ngram':
                schema_field = NGRAM(minsize=3, maxsize=15, stored=definition.stored,
                                     field_boost=definition.boost)
            elif kind == 'edge_ngram':
                schema_field = NGRAMWORDS(minsize=2, maxsize=15, at='start',
                                          stored=definition.stored,
                                          field_boost=definition.boost)
            else:
                # ChineseAnalyzer takes the place of StemmingAnalyzer here.
                schema_field = TEXT(stored=True, analyzer=ChineseAnalyzer(),
                                    field_boost=definition.boost, sortable=True)

        target = definition.index_fieldname
        schema_fields[target] = schema_field

        if definition.document is True:
            content_field_name = target
            schema_field.spelling = True

    if len(schema_fields) > baseline:
        return (content_field_name, Schema(**schema_fields))

    # Nothing was added: fail here rather than inside the backend.
    raise SearchBackendError(
        "No fields were found in any search_indexes. Please correct this before attempting to search.")
示例7: build_schema
# 需要导入模块: from jieba import analyse [as 别名]
# 或者: from jieba.analyse import ChineseAnalyzer [as 别名]
def build_schema(self, fields):
    """
    Create the schema that backs the search index.

    For every value in ``fields`` a schema field is chosen from its
    ``is_multivalued`` / ``indexed`` flags and ``field_type``; the fallback
    is a ``TEXT`` field tokenized by jieba's ``ChineseAnalyzer``.

    Returns a two-tuple of the content field name (the ``index_fieldname`` of
    the last field with ``document is True``, or ``''``) and the ``Schema``.

    Raises ``SearchBackendError`` if only the three built-in keys are present
    once all fields have been processed.
    """
    schema_fields = {
        ID: WHOOSH_ID(stored=True, unique=True),
        DJANGO_CT: WHOOSH_ID(stored=True),
        DJANGO_ID: WHOOSH_ID(stored=True),
    }
    # Count of built-in keys; compared against the final size further down.
    fixed_keys = len(schema_fields)
    content_field_name = ''

    for _label, spec in fields.items():
        if spec.is_multivalued:
            entry = (
                IDLIST(stored=True, field_boost=spec.boost)
                if spec.indexed is False
                else KEYWORD(stored=True, commas=True, scorable=True, field_boost=spec.boost)
            )
        elif spec.field_type in ('date', 'datetime'):
            entry = DATETIME(stored=spec.stored, sortable=True)
        elif spec.field_type == 'integer':
            entry = NUMERIC(stored=spec.stored, numtype=int, field_boost=spec.boost)
        elif spec.field_type == 'float':
            entry = NUMERIC(stored=spec.stored, numtype=float, field_boost=spec.boost)
        elif spec.field_type == 'boolean':
            # BOOLEAN gets no field_boost; it is not supported as of 1.8.2.
            entry = BOOLEAN(stored=spec.stored)
        elif spec.field_type == 'ngram':
            entry = NGRAM(minsize=3, maxsize=15, stored=spec.stored, field_boost=spec.boost)
        elif spec.field_type == 'edge_ngram':
            entry = NGRAMWORDS(minsize=2, maxsize=15, at='start', stored=spec.stored, field_boost=spec.boost)
        else:
            # Use the jieba tokenizer for plain text.
            entry = TEXT(stored=True, analyzer=ChineseAnalyzer(), field_boost=spec.boost, sortable=True)

        slot = spec.index_fieldname
        schema_fields[slot] = entry

        if spec.document is True:
            content_field_name = slot
            entry.spelling = True

    # Report the problem explicitly when no index fields were registered.
    if len(schema_fields) <= fixed_keys:
        raise SearchBackendError("No fields were found in any search_indexes. Please correct this before attempting to search.")

    return (content_field_name, Schema(**schema_fields))
示例8: build_schema
# 需要导入模块: from jieba import analyse [as 别名]
# 或者: from jieba.analyse import ChineseAnalyzer [as 别名]
def build_schema(self, fields):
    """
    Assemble the index schema for the supplied fields.

    Starts from three built-in ID keys, then adds one schema field per value
    in ``fields``, selected by ``is_multivalued`` / ``indexed`` and
    ``field_type``. Unrecognised field types fall back to ``TEXT`` with a
    ``ChineseAnalyzer``.

    Returns ``(content_field_name, Schema)``. ``content_field_name`` stays
    ``''`` unless a field has ``document is True``, in which case it is that
    field's ``index_fieldname`` (the last such field wins).

    Raises ``SearchBackendError`` when the schema ends up no larger than the
    built-in keys.
    """
    schema_fields = {
        ID: WHOOSH_ID(stored=True, unique=True),
        DJANGO_CT: WHOOSH_ID(stored=True),
        DJANGO_ID: WHOOSH_ID(stored=True),
    }
    # Number of built-in keys, kept for the emptiness check at the end.
    preset_total = len(schema_fields)
    content_field_name = ''

    for _key, item in fields.items():
        if item.is_multivalued:
            if item.indexed is False:
                built = IDLIST(stored=True, field_boost=item.boost)
            else:
                built = KEYWORD(stored=True, commas=True, scorable=True, field_boost=item.boost)
        else:
            type_name = item.field_type
            if type_name in ('date', 'datetime'):
                built = DATETIME(stored=item.stored, sortable=True)
            elif type_name in ('integer', 'float'):
                # Both numeric kinds share NUMERIC; only the Python type differs.
                number_type = int if type_name == 'integer' else float
                built = NUMERIC(stored=item.stored, numtype=number_type, field_boost=item.boost)
            elif type_name == 'boolean':
                # Boost is left out: BOOLEAN does not accept it as of 1.8.2.
                built = BOOLEAN(stored=item.stored)
            elif type_name == 'ngram':
                built = NGRAM(minsize=3, maxsize=15, stored=item.stored, field_boost=item.boost)
            elif type_name == 'edge_ngram':
                built = NGRAMWORDS(minsize=2, maxsize=15, at='start', stored=item.stored, field_boost=item.boost)
            else:
                built = TEXT(stored=True, analyzer=ChineseAnalyzer(), field_boost=item.boost, sortable=True)

        index_name = item.index_fieldname
        schema_fields[index_name] = built

        if item.document is True:
            content_field_name = index_name
            built.spelling = True

    # An unchanged size means no fields were added; raise instead of
    # handing the backend an empty schema.
    if len(schema_fields) <= preset_total:
        raise SearchBackendError("No fields were found in any search_indexes. Please correct this before attempting to search.")

    return (content_field_name, Schema(**schema_fields))