本文整理汇总了Python中whoosh.fields.Schema方法的典型用法代码示例。如果您正苦于以下问题:Python fields.Schema方法的具体用法?Python fields.Schema怎么用?Python fields.Schema使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类whoosh.fields
的用法示例。
在下文中一共展示了fields.Schema方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from whoosh import fields [as 别名]
# 或者: from whoosh.fields import Schema [as 别名]
def __init__(self, config, api):
self.config = config
self.api = api
self.cache_dir = user_cache_dir('fac', appauthor=False)
self.storage = FileStorage(os.path.join(self.cache_dir, 'index'))
self.schema = Schema(
name=TEXT(sortable=True, phrase=True, field_boost=3,
analyzer=intraword),
owner=TEXT(sortable=True, field_boost=2.5,
analyzer=intraword),
title=TEXT(field_boost=2.0, phrase=False),
summary=TEXT(phrase=True),
downloads=NUMERIC(sortable=True),
sort_name=SortColumn(),
name_id=ID(stored=True),
)
try:
self.index = self.storage.open_index()
except EmptyIndexError:
self.index = None
self.db = JSONFile(os.path.join(self.cache_dir, 'mods.json'))
示例2: _init_index
# 需要导入模块: from whoosh import fields [as 别名]
# 或者: from whoosh.fields import Schema [as 别名]
def _init_index(self):
if not os.path.exists(self.corpus.path):
os.mkdir(self.corpus.path)
analyzer = self.corpus.analyzer
self.analyzer = self.corpus.analyzer
if exists_in(self.corpus.path):
ix = open_dir(self.corpus.path)
else:
# may need to remove this? how can we have a schema if we don't know the...uh...schema?
schema = Schema(title=TEXT(stored=True,analyzer=analyzer), content=TEXT(analyzer=analyzer),
path=ID(stored=True))
ix = create_in(self.corpus.path,schema)
writer = ix.writer()
writer.commit()
self.index = ix
self.searcher = ix.searcher();
#self.reader = IndexReader.open(self.lucene_index, True)
self.reader = ix.reader();
#self.analyzer = self.corpus.analyzer
示例3: __init__
# 需要导入模块: from whoosh import fields [as 别名]
# 或者: from whoosh.fields import Schema [as 别名]
def __init__(self) -> None:
self.schema = Schema(id=NUMERIC(unique=True, stored=True), canonical_name=STORED(), name=STORED(), name_tokenized=TEXT(stored=False, analyzer=WhooshConstants.tokenized_analyzer), name_stemmed=TEXT(stored=False, analyzer=WhooshConstants.stem_analyzer), name_normalized=TEXT(stored=False, analyzer=WhooshConstants.normalized_analyzer, field_boost=100.0))
示例4: __init__
# 需要导入模块: from whoosh import fields [as 别名]
# 或者: from whoosh.fields import Schema [as 别名]
def __init__(self, index):
self.index = index
self.pk = index.pk
self.analyzer = index.analyzer
self.schema = _Schema(**self.fields)
示例5: __init__
# 需要导入模块: from whoosh import fields [as 别名]
# 或者: from whoosh.fields import Schema [as 别名]
def __init__(self, index_path, language):
from whoosh import index as whoosh_index
from whoosh.fields import Schema, TEXT, ID
from whoosh import qparser
from whoosh.highlight import UppercaseFormatter
from whoosh.analysis import SimpleAnalyzer, LanguageAnalyzer
from whoosh.lang import has_stemmer, has_stopwords
import os
if not has_stemmer(language) or not has_stopwords(language):
# TODO Display a warning?
analyzer = SimpleAnalyzer()
else:
analyzer = LanguageAnalyzer(language)
self.schema = Schema(path=ID(unique=True, stored=True), body=TEXT(analyzer=analyzer))
self.formatter = UppercaseFormatter()
self.index_path = index_path
if not os.path.exists(index_path):
try:
os.mkdir(index_path)
except OSError as e:
sys.exit("Error creating Whoosh index: %s" % e)
if whoosh_index.exists_in(index_path):
try:
self.search_index = whoosh_index.open_dir(index_path)
except whoosh_index.IndexError as e:
sys.exit("Error opening whoosh index: {0}".format(e))
else:
self.search_index = whoosh_index.create_in(index_path, self.schema)
self.query_parser = qparser.MultifieldParser(["body", "path"], schema=self.schema)
self.query_parser.add_plugin(qparser.FuzzyTermPlugin())
示例6: _mail_schema
# 需要导入模块: from whoosh import fields [as 别名]
# 或者: from whoosh.fields import Schema [as 别名]
def _mail_schema(self):
return Schema(
ident=ID(stored=True, unique=True),
sender=ID(stored=False),
to=KEYWORD(stored=False, commas=True),
cc=KEYWORD(stored=False, commas=True),
bcc=KEYWORD(stored=False, commas=True),
subject=NGRAMWORDS(stored=False),
date=NUMERIC(stored=False, sortable=True, bits=64, signed=False),
body=NGRAMWORDS(stored=False),
tag=KEYWORD(stored=True, commas=True),
flags=KEYWORD(stored=True, commas=True),
raw=TEXT(stored=False))
示例7: create_index
# 需要导入模块: from whoosh import fields [as 别名]
# 或者: from whoosh.fields import Schema [as 别名]
def create_index():
regex_tokenize = re.compile('\w+(?:-\w+)+|<[A-Z]+>[^<]+</[A-Z]+>|\w+', re.U)
tokenizer = RegexTokenizer(regex_tokenize)
schema = Schema(sentence=TEXT(stored=True, analyzer=tokenizer))
if not os.path.exists("index_full"):
os.mkdir("index_full")
idx = create_in("index_full", schema)
else:
idx = open_dir("index_full")
return idx
示例8: build_schema
# 需要导入模块: from whoosh import fields [as 别名]
# 或者: from whoosh.fields import Schema [as 别名]
def build_schema(self, fields):
schema_fields = {
ID: WHOOSH_ID(stored=True, unique=True),
DJANGO_CT: WHOOSH_ID(stored=True),
DJANGO_ID: WHOOSH_ID(stored=True),
}
# Grab the number of keys that are hard-coded into Haystack.
# We'll use this to (possibly) fail slightly more gracefully later.
initial_key_count = len(schema_fields)
content_field_name = ''
for field_name, field_class in fields.items():
if field_class.is_multivalued:
if field_class.indexed is False:
schema_fields[field_class.index_fieldname] = IDLIST(stored=True, field_boost=field_class.boost)
else:
schema_fields[field_class.index_fieldname] = KEYWORD(stored=True, commas=True, scorable=True, field_boost=field_class.boost)
elif field_class.field_type in ['date', 'datetime']:
schema_fields[field_class.index_fieldname] = DATETIME(stored=field_class.stored, sortable=True)
elif field_class.field_type == 'integer':
schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=int, field_boost=field_class.boost)
elif field_class.field_type == 'float':
schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=float, field_boost=field_class.boost)
elif field_class.field_type == 'boolean':
# Field boost isn't supported on BOOLEAN as of 1.8.2.
schema_fields[field_class.index_fieldname] = BOOLEAN(stored=field_class.stored)
elif field_class.field_type == 'ngram':
schema_fields[field_class.index_fieldname] = NGRAM(minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost)
elif field_class.field_type == 'edge_ngram':
schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start', stored=field_class.stored, field_boost=field_class.boost)
else:
# schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=StemmingAnalyzer(), field_boost=field_class.boost, sortable=True)
schema_fields[field_class.index_fieldname] = TEXT(stored=True,
analyzer=ChineseAnalyzer(),
field_boost=field_class.boost,
sortable=True)
if field_class.document is True:
content_field_name = field_class.index_fieldname
schema_fields[field_class.index_fieldname].spelling = True
# Fail more gracefully than relying on the backend to die if no fields
# are found.
if len(schema_fields) <= initial_key_count:
raise SearchBackendError("No fields were found in any search_indexes. Please correct this before attempting to search.")
return (content_field_name, Schema(**schema_fields))
示例9: build_schema
# 需要导入模块: from whoosh import fields [as 别名]
# 或者: from whoosh.fields import Schema [as 别名]
def build_schema(self, fields):
schema_fields = {
ID: WHOOSH_ID(stored=True, unique=True),
DJANGO_CT: WHOOSH_ID(stored=True),
DJANGO_ID: WHOOSH_ID(stored=True),
}
# Grab the number of keys that are hard-coded into Haystack.
# We'll use this to (possibly) fail slightly more gracefully later.
initial_key_count = len(schema_fields)
content_field_name = ''
for field_name, field_class in fields.items():
if field_class.is_multivalued:
if field_class.indexed is False:
schema_fields[field_class.index_fieldname] = IDLIST(stored=True, field_boost=field_class.boost)
else:
schema_fields[field_class.index_fieldname] = KEYWORD(stored=True, commas=True, scorable=True,
field_boost=field_class.boost)
elif field_class.field_type in ['date', 'datetime']:
schema_fields[field_class.index_fieldname] = DATETIME(stored=field_class.stored, sortable=True)
elif field_class.field_type == 'integer':
schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=int,
field_boost=field_class.boost)
elif field_class.field_type == 'float':
schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=float,
field_boost=field_class.boost)
elif field_class.field_type == 'boolean':
# Field boost isn't supported on BOOLEAN as of 1.8.2.
schema_fields[field_class.index_fieldname] = BOOLEAN(stored=field_class.stored)
elif field_class.field_type == 'ngram':
schema_fields[field_class.index_fieldname] = NGRAM(minsize=3, maxsize=15, stored=field_class.stored,
field_boost=field_class.boost)
elif field_class.field_type == 'edge_ngram':
schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start',
stored=field_class.stored,
field_boost=field_class.boost)
else: # StemmingAnalyzer->ChineseAnalyzer
schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=ChineseAnalyzer(),
field_boost=field_class.boost, sortable=True)
if field_class.document is True:
content_field_name = field_class.index_fieldname
schema_fields[field_class.index_fieldname].spelling = True
# Fail more gracefully than relying on the backend to die if no fields
# are found.
if len(schema_fields) <= initial_key_count:
raise SearchBackendError(
"No fields were found in any search_indexes. Please correct this before attempting to search.")
return (content_field_name, Schema(**schema_fields))
示例10: build_schema
# 需要导入模块: from whoosh import fields [as 别名]
# 或者: from whoosh.fields import Schema [as 别名]
def build_schema(self, fields):
schema_fields = {
ID: WHOOSH_ID(stored=True, unique=True),
DJANGO_CT: WHOOSH_ID(stored=True),
DJANGO_ID: WHOOSH_ID(stored=True),
}
# Grab the number of keys that are hard-coded into Haystack.
# We'll use this to (possibly) fail slightly more gracefully later.
initial_key_count = len(schema_fields)
content_field_name = ''
for field_name, field_class in fields.items():
if field_class.is_multivalued:
if field_class.indexed is False:
schema_fields[field_class.index_fieldname] = IDLIST(stored=True, field_boost=field_class.boost)
else:
schema_fields[field_class.index_fieldname] = KEYWORD(stored=True, commas=True, scorable=True, field_boost=field_class.boost)
elif field_class.field_type in ['date', 'datetime']:
schema_fields[field_class.index_fieldname] = DATETIME(stored=field_class.stored, sortable=True)
elif field_class.field_type == 'integer':
schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=int, field_boost=field_class.boost)
elif field_class.field_type == 'float':
schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=float, field_boost=field_class.boost)
elif field_class.field_type == 'boolean':
# Field boost isn't supported on BOOLEAN as of 1.8.2.
schema_fields[field_class.index_fieldname] = BOOLEAN(stored=field_class.stored)
elif field_class.field_type == 'ngram':
schema_fields[field_class.index_fieldname] = NGRAM(minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost)
elif field_class.field_type == 'edge_ngram':
schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start', stored=field_class.stored, field_boost=field_class.boost)
else:
# 调用结巴分词
schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=ChineseAnalyzer(), field_boost=field_class.boost, sortable=True)
if field_class.document is True:
content_field_name = field_class.index_fieldname
schema_fields[field_class.index_fieldname].spelling = True
# Fail more gracefully than relying on the backend to die if no fields
# are found.
if len(schema_fields) <= initial_key_count:
raise SearchBackendError("No fields were found in any search_indexes. Please correct this before attempting to search.")
return (content_field_name, Schema(**schema_fields))
示例11: build_schema
# 需要导入模块: from whoosh import fields [as 别名]
# 或者: from whoosh.fields import Schema [as 别名]
def build_schema(self, fields):
schema_fields = {
ID: WHOOSH_ID(stored=True, unique=True),
DJANGO_CT: WHOOSH_ID(stored=True),
DJANGO_ID: WHOOSH_ID(stored=True),
}
# Grab the number of keys that are hard-coded into Haystack.
# We'll use this to (possibly) fail slightly more gracefully later.
initial_key_count = len(schema_fields)
content_field_name = ''
for field_name, field_class in fields.items():
if field_class.is_multivalued:
if field_class.indexed is False:
schema_fields[field_class.index_fieldname] = IDLIST(stored=True, field_boost=field_class.boost)
else:
schema_fields[field_class.index_fieldname] = KEYWORD(stored=True, commas=True, scorable=True, field_boost=field_class.boost)
elif field_class.field_type in ['date', 'datetime']:
schema_fields[field_class.index_fieldname] = DATETIME(stored=field_class.stored, sortable=True)
elif field_class.field_type == 'integer':
schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=int, field_boost=field_class.boost)
elif field_class.field_type == 'float':
schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=float, field_boost=field_class.boost)
elif field_class.field_type == 'boolean':
# Field boost isn't supported on BOOLEAN as of 1.8.2.
schema_fields[field_class.index_fieldname] = BOOLEAN(stored=field_class.stored)
elif field_class.field_type == 'ngram':
schema_fields[field_class.index_fieldname] = NGRAM(minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost)
elif field_class.field_type == 'edge_ngram':
schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start', stored=field_class.stored, field_boost=field_class.boost)
else:
schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=ChineseAnalyzer(), field_boost=field_class.boost, sortable=True)
if field_class.document is True:
content_field_name = field_class.index_fieldname
schema_fields[field_class.index_fieldname].spelling = True
# Fail more gracefully than relying on the backend to die if no fields
# are found.
if len(schema_fields) <= initial_key_count:
raise SearchBackendError("No fields were found in any search_indexes. Please correct this before attempting to search.")
return (content_field_name, Schema(**schema_fields))