本文整理汇总了Python中whoosh.compat.dumps函数的典型用法代码示例。如果您正苦于以下问题:Python dumps函数的具体用法?Python dumps怎么用?Python dumps使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了dumps函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_pickle_schema
def test_pickle_schema():
    """A schema whose fields share a composed analyzer must survive
    pickling, both as built and as read back from an index."""
    from whoosh import analysis
    from whoosh.support.charset import accent_map
    from whoosh.compat import dumps

    freetext_analyzer = (analysis.StemmingAnalyzer()
                         | analysis.CharsetFilter(accent_map))

    schema = fields.Schema(
        path=fields.ID(stored=True, unique=True),
        file_mtime=fields.DATETIME(stored=True),
        name=fields.TEXT(stored=False, field_boost=2.0),
        description=fields.TEXT(stored=False, field_boost=1.5,
                                analyzer=freetext_analyzer),
        content=fields.TEXT(analyzer=freetext_analyzer),
    )

    # Sentences chosen so the stemming filter actually has work to do
    sentences = [
        u"The rain in spain falls mainly in the plain",
        u"Plainly sitting on the plain",
        u"Imagine a greatly improved sentence here",
    ]

    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for sentence in sentences:
                w.add_document(description=sentence, content=sentence)
        assert dumps(schema, 2)
        with ix.reader() as r:
            assert dumps(r.schema, 2)
示例2: test_charset_pickeability
def test_charset_pickeability():
    """Analyzers built around a charset mapping must be picklable."""
    from whoosh.support import charset

    table = charset.charset_table_to_dict(charset.default_charset)

    # A filter chain containing a CharsetFilter...
    chain = analysis.StandardAnalyzer() | analysis.CharsetFilter(table)
    _ = dumps(chain, -1)

    # ...and a bare CharsetTokenizer
    tokenizer = analysis.CharsetTokenizer(table)
    _ = dumps(tokenizer, -1)
示例3: _write_block
def _write_block(self, last=False):
    """Write the buffered postings block to the postings file and reset
    the block buffer.

    :param last: if True, this is the final block for the term; its length
        is written as a negative number so readers can detect the end.
    """
    # If this is the first block, write a small header first
    if not self._blockcount:
        self._postfile.write(WHOOSH3_HEADER_MAGIC)

    # Add this block's statistics to the terminfo object, which tracks the
    # overall statistics for all term postings
    self._terminfo.add_block(self)

    # Minify the IDs, weights, and values, put them in a tuple, and pickle it
    data = (self._mini_ids(), self._mini_weights(), self._mini_values())
    databytes = dumps(data)

    # BUG FIX: the original set comp = 0 for pickles under 20 bytes and then
    # unconditionally overwrote it with self._compression, so tiny blocks
    # were compressed anyway. Honor the intent: if the pickle is less than
    # 20 bytes, don't bother compressing.
    if len(databytes) < 20:
        comp = 0
    else:
        # Compress the pickle (if self._compression > 0)
        comp = self._compression
    if comp:
        databytes = zlib.compress(databytes, comp)

    # Make a tuple of block info. The posting reader can check this info
    # and decide whether to skip the block without having to decompress the
    # full block data
    #
    # - Number of postings in block
    # - Last ID in block
    # - Maximum weight in block
    # - Compression level (0 means the data is stored uncompressed)
    # - Minimum length byte
    # - Maximum length byte
    ids = self._ids
    infobytes = dumps((len(ids), ids[-1], self._maxweight, comp,
                       length_to_byte(self._minlength),
                       length_to_byte(self._maxlength),
                       ))

    # Write block length; a negative length marks the last block
    postfile = self._postfile
    blocklength = len(infobytes) + len(databytes)
    if last:
        blocklength *= -1
    postfile.write_int(blocklength)

    # Write block info, then block data
    postfile.write(infobytes)
    postfile.write(databytes)

    self._blockcount += 1
    # Reset block buffer
    self._new_block()
示例4: to_string
def to_string(self):
    """Pack this term info into a byte string for storage."""
    # Encode the lengths as 0-255 values
    minlen_byte = (0 if self._minlength is None
                   else length_to_byte(self._minlength))
    maxlen_byte = length_to_byte(self._maxlength)

    # Convert None values to the out-of-band NO_ID constant so they can be
    # stored as unsigned ints
    lowest = NO_ID if self._minid is None else self._minid
    highest = NO_ID if self._maxid is None else self._maxid

    # Pack the fixed-size term statistics (the literal 0 fills an extra
    # struct slot -- its meaning is not visible in this block)
    body = self.struct.pack(self._weight, self._df, minlen_byte,
                            maxlen_byte, self._maxweight, 0, lowest, highest)

    if isinstance(self.postings, tuple):
        # Postings are inlined - dump them with the pickle protocol,
        # slicing off the first two and last bytes of the pickle
        isinlined = 1
        body += dumps(self.postings, -1)[2:-1]
    else:
        isinlined = 0
        # It's possible for a term info to not have a pointer to postings
        # on disk, in which case postings will be None. Convert a None
        # value to -1 so it can be stored as a long.
        pointer = -1 if self.postings is None else self.postings
        body += pack_long(pointer)

    # Prepend a byte flagging whether the postings are inlined
    return pack("B", isinlined) + body
示例5: encode
def encode(self, positions):
    """Delta-encode the position list and return it as a packed count
    followed by the pickled deltas (pickle header/stop bytes sliced off)."""
    deltas = []
    previous = 0
    for position in positions:
        # Each entry stores the gap from the previous position
        deltas.append(position - previous)
        previous = position
    return pack_uint(len(deltas)) + dumps(deltas, -1)[2:-1]
示例6: word_values
def word_values(self, value, analyzer, **kwargs):
    """Analyze ``value`` and yield ``(word, frequency, weight, valuebytes)``
    tuples, where the value bytes pack the per-occurrence
    (position, startchar, endchar, boost) data delta-encoded."""
    boost_factor = self.field_boost

    # Ask the tokenizer for positions, character offsets and boosts, and
    # collect every occurrence of each token text
    kwargs["positions"] = True
    kwargs["chars"] = True
    kwargs["boosts"] = True
    occurrences = defaultdict(list)
    for token in tokens(value, analyzer, kwargs):
        occurrences[token.text].append((token.pos, token.startchar,
                                        token.endchar, token.boost))

    for word, poses in iteritems(occurrences):
        # Delta-encode positions and start chars; end chars are stored as
        # offsets from their start char
        codes = []
        prev_pos = 0
        prev_char = 0
        summedboost = 0
        for pos, startchar, endchar, boost in poses:
            codes.append((pos - prev_pos, startchar - prev_char,
                          endchar - startchar, boost))
            prev_pos = pos
            prev_char = endchar
            summedboost += boost

        # Value bytes = occurrence count + summed weight + pickled codes
        # (with the pickle's leading two and trailing bytes sliced off)
        weight = summedboost * boost_factor
        valuebytes = (pack_uint(len(poses)) + pack_float(weight)
                      + dumps(codes, -1)[2:-1])
        yield (word, len(poses), weight, valuebytes)
示例7: encode
def encode(self, poslist):
    """Return the positions as a packed count plus pickled gap list
    (the pickle's leading two and trailing bytes are sliced off)."""
    gaps = []
    last = 0
    for pos in poslist:
        gaps.append(pos - last)
        last = pos
    return pack_uint(len(gaps)) + dumps(gaps, -1)[2:-1]
示例8: minimize_values
def minimize_values(postingsize, values, compression=0):
    """Serialize a sequence of posting values to a byte string.

    :param postingsize: fixed byte size of each value; 0 means no values
        are stored; a negative number means variable-size (pickled) values.
    :param values: the values to serialize.
    :param compression: zlib level; 0 disables compression.
    """
    if postingsize < 0:
        # Variable-size values: pickle them, dropping the two leading
        # bytes of the pickle
        string = dumps(values, -1)[2:]
    elif postingsize == 0:
        # This format stores no values at all
        string = b('')
    else:
        # Fixed-size values are already bytes; just concatenate them
        string = b('').join(values)

    if compression and string:
        string = compress(string, compression)
    return string
示例9: encode
def encode(self, poses):
    """Encode ``(pos, startchar, endchar, boost)`` tuples into bytes.

    Returns a ``(encoded_bytes, summed_boost)`` tuple; positions and start
    chars are delta-encoded, end chars stored as lengths."""
    fb = self.field_boost
    deltas = []
    prev_pos = 0
    prev_endchar = 0
    total_boost = 0
    for pos, startchar, endchar, boost in poses:
        deltas.append((pos - prev_pos, startchar - prev_endchar,
                       endchar - startchar, boost))
        prev_pos = pos
        prev_endchar = endchar
        total_boost += boost

    header = pack_uint(len(poses)) + pack_float(total_boost * fb)
    return (header + dumps(deltas, 2), total_boost)
示例10: append
def append(self, values):
    """Pickle one row of stored-field values, append it to the data file,
    and record its (offset, length) in the directory."""
    dbfile = self.dbfile
    name_map = self.name_map

    # Slot the known fields into a fixed-order list
    row = [None] * len(name_map)
    for fieldname, fieldvalue in iteritems(values):
        if fieldname in name_map:
            row[name_map[fieldname]] = fieldvalue
        else:
            # For dynamic stored fields, put them at the end of the list
            # as a tuple of (fieldname, value)
            row.append((fieldname, fieldvalue))

    # Pickle the row, slicing off the pickle's first two and last bytes
    data = dumps(row, -1)[2:-1]
    self.length += 1
    self.directory.append(pack_stored_pointer(dbfile.tell(), len(data)))
    dbfile.write(data)
示例11: add
def add(self, vdict):
    """Add a dict of stored-field values as a new row, registering any
    field names not seen before."""
    dbfile = self.dbfile
    names = self.names
    name_map = self.name_map

    row = [None] * len(names)
    for fieldname, fieldvalue in iteritems(vdict):
        if fieldname in name_map:
            row[name_map[fieldname]] = fieldvalue
        else:
            # First time this field appears: assign it the next slot
            name_map[fieldname] = len(names)
            names.append(fieldname)
            row.append(fieldvalue)

    # Pickle the row as a tuple, slicing off the pickle's first two and
    # last bytes
    data = dumps(tuple(row), -1)[2:-1]
    self.length += 1
    self.directory.append(pack_stored_pointer(dbfile.tell(), len(data)))
    dbfile.write(data)
示例12: to_file
def to_file(self, postfile, compression=3):
    """Serialize this posting block to ``postfile``."""
    ids = self.ids

    # Minimize each component of the block separately
    idcode, idstring = minimize_ids(ids, self.stringids, compression)
    wtstring = minimize_weights(self.weights, compression)
    vstring = minimize_values(self.postingsize, self.values, compression)

    # Header info a reader can inspect to skip the block without decoding
    # the full contents
    info = (len(ids), ids[-1], self.maxweight,
            length_to_byte(self.minlength), length_to_byte(self.maxlength),
            idcode, compression, len(idstring), len(wtstring))
    infostring = dumps(info, -1)

    # Offset to next block
    total = (len(infostring) + len(idstring) + len(wtstring)
             + len(vstring))
    postfile.write_uint(total)

    # Block contents
    postfile.write(infostring)
    postfile.write(idstring)
    postfile.write(wtstring)
    postfile.write(vstring)
示例13: minimize_ids
def minimize_ids(arry, stringids, compression=0):
    """Convert a (sorted) array of document IDs to a compact byte string.

    Returns a ``(typecode, string)`` tuple, where ``typecode`` is the array
    typecode needed to read the bytes back (an empty string when the IDs
    were pickled string IDs).

    :param arry: an ``array`` of ints (or arbitrary IDs if ``stringids``).
    :param stringids: if True, pickle the IDs instead of packing them.
    :param compression: zlib level; 0 disables compression.
    """
    amax = arry[-1]

    if stringids:
        # String IDs can't be packed as fixed-size ints; pickle them
        typecode = ''
        string = dumps(arry)
    else:
        code = arry.typecode
        # Downgrade to the smallest typecode that can hold the largest ID.
        # BUG FIX: the original never assigned ``typecode`` when
        # amax > 65535, raising NameError; default to the array's own code.
        typecode = code
        if amax <= 255:
            typecode = "B"
        elif amax <= 65535:
            typecode = "H"
        if typecode != code:
            arry = array(typecode, iter(arry))
        if not IS_LITTLE:
            arry.byteswap()
        # array.tostring() was removed in Python 3.9; use tobytes() when
        # available (it returns the same bytes)
        if hasattr(arry, "tobytes"):
            string = arry.tobytes()
        else:
            string = arry.tostring()

    if compression:
        string = compress(string, compression)
    return (typecode, string)
示例14: test_pickleability
def test_pickleability():
    """Every concrete Column class must round-trip through pickle."""
    # Abstract base classes are not instantiated directly
    base_classes = (columns.Column, columns.WrappedColumn, columns.ListColumn)
    # Constructor arguments for the column types that require them
    init_args = {"ClampedNumericColumn": (columns.NumericColumn("B"),),
                 "FixedBytesColumn": (5,),
                 "FixedBytesListColumn": (5,),
                 "NumericColumn": ("i",),
                 "PickleColumn": (columns.VarBytesColumn(),),
                 "StructColumn": ("=if", (0, 0.0)),
                 }

    coltypes = [c for _, c in inspect.getmembers(columns, inspect.isclass)
                if issubclass(c, columns.Column) and c not in base_classes]

    for coltype in coltypes:
        args = init_args.get(coltype.__name__, ())
        try:
            inst = coltype(*args)
        except TypeError:
            # Re-raise with the offending class named (py2-compatible
            # exception capture)
            e = sys.exc_info()[1]
            raise TypeError("Error instantiating %r: %s" % (coltype, e))
        _ = loads(dumps(inst, -1))
示例15: to_bytes
def to_bytes(self):
    """Pack this term info into a byte string for storage."""
    inlined = self.is_inlined()

    # Encode the lengths as 0-255 values
    minlen_byte = (0 if self._minlength is None
                   else length_to_byte(self._minlength))
    maxlen_byte = length_to_byte(self._maxlength)

    # Convert None values to the out-of-band 0xffffffff constant so they
    # can be stored as unsigned ints
    lowest = 0xffffffff if self._minid is None else self._minid
    highest = 0xffffffff if self._maxid is None else self._maxid

    # Pack the fixed-size term statistics
    st = self._struct.pack(inlined, self._weight, self._df,
                           minlen_byte, maxlen_byte, self._maxweight,
                           lowest, highest)

    # Follow with either the pickled inlined postings, or the
    # (offset, length) pointer to the postings on disk
    if inlined:
        tail = dumps(self._inlined, -1)
    else:
        tail = pack_long(self._offset) + pack_int(self._length)
    return st + tail