本文整理汇总了Python中whoosh.compat.xrange函数的典型用法代码示例。如果您正苦于以下问题:Python xrange函数的具体用法?Python xrange怎么用?Python xrange使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了xrange函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: damerau_levenshtein
def damerau_levenshtein(seq1, seq2, limit=None):
    """Returns the Damerau-Levenshtein edit distance between two strings.

    :param seq1: the first sequence.
    :param seq2: the second sequence.
    :param limit: if given, once the distance is known to exceed this
        value the computation short-circuits and ``limit + 1`` is returned.
    """
    prev_prev = None
    prev_row = None
    # Python lists wrap around for negative indices, so the leftmost
    # column lives at the *end* of each row; this lines up with the
    # zero-indexed strings and avoids extra index arithmetic.
    cur_row = list(range(1, len(seq2) + 1)) + [0]
    for i in range(len(seq1)):
        prev_prev, prev_row, cur_row = (prev_row, cur_row,
                                        [0] * len(seq2) + [i + 1])
        for j in range(len(seq2)):
            deletion = prev_row[j] + 1
            insertion = cur_row[j - 1] + 1
            substitution = prev_row[j - 1] + (seq1[i] != seq2[j])
            cur_row[j] = min(deletion, insertion, substitution)
            # Transposition of two adjacent characters
            if (i > 0 and j > 0 and seq1[i] == seq2[j - 1]
                    and seq1[i - 1] == seq2[j] and seq1[i] != seq2[j]):
                cur_row[j] = min(cur_row[j], prev_prev[j - 2] + 1)
        if limit and i > limit and min(cur_row) > limit:
            return limit + 1
    return cur_row[len(seq2) - 1]
示例2: test_roundtrip
def test_roundtrip():
    """Round-trip sample values through each column type."""
    # Byte-string columns, fixed and variable width
    _rt(columns.VarBytesColumn(),
        [b("a"), b("ccc"), b("bbb"), b("e"), b("dd")], b(""))
    _rt(columns.FixedBytesColumn(5),
        [b("aaaaa"), b("eeeee"), b("ccccc"), b("bbbbb"), b("eeeee")],
        b("\x00") * 5)
    _rt(columns.RefBytesColumn(),
        [b("a"), b("ccc"), b("bb"), b("ccc"), b("a"), b("bb")], b(""))
    _rt(columns.RefBytesColumn(3),
        [b("aaa"), b("bbb"), b("ccc"), b("aaa"), b("bbb"), b("ccc")],
        b("\x00") * 3)
    # Struct-packed tuples
    _rt(columns.StructColumn("ifH", (0, 0.0, 0)),
        [(100, 1.5, 15000), (-100, -5.0, 0), (5820, 6.5, 462),
         (-57829, -1.5, 6), (0, 0, 0)],
        (0, 0.0, 0))
    # Numeric columns, one case per struct typecode
    numeric_cases = [
        ("b", [10, -20, 30, -25, 15]),
        ("B", [10, 20, 30, 25, 15]),
        ("h", [1000, -2000, 3000, -15000, 32000]),
        ("H", [1000, 2000, 3000, 15000, 50000]),
        ("i", [2 ** 16, -(2 ** 20), 2 ** 24, -(2 ** 28), 2 ** 30]),
        ("I", [2 ** 16, 2 ** 20, 2 ** 24, 2 ** 28, 2 ** 31 & 0xFFFFFFFF]),
        ("q", [10, -20, 30, -25, 15]),
        ("Q", [2 ** 35, 2 ** 40, 2 ** 48, 2 ** 52, 2 ** 63]),
        ("f", [1.5, -2.5, 3.5, -4.5, 1.25]),
        ("d", [1.5, -2.5, 3.5, -4.5, 1.25]),
    ]
    for typecode, values in numeric_cases:
        _rt(columns.NumericColumn(typecode), values, 0)
    # Bit column, below and above the compression threshold
    bitcol = columns.BitColumn(compress_at=10)
    for count in (70, 90):
        _rt(bitcol, [bool(random.randint(0, 1)) for _ in xrange(count)], False)
    # Pickled arbitrary objects over a variable-width byte column
    picklecol = columns.PickleColumn(columns.VarBytesColumn())
    _rt(picklecol, [None, True, False, 100, -7, "hello"], None)
示例3: test_charboost_postings
def test_charboost_postings():
    """Round-trip (pos, startchar, endchar, boost) postings and check
    every reduced view of the same data."""
    postings = []
    docnum = 0
    for _ in xrange(0, 20):
        docnum += randint(1, 10)
        position_list = []
        cur_pos = 0
        last_end = 0
        for __ in xrange(0, randint(1, 10)):
            cur_pos += randint(1, 10)
            begin = last_end + randint(3, 10)
            last_end = begin + randint(3, 10)
            # Boost is quantized through a byte so it round-trips exactly
            weight = byte_to_float(float_to_byte(random() * 2))
            position_list.append((cur_pos, begin, last_end, weight))
        postings.append((docnum, position_list))
    assert_equal(postings,
                 roundtrip(postings, CharacterBoosts(), "character_boosts"))

    def reduced(keep):
        # Project each 4-tuple posting down via *keep*
        return [(dn, [keep(item) for item in plist])
                for dn, plist in postings]

    assert_equal(reduced(lambda it: (it[0], it[1], it[2])),
                 roundtrip(postings, CharacterBoosts(), "characters"))
    assert_equal(reduced(lambda it: (it[0], it[3])),
                 roundtrip(postings, CharacterBoosts(), "position_boosts"))
    as_positions = reduced(lambda it: it[0])
    assert_equal(as_positions,
                 roundtrip(postings, CharacterBoosts(), "positions"))
    assert_equal([(dn, len(plist)) for dn, plist in as_positions],
                 roundtrip(postings, CharacterBoosts(), "frequency"))
示例4: suggestions_and_scores
def suggestions_and_scores(self, text, weighting=None):
    """Query the n-gram index for words similar to *text* and return a
    list of ``(word, stored_score, search_score)`` tuples, excluding
    *text* itself.

    :param text: the string to find suggestions for.
    :param weighting: a scoring object; defaults to TF-IDF.
    """
    if weighting is None:
        weighting = scoring.TF_IDF()
    # Break the text into n-grams of every size this object indexes
    grams = defaultdict(list)
    for gramsize in xrange(self.mingram, self.maxgram + 1):
        fieldname = "gram%s" % gramsize
        analyzer = analysis.NgramAnalyzer(gramsize)
        grams[fieldname].extend(token.text for token in analyzer(text))
    # Build an OR query over the grams, weighting the leading and
    # trailing gram of each size with the configured boosts
    subqueries = []
    for gramsize in xrange(self.mingram, min(self.maxgram + 1, len(text))):
        fieldname = "gram%s" % gramsize
        gramlist = grams[fieldname]
        subqueries.append(query.Term("start%s" % gramsize, gramlist[0],
                                     boost=self.booststart))
        subqueries.append(query.Term("end%s" % gramsize, gramlist[-1],
                                     boost=self.boostend))
        subqueries.extend(query.Term(fieldname, gram) for gram in gramlist)
    combined = query.Or(subqueries)
    ix = self.index()
    searcher = ix.searcher(weighting=weighting)
    try:
        results = searcher.search(combined, limit=None)
        return [(stored["word"], stored["score"], results.score(i))
                for i, stored in enumerate(results)
                if stored["word"] != text]
    finally:
        searcher.close()
示例5: test_datetime
def test_datetime():
    """Index dated documents and exercise DATETIME query parsing."""
    date_field = fields.DATETIME(stored=True)
    schema = fields.Schema(id=fields.ID(stored=True), date=date_field)
    storage = RamStorage()
    ix = storage.create_index(schema)
    writer = ix.writer()
    # NOTE(review): xrange excludes the stop, so this covers months 1-11
    # and days 1-27 only — the count assertions below depend on that
    for month in xrange(1, 12):
        for day in xrange(1, 28):
            writer.add_document(id=u("%s-%s") % (month, day),
                                date=datetime(2010, month, day, 14, 0, 0))
    writer.commit()
    with ix.searcher() as searcher:
        parser = qparser.QueryParser("id", schema)
        # Exact-day query
        hits = searcher.search(parser.parse("date:20100523"))
        assert len(hits) == 1
        assert hits[0]["id"] == "5-23"
        assert hits[0]["date"].__class__ is datetime
        assert hits[0]["date"].month == 5
        assert hits[0]["date"].day == 23
        # Whole-month query: 27 indexed days in February
        hits = searcher.search(parser.parse("date:'2010 02'"))
        assert len(hits) == 27
        # Month-range query parses to a numeric range over long timestamps
        range_q = parser.parse(u("date:[2010-05 to 2010-08]"))
        range_start = datetime(2010, 5, 1, 0, 0, 0, 0)
        range_end = datetime(2010, 8, 31, 23, 59, 59, 999999)
        assert range_q.__class__ is query.NumericRange
        assert range_q.start == times.datetime_to_long(range_start)
        assert range_q.end == times.datetime_to_long(range_end)
示例6: test_merge_random
def test_merge_random():
    """merge_items must agree with dict-union semantics on random input."""
    pairs_a = sorted((random_name(4), random_name(8)) for _ in xrange(500))
    pairs_b = sorted((random_name(4), random_name(8)) for _ in xrange(500))
    # Later pairs win on duplicate keys, matching dict() construction
    expected = sorted(dict(pairs_a + pairs_b).items())
    merged = list(kv.merge_items(pairs_a, pairs_b))
    assert expected == merged
示例7: fill
def fill(self, docnum):
    """Pad the column with zero values up to (but not including) *docnum*.

    :param docnum: the document number the column should be padded to.
    """
    missing = docnum - self._count
    if missing > 0:
        if self._refs is not None:
            # In-memory ref list: append one zero ref per missing doc
            self._refs.extend([0] * missing)
        else:
            # On-disk column: write one zero ushort per missing doc
            write = self._dbfile.write_ushort
            for _ in xrange(missing):
                write(0)
示例8: read_qsafe_array
def read_qsafe_array(typecode, size, dbfile):
    """Read *size* items of *typecode* from *dbfile* and return them.

    64-bit integer typecodes ("q"/"Q") are read one value at a time into
    a plain list — presumably to sidestep platforms where the array
    module lacks those codes (TODO confirm); everything else goes
    through the file's native array reader.
    """
    if typecode == "q":
        return [dbfile.read_long() for _ in xrange(size)]
    if typecode == "Q":
        return [dbfile.read_ulong() for _ in xrange(size)]
    return dbfile.read_array(typecode, size)
示例9: __iter__
def __iter__(self):
    """Yield one boolean per document number: True for numbers present
    in the bit set, False for the gaps, padded with False out to the
    document count."""
    cursor = 0
    for onbit in self._bitset:
        # Emit False for every doc number between cursor and this set bit
        while cursor < onbit:
            yield False
            cursor += 1
        yield True
        cursor = onbit + 1
    # Pad the tail with False up to the total document count
    total = self._doccount
    while cursor < total:
        yield False
        cursor += 1
示例10: __call__
def __call__(
    self,
    value,
    positions=False,
    chars=False,
    keeporiginal=False,
    removestops=True,
    start_pos=0,
    start_char=0,
    mode="",
    **kwargs
):
    """Yield n-gram tokens sliced from *value*.

    In "query" mode only one gram size is used (the smaller of
    ``self.max`` and the input length); otherwise every size from
    ``self.min`` through ``self.max`` is emitted at each start offset.
    The same Token object is mutated and re-yielded for every gram.
    """
    assert isinstance(value, text_type), "%r is not unicode" % value
    inlen = len(value)
    token = Token(positions, chars, removestops=removestops, mode=mode)
    cur_pos = start_pos
    # Build the (start, end) spans for the mode, then emit them all
    # through a single loop
    if mode == "query":
        gram = min(self.max, inlen)
        spans = ((begin, begin + gram)
                 for begin in xrange(0, inlen - gram + 1))
    else:
        spans = ((begin, begin + size)
                 for begin in xrange(0, inlen - self.min + 1)
                 for size in xrange(self.min, self.max + 1))
    for begin, finish in spans:
        if finish > inlen:
            continue
        token.text = value[begin:finish]
        if keeporiginal:
            token.original = token.text
        token.stopped = False
        if positions:
            token.pos = cur_pos
        if chars:
            token.startchar = start_char + begin
            token.endchar = start_char + finish
        yield token
        cur_pos += 1
示例11: run
def run(self):
    """Thread body: commit 50 batches of randomly generated documents,
    sleeping briefly between commits."""
    ix = st.create_index(dir, schema)
    doc_id = 0
    for batch in xrange(50):
        print(batch)
        writer = ix.writer()
        # Each batch adds a random number of random-content documents
        for _ in xrange(random.randint(1, 100)):
            content = u(" ").join(random.sample(domain,
                                                random.randint(5, 20)))
            writer.add_document(id=text_type(doc_id), content=content)
            doc_id += 1
        writer.commit()
        time.sleep(0.1)
示例12: _read_weights
def _read_weights(self):
    """Populate ``self._weights`` from the block data, expanding the
    minified on-disk forms back into a float array."""
    # Lazily load the block data from disk on first use
    if self._data is None:
        self._read_data()
    raw = self._data[1]
    count = self._blocklength
    # De-minify: None means "all 1.0", a single float means "all equal",
    # anything else is already the full weights sequence
    if raw is None:
        weights = array("f", [1.0] * count)
    elif isinstance(raw, float):
        weights = array("f", [raw] * count)
    else:
        weights = raw
    self._weights = weights
示例13: test_boolean_find_deleted
def test_boolean_find_deleted():
    """Deleted documents must never surface through BOOLEAN, Every ("*"),
    or NOT queries.

    Indexes five segments of documents whose "b" field follows the bit
    pattern below, deletes every b=True document, then checks several
    query shapes against the deletions.
    """
    # "Random" string of ones and zeros representing deleted and undeleted
    domain = "1110001010001110010101000101001011101010001011111101000101010101"
    schema = fields.Schema(i=fields.STORED, b=fields.BOOLEAN(stored=True))
    ix = RamStorage().create_index(schema)
    count = 0
    # Create multiple segments just in case
    for _ in xrange(5):
        w = ix.writer()
        for c in domain:
            w.add_document(i=count, b=(c == "1"))
            # BUG FIX: count was never incremented, so every document was
            # stored with i=0; advance it so each document gets a unique id
            count += 1
        w.commit(merge=False)
    # Delete documents where "b" is True
    with ix.writer() as w:
        w.delete_by_term("b", "t")
    with ix.searcher() as s:
        # Double check that documents with b=True are all deleted
        reader = s.reader()
        for docnum in xrange(s.doc_count_all()):
            b = s.stored_fields(docnum)["b"]
            assert b == reader.is_deleted(docnum)
        # Try doing a search for documents where b=True
        qp = qparser.QueryParser("b", ix.schema)
        q = qp.parse("b:t")
        r = s.search(q, limit=None)
        assert len(r) == 0
        # Make sure Every query doesn't match deleted docs
        r = s.search(qp.parse("*"), limit=None)
        assert not any(hit["b"] for hit in r)
        assert not any(reader.is_deleted(hit.docnum) for hit in r)
        r = s.search(qp.parse("*:*"), limit=None)
        assert not any(hit["b"] for hit in r)
        assert not any(reader.is_deleted(hit.docnum) for hit in r)
        # Make sure Not query doesn't match deleted docs
        q = qp.parse("NOT b:t")
        r = s.search(q, limit=None)
        assert not any(hit["b"] for hit in r)
        assert not any(reader.is_deleted(hit.docnum) for hit in r)
        r = s.search(q, limit=5)
        assert not any(hit["b"] for hit in r)
        assert not any(reader.is_deleted(hit.docnum) for hit in r)
示例14: __init__
def __init__(self, dbfile, magic=b("HSH3"), hashtype=0):
    """
    :param dbfile: a :class:`~whoosh.filedb.structfile.StructFile` object
        to write to.
    :param magic: the format tag bytes to write at the start of the file.
    :param hashtype: an integer indicating which hashing algorithm to use.
        Possible values are 0 (MD5), 1 (CRC32), or 2 (CDB hash).
    """
    self.dbfile = dbfile
    self.hashtype = hashtype
    self.hashfn = _hash_functions[self.hashtype]
    # A place for subclasses to put extra metadata
    self.extras = {}
    self.startoffset = dbfile.tell()
    # Header: format tag, hash algorithm id, then two reserved ints for
    # future expansion
    dbfile.write(magic)
    dbfile.write_byte(self.hashtype)
    dbfile.write_int(0)
    dbfile.write_int(0)
    # 256 lists of hashed keys and positions
    self.buckets = [list() for _ in xrange(256)]
    # List to remember the positions of the hash tables
    self.directory = []
示例15: all_doc_ids
def all_doc_ids(self):
    """Returns an iterator of all (undeleted) document IDs in the reader.
    """
    # Returned as a generator expression so doc_count_all() is evaluated
    # immediately while the deletion filter runs lazily
    deleted = self.is_deleted
    return (n for n in xrange(self.doc_count_all()) if not deleted(n))