本文整理汇总了Python中xapian.sortable_serialise函数的典型用法代码示例。如果您正苦于以下问题：Python sortable_serialise函数的具体用法？Python sortable_serialise怎么用？Python sortable_serialise使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了sortable_serialise函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: index_document
def index_document(self, document, properties):
    """Store the sortable values of an entry and index its properties.

    The mandatory 'timestamp' property is stored as a sortable float and
    the (stripped) 'title' as a plain string.  'filesize' and
    'creation_time' are optional: when present but not convertible to a
    number they are logged at debug level and left out of the document.
    """
    timestamp = float(properties['timestamp'])
    document.add_value(_VALUE_TIMESTAMP, xapian.sortable_serialise(timestamp))

    title = properties.get('title', '').strip()
    document.add_value(_VALUE_TITLE, title)

    if 'filesize' in properties:
        raw_size = properties['filesize']
        try:
            document.add_value(_VALUE_FILESIZE,
                               xapian.sortable_serialise(int(raw_size)))
        except (ValueError, TypeError):
            logging.debug('Invalid value for filesize property: %s',
                          raw_size)

    if 'creation_time' in properties:
        raw_ctime = properties['creation_time']
        try:
            document.add_value(_VALUE_CREATION_TIME,
                               xapian.sortable_serialise(float(raw_ctime)))
        except (ValueError, TypeError):
            logging.debug('Invalid value for creation_time property: %s',
                          raw_ctime)

    self.set_document(document)

    # Hand a copy of the mapping to the two indexing helpers.
    properties = dict(properties)
    self._index_known(document, properties)
    self._index_unknown(document, properties)
示例2: test_value_stats
def test_value_stats():
    """Check the per-slot value statistics reported by a database.

    Ten documents get a sortable value in slot 1; slots 0 and 2 stay
    empty and must report a zero frequency and empty bounds.
    """
    dbpath = "db_test_value_stats"
    db = xapian.chert_open(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    vals = (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
    for number in vals:
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(number))
        db.add_document(doc)

    # (slot, expected frequency, expected lower bound, expected upper bound)
    expectations = (
        (0, 0, "", ""),
        (1, 10, xapian.sortable_serialise(0), xapian.sortable_serialise(9)),
        (2, 0, "", ""),
    )
    for slot, freq, lower, upper in expectations:
        expect(db.get_value_freq(slot), freq)
        expect(db.get_value_lower_bound(slot), lower)
        expect(db.get_value_upper_bound(slot), upper)

    db.close()
    shutil.rmtree(dbpath)
示例3: __call__
def __call__(self, begin, end):
    """
    Construct a tuple for value range processing.

    `begin` -- a string in the format '<field_name>:[low_range]'.
               If 'low_range' is omitted, assume the smallest possible value.
    `end` -- a string in the format '[high_range|*]'.  If '*', assume
             the highest possible value.

    Return a tuple of three values: (column, low, high), where low and
    high are strings.  Returns None (implicitly) when no entry of
    `self.sb.schema` has a matching 'field_name'.
    """
    colon = begin.find(':')
    field_name = begin[:colon]
    begin = begin[colon + 1:]

    for field_dict in self.sb.schema:
        if field_dict['field_name'] != field_name:
            continue

        field_type = field_dict['type']
        is_numeric = field_type == 'long' or field_type == 'float'
        is_date = field_type == 'date' or field_type == 'datetime'

        if not begin:
            if field_type == 'text':
                begin = u'a'  # TODO: A better way of getting a min text value?
            elif is_numeric:
                begin = float('-inf')
            elif is_date:
                begin = u'00010101000000'

        # Deliberately an independent `if`: a range that is open on both
        # ends (empty low bound *and* '*') must have both bounds expanded,
        # otherwise '*' would reach float() / be returned verbatim.
        if end == '*':
            if field_type == 'text':
                end = u'z' * 100  # TODO: A better way of getting a max text value?
            elif is_numeric:
                end = float('inf')
            elif is_date:
                end = u'99990101000000'

        if is_numeric:
            # Numeric bounds must use the same sortable encoding as the
            # stored values so that string comparison orders them correctly.
            begin = xapian.sortable_serialise(float(begin))
            end = xapian.sortable_serialise(float(end))

        return field_dict['column'], str(begin), str(end)
示例4: __call__
def __call__(self, doc):
    """Return the sortable-serialised dampened rating for `doc`.

    The application is looked up from the document's app and package
    names; when the review loader has no stats for it, 0 is serialised.
    """
    app_name = self.db.get_appname(doc)
    pkg_name = self.db.get_pkgname(doc)
    stats = self.review_loader.get_review_stats(Application(app_name, pkg_name))
    # Imported at call time, as in the original.
    import xapian
    rating = stats.dampened_rating if stats else 0
    return xapian.sortable_serialise(rating)
示例5: get_msg_terms
def get_msg_terms(db=None,msg=None):
    """Collect the document data, terms and values for a message.

    Returns a dict with three keys:
      "doc_data"   -- the message content, plus the address of the first
                      place when the message has places
      "doc_terms"  -- stemmed non-stopword words and special "_..." terms
      "doc_values" -- list of {"field": ..., "value": ...} dicts holding
                      sortable-serialised coordinates and creation date
    """
    # This is pretty important: what data to be shown from the thing?
    # Maybe should be parsed into json already? Or serialise a hash somehow?
    doc_data = msg.content
    doc_values = []
    doc_terms = []
    stemmer = xapian.Stem("finnish")

    def stemmed_words(text):
        # Lower-case the text, pick 3-35 letter words, drop stopwords, stem.
        found = re.finditer(r'\b[a-zA-ZäöåüÄÖÅÜÉÈÁÀéèáà]{3,35}\b', to_lower_case(text))
        words = (hit.group(0) for hit in found)
        return [stemmer(word) for word in words if not is_stopword(word)]

    doc_terms.extend(stemmed_words(msg.content))
    doc_terms.append("_commented-by_"+msg.author)
    if msg.date:
        # First seven characters of the date's string form.
        doc_terms.append("_c_"+str(msg.date)[:7])

    # Built but not used anywhere in this function.
    official_terms = ["_o_"+msg.id]

    if msg.places:
        # Only the first place of the message is indexed.
        place = db.place.getnode(msg.places[0])
        doc_terms.extend(get_place_terms(place = place))
        doc_terms.extend(stemmed_words(place.address))
        doc_data += " " + place.address
        doc_terms.extend("_glatrange_"+part for part in get_latlng_range(place.lat))
        doc_terms.extend("_glngrange_"+part for part in get_latlng_range(place.lng))
        latitude = xapian.sortable_serialise(float(place.lat))
        doc_values.append({"field": XAPIAN_X_COORD_FIELD, "value": latitude})
        longitude = xapian.sortable_serialise(float(place.lng))
        doc_values.append({"field": XAPIAN_Y_COORD_FIELD, "value": longitude})

    if msg.date:
        created = xapian.sortable_serialise(float(msg.date.serialise()))
        doc_values.append({"field": XAPIAN_CREATED_FIELD, "value": created})

    return {"doc_data":doc_data,
            "doc_terms":doc_terms,
            "doc_values":doc_values }
示例6: index
def index(datapath, dbpath):
    """Index the records yielded by parse_states(datapath) into dbpath.

    Each record becomes one document, keyed by a "Q" + order id term so
    that re-running the indexer replaces documents instead of adding
    duplicates.
    """
    # Create or open the database we're going to be writing to.
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # One English-stemming term generator is shared by all documents.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    for fields in parse_states(datapath):
        # 'fields' is a dictionary mapping from field name to value.
        name = fields.get('name', u'')
        description = fields.get('description', u'')
        motto = fields.get('motto', u'')
        admitted = fields.get('admitted', None)
        population = fields.get('population', None)
        order = fields.get('order', u'')

        doc = xapian.Document()
        termgenerator.set_document(doc)

        # Prefixed terms for field-specific search.
        for text, prefix in ((name, 'S'), (description, 'XD'), (motto, 'XM')):
            termgenerator.index_text(text, 1, prefix)

        # Unprefixed terms for general search, with a position gap
        # between consecutive fields.
        for position, text in enumerate((name, description, motto)):
            if position:
                termgenerator.increase_termpos()
            termgenerator.index_text(text)

        # Document values.
        if admitted is not None:
            admission_year = int(admitted[:4])
            doc.add_value(1, xapian.sortable_serialise(admission_year))
            doc.add_value(2, admitted) # YYYYMMDD
        if population is not None:
            doc.add_value(3, xapian.sortable_serialise(population))

        # Coordinates are stored as a "lat,lon" string when both are truthy.
        midlat = fields['midlat']
        midlon = fields['midlon']
        if midlat and midlon:
            doc.add_value(4, "%f,%f" % (midlat, midlon))

        # Store all the fields for display purposes.
        doc.set_data(json.dumps(fields))

        # The identifier term guarantees one document per record.
        idterm = u"Q" + order
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
示例7: make_value
def make_value(s, term):
    """Parse the string `s` according to `term` and return it
    sortable-serialised.

    'mtime'  -- parsed with time.strptime() and converted to epoch seconds
    'rating' -- parsed as a float
    anything else (including 'year') -- parsed as an int
    """
    if term == 'mtime':
        number = time.mktime(time.strptime(s))
    elif term == 'rating':
        number = float(s)
    else:
        # 'year' and every unrecognised term share the integer path.
        number = int(s)
    return xapian.sortable_serialise(number)
示例8: create_index
def create_index(filename,databasePath):
print "begin read",filename
if not os.path.exists(databasePath):
os.makedirs(databasePath)
database = xapian.WritableDatabase(databasePath, xapian.DB_CREATE_OR_OPEN)
stemmer=xapian.Stem('english')
rex=re.compile(r'[0-9]+|[a-zA-Z]+|[\x80-\xff3]{3}')
lines=open(filename).readlines()
processed=0
len_file=len(lines)
print filename,"read end"
time_begin=time.time()
for line in lines:
try:
line=line.encode('utf-8')
except:
continue
line_items=line.split('\t')
document = xapian.Document()
freq_sortable=xapian.sortable_serialise(float(line_items[3]))
click_sortable=xapian.sortable_serialise(float(line_items[4]))
document.add_value(FREQ,freq_sortable)
document.add_value(CLICK,click_sortable)
document.add_value(DATE,line_items[1])
document.set_data(line_items[0])
terms=rex.findall(line_items[0])
for term in terms:
if len(term) > MAX_TERM_LENGTH:
document.add_term(stemmer(term[:MAX_TERM_LENGTH]))
else:
document.add_term(stemmer(term))
database.add_document(document)
processed+=1
del line
del line_items
del document
del freq_sortable
del click_sortable
del terms
if processed%100000==0:
end=time.time()
speed=100000/float(end-time_begin)
print "="*40
print filename
print "speed:\t",speed
print "percent:\t%s %%" %(100.0*(processed/float(len_file)))
print "time remain:\t %s hours" %( (len_file-processed)/(speed*3600))
time_begin=time.time()
gc.collect()
os.system("rm -rf %s" % filename)
print filename,"end"
示例9: index
def index(datapath, dbpath):
    """Index the rows yielded by parse_csv_file(datapath) into dbpath.

    Besides the text terms, two sortable values are stored when they can
    be extracted: the largest number found in MEASUREMENTS (slot 0) and
    the first number found in DATE_MADE (slot 1).
    """
    # Create or open the database we're going to be writing to.
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # One English-stemming term generator is shared by all documents.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    for fields in parse_csv_file(datapath):
        # 'fields' is a dictionary mapping from field name to value.
        description = fields.get('DESCRIPTION', u'')
        title = fields.get('TITLE', u'')
        identifier = fields.get('id_NUMBER', u'')

        doc = xapian.Document()
        termgenerator.set_document(doc)

        # Prefixed terms for field-specific search.
        for text, prefix in ((title, 'S'), (description, 'XD')):
            termgenerator.index_text(text, 1, prefix)

        # Unprefixed terms for general search, separated by a position gap.
        termgenerator.index_text(title)
        termgenerator.increase_termpos()
        termgenerator.index_text(description)

        # Store all the fields for display purposes.
        doc.set_data(json.dumps(fields, encoding='utf8'))

        # Slot 0: largest measurement, only when the field is non-empty.
        measurements = fields.get('MEASUREMENTS', u'')
        if measurements != u'':
            numbers = numbers_from_string(measurements)
            if len(numbers) > 0:
                largest = max(numbers)
                doc.add_value(0, xapian.sortable_serialise(largest))

        # Slot 1: first number appearing in the date-made field.
        years = numbers_from_string(fields.get('DATE_MADE', u''))
        if len(years) > 0:
            doc.add_value(1, xapian.sortable_serialise(years[0]))

        # The identifier term guarantees one document per object no
        # matter how many times the indexer runs.
        idterm = u"Q" + identifier
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
示例10: _encode_simple_value
def _encode_simple_value(field_cls, value):
    """Encode `value` for storage according to its field class.

    Integer and Decimal fields are sortable-serialised; every other
    field class encodes the value itself through `field_cls.encode`.
    """
    # Integers (FIXME this doesn't work with the big integers)
    if issubclass(field_cls, Integer):
        return sortable_serialise(value)

    # FIXME: decimal -> float conversion loses precision
    if issubclass(field_cls, Decimal):
        return sortable_serialise(float(value))

    # Exact datetime instances are normalised to UTC so searching works.
    encodable = value
    if type(value) is datetime:
        encodable = value.astimezone(fixed_offset(0))

    # A common field or a new field
    return field_cls.encode(encodable)
示例11: dict2doc
def dict2doc(y):
    """Build a xapian.Document from the bookmark dictionary `y`.

    Reads 'url' and 'tags' (required) and 'title', 'notes', 'created'
    and 'archived' (optional).  Note that y['tags'] is modified in place:
    'archived' is appended when an archive exists and duplicates are
    removed while preserving order.
    """
    doc = xapian.Document()
    indexer.set_document(doc)

    url = y['url']
    uid = urlid(url)
    sid = uid[:8]
    doc.add_boolean_term(P['id'] + uid)
    # The id and short id are also added unprefixed and 'Z'-prefixed to
    # make it easier to select bookmarks from search results.
    for idterm in (uid, sid, 'Z' + uid, 'Z' + sid):
        doc.add_boolean_term(idterm)
    doc.add_value(VALUE_URL, url)

    # One site term per hostname suffix (a.b.c, b.c, c).
    hostname = urlparse(url).hostname
    if hostname:
        labels = hostname.split('.')
        for start, _ in enumerate(labels):
            doc.add_boolean_term(P['site'] + '.'.join(labels[start:]))

    archive_path = get_archive_path(uid)
    if archive_path:
        y['tags'].append('archived')

    # Remove duplicate tags, preserving order.
    unique_tags = list(OrderedDict.fromkeys(y['tags']))
    y['tags'] = unique_tags
    doc.add_value(VALUE_TAGS, u'\x1f'.join(unique_tags))
    for tag in unique_tags:
        doc.add_boolean_term(P['tag'] + tag)

    if 'title' in y:
        title = y['title']
        doc.add_value(VALUE_TITLE, title)
        index_text(title, 'title')

    if 'notes' in y:
        notes = y['notes']
        doc.set_data(notes)
        index_text(notes, 'notes')

    # Missing dates default to the current UTC time.
    created = y.get('created', arrow.utcnow()).timestamp
    doc.add_value(VALUE_CREATED, xapian.sortable_serialise(created))

    if archive_path:
        archived = y.get('archived', arrow.utcnow()).timestamp
        doc.add_value(VALUE_ARCHIVED, xapian.sortable_serialise(archived))
        index_archive(doc, archive_path)

    return doc
示例12: normalize_range
def normalize_range(begin, end):
    """Convert the bounds of a range query into their string form.

    A float bound is sortable-serialised, any other non-None bound is
    passed through str(), and None is left untouched.
    Returns the converted (begin, end) pair.
    """
    def _as_bound(bound):
        if bound is None:
            return None
        if isinstance(bound, float):
            return xapian.sortable_serialise(float(bound))
        return str(bound)

    return _as_bound(begin), _as_bound(end)
示例13: add_product
def add_product(self, product, database_path=None):
    """Adds product to repository.

    product - Product to be added to database
    database_path - Path of the database where product is added. Default: None

    When repository has been created with many database paths then
    database_path must be defined (checked with an assertion, so the
    check is skipped when Python runs with -O).  Without database_path
    the product is written to self._db.
    """
    if len(self._databases) > 1:
        assert database_path is not None, \
            "With many databases you must identify the database where product is added"

    # Set up a TermGenerator that we'll use in indexing.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(self._create_stem())

    # We make a document and tell the term generator to use this.
    doc = xapian.Document()
    termgenerator.set_document(doc)
    termgenerator.index_text(unicode(product.title))
    termgenerator.index_text(unicode(product.description))

    # The whole product is kept as JSON for display; the price goes into
    # slot 0 in sortable form.
    doc.set_data(unicode(json.dumps(product.__dict__)))
    doc.add_value(0, xapian.sortable_serialise(float(product.price)))

    # The URL-based id term makes re-adding a product replace the old
    # document instead of duplicating it.
    idterm = "Q" + product.url
    doc.add_boolean_term(idterm)

    db = self._databases[database_path] if database_path else self._db
    db.replace_document(idterm, doc)
示例14: convert
def convert(self, field_value):
    """
    Generates the index value (for sorting) for the given field value,
    depending on its content type.  Returns None for a None value.
    """
    if field_value is None:
        return None

    content_type = self._get_content_type(field_value)

    if self._is_float_or_interger(content_type):
        value = xapian.sortable_serialise(field_value)
    elif isinstance(content_type, (models.BooleanField, bool)):
        # Boolean fields are stored as 't' or 'f'
        value = 't' if field_value else 'f'
    elif isinstance(content_type, (models.DateTimeField, datetime.datetime)):
        # DateTime fields are stored as YYYYMMDDHHMMSS (better sorting)
        parts = (field_value.year, field_value.month, field_value.day,
                 field_value.hour, field_value.minute, field_value.second)
        value = '%d%02d%02d%02d%02d%02d' % parts
    else:
        # Any other content type is stored unchanged.
        value = field_value

    return smart_str(value)
示例15: index
def index(contacts, database, prefixes):
    """Index the contacts configuration into a Xapian database.

    contacts -- passed to config(), which yields (person, data) pairs
    database -- path of the database to create or open
    prefixes -- path of the file receiving the JSON list of the text
                prefixes that were indexed

    A prefix starting with a digit 0-4 stores its content as a sortable
    integer in that slot, a prefix starting with 5-9 stores the content
    verbatim in that slot, and any other prefix is indexed as text.
    """
    entries = config(contacts)
    db = xapian.WritableDatabase(database, xapian.DB_CREATE_OR_OPEN)
    seen_prefixes = set()

    numeric_slots = digits[:5]
    string_slots = digits[5:]

    for person, data in entries:
        doc = xapian.Document()
        termgenerator.set_document(doc)
        termgenerator.index_text(person, 1, u'id')

        for prefix, content in data:
            lead = prefix[0]
            if lead in numeric_slots:
                doc.add_value(int(lead), xapian.sortable_serialise(int(content)))
            elif lead in string_slots:
                doc.add_value(int(lead), content)
            else:
                # Prefixed and unprefixed text, then a position gap.
                termgenerator.index_text(content, 1, u'X' + prefix)
                termgenerator.index_text(content)
                termgenerator.increase_termpos()
                seen_prefixes.add(prefix)

        # One document per person, replaced on re-indexing.
        idterm = u'Q' + person
        doc.add_boolean_term(idterm)
        doc.set_data(person)
        db.replace_document(idterm, doc)

    with open(prefixes, 'wb') as fp:
        json.dump(list(seen_prefixes), fp)