本文整理汇总了Python中BTrees.OIBTree.OIBTree.get方法的典型用法代码示例。如果您正苦于以下问题:Python OIBTree.get方法的具体用法?Python OIBTree.get怎么用?Python OIBTree.get使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 BTrees.OIBTree.OIBTree 的用法示例。
在下文中一共展示了OIBTree.get方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: DateIndex
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class DateIndex(UnIndex):
""" Index for Dates """
__implements__ = (PluggableIndex.PluggableIndexInterface,)
meta_type = 'DateIndex'
query_options = ['query', 'range']
manage = manage_main = DTMLFile( 'dtml/manageDateIndex', globals() )
manage_main._setName( 'manage_main' )
manage_options = ( { 'label' : 'Settings'
, 'action' : 'manage_main'
},
)
def clear( self ):
""" Complete reset """
self._index = IOBTree()
self._unindex = OIBTree()
def index_object( self, documentId, obj, threshold=None ):
"""index an object, normalizing the indexed value to an integer
o Normalized value has granularity of one minute.
o Objects which have 'None' as indexed value are *omitted*,
by design.
"""
returnStatus = 0
try:
date_attr = getattr( obj, self.id )
if callable( date_attr ):
date_attr = date_attr()
ConvertedDate = self._convert( value=date_attr, default=_marker )
except AttributeError:
ConvertedDate = _marker
oldConvertedDate = self._unindex.get( documentId, _marker )
if ConvertedDate != oldConvertedDate:
if oldConvertedDate is not _marker:
self.removeForwardIndexEntry(oldConvertedDate, documentId)
if ConvertedDate is not _marker:
self.insertForwardIndexEntry( ConvertedDate, documentId )
self._unindex[documentId] = ConvertedDate
returnStatus = 1
return returnStatus
def _apply_index( self, request, cid='', type=type, None=None ):
示例2: Lexicon
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class Lexicon(Persistent):
"""
Implementation of :class:`zope.index.text.interfaces.ILexicon`.
"""
def __init__(self, *pipeline):
self._wids = OIBTree() # word -> wid
self._words = IOBTree() # wid -> word
# wid 0 is reserved for words that aren't in the lexicon (OOV -- out
# of vocabulary). This can happen, e.g., if a query contains a word
# we never saw before, and that isn't a known stopword (or otherwise
# filtered out). Returning a special wid value for OOV words is a
# way to let clients know when an OOV word appears.
self.wordCount = Length()
self._pipeline = pipeline
def wordCount(self):
"""Return the number of unique terms in the lexicon."""
# overridden per instance
return len(self._wids)
def words(self):
return self._wids.keys()
def wids(self):
return self._words.keys()
def items(self):
return self._wids.items()
def sourceToWordIds(self, text):
if text is None:
text = ''
last = _text2list(text)
for element in self._pipeline:
last = element.process(last)
if not isinstance(self.wordCount, Length):
# Make sure wordCount is overridden with a BTrees.Length.Length
self.wordCount = Length(self.wordCount())
# Strategically unload the length value so that we get the most
# recent value written to the database to minimize conflicting wids
# Because length is independent, this will load the most
# recent value stored, regardless of whether MVCC is enabled
self.wordCount._p_deactivate()
return list(map(self._getWordIdCreate, last))
def termToWordIds(self, text):
last = _text2list(text)
for element in self._pipeline:
last = element.process(last)
wids = []
for word in last:
wids.append(self._wids.get(word, 0))
return wids
def parseTerms(self, text):
last = _text2list(text)
for element in self._pipeline:
process = getattr(element, "processGlob", element.process)
last = process(last)
return last
def isGlob(self, word):
return "*" in word or "?" in word
def get_word(self, wid):
return self._words[wid]
def get_wid(self, word):
return self._wids.get(word, 0)
def globToWordIds(self, pattern):
# Implement * and ? just as in the shell, except the pattern
# must not start with either of these
prefix = ""
while pattern and pattern[0] not in "*?":
prefix += pattern[0]
pattern = pattern[1:]
if not pattern:
# There were no globbing characters in the pattern
wid = self._wids.get(prefix, 0)
if wid:
return [wid]
else:
return []
if not prefix:
# The pattern starts with a globbing character.
# This is too efficient, so we raise an exception.
raise QueryError(
"pattern %r shouldn't start with glob character" % pattern)
pat = prefix
for c in pattern:
if c == "*":
pat += ".*"
elif c == "?":
pat += "."
else:
pat += re.escape(c)
pat += "$"
prog = re.compile(pat)
#.........这里部分代码省略.........
示例3: UUIDIndex
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class UUIDIndex(UnIndex):
    """Index for uuid fields with a unique value per key.

    The internal structure is:

    self._index = {datum: documentId}
    self._unindex = {documentId: datum}

    For each datum only one documentId can exist.
    """

    meta_type = "UUIDIndex"

    manage_options = (
        {'label': 'Settings', 'action': 'manage_main'},
        {'label': 'Browse', 'action': 'manage_browse'},
    )

    query_options = ["query", "range"]

    manage = manage_main = DTMLFile('dtml/manageUUIDIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    def clear(self):
        """Reset the index to an empty state."""
        self._length = Length()
        # Forward map: datum -> documentId (one:one).
        self._index = OIBTree()
        # Reverse map: documentId -> datum (one:one).
        self._unindex = IOBTree()
        self._counter = Length()

    def numObjects(self):
        """Return the number of indexed objects. Since we have a 1:1 mapping
        from documents to values, we can reuse the stored length.
        """
        return self.indexSize()

    def uniqueValues(self, name=None, withLengths=0):
        """Yield the unique values for ``name``.

        If ``withLengths`` is true, yield (value, length) tuples instead.
        Only this index's own id is supported; any other name produces an
        empty iteration.
        """
        if name is None:
            name = self.id
        elif name != self.id:
            # PEP 479: raising StopIteration inside a generator is turned
            # into RuntimeError on Python 3.7+; a bare return is the correct
            # way to end the iteration early with no values.
            return
        if not withLengths:
            for key in self._index.keys():
                yield key
        else:
            # We know the length for each value is one
            for key in self._index.keys():
                yield (key, 1)

    def insertForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and put it in the correct place
        in the forward index.

        ``None`` entries are ignored by design. A clash with a different
        document is logged and the existing mapping is kept, preserving
        the one-value-per-key invariant.
        """
        if entry is None:
            return
        old_docid = self._index.get(entry, _marker)
        if old_docid is _marker:
            self._index[entry] = documentId
            self._length.change(1)
        elif old_docid != documentId:
            # Uniqueness violation: keep the existing mapping and report.
            # (Fixed a stray trailing apostrophe in the original message.)
            logger.error("A different document with value '%s' already "
                         "exists in the index." % entry)

    def removeForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        old_docid = self._index.get(entry, _marker)
        if old_docid is not _marker:
            del self._index[entry]
            self._length.change(-1)

    def _get_object_datum(self, obj, attr):
        # for a uuid it never makes sense to acquire a parent value via
        # Acquisition, so look only at the unwrapped object itself
        has_attr = getattr(aq_base(obj), attr, _marker)
        if has_attr is _marker:
            return _marker
        return super(UUIDIndex, self)._get_object_datum(obj, attr)
示例4: Path
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class Path(String):
root = None # root as passed to Catalog()
path2rid = None # OIBTree mapping path to rid (one:one)
rid2path = None # IOBTree mapping rid to path (one:one)
parts = None # OOBTree mapping (level, part) to rids (one:many)
levels = None # IOBTree mapping level to a list of rids (one:many)
case_sensitive = None
sorted = None # OOBTree for sorting; inherited from Path
def __init__(self, root, case_sensitive=None):
# Root
# ====
if not isinstance(root, basestring):
raise TypeError("root is not a string: '%s'" % root)
elif not isdir(root):
raise ValueError("root doesn't point to a directory: '%s'" % root)
self.root = root.rstrip(os.sep)
# Case Sensitivity
# ================
if case_sensitive is None:
if 'win' in sys.platform:
case_sensitive = False
else:
case_sensitive = True
if case_sensitive not in (False, True, 0, 1):
raise TypeError( "case_sensitive isn't a boolean: "
+ "'%s'" % case_sensitive
)
self.case_sensitive = bool(case_sensitive)
self.reset()
# Index contract
# ==============
__name__ = 'Path' # used in command-line interface
def reset(self):
"""Forget everything; usually called from __init__.
"""
String.reset(self)
self.path2rid = OIBTree() # {path:rid}
self.rid2path = IOBTree() # {rid:path}
self.parts = OOBTree() # {(level,part):rids}
self.rids = IOBTree() # {rid:(level,part)s}
self.levels = IOBTree() # {level:rids}
def learn(self, rid, value):
"""Given an rid and a value, associate them.
"""
String.learn(self, rid, value)
# Parse and validate.
# ===================
# Value is an absolute path, rooted in self.root.
if not isinstance(value, basestring):
raise TypeError("string expected")
elif value and not value.startswith(os.sep):
raise ValueError("path not specified absolutely: '%s'" % value)
if self.case_sensitive:
path = value
else:
path = value.lower()
path = path.rstrip(os.sep) # safety net; should never need this
parts = value.split(os.sep)
#parts = value.split(os.sep)[1:]
# Add to simple identity indices.
# ===============================
self.path2rid[path] = rid
self.rid2path[rid] = path
# Add to complex level/part indices.
# ==================================
for level in range(len(parts)):
token_ = (level, parts[level])
# Add to (one:many) mapping of (level,part) to [rids].
# ====================================================
if token_ not in self.parts:
#.........这里部分代码省略.........
示例5: Indexer
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class Indexer(object):
filestorage = database = connection = root = None
def __init__(self, datafs, writable=0, trans=0, pack=0):
self.trans_limit = trans
self.pack_limit = pack
self.trans_count = 0
self.pack_count = 0
self.stopdict = get_stopdict()
self.mh = mhlib.MH()
self.filestorage = FileStorage(datafs, read_only=(not writable))
self.database = DB(self.filestorage)
self.connection = self.database.open()
self.root = self.connection.root()
try:
self.index = self.root["index"]
except KeyError:
self.index = self.root["index"] = TextIndexWrapper()
try:
self.docpaths = self.root["docpaths"]
except KeyError:
self.docpaths = self.root["docpaths"] = IOBTree()
try:
self.doctimes = self.root["doctimes"]
except KeyError:
self.doctimes = self.root["doctimes"] = IIBTree()
try:
self.watchfolders = self.root["watchfolders"]
except KeyError:
self.watchfolders = self.root["watchfolders"] = {}
self.path2docid = OIBTree()
for docid in self.docpaths.keys():
path = self.docpaths[docid]
self.path2docid[path] = docid
try:
self.maxdocid = max(self.docpaths.keys())
except ValueError:
self.maxdocid = 0
print(len(self.docpaths), "Document ids")
print(len(self.path2docid), "Pathnames")
print(self.index.lexicon.length(), "Words")
def dumpfreqs(self):
lexicon = self.index.lexicon
index = self.index.index
assert isinstance(index, OkapiIndex)
L = []
for wid in lexicon.wids():
freq = 0
for f in index._wordinfo.get(wid, {}).values():
freq += f
L.append((freq, wid, lexicon.get_word(wid)))
L.sort()
L.reverse()
for freq, wid, word in L:
print("%10d %10d %s" % (wid, freq, word))
def dumpwids(self):
lexicon = self.index.lexicon
index = self.index.index
assert isinstance(index, OkapiIndex)
for wid in lexicon.wids():
freq = 0
for f in index._wordinfo.get(wid, {}).values():
freq += f
print("%10d %10d %s" % (wid, freq, lexicon.get_word(wid)))
def dumpwords(self):
lexicon = self.index.lexicon
index = self.index.index
assert isinstance(index, OkapiIndex)
for word in lexicon.words():
wid = lexicon.get_wid(word)
freq = 0
for f in index._wordinfo.get(wid, {}).values():
freq += f
print("%10d %10d %s" % (wid, freq, word))
def close(self):
self.root = None
if self.connection is not None:
self.connection.close()
self.connection = None
if self.database is not None:
self.database.close()
self.database = None
if self.filestorage is not None:
self.filestorage.close()
self.filestorage = None
def interact(self, nbest=NBEST, maxlines=MAXLINES):
try:
import readline
except ImportError:
pass
text = ""
top = 0
results = []
while 1:
#.........这里部分代码省略.........
示例6: DateIndex
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class DateIndex(UnIndex, PropertyManager):
"""Index for dates.
"""
__implements__ = UnIndex.__implements__
implements(IDateIndex)
meta_type = 'DateIndex'
query_options = ['query', 'range']
index_naive_time_as_local = True # False means index as UTC
_properties=({'id':'index_naive_time_as_local',
'type':'boolean',
'mode':'w'},)
manage = manage_main = DTMLFile( 'dtml/manageDateIndex', globals() )
manage_browse = DTMLFile('../dtml/browseIndex', globals())
manage_main._setName( 'manage_main' )
manage_options = ( { 'label' : 'Settings'
, 'action' : 'manage_main'
},
{'label': 'Browse',
'action': 'manage_browse',
},
) + PropertyManager.manage_options
def clear( self ):
""" Complete reset """
self._index = IOBTree()
self._unindex = OIBTree()
self._length = BTrees.Length.Length()
def index_object( self, documentId, obj, threshold=None ):
"""index an object, normalizing the indexed value to an integer
o Normalized value has granularity of one minute.
o Objects which have 'None' as indexed value are *omitted*,
by design.
"""
returnStatus = 0
try:
date_attr = getattr( obj, self.id )
if safe_callable( date_attr ):
date_attr = date_attr()
ConvertedDate = self._convert( value=date_attr, default=_marker )
except AttributeError:
ConvertedDate = _marker
oldConvertedDate = self._unindex.get( documentId, _marker )
if ConvertedDate != oldConvertedDate:
if oldConvertedDate is not _marker:
self.removeForwardIndexEntry(oldConvertedDate, documentId)
if ConvertedDate is _marker:
try:
del self._unindex[documentId]
except ConflictError:
raise
except:
logger.error(
("Should not happen: ConvertedDate was there,"
" now it's not, for document with id %s" %
documentId))
if ConvertedDate is not _marker:
self.insertForwardIndexEntry( ConvertedDate, documentId )
self._unindex[documentId] = ConvertedDate
returnStatus = 1
return returnStatus
def _apply_index( self, request, cid='', type=type ):
"""Apply the index to query parameters given in the argument
Normalize the 'query' arguments into integer values at minute
precision before querying.
"""
record = parseIndexRequest( request, self.id, self.query_options )
if record.keys == None:
return None
keys = map( self._convert, record.keys )
index = self._index
r = None
opr = None
#experimental code for specifing the operator
operator = record.get( 'operator', self.useOperator )
if not operator in self.operators :
raise RuntimeError, "operator not valid: %s" % operator
# depending on the operator we use intersection or union
if operator=="or":
#.........这里部分代码省略.........
示例7: IntegerRangesIndex
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class IntegerRangesIndex(SimpleItem):
""" Index a set of integer ranges:
[(1,2), (12,23), (12, 22)]
"""
implements(IPluggableIndex)
meta_type = 'IntegerRangesIndex'
def __init__(self, id, caller=None, extra=None):
self.id = id
self.caller = caller
self.clear()
self.__genid = 0
def __len__(self):
return self._length()
def getId(self):
"""Return Id of index."""
return self.id
def clear(self):
"""Empty the index"""
IOBTree = BTrees.family64.IO.BTree
self._index = IOBTree() # {rangeid: [document_id, ...]}
self._unindex = IOBTree() # {document_id: [rangeid, ...]}
self._range_mapping = IOBTree() # {rangeid: range}
self._reverse_range_mapping = OIBTree() # {range: rangeid}
self._since_index = IOBTree() # {since: [rangeid,...]}
self._until_index = IOBTree() # {until: [rangeid,...]}
self._length = BTrees.Length.Length()
self._unique_values_length = BTrees.Length.Length()
def __get_range_id(self, range_):
return self._reverse_range_mapping.get(range_, None)
def __get_range(self, range_id):
return self._range_mapping.get(range_id, None)
def __index_range(self, range_):
""" index range if needed and return the rangeid
"""
range_id = self.__get_range_id(range_)
if range_id is None:
range_id = self.genid()
# index range
self._unique_values_length.change(1)
self._range_mapping[range_id] = range_
self._reverse_range_mapping[range_] = range_id
# index range boundaries
since, until = range_
self.__insert_in_index_set(self._since_index, since, range_id)
self.__insert_in_index_set(self._until_index, until, range_id)
return range_id
def __unindex_range(self, range_id):
range_ = self.__get_range(range_id)
if range_ is None:
return None
since, until = range_
self.__remove_in_index_set(self._since_index, since, range_id)
self.__remove_in_index_set(self._until_index, until, range_id)
self._unique_values_length.change(-1)
del self._range_mapping[range_id]
del self._reverse_range_mapping[range_]
return range_
def genid(self):
self.__genid += 1
return self.__genid
def getEntryForObject(self, document_id, default=_marker):
"""Get all information contained for 'document_id'."""
if default is _marker:
return self._unindex.get(document_id)
else:
return self._index.get(document_id, default)
def getIndexSourceNames(self):
"""Get a sequence of attribute names that are indexed by the index.
"""
return [self.id]
def index_object(self, document_id, obj, threshold=None):
"""Index an object.
'document_id' is the integer ID of the document.
'obj' is the object to be indexed.
'threshold' is the number of words to process between committing
subtransactions. If None, subtransactions are disabled.
"""
new_ranges = self._get_object_data(obj, self.id)
if new_ranges:
new_set = IISet(map(self.__index_range, new_ranges))
else:
new_set = IISet()
old_set = self._unindex.get(document_id, IISet())
#.........这里部分代码省略.........
示例8: Lexicon
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class Lexicon(Persistent):
implements(ILexicon)
def __init__(self, *pipeline):
self._wids = OIBTree() # word -> wid
self._words = IOBTree() # wid -> word
# wid 0 is reserved for words that aren't in the lexicon (OOV -- out
# of vocabulary). This can happen, e.g., if a query contains a word
# we never saw before, and that isn't a known stopword (or otherwise
# filtered out). Returning a special wid value for OOV words is a
# way to let clients know when an OOV word appears.
self._nextwid = 1
self._pipeline = pipeline
# Keep some statistics about indexing
self._nbytes = 0 # Number of bytes indexed (at start of pipeline)
self._nwords = 0 # Number of words indexed (after pipeline)
def wordCount(self):
"""Return the number of unique terms in the lexicon."""
return self._nextwid - 1
def words(self):
return self._wids.keys()
def wids(self):
return self._words.keys()
def items(self):
return self._wids.items()
def sourceToWordIds(self, text):
last = _text2list(text)
for t in last:
self._nbytes += len(t)
for element in self._pipeline:
last = element.process(last)
self._nwords += len(last)
return map(self._getWordIdCreate, last)
def termToWordIds(self, text):
last = _text2list(text)
for element in self._pipeline:
last = element.process(last)
wids = []
for word in last:
wids.append(self._wids.get(word, 0))
return wids
def parseTerms(self, text):
last = _text2list(text)
for element in self._pipeline:
process = getattr(element, "processGlob", element.process)
last = process(last)
return last
def isGlob(self, word):
return "*" in word or "?" in word
def get_word(self, wid):
return self._words[wid]
def get_wid(self, word):
return self._wids.get(word, 0)
def globToWordIds(self, pattern):
# Implement * and ? just as in the shell, except the pattern
# must not start with either of these
prefix = ""
while pattern and pattern[0] not in "*?":
prefix += pattern[0]
pattern = pattern[1:]
if not pattern:
# There were no globbing characters in the pattern
wid = self._wids.get(prefix, 0)
if wid:
return [wid]
else:
return []
if not prefix:
# The pattern starts with a globbing character.
# This is too efficient, so we raise an exception.
raise QueryError(
"pattern %r shouldn't start with glob character" % pattern)
pat = prefix
for c in pattern:
if c == "*":
pat += ".*"
elif c == "?":
pat += "."
else:
pat += re.escape(c)
pat += "$"
prog = re.compile(pat)
keys = self._wids.keys(prefix) # Keys starting at prefix
wids = []
for key in keys:
if not key.startswith(prefix):
break
#.........这里部分代码省略.........
示例9: LinkCheckTool
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class LinkCheckTool(SimpleItem):
security = ClassSecurityInfo()
def __init__(self, id=None):
super(LinkCheckTool, self).__init__(id)
# This is the work queue; items in this queue are scheduled
# for link validity check.
self.queue = CompositeQueue()
# Additional queue for internal crawler to revalidate the site
self.crawl_queue = CompositeQueue()
# This is the link database. It maps a hyperlink index to a
# tuple (timestamp, status, referers).
self.checked = IOBTree()
# Indexes
self.index = OIBTree()
self.links = IOBTree()
# This is a counter that allows us to add new hyperlinks and
# provide an indexc quickly.
self.counter = 0
security.declarePrivate("is_available")
def is_available(self):
return hasattr(self, 'index') and \
hasattr(self, 'checked') and \
hasattr(self, 'queue') and \
hasattr(self, 'counter')
security.declarePrivate("clear")
def clear(self):
while True:
try:
self.queue.pull()
except IndexError:
break
while True:
try:
self.crawl_queue.pull()
except IndexError:
break
self.checked.clear()
self.index.clear()
self.links.clear()
self.counter = 0
security.declarePrivate("crawl")
def crawl(self):
self.clear()
query = {}
registry = getUtility(IRegistry)
settings = registry.forInterface(ISettings)
if settings.content_types:
query['portal_type'] = settings.content_types
if settings.workflow_states:
query['review_state'] = settings.workflow_states
catalog = api.portal.get_tool('portal_catalog')
brains = catalog(query)
for brain in brains:
# asyncronous crawling not working yet
# self.crawl_enqueue(brain.UID)
obj = brain.getObject()
obj.restrictedTraverse('@@linkcheck')()
logger.info('Crawling: checked {0}'.format(brain.getURL()))
security.declarePrivate("enqueue")
def enqueue(self, url):
index = self.index.get(url)
if index is None:
# a really new url
index = self.store(url)
else:
entry = self.checked.get(index)
if entry is not None and entry:
entry = None, entry[1], entry[2]
self.checked[index] = entry
else:
# reset empty entry
self.remove(url)
index = self.store(url)
self.queue.put(index)
return index
security.declarePrivate("register")
def register(self, hrefs, referer, timestamp):
"""Add or update link presence information.
If a link has not been checked since the provided timestamp,
it will be added to the queue (or if it is not in the
database).
"""
referer = self.index.get(referer) or self.store(referer)
#.........这里部分代码省略.........
示例10: ExtendedPathIndex
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class ExtendedPathIndex(PathIndex):
"""A path index stores all path components of the physical path of an
object.
Internal datastructure (regular pathindex):
- a physical path of an object is split into its components
- every component is kept as a key of a OOBTree in self._indexes
- the value is a mapping 'level of the path component' to
'all docids with this path component on this level'
In addition
- there is a terminator (None) signifying the last component in the path
- 2 additional indexes map absolute path to either the doc id or doc ids of
contained objects. This allows for rapid answering of common queries.
"""
meta_type = "ExtendedPathIndex"
manage_options = (
{'label': 'Settings', 'action': 'manage_main'},
)
indexed_attrs = None
multi_valued = False
query_options = ("query", "level", "operator",
"depth", "navtree", "navtree_start")
def __init__(self, id, extra=None, caller=None):
""" ExtendedPathIndex supports indexed_attrs """
PathIndex.__init__(self, id, caller)
if isinstance(extra, dict):
attrs = extra.get('indexed_attrs', None)
self.multi_valued = extra.get('multi_valued', False)
else:
attrs = getattr(extra, 'indexed_attrs', None)
self.multi_valued = getattr(extra, 'multi_valued', False)
if attrs is None:
return
if isinstance(attrs, str):
attrs = attrs.split(',')
attrs = [a.strip() for a in attrs]
attrs = [a for a in attrs if a]
if attrs:
# We only index the first attribute so snip off the rest
self.indexed_attrs = tuple(attrs[:1])
def clear(self):
PathIndex.clear(self)
self._index_parents = OOBTree()
self._index_items = OIBTree()
def index_object(self, docid, obj, threshold=100):
""" hook for (Z)Catalog """
# PathIndex first checks for an attribute matching its id and
# falls back to getPhysicalPath only when failing to get one.
# If self.indexed_attrs is not None, it's value overrides this behavior
attrs = self.indexed_attrs
index = attrs is None and self.id or attrs[0]
path = getattr(obj, index, None)
if path is not None:
if safe_callable(path):
path = path()
if not isinstance(path, (str, tuple)):
raise TypeError('path value must be string or tuple '
'of strings: (%r, %s)' % (index, repr(path)))
else:
try:
path = obj.getPhysicalPath()
except AttributeError:
return 0
if isinstance(path, (list, tuple)):
path = '/' + '/'.join(path[1:])
comps = [p for p in path.split('/') if p]
# Make sure we reindex properly when path change
old_path = self._unindex.get(docid, _marker)
if old_path is not _marker:
if old_path != path:
self.unindex_object(docid, _old=old_path)
# unindex reduces length, we need to counter that
self._length.change(1)
else:
# We only get a new entry if the value wasn't there before.
# If it already existed the length is unchanged
self._length.change(1)
#.........这里部分代码省略.........
示例11: DocumentMap
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class DocumentMap(Persistent):
""" A two-way map between addresses (e.g. location paths) and document ids.
The map is a persistent object meant to live in a ZODB storage.
Additionally, the map is capable of mapping 'metadata' to docids.
"""
_v_nextid = None
family = BTrees.family32
_randrange = random.randrange
docid_to_metadata = None # latch for b/c
def __init__(self):
self.docid_to_address = IOBTree()
self.address_to_docid = OIBTree()
self.docid_to_metadata = IOBTree()
def docid_for_address(self, address):
""" Retrieve a document id for a given address.
``address`` is a string or other hashable object which represents
a token known by the application.
Return the integer document id corresponding to ``address``.
If ``address`` doesn't exist in the document map, return None.
"""
return self.address_to_docid.get(address)
def address_for_docid(self, docid):
""" Retrieve an address for a given document id.
``docid`` is an integer document id.
Return the address corresponding to ``docid``.
If ``docid`` doesn't exist in the document map, return None.
"""
return self.docid_to_address.get(docid)
def add(self, address, docid=_marker):
""" Add a new document to the document map.
``address`` is a string or other hashable object which represents
a token known by the application.
``docid``, if passed, must be an int. In this case, remove
any previous address stored for it before mapping it to the
new address. Passing an explicit ``docid`` also removes any
metadata associated with that docid.
If ``docid`` is not passed, generate a new docid.
Return the integer document id mapped to ``address``.
"""
if docid is _marker:
docid = self.new_docid()
self.remove_docid(docid)
self.remove_address(address)
self.docid_to_address[docid] = address
self.address_to_docid[address] = docid
return docid
def remove_docid(self, docid):
""" Remove a document from the document map for the given document ID.
``docid`` is an integer document id.
Remove any corresponding metadata for ``docid`` as well.
Return a True if ``docid`` existed in the map, else return False.
"""
# It should be an invariant that if one entry exists in
# docid_to_address for a docid/address pair, exactly one
# corresponding entry exists in address_to_docid for the same
# docid/address pair. However, versions of this code before
# r.catalog 0.7.3 had a bug which, if this method was called
# multiple times, each time with the same address but a
# different docid, the ``docid_to_address`` mapping could
# contain multiple entries for the same address each with a
# different docid, causing this invariant to be violated. The
# symptom: in systems that used r.catalog 0.7.2 and lower,
# there might be more entries in docid_to_address than there
# are in address_to_docid. The conditional fuzziness in the
# code directly below is a runtime kindness to systems in that
# state. Technically, the administrator of a system in such a
# state should normalize the two data structures by running a
# script after upgrading to 0.7.3. If we made the admin do
# this, some of the code fuzziness below could go away,
# replaced with something simpler. But there's no sense in
# breaking systems at runtime through being a hardass about
# consistency if an unsuspecting upgrader has not yet run the
# data fixer script. The "fix the data" mantra rings a
# little hollow when you weren't the one who broke the data in
# the first place ;-)
self._check_metadata()
#.........这里部分代码省略.........
示例12: index_object
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
def index_object(self, documentId, obj, threshold=None):
    """Index an object.

    'documentId' is the integer id of the document.

    'obj' is the object to be indexed.

    'threshold' is the number of words to process between
    committing subtransactions. If 'None', subtransactions are
    disabled.

    Returns the number of distinct word ids indexed for the document
    (0 when the object has no usable source attribute).
    """
    # sniff the object for our 'id', the 'document source' of the
    # index is this attribute. If it smells callable, call it.
    try:
        source = getattr(obj, self.id)
        if safe_callable(source):
            source = source()
        if not isinstance(source, UnicodeType):
            source = str(source)
    except (AttributeError, TypeError):
        # No such attribute (or it cannot be stringified): nothing to index.
        return 0
    # sniff the object for 'id'+'_encoding' to learn how to decode the
    # source text; fall back to latin1 when none is declared.
    try:
        encoding = getattr(obj, self.id+'_encoding')
        if safe_callable(encoding ):
            encoding = str(encoding())
        else:
            encoding = str(encoding)
    except (AttributeError, TypeError):
        encoding = 'latin1'
    lexicon = self.getLexicon()
    splitter = lexicon.Splitter
    # word -> occurrence score for this document
    wordScores = OIBTree()
    last = None
    # Run through the words and score them
    for word in list(splitter(source,encoding=encoding)):
        if word[0] == '\"':
            # Quoted phrase: score its inner words via the subindex helper.
            # NOTE(review): _subindex is defined elsewhere in this class —
            # presumably it splits the phrase and updates wordScores; confirm.
            last = self._subindex(word[1:-1], wordScores, last, splitter)
        else:
            if word==last: continue  # skip an immediately repeated word
            last=word
            wordScores[word]=wordScores.get(word,0)+1
    # Convert scores to use wids:
    widScores=IIBucket()
    getWid=lexicon.getWordId  # hoisted lookup for the conversion loop
    for word, score in wordScores.items():
        widScores[getWid(word)]=score
    del wordScores
    # Word ids currently recorded for this document (empty set if new).
    currentWids=IISet(self._unindex.get(documentId, []))
    # Get rid of document words that are no longer indexed
    self.unindex_objectWids(documentId, difference(currentWids, widScores))
    # Now index the words. Note that the new xIBTrees are clever
    # enough to do nothing when there isn't a change. Woo hoo.
    insert=self.insertForwardIndexEntry
    for wid, score in widScores.items():
        insert(wid, documentId, score)
    # Save the unindexing info if it's changed:
    wids=widScores.keys()
    if wids != currentWids.keys():
        self._unindex[documentId]=wids
    return len(wids)
示例13: Lexicon
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class Lexicon(Persistent, Implicit):
"""Maps words to word ids and then some
The Lexicon object is an attempt to abstract vocabularies out of
Text indexes. This abstraction is not totally cooked yet, this
module still includes the parser for the 'Text Index Query
Language' and a few other hacks.
"""
# default for older objects
stop_syn={}
def __init__(self, stop_syn=None,useSplitter=None,extra=None):
self.clear()
if stop_syn is None:
self.stop_syn = {}
else:
self.stop_syn = stop_syn
self.useSplitter = Splitter.splitterNames[0]
if useSplitter: self.useSplitter=useSplitter
self.splitterParams = extra
self.SplitterFunc = Splitter.getSplitter(self.useSplitter)
def clear(self):
    # Reset both mappings: _lexicon is word -> wid, _inverseLex is
    # wid -> word.
    self._lexicon = OIBTree()
    self._inverseLex = IOBTree()
def _convertBTrees(self, threshold=200):
    """Migrate old-style BTree mappings to OIBTree/IOBTree in place.

    No-op when both trees are already the new types.  `threshold` is
    passed through to BTrees.convert.convert (presumably a batching
    limit -- confirm against that API).
    """
    if (type(self._lexicon) is OIBTree and
        type(getattr(self, '_inverseLex', None)) is IOBTree):
        return
    from BTrees.convert import convert
    # Swap in a fresh OIBTree bound to the same ZODB connection, then
    # copy the old contents across.
    lexicon=self._lexicon
    self._lexicon=OIBTree()
    self._lexicon._p_jar=self._p_jar
    convert(lexicon, self._lexicon, threshold)
    try:
        inverseLex=self._inverseLex
        self._inverseLex=IOBTree()
    except AttributeError:
        # older lexicons didn't have an inverse lexicon
        self._inverseLex=IOBTree()
        inverseLex=self._inverseLex
    self._inverseLex._p_jar=self._p_jar
    convert(inverseLex, self._inverseLex, threshold)
def set_stop_syn(self, stop_syn):
    """ pass in a mapping of stopwords and synonyms.  Format is:

    {'word' : [syn1, syn2, ..., synx]}

    Vocabularies do not necessarily need to implement this if their
    splitters do not support stemming or stopping.
    """
    self.stop_syn = stop_syn
def getWordId(self, word):
    """Return the integer id of 'word', allocating one on first sight."""
    known = self._lexicon.get(word, None)
    if known is not None:
        return known
    return self.assignWordId(word)

set = getWordId
def getWord(self, wid):
    """ post-2.3.1b2 method, will not work with unconverted lexicons """
    # Reverse lookup, wid -> word; None when the id is unknown.
    return self._inverseLex.get(wid, None)
def assignWordId(self, word):
"""Assigns a new word id to the provided word and returns it."""
# First make sure it's not already in there
if self._lexicon.has_key(word):
return self._lexicon[word]
try: inverse=self._inverseLex
except AttributeError:
        # whoops: old lexicon without wids
inverse=self._inverseLex=IOBTree()
for word, wid in self._lexicon.items():
inverse[wid]=word
wid=randid()
while not inverse.insert(wid, word):
wid=randid()
if isinstance(word,StringType):
self._lexicon[intern(word)] = wid
#.........这里部分代码省略.........
示例14: Lexicon
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class Lexicon(Persistent):
_v_nextid = None
_wid_length_based = True # Flag to distinguish new and old lexica
def __init__(self, *pipeline):
    # `pipeline` is a sequence of text-processing stages; each stage
    # exposes a .process(list_of_words) method (see sourceToWordIds).
    self.clear()
    self._pipeline = pipeline
def clear(self):
    """Empty the lexicon.
    """
    # NOTE: this rebinds the instance attribute `length` to a callable
    # Length counter, shadowing the length() method below.
    self.length = Length()
    self._wid_length_based = False
    self._wids = OIBTree()  # word -> wid
    self._words = IOBTree() # wid -> word
    # wid 0 is reserved for words that aren't in the lexicon (OOV -- out
    # of vocabulary).  This can happen, e.g., if a query contains a word
    # we never saw before, and that isn't a known stopword (or otherwise
    # filtered out).  Returning a special wid value for OOV words is a
    # way to let clients know when an OOV word appears.
def length(self):
    """Return the number of unique terms in the lexicon.
    """
    # Overridden in instances with a BTrees.Length.Length:
    # clear() assigns self.length = Length(), so this body only runs
    # on an instance that was never cleared.
    raise NotImplementedError
def words(self):
    # All known terms (keys of the word -> wid tree).
    return self._wids.keys()
def wids(self):
    # All assigned word ids (keys of the wid -> word tree).
    return self._words.keys()
def items(self):
    # (word, wid) pairs.
    return self._wids.items()
def sourceToWordIds(self, text):
    """Run `text` through the pipeline and return its word ids,
    creating new ids for previously unseen words."""
    words = _text2list(text)
    for stage in self._pipeline:
        words = stage.process(words)
    return [self._getWordIdCreate(word) for word in words]
def termToWordIds(self, text):
    """Map a query term to word ids; unknown words map to the
    reserved OOV wid 0."""
    words = _text2list(text)
    for stage in self._pipeline:
        # Query-time stages may provide a glob-aware variant.
        stage_process = getattr(stage, "process_post_glob", stage.process)
        words = stage_process(words)
    return [self._wids.get(word, 0) for word in words]
def parseTerms(self, text):
    """Pipeline a query string, letting stages that implement
    processGlob keep globbing characters intact."""
    terms = _text2list(text)
    for stage in self._pipeline:
        terms = getattr(stage, "processGlob", stage.process)(terms)
    return terms
def isGlob(self, word):
    """True when `word` contains a shell-style wildcard (* or ?)."""
    return any(wc in word for wc in "*?")
def get_word(self, wid):
    # Raises KeyError for an unknown wid (cf. get_wid's 0 default).
    return self._words[wid]
def get_wid(self, word):
    # 0 is the reserved OOV ("out of vocabulary") wid.
    return self._wids.get(word, 0)
def globToWordIds(self, pattern):
# Implement * and ? just as in the shell, except the pattern
# must not start with either of these
prefix = ""
while pattern and pattern[0] not in "*?":
prefix += pattern[0]
pattern = pattern[1:]
if not pattern:
# There were no globbing characters in the pattern
wid = self._wids.get(prefix, 0)
if wid:
return [wid]
else:
return []
if not prefix:
# The pattern starts with a globbing character.
        # This is too inefficient, so we raise an exception.
raise QueryError(
"pattern %r shouldn't start with glob character" % pattern)
pat = prefix
for c in pattern:
if c == "*":
pat += ".*"
elif c == "?":
pat += "."
else:
pat += re.escape(c)
pat += "$"
prog = re.compile(pat)
keys = self._wids.keys(prefix) # Keys starting at prefix
wids = []
#.........这里部分代码省略.........
示例15: GlobbingLexicon
# 需要导入模块: from BTrees.OIBTree import OIBTree [as 别名]
# 或者: from BTrees.OIBTree.OIBTree import get [as 别名]
class GlobbingLexicon(Lexicon):
"""Lexicon which supports basic globbing function ('*' and '?').
This lexicon keeps several data structures around that are useful
for searching. They are:
'_lexicon' -- Contains the mapping from word => word_id
'_inverseLex' -- Contains the mapping from word_id => word
'_digrams' -- Contains a mapping from digram => word_id
Before going further, it is necessary to understand what a digram is,
as it is a core component of the structure of this lexicon. A digram
is a two-letter sequence in a word. For example, the word 'zope'
would be converted into the digrams::
['$z', 'zo', 'op', 'pe', 'e$']
where the '$' is a word marker. It is used at the beginning and end
of the words. Those digrams are significant.
"""
multi_wc = '*'
single_wc = '?'
eow = '$'
def __init__(self,useSplitter=None,extra=None):
    # `useSplitter` names the splitter implementation to look up via
    # Splitter.getSplitter; `extra` holds splitter-specific parameters,
    # stored for later use.
    self.clear()
    self.useSplitter = useSplitter
    self.splitterParams = extra
    self.SplitterFunc = Splitter.getSplitter(self.useSplitter)
def clear(self):
    # Reset all three structures described in the class docstring.
    self._lexicon = OIBTree()     # word -> word_id
    self._inverseLex = IOBTree()  # word_id -> word
    self._digrams = OOBTree()     # digram -> word_id(s)
def _convertBTrees(self, threshold=200):
    """Migrate old-style trees to the new BTrees package in place."""
    # Upgrade the base-class lexicon/inverse trees first.
    Lexicon._convertBTrees(self, threshold)
    if type(self._digrams) is OOBTree: return
    from BTrees.convert import convert
    # Rebuild _digrams as an OOBTree on the same ZODB connection,
    # converting each value to an IITreeSet.
    _digrams=self._digrams
    self._digrams=OOBTree()
    self._digrams._p_jar=self._p_jar
    convert(_digrams, self._digrams, threshold, IITreeSet)
def createDigrams(self, word):
    """Return the list of digrams in `word`.

    The word is bracketed with the end-of-word marker before the
    two-character windows are taken, e.g. 'zope' ->
    ['$z', 'zo', 'op', 'pe', 'e$'].
    """
    # Use the class constant `eow` (declared alongside multi_wc and
    # single_wc) instead of a hard-coded '$', so the marker stays
    # consistent if a subclass overrides it.  Behavior is unchanged
    # for the default eow = '$'.
    marked = self.eow + word + self.eow
    return [marked[i:i+2] for i in range(len(marked)-1)]
def getWordId(self, word):
    """Provided 'word', return the matching integer word id."""
    lexicon = self._lexicon
    if lexicon.has_key(word):
        return lexicon[word]
    # Unknown word: allocate a fresh id (assignWordId also registers
    # the word's digrams).
    return self.assignWordId(word)

set = getWordId  # Kludge for old code
def getWord(self, wid):
    # Reverse lookup, wid -> word; None when the id is unknown.
    return self._inverseLex.get(wid, None)
def assignWordId(self, word):
"""Assigns a new word id to the provided word, and return it."""
# Double check it's not in the lexicon already, and if it is, just
# return it.
if self._lexicon.has_key(word):
return self._lexicon[word]
# Get word id. BBB Backward compat pain.
inverse=self._inverseLex
try: insert=inverse.insert
except AttributeError:
# we have an "old" BTree object
if inverse:
wid=inverse.keys()[-1]+1
else:
self._inverseLex=IOBTree()
wid=1
inverse[wid] = word
else:
# we have a "new" IOBTree object
wid=randid()
while not inverse.insert(wid, word):
wid=randid()
self._lexicon[word] = wid
# Now take all the digrams and insert them into the digram map.
#.........这里部分代码省略.........