This article collects typical usage examples of the `OIBTree.items` method from the Python module `BTrees.OIBTree`. If you have been wondering how `OIBTree.items` works, what it is used for, or what real code that calls it looks like, the curated method examples below may help. You can also explore the containing class, `BTrees.OIBTree.OIBTree`, for more context.
Five code examples of `OIBTree.items` are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
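Before the examples, here is a minimal, self-contained sketch of what `OIBTree` stores and what `items` returns; the sample words are invented for illustration:

from BTrees.OIBTree import OIBTree

# OIBTree maps ordered object keys (typically strings) to integer values.
tree = OIBTree()
tree['apple'] = 1
tree['banana'] = 2
tree['cherry'] = 3

# items() returns a lazy sequence of (key, value) pairs in key order.
print(list(tree.items()))          # [('apple', 1), ('banana', 2), ('cherry', 3)]

# items() also accepts min/max bounds for range searches.
print(list(tree.items('b', 'c')))  # [('banana', 2)]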
Example 1: Lexicon
# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import items [as alias]
class Lexicon(Persistent):
    """
    Implementation of :class:`zope.index.text.interfaces.ILexicon`.
    """

    def __init__(self, *pipeline):
        self._wids = OIBTree()   # word -> wid
        self._words = IOBTree()  # wid -> word
        # wid 0 is reserved for words that aren't in the lexicon (OOV -- out
        # of vocabulary). This can happen, e.g., if a query contains a word
        # we never saw before, and that isn't a known stopword (or otherwise
        # filtered out). Returning a special wid value for OOV words is a
        # way to let clients know when an OOV word appears.
        self.wordCount = Length()
        self._pipeline = pipeline

    def wordCount(self):
        """Return the number of unique terms in the lexicon."""
        # overridden per instance
        return len(self._wids)

    def words(self):
        return self._wids.keys()

    def wids(self):
        return self._words.keys()

    def items(self):
        return self._wids.items()

    def sourceToWordIds(self, text):
        if text is None:
            text = ''
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        if not isinstance(self.wordCount, Length):
            # Make sure wordCount is overridden with a BTrees.Length.Length
            self.wordCount = Length(self.wordCount())
        # Strategically unload the length value so that we get the most
        # recent value written to the database to minimize conflicting wids.
        # Because length is independent, this will load the most recent
        # value stored, regardless of whether MVCC is enabled.
        self.wordCount._p_deactivate()
        return list(map(self._getWordIdCreate, last))

    def termToWordIds(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        wids = []
        for word in last:
            wids.append(self._wids.get(word, 0))
        return wids

    def parseTerms(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            process = getattr(element, "processGlob", element.process)
            last = process(last)
        return last

    def isGlob(self, word):
        return "*" in word or "?" in word

    def get_word(self, wid):
        return self._words[wid]

    def get_wid(self, word):
        return self._wids.get(word, 0)

    def globToWordIds(self, pattern):
        # Implement * and ? just as in the shell, except the pattern
        # must not start with either of these
        prefix = ""
        while pattern and pattern[0] not in "*?":
            prefix += pattern[0]
            pattern = pattern[1:]
        if not pattern:
            # There were no globbing characters in the pattern
            wid = self._wids.get(prefix, 0)
            if wid:
                return [wid]
            else:
                return []
        if not prefix:
            # The pattern starts with a globbing character.
            # This would be too inefficient, so we raise an exception.
            raise QueryError(
                "pattern %r shouldn't start with glob character" % pattern)
        pat = prefix
        for c in pattern:
            if c == "*":
                pat += ".*"
            elif c == "?":
                pat += "."
            else:
                pat += re.escape(c)
        pat += "$"
        prog = re.compile(pat)
# ... (rest of this example omitted) ...
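A hedged usage sketch for Example 1: assuming this class tracks `zope.index.text.lexicon.Lexicon` (which it appears to excerpt) and that the `zope.index` package is installed, a pipeline can be assembled from its stock elements; the sample text is invented.

from zope.index.text.lexicon import Lexicon, Splitter, CaseNormalizer

# The pipeline splits the source text into words, then lowercases them.
lexicon = Lexicon(Splitter(), CaseNormalizer())

# Indexing assigns fresh wids and populates the word -> wid OIBTree.
wids = lexicon.sourceToWordIds('The quick brown fox')
print(wids)                  # e.g. [1, 2, 3, 4]

# items() exposes that OIBTree directly as (word, wid) pairs.
print(list(lexicon.items()))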
Example 2: Lexicon
# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import items [as alias]
class Lexicon(Persistent):

    implements(ILexicon)

    def __init__(self, *pipeline):
        self._wids = OIBTree()   # word -> wid
        self._words = IOBTree()  # wid -> word
        # wid 0 is reserved for words that aren't in the lexicon (OOV -- out
        # of vocabulary). This can happen, e.g., if a query contains a word
        # we never saw before, and that isn't a known stopword (or otherwise
        # filtered out). Returning a special wid value for OOV words is a
        # way to let clients know when an OOV word appears.
        self._nextwid = 1
        self._pipeline = pipeline
        # Keep some statistics about indexing
        self._nbytes = 0  # Number of bytes indexed (at start of pipeline)
        self._nwords = 0  # Number of words indexed (after pipeline)

    def wordCount(self):
        """Return the number of unique terms in the lexicon."""
        return self._nextwid - 1

    def words(self):
        return self._wids.keys()

    def wids(self):
        return self._words.keys()

    def items(self):
        return self._wids.items()

    def sourceToWordIds(self, text):
        last = _text2list(text)
        for t in last:
            self._nbytes += len(t)
        for element in self._pipeline:
            last = element.process(last)
        self._nwords += len(last)
        return map(self._getWordIdCreate, last)

    def termToWordIds(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        wids = []
        for word in last:
            wids.append(self._wids.get(word, 0))
        return wids

    def parseTerms(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            process = getattr(element, "processGlob", element.process)
            last = process(last)
        return last

    def isGlob(self, word):
        return "*" in word or "?" in word

    def get_word(self, wid):
        return self._words[wid]

    def get_wid(self, word):
        return self._wids.get(word, 0)

    def globToWordIds(self, pattern):
        # Implement * and ? just as in the shell, except the pattern
        # must not start with either of these
        prefix = ""
        while pattern and pattern[0] not in "*?":
            prefix += pattern[0]
            pattern = pattern[1:]
        if not pattern:
            # There were no globbing characters in the pattern
            wid = self._wids.get(prefix, 0)
            if wid:
                return [wid]
            else:
                return []
        if not prefix:
            # The pattern starts with a globbing character.
            # This would be too inefficient, so we raise an exception.
            raise QueryError(
                "pattern %r shouldn't start with glob character" % pattern)
        pat = prefix
        for c in pattern:
            if c == "*":
                pat += ".*"
            elif c == "?":
                pat += "."
            else:
                pat += re.escape(c)
        pat += "$"
        prog = re.compile(pat)
        keys = self._wids.keys(prefix)  # Keys starting at prefix
        wids = []
        for key in keys:
            if not key.startswith(prefix):
                break
# ... (rest of this example omitted) ...
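The truncated tail above walks keys starting at `prefix`; here is a standalone sketch of that prefix-scan idiom on a bare OIBTree (sample data invented for illustration):

from BTrees.OIBTree import OIBTree

wids = OIBTree()
for i, word in enumerate(['cat', 'catalog', 'category', 'dog'], start=1):
    wids[word] = i

prefix = 'cat'
matches = []
# keys(min) iterates keys >= min in sorted order, so the scan can stop
# at the first key that no longer starts with the prefix.
for key in wids.keys(prefix):
    if not key.startswith(prefix):
        break
    matches.append(wids[key])
print(matches)  # [1, 2, 3]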
Example 3: Lexicon
# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import items [as alias]
class Lexicon(Persistent, Implicit):
    """Maps words to word ids and then some.

    The Lexicon object is an attempt to abstract vocabularies out of
    Text indexes. This abstraction is not totally cooked yet; this
    module still includes the parser for the 'Text Index Query
    Language' and a few other hacks.
    """

    # default for older objects
    stop_syn = {}

    def __init__(self, stop_syn=None, useSplitter=None, extra=None):
        self.clear()
        if stop_syn is None:
            self.stop_syn = {}
        else:
            self.stop_syn = stop_syn
        self.useSplitter = Splitter.splitterNames[0]
        if useSplitter:
            self.useSplitter = useSplitter
        self.splitterParams = extra
        self.SplitterFunc = Splitter.getSplitter(self.useSplitter)

    def clear(self):
        self._lexicon = OIBTree()
        self._inverseLex = IOBTree()

    def _convertBTrees(self, threshold=200):
        if (type(self._lexicon) is OIBTree and
                type(getattr(self, '_inverseLex', None)) is IOBTree):
            return

        from BTrees.convert import convert

        lexicon = self._lexicon
        self._lexicon = OIBTree()
        self._lexicon._p_jar = self._p_jar
        convert(lexicon, self._lexicon, threshold)

        try:
            inverseLex = self._inverseLex
            self._inverseLex = IOBTree()
        except AttributeError:
            # older lexicons didn't have an inverse lexicon
            self._inverseLex = IOBTree()
            inverseLex = self._inverseLex

        self._inverseLex._p_jar = self._p_jar
        convert(inverseLex, self._inverseLex, threshold)

    def set_stop_syn(self, stop_syn):
        """Pass in a mapping of stopwords and synonyms.  Format is:

            {'word': [syn1, syn2, ..., synx]}

        Vocabularies do not necessarily need to implement this if their
        splitters do not support stemming or stopping.
        """
        self.stop_syn = stop_syn

    def getWordId(self, word):
        """Return the word id of 'word'."""
        wid = self._lexicon.get(word, None)
        if wid is None:
            wid = self.assignWordId(word)
        return wid

    set = getWordId

    def getWord(self, wid):
        """Post-2.3.1b2 method; will not work with unconverted lexicons."""
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assign a new word id to the provided word and return it."""
        # First make sure it's not already in there
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        try:
            inverse = self._inverseLex
        except AttributeError:
            # whoops, old lexicon without wids
            inverse = self._inverseLex = IOBTree()
            for word, wid in self._lexicon.items():
                inverse[wid] = word

        wid = randid()
        while not inverse.insert(wid, word):
            wid = randid()

        if isinstance(word, StringType):
            self._lexicon[intern(word)] = wid
# ... (rest of this example omitted) ...
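The `assignWordId` loop above leans on `IOBTree.insert`, which returns a falsy value when the key is already present; here is a standalone sketch of that collision-retry idiom, with a stand-in `randid` (the original's helper is not shown in this excerpt, so this one is hypothetical):

import random

from BTrees.IOBTree import IOBTree

def randid():
    # Hypothetical stand-in for the randid() used above: any random
    # positive 31-bit integer works for illustration.
    return random.randint(1, 2**31 - 1)

inverse = IOBTree()  # wid -> word

def assign_wid(word):
    # insert() returns 1 on success and 0 if the wid is already taken,
    # so a collision simply retries with a fresh random id.
    wid = randid()
    while not inverse.insert(wid, word):
        wid = randid()
    return wid

wid = assign_wid('hello')
print(inverse[wid])  # 'hello'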
Example 4: index_object
# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import items [as alias]
def index_object(self, documentId, obj, threshold=None):
    """Index an object.

    'documentId' is the integer id of the document.
    'obj' is the object to be indexed.
    'threshold' is the number of words to process between
    committing subtransactions.  If 'None', subtransactions are
    disabled.
    """
    # sniff the object for our 'id'; the 'document source' of the
    # index is this attribute.  If it smells callable, call it.
    try:
        source = getattr(obj, self.id)
        if safe_callable(source):
            source = source()
        if not isinstance(source, UnicodeType):
            source = str(source)
    except (AttributeError, TypeError):
        return 0

    # sniff the object for 'id' + '_encoding'
    try:
        encoding = getattr(obj, self.id + '_encoding')
        if safe_callable(encoding):
            encoding = str(encoding())
        else:
            encoding = str(encoding)
    except (AttributeError, TypeError):
        encoding = 'latin1'

    lexicon = self.getLexicon()
    splitter = lexicon.Splitter

    wordScores = OIBTree()
    last = None

    # Run through the words and score them
    for word in list(splitter(source, encoding=encoding)):
        if word[0] == '\"':
            last = self._subindex(word[1:-1], wordScores, last, splitter)
        else:
            if word == last:
                continue
            last = word
            wordScores[word] = wordScores.get(word, 0) + 1

    # Convert scores to use wids:
    widScores = IIBucket()
    getWid = lexicon.getWordId
    for word, score in wordScores.items():
        widScores[getWid(word)] = score
    del wordScores

    currentWids = IISet(self._unindex.get(documentId, []))

    # Get rid of document words that are no longer indexed
    self.unindex_objectWids(documentId, difference(currentWids, widScores))

    # Now index the words. Note that the new xIBTrees are clever
    # enough to do nothing when there isn't a change. Woo hoo.
    insert = self.insertForwardIndexEntry
    for wid, score in widScores.items():
        insert(wid, documentId, score)

    # Save the unindexing info if it's changed:
    wids = widScores.keys()
    if wids != currentWids.keys():
        self._unindex[documentId] = wids

    return len(wids)
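The scoring step above is the heart of this index's use of `OIBTree.items`; here is a self-contained sketch of counting term frequencies into an OIBTree and reading them back (sample text invented, no lexicon involved):

from BTrees.OIBTree import OIBTree

wordScores = OIBTree()
for word in 'to be or not to be'.split():
    # Integer values make OIBTree a natural fit for frequency counts.
    wordScores[word] = wordScores.get(word, 0) + 1

# items() yields (word, score) pairs in sorted word order.
for word, score in wordScores.items():
    print(word, score)  # be 2 / not 1 / or 1 / to 2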
Example 5: Lexicon
# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import items [as alias]
class Lexicon(Persistent):

    _v_nextid = None
    _wid_length_based = True  # Flag to distinguish new and old lexica

    def __init__(self, *pipeline):
        self.clear()
        self._pipeline = pipeline

    def clear(self):
        """Empty the lexicon."""
        self.length = Length()
        self._wid_length_based = False
        self._wids = OIBTree()   # word -> wid
        self._words = IOBTree()  # wid -> word
        # wid 0 is reserved for words that aren't in the lexicon (OOV -- out
        # of vocabulary). This can happen, e.g., if a query contains a word
        # we never saw before, and that isn't a known stopword (or otherwise
        # filtered out). Returning a special wid value for OOV words is a
        # way to let clients know when an OOV word appears.

    def length(self):
        """Return the number of unique terms in the lexicon."""
        # Overridden in instances with a BTrees.Length.Length
        raise NotImplementedError

    def words(self):
        return self._wids.keys()

    def wids(self):
        return self._words.keys()

    def items(self):
        return self._wids.items()

    def sourceToWordIds(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        return list(map(self._getWordIdCreate, last))

    def termToWordIds(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            process = getattr(element, "process_post_glob", element.process)
            last = process(last)
        wids = []
        for word in last:
            wids.append(self._wids.get(word, 0))
        return wids

    def parseTerms(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            process = getattr(element, "processGlob", element.process)
            last = process(last)
        return last

    def isGlob(self, word):
        return "*" in word or "?" in word

    def get_word(self, wid):
        return self._words[wid]

    def get_wid(self, word):
        return self._wids.get(word, 0)

    def globToWordIds(self, pattern):
        # Implement * and ? just as in the shell, except the pattern
        # must not start with either of these
        prefix = ""
        while pattern and pattern[0] not in "*?":
            prefix += pattern[0]
            pattern = pattern[1:]
        if not pattern:
            # There were no globbing characters in the pattern
            wid = self._wids.get(prefix, 0)
            if wid:
                return [wid]
            else:
                return []
        if not prefix:
            # The pattern starts with a globbing character.
            # This would be too inefficient, so we raise an exception.
            raise QueryError(
                "pattern %r shouldn't start with glob character" % pattern)
        pat = prefix
        for c in pattern:
            if c == "*":
                pat += ".*"
            elif c == "?":
                pat += "."
            else:
                pat += re.escape(c)
        pat += "$"
        prog = re.compile(pat)
        keys = self._wids.keys(prefix)  # Keys starting at prefix
        wids = []
# ... (rest of this example omitted) ...
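Example 5 keeps its term count in a `BTrees.Length.Length` instead of a plain integer attribute; here is a short sketch of that counter, assuming nothing beyond the BTrees package:

from BTrees.Length import Length

# Length is a conflict-aware counter: concurrent ZODB transactions
# that both call change() have their deltas merged on commit instead
# of raising a write conflict, as a plain integer attribute would.
length = Length()
length.change(1)  # one new word indexed
length.change(1)  # another
print(length())   # calling the instance returns the value: 2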