

Python OIBTree.get Method Code Examples

This article collects and summarizes typical code examples of the BTrees.OIBTree.OIBTree.get method in Python. If you are wondering what OIBTree.get does, how to call it, or what it looks like in real code, the curated examples below should help. You can also browse further usage examples of the class the method belongs to, BTrees.OIBTree.OIBTree.


The following presents 15 code examples of the OIBTree.get method, sorted by popularity by default.
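
Before the individual examples, here is a minimal, self-contained sketch of what OIBTree.get does. It assumes only that the BTrees package (shipped with ZODB) is installed; the keys and values are made up for illustration.

from BTrees.OIBTree import OIBTree

index = OIBTree()              # maps arbitrary orderable keys to integers
index['alpha'] = 1
index['beta'] = 2

print(index.get('alpha'))      # 1
print(index.get('gamma'))      # None -- missing key, implicit default
print(index.get('gamma', 0))   # 0 -- explicit fallback, a pattern many of
                               # the examples below rely on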

Example 1: DateIndex

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class DateIndex(UnIndex):
    """ Index for Dates """

    __implements__ = (PluggableIndex.PluggableIndexInterface,)

    meta_type = 'DateIndex'
    query_options = ['query', 'range']

    manage = manage_main = DTMLFile( 'dtml/manageDateIndex', globals() )
    manage_main._setName( 'manage_main' )
    manage_options = ( { 'label' : 'Settings'
                       , 'action' : 'manage_main'
                       },
                     )

    def clear( self ):
        """ Complete reset """
        self._index = IOBTree()
        self._unindex = OIBTree()


    def index_object( self, documentId, obj, threshold=None ):
        """index an object, normalizing the indexed value to an integer

           o Normalized value has granularity of one minute.

           o Objects which have 'None' as indexed value are *omitted*,
             by design.
        """
        returnStatus = 0

        try:
            date_attr = getattr( obj, self.id )
            if callable( date_attr ):
                date_attr = date_attr()

            ConvertedDate = self._convert( value=date_attr, default=_marker )
        except AttributeError:
            ConvertedDate = _marker

        oldConvertedDate = self._unindex.get( documentId, _marker )

        if ConvertedDate != oldConvertedDate:
            if oldConvertedDate is not _marker:
                self.removeForwardIndexEntry(oldConvertedDate, documentId)

            if ConvertedDate is not _marker:
                self.insertForwardIndexEntry( ConvertedDate, documentId )
                self._unindex[documentId] = ConvertedDate

            returnStatus = 1

        return returnStatus


    def _apply_index( self, request, cid='', type=type, None=None ):
#......... remaining code omitted .........
Author: OS2World, Project: APP-SERVER-Zope, Lines: 58, Source: DateIndex.py
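
The core of Example 1 is the compare-then-update dance around self._unindex.get(documentId, _marker). The condensed sketch below (not the Zope implementation; the names and values are invented) isolates that pattern.

from BTrees.OIBTree import OIBTree

_marker = object()       # sentinel, so a stored value of None stays distinguishable
unindex = OIBTree()      # documentId -> normalized date (both plain integers here)

def reindex(docid, new_value):
    old_value = unindex.get(docid, _marker)
    if new_value == old_value:
        return 0         # unchanged, nothing to write
    # a real index would also update the forward IOBTree here
    unindex[docid] = new_value
    return 1

print(reindex(42, 20240101))   # 1 -- first time this docid is seen
print(reindex(42, 20240101))   # 0 -- same value, no work done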

Example 2: Lexicon

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class Lexicon(Persistent):
    """
    Implementation of :class:`zope.index.text.interfaces.ILexicon`.
    """

    def __init__(self, *pipeline):
        self._wids = OIBTree()  # word -> wid
        self._words = IOBTree() # wid -> word
        # wid 0 is reserved for words that aren't in the lexicon (OOV -- out
        # of vocabulary).  This can happen, e.g., if a query contains a word
        # we never saw before, and that isn't a known stopword (or otherwise
        # filtered out).  Returning a special wid value for OOV words is a
        # way to let clients know when an OOV word appears.
        self.wordCount = Length()
        self._pipeline = pipeline

    def wordCount(self):
        """Return the number of unique terms in the lexicon."""
        # overridden per instance
        return len(self._wids)

    def words(self):
        return self._wids.keys()

    def wids(self):
        return self._words.keys()

    def items(self):
        return self._wids.items()

    def sourceToWordIds(self, text):
        if text is None:
            text = ''
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        if not isinstance(self.wordCount, Length):
            # Make sure wordCount is overridden with a BTrees.Length.Length
            self.wordCount = Length(self.wordCount())
        # Strategically unload the length value so that we get the most
        # recent value written to the database to minimize conflicting wids
        # Because length is independent, this will load the most
        # recent value stored, regardless of whether MVCC is enabled
        self.wordCount._p_deactivate()
        return list(map(self._getWordIdCreate, last))

    def termToWordIds(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        wids = []
        for word in last:
            wids.append(self._wids.get(word, 0))
        return wids

    def parseTerms(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            process = getattr(element, "processGlob", element.process)
            last = process(last)
        return last

    def isGlob(self, word):
        return "*" in word or "?" in word

    def get_word(self, wid):
        return self._words[wid]

    def get_wid(self, word):
        return self._wids.get(word, 0)

    def globToWordIds(self, pattern):
        # Implement * and ? just as in the shell, except the pattern
        # must not start with either of these
        prefix = ""
        while pattern and pattern[0] not in "*?":
            prefix += pattern[0]
            pattern = pattern[1:]
        if not pattern:
            # There were no globbing characters in the pattern
            wid = self._wids.get(prefix, 0)
            if wid:
                return [wid]
            else:
                return []
        if not prefix:
            # The pattern starts with a globbing character.
            # This is too efficient, so we raise an exception.
            raise QueryError(
                "pattern %r shouldn't start with glob character" % pattern)
        pat = prefix
        for c in pattern:
            if c == "*":
                pat += ".*"
            elif c == "?":
                pat += "."
            else:
                pat += re.escape(c)
        pat += "$"
        prog = re.compile(pat)
#......... remaining code omitted .........
Author: zopefoundation, Project: zope.index, Lines: 103, Source: lexicon.py
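
The wid-0 convention described in the comments above comes down to a single OIBTree.get call with 0 as the fallback. A tiny sketch with invented words (not part of zope.index itself):

from BTrees.OIBTree import OIBTree

wids = OIBTree()          # word -> wid; wid 0 is reserved for OOV words
wids['zope'] = 1
wids['index'] = 2

def term_to_wids(words):
    return [wids.get(word, 0) for word in words]

print(term_to_wids(['zope', 'catalog', 'index']))   # [1, 0, 2]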

Example 3: UUIDIndex

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class UUIDIndex(UnIndex):
    """Index for uuid fields with an unique value per key.

    The internal structure is:

    self._index = {datum:documentId}
    self._unindex = {documentId:datum}

    For each datum only one documentId can exist.
    """

    meta_type = "UUIDIndex"

    manage_options = (
        {'label': 'Settings', 'action': 'manage_main'},
        {'label': 'Browse', 'action': 'manage_browse'},
    )

    query_options = ["query", "range"]

    manage = manage_main = DTMLFile('dtml/manageUUIDIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    def clear(self):
        self._length = Length()
        self._index = OIBTree()
        self._unindex = IOBTree()
        self._counter = Length()

    def numObjects(self):
        """Return the number of indexed objects. Since we have a 1:1 mapping
        from documents to values, we can reuse the stored length.
        """
        return self.indexSize()

    def uniqueValues(self, name=None, withLengths=0):
        """returns the unique values for name

        if withLengths is true, returns a sequence of
        tuples of (value, length)
        """
        if name is None:
            name = self.id
        elif name != self.id:
            raise StopIteration

        if not withLengths:
            for key in self._index.keys():
                yield key
        else:
            # We know the length for each value is one
            for key in self._index.keys():
                yield (key, 1)

    def insertForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and put it in the correct place
        in the forward index.
        """
        if entry is None:
            return

        old_docid = self._index.get(entry, _marker)
        if old_docid is _marker:
            self._index[entry] = documentId
            self._length.change(1)
        elif old_docid != documentId:
            logger.error("A different document with value '%s' already "
                "exists in the index.'" % entry)

    def removeForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        old_docid = self._index.get(entry, _marker)
        if old_docid is not _marker:
            del self._index[entry]
            self._length.change(-1)

    def _get_object_datum(self, obj, attr):
        # for a uuid it never makes sense to acquire a parent value via
        # Acquisition
        has_attr = getattr(aq_base(obj), attr, _marker)
        if has_attr is _marker:
            return _marker
        return super(UUIDIndex, self)._get_object_datum(obj, attr)
Author: wildcardcorp, Project: Products.ZCatalog, Lines: 88, Source: UUIDIndex.py
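
insertForwardIndexEntry above shows the other common OIBTree.get idiom: probing with a sentinel default to tell "value never indexed" apart from "value already owned by another document". A reduced sketch with made-up names:

from BTrees.OIBTree import OIBTree

_marker = object()
index = OIBTree()                 # uuid string -> single document id

def insert_forward(entry, docid):
    old_docid = index.get(entry, _marker)
    if old_docid is _marker:
        index[entry] = docid
        return True
    if old_docid != docid:
        print("A different document with value %r already exists" % entry)
    return False

print(insert_forward('uuid-0f3c', 10))   # True
print(insert_forward('uuid-0f3c', 11))   # False, and the conflict is reported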

Example 4: Path

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class Path(String):

    root = None     # root as passed to Catalog()
    path2rid = None # OIBTree mapping path to rid (one:one)
    rid2path = None # IOBTree mapping rid to path (one:one)
    parts = None    # OOBTree mapping (level, part) to rids (one:many)
    levels = None   # IOBTree mapping level to a list of rids (one:many)
    case_sensitive = None

    sorted = None   # OOBTree for sorting; inherited from Path


    def __init__(self, root, case_sensitive=None):

        # Root
        # ====

        if not isinstance(root, basestring):
            raise TypeError("root is not a string: '%s'" % root)
        elif not isdir(root):
            raise ValueError("root doesn't point to a directory: '%s'" % root)
        self.root = root.rstrip(os.sep)


        # Case Sensitivity
        # ================

        if case_sensitive is None:
            if 'win' in sys.platform:
                case_sensitive = False
            else:
                case_sensitive = True
        if case_sensitive not in (False, True, 0, 1):
            raise TypeError( "case_sensitive isn't a boolean: "
                           + "'%s'" % case_sensitive
                            )
        self.case_sensitive = bool(case_sensitive)

        self.reset()


    # Index contract
    # ==============

    __name__ = 'Path' # used in command-line interface


    def reset(self):
        """Forget everything; usually called from __init__.
        """
        String.reset(self)

        self.path2rid = OIBTree()   # {path:rid}
        self.rid2path = IOBTree()   # {rid:path}
        self.parts = OOBTree()      # {(level,part):rids}
        self.rids = IOBTree()       # {rid:(level,part)s}
        self.levels = IOBTree()     # {level:rids}


    def learn(self, rid, value):
        """Given an rid and a value, associate them.
        """
        String.learn(self, rid, value)


        # Parse and validate.
        # ===================
        # Value is an absolute path, rooted in self.root.

        if not isinstance(value, basestring):
            raise TypeError("string expected")
        elif value and not value.startswith(os.sep):
            raise ValueError("path not specified absolutely: '%s'" % value)
        if self.case_sensitive:
            path = value
        else:
            path = value.lower()
        path = path.rstrip(os.sep) # safety net; should never need this
        parts = value.split(os.sep)
        #parts = value.split(os.sep)[1:]


        # Add to simple identity indices.
        # ===============================

        self.path2rid[path] = rid
        self.rid2path[rid] = path


        # Add to complex level/part indices.
        # ==================================

        for level in range(len(parts)):
            token_ = (level, parts[level])


            # Add to (one:many) mapping of (level,part) to [rids].
            # ====================================================

            if token_ not in self.parts:
#......... remaining code omitted .........
Author: Pythoning, Project: dewey, Lines: 103, Source: indices.py

Example 5: Indexer

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class Indexer(object):

    filestorage = database = connection = root = None

    def __init__(self, datafs, writable=0, trans=0, pack=0):
        self.trans_limit = trans
        self.pack_limit = pack
        self.trans_count = 0
        self.pack_count = 0
        self.stopdict = get_stopdict()
        self.mh = mhlib.MH()
        self.filestorage = FileStorage(datafs, read_only=(not writable))
        self.database = DB(self.filestorage)
        self.connection = self.database.open()
        self.root = self.connection.root()
        try:
            self.index = self.root["index"]
        except KeyError:
            self.index = self.root["index"] = TextIndexWrapper()
        try:
            self.docpaths = self.root["docpaths"]
        except KeyError:
            self.docpaths = self.root["docpaths"] = IOBTree()
        try:
            self.doctimes = self.root["doctimes"]
        except KeyError:
            self.doctimes = self.root["doctimes"] = IIBTree()
        try:
            self.watchfolders = self.root["watchfolders"]
        except KeyError:
            self.watchfolders = self.root["watchfolders"] = {}
        self.path2docid = OIBTree()
        for docid in self.docpaths.keys():
            path = self.docpaths[docid]
            self.path2docid[path] = docid
        try:
            self.maxdocid = max(self.docpaths.keys())
        except ValueError:
            self.maxdocid = 0
        print(len(self.docpaths), "Document ids")
        print(len(self.path2docid), "Pathnames")
        print(self.index.lexicon.length(), "Words")

    def dumpfreqs(self):
        lexicon = self.index.lexicon
        index = self.index.index
        assert isinstance(index, OkapiIndex)
        L = []
        for wid in lexicon.wids():
            freq = 0
            for f in index._wordinfo.get(wid, {}).values():
                freq += f
            L.append((freq, wid, lexicon.get_word(wid)))
        L.sort()
        L.reverse()
        for freq, wid, word in L:
            print("%10d %10d %s" % (wid, freq, word))

    def dumpwids(self):
        lexicon = self.index.lexicon
        index = self.index.index
        assert isinstance(index, OkapiIndex)
        for wid in lexicon.wids():
            freq = 0
            for f in index._wordinfo.get(wid, {}).values():
                freq += f
            print("%10d %10d %s" % (wid, freq, lexicon.get_word(wid)))

    def dumpwords(self):
        lexicon = self.index.lexicon
        index = self.index.index
        assert isinstance(index, OkapiIndex)
        for word in lexicon.words():
            wid = lexicon.get_wid(word)
            freq = 0
            for f in index._wordinfo.get(wid, {}).values():
                freq += f
            print("%10d %10d %s" % (wid, freq, word))

    def close(self):
        self.root = None
        if self.connection is not None:
            self.connection.close()
            self.connection = None
        if self.database is not None:
            self.database.close()
            self.database = None
        if self.filestorage is not None:
            self.filestorage.close()
            self.filestorage = None

    def interact(self, nbest=NBEST, maxlines=MAXLINES):
        try:
            import readline
        except ImportError:
            pass
        text = ""
        top = 0
        results = []
        while 1:
#......... remaining code omitted .........
Author: Pylons, Project: hypatia, Lines: 103, Source: mhindex.py
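
Example 5 rebuilds a path -> docid OIBTree by walking the docid -> path IOBTree; which direction you need to look up decides which flavour of BTree holds which side. A standalone sketch of that inversion with invented data:

from BTrees.IOBTree import IOBTree
from BTrees.OIBTree import OIBTree

docpaths = IOBTree()                      # docid -> path
docpaths[1] = 'inbox/1'
docpaths[2] = 'inbox/2'

path2docid = OIBTree()                    # path -> docid (the inverse mapping)
for docid, path in docpaths.items():
    path2docid[path] = docid

print(path2docid.get('inbox/2'))          # 2
print(path2docid.get('outbox/9', 0))      # 0 -- unknown path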

Example 6: DateIndex

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class DateIndex(UnIndex, PropertyManager):

    """Index for dates.
    """

    __implements__ = UnIndex.__implements__
    implements(IDateIndex)

    meta_type = 'DateIndex'
    query_options = ['query', 'range']

    index_naive_time_as_local = True # False means index as UTC
    _properties=({'id':'index_naive_time_as_local',
                  'type':'boolean',
                  'mode':'w'},)

    manage = manage_main = DTMLFile( 'dtml/manageDateIndex', globals() )
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    manage_main._setName( 'manage_main' )
    manage_options = ( { 'label' : 'Settings'
                       , 'action' : 'manage_main'
                       },
                       {'label': 'Browse',
                        'action': 'manage_browse',
                       },
                     ) + PropertyManager.manage_options

    def clear( self ):
        """ Complete reset """
        self._index = IOBTree()
        self._unindex = OIBTree()
        self._length = BTrees.Length.Length()

    def index_object( self, documentId, obj, threshold=None ):
        """index an object, normalizing the indexed value to an integer

           o Normalized value has granularity of one minute.

           o Objects which have 'None' as indexed value are *omitted*,
             by design.
        """
        returnStatus = 0

        try:
            date_attr = getattr( obj, self.id )
            if safe_callable( date_attr ):
                date_attr = date_attr()

            ConvertedDate = self._convert( value=date_attr, default=_marker )
        except AttributeError:
            ConvertedDate = _marker

        oldConvertedDate = self._unindex.get( documentId, _marker )

        if ConvertedDate != oldConvertedDate:
            if oldConvertedDate is not _marker:
                self.removeForwardIndexEntry(oldConvertedDate, documentId)
                if ConvertedDate is _marker:
                    try:
                        del self._unindex[documentId]
                    except ConflictError:
                        raise
                    except:
                        logger.error(
                            ("Should not happen: ConvertedDate was there,"
                             " now it's not, for document with id %s" %
                             documentId))

            if ConvertedDate is not _marker:
                self.insertForwardIndexEntry( ConvertedDate, documentId )
                self._unindex[documentId] = ConvertedDate

            returnStatus = 1

        return returnStatus

    def _apply_index( self, request, cid='', type=type ):
        """Apply the index to query parameters given in the argument

        Normalize the 'query' arguments into integer values at minute
        precision before querying.
        """
        record = parseIndexRequest( request, self.id, self.query_options )
        if record.keys == None:
            return None

        keys = map( self._convert, record.keys )

        index = self._index
        r = None
        opr = None

        #experimental code for specifing the operator
        operator = record.get( 'operator', self.useOperator )
        if not operator in self.operators :
            raise RuntimeError, "operator not valid: %s" % operator

        # depending on the operator we use intersection or union
        if operator=="or":
#......... remaining code omitted .........
Author: wpjunior, Project: proled, Lines: 103, Source: DateIndex.py

Example 7: IntegerRangesIndex

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class IntegerRangesIndex(SimpleItem):
    """ Index a set of integer ranges:
        [(1,2), (12,23), (12, 22)]
    """

    implements(IPluggableIndex)
    meta_type = 'IntegerRangesIndex'

    def __init__(self, id, caller=None, extra=None):
        self.id = id
        self.caller = caller
        self.clear()
        self.__genid = 0

    def __len__(self):
        return self._length()

    def getId(self):
        """Return Id of index."""
        return self.id

    def clear(self):
        """Empty the index"""
        
        IOBTree = BTrees.family64.IO.BTree

        self._index = IOBTree() # {rangeid: [document_id, ...]}
        self._unindex = IOBTree() # {document_id: [rangeid, ...]}
        self._range_mapping = IOBTree() # {rangeid: range}
        self._reverse_range_mapping = OIBTree() # {range: rangeid}
        self._since_index = IOBTree() # {since: [rangeid,...]}
        self._until_index = IOBTree() # {until: [rangeid,...]}
        self._length = BTrees.Length.Length()
        self._unique_values_length = BTrees.Length.Length()

    def __get_range_id(self, range_):
        return self._reverse_range_mapping.get(range_, None)

    def __get_range(self, range_id):
        return self._range_mapping.get(range_id, None)

    def __index_range(self, range_):
        """ index range if needed and return the rangeid
        """
        range_id = self.__get_range_id(range_)
        if range_id is None:
            range_id = self.genid()
            # index range
            self._unique_values_length.change(1)
            self._range_mapping[range_id] = range_
            self._reverse_range_mapping[range_] = range_id
            # index range boundaries
            since, until = range_
            self.__insert_in_index_set(self._since_index, since, range_id)
            self.__insert_in_index_set(self._until_index, until, range_id)
        return range_id

    def __unindex_range(self, range_id):
        range_ = self.__get_range(range_id)
        if range_ is None:
            return None
        since, until = range_
        self.__remove_in_index_set(self._since_index, since, range_id)
        self.__remove_in_index_set(self._until_index, until, range_id)
        self._unique_values_length.change(-1)
        del self._range_mapping[range_id]
        del self._reverse_range_mapping[range_]
        return range_

    def genid(self):
        self.__genid += 1
        return self.__genid

    def getEntryForObject(self, document_id, default=_marker):
        """Get all information contained for 'document_id'."""
        if default is _marker:
            return self._unindex.get(document_id)
        else:
            return self._index.get(document_id, default)

    def getIndexSourceNames(self):
        """Get a sequence of attribute names that are indexed by the index.
        """
        return [self.id]

    def index_object(self, document_id, obj, threshold=None):
        """Index an object.

        'document_id' is the integer ID of the document.
        'obj' is the object to be indexed.
        'threshold' is the number of words to process between committing
        subtransactions.  If None, subtransactions are disabled.
        """
        new_ranges = self._get_object_data(obj, self.id)
        if new_ranges:
            new_set = IISet(map(self.__index_range, new_ranges))
        else:
            new_set = IISet()

        old_set = self._unindex.get(document_id, IISet())
#......... remaining code omitted .........
Author: davidgillies, Project: silva.app.news, Lines: 103, Source: indexing.py
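
__index_range above is a get-or-create pattern: OIBTree.get answers "has this range been seen before?", and only a miss allocates a fresh id. A compact sketch with hypothetical names (the list-wrapped counter stands in for the example's genid):

from BTrees.IOBTree import IOBTree
from BTrees.OIBTree import OIBTree

range_mapping = IOBTree()          # rangeid -> range
reverse_range_mapping = OIBTree()  # range -> rangeid
next_id = [0]

def index_range(range_):
    range_id = reverse_range_mapping.get(range_, None)
    if range_id is None:
        next_id[0] += 1
        range_id = next_id[0]
        range_mapping[range_id] = range_
        reverse_range_mapping[range_] = range_id
    return range_id

print(index_range((1, 2)))     # 1 -- newly assigned
print(index_range((12, 23)))   # 2
print(index_range((1, 2)))     # 1 -- already known, same id returned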

Example 8: Lexicon

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class Lexicon(Persistent):

    implements(ILexicon)

    def __init__(self, *pipeline):
        self._wids = OIBTree()  # word -> wid
        self._words = IOBTree() # wid -> word
        # wid 0 is reserved for words that aren't in the lexicon (OOV -- out
        # of vocabulary).  This can happen, e.g., if a query contains a word
        # we never saw before, and that isn't a known stopword (or otherwise
        # filtered out).  Returning a special wid value for OOV words is a
        # way to let clients know when an OOV word appears.
        self._nextwid = 1
        self._pipeline = pipeline

        # Keep some statistics about indexing
        self._nbytes = 0 # Number of bytes indexed (at start of pipeline)
        self._nwords = 0 # Number of words indexed (after pipeline)

    def wordCount(self):
        """Return the number of unique terms in the lexicon."""
        return self._nextwid - 1

    def words(self):
        return self._wids.keys()

    def wids(self):
        return self._words.keys()

    def items(self):
        return self._wids.items()

    def sourceToWordIds(self, text):
        last = _text2list(text)
        for t in last:
            self._nbytes += len(t)
        for element in self._pipeline:
            last = element.process(last)
        self._nwords += len(last)
        return map(self._getWordIdCreate, last)

    def termToWordIds(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        wids = []
        for word in last:
            wids.append(self._wids.get(word, 0))
        return wids

    def parseTerms(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            process = getattr(element, "processGlob", element.process)
            last = process(last)
        return last

    def isGlob(self, word):
        return "*" in word or "?" in word

    def get_word(self, wid):
        return self._words[wid]

    def get_wid(self, word):
        return self._wids.get(word, 0)

    def globToWordIds(self, pattern):
        # Implement * and ? just as in the shell, except the pattern
        # must not start with either of these
        prefix = ""
        while pattern and pattern[0] not in "*?":
            prefix += pattern[0]
            pattern = pattern[1:]
        if not pattern:
            # There were no globbing characters in the pattern
            wid = self._wids.get(prefix, 0)
            if wid:
                return [wid]
            else:
                return []
        if not prefix:
            # The pattern starts with a globbing character.
            # This is too efficient, so we raise an exception.
            raise QueryError(
                "pattern %r shouldn't start with glob character" % pattern)
        pat = prefix
        for c in pattern:
            if c == "*":
                pat += ".*"
            elif c == "?":
                pat += "."
            else:
                pat += re.escape(c)
        pat += "$"
        prog = re.compile(pat)
        keys = self._wids.keys(prefix) # Keys starting at prefix
        wids = []
        for key in keys:
            if not key.startswith(prefix):
                break
#......... remaining code omitted .........
Author: , Project: , Lines: 103, Source:

Example 9: LinkCheckTool

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class LinkCheckTool(SimpleItem):
    security = ClassSecurityInfo()

    def __init__(self, id=None):
        super(LinkCheckTool, self).__init__(id)

        # This is the work queue; items in this queue are scheduled
        # for link validity check.
        self.queue = CompositeQueue()

        # Additional queue for internal crawler to revalidate the site
        self.crawl_queue = CompositeQueue()

        # This is the link database. It maps a hyperlink index to a
        # tuple (timestamp, status, referers).
        self.checked = IOBTree()

        # Indexes
        self.index = OIBTree()
        self.links = IOBTree()

        # This is a counter that allows us to add new hyperlinks and
        # provide an index quickly.
        self.counter = 0

    security.declarePrivate("is_available")
    def is_available(self):
        return hasattr(self, 'index') and \
               hasattr(self, 'checked') and \
               hasattr(self, 'queue') and \
               hasattr(self, 'counter')

    security.declarePrivate("clear")
    def clear(self):
        while True:
            try:
                self.queue.pull()
            except IndexError:
                break
        while True:
            try:
                self.crawl_queue.pull()
            except IndexError:
                break

        self.checked.clear()
        self.index.clear()
        self.links.clear()
        self.counter = 0

    security.declarePrivate("crawl")
    def crawl(self):
        self.clear()
        query = {}
        registry = getUtility(IRegistry)
        settings = registry.forInterface(ISettings)
        if settings.content_types:
            query['portal_type'] = settings.content_types

        if settings.workflow_states:
            query['review_state'] = settings.workflow_states

        catalog = api.portal.get_tool('portal_catalog')
        brains = catalog(query)
        for brain in brains:
            # asynchronous crawling not working yet
            # self.crawl_enqueue(brain.UID)

            obj = brain.getObject()
            obj.restrictedTraverse('@@linkcheck')()
            logger.info('Crawling: checked {0}'.format(brain.getURL()))

    security.declarePrivate("enqueue")
    def enqueue(self, url):
        index = self.index.get(url)
        if index is None:
            # a really new url
            index = self.store(url)
        else:
            entry = self.checked.get(index)
            if entry is not None and entry:
                entry = None, entry[1], entry[2]
                self.checked[index] = entry
            else:
                # reset empty entry
                self.remove(url)
                index = self.store(url)
        self.queue.put(index)
        return index

    security.declarePrivate("register")
    def register(self, hrefs, referer, timestamp):
        """Add or update link presence information.

        If a link has not been checked since the provided timestamp,
        it will be added to the queue (or if it is not in the
        database).
        """

        referer = self.index.get(referer) or self.store(referer)
#......... remaining code omitted .........
Author: collective, Project: collective.linkcheck, Lines: 103, Source: tool.py

Example 10: ExtendedPathIndex

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class ExtendedPathIndex(PathIndex):
    """A path index stores all path components of the physical path of an
    object.

    Internal datastructure (regular pathindex):

    - a physical path of an object is split into its components

    - every component is kept as a key of a OOBTree in self._indexes

    - the value is a mapping 'level of the path component' to
      'all docids with this path component on this level'

    In addition

    - there is a terminator (None) signifying the last component in the path

    - 2 additional indexes map absolute path to either the doc id or doc ids of
      contained objects. This allows for rapid answering of common queries.
    """

    meta_type = "ExtendedPathIndex"

    manage_options = (
        {'label': 'Settings', 'action': 'manage_main'},
    )

    indexed_attrs = None
    multi_valued = False
    query_options = ("query", "level", "operator",
                     "depth", "navtree", "navtree_start")

    def __init__(self, id, extra=None, caller=None):
        """ ExtendedPathIndex supports indexed_attrs """
        PathIndex.__init__(self, id, caller)

        if isinstance(extra, dict):
            attrs = extra.get('indexed_attrs', None)
            self.multi_valued = extra.get('multi_valued', False)
        else:
            attrs = getattr(extra, 'indexed_attrs', None)
            self.multi_valued = getattr(extra, 'multi_valued', False)

        if attrs is None:
            return

        if isinstance(attrs, str):
            attrs = attrs.split(',')
        attrs = [a.strip() for a in attrs]
        attrs = [a for a in attrs if a]

        if attrs:
            # We only index the first attribute so snip off the rest
            self.indexed_attrs = tuple(attrs[:1])

    def clear(self):
        PathIndex.clear(self)
        self._index_parents = OOBTree()
        self._index_items = OIBTree()

    def index_object(self, docid, obj, threshold=100):
        """ hook for (Z)Catalog """

        # PathIndex first checks for an attribute matching its id and
        # falls back to getPhysicalPath only when failing to get one.
        # If self.indexed_attrs is not None, it's value overrides this behavior

        attrs = self.indexed_attrs
        index = attrs is None and self.id or attrs[0]

        path = getattr(obj, index, None)
        if path is not None:
            if safe_callable(path):
                path = path()

            if not isinstance(path, (str, tuple)):
                raise TypeError('path value must be string or tuple '
                                'of strings: (%r, %s)' % (index, repr(path)))
        else:
            try:
                path = obj.getPhysicalPath()
            except AttributeError:
                return 0

        if isinstance(path, (list, tuple)):
            path = '/' + '/'.join(path[1:])
        comps = [p for p in path.split('/') if p]

        # Make sure we reindex properly when path change
        old_path = self._unindex.get(docid, _marker)
        if old_path is not _marker:
            if old_path != path:
                self.unindex_object(docid, _old=old_path)
                # unindex reduces length, we need to counter that
                self._length.change(1)
        else:
            # We only get a new entry if the value wasn't there before.
            # If it already existed the length is unchanged
            self._length.change(1)

#......... remaining code omitted .........
Author: plone, Project: Products.ExtendedPathIndex, Lines: 103, Source: ExtendedPathIndex.py

Example 11: DocumentMap

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class DocumentMap(Persistent):
    """ A two-way map between addresses (e.g. location paths) and document ids.

    The map is a persistent object meant to live in a ZODB storage.

    Additionally, the map is capable of mapping 'metadata' to docids.
    """
    _v_nextid = None
    family = BTrees.family32
    _randrange = random.randrange
    docid_to_metadata = None # latch for b/c

    def __init__(self):
        self.docid_to_address = IOBTree()
        self.address_to_docid = OIBTree()
        self.docid_to_metadata = IOBTree()

    def docid_for_address(self, address):
        """ Retrieve a document id for a given address.

        ``address`` is a string or other hashable object which represents
        a token known by the application.

        Return the integer document id corresponding to ``address``.

        If ``address`` doesn't exist in the document map, return None.
        """
        return self.address_to_docid.get(address)

    def address_for_docid(self, docid):
        """ Retrieve an address for a given document id.

        ``docid`` is an integer document id.

        Return the address corresponding to ``docid``.

        If ``docid`` doesn't exist in the document map, return None.
        """
        return self.docid_to_address.get(docid)

    def add(self, address, docid=_marker):
        """ Add a new document to the document map.

        ``address`` is a string or other hashable object which represents
        a token known by the application.

        ``docid``, if passed, must be an int.  In this case, remove
        any previous address stored for it before mapping it to the
        new address.  Passing an explicit ``docid`` also removes any
        metadata associated with that docid.
        
        If ``docid`` is not passed, generate a new docid.

        Return the integer document id mapped to ``address``.
        """
        if docid is _marker:
            docid = self.new_docid()

        self.remove_docid(docid)
        self.remove_address(address)

        self.docid_to_address[docid] = address
        self.address_to_docid[address] = docid
        return docid

    def remove_docid(self, docid):
        """ Remove a document from the document map for the given document ID.

        ``docid`` is an integer document id.

        Remove any corresponding metadata for ``docid`` as well.

        Return a True if ``docid`` existed in the map, else return False.
        """
        # It should be an invariant that if one entry exists in
        # docid_to_address for a docid/address pair, exactly one
        # corresponding entry exists in address_to_docid for the same
        # docid/address pair.  However, versions of this code before
        # r.catalog 0.7.3 had a bug which, if this method was called
        # multiple times, each time with the same address but a
        # different docid, the ``docid_to_address`` mapping could
        # contain multiple entries for the same address each with a
        # different docid, causing this invariant to be violated.  The
        # symptom: in systems that used r.catalog 0.7.2 and lower,
        # there might be more entries in docid_to_address than there
        # are in address_to_docid.  The conditional fuzziness in the
        # code directly below is a runtime kindness to systems in that
        # state.  Technically, the administrator of a system in such a
        # state should normalize the two data structures by running a
        # script after upgrading to 0.7.3.  If we made the admin do
        # this, some of the code fuzziness below could go away,
        # replaced with something simpler.  But there's no sense in
        # breaking systems at runtime through being a hardass about
        # consistency if an unsuspecting upgrader has not yet run the
        # data fixer script. The "fix the data" mantra rings a
        # little hollow when you weren't the one who broke the data in
        # the first place ;-)

        self._check_metadata()

#......... remaining code omitted .........
Author: claytron, Project: repoze.catalog, Lines: 103, Source: document.py
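
DocumentMap keeps the two lookup directions in two trees, and the OIBTree side is what docid_for_address queries with .get. A stripped-down sketch of just that pairing (not the repoze.catalog class itself):

from BTrees.IOBTree import IOBTree
from BTrees.OIBTree import OIBTree

docid_to_address = IOBTree()
address_to_docid = OIBTree()

def add(address, docid):
    docid_to_address[docid] = address
    address_to_docid[address] = docid

add('/site/doc-a', 7)

print(address_to_docid.get('/site/doc-a'))    # 7
print(address_to_docid.get('/site/missing'))  # None, as docid_for_address promises
print(docid_to_address.get(7))                # '/site/doc-a'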

Example 12: index_object

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
    def index_object(self, documentId, obj, threshold=None):
        """ Index an object:
        'documentId' is the integer id of the document

        'obj' is the object to be indexed

        'threshold' is the number of words to process between
        commiting subtransactions.  If 'None' subtransactions are
        disabled. """

        # sniff the object for our 'id', the 'document source' of the
        # index is this attribute.  If it smells callable, call it.
        try:
            source = getattr(obj, self.id)
            if safe_callable(source):
                source = source()

            if not isinstance(source, UnicodeType):
                source = str(source)

        except (AttributeError, TypeError):
            return 0

        # sniff the object for 'id'+'_encoding'

        try:
            encoding = getattr(obj, self.id+'_encoding')
            if safe_callable(encoding ):
                encoding = str(encoding())
            else:
                encoding = str(encoding)
        except (AttributeError, TypeError):
            encoding = 'latin1'

        lexicon = self.getLexicon()

        splitter = lexicon.Splitter

        wordScores = OIBTree()
        last = None

        # Run through the words and score them

        for word in list(splitter(source,encoding=encoding)):
            if word[0] == '\"':
                last = self._subindex(word[1:-1], wordScores, last, splitter)
            else:
                if word==last: continue
                last=word
                wordScores[word]=wordScores.get(word,0)+1

        # Convert scores to use wids:
        widScores=IIBucket()
        getWid=lexicon.getWordId
        for word, score in wordScores.items():
            widScores[getWid(word)]=score

        del wordScores

        currentWids=IISet(self._unindex.get(documentId, []))

        # Get rid of document words that are no longer indexed
        self.unindex_objectWids(documentId, difference(currentWids, widScores))

        # Now index the words. Note that the new xIBTrees are clever
        # enough to do nothing when there isn't a change. Woo hoo.
        insert=self.insertForwardIndexEntry
        for wid, score in widScores.items():
            insert(wid, documentId, score)

        # Save the unindexing info if it's changed:
        wids=widScores.keys()
        if wids != currentWids.keys():
            self._unindex[documentId]=wids

        return len(wids)
Author: hoka, Project: backport-zcatalog-plone4.x-to-plone3.x, Lines: 78, Source: TextIndex.py
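
The scoring loop above uses an OIBTree itself as the scratch counter: wordScores.get(word, 0) + 1 is an ordinary dict.get style tally, just backed by a BTree. In isolation, with hypothetical splitter output:

from BTrees.OIBTree import OIBTree

word_scores = OIBTree()
for word in ['zope', 'index', 'zope', 'catalog', 'zope']:
    word_scores[word] = word_scores.get(word, 0) + 1

print(dict(word_scores.items()))   # {'catalog': 1, 'index': 1, 'zope': 3}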

Example 13: Lexicon

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class Lexicon(Persistent, Implicit):
    """Maps words to word ids and then some

    The Lexicon object is an attempt to abstract vocabularies out of
    Text indexes.  This abstraction is not totally cooked yet, this
    module still includes the parser for the 'Text Index Query
    Language' and a few other hacks.

    """

    # default for older objects
    stop_syn={}

    def __init__(self, stop_syn=None,useSplitter=None,extra=None):

        self.clear()
        if stop_syn is None:
            self.stop_syn = {}
        else:
            self.stop_syn = stop_syn

        self.useSplitter = Splitter.splitterNames[0]
        if useSplitter: self.useSplitter=useSplitter
        self.splitterParams = extra
        self.SplitterFunc = Splitter.getSplitter(self.useSplitter)


    def clear(self):
        self._lexicon = OIBTree()
        self._inverseLex = IOBTree()

    def _convertBTrees(self, threshold=200):
        if (type(self._lexicon) is OIBTree and
            type(getattr(self, '_inverseLex', None)) is IOBTree):
            return

        from BTrees.convert import convert

        lexicon=self._lexicon
        self._lexicon=OIBTree()
        self._lexicon._p_jar=self._p_jar
        convert(lexicon, self._lexicon, threshold)

        try:
            inverseLex=self._inverseLex
            self._inverseLex=IOBTree()
        except AttributeError:
            # older lexicons didn't have an inverse lexicon
            self._inverseLex=IOBTree()
            inverseLex=self._inverseLex

        self._inverseLex._p_jar=self._p_jar
        convert(inverseLex, self._inverseLex, threshold)

    def set_stop_syn(self, stop_syn):
        """ pass in a mapping of stopwords and synonyms.  Format is:

        {'word' : [syn1, syn2, ..., synx]}

        Vocabularies do not necessarily need to implement this if their
        splitters do not support stemming or stopping.

        """
        self.stop_syn = stop_syn


    def getWordId(self, word):
        """ return the word id of 'word' """

        wid=self._lexicon.get(word, None)
        if wid is None:
            wid=self.assignWordId(word)
        return wid

    set = getWordId

    def getWord(self, wid):
        """ post-2.3.1b2 method, will not work with unconverted lexicons """
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assigns a new word id to the provided word and returns it."""
        # First make sure it's not already in there
        if self._lexicon.has_key(word):
            return self._lexicon[word]


        try: inverse=self._inverseLex
        except AttributeError:
            # whoops, old lexicon without wids
            inverse=self._inverseLex=IOBTree()
            for word, wid in self._lexicon.items():
                inverse[wid]=word

        wid=randid()
        while not inverse.insert(wid, word):
            wid=randid()

        if isinstance(word,StringType):
            self._lexicon[intern(word)] = wid
#......... remaining code omitted .........
Author: OS2World, Project: APP-SERVER-Zope, Lines: 103, Source: Lexicon.py

Example 14: Lexicon

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class Lexicon(Persistent):

    _v_nextid = None
    _wid_length_based = True  # Flag to distinguish new and old lexica

    def __init__(self, *pipeline):
        self.clear()
        self._pipeline = pipeline

    def clear(self):
        """Empty the lexicon.
        """
        self.length = Length()
        self._wid_length_based = False
        self._wids = OIBTree()  # word -> wid
        self._words = IOBTree()  # wid -> word
        # wid 0 is reserved for words that aren't in the lexicon (OOV -- out
        # of vocabulary).  This can happen, e.g., if a query contains a word
        # we never saw before, and that isn't a known stopword (or otherwise
        # filtered out).  Returning a special wid value for OOV words is a
        # way to let clients know when an OOV word appears.

    def length(self):
        """Return the number of unique terms in the lexicon.
        """
        # Overridden in instances with a BTrees.Length.Length
        raise NotImplementedError

    def words(self):
        return self._wids.keys()

    def wids(self):
        return self._words.keys()

    def items(self):
        return self._wids.items()

    def sourceToWordIds(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        return list(map(self._getWordIdCreate, last))

    def termToWordIds(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            process = getattr(element, "process_post_glob", element.process)
            last = process(last)
        wids = []
        for word in last:
            wids.append(self._wids.get(word, 0))
        return wids

    def parseTerms(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            process = getattr(element, "processGlob", element.process)
            last = process(last)
        return last

    def isGlob(self, word):
        return "*" in word or "?" in word

    def get_word(self, wid):
        return self._words[wid]

    def get_wid(self, word):
        return self._wids.get(word, 0)

    def globToWordIds(self, pattern):
        # Implement * and ? just as in the shell, except the pattern
        # must not start with either of these
        prefix = ""
        while pattern and pattern[0] not in "*?":
            prefix += pattern[0]
            pattern = pattern[1:]
        if not pattern:
            # There were no globbing characters in the pattern
            wid = self._wids.get(prefix, 0)
            if wid:
                return [wid]
            else:
                return []
        if not prefix:
            # The pattern starts with a globbing character.
            # This is too efficient, so we raise an exception.
            raise QueryError(
                "pattern %r shouldn't start with glob character" % pattern)
        pat = prefix
        for c in pattern:
            if c == "*":
                pat += ".*"
            elif c == "?":
                pat += "."
            else:
                pat += re.escape(c)
        pat += "$"
        prog = re.compile(pat)
        keys = self._wids.keys(prefix)  # Keys starting at prefix
        wids = []
#......... remaining code omitted .........
Author: zopefoundation, Project: Products.ZCatalog, Lines: 103, Source: Lexicon.py

Example 15: GlobbingLexicon

# Required import: from BTrees.OIBTree import OIBTree [as alias]
# Or: from BTrees.OIBTree.OIBTree import get [as alias]
class GlobbingLexicon(Lexicon):
    """Lexicon which supports basic globbing function ('*' and '?').

    This lexicon keeps several data structures around that are useful
    for searching. They are:

      '_lexicon' -- Contains the mapping from word => word_id

      '_inverseLex' -- Contains the mapping from word_id => word

      '_digrams' -- Contains a mapping from digram => word_id

    Before going further, it is necessary to understand what a digram is,
    as it is a core component of the structure of this lexicon.  A digram
    is a two-letter sequence in a word.  For example, the word 'zope'
    would be converted into the digrams::

      ['$z', 'zo', 'op', 'pe', 'e$']

    where the '$' is a word marker.  It is used at the beginning and end
    of the words.  Those digrams are significant.
    """

    multi_wc = '*'
    single_wc = '?'
    eow = '$'


    def __init__(self,useSplitter=None,extra=None):
        self.clear()
        self.useSplitter = useSplitter
        self.splitterParams = extra
        self.SplitterFunc = Splitter.getSplitter(self.useSplitter)

    def clear(self):
        self._lexicon = OIBTree()
        self._inverseLex = IOBTree()
        self._digrams = OOBTree()

    def _convertBTrees(self, threshold=200):
        Lexicon._convertBTrees(self, threshold)
        if type(self._digrams) is OOBTree: return

        from BTrees.convert import convert

        _digrams=self._digrams
        self._digrams=OOBTree()
        self._digrams._p_jar=self._p_jar
        convert(_digrams, self._digrams, threshold, IITreeSet)


    def createDigrams(self, word):
        """Returns a list with the set of digrams in the word."""

        word = '$'+word+'$'
        return [ word[i:i+2] for i in range(len(word)-1)]


    def getWordId(self, word):
        """Provided 'word', return the matching integer word id."""

        if self._lexicon.has_key(word):
            return self._lexicon[word]
        else:
            return self.assignWordId(word)

    set = getWordId                     # Kludge for old code

    def getWord(self, wid):
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assigns a new word id to the provided word, and return it."""

        # Double check it's not in the lexicon already, and if it is, just
        # return it.
        if self._lexicon.has_key(word):
            return self._lexicon[word]


        # Get word id. BBB Backward compat pain.
        inverse=self._inverseLex
        try: insert=inverse.insert
        except AttributeError:
            # we have an "old" BTree object
            if inverse:
                wid=inverse.keys()[-1]+1
            else:
                self._inverseLex=IOBTree()
                wid=1
            inverse[wid] = word
        else:
            # we have a "new" IOBTree object
            wid=randid()
            while not inverse.insert(wid, word):
                wid=randid()

        self._lexicon[word] = wid

        # Now take all the digrams and insert them into the digram map.
#......... remaining code omitted .........
Author: OS2World, Project: APP-SERVER-Zope, Lines: 103, Source: GlobbingLexicon.py
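
GlobbingLexicon.assignWordId relies on IOBTree.insert returning a falsy value when a randomly chosen wid is already taken, then records the word -> wid side in the OIBTree that getWordId later probes with get. A minimal sketch of that allocation loop (random.randint stands in for Zope's randid helper):

import random

from BTrees.IOBTree import IOBTree
from BTrees.OIBTree import OIBTree

lexicon = OIBTree()       # word -> wid
inverse = IOBTree()       # wid -> word

def assign_word_id(word):
    existing = lexicon.get(word)
    if existing is not None:
        return existing
    wid = random.randint(1, 2**30)
    while not inverse.insert(wid, word):   # insert() returns 0 if the wid is taken
        wid = random.randint(1, 2**30)
    lexicon[word] = wid
    return wid

wid = assign_word_id('zope')
print(wid == assign_word_id('zope'))       # True -- second call reuses the wid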


Note: The BTrees.OIBTree.OIBTree.get method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. For distribution and use, please refer to the License of the corresponding project. Do not reproduce without permission.