当前位置: 首页>>代码示例>>Python>>正文


Python AsyncWriter.update_document方法代码示例

本文整理汇总了Python中whoosh.writing.AsyncWriter.update_document方法的典型用法代码示例。如果您正苦于以下问题:Python AsyncWriter.update_document方法的具体用法?Python AsyncWriter.update_document怎么用?Python AsyncWriter.update_document使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类whoosh.writing.AsyncWriter的用法示例。


在下文中一共展示了AsyncWriter.update_document方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: add

# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
def add():
    """Index a page posted as JSON and report the outcome as JSON.

    The request body is parsed as JSON. ``url`` and ``content`` are
    required; ``title`` is optional and falls back to "Untitled". The
    content is parsed with BeautifulSoup, script and style elements are
    removed, and the remaining text is stored via ``update_document``
    together with the current timestamp.

    Returns:
        A ``jsonify`` response whose ``status`` is "missing parameters"
        (no url or content), "ignored" (the URL's netloc starts with
        "localhost") or "ok".
    """
    payload = request.get_json(force=True)
    url = payload.get("url")
    content = payload.get("content")
    if not (url and content):
        return jsonify({"status": "missing parameters"})
    if urlparse.urlparse(url).netloc.startswith("localhost"):
        return jsonify({"status": "ignored"})

    ix = get_index()
    writer = AsyncWriter(ix)

    # Drop script and style elements so only visible text remains.
    soup = BeautifulSoup(content)
    for element in soup(["script", "style"]):
        element.extract()

    # Trim every line, split it on double spaces, and keep only the
    # non-empty pieces, one per output line.
    pieces = []
    for raw_line in soup.get_text().splitlines():
        for phrase in raw_line.strip().split("  "):
            phrase = phrase.strip()
            if phrase:
                pieces.append(phrase)
    text = '\n'.join(pieces)

    title = payload.get("title", "Untitled")
    writer.update_document(
        title=title,
        url=url,
        content=text,
        modified=datetime.datetime.now(),
    )
    writer.commit()
    return jsonify({"status": "ok"})
开发者ID:stuartlangridge,项目名称:WebHistoryIndex,代码行数:29,代码来源:server.py

示例2: update

# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
 def update(self, index, iterable, commit=True):
     """Write every object in ``iterable`` to the Whoosh index.

     Args:
         index: Object whose ``full_prepare(obj)`` returns the document
             dict for one object.
         iterable: Objects to index. Any iterable is accepted, including
             generators and other iterables without ``__len__``.
         commit: Accepted for interface compatibility; it is not consulted,
             and the writer is committed whenever at least one object was
             processed.
     """
     if not self.setup_complete:
         self.setup()

     self.index = self.index.refresh()
     writer = AsyncWriter(self.index)

     # Track whether anything was processed instead of calling len(), so
     # unsized iterables do not raise TypeError after being indexed.
     processed_any = False

     for obj in iterable:
         processed_any = True
         doc = index.full_prepare(obj)

         # Really make sure it's unicode, because Whoosh won't have it any
         # other way.
         for key in doc:
             doc[key] = self._from_python(doc[key])

         writer.update_document(**doc)

     if processed_any:
         # For now, commit no matter what, as we run into locking issues otherwise.
         writer.commit()

         # If spelling support is desired, add to the dictionary.
         if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True:
             sp = SpellChecker(self.storage)
             sp.add_field(self.index, self.content_field_name)
开发者ID:concentricsky,项目名称:django-haystack,代码行数:27,代码来源:whoosh_backend.py

示例3: update

# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
    def update(self, index, iterable, commit=True):
        """Queue every object in ``iterable`` on an ``AsyncWriter``.

        Args:
            index: Object whose ``full_prepare(obj)`` returns the document
                dict for one object.
            iterable: Objects to index.
            commit: Not consulted anywhere in this method.

        Raises:
            Exception: Whatever ``update_document`` raises, unless
                ``self.silently_fail`` is set, in which case the failure is
                logged and the loop continues with the next object.
        """
        # NOTE(review): nothing visible in this method commits the writer;
        # confirm whether the commit was cut off from this excerpt.
        if not self.setup_complete:
            self.setup()

        self.index = self.index.refresh()
        writer = AsyncWriter(self.index)

        for obj in iterable:
            doc = index.full_prepare(obj)

            # Really make sure it's unicode, because Whoosh won't have it any
            # other way.
            for key in doc:
                doc[key] = self._from_python(doc[key])

            try:
                writer.update_document(**doc)
            except Exception as e:
                if not self.silently_fail:
                    raise

                # We'll log the object identifier but won't include the actual object
                # to avoid the possibility of that generating encoding errors while
                # processing the log message. For the same reason only the
                # exception class name is formatted into the message; the
                # full details still reach the log through exc_info.
                self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, extra={
                    "data": {
                        "index": index,
                        "object": get_identifier(obj)
                    }
                })
开发者ID:stden,项目名称:bonushouse,代码行数:32,代码来源:whoosh_backend.py

示例4: whoosh_index

# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
 def whoosh_index(self):
     """Index every page of the notes tree and commit once at the end.

     Walks all items of ``self.notesTree``, reads each page file as UTF-8
     and queues it on a single ``AsyncWriter``. Pages that match
     ``METADATA_CHECKER`` while the 'meta' extension is enabled are stored
     through ``update_document`` without their metadata header and with
     their tags; all other pages are stored through ``add_document`` with
     empty tags.
     """
     it = QTreeWidgetItemIterator(
         self.notesTree, QTreeWidgetItemIterator.All)
     print("Starting complete indexing.")
     writer = AsyncWriter(self.ix)
     while it.value():
         treeItem = it.value()
         name = self.notesTree.itemToPage(treeItem)
         path = os.path.join(self.notesTree.pageToFile(name)).replace(os.sep, '/')
         print(path)
         # The context manager closes the file even if read() raises.
         with open(path, 'r', encoding='utf-8') as fileobj:
             content = fileobj.read()
         if METADATA_CHECKER.match(content) and 'meta' in self.settings.extensions:
             no_metadata_content = METADATA_CHECKER.sub("", content, count=1).lstrip()
             # Converting the page populates self.settings.md.Meta, which
             # is read for the tags below.
             self.settings.md.reset().convert(content)
             writer.update_document(
                 path=name, title=parseTitle(content, name), content=no_metadata_content,
                 tags=','.join(self.settings.md.Meta.get('tags', [])).strip())
         else:
             writer.add_document(path=name, title=parseTitle(content, name), content=content, tags='')

         # Advance the tree iterator to the next item.
         it += 1
     writer.commit()
     print("Finished completely reindexing.")
开发者ID:OSUser,项目名称:mikidown,代码行数:28,代码来源:mikiwindow.py

示例5: update

# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
    def update(self, index, document, **options):
        """Write a single document to the named index and commit.

        Args:
            index: Passed to ``base._resolve_index``; the resolved object
                supplies the index name and adapts the document.
            document: Object handed to ``adapt_document`` to obtain the
                keyword fields for ``update_document``.
            **options: Accepted but not used by this method.
        """
        resolved = base._resolve_index(index)
        storage_index = self._storage.open_index(indexname=resolved.get_name())
        writer = AsyncWriter(storage_index)

        fields = resolved.adapt_document(document)
        writer.update_document(**fields)
        writer.commit()
开发者ID:niwinz,项目名称:needlestack,代码行数:11,代码来源:base.py

示例6: update_bulk

# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
    def update_bulk(self, index, documents):
        """Write several documents to the named index in one commit.

        Args:
            index: Passed to ``base._resolve_index``; the resolved object
                supplies the index name and adapts each document.
            documents: Iterable of objects; each is adapted and queued in
                turn before the single commit at the end.
        """
        resolved = base._resolve_index(index)
        storage_index = self._storage.open_index(indexname=resolved.get_name())
        writer = AsyncWriter(storage_index)

        # Adapt and queue one document at a time, in input order.
        for raw_document in documents:
            fields = resolved.adapt_document(raw_document)
            writer.update_document(**fields)

        writer.commit()
开发者ID:niwinz,项目名称:needlestack,代码行数:14,代码来源:base.py

示例7: index_documents

# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
 def index_documents(self, documents):
     """Add or update documents in the index.

     Each document is a mapping with the keys 'set', 'path', 'hash',
     'title', 'content' and 'kind'. Its ``uid`` field is the set and the
     path joined by a colon. The writer is committed only when at least
     one document was seen.
     """
     writer = AsyncWriter(open_dir(self.index_path))
     saw_document = False
     for entry in documents:
         saw_document = True
         # Build the uid first, then read the remaining fields in a fixed
         # order.
         uid = ':'.join((entry['set'], entry['path']))
         fields = {}
         for field_name in ('path', 'set', 'hash', 'title', 'content', 'kind'):
             fields[field_name] = entry[field_name]
         writer.update_document(uid=uid, **fields)
     if saw_document:
         writer.commit()
开发者ID:pombredanne,项目名称:dokang,代码行数:20,代码来源:whoosh.py

示例8: update

# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
    def update(self, index, iterable, commit=True):
        """Write every object in ``iterable`` to the Whoosh index.

        Args:
            index: Object whose ``full_prepare(obj)`` returns the document
                dict for one object.
            iterable: Objects to index. Any iterable is accepted, including
                generators and other iterables without ``__len__``.
            commit: Accepted for interface compatibility; it is not
                consulted, and the writer is committed whenever at least one
                object was processed.

        Raises:
            Exception: Whatever ``update_document`` raises, unless
                ``self.silently_fail`` is set, in which case the failure is
                logged and the loop continues with a fresh writer.
        """
        if not self.setup_complete:
            self.setup()

        self.index = self.index.refresh()
        writer = AsyncWriter(self.index)

        # Track whether anything was processed instead of calling len(), so
        # unsized iterables do not raise TypeError after being indexed.
        processed_any = False

        for obj in iterable:
            processed_any = True
            doc = index.full_prepare(obj)

            # Really make sure it's unicode, because Whoosh won't have it any
            # other way.
            for key in doc:
                doc[key] = self._from_python(doc[key])

            # Document boosts aren't supported in Whoosh 2.5.0+.
            if 'boost' in doc:
                del doc['boost']

            try:
                writer.update_document(**doc)
            except Exception as e:
                if not self.silently_fail:
                    raise

                # We'll log the object identifier but won't include the actual object
                # to avoid the possibility of that generating encoding errors while
                # processing the log message:
                self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, extra={
                    "data": {
                        "index": index,
                        "object": get_identifier(obj)
                    }
                })

                # reset the writer so there is no 'start_doc' error from the
                # previous failed update attempt
                # NOTE(review): the replaced writer is never committed or
                # cancelled here, so documents queued on it earlier appear
                # to be dropped -- confirm this is intended.
                writer = AsyncWriter(self.index)

        if processed_any:
            # For now, commit no matter what, as we run into locking issues otherwise.
            writer.commit()
开发者ID:kamni,项目名称:django-haystack,代码行数:44,代码来源:whoosh_backend.py

示例9: updateIndex

# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
 def updateIndex(self):
     """Re-index the current page from the editor's plain text.

     When the text matches ``METADATA_CHECKER`` and the 'meta' extension is
     enabled, the metadata header is removed from the indexed content and
     the page's tags are stored; otherwise the full text is stored with
     empty tags. Indexing is best effort: any ordinary exception is
     reported on stdout and not propagated.
     """
     page = self.parent.notesTree.currentPage()
     content = self.toPlainText()
     try:
         writer = AsyncWriter(self.ix)
         if METADATA_CHECKER.match(content) and 'meta' in self.settings.extensions:
             indexed_content = METADATA_CHECKER.sub("", content, count=1).lstrip()
             # Converting the page populates self.settings.md.Meta, which
             # is read for the tags below.
             self.settings.md.reset().convert(content)
             tags = ','.join(self.settings.md.Meta.get('tags', [])).strip()
         else:
             indexed_content = content
             tags = ''
         writer.update_document(
             path=page, title=parseTitle(content, page), content=indexed_content,
             tags=tags)
         writer.commit()
     except Exception:
         # Catch Exception rather than everything, so KeyboardInterrupt and
         # SystemExit are not swallowed.
         print("Whoosh commit failed.")
开发者ID:albfan,项目名称:mikidown,代码行数:22,代码来源:mikiedit.py

示例10: update

# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
 def update(self, index, iterable, commit=True):
     """Queue every object in ``iterable`` on an ``AsyncWriter``.

     Args:
         index: Object whose ``full_prepare(obj)`` returns the document
             dict for one object.
         iterable: Objects to index.
         commit: Not consulted anywhere in this method.

     Raises:
         Exception: Whatever ``update_document`` raises, unless
             ``self.silently_fail`` is set, in which case the failure is
             logged and the loop continues with the next object.
     """
     # NOTE(review): nothing visible in this method commits the writer;
     # confirm whether the commit was cut off from this excerpt.
     if not self.setup_complete:
         self.setup()

     self.index = self.index.refresh()
     writer = AsyncWriter(self.index)

     for obj in iterable:
         doc = index.full_prepare(obj)

         # Really make sure it's unicode, because Whoosh won't have it any
         # other way.
         for key in doc:
             doc[key] = self._from_python(doc[key])

         try:
             writer.update_document(**doc)
         except Exception as e:
             # "as" is accepted by Python 2.6+ and Python 3; the old
             # comma form is a syntax error on Python 3.
             if not self.silently_fail:
                 raise

             self.log.error("Failed to add documents to Whoosh: %s", e)
开发者ID:albanm,项目名称:django-haystack,代码行数:24,代码来源:whoosh_backend.py

示例11: Term

# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
                latest_backends_revids = self._find_latest_backends_revids(self.ix[ALL_REVS], Term(ITEMID, itemid))
                if latest_backends_revids:
                    # we have a latest revision, just update the document in the index:
                    assert len(latest_backends_revids) == 1  # this item must have only one latest revision
                    latest_backend_revid = latest_backends_revids[0]
                    # we must fetch from backend because schema for LATEST_REVS is different than for ALL_REVS
                    # (and we can't be sure we have all fields stored, too)
                    meta, _ = self.backend.retrieve(*latest_backend_revid)
                    # we only use meta (not data), because we do not want to transform data->content again (this
                    # is potentially expensive) as we already have the transformed content stored in ALL_REVS index:
                    with self.ix[ALL_REVS].searcher() as searcher:
                        doc = searcher.document(revid=latest_backend_revid[1])
                        content = doc[CONTENT]
                    doc = backend_to_index(meta, content, self.schemas[LATEST_REVS], self.wikiname,
                                           backend_name=latest_backend_revid[0])
                    writer.update_document(**doc)
                else:
                    # this is no revision left in this item that could be the new "latest rev", just kill the rev
                    writer.delete_document(docnum_remove)

    def _modify_index(self, index, schema, wikiname, revids, mode='add', procs=1, limitmb=256):
        """
        modify index contents - add, update, delete the indexed documents for all given revids

        Note: mode == 'add' is faster but you need to make sure to not create duplicate
              documents in the index.
        """
        with index.writer(procs=procs, limitmb=limitmb) as writer:
            for backend_name, revid in revids:
                if mode in ['add', 'update', ]:
                    meta, data = self.backend.retrieve(backend_name, revid)
开发者ID:denedios,项目名称:moin-2.0,代码行数:33,代码来源:indexing.py

示例12: add_to_index

# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
 def add_to_index(self, item_id, text):
     """Store the lower-cased ``text`` under ``item_id`` and commit.

     Args:
         item_id: Value written to the document's ``id`` field.
         text: String whose lower-cased form is written to the ``text``
             field.
     """
     from whoosh.writing import AsyncWriter

     index_writer = AsyncWriter(self.ix)
     lowered = text.lower()
     index_writer.update_document(id=item_id, text=lowered)
     index_writer.commit()
开发者ID:ybenitezf,项目名称:nstock,代码行数:7,代码来源:z_whoosh.py


注:本文中的whoosh.writing.AsyncWriter.update_document方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。