本文整理汇总了Python中whoosh.writing.AsyncWriter.update_document方法的典型用法代码示例。如果您正苦于以下问题：Python AsyncWriter.update_document方法的具体用法？Python AsyncWriter.update_document怎么用？Python AsyncWriter.update_document使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类whoosh.writing.AsyncWriter的用法示例。
在下文中一共展示了AsyncWriter.update_document方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: add
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
def add():
    """Index the page described by the JSON request body.

    The body is read with ``request.get_json(force=True)`` and must carry
    ``url`` and ``content``; ``title`` is optional and defaults to
    ``"Untitled"``.  The HTML in ``content`` is reduced to plain text before
    being stored.

    Returns a JSON response whose ``status`` is ``"missing parameters"``,
    ``"ignored"`` (host starts with ``localhost``) or ``"ok"``.
    """
    d = request.get_json(force=True)
    url = d.get("url")
    content = d.get("content")
    if not url or not content:
        return jsonify({"status": "missing parameters"})
    if urlparse.urlparse(url).netloc.startswith("localhost"):
        return jsonify({"status": "ignored"})

    soup = BeautifulSoup(content)
    # kill all script and style elements
    for script in soup(["script", "style"]):
        script.extract()  # rip it out
    # get text
    text = soup.get_text()
    # break into lines and remove leading and trailing space on each
    lines = (line.strip() for line in text.splitlines())
    # break each line into phrases, one per output line
    # NOTE(review): the separator is a single space here, so every word ends
    # up on its own line -- confirm this is intended.
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
    # drop blank lines
    text = '\n'.join(chunk for chunk in chunks if chunk)

    # Create the writer only once the text is ready, so nothing is left
    # open if HTML parsing fails.
    ix = get_index()
    writer = AsyncWriter(ix)
    try:
        writer.update_document(title=d.get("title", "Untitled"),
                               url=url,
                               content=text,
                               modified=datetime.datetime.now())
    except Exception:
        # Do not leave a half-written, uncommitted writer behind.
        writer.cancel()
        raise
    writer.commit()
    return jsonify({"status": "ok"})
示例2: update
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
def update(self, index, iterable, commit=True):
    """Prepare every object in ``iterable`` and write it to the Whoosh index.

    ``index`` supplies ``full_prepare(obj)``, which yields the document
    fields; each value is passed through ``self._from_python`` before being
    handed to the writer.  ``commit`` is accepted for interface
    compatibility but is not consulted: a non-empty batch is always
    committed.

    ``iterable`` must support ``len()``.
    """
    if not self.setup_complete:
        self.setup()

    self.index = self.index.refresh()
    writer = AsyncWriter(self.index)

    for obj in iterable:
        doc = index.full_prepare(obj)

        # Really make sure it's unicode, because Whoosh won't have it any
        # other way.
        for key in doc:
            doc[key] = self._from_python(doc[key])

        writer.update_document(**doc)

    if len(iterable) > 0:
        # For now, commit no matter what, as we run into locking issues otherwise.
        writer.commit()

        # If spelling support is desired, add to the dictionary.
        if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True:
            sp = SpellChecker(self.storage)
            sp.add_field(self.index, self.content_field_name)
    else:
        # Nothing was written: close the writer explicitly instead of
        # abandoning it uncommitted.
        writer.cancel()
示例3: update
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
def update(self, index, iterable, commit=True):
    """Prepare every object in ``iterable`` and pass it to an AsyncWriter.

    ``index`` supplies ``full_prepare(obj)``; each resulting field value is
    converted with ``self._from_python``.  A failing ``update_document`` is
    re-raised unless ``self.silently_fail`` is set, in which case it is
    logged and the loop continues with the next object.

    NOTE(review): no ``writer.commit()`` is visible in this excerpt, and
    ``commit`` is not used here -- confirm where the batch gets committed.
    """
    if not self.setup_complete:
        self.setup()

    self.index = self.index.refresh()
    writer = AsyncWriter(self.index)

    for obj in iterable:
        doc = index.full_prepare(obj)

        # Really make sure it's unicode, because Whoosh won't have it any
        # other way.
        for key in doc:
            doc[key] = self._from_python(doc[key])

        try:
            writer.update_document(**doc)
        except Exception as e:  # "as" form parses on Python 2.6+ and 3
            if not self.silently_fail:
                raise

            # We'll log the object identifier but won't include the actual object
            # to avoid the possibility of that generating encoding errors while
            # processing the log message:
            self.log.error(u"%s while preparing object for update" % e, exc_info=True, extra={
                "data": {
                    "index": index,
                    "object": get_identifier(obj)
                }
            })
示例4: whoosh_index
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
def whoosh_index(self):
    """Walk every item of ``self.notesTree`` and write its page to the index.

    Each page file is read as UTF-8.  If the content matches
    ``METADATA_CHECKER`` and the ``meta`` extension is enabled, the leading
    metadata block is stripped from the indexed content and the tags are
    taken from ``self.settings.md.Meta``; otherwise the raw content is
    indexed with empty tags.  One commit is issued after the whole tree has
    been visited.
    """
    it = QTreeWidgetItemIterator(
        self.notesTree, QTreeWidgetItemIterator.All)
    print("Starting complete indexing.")
    writer = AsyncWriter(self.ix)
    while it.value():
        treeItem = it.value()
        name = self.notesTree.itemToPage(treeItem)
        path = os.path.join(self.notesTree.pageToFile(name)).replace(os.sep, '/')
        print(path)
        # The context manager closes the file even when read() raises.
        with open(path, 'r', encoding='utf-8') as fileobj:
            content = fileobj.read()
        if METADATA_CHECKER.match(content) and 'meta' in self.settings.extensions:
            no_metadata_content = METADATA_CHECKER.sub("", content, count=1).lstrip()
            # Converting the page fills in md.Meta, which is read just below.
            self.settings.md.reset().convert(content)
            writer.update_document(
                path=name, title=parseTitle(content, name), content=no_metadata_content,
                tags=','.join(self.settings.md.Meta.get('tags', [])).strip())
        else:
            writer.add_document(path=name, title=parseTitle(content, name), content=content, tags='')
        # Advance the tree iterator; without this the loop never ends.
        it += 1
    writer.commit()
    print("Finished completely reindexing.")
示例5: update
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
def update(self, index, document, **options):
    """Adapt ``document`` for ``index`` and write it as a single update.

    ``index`` is first resolved with ``base._resolve_index``; the matching
    Whoosh index is opened from ``self._storage`` by name.  ``options`` is
    accepted but not used.
    """
    resolved = base._resolve_index(index)
    whoosh_ix = self._storage.open_index(indexname=resolved.get_name())
    writer = AsyncWriter(whoosh_ix)
    # Adapt after the writer exists, then update and commit in one go.
    writer.update_document(**resolved.adapt_document(document))
    writer.commit()
示例6: update_bulk
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
def update_bulk(self, index, documents):
    """Adapt each of ``documents`` for ``index`` and commit them together.

    ``index`` is resolved with ``base._resolve_index`` and the matching
    Whoosh index is opened from ``self._storage`` by name.  Documents are
    adapted one at a time, immediately before being written.
    """
    resolved = base._resolve_index(index)
    whoosh_ix = self._storage.open_index(indexname=resolved.get_name())
    writer = AsyncWriter(whoosh_ix)
    for document in documents:
        fields = resolved.adapt_document(document)
        writer.update_document(**fields)
    writer.commit()
示例7: index_documents
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
def index_documents(self, documents):
    """Add or update documents in the index.

    Each document is a mapping with the keys ``set``, ``path``, ``hash``,
    ``title``, ``content`` and ``kind``.  The ``uid`` field is built as
    ``"<set>:<path>"``.  The index is opened from ``self.index_path``.

    If ``documents`` yields nothing, the writer is cancelled rather than
    committed.
    """
    index = open_dir(self.index_path)
    writer = AsyncWriter(index)
    needs_commit = False
    for document in documents:
        needs_commit = True
        writer.update_document(
            uid=':'.join((document['set'], document['path'])),
            path=document['path'],
            set=document['set'],
            hash=document['hash'],
            title=document['title'],
            content=document['content'],
            kind=document['kind'],
        )
    if needs_commit:
        writer.commit()
    else:
        # Close the unused writer explicitly instead of abandoning it.
        writer.cancel()
示例8: update
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
def update(self, index, iterable, commit=True):
    """Prepare every object in ``iterable`` and write it to the Whoosh index.

    ``index`` supplies ``full_prepare(obj)``; each field value is converted
    with ``self._from_python`` and any ``boost`` key is dropped.  A failing
    ``update_document`` is re-raised unless ``self.silently_fail`` is set,
    in which case it is logged and a fresh writer is created.

    ``commit`` is accepted for interface compatibility but is not
    consulted: a non-empty batch is always committed.  ``iterable`` must
    support ``len()``.
    """
    if not self.setup_complete:
        self.setup()

    self.index = self.index.refresh()
    writer = AsyncWriter(self.index)

    for obj in iterable:
        doc = index.full_prepare(obj)

        # Really make sure it's unicode, because Whoosh won't have it any
        # other way.
        for key in doc:
            doc[key] = self._from_python(doc[key])

        # Document boosts aren't supported in Whoosh 2.5.0+.
        if 'boost' in doc:
            del doc['boost']

        try:
            writer.update_document(**doc)
        except Exception as e:
            if not self.silently_fail:
                raise

            # We'll log the object identifier but won't include the actual object
            # to avoid the possibility of that generating encoding errors while
            # processing the log message:
            self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, extra={
                "data": {
                    "index": index,
                    "object": get_identifier(obj)
                }
            })

            # reset the writer so there is no 'start_doc' error from the
            # previous failed update attempt
            # NOTE(review): the replaced writer is neither committed nor
            # cancelled, so documents it already accepted appear to be
            # dropped -- confirm this is intended.
            writer = AsyncWriter(self.index)

    if len(iterable) > 0:
        # For now, commit no matter what, as we run into locking issues otherwise.
        writer.commit()
    else:
        # Nothing was written: close the writer explicitly instead of
        # abandoning it uncommitted.
        writer.cancel()
示例9: updateIndex
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
def updateIndex(self):
    """Re-index the current page's text in Whoosh (a costly operation).

    The page comes from ``self.parent.notesTree.currentPage()`` and the text
    from ``self.toPlainText()``.  If the text matches ``METADATA_CHECKER``
    and the ``meta`` extension is enabled, the leading metadata block is
    stripped from the indexed content and tags are read from
    ``self.settings.md.Meta``; otherwise the raw text is indexed with empty
    tags.

    Indexing is best-effort: any ``Exception`` is reported on stdout and
    swallowed.
    """
    page = self.parent.notesTree.currentPage()
    content = self.toPlainText()
    try:
        writer = AsyncWriter(self.ix)
        if METADATA_CHECKER.match(content) and 'meta' in self.settings.extensions:
            indexed_content = METADATA_CHECKER.sub("", content, count=1).lstrip()
            # Converting the page fills in md.Meta, which is read just below.
            self.settings.md.reset().convert(content)
            title = parseTitle(content, page)
            tags = ','.join(self.settings.md.Meta.get('tags', [])).strip()
        else:
            indexed_content = content
            title = parseTitle(content, page)
            tags = ''
        writer.update_document(
            path=page, title=title, content=indexed_content, tags=tags)
        writer.commit()
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        print("Whoosh commit failed.")
示例10: update
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
def update(self, index, iterable, commit=True):
    """Prepare every object in ``iterable`` and pass it to an AsyncWriter.

    ``index`` supplies ``full_prepare(obj)``; each resulting field value is
    converted with ``self._from_python``.  A failing ``update_document`` is
    re-raised unless ``self.silently_fail`` is set, in which case it is
    logged and the loop continues with the next object.

    NOTE(review): no ``writer.commit()`` is visible in this excerpt, and
    ``commit`` is not used here -- confirm where the batch gets committed.
    """
    if not self.setup_complete:
        self.setup()

    self.index = self.index.refresh()
    writer = AsyncWriter(self.index)

    for obj in iterable:
        doc = index.full_prepare(obj)

        # Really make sure it's unicode, because Whoosh won't have it any
        # other way.
        for key in doc:
            doc[key] = self._from_python(doc[key])

        try:
            writer.update_document(**doc)
        except Exception as e:  # "as" form parses on Python 2.6+ and 3
            if not self.silently_fail:
                raise

            self.log.error("Failed to add documents to Whoosh: %s", e)
示例11: Term
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
latest_backends_revids = self._find_latest_backends_revids(self.ix[ALL_REVS], Term(ITEMID, itemid))
if latest_backends_revids:
# we have a latest revision, just update the document in the index:
assert len(latest_backends_revids) == 1 # this item must have only one latest revision
latest_backend_revid = latest_backends_revids[0]
# we must fetch from backend because schema for LATEST_REVS is different than for ALL_REVS
# (and we can't be sure we have all fields stored, too)
meta, _ = self.backend.retrieve(*latest_backend_revid)
# we only use meta (not data), because we do not want to transform data->content again (this
# is potentially expensive) as we already have the transformed content stored in ALL_REVS index:
with self.ix[ALL_REVS].searcher() as searcher:
doc = searcher.document(revid=latest_backend_revid[1])
content = doc[CONTENT]
doc = backend_to_index(meta, content, self.schemas[LATEST_REVS], self.wikiname,
backend_name=latest_backend_revid[0])
writer.update_document(**doc)
else:
# this is no revision left in this item that could be the new "latest rev", just kill the rev
writer.delete_document(docnum_remove)
def _modify_index(self, index, schema, wikiname, revids, mode='add', procs=1, limitmb=256):
"""
modify index contents - add, update, delete the indexed documents for all given revids
Note: mode == 'add' is faster but you need to make sure to not create duplicate
documents in the index.
"""
with index.writer(procs=procs, limitmb=limitmb) as writer:
for backend_name, revid in revids:
if mode in ['add', 'update', ]:
meta, data = self.backend.retrieve(backend_name, revid)
示例12: add_to_index
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import update_document [as 别名]
def add_to_index(self, item_id, text):
    """Store ``text``, lower-cased, under ``item_id`` in ``self.ix``.

    The entry is written with ``update_document`` and committed
    immediately.
    """
    from whoosh.writing import AsyncWriter

    index_writer = AsyncWriter(self.ix)
    lowered = text.lower()
    index_writer.update_document(id=item_id, text=lowered)
    index_writer.commit()