本文整理汇总了 Python 中 whoosh.writing.AsyncWriter.commit 方法的典型用法代码示例。如果您正苦于以下问题:Python AsyncWriter.commit 方法的具体用法?Python AsyncWriter.commit 怎么用?Python AsyncWriter.commit 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 whoosh.writing.AsyncWriter 的用法示例。
在下文中一共展示了AsyncWriter.commit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: whoosh_index
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def whoosh_index(self):
    """Index every page of the notes tree into ``self.ix``.

    Walks all items of ``self.notesTree``, reads each page's file as UTF-8
    and writes one document per page with the fields ``path``, ``title``,
    ``content`` and ``tags``. When the content matches ``METADATA_CHECKER``
    and the ``meta`` extension is enabled, the first metadata match is
    stripped from the indexed content and the tags come from the markdown
    converter's ``Meta`` mapping.

    If anything fails while pages are being processed, the pending writer
    is cancelled so it does not keep holding the index write lock, and the
    exception is re-raised.
    """
    it = QTreeWidgetItemIterator(
        self.notesTree, QTreeWidgetItemIterator.All)
    print("Starting complete indexing.")
    writer = AsyncWriter(self.ix)
    try:
        while it.value():
            treeItem = it.value()
            name = self.notesTree.itemToPage(treeItem)
            path = os.path.join(self.notesTree.pageToFile(name)).replace(os.sep, '/')
            print(path)
            # The context manager closes the file even if read() raises.
            with open(path, 'r', encoding='utf-8') as fileobj:
                content = fileobj.read()
            if METADATA_CHECKER.match(content) and 'meta' in self.settings.extensions:
                no_metadata_content = METADATA_CHECKER.sub("", content, count=1).lstrip()
                # Converting populates self.settings.md.Meta, read below.
                self.settings.md.reset().convert(content)
                writer.update_document(
                    path=name, title=parseTitle(content, name), content=no_metadata_content,
                    tags=','.join(self.settings.md.Meta.get('tags', [])).strip())
            else:
                # NOTE(review): this branch adds rather than updates, so an
                # already-indexed page would presumably be duplicated --
                # confirm the index is always fresh when this runs.
                writer.add_document(path=name, title=parseTitle(content, name), content=content, tags='')
            it += 1
    except Exception:
        # Drop the half-built batch and release the writer before propagating.
        writer.cancel()
        raise
    writer.commit()
    print("Finished completely reindexing.")
示例2: store_page
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def store_page(user, url):
    """Fetch ``url`` and add its parsed content to the module-level index.

    The document is stored with the fields ``ts`` (fetch time), ``user``,
    ``url`` and ``content``.

    The writer is created only after the page has been downloaded and
    parsed, so the index write lock is not held during network I/O and is
    not left dangling when the request or the parser raises.
    """
    resp = requests.get(url)
    content = parse(resp.content)
    now = datetime.now()

    writer = AsyncWriter(idx)
    writer.add_document(ts=now, user=unicode(user), url=unicode(url), content=content)
    writer.commit()
示例3: incremental_index
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def incremental_index(t, l, c, dirname):
    """Index one entry unless an entry with the same title is already stored.

    Args:
        t: Title used for the duplicate check against the stored ``title``
            fields of the index.
        l: Passed through to ``wooshDocuments`` (presumably the entry's
            link -- TODO confirm).
        c: Passed through to ``wooshDocuments`` (presumably the entry's
            content -- TODO confirm).
        dirname: Directory of the whoosh index to open.

    Returns:
        The id computed for this entry (``Searcher().getcount() + 1``),
        whether or not a document was actually added.
    """
    # Renamed from ``id`` so the builtin is not shadowed.
    doc_id = Searcher().getcount() + 1
    ix = index.open_dir(dirname)

    # Read-only pass first: collect every stored title before any writer
    # (and its lock) is created.
    with ix.searcher() as searcher:
        indexed_feeds = {fields['title'] for fields in searcher.all_stored_fields()}

    if t not in indexed_feeds:
        # New title: open a writer only now that there is something to add.
        writer = AsyncWriter(ix)
        try:
            wooshDocuments(doc_id, writer, t, l, c)
        except Exception:
            # Do not leave the writer open when building the document fails.
            writer.cancel()
            raise
        writer.commit()
    return doc_id
示例4: add
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def add():
    """Index the page described by the JSON request body.

    Reads ``url``, ``content`` and optional ``title`` from the request
    JSON, strips ``<script>``/``<style>`` elements from the HTML, flattens
    the remaining text and stores it with ``update_document``.

    Returns:
        A JSON response whose ``status`` is ``"missing parameters"`` when
        ``url`` or ``content`` is absent/empty, ``"ignored"`` when the URL's
        netloc starts with ``localhost``, and ``"ok"`` after indexing.
    """
    d = request.get_json(force=True)
    url = d.get("url")
    content = d.get("content")
    if not url or not content:
        return jsonify({"status": "missing parameters"})
    if urlparse.urlparse(url).netloc.startswith("localhost"):
        return jsonify({"status": "ignored"})

    soup = BeautifulSoup(content)
    # Remove all script and style elements so only visible text remains.
    for script in soup(["script", "style"]):
        script.extract()
    text = soup.get_text()
    # Strip leading and trailing whitespace from each line.
    lines = (line.strip() for line in text.splitlines())
    # NOTE(review): splitting on a single space puts every word on its own
    # line; the separator may have been a wider run of spaces before this
    # snippet was extracted -- confirm against the original project.
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
    # Drop blank chunks.
    text = '\n'.join(chunk for chunk in chunks if chunk)

    # Acquire the writer only after the text is ready, so a parsing failure
    # cannot leave an uncommitted writer behind.
    ix = get_index()
    writer = AsyncWriter(ix)
    writer.update_document(title=d.get("title", "Untitled"),
                           url=url,
                           content=text,
                           modified=datetime.datetime.now())
    writer.commit()
    return jsonify({"status": "ok"})
示例5: delPage
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def delPage(self, item):
    """Delete a page, its sub-pages, its attachments and its index entries.

    Child items are deleted recursively first. Then the files in the page's
    attachment directory and the directory itself are removed, documents
    matching the page path are deleted from the whoosh index, the page file
    is removed, and the item is detached from the tree. A directory that is
    no longer needed is removed as well.

    Args:
        item: The tree item of the page to delete.
    """
    # Delete children from last to first; each recursive call detaches the
    # child from ``item``.
    for index in reversed(range(item.childCount())):
        self.dirname = item.child(index).text(0)
        self.delPage(item.child(index))

    # Remove the attachment folder and everything inside it.
    attDir = self.itemToAttachmentDir(item)
    for info in QtCore.QDir(attDir).entryInfoList():
        QtCore.QDir().remove(info.absoluteFilePath())
    QtCore.QDir().rmdir(attDir)

    # Remove the page from the search index.
    pagePath = self.itemToPage(item)
    self.ix = open_dir(self.settings.indexdir)
    # NOTE(review): the path goes through the query parser, so characters
    # with query-syntax meaning may not match literally -- confirm.
    query = QueryParser("path", self.ix.schema).parse(pagePath)
    writer = AsyncWriter(self.ix)
    writer.delete_by_query(query)
    writer.commit()

    # Remove the page file itself.
    QtCore.QDir(self.notePath).remove(self.pageToFile(pagePath))

    parent = item.parent()
    if parent is not None:
        # Resolve the parent's page only when there is a parent.
        parentPage = self.itemToPage(parent)
        index = parent.indexOfChild(item)
        parent.takeChild(index)
        if parent.childCount() == 0:  # if no child, dir not needed
            QtCore.QDir(self.notePath).rmdir(parentPage)
    else:
        index = self.indexOfTopLevelItem(item)
        self.takeTopLevelItem(index)
    QtCore.QDir(self.notePath).rmdir(pagePath)
示例6: update
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def update(self, index, iterable, commit=True):
    """Write every object of ``iterable`` to the whoosh index.

    Args:
        index: Search index definition; its ``full_prepare(obj)`` builds
            the document dict for each object.
        iterable: Objects to index. Any iterable works, including
            generators that have no ``len()``.
        commit: Accepted for interface compatibility; it is not consulted,
            a commit happens whenever at least one object was seen.

    When ``iterable`` yields nothing, the writer is cancelled instead of
    being left open without a commit.
    """
    if not self.setup_complete:
        self.setup()

    self.index = self.index.refresh()
    writer = AsyncWriter(self.index)

    # Track emptiness during iteration instead of calling len() afterwards,
    # which would fail on generators once documents are already buffered.
    saw_any = False
    for obj in iterable:
        saw_any = True
        doc = index.full_prepare(obj)

        # Really make sure it's unicode, because Whoosh won't have it any
        # other way.
        for key in doc:
            doc[key] = self._from_python(doc[key])

        writer.update_document(**doc)

    if not saw_any:
        # Nothing to write: release the writer rather than leaking it.
        writer.cancel()
        return

    # For now, commit no matter what, as we run into locking issues otherwise.
    writer.commit()

    # If spelling support is desired, add to the dictionary.
    if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True:
        sp = SpellChecker(self.storage)
        sp.add_field(self.index, self.content_field_name)
示例7: update
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def update(self, index, document, **options):
    """Add or replace a single document in the given index.

    Args:
        index: Index reference, resolved through ``base._resolve_index``.
        document: Source document; converted with the resolved index's
            ``adapt_document`` before being written.
        **options: Accepted but not used here.
    """
    resolved = base._resolve_index(index)
    whoosh_ix = self._storage.open_index(indexname=resolved.get_name())
    writer = AsyncWriter(whoosh_ix)
    fields = resolved.adapt_document(document)
    writer.update_document(**fields)
    writer.commit()
示例8: delete_documents
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def delete_documents(self, doc_set, paths):
    """Remove documents of one set from the index.

    Args:
        doc_set: Value the ``set`` field of a document must have.
        paths: Iterable of ``path`` values; a document is deleted when its
            path equals any of them.
    """
    writer = AsyncWriter(open_dir(self.index_path))
    # Match: set == doc_set AND path is one of the given paths.
    any_of_paths = Or([Term('path', candidate) for candidate in paths])
    in_set = Term('set', doc_set)
    writer.delete_by_query(And([in_set, any_of_paths]))
    writer.commit()
示例9: update_bulk
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def update_bulk(self, index, documents):
    """Add or replace several documents in one commit.

    Args:
        index: Index reference, resolved through ``base._resolve_index``.
        documents: Iterable of source documents; each is converted with the
            resolved index's ``adapt_document`` right before it is written.
    """
    resolved = base._resolve_index(index)
    whoosh_ix = self._storage.open_index(indexname=resolved.get_name())
    writer = AsyncWriter(whoosh_ix)
    for document in documents:
        fields = resolved.adapt_document(document)
        writer.update_document(**fields)
    writer.commit()
示例10: addLink
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def addLink(self, url, title, summary, txt):
    """Add a link to the ``MAIN`` index under ``self.indexDir``.

    The indexed content is the title repeated ten times, the summary
    repeated twice, and then the body text, each separated by spaces
    (presumably to weight title and summary more heavily when searching
    -- TODO confirm).

    Args:
        url: Stored as the document ``id``.
        title: Link title.
        summary: Link summary.
        txt: Body text of the linked page.
    """
    title10 = (title + " ") * 10
    sumario2 = (summary + " ") * 2
    text = title10 + sumario2 + " " + txt

    ix = open_dir(self.indexDir, indexname='MAIN', readonly=False)
    try:
        writer = AsyncWriter(ix)
        writer.add_document(id=url, content=unicode(text))
        writer.commit()
    finally:
        # Close the index even when adding or committing fails.
        ix.close()
示例11: whoosh_task
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def whoosh_task(ids, pool_number, ix, model_class):
    """Index the title and summary of the model rows with the given ids.

    Rows whose ``title`` or ``summary`` is ``None`` are skipped.

    Args:
        ids: Primary-key values to look up.
        pool_number: Not used inside this function.
        ix: Whoosh index to write to.
        model_class: Mapped class queried through the shared session.

    If a lookup or an add fails, the writer is cancelled before the
    exception propagates, so it does not stay open uncommitted.
    """
    session = sqla['session']
    writer = AsyncWriter(ix)
    try:
        for id_ in ids:
            # .one() raises unless exactly one row matches.
            obj = session.query(model_class).filter_by(id=id_).one()
            if obj.title is None or obj.summary is None:
                continue

            writer.add_document(
                title=obj.title,
                summary=obj.summary
            )
    except Exception:
        writer.cancel()
        raise
    writer.commit()
示例12: index_documents
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def index_documents(self, documents):
    """Add or update documents in the index.

    Args:
        documents: Iterable of mappings with the keys ``set``, ``path``,
            ``hash``, ``title``, ``content`` and ``kind``. The unique id of
            each document is ``"<set>:<path>"``.

    Commits only when at least one document was written; otherwise the
    writer is cancelled so it is not left open.
    """
    index = open_dir(self.index_path)
    writer = AsyncWriter(index)
    needs_commit = False
    for document in documents:
        needs_commit = True
        writer.update_document(
            uid=':'.join((document['set'], document['path'])),
            path=document['path'],
            set=document['set'],
            hash=document['hash'],
            title=document['title'],
            content=document['content'],
            kind=document['kind'],
        )
    if needs_commit:
        writer.commit()
    else:
        # Nothing was written: release the writer instead of leaking it.
        writer.cancel()
示例13: update
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def update(self, index, iterable, commit=True):
    """Write every object of ``iterable`` to the whoosh index.

    Args:
        index: Search index definition; its ``full_prepare(obj)`` builds
            the document dict for each object.
        iterable: Objects to index. Any iterable works, including
            generators that have no ``len()``.
        commit: Accepted for interface compatibility; it is not consulted,
            a commit happens whenever at least one object was seen.

    Raises:
        Exception: Whatever ``update_document`` raised, when
            ``self.silently_fail`` is false.

    When ``iterable`` yields nothing, the writer is cancelled instead of
    being left open without a commit.
    """
    if not self.setup_complete:
        self.setup()

    self.index = self.index.refresh()
    writer = AsyncWriter(self.index)

    # Track emptiness during iteration instead of calling len() afterwards,
    # which would fail on generators once documents are already buffered.
    saw_any = False
    for obj in iterable:
        saw_any = True
        doc = index.full_prepare(obj)

        # Really make sure it's unicode, because Whoosh won't have it any
        # other way.
        for key in doc:
            doc[key] = self._from_python(doc[key])

        # Document boosts aren't supported in Whoosh 2.5.0+.
        if 'boost' in doc:
            del doc['boost']

        try:
            writer.update_document(**doc)
        except Exception as e:
            if not self.silently_fail:
                raise

            # We'll log the object identifier but won't include the actual object
            # to avoid the possibility of that generating encoding errors while
            # processing the log message:
            self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, extra={
                "data": {
                    "index": index,
                    "object": get_identifier(obj)
                }
            })

            # reset the writer so there is no 'start_doc' error from the
            # previous failed update attempt
            # NOTE(review): the replaced writer is never committed here, so
            # documents buffered in it before the failure are not written
            # by this call -- confirm this is intended.
            writer = AsyncWriter(self.index)

    if saw_any:
        # For now, commit no matter what, as we run into locking issues otherwise.
        writer.commit()
    else:
        # Nothing to write: release the writer rather than leaking it.
        writer.cancel()
示例14: clear
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def clear(self):
    """Wipe all indexes and forget every registered class.

    The service is stopped once this returns (if it was running); start it
    again to build new indexes and register classes.
    """
    logger.info("Resetting indexes")
    app_state = self.app_state

    # Committing an otherwise empty writer with mergetype=CLEAR is what
    # empties each index.
    for _name, whoosh_ix in app_state.indexes.items():
        AsyncWriter(whoosh_ix).commit(merge=True, optimize=True, mergetype=CLEAR)

    app_state.indexes.clear()
    app_state.indexed_classes.clear()
    app_state.indexed_fqcn.clear()
    self.clear_update_queue()

    if self.running:
        self.stop()
示例15: updateIndex
# 需要导入模块: from whoosh.writing import AsyncWriter [as 别名]
# 或者: from whoosh.writing.AsyncWriter import commit [as 别名]
def updateIndex(self):
    """Re-index the page currently being edited.

    Takes the current page from the parent's notes tree and the editor's
    plain text, then updates the page's document (fields ``path``,
    ``title``, ``content``, ``tags``). When the text matches
    ``METADATA_CHECKER`` and the ``meta`` extension is enabled, the first
    metadata match is stripped from the indexed content and the tags come
    from the markdown converter's ``Meta`` mapping.

    Indexing is best-effort: ordinary errors are reported on stdout and
    not propagated. ``KeyboardInterrupt`` and ``SystemExit`` are no longer
    swallowed.
    """
    page = self.parent.notesTree.currentPage()
    content = self.toPlainText()
    try:
        writer = AsyncWriter(self.ix)
        if METADATA_CHECKER.match(content) and 'meta' in self.settings.extensions:
            indexed_content = METADATA_CHECKER.sub("", content, count=1).lstrip()
            # Converting populates self.settings.md.Meta, read just below.
            self.settings.md.reset().convert(content)
            tags = ','.join(self.settings.md.Meta.get('tags', [])).strip()
        else:
            indexed_content = content
            tags = ''
        writer.update_document(
            path=page, title=parseTitle(content, page), content=indexed_content,
            tags=tags)
        writer.commit()
    except Exception:
        print("Whoosh commit failed.")