This page collects typical usage examples of the Python method org.apache.lucene.index.IndexWriter.updateDocument. If you are wondering how to use IndexWriter.updateDocument in Python, or what it looks like in real code, the curated examples below may help. You can also browse further usage examples of the containing class, org.apache.lucene.index.IndexWriter.
Below are 6 code examples of IndexWriter.updateDocument, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
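Before the examples, here is a minimal, self-contained sketch of the pattern they all share (assuming PyLucene 4.10-style APIs, which examples 2 and 5 also use; the index path and the field names "id" and "body" are illustrative, not taken from any example below). updateDocument(term, doc) first deletes every document matching term and then adds doc, so indexing keyed on a unique field behaves like an upsert:
# Minimal upsert sketch -- path and field names are illustrative (assumes PyLucene 4.10).
import lucene
from java.io import File
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, StringField, TextField
from org.apache.lucene.index import IndexWriter, IndexWriterConfig, Term
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.util import Version

lucene.initVM()
store = SimpleFSDirectory(File("/tmp/example-index"))
config = IndexWriterConfig(Version.LUCENE_4_10_1, StandardAnalyzer())
writer = IndexWriter(store, config)

doc = Document()
doc.add(StringField("id", "doc-1", Field.Store.YES))         # unique key, not analyzed
doc.add(TextField("body", "hello lucene", Field.Store.YES))  # analyzed full text
writer.updateDocument(Term("id", "doc-1"), doc)  # deletes any "id:doc-1" docs, then adds doc
writer.commit()
writer.close()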
Example 1: updateindex
# Required import: from org.apache.lucene.index import IndexWriter [as alias]
# Or: from org.apache.lucene.index.IndexWriter import updateDocument [as alias]
def updateindex(self, data):
    writer = IndexWriter(self.d, self.conf)
    doc = self.buildDocument(data['fields'], data['record'])
    writer.updateDocument(lucene.Term("_id", data['record']['_id']), doc)
    writer.optimize()
    writer.close()
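A note on this example: writer.optimize() is Lucene 3.x API; it was deprecated in 3.5 and removed in 4.0. A sketch of the same update against a 4.x index (an assumption, not part of the original snippet) would end like this:
    # assuming `from org.apache.lucene.index import Term` under PyLucene 4.x
    writer.updateDocument(Term("_id", data['record']['_id']), doc)
    writer.commit()       # make the update durable and visible
    writer.forceMerge(1)  # optional; the closest 4.x analogue of optimize()
    writer.close()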
Example 2: survey
# Required import: from org.apache.lucene.index import IndexWriter [as alias]
# Or: from org.apache.lucene.index.IndexWriter import updateDocument [as alias]
def survey(request):
    ipAddr = get_client_ip(request)
    instances = Classes.objects.values_list('image_class_desc')
    instances = [i[0] for i in instances]
    #cnt = len(instances)
    # get the submitted choice
    location = web.__path__[0] + "/static/web/files/index/index.figures"
    #lucene.initVM()
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()
    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1)
    reader = IndexReader.open(SimpleFSDirectory(File(location)))
    searcher = IndexSearcher(reader)
    try:
        #image_class = image.objects.get(pk=request.POST['survey'])
        s = request.POST['survey']  # get the choice from the POST data
    except (KeyError, Classes.DoesNotExist):
        return render(request, 'web/index.html', {
            'error_message': "You didn't select a choice.",
        })
    else:
        image_class = instances[int(s)]
        docNum = request.POST['imageID']  # get the document id
        doc = reader.document(int(docNum))
        fname = doc.get("filename")
        print(fname)
        #SimpleFSDirectory(File(location)).clearLock(IndexWriter.WRITE_LOCK_NAME)
        fileClassField = doc.get("Classification")
        if str(fileClassField) == "None":  # check whether the field exists -- NEED TO CHECK THIS
            fileClassField = str(ipAddr + ":" + image_class)  # prefix the classification with the client IP
        else:
            fileClassField = str(ipAddr + ":" + fileClassField) + ", " + image_class
        #doc.removeField("Classification")
        #doc.add(StringField("Classification", fileClassField, Field.Store.YES))
        #t = doc.get("Classification")
        #reader.close()
        indexDir = SimpleFSDirectory(File(location))
        writerConfig = IndexWriterConfig(Version.LUCENE_4_10_1, StandardAnalyzer())
        writer = IndexWriter(indexDir, writerConfig)
        fields = doc.getFields()  # get all fields
        doc2 = Document()
        classificationFieldFlag = False
        for f in fields:
            field = Field.cast_(f)
            (k, v) = field.name(), field.stringValue()
            if k == "Classification":
                classificationFieldFlag = True
                field = StringField("Classification", fileClassField, Field.Store.YES)
                doc2.add(field)
            else:
                doc2.add(field)
        if classificationFieldFlag == False:  # the field does not exist in the document, so add it
            doc2.add(StringField("Classification", fileClassField, Field.Store.YES))
        # doc2.add(StringField("Classification", fileClassField, Field.Store.YES))
        # doc2.add(StringField("fid", doc.get("fid"), Field.Store.YES))
        # doc2.add(StringField("articleid", doc.get("articleid"), Field.Store.YES))
        # doc2.add(StringField("caption", doc.get("caption"), Field.Store.YES))
        # doc2.add(StringField("figureid", doc.get("figureid"), Field.Store.YES))
        # doc2.add(StringField("filename", doc.get("filename"), Field.Store.YES))
        # doc2.add(StringField("filepath", doc.get("filepath"), Field.Store.YES))
        # doc2.add(StringField("label", doc.get("label"), Field.Store.YES))
        #writer.updateDocument(Term("fid", "f000000000023"), doc2)
        writer.updateDocument(Term("fid", doc.get("fid")), doc2)  # if a document with this fid exists, update it
        writer.commit()
        #writer.optimize()
        writer.close()
        #writer.unlock(SimpleFSDirectory(File(location)))
    return HttpResponseRedirect(reverse('web:index', args=()))
Example 3: Landadels
# Required import: from org.apache.lucene.index import IndexWriter [as alias]
# Or: from org.apache.lucene.index.IndexWriter import updateDocument [as alias]
test_text = """Berlin - "Gentrifizierung, die: Aufwertung eines Stadtteils durch dessen Sanierung oder Umbau mit der Folge, dass die dort ansässige Bevölkerung durch wohlhabendere Bevölkerungsschichten verdrängt wird."
So beschreibt der Duden ein Phänomen, das vor allem in Großstädten zu beobachten ist: Viele Menschen ziehen in günstige Wohnviertel, durch die Nachfrage steigen die Preise, bis die Mieten schließlich nur noch für die Wohlhabenderen bezahlbar sind - und die Künstler, Studenten, Geringverdiener weichen müssen. In der Weihnachtszeit hat nun ein Fall aus Berlin für Aufsehen gesorgt, der von einigen als Beispiel für Gentrifizierung genannt wird. Auf Facebook hat ein Nutzer die aktuelle Infobroschüre einer evangelischen Kirchengemeinde in Berlin-Mitte gepostet, Seite 25, Rubrik "Taufen". 29 Namen von Kindern und Erwachsenen sind dort zu lesen, darunter die folgenden:
Viktor Paul Theodor, Ada Mai Helene, Rufus Oliver Friedrich, Cäcilie Helene, Edvard Neo, Freya Luise Apollonia, Frederick Theodor Heinrich, Leonore Anna Maria Chiara Helena. Viele der Nachnamen lassen zudem auf einen adligen Hintergrund schließen.
"Das Comeback alter Adelsgeschlechter in Berlin-Mitte = Gentrifizierung im eigentlichen Sinne", lautet ein Kommentar unter dem Facebook-Foto. "In Dresden Gorbitz sähe die Liste anders aus", schreibt ein anderer Nutzer. Ein dritter fasst zusammen: "So heißt also die Gentrifizierung."
Im "Gentrification Blog", betrieben von einem wissenschaftlichen Mitarbeiter der Berliner Humboldt-Universität, ist vor kurzem ein Beitrag zu der Taufliste erschienen: "Berlin: Am Taufbecken der Gentrification - Kirche im Aufwertungsgebiet" heißt er. Die Liste lese sich "wie eine Mischung aus FDP-Wahlliste für das Europaparlament und dem Verzeichnis der höheren Beamten des Diplomatischen Dienstes", heißt es in dem Artikel. Und: "Der Wortsinn der Gentrification - der ja auf die Wiederkehr des niederen Landadels (der Gentry) in den Städten anspielt - bekommt hier jedenfalls einen unerwarteten Realitätsgehalt."
Zu dem Artikel stellte der Autor eine Taufliste aus dem Jahr 2007 aus einer Gemeinde im benachbarten Stadtteil Prenzlauer Berg. Darauf sind unter anderem diese Namen zu finden: Ruby, Matteo, Iwan, Lennart, Emilia, Annabelle, Andreas, Anke.
Jene Kirchgemeinde, in der die mondänen Namen zur Taufe aufgeführt sind, listet auch die Verstorbenen auf. Zwei sind es in der aktuellen Infobroschüre. Nzitu. Und: Herbert."""
lucene.initVM()
# language processor and storage
analyzer = PorterStemmerAnalyzer(Version.LUCENE_CURRENT)
store = SimpleFSDirectory(File('./data-test/'))
# writes data to the index
config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer, overwrite=True)
writer = IndexWriter(store, config)
# add Document
doc = Document()
doc.add(Field('content', test_text, Field.Store.YES, Field.Index.ANALYZED))
doc.add(Field('url', "http://test.com", Field.Store.YES, Field.Index.NOT_ANALYZED))
writer.updateDocument(Term("url", "http://test.com"), doc)
writer.commit()
writer.close()
Example 4: LuceneIndexer
# Required import: from org.apache.lucene.index import IndexWriter [as alias]
# Or: from org.apache.lucene.index.IndexWriter import updateDocument [as alias]
class LuceneIndexer(object):

    def __init__(self):
        lucene.initVM()
        # language processor and storage
        self.analyzer = GermanAnalyzer(Version.LUCENE_CURRENT)
        self.store = SimpleFSDirectory(File('./../Lucene/data/'))
        # writes data to the index
        config = IndexWriterConfig(Version.LUCENE_CURRENT, self.analyzer, overwrite=True)
        self.writer = IndexWriter(self.store, config)

    def add_article(self, article):
        # constructing a document
        doc = Document()
        title = Field('title', article.title, Field.Store.YES, Field.Index.ANALYZED)
        title.setBoost(10.0)
        doc.add(title)
        description = Field('description', article.description, Field.Store.YES, Field.Index.ANALYZED)
        description.setBoost(5.0)
        doc.add(description)
        doc.add(Field('keywords', article.keywords, Field.Store.YES, Field.Index.NOT_ANALYZED))
        doc.add(Field('content', article.content, Field.Store.YES, Field.Index.ANALYZED))
        if article.date:
            doc.add(Field('date', article.date, Field.Store.YES, Field.Index.NOT_ANALYZED))
        if article.last_modified:
            doc.add(Field('last_modified', article.last_modified, Field.Store.YES, Field.Index.NOT_ANALYZED))
        if article.images:
            doc.add(Field('image_url', article.images[0][0], Field.Store.YES, Field.Index.NOT_ANALYZED))
            doc.add(Field('image_text', article.images[0][1], Field.Store.YES, Field.Index.ANALYZED))
        doc.add(Field('url', article.url, Field.Store.YES, Field.Index.NOT_ANALYZED))
        # creates the document, or updates it if one with the same url already exists
        self.writer.updateDocument(Term("url", article.url), doc)

    def write_to_file(self):
        # making changes permanent
        self.writer.commit()
        self.writer.close()

    def perform_search(self, searchterm):
        # processing a query
        parser = QueryParser(Version.LUCENE_CURRENT, "content", self.analyzer)
        parser.setDefaultOperator(QueryParser.Operator.AND)
        query = parser.parse(searchterm)
        # conducting the search
        searcher = IndexSearcher(DirectoryReader.open(self.store))
        start = datetime.now()
        scoreDocs = searcher.search(query, 50).scoreDocs
        duration = datetime.now() - start
        print(scoreDocs)
        print(duration)
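A hypothetical usage sketch for this class follows; the Article type below is a stand-in for whatever object the original project passes in, defining only the attributes that add_article reads:
# Hypothetical driver for LuceneIndexer -- Article is an assumed stand-in type.
class Article(object):
    def __init__(self, title, description, keywords, content, url,
                 date=None, last_modified=None, images=None):
        self.title = title
        self.description = description
        self.keywords = keywords
        self.content = content
        self.url = url
        self.date = date
        self.last_modified = last_modified
        self.images = images

indexer = LuceneIndexer()
indexer.add_article(Article(
    title='Gentrification in Berlin-Mitte',
    description='Baptism lists read as a gentrification indicator',
    keywords='gentrification berlin',
    content='Full article text goes here.',
    url='http://example.com/article-1'))
# Calling add_article again with the same url replaces, not duplicates, the document.
indexer.write_to_file()  # commit and close before searching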
Example 5: Index
# Required import: from org.apache.lucene.index import IndexWriter [as alias]
# Or: from org.apache.lucene.index.IndexWriter import updateDocument [as alias]
class Index(object):

    def __init__(self, path, settings):
        self._settings = settings
        self._multithreaded = settings.multithreaded
        self._checker = DirectSpellChecker()
        indexDirectory = MMapDirectory(File(join(path, 'index')))
        indexDirectory.setUseUnmap(False)
        taxoDirectory = MMapDirectory(File(join(path, 'taxo')))
        taxoDirectory.setUseUnmap(False)
        conf = IndexWriterConfig(Version.LUCENE_4_10_0, settings.analyzer)
        conf.setSimilarity(settings.similarity)
        mergePolicy = TieredMergePolicy()
        mergePolicy.setMaxMergeAtOnce(settings.maxMergeAtOnce)
        mergePolicy.setSegmentsPerTier(settings.segmentsPerTier)
        conf.setMergePolicy(mergePolicy)
        if not settings.readonly:
            self._indexWriter = IndexWriter(indexDirectory, conf)
            self._indexWriter.commit()
            self._taxoWriter = DirectoryTaxonomyWriter(taxoDirectory, IndexWriterConfig.OpenMode.CREATE_OR_APPEND, LruTaxonomyWriterCache(settings.lruTaxonomyWriterCacheSize))
            self._taxoWriter.commit()
        self._indexAndTaxonomy = IndexAndTaxonomy(settings, indexDirectory, taxoDirectory)
        self._readerSettingsWrapper = self._indexAndTaxonomy._readerSettingsWrapper
        self._facetsConfig = settings.fieldRegistry.facetsConfig
        self._ordinalsReader = CachedOrdinalsReader(DocValuesOrdinalsReader())

    def addDocument(self, term, document):
        document = self._facetsConfig.build(self._taxoWriter, document)
        self._indexWriter.updateDocument(term, document)

    def deleteDocument(self, term):
        self._indexWriter.deleteDocuments(term)

    def search(self, query, filter, collector):
        self._indexAndTaxonomy.searcher.search(query, filter, collector)

    def suggest(self, query, count, field):
        suggestions = {}
        for token, startOffset, endOffset in self._analyzeToken(query):
            suggestWords = self._checker.suggestSimilar(Term(field, token), count, self._indexAndTaxonomy.searcher.getIndexReader())
            if suggestWords:
                suggestions[token] = (startOffset, endOffset, [suggestWord.string for suggestWord in suggestWords])
        return suggestions

    def termsForField(self, field, prefix=None, limit=10, **kwargs):
        convert = lambda term: term.utf8ToString()
        terms = []
        termsEnum = MultiFields.getTerms(self._indexAndTaxonomy.searcher.getIndexReader(), field)
        if termsEnum is None:
            return terms
        iterator = termsEnum.iterator(None)
        if prefix:
            iterator.seekCeil(BytesRef(prefix))
            terms.append((iterator.docFreq(), convert(iterator.term())))
        bytesIterator = BytesRefIterator.cast_(iterator)
        try:
            while len(terms) < limit:
                term = convert(bytesIterator.next())
                if prefix and not term.startswith(prefix):
                    break
                terms.append((iterator.docFreq(), term))
        except StopIteration:
            pass
        return terms

    def fieldnames(self):
        indexAndTaxonomy = self._indexAndTaxonomy
        fieldnames = []
        fields = MultiFields.getFields(indexAndTaxonomy.searcher.getIndexReader())
        if fields is None:
            return fieldnames
        iterator = fields.iterator()
        while iterator.hasNext():
            fieldnames.append(iterator.next())
        return fieldnames

    def drilldownFieldnames(self, path=None, limit=50):
        taxoReader = self._indexAndTaxonomy.taxoReader
        parentOrdinal = TaxonomyReader.ROOT_ORDINAL if path is None else taxoReader.getOrdinal(path[0], path[1:])
        childrenIter = taxoReader.getChildren(parentOrdinal)
        names = []
        while True:
            ordinal = childrenIter.next()
            if ordinal == TaxonomyReader.INVALID_ORDINAL:
                break
            names.append(taxoReader.getPath(ordinal).components[-1])
            if len(names) >= limit:
                break
        return names

    def numDocs(self):
        return self._indexAndTaxonomy.searcher.getIndexReader().numDocs()

    def commit(self):
        if not self._settings.readonly:
            self._taxoWriter.commit()
            self._indexWriter.commit()
#......... (the rest of this example is omitted here) .........
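Even truncated, the update path is visible: addDocument delegates to updateDocument, so inserting with a term that already matches a document replaces it. A hypothetical sketch of the calls, assuming a fully configured instance of the class above and the 4.10-style Document/StringField/Term classes:
# Hypothetical calls against a configured `index = Index(path, settings)`.
doc = Document()
doc.add(StringField("identifier", "record:1", Field.Store.YES))
index.addDocument(Term("identifier", "record:1"), doc)  # insert, or replace if present
index.commit()
index.deleteDocument(Term("identifier", "record:1"))    # remove by the same key
index.commit()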
Example 6: ImageIndexer
# Required import: from org.apache.lucene.index import IndexWriter [as alias]
# Or: from org.apache.lucene.index.IndexWriter import updateDocument [as alias]
class ImageIndexer(object):
    """Given an image's details, the indexer gets all of the text files and
    indexes them with Lucene for search and retrieval."""
    hash_field = FieldType()
    hash_field.setStored(True)
    hash_field.setTokenized(False)
    hash_field.setIndexOptions(IndexOptions.DOCS_AND_FREQS)
    text_field = FieldType()
    text_field.setStored(False)
    text_field.setTokenized(True)
    text_field.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
    mime_map = MimeMapper("/var/www/bcaw/conf/mimemap.conf")

    def __init__(self, store_dir):
        self.store_dir = store_dir
        if not os.path.exists(store_dir):
            os.mkdir(store_dir, 0o777)
        self.store = SimpleFSDirectory(Paths.get(store_dir))
        self.analyzer = StandardAnalyzer()
        self.analyzer = LimitTokenCountAnalyzer(self.analyzer, 1048576)
        self.config = IndexWriterConfig(self.analyzer)
        self.writer = IndexWriter(self.store, self.config)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.writer.close()

    def index_text(self, sha1, full_text):
        """Index the full text and map it to the source sha1."""
        document = Document()
        document.add(Field("sha1", sha1, ImageIndexer.hash_field))
        if full_text:
            document.add(Field("full_text", full_text, ImageIndexer.text_field))
            self.writer.updateDocument(Term("sha1", sha1), document)
        else:
            logging.info("No text for sha1 %s", sha1)

    @classmethod
    def get_path_details(cls, temp_path, image_path):
        """Return the byte sequence and the full text for a given path."""
        byte_sequence = ByteSequence.from_path(temp_path)
        extension = map_mime_to_ext(byte_sequence.mime_type, cls.mime_map)
        logging.debug("Assessing MIME: %s EXTENSION %s SHA1:%s", byte_sequence.mime_type,
                      extension, byte_sequence.sha1)
        full_text = ""
        if extension is not None:
            try:
                logging.debug("Textract for SHA1 %s, extension map val %s",
                              byte_sequence.sha1, extension)
                full_text = process(temp_path, extension=extension, encoding='ascii',
                                    preserveLineBreaks=True)
            except ExtensionNotSupported as _:
                logging.exception("Textract extension not supported for ext %s", extension)
                logging.debug("Image path for file is %s, temp file at %s", image_path, temp_path)
                full_text = "N/A"
            except LookupError as _:
                logging.exception("Lookup error for encoding.")
                logging.debug("Image path for file is %s, temp file at %s", image_path, temp_path)
                full_text = "N/A"
            except UnicodeDecodeError as _:
                logging.exception("UnicodeDecodeError, problem with file encoding")
                logging.debug("Image path for file is %s, temp file at %s", image_path, temp_path)
                full_text = "N/A"
            except:
                logging.exception("Textract UNEXPECTEDLY failed for temp_file.")
                logging.debug("Image path for file is %s, temp file at %s", image_path, temp_path)
                full_text = "N/A"
        return byte_sequence, full_text

    def index_path(self, temp_path, image_path):
        """Index the full text of the file, map it to the file's sha1, and return
        the derived ByteSequence object and the extracted full text as a tuple."""
        byte_sequence, full_text = self.get_path_details(temp_path, image_path)
        if full_text:
            self.index_text(byte_sequence.sha1, full_text)
        return byte_sequence, full_text
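A hypothetical usage sketch for this class: the paths below are illustrative, and ByteSequence, map_mime_to_ext, and textract's process come from the original project's dependencies. The context-manager protocol defined above closes the IndexWriter on exit:
# Hypothetical driver: index one extracted file, keyed by its SHA-1.
with ImageIndexer("/var/tmp/bcaw-index") as indexer:
    byte_sequence, full_text = indexer.index_path(
        "/tmp/extracted/report.pdf",       # temp copy of the file on local disk
        "/images/disk-001/report.pdf")     # the file's path inside the disk image
    print(byte_sequence.sha1, len(full_text))
# __exit__ closes the IndexWriter here.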