

Python IndexWriter.updateDocument Method Code Examples

This article collects typical usage examples of the Python method org.apache.lucene.index.IndexWriter.updateDocument. If you are unsure how to call IndexWriter.updateDocument from Python, or want to see what real-world usage looks like, the curated examples below should help. You can also browse further usage examples for the containing class, org.apache.lucene.index.IndexWriter.


Six code examples of the IndexWriter.updateDocument method are shown below, sorted by popularity by default.
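Before the examples, here is a minimal, self-contained sketch of the updateDocument upsert pattern using the newer PyLucene API (the same style as Example 6 below). The index path and the field names "id" and "content" are illustrative assumptions, not taken from any of the projects listed here.

import lucene
from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, StringField, TextField
from org.apache.lucene.index import IndexWriter, IndexWriterConfig, Term
from org.apache.lucene.store import SimpleFSDirectory

lucene.initVM()

# Open (or create) an on-disk index; the path is illustrative.
directory = SimpleFSDirectory(Paths.get("/tmp/demo-index"))
writer = IndexWriter(directory, IndexWriterConfig(StandardAnalyzer()))

# Build a document whose "id" field serves as the unique key.
doc = Document()
doc.add(StringField("id", "doc-42", Field.Store.YES))
doc.add(TextField("content", "some searchable text", Field.Store.YES))

# updateDocument deletes any existing document matching the Term, then adds
# the new document, so the call works as an upsert for the key "doc-42".
writer.updateDocument(Term("id", "doc-42"), doc)

writer.commit()
writer.close()

Because updateDocument first deletes every document matching the given Term and then adds the new document atomically, it can be called whether or not the key already exists in the index. The examples below all follow this pattern, differing mainly in how they build the Document and which field they use as the key.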

Example 1: updateindex

# Required module: from org.apache.lucene.index import IndexWriter [as alias]
# Or: from org.apache.lucene.index.IndexWriter import updateDocument [as alias]
    def updateindex(self, data):
        writer = IndexWriter(
            self.d, self.conf)

        doc = self.buildDocument(data['fields'], data['record'])
        writer.updateDocument(lucene.Term("_id", data['record']['_id']), doc)

        writer.optimize()
        writer.close()
Developer ID: bradleyjones, Project: apiary, Lines of code: 11, Source file: lucenedriver.py

Example 2: survey

# Required module: from org.apache.lucene.index import IndexWriter [as alias]
# Or: from org.apache.lucene.index.IndexWriter import updateDocument [as alias]
def survey(request):
    ipAddr = get_client_ip(request)
    instances = (Classes.objects.values_list('image_class_desc'))
    instances = [i[0] for i in instances]
    #cnt = len(instances)
    #let's get our choice
    location = web.__path__[0] + "/static/web/files/index/index.figures"
    #lucene.initVM()
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()
    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1)
    reader = IndexReader.open(SimpleFSDirectory(File(location)))
    searcher = IndexSearcher(reader)
    
        
    try:
        #image_class = image.objects.get(pk=request.POST['survey'])
        s = request.POST['survey']#get from post
        
                
    except (KeyError, Classes.DoesNotExist):
        return render(request, 'web/index.html',{
            'error_message': "You didn't select a choice.",
        })
    else:
        image_class = instances[int(s)]
        docNum = request.POST['imageID']#get document id
        doc = reader.document(int(docNum))
        fname = doc.get("filename")
        print(fname)
        #SimpleFSDirectory(File(location)).clearLock(IndexWriter.WRITE_LOCK_NAME);
        fileClassField = doc.get("Classification")
        if str(fileClassField) == "None":#check if the field exists####NEED TO CHECK THIS
            fileClassField = str(ipAddr + ":" + image_class)#I think we must add an ip address to this
        else:
            fileClassField = str(ipAddr + ":" + fileClassField) + ", " + image_class
            
        #doc.removeField("Classification")
        
        #doc.add(StringField("Classification", fileClassField, Field.Store.YES))
        #t = doc.get("Classification")
        #reader.close()
        indexDir = SimpleFSDirectory(File(location))
        writerConfig = IndexWriterConfig(Version.LUCENE_4_10_1, StandardAnalyzer())
        writer = IndexWriter(indexDir, writerConfig)
        fields = doc.getFields()#get all fields
        doc2 = Document()
        classificationFieldFlag = False
        for f in fields:
            field = Field.cast_(f)
            (k, v) = field.name(), field.stringValue()
            if k == "Classification":
                classificationFieldFlag = True
                field = StringField("Classification", fileClassField, Field.Store.YES)
                doc2.add(field)
            else:
                doc2.add(field)

        if classificationFieldFlag == False:#this does not exist in the document must add
            doc2.add(StringField("Classification", fileClassField, Field.Store.YES))
#         doc2.add(StringField("Classification", fileClassField, Field.Store.YES))
#         doc2.add(StringField("fid", doc.get("fid"), Field.Store.YES))
#         doc2.add(StringField("articleid", doc.get("articleid"), Field.Store.YES))
#         doc2.add(StringField("caption", doc.get("caption"), Field.Store.YES))
#         doc2.add(StringField("figureid", doc.get("figureid"), Field.Store.YES))
#         doc2.add(StringField("filename", doc.get("filename"), Field.Store.YES))
#         doc2.add(StringField("filepath", doc.get("filepath"), Field.Store.YES))
#         doc2.add(StringField("label", doc.get("label"), Field.Store.YES))
        
        #writer.updateDocument(Term("fid","f000000000023"), doc2)#If field exists update
        writer.updateDocument(Term("fid", doc.get("fid")), doc2)#If field exists update
        writer.commit()
        #writer.optimize()
        writer.close()
        #writer.unlock(SimpleFSDirectory(File(location)))
        
    return HttpResponseRedirect(reverse('web:index', args=()))
Developer ID: kevkid, Project: YIF, Lines of code: 79, Source file: views.py

Example 3: Landadels (this snippet begins inside the test_text string literal; the German passage below is test data, not code)

# Required module: from org.apache.lucene.index import IndexWriter [as alias]
# Or: from org.apache.lucene.index.IndexWriter import updateDocument [as alias]
Berlin - "Gentrifizierung, die: Aufwertung eines Stadtteils durch dessen Sanierung oder Umbau mit der Folge, dass die dort ansässige Bevölkerung durch wohlhabendere Bevölkerungsschichten verdrängt wird."

So beschreibt der Duden ein Phänomen, das vor allem in Großstädten zu beobachten ist: Viele Menschen ziehen in günstige Wohnviertel, durch die Nachfrage steigen die Preise, bis die Mieten schließlich nur noch für die Wohlhabenderen bezahlbar sind - und die Künstler, Studenten, Geringverdiener weichen müssen. In der Weihnachtszeit hat nun ein Fall aus Berlin für Aufsehen gesorgt, der von einigen als Beispiel für Gentrifizierung genannt wird. Auf Facebook hat ein Nutzer die aktuelle Infobroschüre einer evangelischen Kirchengemeinde in Berlin-Mitte gepostet, Seite 25, Rubrik "Taufen". 29 Namen von Kindern und Erwachsenen sind dort zu lesen, darunter die folgenden:
Viktor Paul Theodor, Ada Mai Helene, Rufus Oliver Friedrich, Cäcilie Helene, Edvard Neo, Freya Luise Apollonia, Frederick Theodor Heinrich, Leonore Anna Maria Chiara Helena. Viele der Nachnamen lassen zudem auf einen adligen Hintergrund schließen.

"Das Comeback alter Adelsgeschlechter in Berlin-Mitte = Gentrifizierung im eigentlichen Sinne", lautet ein Kommentar unter dem Facebook-Foto. "In Dresden Gorbitz sähe die Liste anders aus", schreibt ein anderer Nutzer. Ein dritter fasst zusammen: "So heißt also die Gentrifizierung."

Im "Gentrification Blog", betrieben von einem wissenschaftlichen Mitarbeiter der Berliner Humboldt-Universität, ist vor kurzem ein Beitrag zu der Taufliste erschienen: "Berlin: Am Taufbecken der Gentrification - Kirche im Aufwertungsgebiet" heißt er. Die Liste lese sich "wie eine Mischung aus FDP-Wahlliste für das Europaparlament und dem Verzeichnis der höheren Beamten des Diplomatischen Dienstes", heißt es in dem Artikel. Und: "Der Wortsinn der Gentrification - der ja auf die Wiederkehr des niederen Landadels (der Gentry) in den Städten anspielt - bekommt hier jedenfalls einen unerwarteten Realitätsgehalt."

Zu dem Artikel stellte der Autor eine Taufliste aus dem Jahr 2007 aus einer Gemeinde im benachbarten Stadtteil Prenzlauer Berg. Darauf sind unter anderem diese Namen zu finden: Ruby, Matteo, Iwan, Lennart, Emilia, Annabelle, Andreas, Anke.

Jene Kirchgemeinde, in der die mondänen Namen zur Taufe aufgeführt sind, listet auch die Verstorbenen auf. Zwei sind es in der aktuellen Infobroschüre. Nzitu. Und: Herbert."""

    lucene.initVM()

    # language processor and storage
    analyzer = PorterStemmerAnalyzer(Version.LUCENE_CURRENT)
    store = SimpleFSDirectory(File('./data-test/'))

    # writes data to the index
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer, overwrite=True)
    writer = IndexWriter(store, config)

    # add Document
    doc = Document()
    doc.add(Field('content', test_text, Field.Store.YES, Field.Index.ANALYZED))
    doc.add(Field('url', "http://test.com", Field.Store.YES, Field.Index.NOT_ANALYZED))
    writer.updateDocument(Term("url", "http://test.com"), doc)

    writer.commit()
    writer.close()
Developer ID: martinjuhasz, Project: HSRM_KI_Projekt, Lines of code: 33, Source file: FancyAnalyzer.py

Example 4: LuceneIndexer

# Required module: from org.apache.lucene.index import IndexWriter [as alias]
# Or: from org.apache.lucene.index.IndexWriter import updateDocument [as alias]
class LuceneIndexer(object):
    def __init__(self):
        lucene.initVM()

        # language processor and storage
        self.analyzer = GermanAnalyzer(Version.LUCENE_CURRENT)
        self.store = SimpleFSDirectory(File('./../Lucene/data/'))

        # writes data to the index
        config = IndexWriterConfig(Version.LUCENE_CURRENT, self.analyzer, overwrite=True)
        self.writer = IndexWriter(self.store, config)


    def add_article(self, article):
        # constructing a document
        doc = Document()

        title = Field('title', article.title, Field.Store.YES, Field.Index.ANALYZED)
        title.setBoost(10.0)
        doc.add(title)

        description = Field('description', article.description, Field.Store.YES, Field.Index.ANALYZED)
        description.setBoost(5.0)
        doc.add(description)

        doc.add(Field('keywords', article.keywords, Field.Store.YES, Field.Index.NOT_ANALYZED))
        doc.add(Field('content', article.content, Field.Store.YES, Field.Index.ANALYZED))
        if article.date:
            doc.add(Field('date', article.date, Field.Store.YES, Field.Index.NOT_ANALYZED))
        if article.last_modified:
            doc.add(Field('last_modified', article.last_modified, Field.Store.YES, Field.Index.NOT_ANALYZED))
        if article.images:
            doc.add(Field('image_url', article.images[0][0], Field.Store.YES, Field.Index.NOT_ANALYZED))
            doc.add(Field('image_text', article.images[0][1], Field.Store.YES, Field.Index.ANALYZED))
        doc.add(Field('url', article.url, Field.Store.YES, Field.Index.NOT_ANALYZED))

        # creates document or updates if already exists
        self.writer.updateDocument(Term("url", article.url), doc)

    def write_to_file(self):
        # making changes permanent
        self.writer.commit()
        self.writer.close()



    def perform_search(self, searchterm):
        # processing a query
        parser = QueryParser(Version.LUCENE_CURRENT, "content", self.analyzer)
        parser.setDefaultOperator(QueryParser.Operator.AND)

        query = parser.parse(searchterm)

        # conducting search
        searcher = IndexSearcher(DirectoryReader.open(self.store))

        start = datetime.now()
        scoreDocs = searcher.search(query, 50).scoreDocs
        duration = datetime.now() - start

        print scoreDocs
        print duration
Developer ID: martinjuhasz, Project: HSRM_KI_Projekt, Lines of code: 64, Source file: LuceneIndexer.py

Example 5: Index

# Required module: from org.apache.lucene.index import IndexWriter [as alias]
# Or: from org.apache.lucene.index.IndexWriter import updateDocument [as alias]
class Index(object):
    def __init__(self, path, settings):
        self._settings = settings
        self._multithreaded = settings.multithreaded
        self._checker = DirectSpellChecker()
        indexDirectory = MMapDirectory(File(join(path, 'index')))
        indexDirectory.setUseUnmap(False)
        taxoDirectory = MMapDirectory(File(join(path, 'taxo')))
        taxoDirectory.setUseUnmap(False)
        conf = IndexWriterConfig(Version.LUCENE_4_10_0, settings.analyzer)
        conf.setSimilarity(settings.similarity)
        mergePolicy = TieredMergePolicy()
        mergePolicy.setMaxMergeAtOnce(settings.maxMergeAtOnce)
        mergePolicy.setSegmentsPerTier(settings.segmentsPerTier)
        conf.setMergePolicy(mergePolicy)

        if not settings.readonly:
            self._indexWriter = IndexWriter(indexDirectory, conf)
            self._indexWriter.commit()
            self._taxoWriter = DirectoryTaxonomyWriter(taxoDirectory, IndexWriterConfig.OpenMode.CREATE_OR_APPEND, LruTaxonomyWriterCache(settings.lruTaxonomyWriterCacheSize))
            self._taxoWriter.commit()

        self._indexAndTaxonomy = IndexAndTaxonomy(settings, indexDirectory, taxoDirectory)
        self._readerSettingsWrapper = self._indexAndTaxonomy._readerSettingsWrapper

        self._facetsConfig = settings.fieldRegistry.facetsConfig

        self._ordinalsReader = CachedOrdinalsReader(DocValuesOrdinalsReader())

    def addDocument(self, term, document):
        document = self._facetsConfig.build(self._taxoWriter, document)
        self._indexWriter.updateDocument(term, document)

    def deleteDocument(self, term):
        self._indexWriter.deleteDocuments(term)

    def search(self, query, filter, collector):
        self._indexAndTaxonomy.searcher.search(query, filter, collector)

    def suggest(self, query, count, field):
        suggestions = {}
        for token, startOffset, endOffset in self._analyzeToken(query):
            suggestWords = self._checker.suggestSimilar(Term(field, token), count, self._indexAndTaxonomy.searcher.getIndexReader())
            if suggestWords:
                suggestions[token] = (startOffset, endOffset, [suggestWord.string for suggestWord in suggestWords])
        return suggestions

    def termsForField(self, field, prefix=None, limit=10, **kwargs):
        convert = lambda term: term.utf8ToString()
        terms = []
        termsEnum = MultiFields.getTerms(self._indexAndTaxonomy.searcher.getIndexReader(), field)
        if termsEnum is None:
            return terms
        iterator = termsEnum.iterator(None)
        if prefix:
            iterator.seekCeil(BytesRef(prefix))
            terms.append((iterator.docFreq(), convert(iterator.term())))
        bytesIterator = BytesRefIterator.cast_(iterator)
        try:
            while len(terms) < limit:
                term = convert(bytesIterator.next())
                if prefix and not term.startswith(prefix):
                    break
                terms.append((iterator.docFreq(), term))
        except StopIteration:
            pass
        return terms

    def fieldnames(self):
        indexAndTaxonomy = self._indexAndTaxonomy
        fieldnames = []
        fields = MultiFields.getFields(indexAndTaxonomy.searcher.getIndexReader())
        if fields is None:
            return fieldnames
        iterator = fields.iterator()
        while iterator.hasNext():
            fieldnames.append(iterator.next())
        return fieldnames

    def drilldownFieldnames(self, path=None, limit=50):
        taxoReader = self._indexAndTaxonomy.taxoReader
        parentOrdinal = TaxonomyReader.ROOT_ORDINAL if path is None else taxoReader.getOrdinal(path[0], path[1:])
        childrenIter = taxoReader.getChildren(parentOrdinal)
        names = []
        while True:
            ordinal = childrenIter.next()
            if ordinal == TaxonomyReader.INVALID_ORDINAL:
                break
            names.append(taxoReader.getPath(ordinal).components[-1])
            if len(names) >= limit:
                break
        return names

    def numDocs(self):
        return self._indexAndTaxonomy.searcher.getIndexReader().numDocs()

    def commit(self):
        if not self._settings.readonly:
            self._taxoWriter.commit()
            self._indexWriter.commit()
#......... part of the code is omitted here .........
Developer ID: jerryba, Project: meresco-lucene, Lines of code: 103, Source file: index.py

Example 6: ImageIndexer

# Required module: from org.apache.lucene.index import IndexWriter [as alias]
# Or: from org.apache.lucene.index.IndexWriter import updateDocument [as alias]
class ImageIndexer(object):
    """Given an image details the indexer will get all text files, lucene them
    for search and retrieval."""
    hash_field = FieldType()
    hash_field.setStored(True)
    hash_field.setTokenized(False)
    hash_field.setIndexOptions(IndexOptions.DOCS_AND_FREQS)

    text_field = FieldType()
    text_field.setStored(False)
    text_field.setTokenized(True)
    text_field.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)

    mime_map = MimeMapper("/var/www/bcaw/conf/mimemap.conf")

    def __init__(self, store_dir):
        self.store_dir = store_dir
        if not os.path.exists(store_dir):
            os.mkdir(store_dir, 0777)
        self.store = SimpleFSDirectory(Paths.get(store_dir))
        self.analyzer = StandardAnalyzer()
        self.analyzer = LimitTokenCountAnalyzer(self.analyzer, 1048576)
        self.config = IndexWriterConfig(self.analyzer)
        self.writer = IndexWriter(self.store, self.config)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.writer.close()

    def index_text(self, sha1, full_text):
        """Index the full text and map it to the source sha1."""
        document = Document()
        document.add(Field("sha1", sha1, ImageIndexer.hash_field))
        if full_text:
            document.add(Field("full_text", full_text, ImageIndexer.text_field))
            self.writer.updateDocument(Term("sha1", sha1), document)
        else:
            logging.info("No text for sha1 %s", sha1)

    @classmethod
    def get_path_details(cls, temp_path, image_path):
        """Return the byte sequence and the full text for a given path."""
        byte_sequence = ByteSequence.from_path(temp_path)
        extension = map_mime_to_ext(byte_sequence.mime_type, cls.mime_map)
        logging.debug("Assessing MIME: %s EXTENSION %s SHA1:%s", byte_sequence.mime_type,
                      extension, byte_sequence.sha1)
        full_text = ""
        if extension is not None:
            try:
                logging.debug("Textract for SHA1 %s, extension map val %s",
                              byte_sequence.sha1, extension)
                full_text = process(temp_path, extension=extension, encoding='ascii',
                                    preserveLineBreaks=True)
            except ExtensionNotSupported as _:
                logging.exception("Textract extension not supported for ext %s", extension)
                logging.debug("Image path for file is %s, temp file at %s", image_path, temp_path)
                full_text = "N/A"
            except LookupError as _:
                logging.exception("Lookup error for encoding.")
                logging.debug("Image path for file is %s, temp file at %s", image_path, temp_path)
                full_text = "N/A"
            except UnicodeDecodeError as _:
                logging.exception("UnicodeDecodeError, problem with file encoding")
                logging.debug("Image path for file is %s, temp file at %s", image_path, temp_path)
                full_text = "N/A"
            except:
                logging.exception("Textract UNEXPECTEDLY failed for temp_file.")
                logging.debug("Image path for file is %s, temp file at %s", image_path, temp_path)
                full_text = "N/A"
        return byte_sequence, full_text

    def index_path(self, temp_path, image_path):
        """Index the full text of the file and map it to the file's sha1 and return
        the derived ByteStream object and derived full text as a tuple."""
        byte_sequence, full_text = self.get_path_details(temp_path, image_path)
        if full_text:
            self.index_text(byte_sequence.sha1, full_text)
        return byte_sequence, full_text
Developer ID: BitCurator, Project: bca-webtools, Lines of code: 82, Source file: text_indexer.py


Note: The org.apache.lucene.index.IndexWriter.updateDocument method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. Please refer to the license of the corresponding project before distributing or using the code; do not reproduce without permission.