当前位置: 首页>>代码示例>>Python>>正文


Python lucene.IndexWriter类代码示例

本文整理汇总了Python中lucene.IndexWriter的典型用法代码示例。如果您正苦于以下问题：Python IndexWriter类的具体用法？Python IndexWriter怎么用？Python IndexWriter使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了IndexWriter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: index

    def index(source, indexName):
        """Build an index in directory ``indexName`` from the text ``source``.

        ``source`` is scanned for records consisting of an identifier of the
        form ``GH<digits>-<digits>`` on one line followed by a content line.
        Each record becomes one document: the identifier is stored (but not
        indexed) in the ``id`` field, and every whitespace-separated token of
        the content, with ``-`` replaced by ``_``, is added as a non-analyzed,
        non-stored ``content`` field.

        :param source: text holding the records to index.
        :param indexName: path of the index directory; created when missing.
        """
        if not os.path.exists(indexName):
            os.mkdir(indexName)

        # Parse first, so that a bad ``source`` cannot leave a writer open.
        # Raw string: the regex escapes reach ``re`` untouched.
        pattern = re.compile(r"(GH\d+\-\d+)\n(.*?)\n+", re.DOTALL)
        records = pattern.findall(source)

        writer = IndexWriter(SimpleFSDirectory(File(indexName)),
                             StandardAnalyzer(Version.LUCENE_CURRENT), True,
                             IndexWriter.MaxFieldLength.LIMITED)
        try:
            for doc_id, content in records:
                doc = Document()
                doc.add(Field("id", doc_id, Field.Store.YES, Field.Index.NO))
                for token in content.split():
                    doc.add(Field("content", token.replace("-", "_"),
                                  Field.Store.NO, Field.Index.NOT_ANALYZED))
                writer.addDocument(doc)
        finally:
            # Close the writer even when adding a document fails.
            writer.close()

        print(str(len(records)) + " docs indexed")
开发者ID:guillelmo,项目名称:UvA-AIR,代码行数:25,代码来源:index.py

示例2: setUp

    def setUp(self):
        """Index a list of animal names into two RAM directories.

        Names whose lower-cased first letter sorts before "n" go to the
        first index, all remaining names to the second one.  One searcher
        per index is kept in ``self.searchers``.
        """
        animals = [
            "aardvark", "beaver", "coati", "dog", "elephant", "frog",
            "gila monster", "horse", "iguana", "javelina", "kangaroo",
            "lemur", "moose", "nematode", "orca", "python", "quokka",
            "rat", "scorpion", "tarantula", "uromastyx", "vicuna",
            "walrus", "xiphias", "yak", "zebra",
        ]

        analyzer = WhitespaceAnalyzer()

        lowerDirectory = RAMDirectory()
        upperDirectory = RAMDirectory()

        lowerWriter = IndexWriter(lowerDirectory, analyzer, True,
                                  IndexWriter.MaxFieldLength.UNLIMITED)
        upperWriter = IndexWriter(upperDirectory, analyzer, True,
                                  IndexWriter.MaxFieldLength.UNLIMITED)

        for name in animals:
            entry = Document()
            entry.add(Field("animal", name,
                            Field.Store.YES, Field.Index.NOT_ANALYZED))

            # Route the document by the first letter of the name.
            target = lowerWriter if name[0].lower() < "n" else upperWriter
            target.addDocument(entry)

        lowerWriter.close()
        upperWriter.close()

        self.searchers = [IndexSearcher(lowerDirectory),
                          IndexSearcher(upperDirectory)]
开发者ID:bpgriner01,项目名称:pylucene,代码行数:35,代码来源:MultiSearcherTest.py

示例3: open

    def open(self, name, txn, **kwds):
        """Open the container; on ``create`` also write an initial index.

        Delegates to the parent ``open`` first.  When the ``create`` keyword
        is truthy, an ``IndexWriter`` is opened on the container's directory
        with its third argument set to True and closed right away, after
        which the directory itself is closed.
        """
        super(IndexContainer, self).open(name, txn, **kwds)

        if not kwds.get('create', False):
            return

        indexDirectory = self.getDirectory()
        IndexWriter(indexDirectory, StandardAnalyzer(), True).close()
        indexDirectory.close()
开发者ID:HackLinux,项目名称:chandler,代码行数:9,代码来源:LuceneContainer.py

示例4: index_files

def index_files(files, index_directory):
    """Create an index in ``index_directory`` from ``files``.

    Every entry of ``files`` is passed to ``parse_file`` together with the
    open writer.  Once all entries have been processed the index is
    optimized.

    :param files: iterable of items understood by ``parse_file``.
    :param index_directory: path of the directory holding the index.
    """
    lucene.initVM()
    directory = SimpleFSDirectory(File(index_directory))
    analyzer = StandardAnalyzer(Version.LUCENE_30)
    writer = IndexWriter(directory, analyzer, True,
                         IndexWriter.MaxFieldLength(512))
    try:
        for f in files:
            parse_file(f, writer)
        writer.optimize()
    finally:
        # Close the writer even when parsing or optimizing fails.
        writer.close()
开发者ID:CrawlingFingers,项目名称:ConcordiaCrawler,代码行数:9,代码来源:indexer.py

示例5: index

 def index(self, path_to_index, path_files):
     """Index anchor texts from the ``*.txt`` files matching a path prefix.

     Every line of every matching file is split on tabs.  Its first four
     columns are stored and analyzed as the ``anchor``, ``anchor_uri``,
     ``dbpedia_uri`` and ``number`` fields of one document (newlines are
     stripped from the fourth column).  The running line count is printed
     at every 500000th line and once at the end.

     :param path_to_index: directory in which the index is created.
     :param path_files: prefix; ``path_files + "*.txt"`` is globbed.
     :raises IndexError: if a line has fewer than four tab-separated columns.
     """
     #lucene.initVM()
     directory_index = SimpleFSDirectory(File(path_to_index))
     analyzer = StandardAnalyzer(Version.LUCENE_35)
     writer = IndexWriter(directory_index, analyzer, True,
                          IndexWriter.MaxFieldLength(512))
     counter = 0
     try:
         for path_to_file in glob.glob(path_files + "*.txt"):
             print(path_to_file)
             # ``with`` closes the input file even if indexing a line fails.
             with open(path_to_file, "r") as f:
                 for line in f:
                     entry = line.split("\t")
                     counter += 1
                     # Optimize the index after a certain amount of added
                     # documents.
                     if counter % 500000 == 0:
                         print(counter)
                         writer.optimize()
                     doc = Document()
                     doc.add(Field("anchor", entry[0],
                                   Field.Store.YES, Field.Index.ANALYZED))
                     doc.add(Field("anchor_uri", entry[1],
                                   Field.Store.YES, Field.Index.ANALYZED))
                     doc.add(Field("dbpedia_uri", entry[2],
                                   Field.Store.YES, Field.Index.ANALYZED))
                     doc.add(Field("number", entry[3].replace("\n", ""),
                                   Field.Store.YES, Field.Index.ANALYZED))
                     writer.addDocument(doc)
                 # The index is also optimized after each complete file.
                 writer.optimize()
     finally:
         # Close the writer even when a file cannot be read or indexed.
         writer.close()
     print(counter)
     print("done")
开发者ID:swalter2,项目名称:knowledgeLexicalisation,代码行数:35,代码来源:AnchorIndex.py

示例6: addDocuments

    def addDocuments(self, dir, isCompound):
        """Write each word of ``self.docs`` into an index opened on ``dir``.

        The word is added four times per document, under the ``keyword``,
        ``unindexed``, ``unstored`` and ``text`` fields, each with its own
        store/index flags.  ``isCompound`` is passed on to the writer's
        ``setUseCompoundFile``.  The index is optimized before closing.
        """
        indexWriter = IndexWriter(dir, SimpleAnalyzer(), True,
                                  IndexWriter.MaxFieldLength.LIMITED)
        indexWriter.setUseCompoundFile(isCompound)

        # Tuning knobs for indexing with FSDirectory (left at defaults):
        # indexWriter.mergeFactor, indexWriter.maxMergeDocs,
        # indexWriter.minMergeDocs

        # (field name, store flag, index flag), in the order they are added.
        fieldSpecs = (
            ("keyword", Field.Store.YES, Field.Index.NOT_ANALYZED),
            ("unindexed", Field.Store.YES, Field.Index.NO),
            ("unstored", Field.Store.NO, Field.Index.ANALYZED),
            ("text", Field.Store.YES, Field.Index.ANALYZED),
        )

        for word in self.docs:
            document = Document()
            for fieldName, storeFlag, indexFlag in fieldSpecs:
                document.add(Field(fieldName, word, storeFlag, indexFlag))
            indexWriter.addDocument(document)

        indexWriter.optimize()
        indexWriter.close()
开发者ID:ustramooner,项目名称:python-lucenepp,代码行数:25,代码来源:CompoundVersusMultiFileIndexTest.py

示例7: addContents

 def addContents(self, contents):
     """Write ``contents`` into the RAM index held in ``self.ramIndex``.

     For every item, element ``[1]`` is added to the ``contents`` field
     (analyzed, not stored, with term vectors).  Any failure is reported on
     standard output together with its cause instead of being raised.

     :param contents: iterable of indexable items; only ``item[1]`` is used.
     """
     try:
         #iwconfig = IndexWriterConfig(SimpleAnalyzer(),IndexWriter.MaxFieldLength.LIMITED)
         writer = IndexWriter(self.ramIndex,
                              SimpleAnalyzer(Version.LUCENE_CURRENT), True,
                              IndexWriter.MaxFieldLength.LIMITED)
         try:
             for content in contents:
                 doc = Document()
                 doc.add(Field("contents", content[1], Field.Store.NO,
                               Field.Index.ANALYZED, Field.TermVector.YES))
                 writer.addDocument(doc)
         finally:
             # Close the writer even when adding a document fails.
             writer.close()
     except Exception as e:
         # Deliberately best-effort, but include the cause in the report.
         print('Unable to add content to RAM index: %s' % (e,))
开发者ID:subramgo,项目名称:Vritti,代码行数:11,代码来源:RAMIndex.py

示例8: setUp

    def setUp(self):
        """Create a RAM index with one stored, analyzed ``contents`` document.

        The index is written with ``self.porterAnalyzer`` and kept in
        ``self.directory``.
        """
        ramDirectory = RAMDirectory()
        self.directory = ramDirectory

        indexWriter = IndexWriter(ramDirectory, self.porterAnalyzer, True,
                                  IndexWriter.MaxFieldLength.UNLIMITED)

        sentence = "The quick brown fox jumps over the lazy dogs"
        document = Document()
        document.add(Field("contents", sentence,
                           Field.Store.YES, Field.Index.ANALYZED))

        indexWriter.addDocument(document)
        indexWriter.close()
开发者ID:bpgriner01,项目名称:pylucene,代码行数:12,代码来源:PositionalPorterStopAnalyzerTest.py

示例9: setUp

    def setUp(self):
        """Index one sample sentence in RAM and open a searcher on it."""
        ramDirectory = RAMDirectory()
        indexWriter = IndexWriter(ramDirectory, WhitespaceAnalyzer(), True,
                                  IndexWriter.MaxFieldLength.UNLIMITED)

        # The single sample document, stored and analyzed under "field".
        sentence = "the quick brown fox jumped over the lazy dog"
        sample = Document()
        sample.add(Field("field", sentence,
                         Field.Store.YES, Field.Index.ANALYZED))
        indexWriter.addDocument(sample)
        indexWriter.close()

        self.searcher = IndexSearcher(ramDirectory)
开发者ID:bpgriner01,项目名称:pylucene,代码行数:13,代码来源:PhraseQueryTest.py

示例10: does_index_exists

 def does_index_exists(self, path_to_index):
     """
     Check whether an index can be opened at ``path_to_index``.

     The check opens an ``IndexWriter`` on the path with its third argument
     set to False and closes it again.  Returns True (after printing a
     confirmation) when that succeeds, and False when any step raises.
     """
     try:
         index_directory = SimpleFSDirectory(File(path_to_index))
         analyzer = StandardAnalyzer(Version.LUCENE_35)
         writer = IndexWriter(index_directory, analyzer, False,
                              IndexWriter.MaxFieldLength(512))
         writer.close()
         print(path_to_index + " exists")
         return True
     except Exception:
         # Not a bare ``except``: KeyboardInterrupt and SystemExit must
         # propagate instead of being reported as "no index".
         return False
开发者ID:swalter2,项目名称:knowledgeLexicalisation,代码行数:13,代码来源:LiveIndex.py

示例11: index

def index(string):
    """Add ``string`` as one document to the index in "REMOVEME.index-dir".

    The index is first opened with the writer's third argument set to
    False; if that raises ``lucene.JavaError`` the writer is opened again
    with it set to True.  The text goes into the stored, analyzed ``text``
    field and the index is optimized before the writer is closed.

    :param string: text of the document to add.
    """
    lucene.initVM()
    indexDir = "REMOVEME.index-dir"
    directory = SimpleFSDirectory(File(indexDir))
    analyzer = StandardAnalyzer(Version.LUCENE_30)
    try:
        writer = IndexWriter(directory, analyzer, False,
                             IndexWriter.MaxFieldLength(512))
    except lucene.JavaError:
        # NOTE(review): presumably this means "no index yet", but any Java
        # error ends up here -- confirm that re-creating is always wanted.
        writer = IndexWriter(directory, analyzer, True,
                             IndexWriter.MaxFieldLength(512))

    try:
        doc = Document()
        doc.add(Field("text", string, Field.Store.YES, Field.Index.ANALYZED))
        writer.addDocument(doc)
        writer.optimize()
    finally:
        # Close the writer even when adding or optimizing fails.
        writer.close()
开发者ID:kansal,项目名称:Sub-Event-Detection,代码行数:30,代码来源:subEventPylucene.py

示例12: setUp

    def setUp(self):
        """Index four restaurant points in RAM and prepare the type query.

        The points are added through ``self.addPoint``; afterwards a
        searcher on the directory and a ``TermQuery`` on
        ``type:restaurant`` are stored on the test instance.
        """
        self.directory = RAMDirectory()
        indexWriter = IndexWriter(self.directory, WhitespaceAnalyzer(), True,
                                  IndexWriter.MaxFieldLength.UNLIMITED)

        # Argument tuples for addPoint, in insertion order.
        points = (
            ("El Charro", "restaurant", 1, 2),
            ("Cafe Poca Cosa", "restaurant", 5, 9),
            ("Los Betos", "restaurant", 9, 6),
            ("Nico's Taco Shop", "restaurant", 3, 8),
        )
        for point in points:
            self.addPoint(indexWriter, *point)

        indexWriter.close()

        self.searcher = IndexSearcher(self.directory, True)
        self.query = TermQuery(Term("type", "restaurant"))
开发者ID:bpgriner01,项目名称:pylucene,代码行数:15,代码来源:DistanceSortingTest.py

示例13: setUp

    def setUp(self):
        """Index one part record in RAM and obtain a searcher for it.

        The document holds a non-analyzed ``partnum`` and an analyzed
        ``description``, both stored.  The searcher comes from
        ``self.getSearcher()``.
        """
        ramDirectory = RAMDirectory()
        self.directory = ramDirectory

        indexWriter = IndexWriter(ramDirectory, SimpleAnalyzer(), True,
                                  IndexWriter.MaxFieldLength.UNLIMITED)

        part = Document()
        partNumber = Field("partnum", "Q36",
                           Field.Store.YES, Field.Index.NOT_ANALYZED)
        part.add(partNumber)
        description = Field("description", "Illidium Space Modulator",
                            Field.Store.YES, Field.Index.ANALYZED)
        part.add(description)

        indexWriter.addDocument(part)
        indexWriter.close()

        self.searcher = self.getSearcher()
开发者ID:qiugen,项目名称:pylucene-trunk,代码行数:15,代码来源:KeywordAnalyzerTest.py

示例14: create_index

 def create_index(self, path_to_index):
     """
     Create a new index at ``path_to_index`` holding one placeholder
     document with the fields ``Sentence``, ``X``, ``Y`` and ``URI``.
     """
     print("Create new Index")
     index_directory = SimpleFSDirectory(File(path_to_index))
     index_writer = IndexWriter(index_directory,
                                StandardAnalyzer(Version.LUCENE_35), True,
                                IndexWriter.MaxFieldLength(512))

     # (field name, value) pairs of the placeholder document, in order.
     placeholder_fields = (
         ("Sentence", "Hello World"),
         ("X", "x"),
         ("Y", "y"),
         ("URI", "uri"),
     )
     placeholder = Document()
     for field_name, field_value in placeholder_fields:
         placeholder.add(Field(field_name, field_value,
                               Field.Store.YES, Field.Index.ANALYZED))

     index_writer.addDocument(placeholder)
     index_writer.close()
开发者ID:swalter2,项目名称:knowledgeLexicalisation,代码行数:15,代码来源:LiveIndex.py

示例15: setUp

    def setUp(self):
        """Fill a RAM index with 500 documents carrying padded numeric ids.

        Each document has a single stored, non-analyzed ``id`` field whose
        value is ``NumberUtils.pad(n)`` for n from 1 to 500 inclusive.
        """
        self.analyzer = WhitespaceAnalyzer()
        self.directory = RAMDirectory()

        indexWriter = IndexWriter(self.directory, self.analyzer, True,
                                  IndexWriter.MaxFieldLength.LIMITED)

        for number in xrange(1, 501):
            paddedId = NumberUtils.pad(number)
            entry = Document()
            entry.add(Field("id", paddedId,
                            Field.Store.YES, Field.Index.NOT_ANALYZED))
            indexWriter.addDocument(entry)

        indexWriter.close()
开发者ID:bpgriner01,项目名称:pylucene,代码行数:15,代码来源:AdvancedQueryParserTest.py


注:本文中的lucene.IndexWriter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。