本文整理汇总了Python中lucene.IndexWriter类的典型用法代码示例。如果您正苦于以下问题:Python IndexWriter类的具体用法?Python IndexWriter怎么用?Python IndexWriter使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了IndexWriter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: index
def index(source, indexName):
    """Build a Lucene index in directory ``indexName`` from ``source``.

    ``source`` is scanned for records consisting of an id of the form
    ``GH<digits>-<digits>``, a newline, and the text up to the next run of
    newlines.  Each record becomes one document:

    * ``id``      -- the record id, stored but not indexed;
    * ``content`` -- one un-analyzed, un-stored field per whitespace-separated
      token of the record text, with every "-" replaced by "_".

    Args:
        source: Text containing the records.
        indexName: Path of the index directory; created when it is missing.
            The writer is opened with its third constructor argument set to
            True (Lucene's ``create`` flag).

    Prints ``"<n> docs indexed"`` when done, where n is the record count.
    """
    if not os.path.exists(indexName):
        os.mkdir(indexName)

    # Parse before opening the writer so that a failure here cannot leave an
    # open writer behind.
    record_pattern = re.compile(r"(GH\d+\-\d+)\n(.*?)\n+", re.DOTALL)
    records = record_pattern.findall(source)

    writer = IndexWriter(SimpleFSDirectory(File(indexName)),
                         StandardAnalyzer(Version.LUCENE_CURRENT), True,
                         IndexWriter.MaxFieldLength.LIMITED)
    try:
        for record_id, text in records:
            doc = Document()
            doc.add(Field("id", record_id, Field.Store.YES, Field.Index.NO))
            for token in text.split():
                doc.add(Field("content", token.replace("-", "_"),
                              Field.Store.NO, Field.Index.NOT_ANALYZED))
            writer.addDocument(doc)
    finally:
        # Always release the writer, even when adding a document fails.
        writer.close()
    print(str(len(records)) + " docs indexed")
示例2: setUp
def setUp(self):
    """Split 26 animal names across two in-memory indexes and open searchers.

    Names whose lower-cased first letter sorts before "n" are written to the
    first index, all others to the second.  Each name is stored as a single
    un-analyzed "animal" field.  ``self.searchers`` ends up holding one
    ``IndexSearcher`` per index, first-half index first.
    """
    names = [
        "aardvark", "beaver", "coati", "dog", "elephant", "frog",
        "gila monster", "horse", "iguana", "javelina", "kangaroo", "lemur",
        "moose", "nematode", "orca", "python", "quokka", "rat", "scorpion",
        "tarantula", "uromastyx", "vicuna", "walrus", "xiphias", "yak",
        "zebra",
    ]

    shared_analyzer = WhitespaceAnalyzer()
    first_half_dir = RAMDirectory()
    second_half_dir = RAMDirectory()
    first_half_writer = IndexWriter(first_half_dir, shared_analyzer, True,
                                    IndexWriter.MaxFieldLength.UNLIMITED)
    second_half_writer = IndexWriter(second_half_dir, shared_analyzer, True,
                                     IndexWriter.MaxFieldLength.UNLIMITED)

    for name in names:
        entry = Document()
        entry.add(Field("animal", name,
                        Field.Store.YES, Field.Index.NOT_ANALYZED))
        # Route the document by the first letter of the name.
        if name[0].lower() < "n":
            target = first_half_writer
        else:
            target = second_half_writer
        target.addDocument(entry)

    first_half_writer.close()
    second_half_writer.close()

    self.searchers = [IndexSearcher(first_half_dir),
                      IndexSearcher(second_half_dir)]
示例3: open
def open(self, name, txn, **kwds):
    """Open the container through the parent class, initialising the index on create.

    All arguments are forwarded unchanged to the parent ``open``.  When the
    ``create`` keyword is present and truthy, an ``IndexWriter`` is opened on
    ``self.getDirectory()`` with its third argument True and closed again
    immediately, after which the directory itself is closed.
    """
    super(IndexContainer, self).open(name, txn, **kwds)

    if not kwds.get('create', False):
        return

    store = self.getDirectory()
    # No documents are added: the writer is opened and closed straight away.
    IndexWriter(store, StandardAnalyzer(), True).close()
    store.close()
示例4: index_files
def index_files(files, index_directory):
    """Index every entry of ``files`` into a Lucene index at ``index_directory``.

    Initialises the JVM, opens an ``IndexWriter`` (third constructor argument
    True, field length limit 512) and hands each entry of ``files`` together
    with the writer to ``parse_file``.  The index is optimized once all
    entries have been processed.

    Args:
        files: Iterable of items accepted by ``parse_file``.
        index_directory: Path of the index directory.

    Any exception raised while parsing propagates to the caller; the writer
    is closed first so that it is not left open.
    """
    lucene.initVM()
    directory = SimpleFSDirectory(File(index_directory))
    analyzer = StandardAnalyzer(Version.LUCENE_30)
    writer = IndexWriter(directory, analyzer, True,
                         IndexWriter.MaxFieldLength(512))
    try:
        for f in files:
            parse_file(f, writer)
        writer.optimize()
    finally:
        # Close the writer even when parse_file or optimize fails.
        writer.close()
示例5: index
def index(self, path_to_index, path_files):
    """Index anchor texts from tab-separated ``*.txt`` files.

    Every file matching ``path_files + "*.txt"`` is read line by line.  Each
    line is split on tabs and its first four columns are stored and indexed
    as the fields ``anchor``, ``anchor_uri``, ``dbpedia_uri`` and ``number``
    (newlines are removed from the fourth column).

    The index is optimized after every 500000 lines and once more after each
    file.  Progress is printed: each file path, the running count at every
    500000 lines, and finally the total count followed by "done".

    Args:
        path_to_index: Path of the index directory.
        path_files: Prefix that "*.txt" is appended to for globbing; it is
            concatenated as-is, so a directory needs its trailing separator.

    Raises:
        IndexError: if a line has fewer than four tab-separated columns.
    """
    # NOTE(review): lucene.initVM() was commented out here in the original;
    # presumably the caller initialises the JVM -- confirm.
    directory_index = SimpleFSDirectory(File(path_to_index))
    analyzer = StandardAnalyzer(Version.LUCENE_35)
    writer = IndexWriter(directory_index, analyzer, True,
                         IndexWriter.MaxFieldLength(512))
    counter = 0
    try:
        for path_to_file in glob.glob(path_files + "*.txt"):
            print(path_to_file)
            # "with" closes the file even when a line fails to index.
            with open(path_to_file, "r") as f:
                for line in f:
                    entry = line.split("\t")
                    counter += 1
                    # Optimize the index after a certain amount of added
                    # documents.
                    if counter % 500000 == 0:
                        print(counter)
                        writer.optimize()
                    doc = Document()
                    doc.add(Field("anchor", entry[0],
                                  Field.Store.YES, Field.Index.ANALYZED))
                    doc.add(Field("anchor_uri", entry[1],
                                  Field.Store.YES, Field.Index.ANALYZED))
                    doc.add(Field("dbpedia_uri", entry[2],
                                  Field.Store.YES, Field.Index.ANALYZED))
                    doc.add(Field("number", entry[3].replace("\n", ""),
                                  Field.Store.YES, Field.Index.ANALYZED))
                    writer.addDocument(doc)
            writer.optimize()
    finally:
        # Close the writer whether or not indexing succeeded.
        writer.close()
    print(counter)
    print("done")
示例6: addDocuments
def addDocuments(self, dir, isCompound):
    """Write one four-field document per entry of ``self.docs`` into ``dir``.

    Each word is added under four field names that differ only in their
    store/index options: "keyword", "unindexed", "unstored" and "text".
    The index is optimized and the writer closed once all words are added.

    Args:
        dir: Directory object the ``IndexWriter`` is opened on.
        isCompound: Value passed to ``setUseCompoundFile``.
    """
    index_writer = IndexWriter(dir, SimpleAnalyzer(), True,
                               IndexWriter.MaxFieldLength.LIMITED)
    index_writer.setUseCompoundFile(isCompound)

    # The writer's mergeFactor / maxMergeDocs / minMergeDocs could be set
    # here to adjust indexing performance with an FSDirectory; they are left
    # at their existing values.

    # (field name, store option, index option), in the order they are added.
    field_specs = (
        ("keyword", Field.Store.YES, Field.Index.NOT_ANALYZED),
        ("unindexed", Field.Store.YES, Field.Index.NO),
        ("unstored", Field.Store.NO, Field.Index.ANALYZED),
        ("text", Field.Store.YES, Field.Index.ANALYZED),
    )

    for word in self.docs:
        document = Document()
        for field_name, store_option, index_option in field_specs:
            document.add(Field(field_name, word, store_option, index_option))
        index_writer.addDocument(document)

    index_writer.optimize()
    index_writer.close()
示例7: addContents
def addContents(self, contents):
    """Index the second element of every item in ``contents`` into ``self.ramIndex``.

    Each item contributes one document with a single analyzed, un-stored
    "contents" field that keeps term vectors.  The writer is opened with its
    third constructor argument set to True.

    This is best-effort: any failure is reported on stdout, together with
    the error that caused it, and is not re-raised.

    Args:
        contents: Iterable of indexable items; only ``item[1]`` is used.
    """
    try:
        writer = IndexWriter(self.ramIndex,
                             SimpleAnalyzer(Version.LUCENE_CURRENT), True,
                             IndexWriter.MaxFieldLength.LIMITED)
        try:
            for content in contents:
                doc = Document()
                doc.add(Field("contents", content[1], Field.Store.NO,
                              Field.Index.ANALYZED, Field.TermVector.YES))
                writer.addDocument(doc)
        finally:
            # Close the writer even if adding a document failed.
            writer.close()
    except Exception as e:
        # Keep the original best-effort behaviour, but say what went wrong
        # instead of discarding the error.
        print('Unable to add content to RAM index: %s' % (e,))
示例8: setUp
def setUp(self):
    """Create an in-memory index holding a single sample sentence.

    The sentence is stored and analyzed under the "contents" field using
    ``self.porterAnalyzer``; the directory is kept as ``self.directory``.
    """
    self.directory = RAMDirectory()
    index_writer = IndexWriter(self.directory, self.porterAnalyzer, True,
                               IndexWriter.MaxFieldLength.UNLIMITED)

    sentence = Field("contents",
                     "The quick brown fox jumps over the lazy dogs",
                     Field.Store.YES, Field.Index.ANALYZED)
    sample = Document()
    sample.add(sentence)

    index_writer.addDocument(sample)
    index_writer.close()
示例9: setUp
def setUp(self):
    """Index one sample sentence in memory and open a searcher on it.

    The sentence is stored and analyzed under the field named "field" with a
    ``WhitespaceAnalyzer``; the searcher is kept as ``self.searcher``.
    """
    ram_dir = RAMDirectory()
    index_writer = IndexWriter(ram_dir, WhitespaceAnalyzer(), True,
                               IndexWriter.MaxFieldLength.UNLIMITED)

    # The single sample document.
    sentence = Field("field", "the quick brown fox jumped over the lazy dog",
                     Field.Store.YES, Field.Index.ANALYZED)
    sample = Document()
    sample.add(sentence)

    index_writer.addDocument(sample)
    index_writer.close()

    self.searcher = IndexSearcher(ram_dir)
示例10: does_index_exists
def does_index_exists(self, path_to_index):
    """Check whether an index already exists at ``path_to_index``.

    The check opens an ``IndexWriter`` on the path with its third constructor
    argument False (Lucene's ``create`` flag) and closes it again.  If that
    works, ``"<path> exists"`` is printed and True is returned; if anything
    in that sequence raises, False is returned.

    Args:
        path_to_index: Path of the index directory to probe.

    Returns:
        True or False.
    """
    try:
        index_directory = SimpleFSDirectory(File(path_to_index))
        analyzer = StandardAnalyzer(Version.LUCENE_35)
        writer = IndexWriter(index_directory, analyzer, False,
                             IndexWriter.MaxFieldLength(512))
        writer.close()
        print(path_to_index + " exists")
        return True
    except Exception:
        # Not a bare "except:", so KeyboardInterrupt and SystemExit are no
        # longer swallowed and misreported as "index does not exist".
        return False
示例11: index
def index(string):
    """Add ``string`` as one document to the index in "REMOVEME.index-dir".

    The writer is first opened with its third constructor argument False;
    if that raises ``lucene.JavaError`` it is opened again with True.  The
    text is stored and analyzed under the "text" field, after which the index
    is optimized and the writer closed.

    Args:
        string: Text of the document to add.
    """
    lucene.initVM()
    indexDir = "REMOVEME.index-dir"
    directory = SimpleFSDirectory(File(indexDir))
    analyzer = StandardAnalyzer(Version.LUCENE_30)
    try:
        writer = IndexWriter(directory, analyzer, False,
                             IndexWriter.MaxFieldLength(512))
    except lucene.JavaError:
        # NOTE(review): this fallback runs for every JavaError, not only a
        # missing index -- confirm that recreating is always wanted here.
        writer = IndexWriter(directory, analyzer, True,
                             IndexWriter.MaxFieldLength(512))
    try:
        doc = Document()
        doc.add(Field("text", string, Field.Store.YES, Field.Index.ANALYZED))
        writer.addDocument(doc)
        writer.optimize()
    finally:
        # Close the writer even when adding or optimizing fails.
        writer.close()
示例12: setUp
def setUp(self):
    """Index four restaurants in memory and prepare a searcher and a query.

    Each restaurant is added through ``self.addPoint`` with the type
    "restaurant" and two integers.  Afterwards ``self.searcher`` searches the
    new index and ``self.query`` matches the term type:restaurant.
    """
    self.directory = RAMDirectory()
    index_writer = IndexWriter(self.directory, WhitespaceAnalyzer(), True,
                               IndexWriter.MaxFieldLength.UNLIMITED)

    # NOTE(review): the two numbers are passed straight to addPoint;
    # presumably x/y coordinates -- confirm against addPoint.
    restaurants = (
        ("El Charro", 1, 2),
        ("Cafe Poca Cosa", 5, 9),
        ("Los Betos", 9, 6),
        ("Nico's Taco Shop", 3, 8),
    )
    for place_name, x, y in restaurants:
        self.addPoint(index_writer, place_name, "restaurant", x, y)
    index_writer.close()

    self.searcher = IndexSearcher(self.directory, True)
    self.query = TermQuery(Term("type", "restaurant"))
示例13: setUp
def setUp(self):
    """Index a single part record in memory and obtain a searcher.

    The document has an un-analyzed "partnum" field and an analyzed
    "description" field, both stored.  The searcher comes from
    ``self.getSearcher()`` and is kept as ``self.searcher``.
    """
    self.directory = RAMDirectory()
    index_writer = IndexWriter(self.directory, SimpleAnalyzer(), True,
                               IndexWriter.MaxFieldLength.UNLIMITED)

    part_number = Field("partnum", "Q36",
                        Field.Store.YES, Field.Index.NOT_ANALYZED)
    description = Field("description", "Illidium Space Modulator",
                        Field.Store.YES, Field.Index.ANALYZED)

    part = Document()
    part.add(part_number)
    part.add(description)

    index_writer.addDocument(part)
    index_writer.close()

    self.searcher = self.getSearcher()
示例14: create_index
def create_index(self, path_to_index):
    """Create a new index at ``path_to_index`` holding one placeholder document.

    Prints "Create new Index", opens an ``IndexWriter`` with its third
    constructor argument True, and adds a single document with the stored,
    analyzed fields "Sentence", "X", "Y" and "URI".

    Args:
        path_to_index: Path of the index directory.
    """
    print("Create new Index")
    index_store = SimpleFSDirectory(File(path_to_index))
    index_writer = IndexWriter(index_store,
                               StandardAnalyzer(Version.LUCENE_35), True,
                               IndexWriter.MaxFieldLength(512))

    # (field name, value) pairs of the placeholder document, in add order.
    placeholder_fields = (
        ("Sentence", "Hello World"),
        ("X", "x"),
        ("Y", "y"),
        ("URI", "uri"),
    )
    placeholder = Document()
    for field_name, value in placeholder_fields:
        placeholder.add(Field(field_name, value,
                              Field.Store.YES, Field.Index.ANALYZED))

    index_writer.addDocument(placeholder)
    index_writer.close()
示例15: setUp
def setUp(self):
    """Fill an in-memory index with 500 documents numbered 1 to 500.

    Each document has one stored, un-analyzed "id" field whose value is
    ``NumberUtils.pad`` applied to its number.  The analyzer and directory
    are kept as ``self.analyzer`` and ``self.directory``.
    """
    self.analyzer = WhitespaceAnalyzer()
    self.directory = RAMDirectory()
    index_writer = IndexWriter(self.directory, self.analyzer, True,
                               IndexWriter.MaxFieldLength.LIMITED)

    for number in xrange(1, 501):
        padded_id = Field("id", NumberUtils.pad(number),
                          Field.Store.YES, Field.Index.NOT_ANALYZED)
        numbered = Document()
        numbered.add(padded_id)
        index_writer.addDocument(numbered)

    index_writer.close()