Python FieldType.setStored方法代碼示例

本文整理匯總了Python中org.apache.lucene.document.FieldType.setStored方法的典型用法代碼示例。如果您正苦於以下問題：Python FieldType.setStored方法的具體用法？Python FieldType.setStored怎麼用？Python FieldType.setStored使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.lucene.document.FieldType的用法示例。

在下文中一共展示了FieldType.setStored方法的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: index_docs

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
    def index_docs(self, tweets, writer):
        t1 = FieldType()
        t1.setIndexed(True)
        t1.setStored(True)
        t1.setTokenized(True)
        t1.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
        t1.setStoreTermVectors(True)
        t1.setStoreTermVectorOffsets(True)

        # add each tweet to the index
        for tweet in tweets:
            try:
                # strip out URLs because they provide false index matches
                contents = []
                for word in tweet[1].text.split():
                    if word.startswith("http://") or word.startswith("https://"):
                        continue
                    contents.append(word)
                contents = " ".join(contents)

                if len(contents) == 0: continue

                doc = Document()
                doc.add(Field("contents", contents, t1))
                writer.addDocument(doc)
            except Exception, e:
                print "Failed in index_docs:", e

開發者ID:ryancutter，項目名稱:OnlyWorthy，代碼行數:29，代碼來源:onlyworthy_dev.py

示例2: index_docs

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
    def index_docs(self, train_set, writer):
        """Index each training record along with the wiki text of its answer.

        :param train_set: iterable of mappings providing the keys
            ``'Answer'``, ``'Question ID'``, ``'category'``,
            ``'Sentence Position'`` and ``'Question Text'``.
        :param writer: object whose ``addDocument`` receives one document
            per record.
        """
        # Metadata fields: stored and indexed as one un-analyzed token.
        keyword_type = FieldType()
        keyword_type.setIndexed(True)
        keyword_type.setStored(True)
        keyword_type.setTokenized(False)
        keyword_type.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS)

        # Free-text fields: stored, tokenized, with full term vectors
        # (offsets, payloads and positions).
        text_type = FieldType()
        text_type.setIndexed(True)
        text_type.setStored(True)
        text_type.setTokenized(True)
        text_type.setStoreTermVectorOffsets(True)
        text_type.setStoreTermVectorPayloads(True)
        text_type.setStoreTermVectorPositions(True)
        text_type.setStoreTermVectors(True)
        text_type.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)

        # (index field name, key in the training record), in insertion order.
        keyword_fields = (
            ("answer", 'Answer'),
            ("qid", 'Question ID'),
            ("category", 'category'),
            ("position", 'Sentence Position'),
        )

        for record in train_set:
            doc = Document()
            for field_name, record_key in keyword_fields:
                doc.add(Field(field_name, record[record_key], keyword_type))
            doc.add(Field("question", record['Question Text'], text_type))
            wiki_text = self.wiki_reader.get_text(record['Answer'])
            doc.add(Field("wiki_plain", wiki_text, text_type))
            writer.addDocument(doc)

開發者ID:sangheestyle，項目名稱:nlp2014，代碼行數:30，代碼來源:index.py

示例3: indexDocs

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
    def indexDocs(self, root, writer):

        t1 = FieldType()
        t1.setIndexed(True)
        t1.setStored(True)
        t1.setTokenized(False)
        t1.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS)
        
        t2 = FieldType()
        t2.setIndexed(True)
        t2.setStored(False)
        t2.setTokenized(True)
        t2.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
        
        for root, dirnames, filenames in os.walk(root):
            for filename in filenames:
                if not filename.endswith('.html'):
                    continue
                print "adding", filename
                try:
                    path = os.path.join(root, filename)
                    file = open(path)
                    contents = unicode(file.read(), 'iso-8859-1')
                    file.close()
                    doc = Document()
                    doc.add(Field("name", filename, t1))
                    doc.add(Field("path", root, t1))
                    if len(contents) > 0:
                        doc.add(Field("contents", contents, t2))
                    else:
                        print "warning: no content in %s" % filename
                    writer.addDocument(doc)
                except Exception, e:
                    print "Failed in indexDocs:", e

開發者ID:devs4v，項目名稱:devs4v-information-retrieval15，代碼行數:36，代碼來源:IndexFiles.py

示例4: index

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
	def index(self, root):

		t = FieldType()
		t.setIndexed(True)
		t.setStored(True)
		t.setTokenized(True)
		t.setStoreTermVectors(True)
		
		for path, dirs, files in os.walk(root):
			
			for file in files:
				
				filePath = os.path.join(path, file)
				fd = open(filePath)
				content = unicode(fd.read(), 'iso-8859-1')
				fd.close()
				
				doc = Document()
				doc.add(Field('name', file, StringField.TYPE_STORED))

				parent = os.path.split(path)[1]
				doc.add(Field('parent', parent, StringField.TYPE_STORED))

				if len(content) > 0:
					doc.add(Field('content', content, t))

				print 'Indexing %s' % file
				self.mWriter.addDocument(doc)

		self.mWriter.commit()
		self.mWriter.close()

開發者ID:haonguyen14，項目名稱:CLIFinder，代碼行數:33，代碼來源:Indexer.py

示例5: indexDocs

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
    def indexDocs(self, url, writer):
        type1 = FieldType()
        type1.setIndexed(True)
        type1.setStored(True)
        type1.setTokenized(False)
        type1.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS)
        
        type2 = FieldType()
        type2.setIndexed(True)
        type2.setStored(True)
        type2.setTokenized(True)
        type2.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
        
        # Read Feeds
        feeds = feedparser.parse(url)

        for item in feeds["entries"]:
            print "adding", item["title"] 
            try:
                link = item["link"] 
                contents = item["description"].encode("utf-8")
                contents = re.sub('<[^<]+?>', '', ''.join(contents))
                title = item["title"]
                doc = Document()
                doc.add(Field("url", link, type1))
                doc.add(Field("title", title, type1))
                if len(contents) > 0:
                    doc.add(Field("contents", contents, type2))
                else:
                    print "warning: no content in %s" % item["title"] 
                writer.addDocument(doc)
            except Exception, e:
                 print "Failed in indexDocs:", e

開發者ID:yelinkyaw，項目名稱:FeedsIndexer，代碼行數:35，代碼來源:IndexFeeds.py

示例6: indexDocs

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
 def indexDocs(self, root, writer):
     t1 = FieldType() # for short items, e.g. file name.
     t1.setIndexed(True)
     t1.setStored(True)
     t1.setTokenized(False)
     t1.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS) # DOCS_AND_FREQS_AND_POSITIONS_OFFSETS
     
     t2 = FieldType() # for content
     t2.setIndexed(True)
     t2.setStored(False) # don't store the original text
     t2.setTokenized(True)
     t2.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
     
     for root, dirnames, filenames in os.walk(root):
         for filename in filenames:
             print "adding", filename
             try:
                 path = os.path.join(root, filename)
                 file = open(path)
                 contents = unicode(file.read(), 'iso-8859-1')
                 file.close()
                 doc = Document()
                 doc.add(Field("name", filename, t1))
                 doc.add(Field("path", root, t1))
                 if len(contents) > 0:
                     doc.add(Field("contents", contents, t2))
                 else:
                     print "warning: no content in %s" % filename
                 writer.addDocument(doc)
             except Exception, e:
                 print "Failed in indexDocs:", e

開發者ID:w2wei，項目名稱:XPRC，代碼行數:33，代碼來源:IndexFiles.py

示例7: LuceneDocumentField

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
class LuceneDocumentField(object):
    """Holds one pre-built FieldType for each internal FIELDTYPE_ value."""

    def __init__(self):
        """Build the five supported field types."""
        # FIELD_ID: stored, indexed, non-tokenized
        self.field_id = self._build(tokenized=False)

        # FIELD_ID_TV: stored, indexed, not tokenized, with term vectors
        # (for storing IDs with term vector info)
        self.field_id_tv = self._build(tokenized=False, term_vectors=True)

        # FIELD_TEXT: stored, indexed, tokenized
        # (index options are left at the FieldType default)
        self.field_text = self._build(tokenized=True)

        # FIELD_TEXT_TV: stored, indexed, tokenized, with term vectors
        self.field_text_tv = self._build(tokenized=True, term_vectors=True)

        # FIELD_TEXT_TVP: stored, indexed, tokenized, with term vectors
        # and term vector positions
        self.field_text_tvp = self._build(
            tokenized=True, term_vectors=True, vector_positions=True)

    @staticmethod
    def _build(tokenized, term_vectors=False, vector_positions=False):
        """Return a stored, indexed FieldType with the requested options."""
        field_type = FieldType()
        field_type.setIndexed(True)
        field_type.setStored(True)
        field_type.setTokenized(tokenized)
        if term_vectors:
            field_type.setStoreTermVectors(True)
        if vector_positions:
            field_type.setStoreTermVectorPositions(True)
        return field_type

    def get_field(self, type):
        """Return the FieldType matching an internal FIELDTYPE_ value.

        Raises ``Exception("Unknown field type")`` when *type* matches none
        of the known constants.
        """
        known_types = (
            (Lucene.FIELDTYPE_ID, self.field_id),
            (Lucene.FIELDTYPE_ID_TV, self.field_id_tv),
            (Lucene.FIELDTYPE_TEXT, self.field_text),
            (Lucene.FIELDTYPE_TEXT_TV, self.field_text_tv),
            (Lucene.FIELDTYPE_TEXT_TVP, self.field_text_tvp),
        )
        for constant, field_type in known_types:
            if type == constant:
                return field_type
        raise Exception("Unknown field type")

開發者ID:renespeck，項目名稱:TAGME_Reproducibility，代碼行數:58，代碼來源:lucene_tools.py

示例8: Indexer

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
class Indexer(object):
    """Wrapper around a Lucene IndexWriter opened in CREATE mode."""

    def __init__(self, **kwargs):
        """ Initialize a new instance of the Indexer

        :param root: The output directory of the underlying index
            (defaults to ``"index"``); it is created, including any missing
            parent directories, when it does not exist
        :param analyzer: The analyzer to work with; when omitted or ``None``
            a ``StandardAnalyzer`` is created.  Either way it is wrapped in
            a ``LimitTokenCountAnalyzer`` with a limit of 1048576 tokens
        """
        self.output = kwargs.get("root", "index")
        if not os.path.exists(self.output):
            os.makedirs(self.output)

        # Only build the default analyzer when the caller did not supply one.
        analyzer = kwargs.get("analyzer")
        if analyzer is None:
            analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        self.analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
        self.config = IndexWriterConfig(Version.LUCENE_CURRENT, self.analyzer)
        self.config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        self.store = SimpleFSDirectory(File(self.output))
        self.writer = IndexWriter(self.store, self.config)
        self.create_field_types()

    def index(self, document):
        """ Given a new document, add it to the index.

        Failures are logged through ``logger.exception`` and not re-raised.

        :param document: The document to add to the indexer
        """
        try:
            self.writer.addDocument(document)
        except Exception:
            logger.exception("Failed to index the supplied document")

    def shutdown(self):
        """ Shutdown the currently processing indexer.

        Closes the underlying writer; failures are logged through
        ``logger.exception`` and not re-raised.
        """
        try:
            self.writer.close()
        except Exception:
            logger.exception("Failed to shutdown the indexer correctly")

    def create_field_types(self):
        """ Create the field types that will be used to specify
        what actions lucene should take on the various fields
        supplied to index.

        ``field_clean`` is stored and not tokenized; ``field_dirty`` is
        tokenized with positions and not stored.
        """
        self.field_clean = FieldType()
        self.field_clean.setIndexed(True)
        self.field_clean.setStored(True)
        self.field_clean.setTokenized(False)
        self.field_clean.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS)

        self.field_dirty = FieldType()
        self.field_dirty.setIndexed(True)
        self.field_dirty.setStored(False)
        self.field_dirty.setTokenized(True)
        self.field_dirty.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)

開發者ID:bashwork，項目名稱:common，代碼行數:56，代碼來源:filesearch.py

示例9: setUp

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
    def setUp(self):
        """Write a single document with a term-vector field to the index."""
        super(Test_Bug1842, self).setUp()

        self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        writer = self.getWriter(analyzer=self.analyzer)

        # Indexed but not stored, with term vectors enabled.
        vector_type = FieldType()
        vector_type.setStored(False)
        vector_type.setIndexed(True)
        vector_type.setStoreTermVectors(True)

        document = Document()
        document.add(Field("all", "blah blah blah Gesundheit", vector_type))
        document.add(Field('id', '1', StringField.TYPE_NOT_STORED))

        writer.addDocument(document)
        writer.close()

開發者ID:devs4v，項目名稱:devs4v-information-retrieval15，代碼行數:19，代碼來源:test_bug1842.py

示例10: index_article

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
def index_article(writer, art_id, art_body):
    """Add one article to the index as a two-field document.

    :param writer: object whose ``addDocument`` receives the document.
    :param art_id: article identifier; converted with ``str`` and indexed
        as a single un-analyzed token.
    :param art_body: article text; stored and tokenized with positions.
    """
    def make_type(tokenized, index_options):
        """Return a stored, indexed FieldType with the given settings."""
        field_type = FieldType()
        field_type.setIndexed(True)
        field_type.setStored(True)
        field_type.setTokenized(tokenized)
        field_type.setIndexOptions(index_options)
        return field_type

    id_type = make_type(False, FieldInfo.IndexOptions.DOCS_AND_FREQS)
    body_type = make_type(
        True, FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)

    article = Document()
    article.add(Field("art_id", str(art_id), id_type))
    article.add(Field("art_body", art_body, body_type))

    writer.addDocument(article)

開發者ID:andrely，項目名稱:vg-pipeline，代碼行數:20，代碼來源:indexing.py

示例11: testBinaryFieldInIndex

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
    def testBinaryFieldInIndex(self):
        """Round-trip a binary and a string stored field through an index."""
        stored_only = FieldType()
        stored_only.setStored(True)

        raw_value = JArray('byte')(self.binaryValStored)
        binary_field = StoredField("binaryStored", raw_value)
        string_field = Field("stringStored", self.binaryValStored, stored_only)

        doc = Document()
        for field in (binary_field, string_field):
            doc.add(field)

        # both fields must be present on the document
        self.assertEqual(2, doc.fields.size())

        # write the document to the index
        writer = self.getWriter(analyzer=StandardAnalyzer())
        writer.addDocument(doc)
        writer.close()

        # read the document back
        reader = self.getReader()
        fetched = reader.document(0)
        self.assertTrue(fetched is not None)

        # the binary stored field must match the original value
        stored_ref = fetched.getBinaryValue("binaryStored")
        self.assertEqual(stored_ref.bytes.bytes_, self.binaryValStored)

        # the string field must match the decoded original value
        self.assertEqual(fetched.get("stringStored"),
                         self.binaryValStored.decode())

        reader.close()

開發者ID:svn2github，項目名稱:pylucene，代碼行數:40，代碼來源:test_BinaryDocument.py

示例12: lazyImport

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
def lazyImport():
    """Import the Lucene classes on first use and publish them as globals.

    Returns immediately when the module-level ``imported`` flag is already
    set.  Otherwise performs the imports below, builds ``UNINDEXED_TYPE``,
    sets the flag and copies every local name into the module namespace.
    """
    global imported
    if imported:
        return

    # getJVM() is called before any java / org.apache import below
    # (presumably it starts or attaches to the JVM -- confirm in
    # meresco.pylucene).
    from meresco.pylucene import getJVM
    getJVM()

    from java.nio.file import Paths
    from org.apache.lucene.document import Document, StringField, Field, FieldType
    from org.apache.lucene.search import IndexSearcher, TermQuery
    from org.apache.lucene.index import DirectoryReader, Term, IndexWriter, IndexWriterConfig, IndexOptions
    from org.apache.lucene.store import FSDirectory
    from org.apache.lucene.util import Version
    from org.apache.lucene.analysis.core import WhitespaceAnalyzer

    # Field type with IndexOptions.NONE: stored and not tokenized.
    UNINDEXED_TYPE = FieldType()
    UNINDEXED_TYPE.setIndexOptions(IndexOptions.NONE)
    UNINDEXED_TYPE.setStored(True)
    UNINDEXED_TYPE.setTokenized(False)

    imported = True
    # Export every local bound above (the imported names and UNINDEXED_TYPE)
    # to module scope; renaming a local here changes what the module exposes.
    globals().update(locals())

開發者ID:seecr，項目名稱:meresco-lucene，代碼行數:25，代碼來源:lucenekeyvaluestore.py

示例13: indexDocs

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
 def indexDocs(self, root, writer): 
      
     #Create a new FieldType with default properties. 
     t1 = FieldType() 
     t1.setIndexed(True) 
     t1.setStored(True) 
     t1.setTokenized(False)#True if this field's value should be analyzed by the Analyzer. 
     t1.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS) 
      
     #Create a new FieldType with default properties. 
     t2 = FieldType() 
     t2.setIndexed(True) 
     t2.setStored(True) 
     t2.setTokenized(True)#True if this field's value should be analyzed by the Analyzer. 
     t2.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) 
      
     for root, dirnames, filenames in os.walk(root): 
         for filename in filenames: 
             if not filename.endswith('.txt'): 
                 continue 
             print 'adding', filename 
             try: 
                 path = os.path.join(root, filename) 
                 file = open(path) 
                 contents = file.read() 
                 file.close() 
                 doc = Document() 
                 doc.add(Field('name', filename, t1)) 
                 doc.add(Field('path', root, t1)) 
                 if len(contents) > 0: 
                     doc.add(Field('contents', contents, t2)) 
                     print 'length of content is %d'%(len(contents)) 
                 else: 
                     print 'warning: no content in %s' % filename 
                 writer.addDocument(doc) 
             except Exception, e: 
                 print 'Failed in indexDocs:', e

開發者ID:ouceduxzk，項目名稱:AI2-Kaggle，代碼行數:39，代碼來源:indexer.py

示例14: indexDocs

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
	def indexDocs(self,root,writer):
		t1 = FieldType()
		t1.setIndexed(True)
		t1.setStored(True)
		t1.setTokenized(True)
		t1.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS)

		t2 = FieldType()
		t2.setIndexed(True)
		t2.setStored(False)
		t2.setTokenized(True)
		t2.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)

		for root, dirnames,filenames in os.walk(root):
			# traverse through the doc directory
			for filename in filenames:
				# only if this file ends with '.c'
				if not filename.endswith('.c'):
					continue
				try:
					# only add the filename and path for indexing
					path = os.path.join(root,filename)
					print "adding file : ",path
					file = open(path)
					contents = unicode(file.read(),'utf-8')
					file.close()
					doc = Document()
					doc.add(Field("name",filename,t1))
					doc.add(Field("path",root,t1))
				#	if len(contents) > 0:
				#		doc.add(Field("contents",contents,t2))
				#	else:
				#		print "warning: no content in ",filename
					writer.addDocument(doc)
				except Exception,e:
					print "failed in indexDocs:",e

開發者ID:zz-mars，項目名稱:simple-search，代碼行數:38，代碼來源:indexer.py

示例15: index_wiki

# 需要導入模塊: from org.apache.lucene.document import FieldType [as 別名]
# 或者: from org.apache.lucene.document.FieldType import setStored [as 別名]
def index_wiki(wiki_xmlfile, index_directory_name):
    """Build a fresh Lucene index from the documents in *wiki_xmlfile*.

    :param wiki_xmlfile: source passed to ``wikicorpusxml``, which yields
        one document string per article.
    :param index_directory_name: directory in which the index is opened in
        CREATE mode.

    Each indexed document carries ``contents``, ``title`` and ``url``
    fields extracted from the document string with ``str.partition``.
    """
    def make_type(tokenized):
        """Return a stored, indexed FieldType that records positions."""
        field_type = FieldType()
        field_type.setIndexed(True)
        field_type.setStored(True)
        field_type.setTokenized(tokenized)
        field_type.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
        return field_type

    lucene.initVM()

    # Open the index directory and configure a writer in CREATE mode.
    lucene_version = Version.LUCENE_CURRENT
    directory = FSDirectory.open(File(index_directory_name))
    writer_config = IndexWriterConfig(
        lucene_version, StandardAnalyzer(lucene_version))
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(directory, writer_config)

    # Content and title are tokenized; the url is kept as a single token.
    content_fieldtype = make_type(True)
    title_fieldtype = make_type(True)
    url_fieldtype = make_type(False)

    for xmldoc in wikicorpusxml(wiki_xmlfile):
        # Text between the first '>' and the following '<'.
        after_open_tag = xmldoc.partition('>')[2]
        content = after_open_tag.partition('<')[0].strip()
        # Attribute values are taken up to their closing double quote.
        after_title = xmldoc.partition(' title="')[2]
        title = after_title.partition('"')[0].strip()
        after_url = xmldoc.partition(' url="')[2]
        url = after_url.partition('"')[0].strip()

        doc = Document()
        doc.add(Field("contents", content, content_fieldtype))
        doc.add(Field("title", title, title_fieldtype))
        doc.add(Field("url", url, url_fieldtype))
        writer.addDocument(doc)

    writer.commit()
    writer.close()

開發者ID:alvations，項目名稱:Wikicorpus，代碼行數:46，代碼來源:WikiIndex.py

注：本文中的org.apache.lucene.document.FieldType.setStored方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。