本文整理汇总了Python中org.apache.lucene.document.FieldType.setStoreTermVectorPayloads方法的典型用法代码示例。如果您正苦于以下问题：Python FieldType.setStoreTermVectorPayloads方法的具体用法？Python FieldType.setStoreTermVectorPayloads怎么用？Python FieldType.setStoreTermVectorPayloads使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.document.FieldType的用法示例。
在下文中一共展示了FieldType.setStoreTermVectorPayloads方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: index_docs
# 需要导入：from org.apache.lucene.document import FieldType [as 别名]
# 注意：setStoreTermVectorPayloads 是 FieldType 的实例方法，无法单独导入，需通过 FieldType 实例调用。
def index_docs(self, train_set, writer):
    """Add one document per training example to ``writer``.

    Every item of ``train_set`` is read by key: 'Answer', 'Question ID',
    'category' and 'Sentence Position' become untokenized stored fields,
    while 'Question Text' and the text returned by
    ``self.wiki_reader.get_text`` for the answer become tokenized stored
    fields with term vectors.

    Args:
        train_set: iterable of mappings providing the keys listed above.
        writer: object exposing ``addDocument`` (presumably a Lucene
            IndexWriter -- confirm against the caller).
    """
    # Field type for values kept as a single token: indexed, stored,
    # not tokenized, recording documents and term frequencies.
    exact_type = FieldType()
    exact_type.setIndexed(True)
    exact_type.setStored(True)
    exact_type.setTokenized(False)
    exact_type.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS)

    # Field type for free text: tokenized, stored, with term vectors that
    # carry offsets, payloads and positions.
    text_type = FieldType()
    text_type.setIndexed(True)
    text_type.setStored(True)
    text_type.setTokenized(True)
    text_type.setStoreTermVectorOffsets(True)
    text_type.setStoreTermVectorPayloads(True)
    text_type.setStoreTermVectorPositions(True)
    text_type.setStoreTermVectors(True)
    text_type.setIndexOptions(
        FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)

    # (index field name, key in the training example), in insertion order.
    exact_fields = (
        ("answer", 'Answer'),
        ("qid", 'Question ID'),
        ("category", 'category'),
        ("position", 'Sentence Position'),
    )

    for example in train_set:
        document = Document()
        for field_name, source_key in exact_fields:
            document.add(Field(field_name, example[source_key], exact_type))
        document.add(Field("question", example['Question Text'], text_type))
        document.add(Field("wiki_plain",
                           self.wiki_reader.get_text(example['Answer']),
                           text_type))
        writer.addDocument(document)
示例2: tweetIndexer
# 需要导入：from org.apache.lucene.document import FieldType [as 别名]
# 注意：setStoreTermVectorPayloads 是 FieldType 的实例方法，无法单独导入，需通过 FieldType 实例调用。
def tweetIndexer(self, writer):
    """Index tweets stored as JSON lines into ``writer``.

    Reads ``json/tweets-<i>.json`` for i = 0..499 in order and stops at the
    first index whose file does not exist. Each line is parsed as one JSON
    object; objects containing a 'limit' key are ignored (presumably
    streaming-API rate-limit notices -- confirm). For every remaining
    tweet a document is built and passed to ``writer.addDocument``.

    Indexing is best-effort per tweet: if building or adding a document
    raises (for example a tweet with neither 'coordinates' nor 'place'),
    that tweet is skipped and the reason is logged. A line that is not
    valid JSON still raises out of this method, as before; the file is
    closed in every case.

    Args:
        writer: object exposing ``addDocument`` (presumably a Lucene
            IndexWriter -- confirm against the caller).
    """
    # Local import so this block does not depend on the file's import list.
    import logging

    log = logging.getLogger(__name__)

    # Field type for the source-file name: indexed, stored, not tokenized.
    t1 = FieldType()
    t1.setIndexed(True)
    t1.setStored(True)
    t1.setTokenized(False)
    t1.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS)

    # Field type for everything else: tokenized, stored, with term vectors
    # carrying offsets, payloads and positions.
    t2 = FieldType()
    t2.setIndexed(True)
    t2.setStored(True)
    t2.setTokenized(True)
    t2.setStoreTermVectorOffsets(True)
    t2.setStoreTermVectorPayloads(True)
    t2.setStoreTermVectorPositions(True)
    t2.setStoreTermVectors(True)
    t2.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)

    # Number of documents added so far. Not read within the visible code;
    # kept in case code outside this view relies on it.
    x = 0
    for i in range(0, 500):
        path = "json/tweets-" + str(i) + ".json"
        if not os.path.isfile(path):
            break
        print("adding tweets-" + str(i) + ".json")
        # The context manager closes the file even when a line fails to
        # parse and the exception propagates.
        with open(path, "r") as tweets:
            for line in tweets:
                tweet = json.loads(line)
                if 'limit' in tweet:
                    continue
                try:
                    doc = Document()
                    doc.add(Field("file", path, t1))
                    sname = tweet['user']['screen_name']
                    tid = str(tweet['id'])
                    text = tweet['text']
                    uname = tweet['user']['name']
                    created = tweet['created_at']
                    tstamp = tweet['timestamp_ms']

                    place = ""
                    if tweet['place']:
                        place = (tweet['place']['full_name'] + ", " +
                                 tweet['place']['country'])

                    titles = ""
                    urls = ""
                    exist = "false"

                    if tweet['coordinates']:
                        # Index 1 is used as latitude, index 0 as longitude.
                        point = tweet['coordinates']['coordinates']
                        lat = str(point[1])
                        lng = str(point[0])
                    else:
                        # No exact point: use the midpoint between the
                        # first and third corners of the place bounding
                        # box. Raises (and the tweet is skipped) when
                        # 'place' is missing as well.
                        box = tweet['place']['bounding_box']['coordinates'][0]
                        lat = str((box[0][1] + box[2][1]) / 2)
                        lng = str((box[0][0] + box[2][0]) / 2)

                    url_entities = tweet['entities']['urls']
                    if url_entities:
                        exist = "true"
                        for entity in url_entities:
                            title = entity['url_title']
                            if title is None:
                                # "-" marks a URL without a title.
                                titles += ",-"
                            else:
                                title = title.encode('ascii', 'ignore')
                                titles += "," + str(title)
                            urls += " " + str(entity['expanded_url'])

                    # Combined free-text field covering text, URLs, user
                    # names and place.
                    searchable = (text + " " + urls + " " + uname + " " +
                                  sname + " " + place)
                    doc.add(Field("lookup", searchable, t2))
                    doc.add(Field("text", text, t2))
                    doc.add(Field("user_name", uname, t2))
                    doc.add(Field("screen_name", sname, t2))
                    doc.add(Field("tweet_id", tid, t2))
                    doc.add(Field("created_at", created, t2))
                    doc.add(Field("geo_lat", lat, t2))
                    doc.add(Field("geo_lng", lng, t2))
                    doc.add(Field("url_exist", exist, t2))
                    doc.add(Field("url_url", urls, t2))
                    doc.add(Field("url_title", titles, t2))
                    doc.add(Field("timestamp", tstamp, t2))
                    writer.addDocument(doc)
                    x += 1
                except Exception as e:
                    # Best-effort indexing: skip this tweet but leave a
                    # trace instead of discarding the error silently.
                    log.warning("skipping tweet in %s: %r", path, e)