This article collects typical usage examples of the Java method net.semanticmetadata.lire.utils.LuceneUtils.createIndexWriter. If you are wondering what LuceneUtils.createIndexWriter does, how to call it, or what real-world uses look like, the curated code examples here should help. You can also explore further usage examples of the containing class, net.semanticmetadata.lire.utils.LuceneUtils.
Below are 15 code examples of the LuceneUtils.createIndexWriter method, ordered by popularity.
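Before the examples, here is a minimal, self-contained sketch of the typical call pattern, assembled from the calls that appear below rather than taken from any single example. The index path "index-sketch" and the image file "img01.jpg" are placeholder names; both the two-argument overload (path, create flag) and the three-argument overload taking a LuceneUtils.AnalyzerType appear in the examples below, but verify them against your LIRE/Lucene version.

import net.semanticmetadata.lire.DocumentBuilder;
import net.semanticmetadata.lire.DocumentBuilderFactory;
import net.semanticmetadata.lire.utils.LuceneUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;

import java.io.FileInputStream;
import java.io.IOException;

public class CreateIndexWriterSketch {
    public static void main(String[] args) throws IOException {
        // "index-sketch" is a placeholder path; true means "create a new index"
        // (cf. the createNewIndex parameter in Example 6)
        IndexWriter iw = LuceneUtils.createIndexWriter("index-sketch", true);
        // Overload with an explicit analyzer, as used in Examples 6 and 12:
        // LuceneUtils.createIndexWriter("index-sketch", true, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
        DocumentBuilder builder = DocumentBuilderFactory.getColorLayoutBuilder();
        // "img01.jpg" is a placeholder; any readable image file works
        Document doc = builder.createDocument(new FileInputStream("img01.jpg"), "img01.jpg");
        iw.addDocument(doc);
        iw.commit();
        iw.close();
    }
}

The boolean's name in Example 6, createNewIndex, suggests that true creates a fresh index while false appends to or updates an existing one, as Examples 9 and 13 do.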
Example 1: indexFiles
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
private void indexFiles(ArrayList<String> images, DocumentBuilder builder, String indexPath) throws IOException {
    // System.out.println(">> Indexing " + images.size() + " files.");
    // DocumentBuilder builder = DocumentBuilderFactory.getExtensiveDocumentBuilder();
    // DocumentBuilder builder = DocumentBuilderFactory.getFastDocumentBuilder();
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath, true);
    int count = 0;
    long time = System.currentTimeMillis();
    for (String identifier : images) {
        Document doc = builder.createDocument(new FileInputStream(identifier), identifier);
        iw.addDocument(doc);
        count++;
        if (count % 100 == 0) System.out.println(count + " files indexed.");
        // if (count == 200) break;
    }
    long timeTaken = (System.currentTimeMillis() - time);
    float sec = ((float) timeTaken) / 1000f;
    System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image.");
    iw.commit();
    iw.close();
}
Example 2: indexFiles
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
private void indexFiles(ArrayList<String> images, DocumentBuilder builder, String indexPath) throws IOException {
    System.out.println(">> Indexing " + images.size() + " files.");
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath, true);
    int count = 0;
    long time = System.currentTimeMillis();
    for (String identifier : images) {
        Document doc = builder.createDocument(new FileInputStream(identifier), identifier);
        iw.addDocument(doc);
        count++;
        if (count % 100 == 0) System.out.println(count + " files indexed.");
        // if (count == 200) break;
    }
    long timeTaken = (System.currentTimeMillis() - time);
    float sec = ((float) timeTaken) / 1000f;
    System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image.");
    iw.close();
}
Example 3: testIndexing
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
public void testIndexing() throws IOException {
    ChainedDocumentBuilder b = new ChainedDocumentBuilder();
    b.addBuilder(new SiftDocumentBuilder());
    // b.addBuilder(DocumentBuilderFactory.getCEDDDocumentBuilder());
    ArrayList<String> files = net.semanticmetadata.lire.utils.FileUtils.getAllImages(new File("W:\\MultimediaShare\\image_datasets\\ukbench-nister\\full"), true);
    System.out.println("files.size() = " + files.size());
    IndexWriter writer = LuceneUtils.createIndexWriter("nisterindex_sift", true);
    int count = 0;
    long ms = System.currentTimeMillis();
    for (Iterator<String> iterator = files.iterator(); iterator.hasNext(); ) {
        String next = iterator.next();
        Document d = b.createDocument(ImageIO.read(new File(next)), next);
        writer.addDocument(d);
        count++;
        if (count % 100 == 0) {
            float time = (float) (System.currentTimeMillis() - ms);
            System.out.println("Finished " + count + " images, " + (((float) count) / 10200f) * 100 + "%. " + (time / (float) count) + " ms per image.");
        }
    }
    writer.close();
}
Example 4: testSurfIndexing
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
public void testSurfIndexing() throws IOException {
    ArrayList<String> images = FileUtils.getAllImages(new File(testExtensive), true);
    ChainedDocumentBuilder db = new ChainedDocumentBuilder();
    db.addBuilder(new SurfDocumentBuilder());
    IndexWriter iw = LuceneUtils.createIndexWriter("sift-idx", true);
    for (int i = 0; i < images.size(); i++) {
        // int sampleQuery = sampleQueries[i];
        // String s = testExtensive + "/" + sampleQuery + ".jpg";
        iw.addDocument(db.createDocument(new FileInputStream(images.get(i)), images.get(i)));
        if (i % 100 == 99) System.out.print(".");
        if (i % 1000 == 999) System.out.print(" ~ " + i + " files indexed\n");
        if (i > 1000) break;
    }
    System.out.println("");
    iw.close();
}
Example 5: testSiftIndexing
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
public void testSiftIndexing() throws IOException {
    ArrayList<String> images = FileUtils.getAllImages(new File(testExtensive), true);
    ChainedDocumentBuilder db = new ChainedDocumentBuilder();
    db.addBuilder(new SiftDocumentBuilder());
    db.addBuilder(DocumentBuilderFactory.getCEDDDocumentBuilder());
    IndexWriter iw = LuceneUtils.createIndexWriter("sift-idx", true);
    for (int i = 0; i < images.size(); i++) {
        // int sampleQuery = sampleQueries[i];
        // String s = testExtensive + "/" + sampleQuery + ".jpg";
        iw.addDocument(db.createDocument(new FileInputStream(images.get(i)), images.get(i)));
        if (i % 100 == 99) System.out.print(".");
        if (i % 1000 == 999) System.out.print(" ~ " + i + " files indexed\n");
        if (i > 1000) break;
    }
    System.out.println("");
    iw.close();
}
Example 6: indexFiles
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
private void indexFiles(String dir, String index, int featureIndex, boolean createNewIndex) throws IOException {
    ArrayList<String> images = FileUtils.getAllImages(new File(dir), true);
    IndexWriter iw = LuceneUtils.createIndexWriter(index, createNewIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
    // select one feature for the large index:
    int count = 0;
    long ms = System.currentTimeMillis();
    DocumentBuilder builder = new ChainedDocumentBuilder();
    ((ChainedDocumentBuilder) builder).addBuilder(builders[featureIndex]);
    // ((ChainedDocumentBuilder) builder).addBuilder(builders[0]);
    for (Iterator<String> iterator = images.iterator(); iterator.hasNext(); ) {
        count++;
        if (count > 100 && count % 5000 == 0) {
            System.out.println(count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file");
        }
        String file = iterator.next();
        try {
            iw.addDocument(builder.createDocument(new FileInputStream(file), file));
        } catch (Exception e) {
            System.err.println("Error: " + e.getMessage());
        }
    }
    iw.close();
}
Example 7: indexFiles
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
@SuppressWarnings("unused")
private Document indexFiles() throws IOException {
    System.out.println("---< indexing >-------------------------");
    int count = 0;
    DocumentBuilder builder = getDocumentBuilder();
    ArrayList<String> allImages = FileUtils.getAllImages(new File("wang-1000"), true);
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath, true);
    Document document = null;
    for (Iterator<String> iterator = allImages.iterator(); iterator.hasNext(); ) {
        String filename = iterator.next();
        BufferedImage image = ImageIO.read(new FileInputStream(filename));
        document = builder.createDocument(image, filename);
        iw.addDocument(document);
        count++;
        if (count % 50 == 0)
            System.out.println("finished " + (count * 100) / allImages.size() + "% of the images.");
    }
    iw.close();
    return document;
}
Example 8: testIndexMirflickr
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
public void testIndexMirflickr() throws IOException {
    List<String> allImages = new LinkedList<String>();
    for (int i = 1; i <= 25000; i++)
        allImages.add("c:/Temp/mirflickr/im" + i + ".jpg");
    System.out.println("Found " + allImages.size() + " files.");
    IndexWriter iw = LuceneUtils.createIndexWriter("./index-mirflickr", true);
    ParallelIndexer pix = new ParallelIndexer(allImages, new MirFlickrDocumentBuilder());
    new Thread(pix).start();
    Document doc;
    javax.swing.ProgressMonitor pm = new javax.swing.ProgressMonitor(null, "Indexing ...", "", 0, allImages.size());
    int count = 0;
    while ((doc = pix.getNext()) != null) {
        iw.addDocument(doc);
        pm.setProgress(++count);
        pm.setNote(count + " documents finished");
    }
    iw.close();
}
Example 9: testIndexMissingFiles
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
public void testIndexMissingFiles() throws IOException {
    // first delete some of the existing ones ...
    System.out.println("Deleting visual words from docs ...");
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexWriter iw = LuceneUtils.createIndexWriter(index, false);
    int maxDocs = ir.maxDoc();
    for (int i = 0; i < maxDocs / 10; i++) {
        Document d = ir.document(i);
        d.removeFields(DocumentBuilder.FIELD_NAME_SURF_VISUAL_WORDS);
        d.removeFields(DocumentBuilder.FIELD_NAME_SURF_LOCAL_FEATURE_HISTOGRAM);
        // d.removeFields(DocumentBuilder.FIELD_NAME_SURF);
        iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
    }
    System.out.println("# of deleted docs: " + maxDocs / 10);
    System.out.println("Optimizing and closing ...");
    iw.close();
    ir.close();
    System.out.println("Creating new visual words ...");
    SurfFeatureHistogramBuilder surfFeatureHistogramBuilder = new SurfFeatureHistogramBuilder(DirectoryReader.open(FSDirectory.open(new File(index))), numSamples, clusters);
    surfFeatureHistogramBuilder.indexMissing();
    System.out.println("Finished.");
}
Example 10: testCreateIndex
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
public void testCreateIndex() throws IOException {
    ChainedDocumentBuilder builder = new ChainedDocumentBuilder();
    builder.addBuilder(DocumentBuilderFactory.getColorLayoutBuilder());
    builder.addBuilder(DocumentBuilderFactory.getEdgeHistogramBuilder());
    builder.addBuilder(DocumentBuilderFactory.getScalableColorBuilder());
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-small", true);
    for (String identifier : testFiles) {
        System.out.println("Indexing file " + identifier);
        Document doc = builder.createDocument(new FileInputStream(testFilesPath + identifier), identifier);
        iw.addDocument(doc);
    }
    iw.commit();
    iw.close();
}
Example 11: testCreateCorrelogramIndex
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
public void testCreateCorrelogramIndex() throws IOException {
    String[] testFiles = new String[]{"img01.jpg", "img02.jpg", "img03.jpg", "img04.jpg", "img05.jpg", "img06.jpg", "img07.jpg", "img08.jpg", "img09.jpg", "img10.jpg"};
    String testFilesPath = "./lire/src/test/resources/small/";
    DocumentBuilder builder = DocumentBuilderFactory.getAutoColorCorrelogramDocumentBuilder();
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-small", true);
    long ms = System.currentTimeMillis();
    for (String identifier : testFiles) {
        Document doc = builder.createDocument(new FileInputStream(testFilesPath + identifier), identifier);
        iw.addDocument(doc);
    }
    System.out.println("Time taken: " + ((System.currentTimeMillis() - ms) / testFiles.length) + " ms");
    iw.commit();
    iw.close();
}
Example 12: indexMissing
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
public void indexMissing() throws IOException {
    // Reading clusters from disk:
    clusters = Cluster.readClusters(clusterFile);
    // create & store histograms:
    System.out.println("Creating histograms ...");
    int[] tmpHist = new int[numClusters];
    LireFeature f = getFeatureInstance();
    // based on bug report from Einav Itamar <[email protected]>
    IndexWriter iw = LuceneUtils.createIndexWriter(((DirectoryReader) reader).directory(),
            false, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
    for (int i = 0; i < reader.maxDoc(); i++) {
        // if (!reader.isDeleted(i)) {
        for (int j = 0; j < tmpHist.length; j++) {
            tmpHist[j] = 0;
        }
        Document d = reader.document(i);
        // Only if there are no values yet:
        if (d.getValues(visualWordsFieldName) == null || d.getValues(visualWordsFieldName).length == 0) {
            IndexableField[] fields = d.getFields(localFeatureFieldName);
            // find the appropriate cluster for each feature:
            for (int j = 0; j < fields.length; j++) {
                f.setByteArrayRepresentation(fields[j].binaryValue().bytes, fields[j].binaryValue().offset, fields[j].binaryValue().length);
                tmpHist[clusterForFeature((Histogram) f)]++;
            }
            normalize(tmpHist);
            d.add(new TextField(visualWordsFieldName, arrayToVisualWordString(tmpHist), Field.Store.YES));
            d.add(new StringField(localFeatureHistFieldName, SerializationUtils.arrayToString(tmpHist), Field.Store.YES));
            // now write the new one. we use the identifier to update ;)
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
        }
        // }
    }
    iw.commit();
    iw.close();
    System.out.println("Finished.");
}
Example 13: indexFiles
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
private void indexFiles(ArrayList<String> images, DocumentBuilder builder, String indexPath) throws IOException {
    // eventually check if the directory is there or not ...
    IndexWriter iw = LuceneUtils.createIndexWriter(testIndex, false);
    int count = 0;
    long time = System.currentTimeMillis();
    for (String identifier : images) {
        // TODO: cut toes from the image ... -> doesn't work out very well. Stable at first, decreasing then.
        // TODO: Joint Histogram ...
        // TODO: LSA / PCA on the vectors ...-> this looks like a job for me :-D
        // TODO: local features ...
        Document doc = null;
        if (cutImages) {
            BufferedImage bimg = ImageUtils.cropImage(ImageIO.read(new FileInputStream(identifier)), 0, 0, 200, 69);
            doc = builder.createDocument(bimg, identifier);
        } else doc = builder.createDocument(new FileInputStream(identifier), identifier);
        iw.addDocument(doc);
        count++;
        if (count % 100 == 0) {
            int percent = (int) Math.floor(((double) count * 100.0) / (double) images.size());
            double timeTemp = (double) (System.currentTimeMillis() - time) / 1000d;
            int secsLeft = (int) Math.round(((timeTemp / (double) count) * (double) images.size()) - timeTemp);
            System.out.println(percent + "% finished (" + count + " files), " + secsLeft + " s left");
        }
    }
    long timeTaken = (System.currentTimeMillis() - time);
    float sec = ((float) timeTaken) / 1000f;
    System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image.");
    iw.commit();
    iw.close();
}
Example 14: testIndexing
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
public void testIndexing() throws IOException {
    ChainedDocumentBuilder cb = new ChainedDocumentBuilder();
    cb.addBuilder(new SurfDocumentBuilder());
    cb.addBuilder(DocumentBuilderFactory.getColorLayoutBuilder());
    System.out.println("-< Getting files to index >--------------");
    ArrayList<String> images = FileUtils.getAllImages(new File(dataPath), true);
    System.out.println("-< Indexing " + images.size() + " files >--------------");
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath, true);
    int count = 0;
    long time = System.currentTimeMillis();
    for (String identifier : images) {
        Document doc = cb.createDocument(new FileInputStream(identifier), identifier);
        iw.addDocument(doc);
        count++;
        if (count % 100 == 0) System.out.println(count + " files indexed.");
        // if (count == 200) break;
    }
    long timeTaken = (System.currentTimeMillis() - time);
    float sec = ((float) timeTaken) / 1000f;
    System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image.");
    iw.commit();
    iw.close();
    System.out.println("-< Local features are getting clustered >--------------");
    SurfFeatureHistogramBuilder sh = new SurfFeatureHistogramBuilder(IndexReader.open(FSDirectory.open(new File(indexPath))), 200, 8000);
    sh.index();
    System.out.println("-< Indexing finished >--------------");
}
Example 15: testCreateCorrelogramIndex
import net.semanticmetadata.lire.utils.LuceneUtils; // import the package/class this method depends on
public void testCreateCorrelogramIndex() throws IOException {
    String[] testFiles = new String[]{"img01.jpg", "img02.jpg", "img03.jpg", "img04.jpg", "img05.jpg", "img06.jpg", "img07.jpg", "img08.jpg", "img09.jpg", "img10.jpg"};
    String testFilesPath = "./src/test/resources/small/";
    DocumentBuilder builder = DocumentBuilderFactory.getAutoColorCorrelogramDocumentBuilder();
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-small", true);
    long ms = System.currentTimeMillis();
    for (String identifier : testFiles) {
        Document doc = builder.createDocument(new FileInputStream(testFilesPath + identifier), identifier);
        iw.addDocument(doc);
    }
    System.out.println("Time taken: " + ((System.currentTimeMillis() - ms) / testFiles.length) + " ms");
    iw.close();
}