当前位置: 首页>>代码示例>>Java>>正文


Java SimpleAnalyzer类代码示例

本文整理汇总了Java中org.apache.lucene.analysis.SimpleAnalyzer的典型用法代码示例。如果您正苦于以下问题:Java SimpleAnalyzer类的具体用法?Java SimpleAnalyzer怎么用?Java SimpleAnalyzer使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


SimpleAnalyzer类属于org.apache.lucene.analysis包,在下文中一共展示了SimpleAnalyzer类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: buildIndex

import org.apache.lucene.analysis.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Builds a Lucene index for one corpus and prints the elapsed time.
 *
 * <p>The index location is {@code source + "Index"} and documents are
 * tokenized with a {@code SimpleAnalyzer}. An {@code IOException} raised
 * while indexing is reported on standard output rather than propagated.
 *
 * @param    source      The corpus name, e.g. "source" or "target"; used as
 *                       the prefix of the index location.
 * @param    filePath    Path of the file or directory handed to
 *                       {@code indexDocs} for indexing.
 */
public void buildIndex(String source, String filePath) {
    Date start = new Date();
    try {
        IndexWriter writer = new IndexWriter(source + "Index", new SimpleAnalyzer(), true);
        try {
            indexDocs(writer, new File(filePath));
            writer.optimize();
        } finally {
            // Close the writer even when indexing fails, so it is not left
            // open after an exception.
            writer.close();
        }

        Date end = new Date();

        System.out.print(end.getTime() - start.getTime());
        System.out.println(" total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() +
                "\n with message: " + e.getMessage());
    }
}
 
开发者ID:aag,项目名称:jchampollion,代码行数:25,代码来源:Corpus.java

示例2: getSentencesContaining

import org.apache.lucene.analysis.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Finds the sentences of the source-language corpus that match the given
 * words and returns their sentence numbers.
 *
 * <p>The words are first passed through {@code requireAll}, then parsed
 * against the "contents" field. Any exception raised while searching is
 * printed to standard output, and whatever was collected up to that point
 * is returned.
 *
 * @param    words_    The words to be found
 * @return A Vector holding the "snum" field value (as a String) of every
 *         matching sentence
 */
public Vector<String> getSentencesContaining(String words_) {
    final Vector<String> matches = new Vector<>();
    final String requiredWords = requireAll(words_);

    try {
        final Query parsed =
                QueryParser.parse(requiredWords, "contents", new SimpleAnalyzer());
        final Hits found = sourceSearcher.search(parsed);

        // Collect the sentence number stored with each hit.
        int index = 0;
        while (index < found.length()) {
            matches.add(found.doc(index).get("snum"));
            index++;
        }
    } catch (Exception e) {
        System.out.println(" caught a " + e.getClass()
                + "\n with message: " + e.getMessage());
    }

    return matches;
}
 
开发者ID:aag,项目名称:jchampollion,代码行数:33,代码来源:Corpus.java

示例3: numSentencesContaining

import org.apache.lucene.analysis.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Counts the sentences that match the given words in the supplied searcher.
 *
 * <p>The words are passed through {@code requireAll} and parsed against the
 * "contents" field. If parsing or searching throws, the exception is printed
 * to standard output and 0 is returned.
 *
 * @param words_    The words to be found
 * @param searcher  The searcher to be searched.
 * @return The number of hits for the words, or 0 on failure
 */
public int numSentencesContaining(String words_, Searcher searcher) {
    final String requiredWords = requireAll(words_);

    try {
        final Query parsed =
                QueryParser.parse(requiredWords, "contents", new SimpleAnalyzer());
        return searcher.search(parsed).length();
    } catch (Exception e) {
        System.out.println(" caught a " + e.getClass()
                + "\n with message: " + e.getMessage());
        return 0;
    }
}
 
开发者ID:aag,项目名称:jchampollion,代码行数:28,代码来源:Corpus.java

示例4: testDefaultOperator

import org.apache.lucene.analysis.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Parses "development" across the "title" and "subject" fields with a
 * {@code MultiFieldQueryParser} left at its default operator, and checks
 * that both expected book titles appear among the top 10 hits.
 *
 * @throws Exception if parsing, opening the index, or searching fails
 */
public void testDefaultOperator() throws Exception {
  Query query = new MultiFieldQueryParser(Version.LUCENE_41,
                                          new String[]{"title", "subject"},
      new SimpleAnalyzer()).parse("development");

  Directory dir = TestUtil.getBookIndexDirectory();
  try {
    IndexSearcher searcher = new IndexSearcher(
                               dir,
                               true);
    try {
      TopDocs hits = searcher.search(query, 10);

      assertTrue(TestUtil.hitsIncludeTitle(
             searcher,
             hits,
             "Ant in Action"));

      assertTrue(TestUtil.hitsIncludeTitle(
             searcher,
             hits,
             "Extreme Programming Explained"));
    } finally {
      // Release the searcher even when a search or assertion fails.
      searcher.close();
    }
  } finally {
    // The directory is closed last, and regardless of searcher failures.
    dir.close();
  }
}
 
开发者ID:xuzhikethinker,项目名称:t4f-data,代码行数:24,代码来源:MultiFieldQueryParserTest.java

示例5: testSpecifiedOperator

import org.apache.lucene.analysis.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Parses "lucene" with {@code BooleanClause.Occur.MUST} specified for both
 * the "title" and "subject" fields, and checks that exactly one hit comes
 * back: "Lucene in Action, Second Edition".
 *
 * @throws Exception if parsing, opening the index, or searching fails
 */
public void testSpecifiedOperator() throws Exception {
  Query query = MultiFieldQueryParser.parse(Version.LUCENE_41,
      "lucene",
      new String[]{"title", "subject"},
      new BooleanClause.Occur[]{BooleanClause.Occur.MUST,
                BooleanClause.Occur.MUST},
      new SimpleAnalyzer());

  Directory dir = TestUtil.getBookIndexDirectory();
  try {
    IndexSearcher searcher = new IndexSearcher(
                               dir,
                               true);
    try {
      TopDocs hits = searcher.search(query, 10);

      assertTrue(TestUtil.hitsIncludeTitle(
              searcher,
              hits,
              "Lucene in Action, Second Edition"));
      assertEquals("one and only one", 1, hits.scoreDocs.length);
    } finally {
      // Release the searcher even when a search or assertion fails.
      searcher.close();
    }
  } finally {
    // The directory is closed last, and regardless of searcher failures.
    dir.close();
  }
}
 
开发者ID:xuzhikethinker,项目名称:t4f-data,代码行数:23,代码来源:MultiFieldQueryParserTest.java

示例6: setUp

import org.apache.lucene.analysis.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Creates an in-memory index containing a single document and opens the
 * {@code searcher} field on it.
 *
 * <p>The document has a "partnum" field ("Q36", not stored, indexed with
 * {@code NOT_ANALYZED_NO_NORMS}) and a "description" field
 * ("Illidium Space Modulator", stored, indexed with {@code ANALYZED}).
 *
 * @throws Exception if the index cannot be written or opened
 */
public void setUp() throws Exception {
  Directory ramDir = new RAMDirectory();
  IndexWriter indexWriter = new IndexWriter(
      ramDir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);

  // The part number is indexed without analysis; it is not stored.
  Field partNumber = new Field(
      "partnum", "Q36", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
  Field description = new Field(
      "description", "Illidium Space Modulator", Field.Store.YES, Field.Index.ANALYZED);

  Document entry = new Document();
  entry.add(partNumber);
  entry.add(description);

  indexWriter.addDocument(entry);
  indexWriter.close();

  searcher = new IndexSearcher(ramDir);
}
 
开发者ID:xuzhikethinker,项目名称:t4f-data,代码行数:23,代码来源:KeywordAnalyzerTest.java

示例7: getQuery

import org.apache.lucene.analysis.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Creates a {@code QueryParser} for the "title" field backed by a
 * {@code SimpleAnalyzer}.
 *
 * <p>A sample wildcard query ("test*") is parsed before the parser is
 * returned; its result is discarded.
 *
 * @return the configured parser
 * @throws ParseException if the sample query cannot be parsed
 */
public QueryParser getQuery() throws  ParseException {
    
    Analyzer analyzer = new SimpleAnalyzer();
    QueryParser parser = new QueryParser(org.apache.lucene.util.Version.LUCENE_4_0, "title", analyzer);
    
    // Kept from the original snippet: exercising the parser here is the only
    // reason this method can throw ParseException.
    String querystr = "test*";
    parser.parse(querystr);

    // The original had no return statement, so the method could not compile.
    return parser;
}
 
开发者ID:windup,项目名称:windup-rulesets,代码行数:9,代码来源:HsearchUtil.java

示例8: testBasicQueryParser

import org.apache.lucene.analysis.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Parses "partnum:Q36 AND SPACE" with a plain {@code SimpleAnalyzer} and
 * asserts that the query renders as "+partnum:q +space" and finds no
 * documents in the test index.
 *
 * @throws Exception if parsing or searching fails
 */
public void testBasicQueryParser() throws Exception {
  QueryParser parser =
      new QueryParser(Version.LUCENE_41, "description", new SimpleAnalyzer());
  Query query = parser.parse("partnum:Q36 AND SPACE");

  // Rendered relative to the default field, so "description:" is omitted.
  String rendered = query.toString("description");
  assertEquals("note Q36 -> q", "+partnum:q +space", rendered);

  int matches = TestUtil.hitCount(searcher, query);
  assertEquals("doc not found :(", 0, matches);
}
 
开发者ID:xuzhikethinker,项目名称:t4f-data,代码行数:10,代码来源:KeywordAnalyzerTest.java

示例9: testPerFieldAnalyzer

import org.apache.lucene.analysis.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Parses "partnum:Q36 AND SPACE" with a {@code PerFieldAnalyzerWrapper}
 * that defaults to {@code SimpleAnalyzer} but uses {@code KeywordAnalyzer}
 * for "partnum", and asserts that the query renders as
 * "+partnum:Q36 +space" and finds exactly one document.
 *
 * @throws Exception if parsing or searching fails
 */
public void testPerFieldAnalyzer() throws Exception {
  PerFieldAnalyzerWrapper perField =
      new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
  perField.addAnalyzer("partnum", new KeywordAnalyzer());

  QueryParser parser =
      new QueryParser(Version.LUCENE_41, "description", perField);
  Query query = parser.parse("partnum:Q36 AND SPACE");

  String rendered = query.toString("description");
  assertEquals("Q36 kept as-is", "+partnum:Q36 +space", rendered);

  int matches = TestUtil.hitCount(searcher, query);
  assertEquals("doc found!", 1, matches);
}
 
开发者ID:xuzhikethinker,项目名称:t4f-data,代码行数:14,代码来源:KeywordAnalyzerTest.java

示例10: getAnalyzer

import org.apache.lucene.analysis.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Instantiates the analyzer class named by the {@code ANALYZER} setting of
 * the given configuration, using {@code SimpleAnalyzer} when the setting is
 * absent.
 *
 * @param conf configuration to read the analyzer class from; also passed to
 *             {@code ReflectionUtils.newInstance}
 * @return a new analyzer instance
 * @throws IOException declared by the signature
 */
public static Analyzer getAnalyzer(Configuration conf) throws IOException {
  return ReflectionUtils.newInstance(
      conf.getClass(ANALYZER, SimpleAnalyzer.class, Analyzer.class),
      conf);
}
 
开发者ID:apache,项目名称:accumulo-wikisearch,代码行数:5,代码来源:WikipediaConfiguration.java

示例11: countIntersections

import org.apache.lucene.analysis.SimpleAnalyzer; //导入依赖的package包/类
/**
 * Counts the sentence numbers shared by the hits for S in the source corpus
 * and the hits for T in the target corpus.
 *
 * <p>Both term strings are passed through {@code requireAll} and searched in
 * the "contents" field, with results sorted on "snum". The two hit lists are
 * then walked in parallel, advancing whichever side has the smaller sentence
 * number. This presumes the sort yields ascending numeric order.
 *
 * <p>Any exception is printed to standard output; the count accumulated up
 * to that point is returned.
 *
 * @param S
 *            The words in the source corpus, separated by spaces.
 * @param T
 *            The words in the target corpus, separated by spaces.
 * @return The number of sentences containing both all of the words in S and
 *         all of the words in T.
 */
public int countIntersections(String S, String T) {
    int shared = 0;

    final String sourceTerms = requireAll(S);
    final String targetTerms = requireAll(T);

    try {
        // Hits for the source terms, ordered by sentence number.
        final Query sourceQuery =
                QueryParser.parse(sourceTerms, "contents", new SimpleAnalyzer());
        final Hits sourceHits = sourceSearcher.search(sourceQuery, new Sort("snum"));

        // Hits for the target terms, ordered by sentence number.
        final Query targetQuery =
                QueryParser.parse(targetTerms, "contents", new SimpleAnalyzer());
        final Hits targetHits = targetSearcher.search(targetQuery, new Sort("snum"));

        int si = 0;
        int ti = 0;
        while (si < sourceHits.length() && ti < targetHits.length()) {
            final int sourceNum = Integer.parseInt(sourceHits.doc(si).get("snum"));
            final int targetNum = Integer.parseInt(targetHits.doc(ti).get("snum"));

            if (sourceNum < targetNum) {
                si++;
            } else if (sourceNum > targetNum) {
                ti++;
            } else {
                // Same sentence number on both sides: count it and move on.
                shared++;
                si++;
                ti++;
            }
        }

    } catch (Exception e) {
        System.out.println(" caught a " + e.getClass()
                + "\n with message: " + e.getMessage());
    }

    return shared;
}
 
开发者ID:aag,项目名称:jchampollion,代码行数:61,代码来源:Corpus.java


注:本文中的org.apache.lucene.analysis.SimpleAnalyzer类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。