当前位置: 首页>>代码示例>>Java>>正文


Java UserDictionary类代码示例

本文整理汇总了Java中org.apache.lucene.analysis.ja.dict.UserDictionary的典型用法代码示例。如果您正苦于以下问题:Java UserDictionary类的具体用法?Java UserDictionary怎么用?Java UserDictionary使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


UserDictionary类属于org.apache.lucene.analysis.ja.dict包,在下文中一共展示了UserDictionary类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getUserDictionary

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Loads the kuromoji user dictionary referenced by {@code USER_DICT_OPTION}.
 *
 * @param env      environment handed to {@code Analysis.getReaderFromFile}
 * @param settings settings handed to {@code Analysis.getReaderFromFile}
 * @return the dictionary produced by {@code UserDictionary.open}, or {@code null} when
 *         no reader is available for the option
 * @throws ElasticsearchException if an {@link IOException} occurs while obtaining,
 *         reading or closing the reader
 */
public static UserDictionary getUserDictionary(Environment env, Settings settings) {
    // try-with-resources skips close() for a null resource and, unlike a manual
    // finally { reader.close(); }, never lets a close() failure replace the
    // exception thrown while parsing (it is attached as a suppressed exception).
    try (Reader reader = Analysis.getReaderFromFile(env, settings, USER_DICT_OPTION)) {
        if (reader == null) {
            return null;
        }
        return UserDictionary.open(reader);
    } catch (IOException e) {
        throw new ElasticsearchException("failed to load kuromoji user dictionary", e);
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:17,代码来源:KuromojiTokenizerFactory.java

示例2: userDictionary

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Builds a user dictionary from the given rows, one dictionary entry per array element.
 *
 * @param userDictArray dictionary rows; may be {@code null}
 * @return {@code null} when {@code userDictArray} is {@code null}, otherwise whatever
 *         {@code UserDictionary.open} returns for the newline-joined rows
 * @throws UDFArgumentException if opening the dictionary fails for any reason
 */
@Nullable
private static UserDictionary userDictionary(@Nullable final String[] userDictArray)
        throws UDFArgumentException {
    if (userDictArray == null) {
        return null;
    }

    // Every row, including the last one, is terminated by a newline.
    final StringBuilder rows = new StringBuilder();
    for (int i = 0; i < userDictArray.length; i++) {
        rows.append(userDictArray[i]);
        rows.append('\n');
    }
    final Reader rowsReader = new StringReader(rows.toString());

    try {
        // open(...) returns null if the input is empty
        final UserDictionary dictionary = UserDictionary.open(rowsReader);
        return dictionary;
    } catch (Throwable cause) {
        final String message =
                "Failed to create user dictionary based on the given array<string>: " + cause;
        throw new UDFArgumentException(message);
    }
}
 
开发者ID:apache,项目名称:incubator-hivemall,代码行数:20,代码来源:KuromojiUDF.java

示例3: getUserDictionary

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Loads the kuromoji user dictionary referenced by {@code USER_DICT_OPTION}.
 *
 * @param env      environment handed to {@code Analysis.getReaderFromFile}
 * @param settings settings handed to {@code Analysis.getReaderFromFile}
 * @return the dictionary produced by {@code UserDictionary.open}, or {@code null} when
 *         no reader is available for the option
 * @throws ElasticsearchException if an {@link IOException} occurs while obtaining,
 *         reading or closing the reader
 */
public static UserDictionary getUserDictionary(final Environment env, final Settings settings) {
    // A null resource is legal in try-with-resources (close() is simply skipped), and a
    // failure in close() can no longer mask an exception raised by UserDictionary.open.
    try (Reader reader = Analysis.getReaderFromFile(env, settings, USER_DICT_OPTION)) {
        if (reader == null) {
            return null;
        }
        return UserDictionary.open(reader);
    } catch (final IOException e) {
        throw new ElasticsearchException("failed to load kuromoji user dictionary", e);
    }
}
 
开发者ID:codelibs,项目名称:elasticsearch-analysis-ja,代码行数:17,代码来源:KuromojiTokenizerFactory.java

示例4: inform

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Loads the user dictionary from {@code userDictionaryPath}, if one is configured.
 *
 * <p>The resource is decoded with {@code userDictionaryEncoding}, falling back to
 * {@code IOUtils.UTF_8} when no encoding is set. When no path is configured,
 * {@code userDictionary} is set to {@code null}.
 *
 * @param loader used to open the dictionary resource
 * @throws IOException if the resource cannot be opened, read, decoded or closed
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (userDictionaryPath != null) {
    // The stream was previously never closed; try-with-resources releases it on both
    // the success path and when dictionary construction fails.
    try (InputStream stream = loader.openResource(userDictionaryPath)) {
      String encoding = userDictionaryEncoding;
      if (encoding == null) {
        encoding = IOUtils.UTF_8;
      }
      // REPORT makes malformed or unmappable input raise an error instead of being
      // silently replaced.
      CharsetDecoder decoder = Charset.forName(encoding).newDecoder()
          .onMalformedInput(CodingErrorAction.REPORT)
          .onUnmappableCharacter(CodingErrorAction.REPORT);
      Reader reader = new InputStreamReader(stream, decoder);
      userDictionary = new UserDictionary(reader);
    }
  } else {
    userDictionary = null;
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:18,代码来源:JapaneseTokenizerFactory.java

示例5: readDict

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Reads the test user dictionary {@code userdict.txt}, resolved relative to
 * {@code TestJapaneseTokenizer}, decoding it as UTF-8.
 *
 * @return the user dictionary built from the resource
 * @throws RuntimeException if the resource is missing, or wrapping any {@link IOException}
 *         raised while reading or closing it
 */
public static UserDictionary readDict() {
  final InputStream dictStream = TestJapaneseTokenizer.class.getResourceAsStream("userdict.txt");
  if (dictStream == null) {
    throw new RuntimeException("Cannot find userdict.txt in test classpath!");
  }
  try {
    try {
      return new UserDictionary(new InputStreamReader(dictStream, StandardCharsets.UTF_8));
    } finally {
      // Always release the underlying stream, whether or not construction succeeded.
      dictStream.close();
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:17,代码来源:TestJapaneseTokenizer.java

示例6: inform

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Configures the factory from {@code args}: the tokenization mode, the optional user
 * dictionary and the punctuation-discarding flag.
 *
 * <p>The dictionary resource named by {@code USER_DICT_PATH} is decoded with the encoding
 * given by {@code USER_DICT_ENCODING}, falling back to {@code IOUtils.UTF_8}. When no path
 * is given, {@code userDictionary} is set to {@code null}.
 *
 * @param loader used to open the dictionary resource
 * @throws IOException if the resource cannot be opened, read, decoded or closed
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  mode = getMode(args);
  String userDictionaryPath = args.get(USER_DICT_PATH);
  if (userDictionaryPath != null) {
    InputStream stream = loader.openResource(userDictionaryPath);
    // The stream was previously never closed; release it on success and on failure.
    try {
      String encoding = args.get(USER_DICT_ENCODING);
      if (encoding == null) {
        encoding = IOUtils.UTF_8;
      }
      // REPORT makes malformed or unmappable input raise an error instead of being
      // silently replaced.
      CharsetDecoder decoder = Charset.forName(encoding).newDecoder()
          .onMalformedInput(CodingErrorAction.REPORT)
          .onUnmappableCharacter(CodingErrorAction.REPORT);
      Reader reader = new InputStreamReader(stream, decoder);
      userDictionary = new UserDictionary(reader);
    } finally {
      stream.close();
    }
  } else {
    userDictionary = null;
  }
  discardPunctuation = getBoolean(DISCARD_PUNCTUATION, true);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:21,代码来源:JapaneseTokenizerFactory.java

示例7: readDict

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Reads the test user dictionary {@code userdict.txt}, resolved relative to
 * {@code TestJapaneseTokenizer}, decoding it with {@code IOUtils.CHARSET_UTF_8}.
 *
 * @return the user dictionary built from the resource
 * @throws RuntimeException if the resource is missing, or wrapping any {@link IOException}
 *         raised while reading or closing it
 */
public static UserDictionary readDict() {
  final InputStream dictStream = TestJapaneseTokenizer.class.getResourceAsStream("userdict.txt");
  if (dictStream == null) {
    throw new RuntimeException("Cannot find userdict.txt in test classpath!");
  }
  try {
    try {
      return new UserDictionary(new InputStreamReader(dictStream, IOUtils.CHARSET_UTF_8));
    } finally {
      // Always release the underlying stream, whether or not construction succeeded.
      dictStream.close();
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:17,代码来源:TestJapaneseTokenizer.java

示例8: KuromojiAnalyzerProvider

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Creates the provider and builds its {@code JapaneseAnalyzer} from the given settings.
 *
 * <p>Stop words come from {@code Analysis.parseStopWords} (with the analyzer's default stop
 * set passed as the fallback argument); the tokenizer mode and user dictionary come from
 * {@code KuromojiTokenizerFactory}; stop tags are the analyzer defaults.
 *
 * @param indexSettings index settings, also the source of the index-created version
 * @param env           environment passed to the stop-word and user-dictionary lookups
 * @param name          name passed to the superclass
 * @param settings      analyzer settings
 */
public KuromojiAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    final Set<?> configuredStopWords = Analysis.parseStopWords(
        env, indexSettings.getIndexVersionCreated(), settings, JapaneseAnalyzer.getDefaultStopSet());
    final JapaneseTokenizer.Mode tokenizerMode = KuromojiTokenizerFactory.getMode(settings);
    final UserDictionary dictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings);
    final CharArraySet stopSet = CharArraySet.copy(configuredStopWords);
    analyzer = new JapaneseAnalyzer(dictionary, tokenizerMode, stopSet, JapaneseAnalyzer.getDefaultStopTags());
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:KuromojiAnalyzerProvider.java

示例9: KuromojiAnalyzerProvider

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Creates the provider and builds its {@code JapaneseAnalyzer} from the given settings.
 *
 * <p>Stop words come from {@code Analysis.parseStopWords} (with the analyzer's default stop
 * set passed as the fallback argument); the tokenizer mode and user dictionary come from
 * {@code KuromojiTokenizerFactory}; stop tags are the analyzer defaults.
 *
 * @param indexSettings index settings, also the source of the index-created version
 * @param env           environment passed to the stop-word and user-dictionary lookups
 * @param name          name passed to the superclass
 * @param settings      analyzer settings
 */
public KuromojiAnalyzerProvider(final IndexSettings indexSettings, final Environment env, final String name, final Settings settings) {
    super(indexSettings, name, settings);
    final Set<?> configuredStopWords = Analysis.parseStopWords(
            env, indexSettings.getIndexVersionCreated(), settings, JapaneseAnalyzer.getDefaultStopSet());
    final JapaneseTokenizer.Mode tokenizerMode = KuromojiTokenizerFactory.getMode(settings);
    final UserDictionary dictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings);
    final CharArraySet stopSet = CharArraySet.copy(configuredStopWords);
    analyzer = new JapaneseAnalyzer(dictionary, tokenizerMode, stopSet, JapaneseAnalyzer.getDefaultStopTags());
}
 
开发者ID:codelibs,项目名称:elasticsearch-analysis-ja,代码行数:9,代码来源:KuromojiAnalyzerProvider.java

示例10: JapaneseAnalyzer

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Creates an analyzer that stores the given user dictionary, tokenization mode and stop tags.
 *
 * @param userDict  user dictionary to store; callers in practice may pass {@code null}
 * @param mode      tokenization mode to store
 * @param stopwords stop words, forwarded to the superclass constructor
 * @param stoptags  stop tags to store
 */
public JapaneseAnalyzer(UserDictionary userDict, Mode mode, CharArraySet stopwords, Set<String> stoptags) {
  super(stopwords);
  this.stoptags = stoptags;
  this.mode = mode;
  this.userDict = userDict;
}
 
开发者ID:europeana,项目名称:search,代码行数:7,代码来源:JapaneseAnalyzer.java

示例11: JapaneseTokenizer

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Create a new JapaneseTokenizer.
 *
 * <p>Obtains the system dictionaries through their {@code getInstance()} accessors, wires in
 * the optional user dictionary, derives the mode flags from {@code mode}, and registers each
 * dictionary in {@code dictionaryMap} under its {@code Type}.
 *
 * @param factory the AttributeFactory to use
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer
    (AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(factory, input);
  // System dictionary and its FST.
  dictionary = TokenInfoDictionary.getInstance();
  fst = dictionary.getFST();
  // Unknown-word dictionary and the character definitions it carries.
  unkDictionary = UnknownDictionary.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  this.userDictionary = userDictionary;
  costs = ConnectionCosts.getInstance();
  fstReader = fst.getBytesReader();
  // The user FST and its reader exist only when a user dictionary was supplied.
  if (userDictionary != null) {
    userFST = userDictionary.getFST();
    userFSTReader = userFST.getBytesReader();
  } else {
    userFST = null;
    userFSTReader = null;
  }
  this.discardPunctuation = discardPunctuation;
  // Translate the mode into the three flags: SEARCH enables searchMode and compound output,
  // EXTENDED enables searchMode and extendedMode without compound output, and any other
  // mode leaves all three disabled.
  switch(mode){
    case SEARCH:
      searchMode = true;
      extendedMode = false;
      outputCompounds = true;
      break;
    case EXTENDED:
      searchMode = true;
      extendedMode = true;
      outputCompounds = false;
      break;
    default:
      searchMode = false;
      extendedMode = false;
      outputCompounds = false;
      break;
  }
  // Point the buffer at this tokenizer's input.
  buffer.reset(this.input);

  resetState();

  // Register each dictionary by type; the USER entry is null when no user dictionary was given.
  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}
 
开发者ID:europeana,项目名称:search,代码行数:53,代码来源:JapaneseTokenizer.java

示例12: JapaneseAnalyzer

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Creates an analyzer that stores the given user dictionary, tokenization mode and stop tags.
 *
 * @param matchVersion version forwarded to the superclass constructor
 * @param userDict     user dictionary to store; callers in practice may pass {@code null}
 * @param mode         tokenization mode to store
 * @param stopwords    stop words, forwarded to the superclass constructor
 * @param stoptags     stop tags to store
 */
public JapaneseAnalyzer(Version matchVersion, UserDictionary userDict, Mode mode, CharArraySet stopwords, Set<String> stoptags) {
  super(matchVersion, stopwords);
  this.stoptags = stoptags;
  this.mode = mode;
  this.userDict = userDict;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:7,代码来源:JapaneseAnalyzer.java

示例13: JapaneseTokenizer

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Create a new JapaneseTokenizer.
 *
 * <p>Obtains the system dictionaries through their {@code getInstance()} accessors, wires in
 * the optional user dictionary, derives the mode flags from {@code mode}, and registers each
 * dictionary in {@code dictionaryMap} under its {@code Type}.
 *
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer(Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(input);
  // System dictionary and its FST.
  dictionary = TokenInfoDictionary.getInstance();
  fst = dictionary.getFST();
  // Unknown-word dictionary and the character definitions it carries.
  unkDictionary = UnknownDictionary.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  this.userDictionary = userDictionary;
  costs = ConnectionCosts.getInstance();
  fstReader = fst.getBytesReader();
  // The user FST and its reader exist only when a user dictionary was supplied.
  if (userDictionary != null) {
    userFST = userDictionary.getFST();
    userFSTReader = userFST.getBytesReader();
  } else {
    userFST = null;
    userFSTReader = null;
  }
  this.discardPunctuation = discardPunctuation;
  // Translate the mode into the three flags: SEARCH enables searchMode and compound output,
  // EXTENDED enables searchMode and extendedMode without compound output, and any other
  // mode leaves all three disabled.
  switch(mode){
    case SEARCH:
      searchMode = true;
      extendedMode = false;
      outputCompounds = true;
      break;
    case EXTENDED:
      searchMode = true;
      extendedMode = true;
      outputCompounds = false;
      break;
    default:
      searchMode = false;
      extendedMode = false;
      outputCompounds = false;
      break;
  }
  buffer.reset(null); // deliberately null: best-effort way to make consumers that never call reset() fail with an NPE

  resetState();

  // Register each dictionary by type; the USER entry is null when no user dictionary was given.
  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:51,代码来源:JapaneseTokenizer.java

示例14: JapaneseTokenizer

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Convenience constructor that delegates to the factory-taking constructor with
 * {@code DEFAULT_TOKEN_ATTRIBUTE_FACTORY}.
 *
 * @param input              reader supplying the text to tokenize
 * @param userDictionary     optional user dictionary; may be {@code null}
 * @param discardPunctuation {@code true} to drop punctuation tokens from the output
 * @param mode               tokenization mode
 */
public JapaneseTokenizer(Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  this(
      DEFAULT_TOKEN_ATTRIBUTE_FACTORY,
      input,
      userDictionary,
      discardPunctuation,
      mode);
}
 
开发者ID:europeana,项目名称:search,代码行数:14,代码来源:JapaneseTokenizer.java

示例15: JapaneseTokenizer

import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Convenience constructor that delegates to the factory-taking constructor with
 * {@code AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY}.
 *
 * @param input              reader supplying the text to tokenize
 * @param userDictionary     optional user dictionary; may be {@code null}
 * @param discardPunctuation {@code true} to drop punctuation tokens from the output
 * @param mode               tokenization mode
 */
public JapaneseTokenizer(Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  this(
      AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
      input,
      userDictionary,
      discardPunctuation,
      mode);
}
 
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:14,代码来源:JapaneseTokenizer.java


注:本文中的org.apache.lucene.analysis.ja.dict.UserDictionary类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。