本文整理汇总了Java中edu.berkeley.nlp.lm.WordIndexer类的典型用法代码示例。如果您正苦于以下问题:Java WordIndexer类的具体用法?Java WordIndexer怎么用?Java WordIndexer使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
WordIndexer类属于edu.berkeley.nlp.lm包,在下文中一共展示了WordIndexer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: KneserNeyLmReaderCallback
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Creates a callback that accumulates Kneser-Ney counts for n-grams up to
 * {@code maxOrder}. Validates up front that the configured minimum-count
 * thresholds are monotonically non-decreasing, since later stages rely on it.
 */
public KneserNeyLmReaderCallback(final WordIndexer<W> wordIndexer, final int maxOrder, final ConfigOptions opts) {
	this.lmOrder = maxOrder;
	this.opts = opts;
	this.wordIndexer = wordIndexer;
	this.startIndex = wordIndexer.getIndexPossiblyUnk(wordIndexer.getStartSymbol());
	// Reject non-monotonic min-count arrays before any counting begins.
	double previous = Double.NEGATIVE_INFINITY;
	for (final double minCount : opts.kneserNeyMinCounts) {
		if (minCount < previous) {
			throw new IllegalArgumentException("Please ensure that ConfigOptions.kneserNeyMinCounts is monotonic (value was "
				+ Arrays.toString(opts.kneserNeyMinCounts) + ")");
		}
		previous = minCount;
	}
	final KneserNeyCountValueContainer values = new KneserNeyCountValueContainer(lmOrder, startIndex);
	ngrams = HashNgramMap.createExplicitWordHashNgramMap(values, opts, lmOrder, false);
}
示例2: dumpBuffer
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Logs (at FINEST level) the words corresponding to the first {@code len}
 * entries of {@code buffer}, mapping any negative ids to the unknown-word
 * index so they can be rendered.
 */
private void dumpBuffer(int[] buffer, int len) {
	final int[] words = Arrays.copyOf(buffer, len);
	for (int i = 0; i < words.length; ++i) {
		// Negative slots are unfilled positions; render them as <unk>.
		if (words[i] < 0) words[i] = unkIndex;
	}
	logger.finest(StrUtils.join(WordIndexer.StaticMethods.toList(lm.getWordIndexer(), words)));
}
示例3: NgramIterableWrapper
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Builds one per-order iterable wrapper for every n-gram order in the map.
 *
 * @param map
 *            the underlying n-gram map to expose
 * @param wordIndexer
 *            maps between word ids and word objects
 * @param maxOrder
 *            this is 1-based (i.e. 1 means keep unigrams but not bigrams)
 */
public NgramIterableWrapper(final NgramMap<V> map, final WordIndexer<W> wordIndexer, final int maxOrder) {
	// Generic array creation requires the unchecked raw-type allocation below.
	@SuppressWarnings("unchecked")
	final NgramsForOrderIterableWrapper<W, V>[] wrappers = new NgramsForOrderIterableWrapper[maxOrder];
	for (int order = 0; order < maxOrder; ++order) {
		wrappers[order] = new NgramsForOrderIterableWrapper<W, V>(map, wordIndexer, order);
	}
	ngramsForOrder = wrappers;
}
示例4: NgramMapWrapper
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Wraps an {@link NgramMap} as a collection of per-order map views.
 *
 * @param map
 *            the underlying n-gram map to expose
 * @param wordIndexer
 *            maps between word ids and word objects
 * @param maxOrder
 *            this is 1-based (i.e. 1 means keep unigrams but not bigrams)
 */
public NgramMapWrapper(final NgramMap<V> map, final WordIndexer<W> wordIndexer, final int maxOrder) {
	// Generic array creation requires the unchecked raw-type allocation below.
	@SuppressWarnings("unchecked")
	final NgramsForOrderMapWrapper<W, V>[] wrappers = new NgramsForOrderMapWrapper[maxOrder];
	for (int order = 0; order < maxOrder; ++order) {
		wrappers[order] = new NgramsForOrderMapWrapper<W, V>(map, wordIndexer, order);
	}
	ngramsForOrder = wrappers;
	this.wordIndexer = wordIndexer;
	this.ngramMap = map;
}
示例5: NgramsForOrderMapWrapper
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Exposes the n-grams of a single order as a map view.
 *
 * @param map
 *            the underlying n-gram map
 * @param wordIndexer
 *            maps between word ids and word objects
 * @param ngramOrder
 *            0-based, i.e. 0 means unigrams
 */
public NgramsForOrderMapWrapper(final NgramMap<V> map, final WordIndexer<W> wordIndexer, final int ngramOrder) {
	this.wordIndexer = wordIndexer;
	this.ngramOrder = ngramOrder;
	this.map = map;
	// Delegate iteration to the iterable wrapper over the same order.
	iterableWrapper = new NgramsForOrderIterableWrapper<W, V>(map, wordIndexer, ngramOrder);
}
示例6: get
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Looks up the value stored for the given n-gram, presented as a
 * {@code List<W>}. Returns {@code null} for keys that are not word lists or
 * whose length does not match this wrapper's n-gram order.
 */
@Override
public V get(final Object arg0) {
	if (!(arg0 instanceof List)) return null;
	@SuppressWarnings("unchecked")
	final List<W> key = (List<W>) arg0;
	// Only n-grams of exactly this order can live in this view.
	if (key.size() != ngramOrder + 1) return null;
	final int[] encoded = WordIndexer.StaticMethods.toArray(wordIndexer, key);
	return getForArray(encoded);
}
示例7: iterator
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Returns an iterator over this order's n-grams, adapting the map's
 * int-encoded entries to {@code Map.Entry<List<W>, V>} views. Entries are
 * read-only: {@code setValue} always throws, and keys are decoded through
 * the word indexer each time {@code getKey()} is called.
 */
@Override
public Iterator<Entry<List<W>, V>> iterator() {
	// Lazily transform each raw map entry into a java.util.Map.Entry view.
	return new Iterators.Transform<NgramMap.Entry<V>, java.util.Map.Entry<List<W>, V>>(map.getNgramsForOrder(ngramOrder).iterator())
	{
		@Override
		protected Entry<List<W>, V> transform(final edu.berkeley.nlp.lm.map.NgramMap.Entry<V> next) {
			return new java.util.Map.Entry<List<W>, V>()
			{
				@Override
				public List<W> getKey() {
					// Decode the int-array key into words on demand.
					final List<W> ngram = WordIndexer.StaticMethods.toList(wordIndexer, next.key);
					return ngram;
				}
				@Override
				public V getValue() {
					return next.value;
				}
				@Override
				public V setValue(final V arg0) {
					// This is a read-only view over the underlying map.
					throw new UnsupportedOperationException("Method not yet implemented");
				}
			};
		}
	};
}
示例8: readArrayEncodedLmFromArpa
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Reads an ARPA-format language model file into an array-encoded
 * probability/backoff LM. Convenience overload that constructs the
 * {@link ArpaLmReader} and delegates to the reader-based variant.
 */
public static <W> ArrayEncodedProbBackoffLm<W> readArrayEncodedLmFromArpa(final String lmFile, final boolean compress, final WordIndexer<W> wordIndexer,
	final ConfigOptions opts, final int lmOrder) {
	final ArpaLmReader<W> arpaReader = new ArpaLmReader<W>(lmFile, wordIndexer, lmOrder);
	return readArrayEncodedLmFromArpa(arpaReader, compress, wordIndexer, opts);
}
示例9: readNgramMapFromBinary
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Deserializes an n-gram map from a binary file and wraps it as a
 * {@link java.util.Map} view.
 *
 * @param sortedVocabFile
 *            should be the vocab_cs.gz file from the Google n-gram corpus.
 * @return a map view over the deserialized n-grams
 */
public static <W> NgramMapWrapper<W, LongRef> readNgramMapFromBinary(final String binary, final String sortedVocabFile, final WordIndexer<W> wordIndexer) {
	// Populate and freeze the vocabulary first so word ids line up with the stored map.
	GoogleLmReader.addToIndexer(wordIndexer, sortedVocabFile);
	wordIndexer.trimAndLock();
	// The binary stores an erased NgramMap; the cast is unchecked by necessity.
	@SuppressWarnings("unchecked")
	final NgramMap<LongRef> ngramMap = (NgramMap<LongRef>) IOUtils.readObjFileHard(binary);
	return new NgramMapWrapper<W, LongRef>(ngramMap, wordIndexer);
}
示例10: readLmFromGoogleNgramDir
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Reads a stupid backoff lm from a directory with n-gram counts in the
 * format used by Google n-grams. If {@code kneserNey} is set, builds a
 * Kneser-Ney smoothed LM from the same counts instead.
 *
 * @param <W>
 *            word type
 * @param dir
 *            root directory of the Google n-gram counts
 * @param compress
 *            whether to build a compressed map
 * @param wordIndexer
 *            maps between word ids and word objects
 * @param opts
 *            configuration options
 * @return the constructed language model
 */
public static <W> ArrayEncodedNgramLanguageModel<W> readLmFromGoogleNgramDir(final String dir, final boolean compress, final boolean kneserNey,
	final WordIndexer<W> wordIndexer, final ConfigOptions opts) {
	final GoogleLmReader<W> googleLmReader = new GoogleLmReader<W>(dir, wordIndexer, opts);
	if (!kneserNey) {
		// Two-pass stupid-backoff estimation: size the storage first, then populate it.
		final FirstPassCallback<LongRef> firstPass = firstPassGoogle(dir, wordIndexer, opts);
		final LongArray[] numNgramsForEachWord = firstPass.getNumNgramsForEachWord();
		return secondPassGoogle(opts, googleLmReader, wordIndexer, firstPass, numNgramsForEachWord, compress);
	}
	// Kneser-Ney path: accumulate KN counts, then read them back as an ARPA-style LM.
	GoogleLmReader.addSpecialSymbols(wordIndexer);
	final KneserNeyLmReaderCallback<W> knCallback = new KneserNeyLmReaderCallback<W>(wordIndexer, googleLmReader.getLmOrder(), opts);
	googleLmReader.parse(knCallback);
	return readArrayEncodedLmFromArpa(knCallback, compress, wordIndexer, opts);
}
示例11: secondPassGoogle
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Second pass of the Google n-gram build: allocates count storage sized by
 * the first pass and populates the n-gram map, returning the finished
 * stupid-backoff LM. The LM order equals the number of per-word count arrays.
 */
private static <W> StupidBackoffLm<W> secondPassGoogle(final ConfigOptions opts, final LmReader<LongRef, NgramOrderedLmReaderCallback<LongRef>> lmReader,
	final WordIndexer<W> wordIndexer, final FirstPassCallback<LongRef> valueAddingCallback, final LongArray[] numNgramsForEachWord, final boolean compress) {
	// Google-format counts are stored reversed and never context-encoded.
	final boolean contextEncoded = false;
	final boolean reversed = true;
	final int lmOrder = numNgramsForEachWord.length;
	final CountValueContainer countValues = new CountValueContainer(valueAddingCallback.getValueCounter(), opts.valueRadix, contextEncoded,
		new long[lmOrder]);
	final NgramMap<LongRef> ngramMap = buildMapCommon(opts, wordIndexer, numNgramsForEachWord, valueAddingCallback.getNumNgramsForEachOrder(), reversed,
		lmReader, countValues, compress);
	return new StupidBackoffLm<W>(lmOrder, wordIndexer, ngramMap, opts);
}
示例12: buildMapArpa
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Builds the n-gram map for an ARPA-format LM, choosing a value container
 * based on the compression/ranking options.
 *
 * @param <W>
 *            word type
 * @param opts
 *            configuration options (value radix, ranked storage flag)
 * @param lmReader
 *            source of the ARPA n-grams
 * @param wordIndexer
 *            maps between word ids and word objects
 * @param valueAddingCallback
 *            first-pass statistics used to size storage
 * @param numNgramsForEachWord
 *            per-order counts of n-grams headed by each word
 * @param contextEncoded
 *            whether the map is context-encoded
 * @param reversed
 *            whether n-grams are stored in reversed word order
 * @param compress
 *            whether to build a compressed map
 * @return the populated n-gram map
 */
private static <W> NgramMap<ProbBackoffPair> buildMapArpa(final ConfigOptions opts,
	final LmReader<ProbBackoffPair, ArpaLmReaderCallback<ProbBackoffPair>> lmReader, final WordIndexer<W> wordIndexer,
	final FirstPassCallback<ProbBackoffPair> valueAddingCallback, final LongArray[] numNgramsForEachWord, final boolean contextEncoded,
	final boolean reversed, final boolean compress) {
	// Fail fast on an unsupported combination BEFORE allocating value storage
	// (the original checked this only after constructing the container).
	if (contextEncoded && compress) throw new RuntimeException("Compression is not supported by context-encoded LMs");
	final ValueContainer<ProbBackoffPair> values;
	if (compress) {
		values = new CompressibleProbBackoffValueContainer(valueAddingCallback.getValueCounter(), opts.valueRadix, contextEncoded,
			valueAddingCallback.getNumNgramsForEachOrder());
	} else if (opts.storeRankedProbBackoffs) {
		values = new UncompressedProbBackoffValueContainer(valueAddingCallback.getValueCounter(), opts.valueRadix, contextEncoded,
			valueAddingCallback.getNumNgramsForEachOrder());
	} else {
		values = new UnrankedUncompressedProbBackoffValueContainer(contextEncoded, valueAddingCallback.getNumNgramsForEachOrder());
	}
	final NgramMap<ProbBackoffPair> map = buildMapCommon(opts, wordIndexer, numNgramsForEachWord, valueAddingCallback.getNumNgramsForEachOrder(), reversed,
		lmReader, values, compress);
	return map;
}
示例13: buildMapCommon
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Builds an n-gram map by streaming n-grams from the reader, retrying once
 * if the first pass discovered n-grams whose suffixes or prefixes were
 * missing from the map. The retry enlarges the per-order and per-word count
 * arrays in place, discards the first map's storage, and re-parses.
 *
 * @param <W>
 *            word type
 * @param <V>
 *            value type stored per n-gram
 * @param opts
 *            configuration options
 * @param wordIndexer
 *            maps between word ids and word objects
 * @param numNgramsForEachWord
 *            per-order counts of n-grams headed by each word (mutated on retry)
 * @param numNgramsForEachOrder
 *            total n-gram counts per order (mutated on retry)
 * @param reversed
 *            whether n-grams are stored in reversed word order
 * @param lmReader
 *            source of the n-grams
 * @param values
 *            value container sized for the first pass
 * @param compress
 *            whether to build a compressed map
 * @return the populated n-gram map
 */
private static <W, V extends Comparable<V>> NgramMap<V> buildMapCommon(final ConfigOptions opts, final WordIndexer<W> wordIndexer,
	final LongArray[] numNgramsForEachWord, final long[] numNgramsForEachOrder, final boolean reversed,
	final LmReader<V, ? super NgramMapAddingCallback<V>> lmReader, final ValueContainer<V> values, final boolean compress) {
	Logger.startTrack("Adding n-grams");
	NgramMap<V> map = createNgramMap(opts, numNgramsForEachWord, numNgramsForEachOrder, reversed, values, compress);
	// First pass: collect n-grams whose suffix/prefix entries were not yet present.
	final List<int[]> failures = tryBuildingNgramMap(opts, wordIndexer, lmReader, map);
	Logger.endTrack();
	if (!failures.isEmpty()) {
		Logger.startTrack(failures.size() + " missing suffixes or prefixes were found, doing another pass to add n-grams");
		for (final int[] failure : failures) {
			final int ngramOrder = failure.length - 1;
			// The "head" word is at position 0 when reversed, else at the last position.
			final int headWord = failure[reversed ? 0 : ngramOrder];
			// Grow the size estimates so the rebuilt map has room for the failures.
			numNgramsForEachOrder[ngramOrder]++;
			numNgramsForEachWord[ngramOrder].incrementCount(headWord, 1);
		}
		// try to clear some memory
		for (int ngramOrder = 0; ngramOrder < numNgramsForEachOrder.length; ++ngramOrder) {
			values.clearStorageForOrder(ngramOrder);
		}
		// Rebuild from scratch with fresh, correctly sized value storage.
		final ValueContainer<V> newValues = values.createFreshValues(numNgramsForEachOrder);
		map.clearStorage();
		map = createNgramMap(opts, numNgramsForEachWord, numNgramsForEachOrder, reversed, newValues, compress);
		lmReader.parse(new NgramMapAddingCallback<V>(map, failures));
		Logger.endTrack();
	}
	return map;
}
示例14: call
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Writes one n-gram line in ARPA-like tab-separated form: probability, the
 * n-gram words, and the backoff weight. The backoff column is omitted when
 * it is zero or when the n-gram ends with the end-of-sentence symbol.
 */
@Override
public void call(int[] ngram, int startPos, int endPos, ProbBackoffPair value, String words) {
	final String line = StrUtils.join(WordIndexer.StaticMethods.toList(wordIndexer, ngram, startPos, endPos));
	final int endSymIndex = wordIndexer.getIndexPossiblyUnk(wordIndexer.getEndSymbol());
	final boolean endsWithEndSym = ngram[ngram.length - 1] == endSymIndex;
	// Locale.US pins the decimal separator regardless of the host locale.
	if (endsWithEndSym || value.backoff == 0.0f) {
		out.printf(Locale.US, "%f\t%s\n", value.prob, line);
	} else {
		out.printf(Locale.US, "%f\t%s\t%f\n", value.prob, line, value.backoff);
	}
}
示例15: GoogleLmReader
import edu.berkeley.nlp.lm.WordIndexer; //导入依赖的package包/类
/**
 * Creates a reader over a Google n-gram count directory. Scans
 * {@code rootDir} for subdirectories whose names end in "gms" (1gms, 2gms,
 * ...); the LM order is the number of such directories.
 *
 * @throws IllegalArgumentException
 *             if {@code rootDir} does not exist or is not a directory
 */
public GoogleLmReader(final String rootDir, final WordIndexer<W> wordIndexer, @SuppressWarnings("unused") final ConfigOptions opts) {
	this.wordIndexer = wordIndexer;
	ngramDirectories = new File(rootDir).listFiles(new FilenameFilter()
	{
		@Override
		public boolean accept(final File dir, final String name) {
			return name.endsWith("gms");
		}
	});
	// listFiles() returns null when rootDir is missing or not a directory;
	// fail with a clear message instead of an opaque NPE in Arrays.sort below.
	if (ngramDirectories == null) throw new IllegalArgumentException("Could not list n-gram directories under " + rootDir);
	// Sort so 1gms, 2gms, ... appear in increasing order of n-gram length.
	Arrays.sort(ngramDirectories);
	lmOrder = ngramDirectories.length;
}