

Java WordIndexer Class Code Examples

This article collects typical usage examples of the Java class edu.berkeley.nlp.lm.WordIndexer. If you are wondering what WordIndexer is for, how to use it, or where to find working examples, the hand-picked code samples below should help.


The WordIndexer class belongs to the edu.berkeley.nlp.lm package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
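Before the examples, here is a minimal sketch of what the class does: a WordIndexer maps words of type W to dense integer ids and back, and its nested StaticMethods helpers convert between word lists and int[] n-grams (both helpers appear in the examples below). The sketch assumes the StringWordIndexer implementation bundled with BerkeleyLM; the words and symbols are placeholders.

import java.util.Arrays;
import java.util.List;

import edu.berkeley.nlp.lm.StringWordIndexer;
import edu.berkeley.nlp.lm.WordIndexer;

public class WordIndexerDemo {
	public static void main(final String[] args) {
		// StringWordIndexer is the stock WordIndexer<String> implementation in BerkeleyLM.
		final StringWordIndexer wordIndexer = new StringWordIndexer();
		wordIndexer.setStartSymbol("<s>");
		wordIndexer.setEndSymbol("</s>");
		wordIndexer.setUnkSymbol("<unk>");

		// getOrAddIndex assigns a fresh integer id the first time a word is seen.
		final List<String> ngram = Arrays.asList("the", "quick", "fox");
		for (final String word : ngram) {
			wordIndexer.getOrAddIndex(word);
		}

		// The StaticMethods helpers convert between word lists and int[] n-grams.
		final int[] encoded = WordIndexer.StaticMethods.toArray(wordIndexer, ngram);
		final List<String> decoded = WordIndexer.StaticMethods.toList(wordIndexer, encoded);
		System.out.println(Arrays.toString(encoded)); // ids depend on insertion order
		System.out.println(decoded);                  // [the, quick, fox]
	}
}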

Example 1: KneserNeyLmReaderCallback

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
public KneserNeyLmReaderCallback(final WordIndexer<W> wordIndexer, final int maxOrder, final ConfigOptions opts) {
	this.lmOrder = maxOrder;
	this.startIndex = wordIndexer.getIndexPossiblyUnk(wordIndexer.getStartSymbol());

	this.opts = opts;
	double last = Double.NEGATIVE_INFINITY;
	for (final double c : opts.kneserNeyMinCounts) {
		if (c < last)
			throw new IllegalArgumentException("Please ensure that ConfigOptions.kneserNeyMinCounts is monotonic (value was "
				+ Arrays.toString(opts.kneserNeyMinCounts) + ")");
		last = c;
	}
	this.wordIndexer = wordIndexer;
	final KneserNeyCountValueContainer values = new KneserNeyCountValueContainer(lmOrder, startIndex);//, justLastWord);
	ngrams = HashNgramMap.createExplicitWordHashNgramMap(values, opts, lmOrder, false);

}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 18, Source: KneserNeyLmReaderCallback.java

Example 2: dumpBuffer

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
private void dumpBuffer(int[] buffer, int len) {
  final int[] copyOf = Arrays.copyOf(buffer, len);
  for (int i = 0; i < copyOf.length; ++i) {
    if (copyOf[i] < 0) {
      copyOf[i] = unkIndex;
    }
  }
  logger.finest(StrUtils.join(WordIndexer.StaticMethods.toList(lm.getWordIndexer(), copyOf)));
}
 
Developer: apache, Project: incubator-joshua, Lines of code: 10, Source: LMGrammarBerkeley.java

Example 3: NgramIterableWrapper

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
/**
 * 
 * @param map
 * @param wordIndexer
 * @param maxOrder
 *            this is 1-based (i.e. 1 means keep unigrams but not bigrams)
 */
public NgramIterableWrapper(final NgramMap<V> map, final WordIndexer<W> wordIndexer, final int maxOrder) {
	@SuppressWarnings("unchecked")
	final NgramsForOrderIterableWrapper<W, V>[] maps = new NgramsForOrderIterableWrapper[maxOrder];
	ngramsForOrder = maps;
	for (int ngramOrder = 0; ngramOrder < maxOrder; ++ngramOrder) {
		ngramsForOrder[ngramOrder] = new NgramsForOrderIterableWrapper<W, V>(map, wordIndexer, ngramOrder);
	}
}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 16, Source: NgramIterableWrapper.java

Example 4: NgramMapWrapper

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
/**
 * 
 * @param map
 * @param wordIndexer
 * @param maxOrder
 *            this is 1-based (i.e. 1 means keep unigrams but not bigrams)
 */
public NgramMapWrapper(final NgramMap<V> map, final WordIndexer<W> wordIndexer, final int maxOrder) {
	@SuppressWarnings("unchecked")
	final NgramsForOrderMapWrapper<W, V>[] maps = new NgramsForOrderMapWrapper[maxOrder];
	ngramsForOrder = maps;
	for (int ngramOrder = 0; ngramOrder < maxOrder; ++ngramOrder) {
		ngramsForOrder[ngramOrder] = new NgramsForOrderMapWrapper<W, V>(map, wordIndexer, ngramOrder);
	}
	this.wordIndexer = wordIndexer;
	this.ngramMap = map;
}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 18, Source: NgramMapWrapper.java

Example 5: NgramsForOrderMapWrapper

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
/**
 * 
 * @param map
 * @param ngramOrder
 *            0-based, i.e. 0 means unigrams
 */
public NgramsForOrderMapWrapper(final NgramMap<V> map, final WordIndexer<W> wordIndexer, final int ngramOrder) {
	this.map = map;
	this.ngramOrder = ngramOrder;
	this.wordIndexer = wordIndexer;
	iterableWrapper = new NgramsForOrderIterableWrapper<W, V>(map, wordIndexer, ngramOrder);
}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 13, Source: NgramsForOrderMapWrapper.java

Example 6: get

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
@Override
public V get(final Object arg0) {
	if (!(arg0 instanceof List)) return null;
	@SuppressWarnings("unchecked")
	final List<W> l = (List<W>) arg0;

	if (l.size() != ngramOrder + 1) return null;
	final int[] ngram = WordIndexer.StaticMethods.toArray(wordIndexer, l);

	return getForArray(ngram);

}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 13, Source: NgramsForOrderMapWrapper.java

Example 7: iterator

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
@Override
public Iterator<Entry<List<W>, V>> iterator() {
	return new Iterators.Transform<NgramMap.Entry<V>, java.util.Map.Entry<List<W>, V>>(map.getNgramsForOrder(ngramOrder).iterator())
	{

		@Override
		protected Entry<List<W>, V> transform(final edu.berkeley.nlp.lm.map.NgramMap.Entry<V> next) {
			return new java.util.Map.Entry<List<W>, V>()
			{

				@Override
				public List<W> getKey() {
					final List<W> ngram = WordIndexer.StaticMethods.toList(wordIndexer, next.key);
					return ngram;
				}

				@Override
				public V getValue() {
					return next.value;
				}

				@Override
				public V setValue(final V arg0) {
					throw new UnsupportedOperationException("Method not yet implemented");
				}
			};
		}
	};

}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 31, Source: NgramsForOrderIterableWrapper.java

Example 8: readArrayEncodedLmFromArpa

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
public static <W> ArrayEncodedProbBackoffLm<W> readArrayEncodedLmFromArpa(final String lmFile, final boolean compress, final WordIndexer<W> wordIndexer,
	final ConfigOptions opts, final int lmOrder) {
	return readArrayEncodedLmFromArpa(new ArpaLmReader<W>(lmFile, wordIndexer, lmOrder), compress, wordIndexer, opts);
}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 5, Source: LmReaders.java
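A possible call site for the example above. This is a minimal sketch assuming an ARPA-format model file; the file path, n-gram order of 3, and default ConfigOptions are illustrative placeholders, not part of the original example.

import edu.berkeley.nlp.lm.ArrayEncodedProbBackoffLm;
import edu.berkeley.nlp.lm.ConfigOptions;
import edu.berkeley.nlp.lm.StringWordIndexer;
import edu.berkeley.nlp.lm.io.LmReaders;

public class ArpaLoadDemo {
	public static void main(final String[] args) {
		final StringWordIndexer wordIndexer = new StringWordIndexer();
		wordIndexer.setStartSymbol("<s>");
		wordIndexer.setEndSymbol("</s>");
		wordIndexer.setUnkSymbol("<unk>");

		// "trigram.arpa.gz" and the order 3 are placeholders for a real ARPA file.
		final ArrayEncodedProbBackoffLm<String> lm = LmReaders.readArrayEncodedLmFromArpa(
			"trigram.arpa.gz", false /* compress */, wordIndexer, new ConfigOptions(), 3);

		System.out.println("LM order: " + lm.getLmOrder());
	}
}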

Example 9: readNgramMapFromBinary

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
/**
 * 
 * @param sortedVocabFile
 *            should be the vocab_cs.gz file from the Google n-gram corpus.
 * @return
 */
public static <W> NgramMapWrapper<W, LongRef> readNgramMapFromBinary(final String binary, final String sortedVocabFile, final WordIndexer<W> wordIndexer) {
	GoogleLmReader.addToIndexer(wordIndexer, sortedVocabFile);
	wordIndexer.trimAndLock();
	@SuppressWarnings("unchecked")
	final NgramMap<LongRef> map = (NgramMap<LongRef>) IOUtils.readObjFileHard(binary);
	return new NgramMapWrapper<W, LongRef>(map, wordIndexer);
}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 14, Source: LmReaders.java

Example 10: readLmFromGoogleNgramDir

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
/**
 * Reads a language model from a directory with n-gram counts in the
 * format used by Google n-grams; builds either a Kneser-Ney or a
 * stupid-backoff model depending on the kneserNey flag.
 * 
 * @param <W>
 * @param dir
 * @param compress
 * @param kneserNey
 * @param wordIndexer
 * @param opts
 * @return
 */
public static <W> ArrayEncodedNgramLanguageModel<W> readLmFromGoogleNgramDir(final String dir, final boolean compress, final boolean kneserNey,
	final WordIndexer<W> wordIndexer, final ConfigOptions opts) {
	final GoogleLmReader<W> googleLmReader = new GoogleLmReader<W>(dir, wordIndexer, opts);
	if (kneserNey) {
		GoogleLmReader.addSpecialSymbols(wordIndexer);
		KneserNeyLmReaderCallback<W> kneserNeyReader = new KneserNeyLmReaderCallback<W>(wordIndexer, googleLmReader.getLmOrder(), opts);
		googleLmReader.parse(kneserNeyReader);
		return readArrayEncodedLmFromArpa(kneserNeyReader, compress, wordIndexer, opts);
	} else {
		final FirstPassCallback<LongRef> valueAddingCallback = firstPassGoogle(dir, wordIndexer, opts);
		final LongArray[] numNgramsForEachWord = valueAddingCallback.getNumNgramsForEachWord();
		return secondPassGoogle(opts, googleLmReader, wordIndexer, valueAddingCallback, numNgramsForEachWord, compress);
	}
}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 26, Source: LmReaders.java
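A possible call site for the stupid-backoff branch of the example above. This is a minimal sketch assuming a Google-n-gram-style directory containing 1gms/, 2gms/, ... subdirectories; the directory name, the <S>/</S>/<UNK> symbol strings, and the scored trigram are placeholders, not taken from the original example.

import java.util.Arrays;

import edu.berkeley.nlp.lm.ArrayEncodedNgramLanguageModel;
import edu.berkeley.nlp.lm.ConfigOptions;
import edu.berkeley.nlp.lm.StringWordIndexer;
import edu.berkeley.nlp.lm.io.LmReaders;

public class GoogleDirDemo {
	public static void main(final String[] args) {
		final StringWordIndexer wordIndexer = new StringWordIndexer();
		// Symbol strings assumed to follow the Google n-gram corpus conventions.
		wordIndexer.setStartSymbol("<S>");
		wordIndexer.setEndSymbol("</S>");
		wordIndexer.setUnkSymbol("<UNK>");

		// kneserNey=false selects the two-pass stupid-backoff path shown above.
		final ArrayEncodedNgramLanguageModel<String> lm = LmReaders.readLmFromGoogleNgramDir(
			"google-ngrams", false /* compress */, false /* kneserNey */, wordIndexer, new ConfigOptions());

		// Score a trigram under the loaded model.
		System.out.println(lm.getLogProb(Arrays.asList("new", "york", "city")));
	}
}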

Example 11: secondPassGoogle

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
private static <W> StupidBackoffLm<W> secondPassGoogle(final ConfigOptions opts, final LmReader<LongRef, NgramOrderedLmReaderCallback<LongRef>> lmReader,
	final WordIndexer<W> wordIndexer, final FirstPassCallback<LongRef> valueAddingCallback, final LongArray[] numNgramsForEachWord, final boolean compress) {
	final boolean contextEncoded = false;
	final boolean reversed = true;
	final CountValueContainer values = new CountValueContainer(valueAddingCallback.getValueCounter(), opts.valueRadix, contextEncoded,
		new long[numNgramsForEachWord.length]);
	final NgramMap<LongRef> map = buildMapCommon(opts, wordIndexer, numNgramsForEachWord, valueAddingCallback.getNumNgramsForEachOrder(), reversed,
		lmReader, values, compress);
	return new StupidBackoffLm<W>(numNgramsForEachWord.length, wordIndexer, map, opts);
}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 11, Source: LmReaders.java

Example 12: buildMapArpa

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
/**
 * @param <W>
 * @param opts
 * @param lmReader
 * @param wordIndexer
 * @param valueAddingCallback
 * @param numNgramsForEachWord
 * @param contextEncoded
 * @param reversed
 * @param compress
 * @return
 */
private static <W> NgramMap<ProbBackoffPair> buildMapArpa(final ConfigOptions opts,
	final LmReader<ProbBackoffPair, ArpaLmReaderCallback<ProbBackoffPair>> lmReader, final WordIndexer<W> wordIndexer,
	final FirstPassCallback<ProbBackoffPair> valueAddingCallback, final LongArray[] numNgramsForEachWord, final boolean contextEncoded,
	final boolean reversed, final boolean compress) {
	final ValueContainer<ProbBackoffPair> values = compress ? new CompressibleProbBackoffValueContainer(valueAddingCallback.getValueCounter(),
		opts.valueRadix, contextEncoded, valueAddingCallback.getNumNgramsForEachOrder())
		: opts.storeRankedProbBackoffs ? new UncompressedProbBackoffValueContainer(valueAddingCallback.getValueCounter(), opts.valueRadix, contextEncoded,
			valueAddingCallback.getNumNgramsForEachOrder()) : new UnrankedUncompressedProbBackoffValueContainer(contextEncoded, valueAddingCallback.getNumNgramsForEachOrder());
	if (contextEncoded && compress) throw new RuntimeException("Compression is not supported by context-encoded LMs");
	final NgramMap<ProbBackoffPair> map = buildMapCommon(opts, wordIndexer, numNgramsForEachWord, valueAddingCallback.getNumNgramsForEachOrder(), reversed,
		lmReader, values, compress);
	return map;
}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 26, Source: LmReaders.java

Example 13: buildMapCommon

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
/**
 * @param <W>
 * @param opts
 * @param wordIndexer
 * @param numNgramsForEachWord
 * @param numNgramsForEachOrder
 * @param reversed
 * @param lmReader
 * @param values
 * @param compress
 * @return
 */
private static <W, V extends Comparable<V>> NgramMap<V> buildMapCommon(final ConfigOptions opts, final WordIndexer<W> wordIndexer,
	final LongArray[] numNgramsForEachWord, final long[] numNgramsForEachOrder, final boolean reversed,
	final LmReader<V, ? super NgramMapAddingCallback<V>> lmReader, final ValueContainer<V> values, final boolean compress) {
	Logger.startTrack("Adding n-grams");
	NgramMap<V> map = createNgramMap(opts, numNgramsForEachWord, numNgramsForEachOrder, reversed, values, compress);

	final List<int[]> failures = tryBuildingNgramMap(opts, wordIndexer, lmReader, map);
	Logger.endTrack();
	if (!failures.isEmpty()) {
		Logger.startTrack(failures.size() + " missing suffixes or prefixes were found, doing another pass to add n-grams");
		for (final int[] failure : failures) {
			final int ngramOrder = failure.length - 1;
			final int headWord = failure[reversed ? 0 : ngramOrder];
			numNgramsForEachOrder[ngramOrder]++;
			numNgramsForEachWord[ngramOrder].incrementCount(headWord, 1);
		}

		// try to clear some memory
		for (int ngramOrder = 0; ngramOrder < numNgramsForEachOrder.length; ++ngramOrder) {
			values.clearStorageForOrder(ngramOrder);
		}
		final ValueContainer<V> newValues = values.createFreshValues(numNgramsForEachOrder);
		map.clearStorage();
		map = createNgramMap(opts, numNgramsForEachWord, numNgramsForEachOrder, reversed, newValues, compress);
		lmReader.parse(new NgramMapAddingCallback<V>(map, failures));
		Logger.endTrack();
	}
	return map;
}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 41, Source: LmReaders.java

Example 14: call

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
@Override
public void call(int[] ngram, int startPos, int endPos, ProbBackoffPair value, String words) {
	final String line = StrUtils.join(WordIndexer.StaticMethods.toList(wordIndexer, ngram, startPos, endPos));
	final boolean endsWithEndSym = ngram[ngram.length - 1] == wordIndexer.getIndexPossiblyUnk(wordIndexer.getEndSymbol());
	if (endsWithEndSym || value.backoff == 0.0f)
		out.printf(Locale.US, "%f\t%s\n", value.prob, line);
	else {
		out.printf(Locale.US, "%f\t%s\t%f\n", value.prob, line, value.backoff);
	}
}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 11, Source: KneserNeyFileWritingLmReaderCallback.java

Example 15: GoogleLmReader

import edu.berkeley.nlp.lm.WordIndexer; // import the required package/class
public GoogleLmReader(final String rootDir, final WordIndexer<W> wordIndexer, @SuppressWarnings("unused") final ConfigOptions opts) {
	this.wordIndexer = wordIndexer;
	ngramDirectories = new File(rootDir).listFiles(new FilenameFilter()
	{

		@Override
		public boolean accept(final File dir, final String name) {
			return name.endsWith("gms");
		}
	});
	Arrays.sort(ngramDirectories);
	lmOrder = ngramDirectories.length;
}
 
Developer: jasonbaldridge, Project: maul, Lines of code: 14, Source: GoogleLmReader.java


Note: The edu.berkeley.nlp.lm.WordIndexer class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors, and distribution and use should follow the corresponding project's license. Please do not reproduce without permission.