当前位置: 首页>>代码示例>>Java>>正文


Java Indexer类代码示例

本文整理汇总了Java中edu.berkeley.nlp.util.Indexer的典型用法代码示例。如果您正苦于以下问题：Java Indexer类的具体用法？Java Indexer怎么用？Java Indexer使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


Indexer类属于edu.berkeley.nlp.util包,在下文中一共展示了Indexer类的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: initPunctuations

import edu.berkeley.nlp.util.Indexer; //导入依赖的package包/类
/**
 * Prepares the punctuation features: marks which indexed words are
 * punctuation, counts punctuation signatures over the training trees, keeps
 * the signatures whose count reaches {@code minFeatureFrequency}, and
 * allocates one score row per kept signature.
 *
 * @param trainTrees
 *            training trees whose yields are scanned for punctuation
 *            signatures
 */
private void initPunctuations(StateSetTreeList trainTrees) {
	// NOTE(review): this indexer is replaced further down; presumably
	// getPunctuationSignatures reads the field while collecting - confirm
	// before removing this initial assignment.
	punctuationSignatures = new Indexer<String>();

	// Flag every word id in [0, nWords) that is punctuation.
	isPunctuation = new boolean[nWords];
	for (int w = 0; w < nWords; w++) {
		isPunctuation[w] = isPunctuation(wordIndexer.get(w));
	}

	// Accumulate signature counts over the yield of every training tree.
	Counter<String> signatureCounts = new Counter<String>();
	for (Tree<StateSet> trainTree : trainTrees) {
		getPunctuationSignatures(trainTree.getYield(), true, signatureCounts);
	}

	// Keep only the signatures that meet the frequency cut-off.
	Indexer<String> frequentSignatures = new Indexer<String>();
	for (String signature : signatureCounts.keySet()) {
		final boolean frequentEnough = signatureCounts.getCount(signature) >= minFeatureFrequency;
		if (frequentEnough) {
			frequentSignatures.add(signature);
		}
	}
	punctuationSignatures = frequentSignatures;

	// One row of nClasses scores per kept signature, all starting at 1.
	final int signatureCount = punctuationSignatures.size();
	punctuationScores = new double[signatureCount][nClasses];
	ArrayUtil.fill(punctuationScores, 1);
	nFeatures += nClasses * signatureCount;
}
 
开发者ID:text-machine-lab,项目名称:CliRel,代码行数:22,代码来源:SpanPredictor.java

示例2: buildEncoding

import edu.berkeley.nlp.util.Indexer; //导入依赖的package包/类
/**
 * Builds an {@link Encoding} by indexing every label and every extracted
 * feature that occurs in the given data.
 *
 * @param data
 *            labeled instances to scan
 * @return an encoding wrapping the populated feature and label indexers
 */
private Encoding<F, L> buildEncoding(List<LabeledInstance<I, L>> data) {
	final Indexer<L> labelIndexer = new Indexer<L>();
	final Indexer<F> featureIndexer = new Indexer<F>();

	for (LabeledInstance<I, L> instance : data) {
		L instanceLabel = instance.getLabel();
		Counter<F> extracted = featureExtractor.extractFeatures(instance
				.getInput());
		LabeledFeatureVector<F, L> datum = new BasicLabeledFeatureVector<F, L>(
				instanceLabel, extracted);

		// getIndex is called only for its effect on the indexer; the
		// returned index is not used here.
		labelIndexer.getIndex(datum.getLabel());
		for (F featureKey : datum.getFeatures().keySet()) {
			featureIndexer.getIndex(featureKey);
		}
	}

	return new Encoding<F, L>(featureIndexer, labelIndexer);
}
 
开发者ID:text-machine-lab,项目名称:CliRel,代码行数:17,代码来源:MaximumEntropyClassifier.java

示例3: SimpleLexicon

import edu.berkeley.nlp.util.Indexer; //导入依赖的package包/类
/**
 * Creates a lexicon with an empty word indexer, outside logarithm mode.
 *
 * @param numSubStates
 *            sub-state count per state; its length becomes
 *            {@code numStates}
 * @param threshold
 *            value stored in the {@code threshold} field
 */
public SimpleLexicon(short[] numSubStates, double threshold) {
	this.numSubStates = numSubStates;
	this.numStates = numSubStates.length;
	this.threshold = threshold;
	this.isLogarithmMode = false;
	this.wordIndexer = new Indexer<String>();

	// NOTE(review): if BROWN and WSJ are distinct constants, the BROWN
	// clause is already covered by the "not WSJ" clause - confirm the
	// intended condition.
	if (Corpus.myTreebank != Corpus.TreeBankType.WSJ
			|| Corpus.myTreebank == Corpus.TreeBankType.BROWN) {
		unknownLevel = 4;
	}
}
 
开发者ID:text-machine-lab,项目名称:CliRel,代码行数:12,代码来源:SimpleLexicon.java

示例4: FeaturizedLexicon

import edu.berkeley.nlp.util.Indexer; //导入依赖的package包/类
/**
 * Creates a featurized lexicon with an empty word indexer, outside logarithm
 * mode, and caps the minimizer at 20 iterations.
 *
 * @param numSubStates
 *            sub-state count per state; its length becomes
 *            {@code numStates}
 * @param featurizer
 *            featurizer stored for later use by this lexicon
 */
public FeaturizedLexicon(short[] numSubStates, Featurizer featurizer) {
	this.featurizer = featurizer;
	this.numSubStates = numSubStates;
	this.numStates = numSubStates.length;
	this.isLogarithmMode = false;
	this.wordIndexer = new Indexer<String>();

	minimizer.setMaxIterations(20);
}
 
开发者ID:text-machine-lab,项目名称:CliRel,代码行数:9,代码来源:FeaturizedLexicon.java

示例5: refeaturize

import edu.berkeley.nlp.util.Indexer; //导入依赖的package包/类
/**
 * Rebuilds the feature index from scratch. For every tag, sub-state and
 * indexed word it asks the featurizer for feature names, stores their
 * indices in {@code indexedFeatures[tag][subState][word]}, and records in
 * {@code tagWordsWithFeatures[tag]} the word ids collected for that tag.
 * {@code featureWeights} is reallocated when it is missing or its length no
 * longer matches the number of indexed features.
 */
private void refeaturize() {
	indexedFeatures = new int[numStates][][][];
	featureIndex = new Indexer<String>();
	tagWordsWithFeatures = new int[numStates][];

	for (int tag = 0; tag < numStates; tag++) {
		IntegerIndexer wordsWithFeatures = new IntegerIndexer(
				wordIndexer.size());
		indexedFeatures[tag] = new int[numSubStates[tag]][wordIndexer.size()][];

		// Visit every indexed word and index the features produced for it
		// under this tag.
		for (int wordId = 0; wordId < wordIndexer.size(); wordId++) {
			String word = wordIndexer.getObject(wordId);
			List<String>[] featuresBySubState = featurizer.featurize(word,
					tag, numSubStates[tag], wordCounter[wordId],
					tagWordCounts[tag][wordId]);

			for (int sub = 0; sub < numSubStates[tag]; sub++) {
				List<String> featureNames = featuresBySubState[sub];
				int[] featureIds = new int[featureNames.size()];
				int slot = 0;
				for (String featureName : featureNames) {
					featureIds[slot++] = featureIndex.getIndex(featureName);
				}
				indexedFeatures[tag][sub][wordId] = featureIds;

				// Record the word once any sub-state yields a feature.
				if (featureIds.length > 0) {
					wordsWithFeatures.add(wordId);
				}
			}
		}

		// Copy the collected word ids for this tag into a plain array.
		int[] collectedWords = new int[wordsWithFeatures.size()];
		for (int k = 0; k < collectedWords.length; k++) {
			collectedWords[k] = wordsWithFeatures.get(k);
		}
		tagWordsWithFeatures[tag] = collectedWords;
	}

	// Existing weights are kept only when their length still matches the
	// feature count; otherwise a fresh zero-filled array replaces them.
	final int featureCount = featureIndex.size();
	if (featureWeights == null || featureWeights.length != featureCount) {
		featureWeights = new double[featureCount];
	}
}
 
开发者ID:text-machine-lab,项目名称:CliRel,代码行数:41,代码来源:FeaturizedLexicon.java

示例6: LabelFeatureWeightsManager

import edu.berkeley.nlp.util.Indexer; //导入依赖的package包/类
public LabelFeatureWeightsManager(FeatureManager featManager,
		Indexer<L> labels) {
	this.featManager = featManager;
	this.labels = labels;
	if (!featManager.isLocked()) {
		throw new IllegalArgumentException("Feature manager must be locked");
	}
}
 
开发者ID:text-machine-lab,项目名称:CliRel,代码行数:9,代码来源:LabelFeatureWeightsManager.java

示例7: Encoding

import edu.berkeley.nlp.util.Indexer; //导入依赖的package包/类
/**
 * Creates an encoding backed by the given indexers.
 *
 * @param featureIndexer
 *            indexer of the features
 * @param labelIndexer
 *            sub-indexer of the labels
 */
public Encoding(Indexer<F> featureIndexer, SubIndexer<L> labelIndexer) {
	this.labelIndexer = labelIndexer;
	this.featureIndexer = featureIndexer;
}
 
开发者ID:text-machine-lab,项目名称:CliRel,代码行数:5,代码来源:Encoding.java

示例8: SpanPredictor

import edu.berkeley.nlp.util.Indexer; //导入依赖的package包/类
public SpanPredictor(int nWords, StateSetTreeList trainTrees,
		Numberer tagNumberer, Indexer<String> wordIndexer) {
	this.useFirstAndLast = ConditionalTrainer.Options.useFirstAndLast;
	this.usePreviousAndNext = ConditionalTrainer.Options.usePreviousAndNext;
	this.useBeginAndEndPairs = ConditionalTrainer.Options.useBeginAndEndPairs;
	this.useSyntheticClass = ConditionalTrainer.Options.useSyntheticClass;
	this.usePunctuation = ConditionalTrainer.Options.usePunctuation;
	this.minFeatureFrequency = ConditionalTrainer.Options.minFeatureFrequency;

	this.wordIndexer = wordIndexer;
	this.nWords = nWords;
	this.nFeatures = 0;
	if (useSyntheticClass) {
		System.out
				.println("Distinguishing between real and synthetic classes.");
		stateClass = new int[tagNumberer.total()];
		for (int i = 0; i < tagNumberer.total(); i++) {
			String state = (String) tagNumberer.object(i);
			if (state.charAt(0) == '@')
				stateClass[i] = 1; // synthetic
		}
		nClasses = 2;
	} else {
		stateClass = new int[tagNumberer.total()];
		nClasses = 1;
	}
	if (useFirstAndLast) {
		firstWordScore = new double[nWords][nClasses];
		lastWordScore = new double[nWords][nClasses];
		ArrayUtil.fill(firstWordScore, 1);
		ArrayUtil.fill(lastWordScore, 1);
		this.nFeatures += 2 * nWords * nClasses;
	}
	if (usePreviousAndNext) {
		previousWordScore = new double[nWords][nClasses];
		nextWordScore = new double[nWords][nClasses];
		ArrayUtil.fill(previousWordScore, 1);
		ArrayUtil.fill(nextWordScore, 1);
		this.nFeatures += 2 * nWords * nClasses;
	}
	if (useBeginAndEndPairs) {
		initPairs(trainTrees);
	}
	if (usePunctuation) {
		initPunctuations(trainTrees);
	}
}
 
开发者ID:text-machine-lab,项目名称:CliRel,代码行数:48,代码来源:SpanPredictor.java

示例9: Encoding

import edu.berkeley.nlp.util.Indexer; //导入依赖的package包/类
/**
 * Creates an encoding backed by the given indexers.
 *
 * @param featureIndexer
 *            indexer of the features
 * @param labelIndexer
 *            indexer of the labels
 */
public Encoding(Indexer<F> featureIndexer, Indexer<L> labelIndexer) {
	this.labelIndexer = labelIndexer;
	this.featureIndexer = featureIndexer;
}
 
开发者ID:text-machine-lab,项目名称:CliRel,代码行数:5,代码来源:Encoding.java


注:本文中的edu.berkeley.nlp.util.Indexer类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。