本文整理汇总了Java中org.galagosearch.core.types.WordCount类的典型用法代码示例。如果您正苦于以下问题:Java WordCount类的具体用法?Java WordCount怎么用?Java WordCount使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
WordCount类属于org.galagosearch.core.types包,在下文中一共展示了WordCount类的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: reduce
import org.galagosearch.core.types.WordCount; //导入依赖的package包/类
/**
 * Merges WordCount entries that share the same word into a single entry.
 *
 * The first WordCount seen for a word is kept as the accumulator; the
 * {@code documents} and {@code count} fields of every later entry with the
 * same word are added onto it. Note that this mutates the accumulator
 * objects taken from {@code input}.
 *
 * @param input the WordCount entries to combine
 * @return one WordCount per distinct word, in HashMap iteration order
 * @throws IOException declared for interface compatibility; not thrown by
 *         the code in this method
 */
public ArrayList<WordCount> reduce(List<WordCount> input) throws IOException {
    // Initial capacity is a sizing hint only; presumably it assumes each
    // word repeats about five times in the input -- TODO confirm.
    HashMap<String, WordCount> countObjects = new HashMap<String, WordCount>(input.size() / 5);
    for (WordCount wordCount : input) {
        WordCount original = countObjects.get(wordCount.word);
        if (original == null) {
            // First occurrence of this word: the entry itself becomes the
            // accumulator. (Storing `original` here would store null and
            // silently discard every count.)
            countObjects.put(wordCount.word, wordCount);
        } else {
            original.documents += wordCount.documents;
            original.count += wordCount.count;
        }
    }
    return new ArrayList<WordCount>(countObjects.values());
}
示例2: reduce
import org.galagosearch.core.types.WordCount; //导入依赖的package包/类
/**
 * Collapses a list of WordCount entries so that each word appears once.
 *
 * The first entry encountered for a word is retained and the
 * {@code documents} and {@code count} values of subsequent entries for
 * that word are added into it (the retained input object is modified).
 *
 * @param input the WordCount entries to combine
 * @return the combined entries, one per distinct word
 * @throws IOException declared by the signature; not raised by this code
 */
public ArrayList<WordCount> reduce(List<WordCount> input) throws IOException {
    HashMap<String, WordCount> totalsByWord = new HashMap<String, WordCount>();
    for (WordCount current : input) {
        WordCount accumulated = totalsByWord.get(current.word);
        if (accumulated != null) {
            // Word already seen: fold this entry into the retained one.
            accumulated.documents += current.documents;
            accumulated.count += current.count;
            continue;
        }
        // First sighting of the word: this entry becomes the accumulator.
        totalsByWord.put(current.word, current);
    }
    ArrayList<WordCount> combined = new ArrayList<WordCount>(totalsByWord.values());
    return combined;
}
示例3: getWordCountStage
import org.galagosearch.core.types.WordCount; //导入依赖的package包/类
/**
 * Builds the description of the "wordCount" stage.
 *
 * The stage takes document splits as input, parses and tokenizes them,
 * counts the word tokens, and outputs a stream of WordCount objects
 * sorted by word.
 *
 * @return a stage description for the wordCount stage
 */
public Stage getWordCountStage() {
    Stage wordCountStage = new Stage("wordCount");

    // Connection points: "splits" comes in, "wordCounts" goes out.
    StageConnectionPoint splitsInput = new StageConnectionPoint(
            ConnectionPointType.Input,
            "splits",
            new DocumentSplit.FileNameStartKeyOrder());
    wordCountStage.add(splitsInput);

    StageConnectionPoint wordCountsOutput = new StageConnectionPoint(
            ConnectionPointType.Output,
            "wordCounts",
            new WordCount.WordOrder());
    wordCountStage.add(wordCountsOutput);

    // Processing steps, in pipeline order.
    wordCountStage.add(new InputStep("splits"));
    wordCountStage.add(new Step(UniversalParser.class));
    // Tokenizer choice: replace TokenizerExample.class with
    // TagTokenizer.class below to use the default tokenizer instead of
    // the custom one.
    wordCountStage.add(new Step(TokenizerExample.class));
    wordCountStage.add(new Step(WordCounter.class));
    wordCountStage.add(Utility.getSorter(new WordCount.WordOrder(), WordCountReducer.class));
    wordCountStage.add(new OutputStep("wordCounts"));

    return wordCountStage;
}
示例4: getReduceCountsStage
import org.galagosearch.core.types.WordCount; //导入依赖的package包/类
/**
 * Builds the description of the "reduceCounts" stage.
 *
 * The stage collects the WordCount data produced by the wordCounts
 * stages and computes the count totals for each word.
 *
 * @return a stage description for the reduceCounts stage
 */
public Stage getReduceCountsStage() {
    Stage reduceStage = new Stage("reduceCounts");

    // Connection points: "wordCounts" comes in, "reducedCounts" goes out,
    // both ordered by word.
    StageConnectionPoint wordCountsInput = new StageConnectionPoint(
            ConnectionPointType.Input,
            "wordCounts",
            new WordCount.WordOrder());
    reduceStage.add(wordCountsInput);

    StageConnectionPoint reducedCountsOutput = new StageConnectionPoint(
            ConnectionPointType.Output,
            "reducedCounts",
            new WordCount.WordOrder());
    reduceStage.add(reducedCountsOutput);

    // Processing steps, in pipeline order.
    reduceStage.add(new InputStep("wordCounts"));
    reduceStage.add(new Step(WordCountReducer.class));
    reduceStage.add(new OutputStep("reducedCounts"));

    return reduceStage;
}
示例5: getInvertByCountStage
import org.galagosearch.core.types.WordCount; //导入依赖的package包/类
/**
 * Builds the description of the "invertByCount" stage.
 *
 * The stage re-keys the reduced WordCount data so that it is ordered by
 * count rather than by word. Its output consists of ZipfCount objects
 * produced by the ZipfCounter class.
 *
 * @return a stage description for the invertByCount stage
 */
public Stage getInvertByCountStage() {
    Stage invertStage = new Stage("invertByCount");

    // Connection points: word-ordered "reducedCounts" comes in,
    // occurrence-count-ordered "zipfCounts" goes out.
    StageConnectionPoint reducedCountsInput = new StageConnectionPoint(
            ConnectionPointType.Input,
            "reducedCounts",
            new WordCount.WordOrder());
    invertStage.add(reducedCountsInput);

    StageConnectionPoint zipfCountsOutput = new StageConnectionPoint(
            ConnectionPointType.Output,
            "zipfCounts",
            new ZipfCount.OccurrenceCountOrder());
    invertStage.add(zipfCountsOutput);

    // Processing steps, in pipeline order.
    invertStage.add(new InputStep("reducedCounts"));
    invertStage.add(new Step(ZipfCounter.class));
    invertStage.add(Utility.getSorter(new ZipfCount.OccurrenceCountOrder()));
    invertStage.add(new OutputStep("zipfCounts"));

    return invertStage;
}
示例6: testCountUnigrams
import org.galagosearch.core.types.WordCount; //导入依赖的package包/类
/**
 * Checks that a WordCounter configured with width "1" emits one WordCount
 * per distinct term of a document, carrying that term's occurrence count.
 *
 * The document holds the terms "one", "two", "one", so two results are
 * expected: "one" with count 2 and the other word with count 1.
 */
public void testCountUnigrams() throws IOException, IncompatibleProcessorException {
    Parameters parameters = new Parameters();
    parameters.add("width", "1");
    WordCounter counter = new WordCounter(new FakeParameters(parameters));

    Document document = new Document();
    PostStep sink = new PostStep();
    counter.setProcessor(sink);

    document.terms = new ArrayList<String>();
    document.terms.add("one");
    document.terms.add("two");
    document.terms.add("one");

    counter.process(document);

    assertEquals(2, sink.results.size());
    for (WordCount result : sink.results) {
        // "one" occurs twice; any other emitted word occurs once.
        int expectedCount = result.word.equals("one") ? 2 : 1;
        assertEquals(expectedCount, result.count);
    }
}
示例7: getInputClass
import org.galagosearch.core.types.WordCount; //导入依赖的package包/类
/**
 * Returns the {@code Class} object for WordCount.
 *
 * NOTE(review): presumably this tells the surrounding framework which type
 * this component accepts as input -- confirm against the enclosing class.
 *
 * @return {@code WordCount.class}
 */
public Class<WordCount> getInputClass() {
return WordCount.class;
}
示例8: getOutputClass
import org.galagosearch.core.types.WordCount; //导入依赖的package包/类
/**
 * Returns the {@code Class} object for WordCount.
 *
 * NOTE(review): presumably this tells the surrounding framework which type
 * this component emits as output -- confirm against the enclosing class.
 *
 * @return {@code WordCount.class}
 */
public Class<WordCount> getOutputClass() {
return WordCount.class;
}
示例9: process
import org.galagosearch.core.types.WordCount; //导入依赖的package包/类
/**
 * Forwards a ZipfCount built from the constant 1 and the incoming
 * object's {@code count} to the downstream processor.
 *
 * @param object the WordCount whose {@code count} is passed along
 * @throws IOException if the downstream processor fails
 */
@Override
public void process(WordCount object) throws IOException {
    ZipfCount zipfEntry = new ZipfCount(1, object.count);
    processor.process(zipfEntry);
}
示例10: process
import org.galagosearch.core.types.WordCount; //导入依赖的package包/类
/**
 * Appends the received WordCount to {@code results}.
 *
 * @param o the WordCount to record
 */
public void process(WordCount o) {
    // The parameter is already a WordCount, so no cast is required.
    results.add(o);
}