本文整理汇总了Java中org.galagosearch.core.types.AdditionalDocumentText类的典型用法代码示例。如果您正苦于以下问题:Java AdditionalDocumentText类的具体用法?Java AdditionalDocumentText怎么用?Java AdditionalDocumentText使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
AdditionalDocumentText类属于org.galagosearch.core.types包,在下文中一共展示了AdditionalDocumentText类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getLinkCombineStage
import org.galagosearch.core.types.AdditionalDocumentText; //导入依赖的package包/类
/**
 * Assembles the stage named "linkCombine".
 *
 * <p>The stage declares two inputs ("documentUrls" and "links") and one
 * output ("anchorText"). Its step list is, in order: a {@code LinkCombiner}
 * step configured with the two input names, an {@code AnchorTextCreator}
 * step, a sorter using {@code AdditionalDocumentText.IdentifierOrder}, and
 * an output step writing to "anchorText".
 *
 * @return the fully configured stage
 */
public Stage getLinkCombineStage() {
    // Tell the combiner which connection point feeds which of its parameters.
    Parameters combinerParams = new Parameters();
    combinerParams.add("documentDatas", "documentUrls");
    combinerParams.add("extractedLinks", "links");

    Stage linkCombine = new Stage("linkCombine");

    // Connection points: two inputs, one output.
    StageConnectionPoint urlsIn = new StageConnectionPoint(
            ConnectionPointType.Input, "documentUrls", new DocumentData.UrlOrder());
    linkCombine.add(urlsIn);

    StageConnectionPoint linksIn = new StageConnectionPoint(
            ConnectionPointType.Input, "links", new ExtractedLink.DestUrlOrder());
    linkCombine.add(linksIn);

    StageConnectionPoint anchorOut = new StageConnectionPoint(
            ConnectionPointType.Output, "anchorText", new AdditionalDocumentText.IdentifierOrder());
    linkCombine.add(anchorOut);

    // Processing steps, in execution order.
    Step combine = new Step(LinkCombiner.class, combinerParams);
    linkCombine.add(combine);

    Step createAnchors = new Step(AnchorTextCreator.class);
    linkCombine.add(createAnchors);

    linkCombine.add(Utility.getSorter(new AdditionalDocumentText.IdentifierOrder()));
    linkCombine.add(new OutputStep("anchorText"));

    return linkCombine;
}
示例2: verify
import org.galagosearch.core.types.AdditionalDocumentText; //导入依赖的package包/类
/**
 * Checks this step's configuration.
 *
 * <p>First requires that the "textSource" parameter is present; if that
 * check fails nothing else is verified. Otherwise the value of "textSource"
 * is handed to {@code Verification.verifyTypeReader} together with
 * {@code AdditionalDocumentText.class}.
 *
 * @param parameters the parameters whose XML configuration is inspected
 * @param handler    passed through to the {@code Verification} helpers
 */
public static void verify(TupleFlowParameters parameters, ErrorHandler handler) {
    String[] requiredNames = { "textSource" };
    boolean requiredPresent =
            Verification.requireParameters(requiredNames, parameters.getXML(), handler);

    if (requiredPresent) {
        String readerName = parameters.getXML().get("textSource");
        Verification.verifyTypeReader(readerName, AdditionalDocumentText.class, parameters, handler);
    }
}
示例3: process
import org.galagosearch.core.types.AdditionalDocumentText; //导入依赖的package包/类
/**
 * Converts the links of one {@code DocumentLinkData} into a single
 * {@code AdditionalDocumentText} and forwards it to {@code processor}.
 *
 * <p>Each link's {@code anchorText} is wrapped as
 * {@code <anchor>...</anchor>} and the wrapped pieces are concatenated in
 * iteration order. The result carries the same {@code identifier} as the
 * input object. The anchor text is appended as-is.
 *
 * @param object the link data to convert
 * @throws IOException declared by this method's signature
 */
@Override
public void process(DocumentLinkData object) throws IOException {
    StringBuilder anchors = new StringBuilder();
    for (ExtractedLink link : object.links) {
        anchors.append("<anchor>").append(link.anchorText).append("</anchor>");
    }

    AdditionalDocumentText result = new AdditionalDocumentText();
    result.identifier = object.identifier;
    result.text = anchors.toString();

    processor.process(result);
}
示例4: getParsePostingsStage
import org.galagosearch.core.types.AdditionalDocumentText; //导入依赖的package包/类
/**
 * Assembles the stage named "parsePostings".
 *
 * <p>Connection points: input "splits"; outputs "postings", "extents" and
 * "documentData"; additionally output "stemmedPostings" when
 * {@code stemming} is set and input "anchorText" when {@code useLinks} is
 * set. ({@code stemming} and {@code useLinks} are read from the enclosing
 * scope, which is not visible here.)
 *
 * <p>Steps, in order: an input step on "splits", {@code UniversalParser},
 * optionally {@code AdditionalTextCombiner} (with "textSource" set to
 * "anchorText") when {@code useLinks} is set, {@code TagTokenizer}, and
 * finally a {@code MultiStep} holding one group each for postings, extents
 * and document data, plus a stemmed-postings group when {@code stemming}
 * is set.
 *
 * @return the fully configured stage
 */
public Stage getParsePostingsStage() {
    Stage parsePostings = new Stage("parsePostings");

    // ---- connection points ----
    parsePostings.add(new StageConnectionPoint(ConnectionPointType.Input, "splits",
            new DocumentSplit.FileNameStartKeyOrder()));
    parsePostings.add(new StageConnectionPoint(ConnectionPointType.Output, "postings",
            new DocumentWordPosition.DocumentWordPositionOrder()));
    parsePostings.add(new StageConnectionPoint(ConnectionPointType.Output, "extents",
            new DocumentExtent.IdentifierOrder()));
    parsePostings.add(new StageConnectionPoint(ConnectionPointType.Output, "documentData",
            new DocumentData.IdentifierOrder()));

    if (stemming) {
        parsePostings.add(new StageConnectionPoint(ConnectionPointType.Output, "stemmedPostings",
                new DocumentWordPosition.DocumentWordPositionOrder()));
    }

    if (useLinks) {
        parsePostings.add(new StageConnectionPoint(ConnectionPointType.Input, "anchorText",
                new AdditionalDocumentText.IdentifierOrder()));
    }

    // ---- linear part of the pipeline ----
    parsePostings.add(new InputStep("splits"));
    parsePostings.add(new Step(UniversalParser.class));

    if (useLinks) {
        // The combiner reads the extra text from the "anchorText" input.
        Parameters combinerParams = new Parameters();
        combinerParams.add("textSource", "anchorText");
        parsePostings.add(new Step(AdditionalTextCombiner.class, combinerParams));
    }

    parsePostings.add(new Step(TagTokenizer.class));

    // ---- fan-out: one step group per output ----
    MultiStep fanOut = new MultiStep();

    fanOut.groups.add(getExtractionSteps("postings", PostingsPositionExtractor.class,
            new DocumentWordPosition.DocumentWordPositionOrder()));
    fanOut.groups.add(getExtractionSteps("extents", ExtentExtractor.class,
            new DocumentExtent.IdentifierOrder()));
    fanOut.groups.add(getExtractionSteps("documentData", DocumentDataExtractor.class,
            new DocumentData.IdentifierOrder()));

    if (stemming) {
        // Stemmed postings: stem, extract positions, sort, write out.
        ArrayList<Step> stemmedGroup = new ArrayList<Step>();
        stemmedGroup.add(new Step(Porter2Stemmer.class));
        stemmedGroup.add(new Step(PostingsPositionExtractor.class));
        stemmedGroup.add(Utility.getSorter(new DocumentWordPosition.DocumentWordPositionOrder()));
        stemmedGroup.add(new OutputStep("stemmedPostings"));
        fanOut.groups.add(stemmedGroup);
    }

    parsePostings.add(fanOut);
    return parsePostings;
}