本文整理汇总了Java中net.sf.okapi.steps.common.RawDocumentToFilterEventsStep类的典型用法代码示例。如果您正苦于以下问题:Java RawDocumentToFilterEventsStep类的具体用法?Java RawDocumentToFilterEventsStep怎么用?Java RawDocumentToFilterEventsStep使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
RawDocumentToFilterEventsStep类属于net.sf.okapi.steps.common包,在下文中一共展示了RawDocumentToFilterEventsStep类的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: importXLIFF
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep; //导入依赖的package包/类
/**
 * Feeds XLIFF content through a two-step Okapi pipeline: the content is parsed
 * with an {@link XLIFFFilter} and the resulting filter events are passed to
 * the supplied import step.
 *
 * NOTE(review): {@code @Transactional} sits on a private method; proxy-based
 * transaction handling usually does not intercept those - confirm how this
 * method is woven.
 *
 * @param importExportedXliffStep step that consumes the events; it receives
 * the same filter instance that is used for parsing
 * @param xliffContent the XLIFF document, read with English as source locale
 */
@Transactional
private void importXLIFF(ImportExportedXliffStep importExportedXliffStep, String xliffContent) {
    IPipelineDriver pipeline = new PipelineDriver();

    // The parsing step and the import step must share one filter instance.
    XLIFFFilter sharedFilter = new XLIFFFilter();
    pipeline.addStep(new RawDocumentToFilterEventsStep(sharedFilter));
    importExportedXliffStep.setXliffFilter(sharedFilter);
    pipeline.addStep(importExportedXliffStep);

    pipeline.addBatchItem(new RawDocument(xliffContent, LocaleId.ENGLISH));

    logger.debug("Start importing XLIFF");
    pipeline.processBatch();
}
示例2: exportAssetAsXLIFF
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep; //导入依赖的package包/类
/**
 * Exports an {@link Asset} as XLIFF for a given locale.
 *
 * The export runs a single-document Okapi pipeline: a {@link TMExportFilter}
 * created for the asset produces the events and an {@link XLIFFWriter} writes
 * them into an in-memory stream whose content is returned as a string.
 *
 * @param assetId {@link Asset#id} to be exported
 * @param bcp47Tag bcp47 tag of the locale that needs to be exported; it is
 * looked up through {@code localeService} and the tag of the found locale is
 * used as target locale (NOTE(review): an unknown tag presumably makes the
 * lookup return null - confirm)
 * @return an XLIFF that contains {@link Asset}'s translation for that locale
 */
@Transactional
public String exportAssetAsXLIFF(Long assetId, String bcp47Tag) {
    logger.debug("Export data for asset id: {} and locale: {}", assetId, bcp47Tag);

    logger.trace("Create XLIFFWriter");
    XLIFFWriter writer = new XLIFFWriter();

    logger.trace("Prepare FilterEventsWriterStep to use an XLIFFWriter with outputstream (allows only one doc to be processed)");
    FilterEventsWriterStep writerStep = new FilterEventsWriterStep(writer);
    ByteArrayOutputStream xliffBytes = new ByteArrayOutputStream();
    writerStep.setOutputStream(xliffBytes);
    writerStep.setOutputEncoding(StandardCharsets.UTF_8.toString());

    logger.trace("Prepare the Okapi pipeline");
    IPipelineDriver pipeline = new PipelineDriver();
    pipeline.addStep(new RawDocumentToFilterEventsStep(new TMExportFilter(assetId)));
    pipeline.addStep(writerStep);

    logger.trace("Add single document with fake output URI to be processed with an outputStream");
    LocaleId targetLocaleId = LocaleId.fromBCP47(localeService.findByBcp47Tag(bcp47Tag).getBcp47Tag());
    // The document itself is empty: the content comes from the filter, not from the input.
    RawDocument emptyDocument = new RawDocument(RawDocument.EMPTY, LocaleId.ENGLISH, targetLocaleId);
    pipeline.addBatchItem(emptyDocument, RawDocument.getFakeOutputURIForStream(), null);

    logger.debug("Start processing batch");
    pipeline.processBatch();

    logger.trace("Get the output result from the stream");
    return StreamUtil.getUTF8OutputStreamAsString(xliffBytes);
}
示例3: generateTranslationKitAsXLIFF
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep; //导入依赖的package包/类
/**
 * Generates a translation kit and returns it in XLIFF format.
 *
 * A {@link TranslationKit} is first registered through
 * {@code addTranslationKit}; its id then drives the
 * {@link TranslationKitFilter} and {@link TranslationKitStep} of a
 * single-document Okapi pipeline whose output is captured in memory.
 *
 * @param dropId {@link Drop#id}
 * @param tmId {@link TM#id}; only used in the debug log message here
 * @param localeId {@link Locale#id}, resolved through {@code localeService}
 * to obtain the target locale tag
 * @param type kit type, passed to {@code addTranslationKit} and to the filter
 * @param useInheritance passed as-is to the {@link TranslationKitFilter}
 * @return the XLIFF content together with the id of the created kit
 */
@Transactional
public TranslationKitAsXliff generateTranslationKitAsXLIFF(Long dropId, Long tmId, Long localeId, TranslationKit.Type type, Boolean useInheritance) {
    logger.debug("Get translation kit for in tmId: {} and locale: {}", tmId, localeId);
    TranslationKit kit = addTranslationKit(dropId, localeId, type);

    logger.trace("Create XLIFFWriter");
    XLIFFWriter writer = new XLIFFWriter();

    logger.trace("Prepare FilterEventsWriterStep to use an XLIFFWriter with outputstream (allows only one doc to be processed)");
    FilterEventsWriterStep writerStep = new FilterEventsWriterStep(writer);
    ByteArrayOutputStream xliffBytes = new ByteArrayOutputStream();
    writerStep.setOutputStream(xliffBytes);
    writerStep.setOutputEncoding(StandardCharsets.UTF_8.toString());

    logger.trace("Prepare the Okapi pipeline");
    IPipelineDriver pipeline = new PipelineDriver();
    pipeline.addStep(new RawDocumentToFilterEventsStep(new TranslationKitFilter(kit.getId(), type, useInheritance)));
    pipeline.addStep(new TranslationKitStep(kit.getId()));
    pipeline.addStep(writerStep);

    logger.trace("Add single document with fake output URI to be processed with an outputStream");
    LocaleId targetLocaleId = LocaleId.fromBCP47(localeService.findById(localeId).getBcp47Tag());
    // The input document is empty: the events are produced by the filter itself.
    RawDocument emptyDocument = new RawDocument(RawDocument.EMPTY, LocaleId.ENGLISH, targetLocaleId);
    pipeline.addBatchItem(emptyDocument, RawDocument.getFakeOutputURIForStream(), null);

    logger.debug("Start processing batch");
    pipeline.processBatch();

    logger.trace("Get the output result from the stream");
    TranslationKitAsXliff result = new TranslationKitAsXliff();
    result.setContent(StreamUtil.getUTF8OutputStreamAsString(xliffBytes));
    result.setTranslationKitId(kit.getId());
    return result;
}
示例4: performAssetExtraction
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep; //导入依赖的package包/类
/**
 * Processes the {@link Asset} given the associated {@link AssetExtraction}.
 * The CSV format should follow the old Box WebApp syntax.
 *
 * The asset content is sent through an Okapi pipeline made of the raw
 * document parsing step, {@link CheckForDoNotTranslateStep} and
 * {@link AssetExtractionStep}.
 *
 * @param assetExtraction the Asset extraction; provides the asset and the id
 * given to the extraction step
 * @param filterConfigIdOverride Optional, can be null. Allows to specify
 * a specific Okapi filter to use to process the asset; when null the filter
 * is chosen by {@code getFilterConfigIdForAsset}
 * @param parentTask not used in the method body (annotated {@code @ParentTask})
 */
@Transactional
@Pollable(message = "Extracting text units from asset")
public void performAssetExtraction(
        AssetExtraction assetExtraction,
        FilterConfigIdOverride filterConfigIdOverride,
        @ParentTask PollableTask parentTask) {

    logger.debug("Configuring pipeline");
    IPipelineDriver pipeline = new PipelineDriver();
    pipeline.addStep(new RawDocumentToFilterEventsStep());
    pipeline.addStep(new CheckForDoNotTranslateStep());
    pipeline.addStep(new AssetExtractionStep(assetExtraction.getId()));

    //TODO(10) Is this actually used as we have our own logic to set the filter to be used, see following todo
    logger.debug("Adding all supported filters to the pipeline driver");
    pipeline.setFilterConfigurationMapper(getConfiguredFilterConfigurationMapper());

    Asset asset = assetExtraction.getAsset();
    RawDocument assetDocument = new RawDocument(asset.getContent(), LocaleId.ENGLISH);
    assetDocument.setAnnotation(new POExtraPluralAnnotation());

    //TODO(P1) I think Okapi already implement this logic
    String filterConfigId = (filterConfigIdOverride == null)
            ? getFilterConfigIdForAsset(asset)
            : filterConfigIdOverride.getOkapiFilterId();
    assetDocument.setFilterConfigId(filterConfigId);
    logger.debug("Set filter config {} for asset {}", filterConfigId, asset.getPath());

    pipeline.addBatchItem(assetDocument);

    logger.debug("Start processing batch");
    pipeline.processBatch();
}
示例5: internalCheck
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep; //导入依赖的package包/类
/**
 * Runs the given document through a LanguageTool check pipeline and returns
 * the bytes of the document written by the pipeline.
 *
 * The document is forced to the XLIFF filter configuration, then sent through
 * three steps: raw document parsing, {@link LanguageToolStep} and
 * {@link FilterEventsToRawDocumentStep}. The output is written to a temporary
 * file, read back into memory, and the file is removed again - also when the
 * pipeline fails.
 *
 * @param rd the document to check; its filter configuration id is overwritten
 * @return the content of the pipeline output file
 * @throws Exception if the temporary file cannot be created or read, or if
 * the pipeline fails
 */
private byte[] internalCheck(RawDocument rd) throws Exception {
    rd.setFilterConfigId(Pipeline.OKF_XLIFF_CONFIG_ID);
    // NOTE(review): debug output kept as-is; getStream() may have side effects on rd - confirm before removing.
    System.out.println(rd.getStream().markSupported());

    // Create the driver
    PipelineDriver driver = new PipelineDriver();
    driver.setFilterConfigurationMapper(fcMapper);
    ExecutionContext context = new ExecutionContext();
    context.setApplicationName("");
    context.setIsNoPrompt(true);
    driver.setExecutionContext(context);

    // Raw document to filter events step
    driver.addStep(new RawDocumentToFilterEventsStep());

    // languagetool step
    LanguageToolStep ltStep = new LanguageToolStep();
    Parameters parameters = new Parameters();
    parameters.setCheckSource(true);
    parameters.setCheckSpelling(true);
    parameters.setEnableFalseFriends(true);
    // Hand the configured parameters to the step; previously they were built
    // but never applied, so the step silently ran with its defaults.
    ltStep.setParameters(parameters);
    driver.addStep(ltStep);

    driver.addStep(new FilterEventsToRawDocumentStep());

    File temporary = File.createTempFile(Pipeline.TMP_FILE_NAME,
            Pipeline.TMP_FILE_EXT);
    try {
        driver.addBatchItem(rd, temporary.toURI(), getOutputEncoding(rd));
        // Process
        driver.processBatch();
        return Files.readAllBytes(temporary.toPath());
    } finally {
        // Always clean up the temporary file, even when processing failed;
        // fall back to deletion at JVM exit if it cannot be removed now.
        if (!temporary.delete()) {
            temporary.deleteOnExit();
        }
    }
}
示例6: initPipeline
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep; //导入依赖的package包/类
/**
 * Creates the pipeline driver held in {@code driver} and wires two steps into
 * it: raw document parsing followed by a format conversion whose output
 * format is set to TMX. The conversion parameters are kept in {@code cp}.
 */
private void initPipeline() {
    driver = new PipelineDriver();
    driver.setFilterConfigurationMapper(getFilterMapper(null, null));

    // First step: turn raw documents into filter events.
    driver.addStep(new RawDocumentToFilterEventsStep());

    // Second step: convert the events to another format.
    FormatConversionStep tmxConversion = new FormatConversionStep();
    driver.addStep(tmxConversion);

    // Keep a handle on the conversion parameters and request TMX output.
    cp = (net.sf.okapi.steps.formatconversion.Parameters) tmxConversion.getParameters();
    cp.setOutputFormat(net.sf.okapi.steps.formatconversion.Parameters.FORMAT_TMX);
}
示例7: updateTMWithXliff
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep; //导入依赖的package包/类
/**
 * Update TM with XLIFF.
 *
 * The XLIFF is parsed and sent through the configured quality step, an
 * {@link IntegrityCheckStep}, the given import step and finally an
 * {@link XLIFFWriter} that captures the processed document in memory.
 *
 * @param xliffContent The content of the localized XLIFF TODO(P1) Use BCP47
 * tag instead of Locale object?
 * @param importStatus specific status to use when importing translation; it
 * is set on the import step before the step is added to the pipeline
 * @param abstractImportTranslationsStep defines which import logic to apply
 * @return the imported XLIFF with information for each text unit about the
 * import process, plus the comment read from the
 * {@link ImportTranslationsStepAnnotation} found on the raw document
 * @throws OkapiBadFilterInputException
 */
private UpdateTMWithXLIFFResult updateTMWithXliff(
        String xliffContent,
        TMTextUnitVariant.Status importStatus,
        AbstractImportTranslationsStep abstractImportTranslationsStep) throws OkapiBadFilterInputException {

    logger.debug("Configuring pipeline for localized XLIFF processing");
    IPipelineDriver pipeline = new PipelineDriver();
    pipeline.addStep(new RawDocumentToFilterEventsStep(new XLIFFFilter()));
    pipeline.addStep(getConfiguredQualityStep());
    pipeline.addStep(new IntegrityCheckStep());

    abstractImportTranslationsStep.setImportWithStatus(importStatus);
    pipeline.addStep(abstractImportTranslationsStep);

    //TODO(P1) It sounds like it's not possible to the XLIFFFilter for the output
    // because the note is readonly mode and we need to override it to provide more information
    logger.debug("Prepare FilterEventsWriterStep to use an XLIFFWriter with outputstream (allows only one doc to be processed)");
    FilterEventsWriterStep writerStep = new FilterEventsWriterStep(new XLIFFWriter());
    ByteArrayOutputStream xliffBytes = new ByteArrayOutputStream();
    writerStep.setOutputStream(xliffBytes);
    writerStep.setOutputEncoding(StandardCharsets.UTF_8.toString());
    pipeline.addStep(writerStep);

    // The target language has to be known up front: if Okapi reads it from
    // the file it is too late to write the output with the XLIFFWriter
    // (missing target language).
    String targetLanguage = xliffUtils.getTargetLanguage(xliffContent);
    LocaleId targetLocaleId;
    if (targetLanguage == null) {
        targetLocaleId = LocaleId.EMPTY;
    } else {
        targetLocaleId = LocaleId.fromBCP47(targetLanguage);
    }

    RawDocument xliffDocument = new RawDocument(xliffContent, LocaleId.ENGLISH, targetLocaleId);
    pipeline.addBatchItem(xliffDocument, RawDocument.getFakeOutputURIForStream(), null);

    logger.debug("Start processing batch");
    pipeline.processBatch();

    logger.debug("Get the Import report");
    ImportTranslationsStepAnnotation importAnnotation = xliffDocument.getAnnotation(ImportTranslationsStepAnnotation.class);

    UpdateTMWithXLIFFResult result = new UpdateTMWithXLIFFResult();
    result.setXliffContent(StreamUtil.getUTF8OutputStreamAsString(xliffBytes));
    result.setComment(importAnnotation.getComment());
    return result;
}
示例8: generateLocalizedBase
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep; //导入依赖的package包/类
/**
 * Parses the given content and adds the translation for every text unit.
 * Returns the content of the localized content.
 *
 * TODO(P1) This needs to support other file formats
 *
 * @param asset The {@link Asset} used to pick the filter configuration (when
 * no override is given) and named in the log output
 * @param content The content to be localized
 * @param filterConfigIdOverride Optional, can be null. When set, its Okapi
 * filter id is used instead of the one derived from the asset
 * @param outputBcp47tag bcp47 tag used as target locale of the generated
 * document. NOTE(review): the value is passed directly to
 * {@code LocaleId.fromBCP47}; presumably callers replace a null tag with the
 * repository locale tag before calling - confirm
 * @param step the pipeline step placed between the do-not-translate check
 * and the in-memory output step
 * @return the localized asset
 */
private String generateLocalizedBase(Asset asset, String content, FilterConfigIdOverride filterConfigIdOverride, String outputBcp47tag, BasePipelineStep step) {

    IPipelineDriver pipeline = new PipelineDriver();
    pipeline.addStep(new RawDocumentToFilterEventsStep());
    pipeline.addStep(new CheckForDoNotTranslateStep());
    pipeline.addStep(step);

    //TODO(P1) see assetExtractor comments
    logger.debug("Adding all supported filters to the pipeline driver");
    pipeline.setFilterConfigurationMapper(assetExtractor.getConfiguredFilterConfigurationMapper());

    // Last step collects the rewritten document in memory so it can be returned.
    FilterEventsToInMemoryRawDocumentStep inMemoryOutputStep = new FilterEventsToInMemoryRawDocumentStep();
    pipeline.addStep(inMemoryOutputStep);

    RawDocument sourceDocument = new RawDocument(content, LocaleId.ENGLISH, LocaleId.fromBCP47(outputBcp47tag));

    //TODO(P2) Find a better solution?
    sourceDocument.setAnnotation(new POExtraPluralAnnotation());

    //TODO(P1) see assetExtractor comments
    String filterConfigId = (filterConfigIdOverride == null)
            ? assetExtractor.getFilterConfigIdForAsset(asset)
            : filterConfigIdOverride.getOkapiFilterId();
    sourceDocument.setFilterConfigId(filterConfigId);
    logger.debug("Set filter config {} for asset {}", filterConfigId, asset.getPath());

    pipeline.addBatchItem(sourceDocument);

    logger.debug("Start processing batch");
    pipeline.processBatch();

    return inMemoryOutputStep.getOutput(sourceDocument);
}
示例9: importLocalizedAsset
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep; //导入依赖的package包/类
/**
 * Imports a localized version of an asset.
 *
 * The target strings are checked against the source strings and if they are
 * equal the status of the imported translation is defined by
 * statusForSourceEqTarget. When SKIPPED is specified the import is actually
 * skipped.
 *
 * For not fully translated locales, targets are imported only if they are
 * different from target of the parent locale.
 *
 * @param asset the asset for which the content will be imported
 * @param content the localized asset content
 * @param repositoryLocale the locale of the content to be imported
 * @param statusForSourceEqTarget the status of the text unit variant when
 * the source equals the target
 * @param filterConfigIdOverride Optional, can be null. When set, its Okapi
 * filter id is used instead of the one derived from the asset
 */
public void importLocalizedAsset(
        Asset asset,
        String content,
        RepositoryLocale repositoryLocale,
        StatusForSourceEqTarget statusForSourceEqTarget,
        FilterConfigIdOverride filterConfigIdOverride) {

    String bcp47Tag = repositoryLocale.getLocale().getBcp47Tag();

    logger.debug("Configuring pipeline to import localized file");
    IPipelineDriver pipeline = new PipelineDriver();
    pipeline.addStep(new RawDocumentToFilterEventsStep());
    pipeline.addStep(new CheckForDoNotTranslateStep());
    pipeline.addStep(new ImportTranslationsFromLocalizedAssetStep(asset, repositoryLocale, statusForSourceEqTarget));

    logger.debug("Adding all supported filters to the pipeline driver");
    pipeline.setFilterConfigurationMapper(assetExtractor.getConfiguredFilterConfigurationMapper());

    // Terminal step of the pipeline; its in-memory output is not read by this method.
    pipeline.addStep(new FilterEventsToInMemoryRawDocumentStep());

    RawDocument localizedDocument = new RawDocument(content, LocaleId.ENGLISH, LocaleId.fromBCP47(bcp47Tag));

    String filterConfigId = (filterConfigIdOverride == null)
            ? assetExtractor.getFilterConfigIdForAsset(asset)
            : filterConfigIdOverride.getOkapiFilterId();
    localizedDocument.setFilterConfigId(filterConfigId);
    logger.debug("Set filter config {} for asset {}", filterConfigId, asset.getPath());

    pipeline.addBatchItem(localizedDocument);

    logger.debug("Start processing batch");
    pipeline.processBatch();
}
示例10: generateOmegaTKit
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep; //导入依赖的package包/类
/**
 * Builds and runs the pipeline that produces an OmegaT translation package
 * for the given documents.
 *
 * Steps, in order: raw document parsing, optional source segmentation (only
 * when {@code srx} is set), the supplied leveraging step, an
 * {@link ApproverStep} and an {@link ExtractionStep} configured to write with
 * the OmegaT package writer into {@code workDir}.
 *
 * @param locale used to build the package name ("Translate_" + locale)
 * @param lStep pre-configured leveraging step added after segmentation
 * @param docs documents added to the batch, each with its own input URI as
 * output URI and {@code outEnc} as output encoding
 * @param tmx not used in the method body
 */
private void generateOmegaTKit(LocaleId locale, LeveragingStep lStep, Set<RawDocument> docs, File tmx) {
    String projectBaseDir = getProject().getBaseDir().getAbsolutePath();

    PipelineDriver pipeline = new PipelineDriver();
    pipeline.setFilterConfigurationMapper(getFilterMapper());
    pipeline.setRootDirectories("", projectBaseDir);
    pipeline.setOutputDirectory(getProject().getBaseDir().getAbsolutePath());

    // Parsing: raw documents to filter events.
    pipeline.addStep(new RawDocumentToFilterEventsStep());

    // Segmentation of the source, only when SRX rules were provided.
    if (srx != null) {
        SegmentationStep segmentationStep = new SegmentationStep();
        pipeline.addStep(segmentationStep);
        net.sf.okapi.steps.segmentation.Parameters segmentationParams =
                (net.sf.okapi.steps.segmentation.Parameters) segmentationStep.getParameters();
        segmentationParams.setSegmentSource(true);
        segmentationParams.setSourceSrxPath(new File(getProject().getBaseDir(), srx).getAbsolutePath());
        segmentationParams.setCopySource(false);
    }

    // Leveraging, as configured by the caller.
    pipeline.addStep(lStep);

    // Approval of all the text units.
    pipeline.addStep(new ApproverStep());

    // Extraction into an OmegaT package.
    ExtractionStep extractionStep = new ExtractionStep();
    pipeline.addStep(extractionStep);
    net.sf.okapi.steps.rainbowkit.creation.Parameters extractionParams =
            (net.sf.okapi.steps.rainbowkit.creation.Parameters) extractionStep.getParameters();
    extractionParams.setWriterClass("net.sf.okapi.steps.rainbowkit.omegat.OmegaTPackageWriter");
    extractionParams.setWriterOptions("#v1\n"
            + "placeholderMode.b=false\n"
            + "allowSegmentation.b=false\n"
            + "includePostProcessingHook.b=true");
    extractionParams.setPackageName("Translate_" + locale.toString());
    extractionParams.setPackageDirectory(workDir.toString());

    for (RawDocument document : docs) {
        pipeline.addBatchItem(document, document.getInputURI(), outEnc);
    }

    pipeline.processBatch();
}