當前位置: 首頁>>代碼示例>>Java>>正文


Java MultipleOutputs.write方法代碼示例

本文整理匯總了Java中org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.write方法的典型用法代碼示例。如果您正苦於以下問題:Java MultipleOutputs.write方法的具體用法?Java MultipleOutputs.write怎麽用?Java MultipleOutputs.write使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在org.apache.hadoop.mapreduce.lib.output.MultipleOutputs的用法示例。


在下文中一共展示了MultipleOutputs.write方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Java代碼示例。

示例1: reduce

import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; //導入方法依賴的package包/類
/**
 * Writes every value belonging to {@code key} (a source file path) to a
 * compacted output file under the configured target directory, preserving the
 * source directory structure.
 *
 * @param key     source path of the file whose records are being compacted
 * @param values  text records read from that source path
 * @param context reducer context carrying the job {@link Configuration}
 * @throws IOException          if writing the compacted output fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void reduce(final Text key, final Iterable<Text> values, final Context context) throws IOException, InterruptedException {
    final Configuration configuration = context.getConfiguration();
    final String sourcePath = configuration.get("compactionSourcePath");
    final String targetPath = configuration.get("compactionTargetPath");

    // Reducer stores data at the target directory retaining the directory structure of files
    String filePath = key.toString().replace(sourcePath, targetPath);
    if (key.toString().endsWith("/")) {
        // Key is a directory; append a file name so the output is a file, not a bare directory.
        filePath = filePath.concat("file");
    }

    log.info("Compaction output path {}", filePath);
    final URI uri = URI.create(filePath);
    // Parameterized instead of raw MultipleOutputs to avoid an unchecked assignment.
    final MultipleOutputs<NullWritable, Text> multipleOutputs = new MultipleOutputs<>(context);
    try {
        for (final Text value : values) {
            multipleOutputs.write(NullWritable.get(), value, uri.toString());
        }
    } finally {
        // Must close MultipleOutputs, otherwise output files may be left incomplete.
        multipleOutputs.close();
    }
}
 
開發者ID:ExpediaInceCommercePlatform,項目名稱:dataSqueeze,代碼行數:26,代碼來源:TextCompactionReducer.java

示例2: reduce

import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; //導入方法依賴的package包/類
/**
 * Writes every ORC value belonging to {@code key} (a source file path) to a
 * compacted output file under the configured target directory, preserving the
 * source directory structure.
 *
 * @param key     source path of the file whose records are being compacted
 * @param values  ORC records read from that source path
 * @param context reducer context carrying the job {@link Configuration}
 * @throws IOException          if writing the compacted output fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void reduce(final Text key, final Iterable<OrcValue> values, final Context context) throws IOException, InterruptedException {
    final Configuration configuration = context.getConfiguration();
    final String sourcePath = configuration.get("compactionSourcePath");
    final String targetPath = configuration.get("compactionTargetPath");

    // Reducer stores data at the target directory retaining the directory structure of files
    String filePath = key.toString().replace(sourcePath, targetPath);
    if (key.toString().endsWith("/")) {
        // Key is a directory; append a file name so the output is a file, not a bare directory.
        filePath = filePath.concat("file");
    }

    log.info("Compaction output path {}", filePath);
    final URI uri = URI.create(filePath);
    // Parameterized instead of raw MultipleOutputs to avoid an unchecked assignment.
    final MultipleOutputs<NullWritable, OrcValue> multipleOutputs = new MultipleOutputs<>(context);
    try {
        // Renamed loop variable from "text": these are OrcValue records, not text.
        for (final OrcValue value : values) {
            multipleOutputs.write(NullWritable.get(), value, uri.toString());
        }
    } finally {
        // Must close MultipleOutputs, otherwise output files may be left incomplete.
        multipleOutputs.close();
    }
}
 
開發者ID:ExpediaInceCommercePlatform,項目名稱:dataSqueeze,代碼行數:26,代碼來源:OrcCompactionReducer.java

示例3: reduce

import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; //導入方法依賴的package包/類
/**
 * Writes every byte-oriented value belonging to {@code key} (a source file
 * path) to a compacted output file under the configured target directory,
 * preserving the source directory structure.
 *
 * @param key     source path of the file whose records are being compacted
 * @param values  byte records read from that source path
 * @param context reducer context carrying the job {@link Configuration}
 * @throws IOException          if writing the compacted output fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void reduce(final Text key, final Iterable<BytesWritable> values, final Context context) throws IOException, InterruptedException {
    final Configuration configuration = context.getConfiguration();
    final String sourcePath = configuration.get("compactionSourcePath");
    final String targetPath = configuration.get("compactionTargetPath");

    // Reducer stores data at the target directory retaining the directory structure of files
    String filePath = key.toString().replace(sourcePath, targetPath);
    if (key.toString().endsWith("/")) {
        // Key is a directory; append a file name so the output is a file, not a bare directory.
        filePath = filePath.concat("file");
    }

    log.info("Compaction output path {}", filePath);
    final URI uri = URI.create(filePath);
    // Parameterized instead of raw MultipleOutputs to avoid an unchecked assignment.
    final MultipleOutputs<NullWritable, BytesWritable> multipleOutputs = new MultipleOutputs<>(context);
    try {
        // Renamed loop variable from "text": these are raw bytes, not text.
        for (final BytesWritable value : values) {
            multipleOutputs.write(NullWritable.get(), value, uri.toString());
        }
    } finally {
        // Must close MultipleOutputs, otherwise output files may be left incomplete.
        multipleOutputs.close();
    }
}
 
開發者ID:ExpediaInceCommercePlatform,項目名稱:dataSqueeze,代碼行數:26,代碼來源:BytesWritableCompactionReducer.java

示例4: writeSingleWARCWritableToOutput

import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; //導入方法依賴的package包/類
/**
 * Writes a single {@link WARCWritable} to the given multiple-outputs sink.
 * The output file prefix is derived from the record's header fields:
 * license, language, no-boilerplate flag and minimal-HTML flag.
 *
 * @param warcWritable    warc record to emit
 * @param multipleOutputs sink receiving the record
 * @throws IOException          if the write fails
 * @throws InterruptedException if the write is interrupted
 */
// TODO move somewhere else?
public static void writeSingleWARCWritableToOutput(WARCWritable warcWritable,
        MultipleOutputs<NullWritable, WARCWritable> multipleOutputs)
        throws IOException, InterruptedException
{
    final WARCRecord.Header header = warcWritable.getRecord().getHeader();

    // Header fields that together determine which output file the record lands in.
    final String license = header.getField(WARCRecord.WARCRecordFieldConstants.LICENSE);
    final String language = header.getField(WARCRecord.WARCRecordFieldConstants.LANGUAGE);
    final String noBoilerplate = header
            .getField(WARCRecord.WARCRecordFieldConstants.NO_BOILERPLATE);
    final String minimalHtml = header.getField(WARCRecord.WARCRecordFieldConstants.MINIMAL_HTML);

    // Derive the output file name prefix from the header fields above.
    final String fileName = createOutputFilePrefix(license, language, noBoilerplate, minimalHtml);

    // NOTE(review): a previously-disabled variant split the dominant
    // "Lic_none_Lang_en" bucket (majority of the Web) into simhash-derived bins
    // via getBinNumberFromSimHash to relieve a single-reducer bottleneck;
    // reintroduce that here if the bottleneck reappears.

    multipleOutputs.write(NullWritable.get(), warcWritable, fileName);
}
 
開發者ID:dkpro,項目名稱:dkpro-c4corpus,代碼行數:34,代碼來源:WARCWriterReducerClass.java

示例5: writeTopKBounds

import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; //導入方法依賴的package包/類
/**
 * Emits, for every item whose top-k list is already full, the support count of
 * its k-th (weakest) pattern as a side output — but only when that count
 * strictly exceeds {@code minsup}.
 *
 * @param sideOutputs multiple-outputs sink for the (item, bound) pairs
 * @param outputName  named output to write to
 * @param path        base output path passed to the sink
 * @param minsup      minimum support threshold a bound must exceed to be emitted
 * @throws IOException          if the side output write fails
 * @throws InterruptedException if the write is interrupted
 */
public void writeTopKBounds(MultipleOutputs<?, ?> sideOutputs, String outputName, String path, int minsup) throws IOException, InterruptedException {
	// Writables are reused across iterations to avoid per-record allocation.
	final IntWritable item = new IntWritable();
	final IntWritable bound = new IntWritable();

	final TIntObjectIterator<PatternWithFreq[]> iterator = this.topK.iterator();
	while (iterator.hasNext()) {
		iterator.advance();

		final PatternWithFreq weakest = iterator.value()[this.k - 1];
		if (weakest == null) {
			continue; // this item's top-k list is not full yet — no bound to report
		}

		final int supportCount = weakest.getSupportCount();
		if (supportCount > minsup) {
			item.set(iterator.key());
			bound.set(supportCount);
			sideOutputs.write(outputName, item, bound, path);
		}
	}
}
 
開發者ID:slide-lig,項目名稱:TopPI,代碼行數:20,代碼來源:PerItemTopKHadoopCollector.java


注:本文中的org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.write方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。