本文整理匯總了Java中org.apache.hadoop.fs.ContentSummary.getLength方法的典型用法代碼示例。如果您正苦於以下問題:Java ContentSummary.getLength方法的具體用法?Java ContentSummary.getLength怎麼用?Java ContentSummary.getLength使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.hadoop.fs.ContentSummary的用法示例。
在下文中一共展示了ContentSummary.getLength方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: estimateNumberOfReducers
import org.apache.hadoop.fs.ContentSummary; //導入方法依賴的package包/類
/**
 * Estimates how many reducers this job needs from its input size and the
 * reducer-related configuration values.
 *
 * <p>The result should only be used when the output of this MapRedTask is not
 * populating a bucketed table and the user has not specified the number of
 * reducers explicitly.
 *
 * @param conf configuration supplying the bytes-per-reducer and max-reducers settings
 * @param inputSummary content summary whose length is the raw input size
 * @param work map work, consulted for sampling and inferred bucketing information
 * @param finalMapRed one of the conditions that must hold for power-of-two rounding
 * @return the number of reducers.
 * @throws IOException declared by the method signature
 */
public static int estimateNumberOfReducers(HiveConf conf, ContentSummary inputSummary,
    MapWork work, boolean finalMapRed) throws IOException {
  final long reducerByteBudget = conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);
  final int reducerCap = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS);

  final double highestSamplePct = getHighestSamplePercentage(work);
  final long effectiveInputSize = getTotalInputFileSize(inputSummary, work, highestSamplePct);

  // When the computed size differs from the raw summary length, the value is
  // an estimate; the log line says so.
  final String sizeLabel;
  if (effectiveInputSize != inputSummary.getLength()) {
    sizeLabel = " estimated totalInputFileSize=";
  } else {
    sizeLabel = " totalInputFileSize=";
  }
  LOG.info("BytesPerReducer=" + reducerByteBudget + " maxReducers="
      + reducerCap + sizeLabel + effectiveInputSize);

  // Power-of-two rounding is requested only when all three hold: the config
  // flag is on, finalMapRed is true, and the work reports bucketed columns.
  boolean roundToPowerOfTwo = false;
  if (conf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO)) {
    roundToPowerOfTwo = finalMapRed && !work.getBucketedColsByDirectory().isEmpty();
  }

  return estimateReducers(effectiveInputSize, reducerByteBudget, reducerCap, roundToPowerOfTwo);
}
示例2: processPath
import org.apache.hadoop.fs.ContentSummary; //導入方法依賴的package包/類
@Override
protected void processPath(PathData item) throws IOException {
  // Look up the content summary once; both reported values come from it.
  final ContentSummary summary = item.fs.getContentSummary(item.path);
  final long reportedLength = summary.getLength();
  final long reportedSpaceConsumed = summary.getSpaceConsumed();

  // One table row per path: formatted length, formatted space consumed, the item.
  usagesTable.addRow(formatSize(reportedLength), formatSize(reportedSpaceConsumed), item);
}
示例3: isEmptyPath
import org.apache.hadoop.fs.ContentSummary; //導入方法依賴的package包/類
/**
 * Reports whether {@code dirPath} is empty, using a content summary obtained
 * from {@code ctx} when one is available.
 *
 * <p>If {@code ctx} is null or returns no summary for the path, the decision
 * is delegated to {@code isEmptyPath(job, dirPath)}.
 *
 * @param job job configuration, passed through to the fallback check
 * @param dirPath directory to inspect
 * @param ctx context that may hold a content summary for the path; may be null
 * @return true when the summary reports zero length, zero files and at most
 *         one directory; otherwise the result of the fallback check
 * @throws Exception declared by the method signature
 */
public static boolean isEmptyPath(JobConf job, Path dirPath, Context ctx)
    throws Exception {
  if (ctx != null) {
    ContentSummary cs = ctx.getCS(dirPath);
    if (cs != null) {
      // A space separates the path from the "length:" label so the label is
      // not read as a trailing path component.
      LOG.info("Content Summary " + dirPath + " length: " + cs.getLength() + " num files: "
          + cs.getFileCount() + " num directories: " + cs.getDirectoryCount());
      // NOTE(review): "<= 1" presumably allows for the directory itself being
      // counted - confirm against ContentSummary semantics.
      return (cs.getLength() == 0 && cs.getFileCount() == 0 && cs.getDirectoryCount() <= 1);
    } else {
      LOG.info("Content Summary not cached for " + dirPath);
    }
  }
  return isEmptyPath(job, dirPath);
}
示例4: getTotalInputFileSize
import org.apache.hadoop.fs.ContentSummary; //導入方法依賴的package包/類
/**
 * Computes the total input file size. When block sampling was used, the raw
 * size is scaled by the highest sample percentage as an estimate of the input
 * actually read, and is never allowed to exceed the raw size.
 *
 * @param inputSummary content summary whose length is the raw input size
 * @param work map work, consulted for its name-to-split-sample mapping
 * @param highestSamplePercentage sample percentage on a 0-100 scale; ignored
 *        unless it is greater than or equal to zero
 * @return estimated total input size for job
 */
public static long getTotalInputFileSize (ContentSummary inputSummary, MapWork work,
    double highestSamplePercentage) {
  final long rawLength = inputSummary.getLength();

  // No sampling information means no estimation is needed.
  final boolean samplingUsed =
      work.getNameToSplitSample() != null && !work.getNameToSplitSample().isEmpty();
  if (!samplingUsed) {
    return rawLength;
  }

  // Written as a negated ">=" so that a NaN percentage also leaves the size unscaled.
  if (!(highestSamplePercentage >= 0)) {
    return rawLength;
  }

  final long scaledLength = (long) (rawLength * (highestSamplePercentage / 100D));
  return Math.min(scaledLength, rawLength);
}