This article collects typical usage examples of the Java method org.apache.hadoop.tools.util.DistCpUtils.publish. If you are wondering how DistCpUtils.publish is used in practice, or are looking for concrete examples of calling it, the curated code samples below may help. You can also explore further usage examples of its enclosing class, org.apache.hadoop.tools.util.DistCpUtils.
The following presents 5 code examples of the DistCpUtils.publish method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
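Before diving into the examples, here is a minimal, self-contained sketch of what a call to DistCpUtils.publish amounts to. The property label used below is made up for illustration, and the described semantics (storing the value as a string property in the job Configuration so it can be read back later) are an assumption inferred from how the method is called in the examples, not something stated on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.util.DistCpUtils;

public class PublishSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Hypothetical label; DistCpUtils.publish is assumed to store the value
    // as a string property under this key in the Configuration.
    DistCpUtils.publish(conf, "distcp.example.num-splits", 7);
    // If so, the value can be read back wherever the Configuration is visible.
    System.out.println(conf.getInt("distcp.example.num-splits", -1)); // expected: 7
  }
}

In the real examples that follow, the label is one of DistCp's internal configuration constants (CONF_LABEL_NUM_SPLITS or CONF_LABEL_NUM_ENTRIES_PER_CHUNK).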
Example 1: createSplits
import org.apache.hadoop.tools.util.DistCpUtils; // import the package/class this method depends on
private List<InputSplit> createSplits(JobContext jobContext,
                                      List<DynamicInputChunk> chunks)
    throws IOException {
  int numMaps = getNumMapTasks(jobContext.getConfiguration());

  final int nSplits = Math.min(numMaps, chunks.size());
  List<InputSplit> splits = new ArrayList<InputSplit>(nSplits);

  for (int i = 0; i < nSplits; ++i) {
    TaskID taskId = new TaskID(jobContext.getJobID(), TaskType.MAP, i);
    chunks.get(i).assignTo(taskId);
    splits.add(new FileSplit(chunks.get(i).getPath(), 0,
        // Setting non-zero length for FileSplit size, to avoid a possible
        // future when 0-sized file-splits are considered "empty" and skipped
        // over.
        getMinRecordsPerChunk(jobContext.getConfiguration()),
        null));
  }
  DistCpUtils.publish(jobContext.getConfiguration(),
                      CONF_LABEL_NUM_SPLITS, splits.size());
  return splits;
}
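For context, the value published here under CONF_LABEL_NUM_SPLITS is meant to be read back later from the same Configuration. A minimal sketch of that consumer side is below; the class and method names and the -1 default are illustrative assumptions, since the reading code is not shown on this page.

import org.apache.hadoop.conf.Configuration;

// Sketch of the consumer side: reading back the split count that
// createSplits() published. The names here are hypothetical.
final class SplitCountReader {
  static int readNumSplits(Configuration conf, String confLabelNumSplits) {
    // publish() stored the int as a configuration property, so a plain
    // getInt (with an illustrative -1 default) recovers it.
    return conf.getInt(confLabelNumSplits, -1);
  }
}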
Example 2: createSplits
import org.apache.hadoop.tools.util.DistCpUtils; // import the package/class this method depends on
private List<InputSplit> createSplits(JobContext jobContext,
                                      List<DynamicInputChunk> chunks)
    throws IOException {
  int numMaps = getNumMapTasks(jobContext.getConfiguration());

  final int nSplits = Math.min(numMaps, chunks.size());
  List<InputSplit> splits = new ArrayList<InputSplit>(nSplits);

  for (int i = 0; i < nSplits; ++i) {
    TaskID taskId = new TaskID(jobContext.getJobID(), TaskType.MAP, i);
    chunks.get(i).assignTo(taskId);
    splits.add(new FileSplit(chunks.get(i).getPath(), 0,
        // Setting non-zero length for FileSplit size, to avoid a possible
        // future when 0-sized file-splits are considered "empty" and skipped
        // over.
        MIN_RECORDS_PER_CHUNK,
        null));
  }
  DistCpUtils.publish(jobContext.getConfiguration(),
                      CONF_LABEL_NUM_SPLITS, splits.size());
  return splits;
}
Example 3: splitCopyListingIntoChunksWithShuffle
import org.apache.hadoop.tools.util.DistCpUtils; // import the package/class this method depends on
private List<DynamicInputChunk> splitCopyListingIntoChunksWithShuffle
    (JobContext context) throws IOException {

  final Configuration configuration = context.getConfiguration();
  int numRecords = getNumberOfRecords(configuration);
  int numMaps = getNumMapTasks(configuration);
  int maxChunksTolerable = getMaxChunksTolerable(configuration);

  // Number of chunks each map will process, on average.
  int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords);
  validateNumChunksUsing(splitRatio, numMaps, maxChunksTolerable);

  int numEntriesPerChunk = (int) Math.ceil((float) numRecords
                                           / (splitRatio * numMaps));
  DistCpUtils.publish(context.getConfiguration(),
                      CONF_LABEL_NUM_ENTRIES_PER_CHUNK,
                      numEntriesPerChunk);

  final int nChunksTotal = (int) Math.ceil((float) numRecords / numEntriesPerChunk);
  int nChunksOpenAtOnce = Math.min(N_CHUNKS_OPEN_AT_ONCE_DEFAULT, nChunksTotal);

  Path listingPath = getListingFilePath(configuration);
  SequenceFile.Reader reader =
      new SequenceFile.Reader(configuration,
                              SequenceFile.Reader.file(listingPath));

  List<DynamicInputChunk> openChunks = new ArrayList<DynamicInputChunk>();
  List<DynamicInputChunk> chunksFinal = new ArrayList<DynamicInputChunk>();

  CopyListingFileStatus fileStatus = new CopyListingFileStatus();
  Text relPath = new Text();
  int recordCounter = 0;
  int chunkCount = 0;

  try {
    while (reader.next(relPath, fileStatus)) {
      if (recordCounter % (nChunksOpenAtOnce * numEntriesPerChunk) == 0) {
        // All chunks full. Create new chunk-set.
        closeAll(openChunks);
        chunksFinal.addAll(openChunks);

        openChunks = createChunks(
            configuration, chunkCount, nChunksTotal, nChunksOpenAtOnce);
        chunkCount += openChunks.size();
        nChunksOpenAtOnce = openChunks.size();
        recordCounter = 0;
      }

      // Shuffle into open chunks.
      openChunks.get(recordCounter % nChunksOpenAtOnce).write(relPath, fileStatus);
      ++recordCounter;
    }
  } finally {
    closeAll(openChunks);
    chunksFinal.addAll(openChunks);
    IOUtils.closeStream(reader);
  }

  LOG.info("Number of dynamic-chunk-files created: " + chunksFinal.size());
  return chunksFinal;
}
Example 4: splitCopyListingIntoChunksWithShuffle
import org.apache.hadoop.tools.util.DistCpUtils; // import the package/class this method depends on
private List<DynamicInputChunk> splitCopyListingIntoChunksWithShuffle
    (JobContext context) throws IOException {

  final Configuration configuration = context.getConfiguration();
  int numRecords = getNumberOfRecords(configuration);
  int numMaps = getNumMapTasks(configuration);
  int maxChunksTolerable = getMaxChunksTolerable(configuration);

  // Number of chunks each map will process, on average.
  int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords);
  validateNumChunksUsing(splitRatio, numMaps, maxChunksTolerable);

  int numEntriesPerChunk = (int) Math.ceil((float) numRecords
                                           / (splitRatio * numMaps));
  DistCpUtils.publish(context.getConfiguration(),
                      CONF_LABEL_NUM_ENTRIES_PER_CHUNK,
                      numEntriesPerChunk);

  final int nChunksTotal = (int) Math.ceil((float) numRecords / numEntriesPerChunk);
  int nChunksOpenAtOnce = Math.min(N_CHUNKS_OPEN_AT_ONCE_DEFAULT, nChunksTotal);

  Path listingPath = getListingFilePath(configuration);
  SequenceFile.Reader reader =
      new SequenceFile.Reader(configuration,
                              SequenceFile.Reader.file(listingPath));

  List<DynamicInputChunk> openChunks = new ArrayList<DynamicInputChunk>();
  List<DynamicInputChunk> chunksFinal = new ArrayList<DynamicInputChunk>();

  CopyListingFileStatus fileStatus = new CopyListingFileStatus();
  Text relPath = new Text();
  int recordCounter = 0;
  int chunkCount = 0;

  try {
    while (reader.next(relPath, fileStatus)) {
      if (recordCounter % (nChunksOpenAtOnce * numEntriesPerChunk) == 0) {
        // All chunks full. Create new chunk-set.
        closeAll(openChunks);
        chunksFinal.addAll(openChunks);

        openChunks = createChunks(chunkCount, nChunksTotal,
                                  nChunksOpenAtOnce);
        chunkCount += openChunks.size();
        nChunksOpenAtOnce = openChunks.size();
        recordCounter = 0;
      }

      // Shuffle into open chunks.
      openChunks.get(recordCounter % nChunksOpenAtOnce).write(relPath, fileStatus);
      ++recordCounter;
    }
  } finally {
    closeAll(openChunks);
    chunksFinal.addAll(openChunks);
    IOUtils.closeStream(reader);
  }

  LOG.info("Number of dynamic-chunk-files created: " + chunksFinal.size());
  return chunksFinal;
}
Example 5: splitCopyListingIntoChunksWithShuffle
import org.apache.hadoop.tools.util.DistCpUtils; // import the package/class this method depends on
private List<DynamicInputChunk> splitCopyListingIntoChunksWithShuffle
    (JobContext context) throws IOException {

  final Configuration configuration = context.getConfiguration();
  int numRecords = getNumberOfRecords(configuration);
  int numMaps = getNumMapTasks(configuration);

  // Number of chunks each map will process, on average.
  int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords);
  validateNumChunksUsing(splitRatio, numMaps);

  int numEntriesPerChunk = (int) Math.ceil((float) numRecords
                                           / (splitRatio * numMaps));
  DistCpUtils.publish(context.getConfiguration(),
                      CONF_LABEL_NUM_ENTRIES_PER_CHUNK,
                      numEntriesPerChunk);

  final int nChunksTotal = (int) Math.ceil((float) numRecords / numEntriesPerChunk);
  int nChunksOpenAtOnce = Math.min(N_CHUNKS_OPEN_AT_ONCE_DEFAULT, nChunksTotal);

  Path listingPath = getListingFilePath(configuration);
  SequenceFile.Reader reader =
      new SequenceFile.Reader(configuration,
                              SequenceFile.Reader.file(listingPath));

  List<DynamicInputChunk> openChunks = new ArrayList<DynamicInputChunk>();
  List<DynamicInputChunk> chunksFinal = new ArrayList<DynamicInputChunk>();

  FileStatus fileStatus = new FileStatus();
  Text relPath = new Text();
  int recordCounter = 0;
  int chunkCount = 0;

  try {
    while (reader.next(relPath, fileStatus)) {
      if (recordCounter % (nChunksOpenAtOnce * numEntriesPerChunk) == 0) {
        // All chunks full. Create new chunk-set.
        closeAll(openChunks);
        chunksFinal.addAll(openChunks);

        openChunks = createChunks(
            configuration, chunkCount, nChunksTotal, nChunksOpenAtOnce);
        chunkCount += openChunks.size();
        nChunksOpenAtOnce = openChunks.size();
        recordCounter = 0;
      }

      // Shuffle into open chunks.
      openChunks.get(recordCounter % nChunksOpenAtOnce).write(relPath, fileStatus);
      ++recordCounter;
    }
  } finally {
    closeAll(openChunks);
    chunksFinal.addAll(openChunks);
    IOUtils.closeStream(reader);
  }

  LOG.info("Number of dynamic-chunk-files created: " + chunksFinal.size());
  return chunksFinal;
}