This article collects typical usage examples of the Java method org.apache.hadoop.fs.FileUtil.stat2Paths. If you are wondering how to call FileUtil.stat2Paths in Java, or what it is used for, the curated code examples below may help. You can also explore further usage examples of its containing class, org.apache.hadoop.fs.FileUtil.
The following shows 15 code examples of the FileUtil.stat2Paths method, ordered roughly by popularity.
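Before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic pattern they all share: list a directory with FileSystem.listStatus, then convert the resulting FileStatus[] into a Path[] with FileUtil.stat2Paths. The directory path used here is a placeholder.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class Stat2PathsExample {
  public static void main(String[] args) throws IOException {
    // Placeholder directory; replace with a real HDFS or local path.
    Path dir = new Path("/tmp/job-output");
    Configuration conf = new Configuration();
    FileSystem fs = dir.getFileSystem(conf);

    // listStatus returns FileStatus[]; stat2Paths keeps only the Path of each entry.
    FileStatus[] statuses = fs.listStatus(dir);
    Path[] paths = FileUtil.stat2Paths(statuses);

    for (Path p : paths) {
      System.out.println(p);
    }
  }
}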
Example 1: readMatrixFromOutput
import org.apache.hadoop.fs.FileUtil; // import the required package/class
public static double[][] readMatrixFromOutput(Path dir, int n, int m) throws IOException {
  double[][] a = new double[n][m];
  Configuration conf = new Configuration();
  conf.setBoolean("fs.hdfs.impl.disable.cache", true);
  FileSystem fs = dir.getFileSystem(conf);
  for (Path p : FileUtil.stat2Paths(fs.listStatus(dir))) {
    if (p.toString().contains("part")) {
      readMatrixFromFile(fs, p, a);
    }
  }
  return a;
}
Example 2: readOutput
import org.apache.hadoop.fs.FileUtil; // import the required package/class
static String readOutput(Path outDir, Configuration conf)
    throws IOException {
  FileSystem fs = outDir.getFileSystem(conf);
  StringBuffer result = new StringBuffer();
  Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  for (Path outputFile : fileList) {
    LOG.info("Path" + ": " + outputFile);
    BufferedReader file =
        new BufferedReader(new InputStreamReader(fs.open(outputFile)));
    String line = file.readLine();
    while (line != null) {
      result.append(line);
      result.append("\n");
      line = file.readLine();
    }
    file.close();
  }
  return result.toString();
}
Example 3: readOutput
import org.apache.hadoop.fs.FileUtil; // import the required package/class
public static String readOutput(Path outDir, Configuration conf)
    throws IOException {
  FileSystem fs = outDir.getFileSystem(conf);
  StringBuffer result = new StringBuffer();
  Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  for (Path outputFile : fileList) {
    LOG.info("Path" + ": " + outputFile);
    BufferedReader file =
        new BufferedReader(new InputStreamReader(fs.open(outputFile)));
    String line = file.readLine();
    while (line != null) {
      result.append(line);
      result.append("\n");
      line = file.readLine();
    }
    file.close();
  }
  return result.toString();
}
Example 4: configure
import org.apache.hadoop.fs.FileUtil; // import the required package/class
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up the input data in a single file with two lines
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }
  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
          new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    // make sure we get what we expect as the first line, and also
    // that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
Example 5: validateOutput
import org.apache.hadoop.fs.FileUtil; // import the required package/class
private void validateOutput() throws IOException {
  Path[] outputFiles = FileUtil.stat2Paths(
      localFs.listStatus(new Path(TEST_ROOT_DIR + "/out"),
          new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = localFs.open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    while (line != null) {
      StringTokenizer tokeniz = new StringTokenizer(line, "\t");
      String key = tokeniz.nextToken();
      String value = tokeniz.nextToken();
      LOG.info("Output: key: " + key + " value: " + value);
      int errors = Integer.parseInt(value);
      assertTrue(errors == 0);
      line = reader.readLine();
    }
    reader.close();
  }
}
Example 6: checkOutput
import org.apache.hadoop.fs.FileUtil; // import the required package/class
protected void checkOutput() throws IOException {
  StringBuffer output = new StringBuffer(256);
  Path[] fileList = FileUtil.stat2Paths(fileSys.listStatus(
      new Path(OUTPUT_DIR)));
  for (int i = 0; i < fileList.length; i++) {
    LOG.info("Adding output from file: " + fileList[i]);
    output.append(StreamUtil.slurpHadoop(fileList[i], fileSys));
  }
  assertOutput(expectedOutput, output.toString());
}
Example 7: testTextAppend
import org.apache.hadoop.fs.FileUtil; // import the required package/class
@Test
public void testTextAppend() throws InterruptedException, LifecycleException,
    EventDeliveryException, IOException {
  LOG.debug("Starting...");
  final long rollCount = 3;
  final long batchSize = 2;
  final String fileName = "FlumeData";
  String newPath = testPath + "/singleTextBucket";
  int totalEvents = 0;
  int i = 1, j = 1;
  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);
  Context context = new Context();
  // context.put("hdfs.path", testPath + "/%Y-%m-%d/%H");
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.writeFormat", "Text");
  context.put("hdfs.fileType", "DataStream");
  Configurables.configure(sink, context);
  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);
  sink.setChannel(channel);
  sink.start();
  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();
  // push the event batches into the channel
  for (i = 1; i < 4; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // year, month, day, hour, minute
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();
    // execute sink to process the events
    sink.process();
  }
  sink.stop();
  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path[] fList = FileUtil.stat2Paths(dirStat);
  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  verifyOutputTextFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
Example 8: getReaders
import org.apache.hadoop.fs.FileUtil; // import the required package/class
/** Open the output generated by this format. */
public static SequenceFile.Reader[] getReaders(Configuration conf, Path dir)
    throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));
  // sort names, so that hash partitioning works
  Arrays.sort(names);
  SequenceFile.Reader[] parts = new SequenceFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    parts[i] = new SequenceFile.Reader(fs, names[i], conf);
  }
  return parts;
}
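As a hypothetical follow-up (not part of the original example), the SequenceFile.Reader array returned by getReaders could be drained like this; the Text key and IntWritable value types are assumptions and must match whatever the job actually wrote.

// Assumed imports: org.apache.hadoop.io.Text, org.apache.hadoop.io.IntWritable
static void dumpAll(SequenceFile.Reader[] parts) throws IOException {
  Text key = new Text();                 // assumed key type
  IntWritable value = new IntWritable(); // assumed value type
  for (SequenceFile.Reader reader : parts) {
    try {
      // next(key, value) fills both arguments and returns false at end of file
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    } finally {
      reader.close();
    }
  }
}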
Example 9: getSplits
import org.apache.hadoop.fs.FileUtil; // import the required package/class
@Override
public InputSplit[] getSplits(JobConf job, int numSplits)
    throws IOException {
  Path[] paths = FileUtil.stat2Paths(listStatus(job));
  List<MultiFileSplit> splits = new ArrayList<MultiFileSplit>(Math.min(numSplits, paths.length));
  if (paths.length != 0) {
    // HADOOP-1818: Manage splits only if there are paths
    long[] lengths = new long[paths.length];
    long totLength = 0;
    for (int i = 0; i < paths.length; i++) {
      FileSystem fs = paths[i].getFileSystem(job);
      lengths[i] = fs.getContentSummary(paths[i]).getLength();
      totLength += lengths[i];
    }
    double avgLengthPerSplit = ((double) totLength) / numSplits;
    long cumulativeLength = 0;
    int startIndex = 0;
    for (int i = 0; i < numSplits; i++) {
      int splitSize = findSize(i, avgLengthPerSplit, cumulativeLength,
          startIndex, lengths);
      if (splitSize != 0) {
        // HADOOP-1818: Manage the split only if its size is not equal to 0
        Path[] splitPaths = new Path[splitSize];
        long[] splitLengths = new long[splitSize];
        System.arraycopy(paths, startIndex, splitPaths, 0, splitSize);
        System.arraycopy(lengths, startIndex, splitLengths, 0, splitSize);
        splits.add(new MultiFileSplit(job, splitPaths, splitLengths));
        startIndex += splitSize;
        for (long l : splitLengths) {
          cumulativeLength += l;
        }
      }
    }
  }
  return splits.toArray(new MultiFileSplit[splits.size()]);
}
Example 10: launchWordCount
import org.apache.hadoop.fs.FileUtil; // import the required package/class
static String launchWordCount(URI fileSys, JobConf conf, String input,
    int numMaps, int numReduces)
    throws IOException {
  final Path inDir = new Path("/testing/wc/input");
  final Path outDir = new Path("/testing/wc/output");
  FileSystem fs = FileSystem.get(fileSys, conf);
  configureWordCount(fs, conf, input, numMaps, numReduces, inDir, outDir);
  JobClient.runJob(conf);
  StringBuffer result = new StringBuffer();
  {
    Path[] parents = FileUtil.stat2Paths(fs.listStatus(outDir.getParent()));
    Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir,
        new Utils.OutputFileUtils.OutputFilesFilter()));
    for (int i = 0; i < fileList.length; ++i) {
      BufferedReader file =
          new BufferedReader(new InputStreamReader(fs.open(fileList[i])));
      String line = file.readLine();
      while (line != null) {
        result.append(line);
        result.append("\n");
        line = file.readLine();
      }
      file.close();
    }
  }
  return result.toString();
}
Example 11: getReaders
import org.apache.hadoop.fs.FileUtil; // import the required package/class
/** Open the output generated by this format. */
public static MapFile.Reader[] getReaders(Path dir,
    Configuration conf) throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));
  // sort names, so that hash partitioning works
  Arrays.sort(names);
  MapFile.Reader[] parts = new MapFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    parts[i] = new MapFile.Reader(fs, names[i].toString(), conf);
  }
  return parts;
}
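The "sort names, so that hash partitioning works" comment is the key detail here: after sorting, the reader at index i corresponds to reduce partition i. A hypothetical lookup sketch (not from the original source; the Text key/value types and the use of HashPartitioner are assumptions) that exploits this:

// Assumed imports: org.apache.hadoop.io.Text, org.apache.hadoop.mapred.lib.HashPartitioner
static Text lookup(MapFile.Reader[] parts, Text key, Text value) throws IOException {
  // Recompute the same hash partition the job used, so the key maps to the reader that holds it.
  HashPartitioner<Text, Text> partitioner = new HashPartitioner<Text, Text>();
  int part = partitioner.getPartition(key, value, parts.length);
  return (Text) parts[part].get(key, value); // returns null if the key is absent
}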
Example 12: testComplexNameWithRegex
import org.apache.hadoop.fs.FileUtil; // import the required package/class
public void testComplexNameWithRegex() throws Exception {
  OutputStream os = getFileSystem().create(new Path(getInputDir(),
      "text.txt"));
  Writer wr = new OutputStreamWriter(os);
  wr.write("b a\n");
  wr.close();
  JobConf conf = createJobConf();
  conf.setJobName("name \\Evalue]");
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapperClass(IdentityMapper.class);
  FileInputFormat.setInputPaths(conf, getInputDir());
  FileOutputFormat.setOutputPath(conf, getOutputDir());
  JobClient.runJob(conf);
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(getOutputDir(),
          new Utils.OutputFileUtils.OutputFilesFilter()));
  assertEquals(1, outputFiles.length);
  InputStream is = getFileSystem().open(outputFiles[0]);
  BufferedReader reader = new BufferedReader(new InputStreamReader(is));
  assertEquals("0\tb a", reader.readLine());
  assertNull(reader.readLine());
  reader.close();
}
Example 13: doTestTextBatchAppend
import org.apache.hadoop.fs.FileUtil; // import the required package/class
public void doTestTextBatchAppend(boolean useRawLocalFileSystem)
    throws Exception {
  LOG.debug("Starting...");
  final long rollCount = 10;
  final long batchSize = 2;
  final String fileName = "FlumeData";
  String newPath = testPath + "/singleTextBucket";
  int totalEvents = 0;
  int i = 1, j = 1;
  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);
  Context context = new Context();
  // context.put("hdfs.path", testPath + "/%Y-%m-%d/%H");
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.rollInterval", "0");
  context.put("hdfs.rollSize", "0");
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.writeFormat", "Text");
  context.put("hdfs.useRawLocalFileSystem",
      Boolean.toString(useRawLocalFileSystem));
  context.put("hdfs.fileType", "DataStream");
  Configurables.configure(sink, context);
  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);
  sink.setChannel(channel);
  sink.start();
  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();
  // push enough event batches into the channel to trigger several rolls
  for (i = 1; i <= (rollCount * 10) / batchSize; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // year, month, day, hour, minute
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();
    // execute sink to process the events
    sink.process();
  }
  sink.stop();
  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path[] fList = FileUtil.stat2Paths(dirStat);
  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  // check the contents of all the files
  verifyOutputTextFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
Example 14: testAvroAppend
import org.apache.hadoop.fs.FileUtil; // import the required package/class
@Test
public void testAvroAppend() throws InterruptedException, LifecycleException,
    EventDeliveryException, IOException {
  LOG.debug("Starting...");
  final long rollCount = 3;
  final long batchSize = 2;
  final String fileName = "FlumeData";
  String newPath = testPath + "/singleTextBucket";
  int totalEvents = 0;
  int i = 1, j = 1;
  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);
  Context context = new Context();
  // context.put("hdfs.path", testPath + "/%Y-%m-%d/%H");
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.writeFormat", "Text");
  context.put("hdfs.fileType", "DataStream");
  context.put("serializer", "AVRO_EVENT");
  Configurables.configure(sink, context);
  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);
  sink.setChannel(channel);
  sink.start();
  Calendar eventDate = Calendar.getInstance();
  List<String> bodies = Lists.newArrayList();
  // push the event batches into the channel
  for (i = 1; i < 4; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // year, month, day, hour, minute
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      String body = "Test." + i + "." + j;
      event.setBody(body.getBytes());
      bodies.add(body);
      channel.put(event);
      totalEvents++;
    }
    txn.commit();
    txn.close();
    // execute sink to process the events
    sink.process();
  }
  sink.stop();
  // loop through all the files generated and check their contents
  FileStatus[] dirStat = fs.listStatus(dirPath);
  Path[] fList = FileUtil.stat2Paths(dirStat);
  // check that the roll happened correctly for the given data
  long expectedFiles = totalEvents / rollCount;
  if (totalEvents % rollCount > 0) expectedFiles++;
  Assert.assertEquals("num files wrong, found: " +
      Lists.newArrayList(fList), expectedFiles, fList.length);
  verifyOutputAvroFiles(fs, conf, dirPath.toUri().getPath(), fileName, bodies);
}
Example 15: testComparator
import org.apache.hadoop.fs.FileUtil; // import the required package/class
private void testComparator(String keySpec, int expect)
    throws Exception {
  String root = System.getProperty("test.build.data", "/tmp");
  Path inDir = new Path(root, "test_cmp/in");
  Path outDir = new Path(root, "test_cmp/out");
  conf.set("mapreduce.partition.keycomparator.options", keySpec);
  conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
  conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1,
      line1 + "\n" + line2 + "\n");
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setSortComparatorClass(KeyFieldBasedComparator.class);
  job.setPartitionerClass(KeyFieldBasedPartitioner.class);
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());
  // validate output
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    // make sure we get what we expect as the first line, and also
    // that we have two lines (both lines must end up in the same
    // reducer, since the partitioner uses the same key spec for all
    // lines)
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}