本文整理汇总了Java中org.apache.pig.tools.pigstats.PigStats.getJobGraph方法的典型用法代码示例。如果您正苦于以下问题:Java PigStats.getJobGraph方法的具体用法?Java PigStats.getJobGraph怎么用?Java PigStats.getJobGraph使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.pig.tools.pigstats.PigStats
的用法示例。
在下文中一共展示了PigStats.getJobGraph方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getJobs
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Converts the per-output job statistics held in a {@link PigStats}
 * instance into a list of {@link ExecJob} handles, one per (job, output)
 * pair. Failed jobs carry the failure exception on the returned handle.
 *
 * @param stats statistics gathered for the executed batch
 * @return A list of ExecJob objects
 */
protected List<ExecJob> getJobs(PigStats stats) {
    LinkedList<ExecJob> result = new LinkedList<ExecJob>();
    for (JobStats jobStats : stats.getJobGraph()) {
        boolean succeeded = jobStats.isSuccessful();
        for (OutputStats out : jobStats.getOutputs()) {
            if (succeeded) {
                result.add(new HJob(HJob.JOB_STATUS.COMPLETED, pigContext,
                        out.getPOStore(), out.getAlias(), stats));
            } else {
                HJob failed = new HJob(HJob.JOB_STATUS.FAILED, pigContext,
                        out.getPOStore(), out.getAlias(), stats);
                failed.setException(jobStats.getException());
                result.add(failed);
            }
        }
    }
    return result;
}
示例2: assertAllDocumentsOk
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Runs the given script against the given configuration and asserts that
 * every job in the resulting job graph sent all 10 documents successfully.
 */
private void assertAllDocumentsOk(String script, Configuration conf) throws Exception {
    PigServer server = setup(script, conf);
    List<ExecJob> batchJobs = server.executeBatch();
    PigStats statistics = batchJobs.get(0).getStatistics();
    for (JobStats jobStats : statistics.getJobGraph()) {
        Counters rawCounters = ((MRJobStats) jobStats).getHadoopCounters();
        assertNotNull(rawCounters);
        // Vespa-specific counters are derived from the raw Hadoop counters.
        VespaCounters vespaCounters = VespaCounters.get(rawCounters);
        assertEquals(10, vespaCounters.getDocumentsSent());
        assertEquals(0, vespaCounters.getDocumentsFailed());
        assertEquals(10, vespaCounters.getDocumentsOk());
    }
}
示例3: executeBatch
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Runs the currently queued batch (unless in load-only mode) and updates
 * the succeeded/failed job counters, writing a stack-trace log entry for
 * every failed job.
 */
private void executeBatch() throws IOException {
    if (!mPigServer.isBatchOn()) {
        return;
    }
    if (mExplain != null) {
        explainCurrentBatch();
    }
    if (mLoadOnly) {
        return;
    }
    mPigServer.executeBatch();
    for (JobStats jobStats : PigStats.get().getJobGraph()) {
        if (jobStats.isSuccessful()) {
            mNumSucceededJobs++;
            continue;
        }
        mNumFailedJobs++;
        Exception failure = jobStats.getException();
        if (failure == null) {
            // Hadoop reported nothing useful; synthesize a generic error.
            failure = new ExecException(
                    "Job failed, hadoop does not return any error message",
                    2244);
        }
        boolean verbose = "true".equalsIgnoreCase(
                mPigServer.getPigContext().getProperties().getProperty("verbose"));
        LogUtils.writeLog(failure,
                mPigServer.getPigContext().getProperties().getProperty("pig.logfile"),
                log,
                verbose,
                "Pig Stack Trace");
    }
}
示例4: executeBatch
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Runs the currently queued batch (unless in load-only mode) and updates
 * the succeeded/failed job counters. Each failure is logged with the job id
 * (when available) and a stack trace.
 */
private void executeBatch() throws IOException {
    if (!mPigServer.isBatchOn()) {
        return;
    }
    if (mExplain != null) {
        explainCurrentBatch();
    }
    if (mLoadOnly) {
        return;
    }
    mPigServer.executeBatch();
    for (JobStats jobStats : PigStats.get().getJobGraph()) {
        if (jobStats.isSuccessful()) {
            mNumSucceededJobs++;
            continue;
        }
        mNumFailedJobs++;
        Exception failure = jobStats.getException();
        if (failure == null) {
            // Hadoop reported nothing useful; synthesize a generic error
            // that names the job when an id is known.
            failure = new ExecException(
                    "Job " + (jobStats.getJobId() == null ? "" : jobStats.getJobId() + " ") +
                    "failed, hadoop does not return any error message",
                    2244);
        }
        boolean verbose = "true".equalsIgnoreCase(
                mPigServer.getPigContext().getProperties().getProperty("verbose"));
        LogUtils.writeLog(failure,
                mPigServer.getPigContext().getProperties().getProperty("pig.logfile"),
                log,
                verbose,
                "Pig Stack Trace");
    }
}
示例5: getJobs
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Converts the per-output job statistics held in a {@link PigStats}
 * instance into a list of {@link ExecJob} handles, one per (job, output)
 * pair. Failed jobs carry the failure exception on the returned handle.
 *
 * @param stats statistics gathered for the executed batch
 * @return A list of ExecJob objects
 */
protected List<ExecJob> getJobs(PigStats stats) {
    LinkedList<ExecJob> result = new LinkedList<ExecJob>();
    // A batch that ran no MR jobs reports a single completed placeholder.
    if (stats instanceof EmptyPigStats) {
        result.add(new HJob(HJob.JOB_STATUS.COMPLETED, pigContext,
                stats.result(null).getPOStore(), null));
        return result;
    }
    for (JobStats jobStats : stats.getJobGraph()) {
        boolean succeeded = jobStats.isSuccessful();
        for (OutputStats out : jobStats.getOutputs()) {
            if (succeeded) {
                result.add(new HJob(HJob.JOB_STATUS.COMPLETED, pigContext,
                        out.getPOStore(), out.getAlias(), stats));
            } else {
                HJob failed = new HJob(HJob.JOB_STATUS.FAILED, pigContext,
                        out.getPOStore(), out.getAlias(), stats);
                failed.setException(jobStats.getException());
                result.add(failed);
            }
        }
    }
    return result;
}
示例6: testMapOnly
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Verifies per-job statistics (map record counts, zero reduce activity, and
 * HDFS bytes written) for a map-only pipeline: load / filter / foreach.
 */
@Test
public void testMapOnly() throws IOException, ExecException {
    int count = 0;
    // try-with-resources: the original leaked the writer if println threw
    // before close().
    try (PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file))) {
        for (int i = 0; i < MAX; i++) {
            int t = r.nextInt(100);
            pw.println(t);
            if (t > 50) count++;  // rows expected to survive the $0 > 50 filter
        }
    }
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = filter a by $0 > 50;");
    pigServer.registerQuery("c = foreach b generate $0 - 50;");
    ExecJob job = pigServer.store("c", "output_map_only");
    PigStats pigStats = job.getStatistics();
    //counting the no. of bytes in the output file
    //long filesize = cluster.getFileSystem().getFileStatus(new Path("output_map_only")).getLen();
    long filesize = 0;
    try (InputStream is = FileLocalizer.open(FileLocalizer.fullPath(
            "output_map_only", pigServer.getPigContext()), pigServer
            .getPigContext())) {
        while (is.read() != -1) filesize++;
    }
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output_map_only"), true);
    System.out.println("============================================");
    System.out.println("Test case Map Only");
    System.out.println("============================================");
    JobGraph jg = pigStats.getJobGraph();
    Iterator<JobStats> iter = jg.iterator();
    while (iter.hasNext()) {
        JobStats js = iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(count, js.getMapOutputRecords());
        // Map-only job: the reduce phase never runs.
        assertEquals(0, js.getReduceInputRecords());
        assertEquals(0, js.getReduceOutputRecords());
        System.out.println("Hdfs bytes written : " + js.getHdfsBytesWritten());
        assertEquals(filesize, js.getHdfsBytesWritten());
    }
}
示例7: testMapOnlyBinStorage
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Same map-only pipeline as testMapOnly but storing through BinStorage;
 * verifies per-job record counts and the total bytes written reported at
 * the PigStats level.
 */
@Test
public void testMapOnlyBinStorage() throws IOException, ExecException {
    int count = 0;
    // try-with-resources: the original leaked the writer if println threw
    // before close().
    try (PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file))) {
        for (int i = 0; i < MAX; i++) {
            int t = r.nextInt(100);
            pw.println(t);
            if (t > 50)
                count++;  // rows expected to survive the $0 > 50 filter
        }
    }
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = filter a by $0 > 50;");
    pigServer.registerQuery("c = foreach b generate $0 - 50;");
    ExecJob job = pigServer.store("c", "output_map_only", "BinStorage");
    PigStats pigStats = job.getStatistics();
    // Count output bytes by reading the stored file to EOF.
    long filesize = 0;
    try (InputStream is = FileLocalizer.open(FileLocalizer.fullPath(
            "output_map_only", pigServer.getPigContext()),
            pigServer.getPigContext())) {
        while (is.read() != -1) filesize++;
    }
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output_map_only"), true);
    System.out.println("============================================");
    System.out.println("Test case Map Only");
    System.out.println("============================================");
    JobGraph jp = pigStats.getJobGraph();
    Iterator<JobStats> iter = jp.iterator();
    while (iter.hasNext()) {
        JobStats js = iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(count, js.getMapOutputRecords());
        // Map-only job: the reduce phase never runs.
        assertEquals(0, js.getReduceInputRecords());
        assertEquals(0, js.getReduceOutputRecords());
    }
    System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
    assertEquals(filesize, pigStats.getBytesWritten());
}
示例8: testMapReduceOnly
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Verifies record counts for a full map+reduce pipeline (load / group /
 * foreach generate group) with no combiner effect: reduce input equals MAX
 * and reduce output equals the number of distinct keys seen.
 */
@Test
public void testMapReduceOnly() throws IOException, ExecException {
    int count = 0;
    int[] nos = new int[10];  // Java zero-initializes arrays; no manual loop needed
    // try-with-resources: the original leaked the writer if println threw
    // before close().
    try (PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file))) {
        for (int i = 0; i < MAX; i++) {
            int index = r.nextInt(10);
            int value = r.nextInt(100);
            nos[index] += value;
            pw.println(index + "\t" + value);
        }
    }
    // count = number of keys that accumulated a positive total.
    for (int i = 0; i < 10; i++) {
        if (nos[i] > 0) count++;
    }
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = group a by $0;");
    pigServer.registerQuery("c = foreach b generate group;");
    ExecJob job = pigServer.store("c", "output");
    PigStats pigStats = job.getStatistics();
    // Count output bytes by reading the stored file to EOF.
    long filesize = 0;
    try (InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
            pigServer.getPigContext()), pigServer.getPigContext())) {
        while (is.read() != -1) filesize++;
    }
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output"), true);
    System.out.println("============================================");
    System.out.println("Test case MapReduce");
    System.out.println("============================================");
    JobGraph jp = pigStats.getJobGraph();
    Iterator<JobStats> iter = jp.iterator();
    while (iter.hasNext()) {
        JobStats js = iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(MAX, js.getMapOutputRecords());
        System.out.println("Reduce input records : " + js.getReduceInputRecords());
        assertEquals(MAX, js.getReduceInputRecords());
        System.out.println("Reduce output records : " + js.getReduceOutputRecords());
        assertEquals(count, js.getReduceOutputRecords());
    }
    System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
    assertEquals(filesize, pigStats.getBytesWritten());
}
示例9: testMapReduceOnlyBinStorage
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Same map+reduce pipeline as testMapReduceOnly but storing through
 * BinStorage; verifies record counts and total bytes written.
 */
@Test
public void testMapReduceOnlyBinStorage() throws IOException, ExecException {
    int count = 0;
    int[] nos = new int[10];  // Java zero-initializes arrays; no manual loop needed
    // try-with-resources: the original leaked the writer if println threw
    // before close().
    try (PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file))) {
        for (int i = 0; i < MAX; i++) {
            int index = r.nextInt(10);
            int value = r.nextInt(100);
            nos[index] += value;
            pw.println(index + "\t" + value);
        }
    }
    // count = number of keys that accumulated a positive total.
    for (int i = 0; i < 10; i++) {
        if (nos[i] > 0) count++;
    }
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = group a by $0;");
    pigServer.registerQuery("c = foreach b generate group;");
    ExecJob job = pigServer.store("c", "output", "BinStorage");
    PigStats pigStats = job.getStatistics();
    // Count output bytes by reading the stored file to EOF.
    long filesize = 0;
    try (InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
            pigServer.getPigContext()), pigServer.getPigContext())) {
        while (is.read() != -1) filesize++;
    }
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output"), true);
    System.out.println("============================================");
    System.out.println("Test case MapReduce");
    System.out.println("============================================");
    JobGraph jp = pigStats.getJobGraph();
    Iterator<JobStats> iter = jp.iterator();
    while (iter.hasNext()) {
        JobStats js = iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(MAX, js.getMapOutputRecords());
        System.out.println("Reduce input records : " + js.getReduceInputRecords());
        assertEquals(MAX, js.getReduceInputRecords());
        System.out.println("Reduce output records : " + js.getReduceOutputRecords());
        assertEquals(count, js.getReduceOutputRecords());
    }
    System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
    assertEquals(filesize, pigStats.getBytesWritten());
}
示例10: testMapCombineReduce
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Verifies record counts when a combiner runs (group + SUM): map output is
 * MAX but reduce input collapses to the number of distinct keys, matching
 * reduce output.
 */
@Test
public void testMapCombineReduce() throws IOException, ExecException {
    int count = 0;
    int[] nos = new int[10];  // Java zero-initializes arrays; no manual loop needed
    // try-with-resources: the original leaked the writer if println threw
    // before close().
    try (PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file))) {
        for (int i = 0; i < MAX; i++) {
            int index = r.nextInt(10);
            int value = r.nextInt(100);
            nos[index] += value;
            pw.println(index + "\t" + value);
        }
    }
    // count = number of keys that accumulated a positive total.
    for (int i = 0; i < 10; i++) {
        if (nos[i] > 0) count++;
    }
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = group a by $0;");
    pigServer.registerQuery("c = foreach b generate group, SUM(a.$1);");
    ExecJob job = pigServer.store("c", "output");
    PigStats pigStats = job.getStatistics();
    // Count output bytes by reading the stored file to EOF.
    long filesize = 0;
    try (InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
            pigServer.getPigContext()), pigServer.getPigContext())) {
        while (is.read() != -1) filesize++;
    }
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output"), true);
    System.out.println("============================================");
    System.out.println("Test case MapCombineReduce");
    System.out.println("============================================");
    JobGraph jp = pigStats.getJobGraph();
    Iterator<JobStats> iter = jp.iterator();
    while (iter.hasNext()) {
        JobStats js = iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(MAX, js.getMapOutputRecords());
        // The combiner pre-aggregates, so the reducer sees one record per key.
        System.out.println("Reduce input records : " + js.getReduceInputRecords());
        assertEquals(count, js.getReduceInputRecords());
        System.out.println("Reduce output records : " + js.getReduceOutputRecords());
        assertEquals(count, js.getReduceOutputRecords());
    }
    System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
    assertEquals(filesize, pigStats.getBytesWritten());
}
示例11: testMapCombineReduceBinStorage
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Same combiner pipeline as testMapCombineReduce but storing through
 * BinStorage; verifies record counts and total bytes written.
 */
@Test
public void testMapCombineReduceBinStorage() throws IOException, ExecException {
    int count = 0;
    int[] nos = new int[10];  // Java zero-initializes arrays; no manual loop needed
    // try-with-resources: the original leaked the writer if println threw
    // before close().
    try (PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file))) {
        for (int i = 0; i < MAX; i++) {
            int index = r.nextInt(10);
            int value = r.nextInt(100);
            nos[index] += value;
            pw.println(index + "\t" + value);
        }
    }
    // count = number of keys that accumulated a positive total.
    for (int i = 0; i < 10; i++) {
        if (nos[i] > 0) count++;
    }
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = group a by $0;");
    pigServer.registerQuery("c = foreach b generate group, SUM(a.$1);");
    ExecJob job = pigServer.store("c", "output", "BinStorage");
    PigStats pigStats = job.getStatistics();
    // Count output bytes by reading the stored file to EOF.
    long filesize = 0;
    try (InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
            pigServer.getPigContext()), pigServer.getPigContext())) {
        while (is.read() != -1) filesize++;
    }
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output"), true);
    System.out.println("============================================");
    System.out.println("Test case MapCombineReduce");
    System.out.println("============================================");
    JobGraph jp = pigStats.getJobGraph();
    Iterator<JobStats> iter = jp.iterator();
    while (iter.hasNext()) {
        JobStats js = iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(MAX, js.getMapOutputRecords());
        // The combiner pre-aggregates, so the reducer sees one record per key.
        System.out.println("Reduce input records : " + js.getReduceInputRecords());
        assertEquals(count, js.getReduceInputRecords());
        System.out.println("Reduce output records : " + js.getReduceOutputRecords());
        assertEquals(count, js.getReduceOutputRecords());
    }
    System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
    assertEquals(filesize, pigStats.getBytesWritten());
}
示例12: testMultipleMRJobs
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Verifies statistics of the final (sink) job in a plan that compiles to
 * multiple MR jobs (order + group + SUM).
 */
@Test
public void testMultipleMRJobs() throws IOException, ExecException {
    int count = 0;
    int[] nos = new int[10];  // Java zero-initializes arrays; no manual loop needed
    // try-with-resources: the original leaked the writer if println threw
    // before close().
    try (PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file))) {
        for (int i = 0; i < MAX; i++) {
            int index = r.nextInt(10);
            int value = r.nextInt(100);
            nos[index] += value;
            pw.println(index + "\t" + value);
        }
    }
    // count = number of keys that accumulated a positive total.
    for (int i = 0; i < 10; i++) {
        if (nos[i] > 0) count++;
    }
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = order a by $0;");
    pigServer.registerQuery("c = group b by $0;");
    pigServer.registerQuery("d = foreach c generate group, SUM(b.$1);");
    ExecJob job = pigServer.store("d", "output");
    PigStats pigStats = job.getStatistics();
    // Count output bytes by reading the stored file to EOF.
    long filesize = 0;
    try (InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
            pigServer.getPigContext()), pigServer.getPigContext())) {
        while (is.read() != -1) filesize++;
    }
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output"), true);
    System.out.println("============================================");
    System.out.println("Test case MultipleMRJobs");
    System.out.println("============================================");
    JobGraph jp = pigStats.getJobGraph();
    // Only the sink job of the multi-job graph writes the final output.
    JobStats js = (JobStats) jp.getSinks().get(0);
    System.out.println("Job id: " + js.getName());
    System.out.println(jp.toString());
    System.out.println("Map input records : " + js.getMapInputRecords());
    assertEquals(MAX, js.getMapInputRecords());
    System.out.println("Map output records : " + js.getMapOutputRecords());
    assertEquals(MAX, js.getMapOutputRecords());
    System.out.println("Reduce input records : " + js.getReduceInputRecords());
    assertEquals(count, js.getReduceInputRecords());
    System.out.println("Reduce output records : " + js.getReduceOutputRecords());
    assertEquals(count, js.getReduceOutputRecords());
    System.out.println("Hdfs bytes written : " + js.getHdfsBytesWritten());
    assertEquals(filesize, js.getHdfsBytesWritten());
}
示例13: testMapOnly
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Verifies per-job MR statistics (map record counts, zero reduce activity,
 * and HDFS bytes written) for a map-only pipeline: load / filter / foreach.
 */
@Test
public void testMapOnly() throws IOException, ExecException {
    int count = 0;
    // try-with-resources: the original leaked the writer if println threw
    // before close().
    try (PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file))) {
        for (int i = 0; i < MAX; i++) {
            int t = r.nextInt(100);
            pw.println(t);
            if (t > 50) count++;  // rows expected to survive the $0 > 50 filter
        }
    }
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = filter a by $0 > 50;");
    pigServer.registerQuery("c = foreach b generate $0 - 50;");
    ExecJob job = pigServer.store("c", "output_map_only");
    PigStats pigStats = job.getStatistics();
    //counting the no. of bytes in the output file
    //long filesize = cluster.getFileSystem().getFileStatus(new Path("output_map_only")).getLen();
    long filesize = 0;
    try (InputStream is = FileLocalizer.open(FileLocalizer.fullPath(
            "output_map_only", pigServer.getPigContext()), pigServer
            .getPigContext())) {
        while (is.read() != -1) filesize++;
    }
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output_map_only"), true);
    System.out.println("============================================");
    System.out.println("Test case Map Only");
    System.out.println("============================================");
    JobGraph jg = pigStats.getJobGraph();
    Iterator<JobStats> iter = jg.iterator();
    while (iter.hasNext()) {
        MRJobStats js = (MRJobStats) iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(count, js.getMapOutputRecords());
        // Map-only job: the reduce phase never runs.
        assertEquals(0, js.getReduceInputRecords());
        assertEquals(0, js.getReduceOutputRecords());
        System.out.println("Hdfs bytes written : " + js.getHdfsBytesWritten());
        assertEquals(filesize, js.getHdfsBytesWritten());
    }
}
示例14: testMapOnlyBinStorage
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Same map-only pipeline as testMapOnly but storing through BinStorage;
 * verifies per-job MR record counts and the total bytes written reported
 * at the PigStats level.
 */
@Test
public void testMapOnlyBinStorage() throws IOException, ExecException {
    int count = 0;
    // try-with-resources: the original leaked the writer if println threw
    // before close().
    try (PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file))) {
        for (int i = 0; i < MAX; i++) {
            int t = r.nextInt(100);
            pw.println(t);
            if (t > 50)
                count++;  // rows expected to survive the $0 > 50 filter
        }
    }
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = filter a by $0 > 50;");
    pigServer.registerQuery("c = foreach b generate $0 - 50;");
    ExecJob job = pigServer.store("c", "output_map_only", "BinStorage");
    PigStats pigStats = job.getStatistics();
    // Count output bytes by reading the stored file to EOF.
    long filesize = 0;
    try (InputStream is = FileLocalizer.open(FileLocalizer.fullPath(
            "output_map_only", pigServer.getPigContext()),
            pigServer.getPigContext())) {
        while (is.read() != -1) filesize++;
    }
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output_map_only"), true);
    System.out.println("============================================");
    System.out.println("Test case Map Only");
    System.out.println("============================================");
    JobGraph jp = pigStats.getJobGraph();
    Iterator<JobStats> iter = jp.iterator();
    while (iter.hasNext()) {
        MRJobStats js = (MRJobStats) iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(count, js.getMapOutputRecords());
        // Map-only job: the reduce phase never runs.
        assertEquals(0, js.getReduceInputRecords());
        assertEquals(0, js.getReduceOutputRecords());
    }
    System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
    assertEquals(filesize, pigStats.getBytesWritten());
}
示例15: testMapReduceOnly
import org.apache.pig.tools.pigstats.PigStats; //导入方法依赖的package包/类
/**
 * Verifies MR record counts for a full map+reduce pipeline (load / group /
 * foreach generate group) with no combiner effect: reduce input equals MAX
 * and reduce output equals the number of distinct keys seen.
 */
@Test
public void testMapReduceOnly() throws IOException, ExecException {
    int count = 0;
    int[] nos = new int[10];  // Java zero-initializes arrays; no manual loop needed
    // try-with-resources: the original leaked the writer if println threw
    // before close().
    try (PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file))) {
        for (int i = 0; i < MAX; i++) {
            int index = r.nextInt(10);
            int value = r.nextInt(100);
            nos[index] += value;
            pw.println(index + "\t" + value);
        }
    }
    // count = number of keys that accumulated a positive total.
    for (int i = 0; i < 10; i++) {
        if (nos[i] > 0) count++;
    }
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = group a by $0;");
    pigServer.registerQuery("c = foreach b generate group;");
    ExecJob job = pigServer.store("c", "output");
    PigStats pigStats = job.getStatistics();
    // Count output bytes by reading the stored file to EOF.
    long filesize = 0;
    try (InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
            pigServer.getPigContext()), pigServer.getPigContext())) {
        while (is.read() != -1) filesize++;
    }
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output"), true);
    System.out.println("============================================");
    System.out.println("Test case MapReduce");
    System.out.println("============================================");
    JobGraph jp = pigStats.getJobGraph();
    Iterator<JobStats> iter = jp.iterator();
    while (iter.hasNext()) {
        MRJobStats js = (MRJobStats) iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(MAX, js.getMapOutputRecords());
        System.out.println("Reduce input records : " + js.getReduceInputRecords());
        assertEquals(MAX, js.getReduceInputRecords());
        System.out.println("Reduce output records : " + js.getReduceOutputRecords());
        assertEquals(count, js.getReduceOutputRecords());
    }
    System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
    assertEquals(filesize, pigStats.getBytesWritten());
}