This article collects typical usage examples of the Java method org.apache.pig.PigServer.store from open-source projects. If you are wondering exactly what PigServer.store does, or how and when to call it, the examples selected below should help; you can also look further into the usage of its enclosing class, org.apache.pig.PigServer.
Fifteen code examples of PigServer.store are shown below, ordered by popularity.
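Before diving into the examples, here is a minimal sketch of the basic pattern; the input path, alias, and schema are illustrative assumptions, not taken from any of the projects below. You register a LOAD statement under an alias, then call store(alias, path) to execute the plan and write the result; a three-argument overload additionally selects the StoreFunc.

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecJob;

public class PigServerStoreSketch {
    public static void main(String[] args) throws Exception {
        // Local mode keeps the sketch self-contained; use ExecType.MAPREDUCE on a cluster.
        PigServer pig = new PigServer(ExecType.LOCAL);
        // Illustrative input path and schema.
        pig.registerQuery("A = LOAD '/tmp/input.tsv' USING PigStorage('\t') AS (id:int, name:chararray);");
        // store() runs the plan for alias A and writes it to the output path.
        ExecJob job = pig.store("A", "/tmp/store_out");
        System.out.println("Bytes written: " + job.getStatistics().getBytesWritten());
        // The three-argument overload names the StoreFunc, e.g. the builtin BinStorage:
        // pig.store("A", "/tmp/store_out_bin", "BinStorage");
        pig.shutdown();
    }
}

Note that, as with any Hadoop output directory, the store fails if the output path already exists, which is why several examples below delete the output location first.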
Example 1: testBytesWritten_JIRA_1027
import org.apache.pig.PigServer; // import the package/class the method depends on
@Test
public void testBytesWritten_JIRA_1027() {
File outputFile = null;
try {
String fileName = this.getClass().getName() + "_" + "testBytesWritten_JIRA_1027";
outputFile = File.createTempFile(fileName, ".out");
String filePath = outputFile.getAbsolutePath();
outputFile.delete();
PigServer pig = new PigServer(ExecType.LOCAL);
pig.registerQuery("A = load 'test/org/apache/pig/test/data/passwd';");
ExecJob job = pig.store("A", filePath);
PigStats stats = job.getStatistics();
File dataFile = new File(outputFile.getAbsoluteFile() + File.separator + "part-00000");
assertEquals(dataFile.length(), stats.getBytesWritten());
} catch (IOException e) {
LOG.error("Error while generating file", e);
fail("Encountered IOException");
} finally {
if (outputFile != null) {
// Hadoop Local mode creates a directory
// Hence we need to delete a directory recursively
deleteDirectory(outputFile);
}
}
}
Example 2: generateFullJoinForPreprocessForAlphasLearning
import org.apache.pig.PigServer; // import the package/class the method depends on
public static String generateFullJoinForPreprocessForAlphasLearning(int t,
String dataPath, String dataInitialSchema, char separator,
String prefix, boolean isForRMSE) throws IOException {
PigServer pigServer = new PigServer(ExecType.MAPREDUCE);
pigServer.registerQuery("data = LOAD '" + prefix + dataPath
+ "' USING PigStorage('" + separator + "') AS ("
+ dataInitialSchema + ");");
pigServer.registerQuery("V = LOAD '" + prefix + "V_" + t
+ "' USING PigStorage('\t') AS (iid:int, iSolNumber:int);");
pigServer.registerQuery("U = LOAD '" + prefix + "U_" + t
+ "' USING PigStorage('\t') AS (uid:int, uSolNumber:int);");
// join the ratings with the per-item and per-user solution-number tables
pigServer.registerQuery("dataJoinV = JOIN data BY iid, V BY iid;");
pigServer.registerQuery("dataJoinVJoinU = JOIN dataJoinV BY uid, U BY uid;");
pigServer.registerQuery("fullJoin = FOREACH dataJoinVJoinU GENERATE data::uid AS uid, data::iid AS iid, U::uSolNumber AS uSolNumber, V::iSolNumber AS iSolNumber, data::rating AS rating;");
String outputPath;
if (!isForRMSE) {
outputPath = "AlphasLearningFullJoin_" + t;
} else {
outputPath = "RMSEFullJoin_" + t;
}
pigServer.store("fullJoin", prefix + outputPath);
pigServer.shutdown();
return prefix + outputPath;
}
Example 3: script
import org.apache.pig.PigServer; // import the package/class the method depends on
/**
* Runs a Pig script and optionally stores one alias from it.
*
* @param scriptPath stream containing the Pig script to execute
* @param storeId alias to store, or null to skip the store step
* @param storeFile output path for the stored alias
*/
public void script(InputStream scriptPath, String storeId, String storeFile) {
PigServer pigServer = null;
try {
pigServer = new MumuPigConfiguration().mapreduce();
pigServer.debugOn();
pigServer.registerScript(scriptPath);
if (storeId != null) {
pigServer.store(storeId, storeFile);
}
} catch (IOException e) {
e.printStackTrace();
} finally {
// shut the server down on both the success and failure paths
if (pigServer != null) {
pigServer.shutdown();
}
}
}
Example 4: saveToHadoop
import org.apache.pig.PigServer; // import the package/class the method depends on
public void saveToHadoop() throws IOException {
PigServer pigServer = new PigServer(ExecType.MAPREDUCE);
String pigQuery = "REGISTER 'WebContent/WEB-INF/lib/mongo-hadoop-core-1.3.2.jar';"
+ "REGISTER 'WebContent/WEB-INF/lib/mongo-hadoop-pig-1.3.0.jar';"
+ " A = LOAD 'mongodb://localhost:27017/chatroom.messageBackup'"
+ " USING com.mongodb.hadoop.pig.MongoLoader('address, message, time')"
+ " AS (address:chararray, message:chararray, time:datetime);";
pigServer.registerQuery(pigQuery);
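// write the backed-up chat messages to HDFS, sanitizing the timestamp for use as a path segment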
pigServer.store("A", "/user/luffy/chatroom/" + this.time.toString().replaceAll(" ", "_").replaceAll(":", "-"));
pigServer.shutdown();
deleteBackupCollection();
}
Example 5: registerAndStore
import org.apache.pig.PigServer; // import the package/class the method depends on
private void registerAndStore(PigServer pigServer) throws IOException {
// pigServer.debugOn();
List<String> commands = getCommands();
for (final String command : commands) {
pigServer.registerQuery(command);
}
String outFileName = Util.removeColon(input.getAbsolutePath() + ".out");
pigServer.store("counts", outFileName);
Util.deleteFile(cluster, outFileName);
}
Example 6: testShouldStoreTupleAsHiveArray
import org.apache.pig.PigServer; // import the package/class the method depends on
@Test
public void testShouldStoreTupleAsHiveArray() throws IOException, InterruptedException, SerDeException {
String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()";
String singlePartitionedFile = simpleDataFile.getAbsolutePath();
File outputFile = new File("testhiveColumnarStore");
PigServer server = new PigServer(ExecType.LOCAL);
server.setBatchOn();
server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + loadString
+ ";");
server.registerQuery("b = FOREACH a GENERATE f1, TOTUPLE(f2,f3);");
//when
server.store("b", outputFile.getAbsolutePath(), storeString);
//then
Path outputPath = new Path(outputFile.getAbsolutePath()+"/part-m-00000.rc");
ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 array<string>");
assertEquals(2, struct.getFieldsAsList().size());
Object o = struct.getField(0);
assertEquals(LazyString.class, o.getClass());
o = struct.getField(1);
assertEquals(LazyArray.class, o.getClass());
LazyArray arr = (LazyArray)o;
List<Object> values = arr.getList();
for(Object value : values) {
assertEquals(LazyString.class, value.getClass());
String valueStr = ((LazyString) value).getWritableObject().toString();
assertEquals("Sample value", valueStr);
}
}
Example 7: testPigStatsGetList
import org.apache.pig.PigServer; // import the package/class the method depends on
@Test
public void testPigStatsGetList() {
File outputFile = null;
try {
String filename = this.getClass().getSimpleName() + "_" + "testPigStatsGetList";
outputFile = File.createTempFile(filename, ".out");
String filePath = outputFile.getAbsolutePath();
outputFile.delete();
PigServer pigServer = new PigServer(ExecType.LOCAL);
pigServer.registerQuery("a = load 'test/org/apache/pig/test/data/passwd';");
pigServer.registerQuery("b = group a by $0;");
pigServer.registerQuery("c = foreach b generate group, COUNT(a) as cnt;");
pigServer.registerQuery("d = group c by cnt;");
pigServer.registerQuery("e = foreach d generate group;");
ExecJob job = pigServer.store("e", filePath);
JobGraph jobGraph = job.getStatistics().getJobGraph();
assertEquals(2, jobGraph.getJobList().size());
} catch (IOException e) {
LOG.error("IOException while creating file ", e);
fail("Encountered IOException");
} finally {
if (outputFile != null) {
// delete the directory before returning
deleteDirectory(outputFile);
}
}
}
Example 8: createInputFile
import org.apache.pig.PigServer; // import the package/class the method depends on
private static void createInputFile(PigServer pigServer, String[] inputs, int id, File outputDir) throws IOException {
File input = File.createTempFile("tmp", "");
input.delete();
Util.createLocalInputFile(input.getAbsolutePath(), inputs);
pigServer.registerQuery("A = load '" + Util.encodeEscape(input.getAbsolutePath()) + "' as (a0:int, a1:int);");
File output = new File(outputDir, "/" + id);
pigServer.store("A", output.getAbsolutePath(), "org.apache.pig.piggybank.storage.IndexedStorage('\t','0,1')");
}
Example 9: testShouldStoreBagAsHiveArray
import org.apache.pig.PigServer; // import the package/class the method depends on
@Test
public void testShouldStoreBagAsHiveArray() throws IOException, InterruptedException, SerDeException {
String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()";
String singlePartitionedFile = simpleDataFile.getAbsolutePath();
File outputFile = new File("testhiveColumnarStore");
PigServer server = new PigServer(ExecType.LOCAL);
server.setBatchOn();
server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + loadString
+ ";");
server.registerQuery("b = FOREACH a GENERATE f1, TOBAG(f2,f3);");
//when
server.store("b", outputFile.getAbsolutePath(), storeString);
//then
Path outputPath = new Path(outputFile.getAbsolutePath()+"/part-m-00000.rc");
ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 array<string>");
assertEquals(2, struct.getFieldsAsList().size());
Object o = struct.getField(0);
assertEquals(LazyString.class, o.getClass());
o = struct.getField(1);
assertEquals(LazyArray.class, o.getClass());
LazyArray arr = (LazyArray)o;
List<Object> values = arr.getList();
for(Object value : values) {
assertEquals(LazyString.class, value.getClass());
String valueStr = ((LazyString) value).getWritableObject().toString();
assertEquals("Sample value", valueStr);
}
}
Example 10: testMapOnly
import org.apache.pig.PigServer; // import the package/class the method depends on
@Test
public void testMapOnly() throws IOException, ExecException {
int count = 0;
PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
for(int i = 0; i < MAX; i++) {
int t = r.nextInt(100);
pw.println(t);
if(t > 50) count ++;
}
pw.close();
PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pigServer.registerQuery("a = load '" + file + "';");
pigServer.registerQuery("b = filter a by $0 > 50;");
pigServer.registerQuery("c = foreach b generate $0 - 50;");
ExecJob job = pigServer.store("c", "output_map_only");
PigStats pigStats = job.getStatistics();
//counting the no. of bytes in the output file
//long filesize = cluster.getFileSystem().getFileStatus(new Path("output_map_only")).getLen();
InputStream is = FileLocalizer.open(FileLocalizer.fullPath(
"output_map_only", pigServer.getPigContext()), pigServer.getPigContext());
long filesize = 0;
while(is.read() != -1) filesize++;
is.close();
cluster.getFileSystem().delete(new Path(file), true);
cluster.getFileSystem().delete(new Path("output_map_only"), true);
System.out.println("============================================");
System.out.println("Test case Map Only");
System.out.println("============================================");
JobGraph jg = pigStats.getJobGraph();
Iterator<JobStats> iter = jg.iterator();
while (iter.hasNext()) {
JobStats js = iter.next();
System.out.println("Map input records : " + js.getMapInputRecords());
assertEquals(MAX, js.getMapInputRecords());
System.out.println("Map output records : " + js.getMapOutputRecords());
assertEquals(count, js.getMapOutputRecords());
assertEquals(0, js.getReduceInputRecords());
assertEquals(0, js.getReduceOutputRecords());
System.out.println("Hdfs bytes written : " + js.getHdfsBytesWritten());
assertEquals(filesize, js.getHdfsBytesWritten());
}
}
Example 11: testShouldStoreMapAsHiveMap
import org.apache.pig.PigServer; // import the package/class the method depends on
@Test
public void testShouldStoreMapAsHiveMap() throws IOException, InterruptedException, SerDeException {
String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()";
String singlePartitionedFile = simpleDataFile.getAbsolutePath();
File outputFile = new File("testhiveColumnarStore");
PigServer server = new PigServer(ExecType.LOCAL);
server.setBatchOn();
server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + loadString
+ ";");
server.registerQuery("b = FOREACH a GENERATE f1, TOMAP(f2,f3);");
//when
server.store("b", outputFile.getAbsolutePath(), storeString);
//then
Path outputPath = new Path(outputFile.getAbsolutePath()+"/part-m-00000.rc");
ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 map<string,string>");
assertEquals(2, struct.getFieldsAsList().size());
Object o = struct.getField(0);
assertEquals(LazyString.class, o.getClass());
o = struct.getField(1);
assertEquals(LazyMap.class, o.getClass());
LazyMap arr = (LazyMap)o;
Map<Object,Object> values = arr.getMap();
for(Entry<Object,Object> entry : values.entrySet()) {
assertEquals(LazyString.class, entry.getKey().getClass());
assertEquals(LazyString.class, entry.getValue().getClass());
String keyStr = ((LazyString) entry.getKey()).getWritableObject().toString();
assertEquals("Sample value", keyStr);
String valueStr = ((LazyString) entry.getValue()).getWritableObject().toString();
assertEquals("Sample value", valueStr);
}
}
Example 12: testMapReduceOnly
import org.apache.pig.PigServer; // import the package/class the method depends on
@Test
public void testMapReduceOnly() throws IOException, ExecException {
int count = 0;
PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
int [] nos = new int[10];
for(int i = 0; i < 10; i++)
nos[i] = 0;
for(int i = 0; i < MAX; i++) {
int index = r.nextInt(10);
int value = r.nextInt(100);
nos[index] += value;
pw.println(index + "\t" + value);
}
pw.close();
for(int i = 0; i < 10; i++) {
if(nos[i] > 0) count ++;
}
PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pigServer.registerQuery("a = load '" + file + "';");
pigServer.registerQuery("b = group a by $0;");
pigServer.registerQuery("c = foreach b generate group;");
ExecJob job = pigServer.store("c", "output");
PigStats pigStats = job.getStatistics();
InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
pigServer.getPigContext()), pigServer.getPigContext());
long filesize = 0;
while(is.read() != -1) filesize++;
is.close();
cluster.getFileSystem().delete(new Path(file), true);
cluster.getFileSystem().delete(new Path("output"), true);
System.out.println("============================================");
System.out.println("Test case MapReduce");
System.out.println("============================================");
JobGraph jp = pigStats.getJobGraph();
Iterator<JobStats> iter = jp.iterator();
while (iter.hasNext()) {
JobStats js = iter.next();
System.out.println("Map input records : " + js.getMapInputRecords());
assertEquals(MAX, js.getMapInputRecords());
System.out.println("Map output records : " + js.getMapOutputRecords());
assertEquals(MAX, js.getMapOutputRecords());
System.out.println("Reduce input records : " + js.getReduceInputRecords());
assertEquals(MAX, js.getReduceInputRecords());
System.out.println("Reduce output records : " + js.getReduceOutputRecords());
assertEquals(count, js.getReduceOutputRecords());
}
System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
assertEquals(filesize, pigStats.getBytesWritten());
}
Example 13: testMapReduceOnlyBinStorage
import org.apache.pig.PigServer; // import the package/class the method depends on
@Test
public void testMapReduceOnlyBinStorage() throws IOException, ExecException {
int count = 0;
PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
int [] nos = new int[10];
for(int i = 0; i < 10; i++)
nos[i] = 0;
for(int i = 0; i < MAX; i++) {
int index = r.nextInt(10);
int value = r.nextInt(100);
nos[index] += value;
pw.println(index + "\t" + value);
}
pw.close();
for(int i = 0; i < 10; i++) {
if(nos[i] > 0) count ++;
}
PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pigServer.registerQuery("a = load '" + file + "';");
pigServer.registerQuery("b = group a by $0;");
pigServer.registerQuery("c = foreach b generate group;");
ExecJob job = pigServer.store("c", "output", "BinStorage");
PigStats pigStats = job.getStatistics();
InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
pigServer.getPigContext()), pigServer.getPigContext());
long filesize = 0;
while(is.read() != -1) filesize++;
is.close();
cluster.getFileSystem().delete(new Path(file), true);
cluster.getFileSystem().delete(new Path("output"), true);
System.out.println("============================================");
System.out.println("Test case MapReduce");
System.out.println("============================================");
JobGraph jp = pigStats.getJobGraph();
Iterator<JobStats> iter = jp.iterator();
while (iter.hasNext()) {
JobStats js = iter.next();
System.out.println("Map input records : " + js.getMapInputRecords());
assertEquals(MAX, js.getMapInputRecords());
System.out.println("Map output records : " + js.getMapOutputRecords());
assertEquals(MAX, js.getMapOutputRecords());
System.out.println("Reduce input records : " + js.getReduceInputRecords());
assertEquals(MAX, js.getReduceInputRecords());
System.out.println("Reduce output records : " + js.getReduceOutputRecords());
assertEquals(count, js.getReduceOutputRecords());
}
System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
assertEquals(filesize, pigStats.getBytesWritten());
}
Example 14: testMapCombineReduce
import org.apache.pig.PigServer; // import the package/class the method depends on
@Test
public void testMapCombineReduce() throws IOException, ExecException {
int count = 0;
PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
int [] nos = new int[10];
for(int i = 0; i < 10; i++)
nos[i] = 0;
for(int i = 0; i < MAX; i++) {
int index = r.nextInt(10);
int value = r.nextInt(100);
nos[index] += value;
pw.println(index + "\t" + value);
}
pw.close();
for(int i = 0; i < 10; i++) {
if(nos[i] > 0) count ++;
}
PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pigServer.registerQuery("a = load '" + file + "';");
pigServer.registerQuery("b = group a by $0;");
pigServer.registerQuery("c = foreach b generate group, SUM(a.$1);");
ExecJob job = pigServer.store("c", "output");
PigStats pigStats = job.getStatistics();
InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
pigServer.getPigContext()), pigServer.getPigContext());
long filesize = 0;
while(is.read() != -1) filesize++;
is.close();
cluster.getFileSystem().delete(new Path(file), true);
cluster.getFileSystem().delete(new Path("output"), true);
System.out.println("============================================");
System.out.println("Test case MapCombineReduce");
System.out.println("============================================");
JobGraph jp = pigStats.getJobGraph();
Iterator<JobStats> iter = jp.iterator();
while (iter.hasNext()) {
MRJobStats js = (MRJobStats) iter.next();
System.out.println("Map input records : " + js.getMapInputRecords());
assertEquals(MAX, js.getMapInputRecords());
System.out.println("Map output records : " + js.getMapOutputRecords());
assertEquals(MAX, js.getMapOutputRecords());
System.out.println("Reduce input records : " + js.getReduceInputRecords());
assertEquals(count, js.getReduceInputRecords());
System.out.println("Reduce output records : " + js.getReduceOutputRecords());
assertEquals(count, js.getReduceOutputRecords());
}
System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
assertEquals(filesize, pigStats.getBytesWritten());
}
Example 15: testMapCombineReduceBinStorage
import org.apache.pig.PigServer; // import the package/class the method depends on
@Test
public void testMapCombineReduceBinStorage() throws IOException, ExecException {
int count = 0;
PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
int [] nos = new int[10];
for(int i = 0; i < 10; i++)
nos[i] = 0;
for(int i = 0; i < MAX; i++) {
int index = r.nextInt(10);
int value = r.nextInt(100);
nos[index] += value;
pw.println(index + "\t" + value);
}
pw.close();
for(int i = 0; i < 10; i++) {
if(nos[i] > 0) count ++;
}
PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pigServer.registerQuery("a = load '" + file + "';");
pigServer.registerQuery("b = group a by $0;");
pigServer.registerQuery("c = foreach b generate group, SUM(a.$1);");
ExecJob job = pigServer.store("c", "output", "BinStorage");
PigStats pigStats = job.getStatistics();
InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
pigServer.getPigContext()), pigServer.getPigContext());
long filesize = 0;
while(is.read() != -1) filesize++;
is.close();
cluster.getFileSystem().delete(new Path(file), true);
cluster.getFileSystem().delete(new Path("output"), true);
System.out.println("============================================");
System.out.println("Test case MapCombineReduce");
System.out.println("============================================");
JobGraph jp = pigStats.getJobGraph();
Iterator<JobStats> iter = jp.iterator();
while (iter.hasNext()) {
JobStats js = iter.next();
System.out.println("Map input records : " + js.getMapInputRecords());
assertEquals(MAX, js.getMapInputRecords());
System.out.println("Map output records : " + js.getMapOutputRecords());
assertEquals(MAX, js.getMapOutputRecords());
System.out.println("Reduce input records : " + js.getReduceInputRecords());
assertEquals(count, js.getReduceInputRecords());
System.out.println("Reduce output records : " + js.getReduceOutputRecords());
assertEquals(count, js.getReduceOutputRecords());
}
System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
assertEquals(filesize, pigStats.getBytesWritten());
}