本文整理汇总了Java中org.apache.pig.PigServer类的典型用法代码示例。如果您正苦于以下问题:Java PigServer类的具体用法?Java PigServer怎么用?Java PigServer使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
PigServer类属于org.apache.pig包,在下文中一共展示了PigServer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: setup
import org.apache.pig.PigServer; //导入依赖的package包/类
/**
 * Builds a local-mode PigServer in batch mode and registers the given script.
 * Dry-run and a dummy endpoint are forced unless already set in the config.
 *
 * @param script path of the Pig script to register
 * @param conf   Hadoop configuration; a fresh HdfsConfiguration is used when null
 * @return a batch-mode PigServer with the script registered
 * @throws Exception if the server cannot be created or the script fails to parse
 */
private PigServer setup(String script, Configuration conf) throws Exception {
    Configuration effectiveConf = (conf == null) ? new HdfsConfiguration() : conf;
    effectiveConf.setIfUnset(VespaConfiguration.DRYRUN, "true");
    effectiveConf.setIfUnset(VespaConfiguration.ENDPOINT, "dummy-endpoint");

    // Parameter substitutions - can also be set by configuration
    Map<String, String> substitutions = new HashMap<>();
    substitutions.put("ENDPOINT", "endpoint-does-not-matter-in-dryrun,another-endpoint-that-does-not-matter");

    PigServer server = new PigServer(ExecType.LOCAL, effectiveConf);
    server.setBatchOn();
    server.registerScript(script, substitutions);
    return server;
}
示例2: createPig
import org.apache.pig.PigServer; //导入依赖的package包/类
/**
 * Creates a PigServer: Map/Reduce mode when a remote Pig host is configured,
 * otherwise local mode with a local job tracker.
 *
 * @return a configured PigServer
 * @throws ExecException if the server cannot be created
 */
protected PigServer createPig() throws ExecException {
    HdpBootstrap.hackHadoopStagingOnWin();
    Properties properties = HdpBootstrap.asProperties(QueryTestParams.provisionQueries(HdpBootstrap.hadoopConfig()));
    String pigHost = properties.getProperty("pig");
    // remote Pig instance
    // Fix: compare the configured host itself against "local" — the original
    // tested an unrelated `pig` reference after checking pigHost for text.
    // NOTE(review): if `pig` was a deliberate field elsewhere, confirm intent.
    if (StringUtils.hasText(pigHost) && !"local".equals(pigHost)) {
        LogFactory.getLog(PigWrapper.class).info("Executing Pig in Map/Reduce mode");
        return new PigServer(ExecType.MAPREDUCE, properties);
    }
    // use local instance
    LogFactory.getLog(PigWrapper.class).info("Executing Pig in local mode");
    properties.put("mapred.job.tracker", "local");
    return new PigServer(ExecType.LOCAL, properties);
}
示例3: open
import org.apache.pig.PigServer; //导入依赖的package包/类
/**
 * Initializes the PigServer from interpreter properties.
 * Exec type defaults to "mapreduce"; every non-"zeppelin."-prefixed property
 * is forwarded into the PigContext.
 *
 * @throws RuntimeException (wrapping IOException) if the server cannot start
 */
@Override
public void open() {
    String execType = getProperty("zeppelin.pig.execType");
    if (execType == null) {
        execType = "mapreduce"; // default execution engine
    }
    String includeJobStats = getProperty("zeppelin.pig.includeJobStats");
    if (includeJobStats != null) {
        this.includeJobStats = Boolean.parseBoolean(includeJobStats);
    }
    try {
        pigServer = new PigServer(execType);
        // Properties.entrySet() yields Map.Entry<Object, Object>; the raw
        // Map.Entry in the original generated an unchecked-usage warning.
        for (Map.Entry<Object, Object> entry : getProperties().entrySet()) {
            if (!entry.getKey().toString().startsWith("zeppelin.")) {
                pigServer.getPigContext().getProperties().setProperty(entry.getKey().toString(),
                        entry.getValue().toString());
            }
        }
    } catch (IOException e) {
        LOGGER.error("Fail to initialize PigServer", e);
        throw new RuntimeException("Fail to initialize PigServer", e);
    }
}
示例4: DeduplicationJob
import org.apache.pig.PigServer; //导入依赖的package包/类
/**
 * Prepares a deduplication job over the given files, grouping by the given
 * dimensions, with gzip-compressed output.
 *
 * @param files      input files to deduplicate
 * @param dimensions dimension column names used for loading and grouping
 */
public DeduplicationJob(List<String> files, List<String> dimensions) {
    this.files = files;
    Properties props = new Properties();
    props.setProperty("output.compression.enabled", "true");
    props.setProperty("output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    try {
        this.pigServer = new PigServer(ExecType.MAPREDUCE, props);
    } catch (ExecException e) {
        // Route the full stack trace through the logger (SLF4J takes the
        // throwable as the last argument) instead of stderr; pigServer stays
        // null, preserving the original best-effort construction behavior.
        log.error("Couldn't execute pig server: {}", e.getMessage(), e);
    }
    this.usingAsDimensions = Joiner.on(":chararray, ").join(dimensions) + ", data:Map[], count:int";
    this.groupByDimensions = Joiner.on(", ").join(dimensions);
    this.loaderDimensions = "'" + Joiner.on("','").join(dimensions) + "'";
}
示例5: testMissingCaseExpression
import org.apache.pig.PigServer; //导入依赖的package包/类
/**
 * Verify that FrontendException is thrown when case expression is missing,
 * and when branches do not contain conditional expressions.
 * @throws Exception
 */
@Test(expected = FrontendException.class)
public void testMissingCaseExpression() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = resetData(pigServer);
    data.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5));
    pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    // CASE without a case expression, whose WHEN branches are also not
    // conditional expressions — registering it must raise FrontendException.
    String invalidCaseQuery =
            "B = FOREACH A GENERATE (" +
            " CASE " +
            " WHEN 0 THEN '3n'" +
            " WHEN 1 THEN '3n+1'" +
            " ELSE '3n+2'" +
            " END" +
            ");";
    pigServer.registerQuery(invalidCaseQuery);
    pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
    fail("FrontendException must be thrown for invalid case statement");
}
示例6: checkSchemaEx
import org.apache.pig.PigServer; //导入依赖的package包/类
/**
 * Registers the query and dumps the schema of alias "u", asserting that a
 * FrontendException carrying the expected message fragment is raised.
 *
 * @param query       multi-line Pig Latin query to register
 * @param expectedErr substring expected in the FrontendException message
 */
private void checkSchemaEx(String query, String expectedErr) throws IOException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    try {
        Util.registerMultiLineQuery(pig, query);
        pig.dumpSchema("u");
    } catch (FrontendException e) {
        PigException pigEx = LogUtils.getPigException(e);
        if (!pigEx.getMessage().contains(expectedErr)) {
            fail("Expected exception message matching '"
                    + expectedErr + "' but got '" + pigEx.getMessage() + "'");
        }
        return; // expected failure observed
    }
    fail("No exception thrown. Exception is expected.");
}
示例7: testPartialExecution
import org.apache.pig.PigServer; //导入依赖的package包/类
@Test
public void testPartialExecution() throws Throwable {
    // Two batches separated by "exec": the first stores into 'bar', the
    // second reloads 'bar' and stores into 'baz'.
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PigContext pigContext = pigServer.getPigContext();
    FileLocalizer.setInitialized(false);
    String passwdUri = Util.generateURI("file:test/org/apache/pig/test/data/passwd",
            pigContext);
    String script = "rmf bar; rmf baz; "
            + "a = load '" + passwdUri + "';"
            + "store a into 'bar'; exec; a = load 'bar'; store a into 'baz';\n";
    Grunt grunt = new Grunt(
            new BufferedReader(new InputStreamReader(new ByteArrayInputStream(script.getBytes()))),
            pigContext);
    grunt.exec();
}
示例8: setUp
import org.apache.pig.PigServer; //导入依赖的package包/类
@Before
public void setUp() throws Exception {
    log.info("Setting up");
    pigServer = new PigServer(ExecType.LOCAL);
    data = resetData(pigServer);
    // Populate "test" with MAX rows of (index, random string) under an
    // explicit two-field schema.
    Random random = new Random();
    ArrayList<Tuple> rows = new ArrayList<Tuple>();
    for (int index = 0; index < MAX; index++) {
        rows.add(tuple(index, GenRandomData.genRandString(random)));
    }
    Schema schema = new Schema();
    schema.add(new Schema.FieldSchema("index", DataType.INTEGER));
    schema.add(new Schema.FieldSchema("name", DataType.CHARARRAY));
    data.set("test", schema, rows);
}
示例9: testFsCommand
import org.apache.pig.PigServer; //导入依赖的package包/类
@Test
public void testFsCommand() throws Throwable {
    // Exercise fs and cd commands end-to-end through the Grunt shell,
    // interleaved with a load/store pipeline.
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PigContext pigContext = pigServer.getPigContext();
    String script = "fs -ls /;"
            + "fs -mkdir /fstmp;"
            + "fs -mkdir /fstmp/foo;"
            + "cd /fstmp;"
            + "fs -copyFromLocal test/org/apache/pig/test/data/passwd bar;"
            + "a = load 'bar';"
            + "cd foo;"
            + "store a into 'baz';"
            + "cd /;"
            + "fs -ls .;"
            + "fs -rmr /fstmp/foo/baz;";
    Grunt grunt = new Grunt(
            new BufferedReader(new InputStreamReader(new ByteArrayInputStream(script.getBytes()))),
            pigContext);
    grunt.exec();
}
示例10: copyFromLocalToCluster
import org.apache.pig.PigServer; //导入依赖的package包/类
/**
 * Utility method to copy a file form local filesystem to the dfs on
 * the minicluster for testing in mapreduce mode
 * @param cluster a reference to the minicluster
 * @param localFileName the pathname of local file
 * @param fileNameOnCluster the name with which the file should be created on the minicluster
 * @throws IOException
 */
static public void copyFromLocalToCluster(MiniGenericCluster cluster,
        String localFileName, String fileNameOnCluster) throws IOException {
    if (Util.WINDOWS) {
        // On Windows: paths without a drive letter are normalized to forward
        // slashes, drive-qualified paths to backslashes; the cluster-side
        // path always uses forward slashes.
        localFileName = localFileName.contains(":")
                ? localFileName.replace('/', '\\')
                : localFileName.replace('\\', '/');
        fileNameOnCluster = fileNameOnCluster.replace('\\', '/');
    }
    PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    // mkdir first so "fs -put" has a destination directory on Hadoop 2.x.
    String commands = getMkDirCommandForHadoop2_0(fileNameOnCluster)
            + "fs -put " + localFileName + " " + fileNameOnCluster;
    GruntParser gruntParser = new GruntParser(new StringReader(commands), pigServer);
    gruntParser.setInteractive(false);
    try {
        gruntParser.parseStopOnError();
    } catch (org.apache.pig.tools.pigscript.parser.ParseException e) {
        throw new IOException(e);
    }
}
示例11: testBzipStoreInMultiQuery
import org.apache.pig.PigServer; //导入依赖的package包/类
@Test
public void testBzipStoreInMultiQuery() throws Exception {
    // Single input row containing an embedded \r; stored twice in one batch,
    // once bzip2-compressed and once plain.
    String inputFileName = "input.txt";
    Util.createInputFile(cluster, inputFileName, new String[] { "1\t2\r3\t4" });
    PigServer pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pig.setBatchOn();
    pig.registerQuery("a = load '" + inputFileName + "';");
    pig.registerQuery("store a into 'output.bz2';");
    pig.registerQuery("store a into 'output';");
    pig.executeBatch();
    // Both output part files must exist and be non-empty.
    FileSystem fs = FileSystem.get(
            ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));
    assertTrue(fs.getFileStatus(new Path("output/part-m-00000")).getLen() > 0);
    assertTrue(fs.getFileStatus(new Path("output.bz2/part-m-00000.bz2")).getLen() > 0);
}
示例12: testDump
import org.apache.pig.PigServer; //导入依赖的package包/类
@Test
public void testDump() throws Exception {
    PigServer pigServer = new PigServer(MODE);
    Data data = Storage.resetData(pigServer);
    data.set("input",
            tuple("test1"),
            tuple("test2"));
    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD 'input' using mock.Storage;");
    // Drain the open iterator into a list for comparison.
    List<Tuple> actual = new ArrayList<Tuple>();
    for (Iterator<Tuple> it = pigServer.openIterator("A"); it.hasNext(); ) {
        actual.add(it.next());
    }
    assertEquals(Arrays.asList(tuple("test1"), tuple("test2")), actual);
    pigServer.shutdown();
}
示例13: testGroupAllWithParallel
import org.apache.pig.PigServer; //导入依赖的package包/类
/**
 * Test parallelism for group all
 * @throws Exception
 */
@Test
public void testGroupAllWithParallel() throws Exception {
    PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:chararray);");
    pigServer.registerQuery("B = group A all parallel 5;");
    Iterator<Tuple> actual = pigServer.openIterator("B");
    List<Tuple> expected = Util.getTuplesFromConstantTupleStrings(
            new String[] {
                "('all',{('one'),('two'),('two')})"
            });
    Util.checkQueryOutputsAfterSort(actual, expected);
    // Inspect the job graph produced by the PARALLEL 5 group-all.
    checkGroupAllWithParallelGraphResult(PigStats.get().getJobGraph());
}
示例14: testForEachFlatten
import org.apache.pig.PigServer; //导入依赖的package包/类
@Test
public void testForEachFlatten() throws Exception {
    PigServer pigServer = newPigServer();
    Data data = Storage.resetData(pigServer);
    data.set("input",
            tuple(bag(tuple("1"), tuple("2"), tuple("3"))),
            tuple(bag(tuple("4"), tuple("5"), tuple("6"))));
    pigServer.registerQuery("A = LOAD 'input' using mock.Storage;");
    pigServer.registerQuery("B = FOREACH A GENERATE FLATTEN($0);");
    pigServer.registerQuery("STORE B INTO 'output' using mock.Storage;");
    // FLATTEN unnests each bag into one output row per inner tuple, in order.
    List<Tuple> expected = Arrays.asList(
            tuple("1"), tuple("2"), tuple("3"), tuple("4"), tuple("5"), tuple("6"));
    assertEquals(expected, data.get("output"));
    pigServer.shutdown();
}
示例15: test8
import org.apache.pig.PigServer; //导入依赖的package包/类
@Test
public void test8() throws Exception {
String query = "a = load 'foo' using " + TestLoader.class.getName() +
"('srcid, mrkt, dstid, name, age', 'srcid,name');" +
"b = filter a by (srcid < 20) or (name == 'foo');" +
"store b into 'output';";
LogicalPlan plan = Util.buildLp(new PigServer(pc), query);
Rule rule = new PartitionFilterOptimizer("test");
List<OperatorPlan> matches = rule.match(plan);
if (matches != null) {
Transformer transformer = rule.getNewTransformer();
for (OperatorPlan m : matches) {
if (transformer.check(m)) {
transformer.transform(m);
}
}
OperatorSubPlan newPlan = (OperatorSubPlan)transformer.reportChanges();
Assert.assertTrue(newPlan.getBasePlan().size() == 3);
}
}