This article collects typical usage examples of the Java class org.apache.pig.ExecType. ExecType belongs to the org.apache.pig package and selects the engine a Pig script runs on (for example, local mode or MapReduce mode). If you are wondering what ExecType is for or how to use it, the curated examples below should help.
A total of 15 code examples are shown, drawn from open-source projects and ordered by popularity.
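Before the individual examples, here is a minimal sketch of how ExecType is typically passed to PigServer. The class name and main-method harness are illustrative, not from any of the projects below; the sketch assumes only that Pig and its Hadoop dependencies are on the classpath:

import java.util.Properties;

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;

public class ExecTypeSketch {
    public static void main(String[] args) throws ExecException {
        // ExecType.LOCAL runs Pig in-process against the local file system;
        // ExecType.MAPREDUCE submits jobs to the Hadoop cluster described
        // by the configuration on the classpath.
        Properties props = new Properties();
        PigServer pig = new PigServer(ExecType.LOCAL, props);
        pig.shutdown();
    }
}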
Example 1: setup
import org.apache.pig.ExecType; // import the required package/class
private PigServer setup(String script, Configuration conf) throws Exception {
    if (conf == null) {
        conf = new HdfsConfiguration();
    }
    conf.setIfUnset(VespaConfiguration.DRYRUN, "true");
    conf.setIfUnset(VespaConfiguration.ENDPOINT, "dummy-endpoint");

    // Parameter substitutions - can also be set by configuration
    Map<String, String> parameters = new HashMap<>();
    parameters.put("ENDPOINT", "endpoint-does-not-matter-in-dryrun,another-endpoint-that-does-not-matter");

    PigServer ps = new PigServer(ExecType.LOCAL, conf);
    ps.setBatchOn();
    ps.registerScript(script, parameters);
    return ps;
}
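A hypothetical caller of this helper might look like the following; the script path is invented for illustration and is not from the original project:

// Hypothetical usage of the setup() helper above; the path is illustrative.
PigServer ps = setup("src/test/resources/feed.pig", null);
ps.executeBatch();   // executes the script registered via setBatchOn()/registerScript()
ps.shutdown();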
Example 2: createPig
import org.apache.pig.ExecType; // import the required package/class
protected PigServer createPig() throws ExecException {
    HdpBootstrap.hackHadoopStagingOnWin();

    Properties properties = HdpBootstrap.asProperties(QueryTestParams.provisionQueries(HdpBootstrap.hadoopConfig()));
    String pigHost = properties.getProperty("pig");
    // remote Pig instance
    if (StringUtils.hasText(pigHost) && !"local".equals(pigHost)) {
        LogFactory.getLog(PigWrapper.class).info("Executing Pig in Map/Reduce mode");
        return new PigServer(ExecType.MAPREDUCE, properties);
    }

    // use local instance
    LogFactory.getLog(PigWrapper.class).info("Executing Pig in local mode");
    properties.put("mapred.job.tracker", "local");
    return new PigServer(ExecType.LOCAL, properties);
}
Example 3: setUp
import org.apache.pig.ExecType; // import the required package/class
@Override
public void setUp() throws Exception {
    super.setUp();
    SparqlToPigTransformVisitor visitor = new SparqlToPigTransformVisitor();
    visitor.setTablePrefix("l_");
    visitor.setInstance("stratus");
    visitor.setZk("stratus13:2181");
    visitor.setUser("root");
    visitor.setPassword("password");

    engine = new SparqlQueryPigEngine();
    engine.setSparqlToPigTransformVisitor(visitor);
    engine.setExecType(ExecType.LOCAL);
    engine.setInference(false);
    engine.setStats(false);

    engine.init();
}
Example 4: DeduplicationJob
import org.apache.pig.ExecType; // import the required package/class
public DeduplicationJob(List<String> files, List<String> dimensions) {
    this.files = files;

    Properties props = new Properties();
    props.setProperty("output.compression.enabled", "true");
    props.setProperty("output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    try {
        this.pigServer = new PigServer(ExecType.MAPREDUCE, props);
    } catch (ExecException e) {
        log.error("Couldn't execute pig server: {}", e.getMessage());
        e.printStackTrace();
    }

    this.usingAsDimensions = Joiner.on(":chararray, ").join(dimensions) + ", data:Map[], count:int";
    this.groupByDimensions = Joiner.on(", ").join(dimensions);
    this.loaderDimensions = "'" + Joiner.on("','").join(dimensions) + "'";
}
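To illustrate the string building with a hypothetical dimensions list of ["site", "country"]: usingAsDimensions becomes "site:chararray, country, data:Map[], count:int", groupByDimensions becomes "site, country", and loaderDimensions becomes "'site','country'". Note that Joiner.on(":chararray, ") only types the dimensions it separates, so the last dimension is left untyped and Pig defaults it to bytearray; whether that is intended is not clear from the original project.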
Example 5: setup
import org.apache.pig.ExecType; // import the required package/class
@Before
public void setup() throws IOException {
    pig = new PigServer(ExecType.LOCAL);
    Util.deleteDirectory(new File(dataDir));
    try {
        pig.mkdirs(dataDir);

        Util.createLocalInputFile(dataDir + scalarInput,
            new String[] {
                "{ \"i\": 1, \"l\": 10, \"f\": 2.718, \"d\": 3.1415, \"b\": \"17\", \"c\": \"aardvark\" }",
                "{ \"i\": 2, \"l\": 100, \"f\": 1.234, \"d\": 3.3333, \"b\": null, \"c\": \"17.0\" }"
            });

        Util.createLocalInputFile(dataDir + complexInput,
            new String[] {
                "{ \"tuple\": { \"a\": 1, \"b\": 2 }, \"nested_tuple\": { \"a\": 1, \"b\": { \"c\": 2, \"d\": 3 } }, \"bag\": [{ \"a\": 1, \"b\": 2 }, { \"a\": 3, \"b\": 4 }], \"nested_bag\": [{\"a\": 1, \"b\": [{ \"c\": 2, \"d\": 3 }, { \"c\": 4, \"d\": 5 }]}], \"map\": { \"a\": 1, \"b\": 2 }, \"nested_map\": { \"a\": { \"b\": 1, \"c\": 2 } } }",
                "{ \"tuple\": { \"a\": 3, \"b\": 4 }, \"nested_tuple\": { \"a\": 4, \"b\": { \"c\": 5, \"d\": 6 } }, \"bag\": [{ \"a\": 5, \"b\": 6 }, { \"a\": 7, \"b\": 8 }], \"nested_bag\": [{\"a\": 6, \"b\": [{ \"c\": 7, \"d\": 8 }, { \"c\": 9, \"d\": 0 }]}], \"map\": { \"a\": 3, \"b\": 4 }, \"nested_map\": { \"a\": { \"b\": 3, \"c\": 4 } } }"
            });

        Util.createLocalInputFile(dataDir + nestedArrayInput,
            new String[] {
                "{ \"arr\": [1, 2, 3, 4], \"nested_arr\": [[1, 2], [3, 4]], \"nested_arr_2\": [[1, 2], [3, 4]], \"very_nested_arr\": [[[1, 2], [3, 4]], [[5, 6], [7, 6]]], \"i\": 9 }"
            });
    } catch (IOException e) {
        // ignored: if the input files cannot be created, the tests that read them will fail
    }
}
Example 6: testProjectStarMulti
import org.apache.pig.ExecType; // import the required package/class
/**
 * Test projecting multiple *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
            " l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);"
            + "f = foreach l1 generate * as (aa, bb, cc), *;";
    Util.registerMultiLineQuery(pig, query);

    Schema expectedSch = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema sch = pig.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSch, sch);

    List<Tuple> expectedRes =
            Util.getTuplesFromConstantTupleStrings(
                    new String[] {
                            "(10,20,30,10,20,30)",
                            "(11,21,31,11,21,31)",
                    });
    Iterator<Tuple> it = pig.openIterator("f");
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
Example 7: testPigTempDir
import org.apache.pig.ExecType; // import the required package/class
@Test
public void testPigTempDir() throws Throwable {
    Properties properties = PropertiesUtil.loadDefaultProperties();
    File pigTempDir = new File(tempDir, FILE_SEPARATOR + "tmp" + FILE_SEPARATOR + "test");
    properties.put("pig.temp.dir", pigTempDir.getPath());
    PigContext pigContext = new PigContext(ExecType.LOCAL, properties);
    pigContext.connect();
    FileLocalizer.setInitialized(false);

    String tempPath = FileLocalizer.getTemporaryPath(pigContext).toString();
    Path path = new Path(tempPath);
    assertTrue(tempPath.startsWith(pigTempDir.toURI().toString()));

    FileSystem fs = FileSystem.get(path.toUri(),
            ConfigurationUtil.toConfiguration(pigContext.getProperties()));
    FileStatus status = fs.getFileStatus(path.getParent());
    // Temporary root dir should have 700 as permission
    assertEquals("rwx------", status.getPermission().toString());

    pigTempDir.delete();
    FileLocalizer.setInitialized(false);
}
Example 8: setUp
import org.apache.pig.ExecType; // import the required package/class
@Before
public void setUp() throws Exception {
    ArrayList<Tuple> tuples = new ArrayList<Tuple>();
    log.info("Setting up");

    pigServer = new PigServer(ExecType.LOCAL);
    data = resetData(pigServer);

    Random r = new Random();
    for (int i = 0; i < MAX; i++) {
        tuples.add(tuple(i, GenRandomData.genRandString(r)));
    }

    Schema s = new Schema();
    s.add(new Schema.FieldSchema("index", DataType.INTEGER));
    s.add(new Schema.FieldSchema("name", DataType.CHARARRAY));
    data.set("test", s, tuples);
}
Example 9: testHadoopJHLoader
import org.apache.pig.ExecType; // import the required package/class
@SuppressWarnings("unchecked")
@Test
public void testHadoopJHLoader() throws Exception {
    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.registerQuery("a = load '" + INPUT_DIR
            + "' using org.apache.pig.piggybank.storage.HadoopJobHistoryLoader() "
            + "as (j:map[], m:map[], r:map[]);");
    Iterator<Tuple> iter = pig.openIterator("a");
    assertTrue(iter.hasNext());
    Tuple t = iter.next();

    Map<String, Object> job = (Map<String, Object>) t.get(0);

    assertEquals("3eb62180-5473-4301-aa22-467bd685d466", (String) job.get("PIG_SCRIPT_ID"));
    assertEquals("job_201004271216_9998", (String) job.get("JOBID"));
    assertEquals("job_201004271216_9995", (String) job.get("PIG_JOB_PARENTS"));
    assertEquals("0.8.0-dev", (String) job.get("PIG_VERSION"));
    assertEquals("0.20.2", (String) job.get("HADOOP_VERSION"));
    assertEquals("d", (String) job.get("PIG_JOB_ALIAS"));
    assertEquals("PigLatin:Test.pig", job.get("JOBNAME"));
    assertEquals("ORDER_BY", (String) job.get("PIG_JOB_FEATURE"));
    assertEquals("1", (String) job.get("TOTAL_MAPS"));
    assertEquals("1", (String) job.get("TOTAL_REDUCES"));
}
Example 10: testNegativeWithAlias
import org.apache.pig.ExecType; // import the required package/class
/**
 * Verify that an alias cannot be assigned to the ASSERT operator
 * @throws Exception
 */
@Test(expected = FrontendException.class)
public void testNegativeWithAlias() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = resetData(pigServer);
    data.set("foo",
            tuple(1),
            tuple(2),
            tuple(3)
    );
    try {
        pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
        pigServer.registerQuery("B = ASSERT A BY i > 1 , 'i should be greater than 1';");
    } catch (FrontendException fe) {
        Util.checkMessageInException(fe, "Syntax error, unexpected symbol at or near 'B'");
        throw fe;
    }
}
Example 11: testLocalModeTakesLessThan5secs
import org.apache.pig.ExecType; // import the required package/class
/**
 * The Hadoop-provided JobControl sleeps 5000 ms between job status checks,
 * making the effective minimum runtime of a job 5 seconds.
 * This test checks that we do not regress to that behavior.
 * @see HadoopShims#newJobControl(String, org.apache.hadoop.conf.Configuration, org.apache.pig.impl.PigContext)
 */
@Test
public void testLocalModeTakesLessThan5secs() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = resetData(pigServer);
    data.set("in", tuple("a"));

    long t0 = System.currentTimeMillis();
    pigServer.registerQuery(
            "A = LOAD 'in' using mock.Storage();\n"
            + "STORE A INTO 'out' USING mock.Storage();");
    long t1 = System.currentTimeMillis();

    List<Tuple> list = data.get("out");
    assertEquals(1, list.size());
    assertEquals("a", list.get(0).get(0));
    assertTrue("must take less than 5 seconds", (t1 - t0) < 5000);
}
Example 12: testNullTupleCols
import org.apache.pig.ExecType; // import the required package/class
@Test
public void testNullTupleCols() throws Exception {
    String inputFileName = "TestProject-testNullTupleCols-input.txt";
    String[] input = { "1\t(hello,world)", "2\t(good)", "3" };
    Util.createLocalInputFile(inputFileName, input);

    // PigStorage will return null as the value for the tuple field in the
    // second record since it does not comply with the schema, and in the
    // third record since the field is absent
    String query = "a = load '" + inputFileName + "' as (i:int, " +
            "t:tuple(s1:chararray, s2:chararray));" +
            "b = foreach a generate t.s1, t.s2;";
    PigServer ps = new PigServer(ExecType.LOCAL);
    Util.registerMultiLineQuery(ps, query);
    Iterator<Tuple> it = ps.openIterator("b");

    Tuple[] expectedResults = new Tuple[] {
            (Tuple) Util.getPigConstant("('hello', 'world')"),
            (Tuple) Util.getPigConstant("(null, null)"),
            (Tuple) Util.getPigConstant("(null, null)")
    };
    int i = 0;
    while (it.hasNext()) {
        assertEquals(expectedResults[i++], it.next());
    }
}
Example 13: visitUserFunc
import org.apache.pig.ExecType; // import the required package/class
@Override
public void visitUserFunc(POUserFunc func) throws VisitorException {
    // XXX Hadoop currently doesn't support the distributed cache in local mode.
    // This check can be removed once that support is added.
    if (pigContext.getExecType() == ExecType.LOCAL) return;

    // set up the distributed cache for files indicated by the UDF
    String[] files = func.getCacheFiles();
    if (files == null) return;

    try {
        setupDistributedCache(pigContext, conf, files, false);
    } catch (IOException e) {
        String msg = "Internal error. Distributed cache could not " +
                "be set up for the requested files";
        throw new VisitorException(msg, e);
    }
}
Example 14: testDatePartitionedFiles
import org.apache.pig.ExecType; // import the required package/class
@Test
public void testDatePartitionedFiles() throws IOException {
    int count = 0;

    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
            + ", '" + startingDate + ":" + endingDate + "')";
    System.out.println(funcSpecString);

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(datePartitionedDir.getAbsolutePath()) + "' using "
            + funcSpecString + ";");

    Iterator<Tuple> result = server.openIterator("a");
    while ((result.next()) != null) {
        count++;
    }

    Assert.assertEquals(datePartitionedRowCount, count);
}
Example 15: testDump
import org.apache.pig.ExecType; // import the required package/class
@Test
public void testDump() throws Throwable {
    PigServer server = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PigContext context = server.getPigContext();

    String strCmd =
            "rmf bla;"
            + "a = load '"
            + Util.generateURI("file:test/org/apache/pig/test/data/passwd", context) + "';"
            + "e = group a by $0;"
            + "f = foreach e generate group, COUNT($1);"
            + "store f into 'bla';"
            + "f1 = load 'bla';"
            + "g = order f1 by $1;"
            + "dump g;";

    ByteArrayInputStream cmd = new ByteArrayInputStream(strCmd.getBytes());
    InputStreamReader reader = new InputStreamReader(cmd);
    Grunt grunt = new Grunt(new BufferedReader(reader), context);

    grunt.exec();
}