本文整理汇总了Java中org.apache.hadoop.mapred.RecordWriter.close方法的典型用法代码示例。如果您正苦于以下问题:Java RecordWriter.close方法的具体用法?Java RecordWriter.close怎么用?Java RecordWriter.close使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.mapred.RecordWriter
的用法示例。
在下文中一共展示了RecordWriter.close方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testWriteBufferData
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
@Test(enabled = true)
public void testWriteBufferData() throws Exception {
// Writes m_reccnt durable buffers through the mapred RecordWriter while
// folding every payload into a CRC32 checksum; the checksum is stored in
// m_checksum so a later read test can verify the data round-trips.
NullWritable key = NullWritable.get();
MneDurableOutputSession<DurableBuffer<?>> session =
new MneDurableOutputSession<DurableBuffer<?>>(null, m_conf,
MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
MneDurableOutputValue<DurableBuffer<?>> outValue =
new MneDurableOutputValue<DurableBuffer<?>>(session);
OutputFormat<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> format =
new MneOutputFormat<MneDurableOutputValue<DurableBuffer<?>>>();
RecordWriter<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> recWriter =
format.getRecordWriter(m_fs, m_conf, null, null);
Checksum crc = new CRC32();
crc.reset();
for (int idx = 0; idx < m_reccnt; ++idx) {
// genupdDurableBuffer both creates the buffer and updates the checksum
DurableBuffer<?> buf = genupdDurableBuffer(session, crc);
Assert.assertNotNull(buf);
recWriter.write(key, outValue.of(buf));
}
m_checksum = crc.getValue();
recWriter.close(null);
session.close();
}
示例2: testClose
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
@Test public void testClose() throws IOException, InterruptedException {
// Verifies that close() delegates to the wrapped mapreduce RecordWriter,
// and that an IOException thrown by the delegate propagates to the caller
// on a second close while the delegate is still invoked.
RecordWriter<LongWritable, JsonObject> writerUnderTest =
new BigQueryMapredRecordWriter<LongWritable, JsonObject>(
mockRecordWriter, mockTaskAttemptContext);
Reporter unusedReporter = null; // the wrapper ignores the reporter argument
writerUnderTest.close(unusedReporter);
verify(mockRecordWriter).close(any(TaskAttemptContext.class));
// now make the delegate fail and expect the failure to surface
doThrow(new IOException("test"))
.when(mockRecordWriter).close(any(TaskAttemptContext.class));
expectedException.expect(IOException.class);
try {
writerUnderTest.close(unusedReporter);
} finally {
// delegate must have been called a second time despite the exception
verify(mockRecordWriter, times(2)).close(any(TaskAttemptContext.class));
}
}
示例3: openCloseTableOutputFormat
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
/**
 * Opens a TableOutputFormat and immediately closes its RecordWriter.
 * Closing the RecordWriter should release the HBase connection (ZK)
 * resources; if connections leak across iterations this eventually throws.
 *
 * @param iter iteration counter, used only for log context
 * @throws IOException if the writer cannot be obtained or closed
 */
static void openCloseTableOutputFormat(int iter) throws IOException {
LOG.info("Instantiating TableOutputFormat connection " + iter);
JobConf jobConf = new JobConf();
jobConf.addResource(UTIL.getConfiguration());
jobConf.set(TableOutputFormat.OUTPUT_TABLE, TABLE);
TableMapReduceUtil.initTableMapJob(TABLE, FAMILY, TableMap.class,
ImmutableBytesWritable.class, ImmutableBytesWritable.class, jobConf);
TableOutputFormat outputFormat = new TableOutputFormat();
RecordWriter recordWriter = outputFormat.getRecordWriter(null, jobConf, TABLE, null);
recordWriter.close(null);
}
示例4: writeExcelOutputFormatExcel2013SingleSheet
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
/**
 * Round-trip test for the new (.xlsx) Excel format: writes one sheet with
 * strings in row 1, an empty row 2, numeric formulas in row 3 and a cell
 * formula (=A3+B3) in row 4, closes the writer, then reads the produced
 * file back through ExcelFileInputFormat and asserts every formatted
 * value cell by cell.
 */
@Test
public void writeExcelOutputFormatExcel2013SingleSheet() throws IOException {
// one row string and three columns ("test1","test2","test3")
// (String formattedValue, String comment, String formula, String address,String sheetName)
SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1","","","A1","Sheet1");
SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2","","","B1","Sheet1");
SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3","","","C1","Sheet1");
// empty row => nothing todo
// one row numbers (1,2,3)
SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("","","1","A3","Sheet1");
SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("","","2","B3","Sheet1");
SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("","","3","C3","Sheet1");
// one row formulas (=A3+B3)
SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("","","A3+B3","A4","Sheet1");
// write
JobConf job = new JobConf(defaultConf);
String fileName="excel2013singlesheettestout";
String tmpDir=tmpPath.toString();
Path outputPath = new Path(tmpDir);
FileOutputFormat.setOutputPath(job, outputPath);
// set generic outputformat settings
job.set(JobContext.TASK_ATTEMPT_ID, attempt);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
job.set("hadoopoffice.write.mimeType","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new Excel format, anyway default, but here for illustrative purposes
ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
RecordWriter<NullWritable,SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, fileName, null);
assertNotNull(writer,"Format returned null RecordWriter");
writer.write(null,a1);
writer.write(null,b1);
writer.write(null,c1);
writer.write(null,a3);
writer.write(null,b3);
writer.write(null,c3);
writer.write(null,a4);
writer.close(reporter);
// try to read it again
job = new JobConf(defaultConf);
// the output still sits in the task-attempt's _temporary staging directory
// because no OutputCommitter commit phase runs in this test
Path inputFile = new Path(tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+fileName+".xlsx");
FileInputFormat.setInputPaths(job, inputFile);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
inputFormat.configure(job);
InputSplit[] inputSplits = inputFormat.getSplits(job,1);
assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter);
assertNotNull(reader,"Format returned null RecordReader");
Text spreadSheetKey = new Text();
ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
// row 1: three string cells
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1");
assertEquals("["+fileName+".xlsx]Sheet1!A1",spreadSheetKey.toString(),"Input Split for Excel file has keyname == \"["+fileName+".xlsx]Sheet1!A1\"");
assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contains row 1 with 3 columns");
assertEquals("test1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
assertEquals("test2",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
assertEquals("test3",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
// row 2: empty
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 2");
assertEquals(0,spreadSheetValue.get().length,"Input Split for Excel file contain row 2 and is empty");
// row 3: formulas "1","2","3" evaluate to those numbers
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 3");
assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contain row 3 with 3 columns");
assertEquals("1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 1 == \"1\"");
assertEquals("2",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 2 == \"2\"");
assertEquals("3",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 3 == \"3\"");
// row 4: =A3+B3 evaluates to 3
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 4");
assertEquals(1,spreadSheetValue.get().length,"Input Split for Excel file contain row 4 with 1 column");
assertEquals("3",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 1 == \"3\"");
}
示例5: writeExcelOutputFormatExcel2013SingleSheetEncryptedPositive
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
/**
 * Positive encryption round-trip for the new (.xlsx) Excel format: writes
 * a sheet encrypted with agile/AES-256/CBC/SHA-512 and password "test",
 * then reads it back supplying the SAME password and asserts all cell
 * values survived the encrypt/decrypt cycle.
 */
@Test
public void writeExcelOutputFormatExcel2013SingleSheetEncryptedPositive() throws IOException {
// one row string and three columns ("test1","test2","test3")
// (String formattedValue, String comment, String formula, String address,String sheetName)
SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1","","","A1","Sheet1");
SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2","","","B1","Sheet1");
SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3","","","C1","Sheet1");
// empty row => nothing todo
// one row numbers (1,2,3)
SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("","","1","A3","Sheet1");
SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("","","2","B3","Sheet1");
SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("","","3","C3","Sheet1");
// one row formulas (=A3+B3)
SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("","","A3+B3","A4","Sheet1");
// write
JobConf job = new JobConf(defaultConf);
String fileName="excel2013singlesheettestoutencryptedpositive";
String tmpDir=tmpPath.toString();
Path outputPath = new Path(tmpDir);
FileOutputFormat.setOutputPath(job, outputPath);
// set generic outputformat settings
job.set(JobContext.TASK_ATTEMPT_ID, attempt);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
job.set("hadoopoffice.write.mimeType","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new excel format
// security
// for the new Excel format you need to decide on your own which algorithms are secure
job.set("hadoopoffice.write.security.crypt.encrypt.mode","agile");
job.set("hadoopoffice.write.security.crypt.encrypt.algorithm","aes256");
job.set("hadoopoffice.write.security.crypt.chain.mode","cbc");
job.set("hadoopoffice.write.security.crypt.hash.algorithm","sha512");
job.set("hadoopoffice.write.security.crypt.password","test");
ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
RecordWriter<NullWritable,SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, fileName, null);
assertNotNull(writer,"Format returned null RecordWriter");
writer.write(null,a1);
writer.write(null,b1);
writer.write(null,c1);
writer.write(null,a3);
writer.write(null,b3);
writer.write(null,c3);
writer.write(null,a4);
writer.close(reporter);
// try to read it again
job = new JobConf(defaultConf);
// the file is still under the task-attempt's _temporary staging directory
Path inputFile = new Path(tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+fileName+".xlsx");
FileInputFormat.setInputPaths(job, inputFile);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
// you just need to provide the password to read encrypted data
job.set("hadoopoffice.read.security.crypt.password","test");
ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
inputFormat.configure(job);
InputSplit[] inputSplits = inputFormat.getSplits(job,1);
assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter);
assertNotNull(reader,"Format returned null RecordReader");
Text spreadSheetKey = new Text();
ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
// row 1: three string cells
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1");
assertEquals("["+fileName+".xlsx]Sheet1!A1",spreadSheetKey.toString(),"Input Split for Excel file has keyname == \"["+fileName+".xlsx]Sheet1!A1\"");
assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contains row 1 with 3 columns");
assertEquals("test1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
assertEquals("test2",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
assertEquals("test3",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
// row 2: empty
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 2");
assertEquals(0,spreadSheetValue.get().length,"Input Split for Excel file contain row 2 and is empty");
// row 3: formulas "1","2","3" evaluate to those numbers
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 3");
assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contain row 3 with 3 columns");
assertEquals("1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 1 == \"1\"");
assertEquals("2",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 2 == \"2\"");
assertEquals("3",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 3 == \"3\"");
// row 4: =A3+B3 evaluates to 3
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 4");
assertEquals(1,spreadSheetValue.get().length,"Input Split for Excel file contain row 4 with 1 column");
assertEquals("3",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 1 == \"3\"");
}
示例6: writeExcelOutputFormatExcel2013SingleSheetEncryptedNegative
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
@Test
public void writeExcelOutputFormatExcel2013SingleSheetEncryptedNegative() throws IOException {
// Writes an encrypted .xlsx and then attempts to read it back with the
// WRONG password; the input format must return a null RecordReader.
// SpreadSheetCellDAO(formattedValue, comment, formula, address, sheetName)
SpreadSheetCellDAO[] cells = {
// row 1: three string cells
new SpreadSheetCellDAO("test1","","","A1","Sheet1"),
new SpreadSheetCellDAO("test2","","","B1","Sheet1"),
new SpreadSheetCellDAO("test3","","","C1","Sheet1"),
// row 2 stays empty; row 3: the numbers 1,2,3 expressed as formulas
new SpreadSheetCellDAO("","","1","A3","Sheet1"),
new SpreadSheetCellDAO("","","2","B3","Sheet1"),
new SpreadSheetCellDAO("","","3","C3","Sheet1"),
// row 4: the formula =A3+B3
new SpreadSheetCellDAO("","","A3+B3","A4","Sheet1")
};
// configure and perform the encrypted write
JobConf job = new JobConf(defaultConf);
String fileName = "excel2013singlesheettestoutencryptednegative";
String tmpDir = tmpPath.toString();
FileOutputFormat.setOutputPath(job, new Path(tmpDir));
job.set(JobContext.TASK_ATTEMPT_ID, attempt);
// locale matching the test data
job.set("hadoopoffice.read.locale.bcp47","de");
job.set("hadoopoffice.write.mimeType","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new excel format
// for the new Excel format the caller chooses the crypto algorithms
job.set("hadoopoffice.write.security.crypt.encrypt.mode","agile");
job.set("hadoopoffice.write.security.crypt.encrypt.algorithm","aes256");
job.set("hadoopoffice.write.security.crypt.chain.mode","cbc");
job.set("hadoopoffice.write.security.crypt.hash.algorithm","sha512");
job.set("hadoopoffice.write.security.crypt.password","test");
ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
RecordWriter<NullWritable,SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, fileName, null);
assertNotNull(writer,"Format returned null RecordWriter");
for (SpreadSheetCellDAO cell : cells) {
writer.write(null, cell);
}
writer.close(reporter);
// now try to read the freshly written file with a wrong password
job = new JobConf(defaultConf);
Path inputFile = new Path(tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+fileName+".xlsx");
FileInputFormat.setInputPaths(job, inputFile);
job.set("hadoopoffice.read.locale.bcp47","de");
// deliberately wrong password
job.set("hadoopoffice.read.security.crypt.password","test2");
ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
inputFormat.configure(job);
InputSplit[] inputSplits = inputFormat.getSplits(job,1);
assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter);
assertNull(reader,"Null record reader implies invalid password");
}
示例7: writeExcelOutputFormatExcel2013SingleSheetLowFootPrint
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
/**
 * Round-trip test for the new (.xlsx) Excel format using the low-footprint
 * (streaming) write mode: same content and assertions as the normal
 * single-sheet test, with "hadoopoffice.write.lowFootprint" enabled to
 * verify the streaming writer produces an equivalent, readable file.
 */
@Test
public void writeExcelOutputFormatExcel2013SingleSheetLowFootPrint() throws IOException {
// one row string and three columns ("test1","test2","test3")
// (String formattedValue, String comment, String formula, String address,String sheetName)
SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1","","","A1","Sheet1");
SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2","","","B1","Sheet1");
SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3","","","C1","Sheet1");
// empty row => nothing todo
// one row numbers (1,2,3)
SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("","","1","A3","Sheet1");
SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("","","2","B3","Sheet1");
SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("","","3","C3","Sheet1");
// one row formulas (=A3+B3)
SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("","","A3+B3","A4","Sheet1");
// write
JobConf job = new JobConf(defaultConf);
String fileName="excel2013singlesheettestoutlowfootprint";
String tmpDir=tmpPath.toString();
Path outputPath = new Path(tmpDir);
FileOutputFormat.setOutputPath(job, outputPath);
// set generic outputformat settings
job.set(JobContext.TASK_ATTEMPT_ID, attempt);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
// low footprint
job.set("hadoopoffice.write.lowFootprint", "true");
job.set("hadoopoffice.write.mimeType","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new Excel format, anyway default, but here for illustrative purposes
ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
RecordWriter<NullWritable,SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, fileName, null);
assertNotNull(writer,"Format returned null RecordWriter");
writer.write(null,a1);
writer.write(null,b1);
writer.write(null,c1);
writer.write(null,a3);
writer.write(null,b3);
writer.write(null,c3);
writer.write(null,a4);
writer.close(reporter);
// try to read it again
job = new JobConf(defaultConf);
// the file is still under the task-attempt's _temporary staging directory
Path inputFile = new Path(tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+fileName+".xlsx");
FileInputFormat.setInputPaths(job, inputFile);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
inputFormat.configure(job);
InputSplit[] inputSplits = inputFormat.getSplits(job,1);
assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter);
assertNotNull(reader,"Format returned null RecordReader");
Text spreadSheetKey = new Text();
ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
// row 1: three string cells
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1");
assertEquals("["+fileName+".xlsx]Sheet1!A1",spreadSheetKey.toString(),"Input Split for Excel file has keyname == \"["+fileName+".xlsx]Sheet1!A1\"");
assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contains row 1 with 3 columns");
assertEquals("test1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
assertEquals("test2",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
assertEquals("test3",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
// row 2: empty
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 2");
assertEquals(0,spreadSheetValue.get().length,"Input Split for Excel file contain row 2 and is empty");
// row 3: formulas "1","2","3" evaluate to those numbers
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 3");
assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contain row 3 with 3 columns");
assertEquals("1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 1 == \"1\"");
assertEquals("2",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 2 == \"2\"");
assertEquals("3",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 3 == \"3\"");
// row 4: =A3+B3 evaluates to 3
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 4");
assertEquals(1,spreadSheetValue.get().length,"Input Split for Excel file contain row 4 with 1 column");
assertEquals("3",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 1 == \"3\"");
}
示例8: writeExcelOutputFormatExcel2003SingleSheetEncryptedNegative
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
@Test
public void writeExcelOutputFormatExcel2003SingleSheetEncryptedNegative() throws IOException {
// Writes a password-protected legacy .xls file and then tries to read it
// back with the WRONG password; the input format must return a null
// RecordReader.
// SpreadSheetCellDAO(formattedValue, comment, formula, address, sheetName)
SpreadSheetCellDAO[] cells = {
// row 1: three string cells
new SpreadSheetCellDAO("test1","","","A1","Sheet1"),
new SpreadSheetCellDAO("test2","","","B1","Sheet1"),
new SpreadSheetCellDAO("test3","","","C1","Sheet1"),
// row 2 stays empty; row 3: the numbers 1,2,3 expressed as formulas
new SpreadSheetCellDAO("","","1","A3","Sheet1"),
new SpreadSheetCellDAO("","","2","B3","Sheet1"),
new SpreadSheetCellDAO("","","3","C3","Sheet1"),
// row 4: the formula =A3+B3
new SpreadSheetCellDAO("","","A3+B3","A4","Sheet1")
};
// configure and perform the encrypted write
JobConf job = new JobConf(defaultConf);
String fileName = "excel2003singlesheettestoutencryptednegative";
String tmpDir = tmpPath.toString();
FileOutputFormat.setOutputPath(job, new Path(tmpDir));
job.set(JobContext.TASK_ATTEMPT_ID, attempt);
// locale matching the test data
job.set("hadoopoffice.read.locale.bcp47","de");
job.set("hadoopoffice.write.mimeType","application/vnd.ms-excel"); // old excel format
// for the old Excel format only a password needs to be defined
job.set("hadoopoffice.write.security.crypt.password","test");
ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
RecordWriter<NullWritable,SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, fileName, null);
assertNotNull(writer,"Format returned null RecordWriter");
for (SpreadSheetCellDAO cell : cells) {
writer.write(null, cell);
}
writer.close(reporter);
// now try to read the freshly written file with a wrong password
job = new JobConf(defaultConf);
Path inputFile = new Path(tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+fileName+".xls");
FileInputFormat.setInputPaths(job, inputFile);
job.set("hadoopoffice.read.locale.bcp47","de");
// deliberately wrong password
job.set("hadoopoffice.read.security.crypt.password","test2");
ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
inputFormat.configure(job);
InputSplit[] inputSplits = inputFormat.getSplits(job,1);
assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter);
assertNull(reader,"Null record reader implies invalid password");
}
示例9: writeExcelOutputFormatExcel2013SingleSheetMetaDataMatchAllNegative
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
/**
 * Negative metadata-filter test with "match all" semantics: writes a
 * single-sheet .xlsx carrying a full set of core and custom metadata, then
 * reads it back with hadoopoffice.read.filter.metadata.matchAll=true while
 * one filter value (category) deliberately differs from what was written.
 * Because every filter must match, the document has to be filtered out and
 * the reader must deliver no rows.
 *
 * Fixes: the custom-property filter key was misspelled
 * "hhadoopoffice..." (extra leading 'h'), so that filter was silently
 * ignored; duplicate lastmodifiedbyuser settings removed.
 */
@Test
public void writeExcelOutputFormatExcel2013SingleSheetMetaDataMatchAllNegative() throws IOException {
// one row string and three columns ("test1","test2","test3")
// the idea here is to have some content although we only evaluate metadata
SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1","","","A1","Sheet1");
SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2","","","B1","Sheet1");
SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3","","","C1","Sheet1");
// write
JobConf job = new JobConf(defaultConf);
String fileName="excel2013singlesheetmetadatanegativetestout";
String tmpDir=tmpPath.toString();
Path outputPath = new Path(tmpDir);
FileOutputFormat.setOutputPath(job, outputPath);
// set generic outputformat settings
job.set(JobContext.TASK_ATTEMPT_ID, attempt);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
job.set("hadoopoffice.write.mimeType","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new Excel format, anyway default, but here for illustrative purposes
// set all the meta data including custom properties
job.set("hadoopoffice.write.metadata.category","dummycategory");
job.set("hadoopoffice.write.metadata.contentstatus","dummycontentstatus");
job.set("hadoopoffice.write.metadata.contenttype","dummycontenttype");
job.set("hadoopoffice.write.metadata.created","12:00:00 01.01.2016");
job.set("hadoopoffice.write.metadata.creator","dummycreator");
job.set("hadoopoffice.write.metadata.description","dummydescription");
job.set("hadoopoffice.write.metadata.identifier","dummyidentifier");
job.set("hadoopoffice.write.metadata.keywords","dummykeywords");
job.set("hadoopoffice.write.metadata.lastmodifiedbyuser","dummylastmodifiedbyuser");
job.set("hadoopoffice.write.metadata.lastprinted","12:00:00 01.01.2016");
job.set("hadoopoffice.write.metadata.modified","12:00:00 01.01.2016");
job.set("hadoopoffice.write.metadata.revision","2");
job.set("hadoopoffice.write.metadata.subject","dummysubject");
job.set("hadoopoffice.write.metadata.title","dummytitle");
job.set("hadoopoffice.write.metadata.custom.mycustomproperty1","dummymycustomproperty1");
job.set("hadoopoffice.write.metadata.custom.mycustomproperty2","dummymycustomproperty2");
ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
RecordWriter<NullWritable,SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, fileName, null);
assertNotNull(writer,"Format returned null RecordWriter");
writer.write(null,a1);
writer.write(null,b1);
writer.write(null,c1);
writer.close(reporter);
// try to read it again
job = new JobConf(defaultConf);
Path inputFile = new Path(tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+fileName+".xlsx");
FileInputFormat.setInputPaths(job, inputFile);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
// require that ALL metadata filters match
job.set("hadoopoffice.read.filter.metadata.matchAll","true");
// category deliberately does NOT match the written metadata
job.set("hadoopoffice.read.filter.metadata.category","no Category");
job.set("hadoopoffice.read.filter.metadata.contentstatus","dummycontentstatus");
job.set("hadoopoffice.read.filter.metadata.contenttype","dummycontenttype");
job.set("hadoopoffice.read.filter.metadata.created","12:00:00 01.01.2016");
job.set("hadoopoffice.read.filter.metadata.creator","dummycreator");
job.set("hadoopoffice.read.filter.metadata.description","dummydescription");
job.set("hadoopoffice.read.filter.metadata.identifier","dummyidentifier");
job.set("hadoopoffice.read.filter.metadata.keywords","dummykeywords");
job.set("hadoopoffice.read.filter.metadata.lastmodifiedbyuser","dummylastmodifiedbyuser");
job.set("hadoopoffice.read.filter.metadata.lastprinted","12:00:00 01.01.2016");
job.set("hadoopoffice.read.filter.metadata.modified","12:00:00 01.01.2016");
job.set("hadoopoffice.read.filter.metadata.revision","2");
job.set("hadoopoffice.read.filter.metadata.subject","dummysubject");
job.set("hadoopoffice.read.filter.metadata.title","dummytitle");
job.set("hadoopoffice.read.filter.metadata.custom.mycustomproperty1","dummymycustomproperty1");
// fixed key (was "hhadoopoffice...", making this filter a no-op)
job.set("hadoopoffice.read.filter.metadata.custom.mycustomproperty2","dummymycustomproperty2");
ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
inputFormat.configure(job);
InputSplit[] inputSplits = inputFormat.getSplits(job,1);
assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter);
assertNotNull(reader,"Format returned null RecordReader");
Text spreadSheetKey = new Text();
ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
// if following assertion is true that means the document has wrongly NOT been filtered out
assertFalse(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1");
}
示例10: writeExcelOutputFormatExcel2013SingleSheetMetaDataMatchOnceNegative
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
@Test
public void writeExcelOutputFormatExcel2013SingleSheetMetaDataMatchOnceNegative() throws IOException {
// one row string and three columns ("test1","test2","test3")
// the idea here is to have some content although we only evaluate metadata
SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1","","","A1","Sheet1");
SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2","","","B1","Sheet1");
SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3","","","C1","Sheet1");
// write
JobConf job = new JobConf(defaultConf);
String fileName="excel2013singlesheetmetadatanativeoncetestout";
String tmpDir=tmpPath.toString();
Path outputPath = new Path(tmpDir);
FileOutputFormat.setOutputPath(job, outputPath);
// set generic outputformat settings
job.set(JobContext.TASK_ATTEMPT_ID, attempt);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
job.set("hadoopoffice.write.mimeType","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new Excel format, anyway default, but here for illustrative purposes
// set all the meta data including to custom properties
job.set("hadoopoffice.write.metadata.category","dummycategory");
job.set("hadoopoffice.write.metadata.contentstatus","dummycontentstatus");
job.set("hadoopoffice.write.metadata.contenttype","dummycontenttype");
job.set("hadoopoffice.write.metadata.created","12:00:00 01.01.2016");
job.set("hadoopoffice.write.metadata.creator","dummycreator");
job.set("hadoopoffice.write.metadata.description","dummydescription");
job.set("hadoopoffice.write.metadata.identifier","dummyidentifier");
job.set("hadoopoffice.write.metadata.keywords","dummykeywords");
job.set("hadoopoffice.write.metadata.lastmodifiedbyuser","dummylastmodifiedbyuser");
job.set("hadoopoffice.write.metadata.lastprinted","12:00:00 01.01.2016");
job.set("hadoopoffice.write.metadata.modified","12:00:00 01.01.2016");
job.set("hadoopoffice.write.metadata.lastmodifiedbyuser","dummylastmodifiedbyuser");
job.set("hadoopoffice.write.metadata.revision","2");
job.set("hadoopoffice.write.metadata.subject","dummysubject");
job.set("hadoopoffice.write.metadata.title","dummytitle");
job.set("hadoopoffice.write.metadata.custom.mycustomproperty1","dummymycustomproperty1");
job.set("hadoopoffice.write.metadata.custom.mycustomproperty2","dummymycustomproperty2");
ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
RecordWriter<NullWritable,SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, fileName, null);
assertNotNull(writer,"Format returned null RecordWriter");
writer.write(null,a1);
writer.write(null,b1);
writer.write(null,c1);
writer.close(reporter);
// try to read it again
job = new JobConf(defaultConf);
Path inputFile = new Path(tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+fileName+".xlsx");
FileInputFormat.setInputPaths(job, inputFile);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
// set metadata to match all
job.set("hadoopoffice.read.filter.metadata.matchAll","false");
// following filter
job.set("hadoopoffice.read.filter.metadata.category","dummycategory2");
job.set("hadoopoffice.read.filter.metadata.contentstatus","dummycontentstatus2");
job.set("hadoopoffice.read.filter.metadata.contenttype","dummycontenttype2");
job.set("hadoopoffice.read.filter.metadata.created","12:00:00 01.01.2017");
job.set("hadoopoffice.read.filter.metadata.creator","dummycreator2");
job.set("hadoopoffice.read.filter.metadata.description","dummydescription2");
job.set("hadoopoffice.read.filter.metadata.identifier","dummyidentifier2");
job.set("hadoopoffice.read.filter.metadata.keywords","dummykeywords2");
job.set("hadoopoffice.read.filter.metadata.lastmodifiedbyuser","dummylastmodifiedbyuser2");
job.set("hadoopoffice.read.filter.metadata.lastprinted","12:00:00 01.01.2017");
job.set("hadoopoffice.read.filter.metadata.modified","12:00:00 01.01.2017");
job.set("hadoopoffice.read.filter.metadata.lastmodifiedbyuser","dummylastmodifiedbyuser2");
job.set("hadoopoffice.read.filter.metadata.revision","3");
job.set("hadoopoffice.read.filter.metadata.subject","dummysubject2");
job.set("hadoopoffice.read.filter.metadata.title","dummytitle2");
job.set("hadoopoffice.read.filter.metadata.custom.mycustomproperty1","dummymycustomproperty12");
job.set("hhadoopoffice.read.filter.metadata.custom.mycustomproperty2","dummymycustomproperty22");
ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
inputFormat.configure(job);
InputSplit[] inputSplits = inputFormat.getSplits(job,1);
assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter);
assertNotNull(reader,"Format returned null RecordReader");
Text spreadSheetKey = new Text();
ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
// if following assertion is true that means the document has (wrongly) NOT been filtered out
assertFalse(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1");
}
示例11: writeExcelOutputFormatExcel2013SingleSheetGZipCompressed
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
/**
 * Writes a single-sheet Excel 2013 (.xlsx) file with GZip output compression
 * enabled, then reads the resulting ".xlsx.gz" back and verifies the cells:
 * a string row, an intentionally empty row, a numeric row and a formula cell.
 * Uses fixtures from the enclosing test class: defaultConf, tmpPath, attempt, reporter.
 */
@Test
public void writeExcelOutputFormatExcel2013SingleSheetGZipCompressed() throws IOException {
    // one row string and three columns ("test1","test2","test3")
    // SpreadSheetCellDAO: (String formattedValue, String comment, String formula, String address, String sheetName)
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1","","","A1","Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2","","","B1","Sheet1");
    SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3","","","C1","Sheet1");
    // row 2 is intentionally left empty => nothing to do
    // one row numbers (1,2,3), written as formulas
    SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("","","1","A3","Sheet1");
    SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("","","2","B3","Sheet1");
    SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("","","3","C3","Sheet1");
    // one row formulas (=A3+B3), expected to evaluate to 3 when read back
    SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("","","A3+B3","A4","Sheet1");
    // write
    JobConf job = new JobConf(defaultConf);
    String fileName="excel2013singlesheetcompressedtestout";
    String tmpDir=tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    // set generic outputformat settings
    job.set(JobContext.TASK_ATTEMPT_ID, attempt);
    // enable GZip compression for the output file (adds the ".gz" extension)
    job.setBoolean("mapreduce.output.fileoutputformat.compress",true);
    job.set("mapreduce.output.fileoutputformat.compress.codec","org.apache.hadoop.io.compress.GzipCodec");
    // set locale to the one of the test data
    job.set("hadoopoffice.read.locale.bcp47","de");
    job.set("hadoopoffice.write.mimeType","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new Excel format, anyway default, but here for illustrative purposes
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable,SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, fileName, null);
    assertNotNull(writer,"Format returned null RecordWriter");
    writer.write(null,a1);
    writer.write(null,b1);
    writer.write(null,c1);
    writer.write(null,a3);
    writer.write(null,b3);
    writer.write(null,c3);
    writer.write(null,a4);
    writer.close(reporter);
    // try to read it again; the file lands in the task-attempt temporary directory
    job = new JobConf(defaultConf);
    Path inputFile = new Path(tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+fileName+".xlsx.gz");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    job.set("hadoopoffice.read.locale.bcp47","de");
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    inputFormat.configure(job);
    InputSplit[] inputSplits = inputFormat.getSplits(job,1);
    assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter);
    assertNotNull(reader,"Format returned null RecordReader");
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    // row 1: the three string cells
    assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1");
    assertEquals("["+fileName+".xlsx.gz]Sheet1!A1",spreadSheetKey.toString(),"Input Split for Excel file has keyname == \"["+fileName+".xlsx.gz]Sheet1!A1\"");
    assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contains row 1 with 3 columns");
    assertEquals("test1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("test2",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
    // row 2: empty
    assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 2");
    assertEquals(0,spreadSheetValue.get().length,"Input Split for Excel file contain row 2 and is empty");
    // row 3: the three numeric cells
    assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 3");
    assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contain row 3 with 3 columns");
    assertEquals("1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 1 == \"1\"");
    assertEquals("2",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 2 == \"2\"");
    assertEquals("3",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 3 == \"3\"");
    // row 4: the formula cell A3+B3 evaluated to 3
    assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 4");
    assertEquals(1,spreadSheetValue.get().length,"Input Split for Excel file contain row 4 with 1 column");
    // BUGFIX: assertion message previously said "row 3" although this checks row 4
    assertEquals("3",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 4 with cell 1 == \"3\"");
}
示例12: writeExcelOutputFormatExcel2013SingleSheetComment
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
/**
 * Writes a single-sheet Excel 2013 (.xlsx) file where cell B1 carries a cell
 * comment, then reads the file back and verifies both the formatted values and
 * the comment text.
 * Uses fixtures from the enclosing test class: defaultConf, tmpPath, attempt, reporter.
 */
@Test
public void writeExcelOutputFormatExcel2013SingleSheetComment() throws IOException {
// 2nd cell with a comment
// one row string and three columns ("test1","test2","test3")
// (String formattedValue, String comment, String formula, String address,String sheetName)
SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1","","","A1","Sheet1");
SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2","This is a test","","B1","Sheet1");
SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3","","","C1","Sheet1");
// write
JobConf job = new JobConf(defaultConf);
String fileName="excel2013singlesheetcommenttestout";
String tmpDir=tmpPath.toString();
Path outputPath = new Path(tmpDir);
FileOutputFormat.setOutputPath(job, outputPath);
// set generic outputformat settings
job.set(JobContext.TASK_ATTEMPT_ID, attempt);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
job.set("hadoopoffice.write.mimeType","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new Excel format, anyway default, but here for illustrative purposes
ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
RecordWriter<NullWritable,SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, fileName, null);
assertNotNull(writer,"Format returned null RecordWriter");
writer.write(null,a1);
writer.write(null,b1);
writer.write(null,c1);
writer.close(reporter);
// try to read it again; the output lands in the task-attempt temporary directory
job = new JobConf(defaultConf);
Path inputFile = new Path(tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+fileName+".xlsx");
FileInputFormat.setInputPaths(job, inputFile);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
inputFormat.configure(job);
InputSplit[] inputSplits = inputFormat.getSplits(job,1);
assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter);
assertNotNull(reader,"Format returned null RecordReader");
Text spreadSheetKey = new Text();
ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
// verify row 1 values and that the comment on B1 survived the round trip
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1");
assertEquals("["+fileName+".xlsx]Sheet1!A1",spreadSheetKey.toString(),"Input Split for Excel file has keyname == \"["+fileName+".xlsx]Sheet1!A1\"");
assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contains row 1 with 3 columns");
assertEquals("test1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
assertEquals("test2",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
assertEquals("This is a test",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getComment(),"Input Split for Excel file contains row 1 with cell 2 comment == \"This is a test\"");
assertEquals("test3",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
}
示例13: writeExcelOutputFormatExcel2013MultiSheet
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
/**
 * Writes an Excel 2013 (.xlsx) workbook containing two sheets ("Sheet1" and
 * "Sheet2") with one string row each, then reads the workbook back and verifies
 * that both sheets and their cell values are returned in order.
 * Uses fixtures from the enclosing test class: defaultConf, tmpPath, attempt, reporter.
 */
@Test
public void writeExcelOutputFormatExcel2013MultiSheet() throws IOException {
    // one sheet "Sheet1"
    // one row string and three columns ("test1","test2","test3")
    SpreadSheetCellDAO sheet1a1 = new SpreadSheetCellDAO("test1","","","A1","Sheet1");
    SpreadSheetCellDAO sheet1b1 = new SpreadSheetCellDAO("test2","","","B1","Sheet1");
    SpreadSheetCellDAO sheet1c1 = new SpreadSheetCellDAO("test3","","","C1","Sheet1");
    // one sheet "Sheet2"
    // one row string and three columns ("test4","test5","test6")
    SpreadSheetCellDAO sheet2a1 = new SpreadSheetCellDAO("test4","","","A1","Sheet2");
    SpreadSheetCellDAO sheet2b1 = new SpreadSheetCellDAO("test5","","","B1","Sheet2");
    SpreadSheetCellDAO sheet2c1 = new SpreadSheetCellDAO("test6","","","C1","Sheet2");
    // write
    JobConf job = new JobConf(defaultConf);
    String fileName="excel2013multisheettestout";
    String tmpDir=tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    // set generic outputformat settings
    job.set(JobContext.TASK_ATTEMPT_ID, attempt);
    // set locale to the one of the test data
    job.set("hadoopoffice.read.locale.bcp47","de");
    job.set("hadoopoffice.write.mimeType","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new Excel format, anyway default, but here for illustrative purposes
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable,SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, fileName, null);
    assertNotNull(writer,"Format returned null RecordWriter");
    writer.write(null,sheet1a1);
    writer.write(null,sheet1b1);
    writer.write(null,sheet1c1);
    writer.write(null,sheet2a1);
    writer.write(null,sheet2b1);
    writer.write(null,sheet2c1);
    writer.close(reporter);
    // try to read it again; the output lands in the task-attempt temporary directory
    job = new JobConf(defaultConf);
    Path inputFile = new Path(tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+fileName+".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    job.set("hadoopoffice.read.locale.bcp47","de");
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    inputFormat.configure(job);
    InputSplit[] inputSplits = inputFormat.getSplits(job,1);
    assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter);
    assertNotNull(reader,"Format returned null RecordReader");
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    // Sheet1, row 1
    assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1 Sheet1");
    assertEquals("["+fileName+".xlsx]Sheet1!A1",spreadSheetKey.toString(),"Input Split for Excel file has keyname == \"["+fileName+".xlsx]Sheet1!A1\"");
    assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contains row 1 with 3 columns for Sheet1");
    assertEquals("test1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("test2",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
    // Sheet2, row 1
    assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1 Sheet2");
    assertEquals("["+fileName+".xlsx]Sheet2!A1",spreadSheetKey.toString(),"Input Split for Excel file has keyname == \"["+fileName+".xlsx]Sheet2!A1\"");
    // BUGFIX: assertion message previously said "for Sheet1" although this checks Sheet2
    assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contains row 1 with 3 columns for Sheet2");
    assertEquals("test4",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 1 == \"test4\"");
    assertEquals("test5",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 2 == \"test5\"");
    assertEquals("test6",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 3 == \"test6\"");
}
示例14: writeExcelOutputFormatExcel2003SingleSheetOneLinkedWorkbook
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
/**
 * Writes two Excel 2003 (.xls) workbooks: first a "linked" workbook containing
 * the data, then a main workbook whose cell B1 is a formula referencing the
 * linked workbook's Sheet1!B1. Reads the main workbook back with linked-workbook
 * resolution enabled and verifies that the referenced value ("test2") is resolved.
 * Uses fixtures from the enclosing test class: defaultConf, tmpPath, attempt, reporter.
 */
@Test
public void writeExcelOutputFormatExcel2003SingleSheetOneLinkedWorkbook() throws IOException {
// write linkedworkbook1
// one row string and three columns ("test1","test2","test3")
// (String formattedValue, String comment, String formula, String address,String sheetName)
SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1","","","A1","Sheet1");
SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2","","","B1","Sheet1");
SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3","","","C1","Sheet1");
// write
JobConf job = new JobConf(defaultConf);
String linkedWB1FileName="excel2003linkedwb1";
String tmpDir=tmpPath.toString();
Path outputPath = new Path(tmpDir);
FileOutputFormat.setOutputPath(job, outputPath);
// set generic outputformat settings
job.set(JobContext.TASK_ATTEMPT_ID, attempt);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
// old Excel (.xls) format for both workbooks
job.set("hadoopoffice.write.mimeType","application/vnd.ms-excel");
ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
RecordWriter<NullWritable,SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, linkedWB1FileName, null);
assertNotNull(writer,"Format returned null RecordWriter");
writer.write(null,wb1a1);
writer.write(null,wb1b1);
writer.write(null,wb1c1);
writer.close(reporter);
// write mainworkbook
// NOTE(review): the bracketed absolute path ("[...xls]") appears to be the format
// expected by "hadoopoffice.write.linkedworkbooks" — confirm against the library docs
String linkedWorkbookFilename="["+tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+linkedWB1FileName+".xls]";
SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test4","","","A1","Sheet1");
SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("","","["+linkedWB1FileName+".xls]Sheet1!B1","B1","Sheet1"); // should be test2 in the end
// write
job = new JobConf(defaultConf);
String mainWBfileName="excel2003singlesheetlinkedwbtestout";
outputPath = new Path(tmpDir);
FileOutputFormat.setOutputPath(job, outputPath);
// set generic outputformat settings
job.set(JobContext.TASK_ATTEMPT_ID, attempt);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
job.set("hadoopoffice.write.mimeType","application/vnd.ms-excel");
// register the previously written workbook so its cells can be referenced
job.set("hadoopoffice.write.linkedworkbooks",linkedWorkbookFilename);
outputFormat = new ExcelFileOutputFormat();
RecordWriter<NullWritable,SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(null, job, mainWBfileName, null);
assertNotNull(writerMain,"Format returned null RecordWriter");
writerMain.write(null,a1);
writerMain.write(null,b1);
writerMain.close(reporter);
// try to read it again; the output lands in the task-attempt temporary directory
job = new JobConf(defaultConf);
Path inputFile = new Path(tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+mainWBfileName+".xls");
FileInputFormat.setInputPaths(job, inputFile);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
// enable option to read linked workbooks
job.setBoolean("hadoopoffice.read.linkedworkbooks",true);
job.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks",false);
ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
inputFormat.configure(job);
InputSplit[] inputSplits = inputFormat.getSplits(job,1);
assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter);
assertNotNull(reader,"Format returned null RecordReader");
Text spreadSheetKey = new Text();
ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
// row 1 of the main workbook: local value plus the value resolved from the linked workbook
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1 Sheet1");
assertEquals("["+mainWBfileName+".xls]Sheet1!A1",spreadSheetKey.toString(),"Input Split for Excel file has keyname == \"["+mainWBfileName+".xls]Sheet1!A1\"");
assertEquals(2,spreadSheetValue.get().length,"Input Split for Excel file contains row 1 with 2 columns for Sheet1");
assertEquals("test4",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 1 == \"test4\"");
// this comes from the external workbook
assertEquals("test2",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
}
示例15: writeExcelOutputFormatExcel2013TemplateSingleSheet
import org.apache.hadoop.mapred.RecordWriter; //导入方法依赖的package包/类
/**
 * Writes an Excel 2013 (.xlsx) file based on an existing template
 * ("templatetest1.xlsx" from the test resources), overriding cells A4 and B4
 * of sheet "Table1", then reads the result back and verifies that the
 * untouched template rows survive and the overridden cells hold the new values.
 * Uses fixtures from the enclosing test class: defaultConf, tmpPath, attempt, reporter.
 */
@Test
public void writeExcelOutputFormatExcel2013TemplateSingleSheet() throws IOException {
    // change the cell A4 from Test4 to Test5 from the template
    SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("Test5","","","A4","Table1");
    // change b4 from 10 to 60
    SpreadSheetCellDAO b4 = new SpreadSheetCellDAO("","","60","B4","Table1");
    // write
    JobConf job = new JobConf(defaultConf);
    String fileName="excel2013basedontemplate";
    String tmpDir=tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    // set generic outputformat settings
    job.set(JobContext.TASK_ATTEMPT_ID, attempt);
    // set locale to the one of the test data
    job.set("hadoopoffice.read.locale.bcp47","de");
    job.set("hadoopoffice.write.mimeType","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new excel format
    // template: resolve the test-resource template file and register it for the writer
    ClassLoader classLoader = getClass().getClassLoader();
    String fileNameTemplate=classLoader.getResource("templatetest1.xlsx").getFile();
    job.set("hadoopoffice.write.template.file",fileNameTemplate);
    //
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable,SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, fileName, null);
    assertNotNull(writer,"Format returned null RecordWriter");
    writer.write(null,a4);
    writer.write(null,b4);
    writer.close(reporter);
    // try to read it again; the output lands in the task-attempt temporary directory
    job = new JobConf(defaultConf);
    Path inputFile = new Path(tmpDir+File.separator+"_temporary"+File.separator+"0"+File.separator+"_temporary"+File.separator+attempt+File.separator+fileName+".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    job.set("hadoopoffice.read.locale.bcp47","de");
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    inputFormat.configure(job);
    InputSplit[] inputSplits = inputFormat.getSplits(job,1);
    assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter);
    assertNotNull(reader,"Format returned null RecordReader");
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    // rows 1-3 come unchanged from the template
    assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1");
    assertEquals("["+fileName+".xlsx]Table1!A1",spreadSheetKey.toString(),"Input Split for Excel file has keyname == \"["+fileName+".xlsx]Table1!A1\"");
    assertEquals(2,spreadSheetValue.get().length,"Input Split for Excel file contains row 1 with 2 columns");
    assertEquals("Test",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 1 == \"Test\"");
    assertEquals("10",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 2 == \"10\"");
    assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 2");
    assertEquals(2,spreadSheetValue.get().length,"Input Split for Excel file contains row 2 with 2 columns");
    assertEquals("Test2",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 2 with cell 1 == \"Test2\"");
    assertEquals("50",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 2 with cell 2 == \"50\"");
    assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 3");
    assertEquals(2,spreadSheetValue.get().length,"Input Split for Excel file contain row 3 with 2 columns");
    assertEquals("Test3",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 1 == \"Test3\"");
    assertEquals("20",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 2 == \"20\"");
    // row 4 holds the two overridden cells
    assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 4");
    assertEquals(2,spreadSheetValue.get().length,"Input Split for Excel file contain row 4 with 2 columns");
    // BUGFIX: assertion messages previously said "row 3" although these check row 4
    assertEquals("Test5",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 4 with cell 1 == \"Test5\"");
    assertEquals("60",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 4 with cell 2 == \"60\"");
}