This page collects typical usage examples of the Java method parquet.hadoop.ParquetWriter.write. If you are wondering what ParquetWriter.write does, how to call it, or where it is used in practice, the hand-picked code examples below should help. You can also explore further uses of the enclosing class, parquet.hadoop.ParquetWriter.
The four ParquetWriter.write code examples below are sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
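Before the examples, a minimal, self-contained sketch of the basic write pattern may help orient the reader. It assumes the pre-Apache parquet-mr 1.x artifacts used throughout this page, and it uses the library-bundled parquet.hadoop.example.GroupWriteSupport rather than the project-specific GroupWriteSupport (which also accepts file metadata) that appears in the examples below; the schema, values, and output path are illustrative only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import parquet.example.data.Group;
import parquet.example.data.simple.SimpleGroupFactory;
import parquet.hadoop.ParquetWriter;
import parquet.hadoop.example.GroupWriteSupport;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class MinimalParquetWrite {
    public static void main(String[] args) throws IOException {
        // declare the schema and register it in the Configuration for GroupWriteSupport
        MessageType schema = MessageTypeParser.parseMessageType(
                "message example { required binary name; required int32 id; }");
        Configuration conf = new Configuration();
        GroupWriteSupport.setSchema(schema, conf);

        SimpleGroupFactory factory = new SimpleGroupFactory(schema);
        ParquetWriter<Group> writer = new ParquetWriter<Group>(
                new Path("example.parquet"), conf, new GroupWriteSupport());
        try {
            // write() serializes one Group (one row) into the current row group
            writer.write(factory.newGroup().append("name", "alice").append("id", 1));
        } finally {
            writer.close(); // flushes buffered pages and writes the Parquet footer
        }
    }
}

write is called once per record; the writer buffers pages in memory, and the file only becomes readable after close has written the footer.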
Example 1: main
import parquet.hadoop.ParquetWriter; // import the class this method depends on
public static void main(String[] args) throws IOException {
    int fileNum = 10;       // number of files to construct
    int fileRecordNum = 50; // number of records per file
    int rowKey = 0;
    for (int i = 0; i < fileNum; ++i) {
        Map<String, String> metas = new HashMap<>();
        metas.put(HConstants.START_KEY, genRowKey("%10d", rowKey + 1));
        metas.put(HConstants.END_KEY, genRowKey("%10d", rowKey + fileRecordNum));
        ParquetWriter<Group> writer = initWriter("pfile/scanner_test_file" + i, metas);
        for (int j = 0; j < fileRecordNum; ++j) {
            rowKey++;
            Group group = sfg.newGroup().append("rowkey", genRowKey("%10d", rowKey))
                    .append("cf:name", "wangxiaoyi" + rowKey)
                    .append("cf:age", String.format("%10d", rowKey))
                    .append("cf:job", "student")
                    .append("timestamp", System.currentTimeMillis());
            writer.write(group);
        }
        writer.close();
    }
}
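Example 1 leans on members of its enclosing test class that are not shown here: sfg (a SimpleGroupFactory), the helpers initWriter and genRowKey, and HBase's HConstants for the metadata keys. The sketch below is a hypothetical reconstruction of the two helpers, not the project's actual code; it assumes initWriter builds the writer much like Example 2 does, passing the start/end-key metadata to the project's GroupWriteSupport, and that genRowKey formats the numeric key to a fixed width so rowkeys sort lexicographically.

// Hypothetical reconstructions of the helpers used above, for illustration only.
private static String genRowKey(String format, int key) {
    // fixed-width formatting keeps rowkeys in lexicographic order
    return String.format(format, key);
}

private static ParquetWriter<Group> initWriter(String path, Map<String, String> metas)
        throws IOException {
    Configuration conf = new Configuration();
    GroupWriteSupport.setSchema(schema, conf); // schema: the class-level MessageType behind sfg
    return new ParquetWriter<Group>(
            new Path(path),
            new GroupWriteSupport(metas), // project-specific support carrying the file metadata
            CompressionCodecName.UNCOMPRESSED,
            ParquetWriter.DEFAULT_BLOCK_SIZE,
            ParquetWriter.DEFAULT_PAGE_SIZE,
            ParquetWriter.DEFAULT_PAGE_SIZE, // dictionary page size
            true,                            // enable dictionary encoding
            false,                           // disable schema validation
            ParquetProperties.WriterVersion.PARQUET_1_0,
            conf);
}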
Example 2: main
import parquet.hadoop.ParquetWriter; // import the class this method depends on
public static void main(String[] args) throws IOException {
    Path root = new Path("hdfs://10.214.208.11:9000/parquet/"); // target directory
    Configuration configuration = new Configuration();
    MessageType schema = MessageTypeParser.parseMessageType( // Parquet file schema
            " message people { " +
            "required binary rowkey;" +
            "required binary cf:name;" +
            "required binary cf:age;" +
            "required int64 timestamp;" +
            " }");
    GroupWriteSupport.setSchema(schema, configuration);
    SimpleGroupFactory sfg = new SimpleGroupFactory(schema);
    Path file = new Path(root, "people002.parquet");
    Map<String, String> meta = new HashMap<String, String>();
    meta.put("startkey", "1");
    meta.put("endkey", "2");
    ParquetWriter<Group> writer = new ParquetWriter<Group>(
            file,
            new GroupWriteSupport(meta),
            CompressionCodecName.UNCOMPRESSED,
            1024,  // block (row group) size, in bytes
            1024,  // page size
            512,   // dictionary page size
            true,  // enable dictionary encoding
            false, // disable schema validation
            ParquetProperties.WriterVersion.PARQUET_1_0,
            configuration);
    Group group = sfg.newGroup().append("rowkey", "1")
            .append("cf:name", "wangxiaoyi")
            .append("cf:age", "24")
            .append("timestamp", System.currentTimeMillis());
    writer.write(group); // a single hand-built record
    for (int i = 0; i < 10000; ++i) {
        // every group must populate exactly the fields declared in the schema
        writer.write(sfg.newGroup()
                .append("rowkey", String.valueOf(i))
                .append("cf:name", "wangxiaoyi" + i)
                .append("cf:age", String.valueOf(i))
                .append("timestamp", System.currentTimeMillis()));
    }
    writer.close();
}
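Note that the sizes passed to the constructor here (a 1024-byte block, 1024-byte pages, a 512-byte dictionary page) are toy values for demonstration; parquet-mr's defaults are a 128 MB row group and 1 MB pages. To check what was written, the file can be read back with the example read side. A minimal sketch, assuming the library-bundled parquet.hadoop.example.GroupReadSupport and the same parquet 1.x ParquetReader(Path, ReadSupport) constructor:

ParquetReader<Group> reader = new ParquetReader<Group>(file, new GroupReadSupport());
try {
    Group g;
    while ((g = reader.read()) != null) { // read() returns null once the file is exhausted
        System.out.println(g.getString("rowkey", 0) + " " + g.getString("cf:name", 0));
    }
} finally {
    reader.close();
}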
Example 3: writeToFile
import parquet.hadoop.ParquetWriter; // import the class this method depends on
public static void writeToFile(File f, List<User> users) throws IOException {
    Configuration conf = new Configuration();
    GroupWriteSupport.setSchema(schema, conf);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(
            new Path(f.getAbsolutePath()), conf, new GroupWriteSupport(null));
    for (User u : users) {
        writer.write(groupFromUser(u));
    }
    writer.close();
}
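Example 3 also depends on class members that are not shown: a MessageType named schema and the groupFromUser mapper. The pairing below is a hypothetical sketch, assuming a User bean with getName() and getAge() accessors; the field names and types are illustrative only.

// Hypothetical reconstructions of the members Example 3 relies on.
private static final MessageType schema = MessageTypeParser.parseMessageType(
        "message user { required binary name; required int32 age; }");
private static final SimpleGroupFactory factory = new SimpleGroupFactory(schema);

private static Group groupFromUser(User u) {
    // map one User object onto one Parquet row
    return factory.newGroup()
            .append("name", u.getName())
            .append("age", u.getAge());
}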
Example 4: main
import parquet.hadoop.ParquetWriter; // import the class this method depends on
public static void main(String[] args) {
    String inputFile = null;
    String outputFile = null;
    HelpFormatter formatter = new HelpFormatter();
    // create Options object
    Options options = new Options();
    // add the input/output options
    options.addOption("i", true, "input avro file");
    options.addOption("o", true, "output Parquet file");
    CommandLineParser parser = new DefaultParser();
    CommandLine cmd;
    try {
        cmd = parser.parse(options, args);
        inputFile = cmd.getOptionValue("i");
        if (inputFile == null) {
            formatter.printHelp("AvroToParquet", options);
            return;
        }
        outputFile = cmd.getOptionValue("o");
    } catch (ParseException exc) {
        System.err.println("Problem with command line parameters: " + exc.getMessage());
        return;
    }
    File avroFile = new File(inputFile);
    if (!avroFile.exists()) {
        System.err.println("Could not open file: " + inputFile);
        return;
    }
    try {
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
        DataFileReader<GenericRecord> dataFileReader;
        dataFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);
        Schema avroSchema = dataFileReader.getSchema();
        // choose compression scheme
        CompressionCodecName compressionCodecName = CompressionCodecName.SNAPPY;
        // set Parquet file block size and page size values
        int blockSize = 256 * 1024 * 1024;
        int pageSize = 64 * 1024;
        String base = FilenameUtils.removeExtension(avroFile.getAbsolutePath()) + ".parquet";
        if (outputFile != null) {
            File file = new File(outputFile);
            base = file.getAbsolutePath();
        }
        Path outputPath = new Path("file:///" + base);
        // the ParquetWriter object that will consume Avro GenericRecords
        ParquetWriter<GenericRecord> parquetWriter;
        parquetWriter = new AvroParquetWriter<GenericRecord>(outputPath, avroSchema,
                compressionCodecName, blockSize, pageSize);
        for (GenericRecord record : dataFileReader) {
            parquetWriter.write(record);
        }
        dataFileReader.close();
        parquetWriter.close();
    } catch (IOException e) {
        System.err.println("Caught exception: " + e.getMessage());
    }
}
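Example 4 is a complete Avro-to-Parquet command-line converter: commons-cli parses the -i/-o flags, Avro's DataFileReader streams GenericRecords out of the input file, and AvroParquetWriter derives the Parquet schema from the embedded Avro schema and writes each record with write. Assuming the enclosing class is named AvroToParquet and that Avro, parquet-avro, commons-cli, and commons-io are on the classpath, an invocation would look like this (file names are placeholders):

java AvroToParquet -i events.avro -o events.parquet

A small input file for such a test could be produced with plain Avro, as in this hypothetical helper:

// Hypothetical: generate a two-field Avro file to feed the converter above.
Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Event\",\"fields\":[" +
        "{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"msg\",\"type\":\"string\"}]}");
DataFileWriter<GenericRecord> out =
        new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema));
out.create(schema, new File("events.avro"));
GenericRecord r = new GenericData.Record(schema);
r.put("id", 1L);
r.put("msg", "hello");
out.append(r);
out.close();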