当前位置: 首页>>代码示例>>Java>>正文


Java ParquetWriter.write方法代码示例

本文整理汇总了Java中parquet.hadoop.ParquetWriter.write方法的典型用法代码示例。如果您正苦于以下问题:Java ParquetWriter.write方法的具体用法?Java ParquetWriter.write怎么用?Java ParquetWriter.write使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在parquet.hadoop.ParquetWriter的用法示例。


在下文中一共展示了ParquetWriter.write方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import parquet.hadoop.ParquetWriter; //导入方法依赖的package包/类
/**
 * Generates a series of Parquet test files named {@code pfile/scanner_test_file<i>}.
 *
 * <p>Writes {@code fileNum} files with {@code fileRecordNum} records each. Row keys are
 * numbered consecutively across all files, starting at 1. The first and last row key of
 * each file are stored in that file's metadata map under {@code HConstants.START_KEY}
 * and {@code HConstants.END_KEY}.
 *
 * @param args unused
 * @throws IOException if creating, writing or closing a writer fails
 */
public static void main(String []args) throws IOException{


        int fileNum = 10;   //num of files constructed
        int fileRecordNum = 50; //record num of each file
        int rowKey = 0;
        for(int i = 0; i < fileNum; ++ i ) {

            // Key range covered by this file: (rowKey + 1) .. (rowKey + fileRecordNum).
            Map<String, String> metas = new HashMap<>();
            metas.put(HConstants.START_KEY, genRowKey("%10d", rowKey + 1));
            metas.put(HConstants.END_KEY, genRowKey("%10d", rowKey + fileRecordNum));

            ParquetWriter<Group> writer = initWriter("pfile/scanner_test_file" + i, metas);

            // Close the writer even when a write fails, so the file handle is not leaked.
            try {
                for (int j = 0;  j < fileRecordNum; ++j) {
                    rowKey ++;
                    Group group = sfg.newGroup().append("rowkey", genRowKey("%10d", rowKey))
                            .append("cf:name", "wangxiaoyi" + rowKey)
                            .append("cf:age", String.format("%10d", rowKey))
                            .append("cf:job", "student")
                            .append("timestamp", System.currentTimeMillis());
                    writer.write(group);
                }
            } finally {
                writer.close();
            }
        }
    }
 
开发者ID:grokcoder,项目名称:pbase,代码行数:28,代码来源:GenerateParquetFile.java

示例2: main

import parquet.hadoop.ParquetWriter; //导入方法依赖的package包/类
/**
 * Writes 10000 sample records to {@code people002.parquet} under a fixed HDFS directory.
 *
 * <p>Every record is built against the {@code people} schema declared below
 * ({@code rowkey}, {@code cf:name}, {@code cf:age}, {@code timestamp}); appending a field
 * name that is not part of the schema is rejected by the group implementation.
 *
 * @param args unused
 * @throws IOException if creating, writing or closing the writer fails
 */
public static void main(String[] args) throws IOException {


        Path root = new Path("hdfs://10.214.208.11:9000/parquet/"); // directory path

        Configuration configuration = new Configuration();

        MessageType schema = MessageTypeParser.parseMessageType( // Parquet file schema

                " message people { " +

                        "required binary rowkey;" +
                        "required binary cf:name;" +
                        "required binary cf:age;" +
                        "required int64 timestamp;"+
                 " }");

        GroupWriteSupport.setSchema(schema, configuration);

        SimpleGroupFactory sfg = new SimpleGroupFactory(schema);
        Path file = new Path(root, "people002.parquet");

        // NOTE(review): these key bounds are kept from the original and do not reflect
        // the row keys written below (0..9999) - confirm what readers expect here.
        Map<String, String> meta = new HashMap<String, String>();
        meta.put("startkey", "1");
        meta.put("endkey", "2");


        // NOTE(review): the numeric/boolean arguments presumably follow the stock
        // ParquetWriter order (block size, page size, dictionary page size,
        // enable dictionary, validating) - confirm against this project's ParquetWriter.
        ParquetWriter<Group> writer = new ParquetWriter<Group>(
                file,
                new GroupWriteSupport(meta),
                CompressionCodecName.UNCOMPRESSED,
                1024,
                1024,
                512,
                true,
                false,
                ParquetProperties.WriterVersion.PARQUET_1_0,
                configuration);

        // Close the writer even when a write fails, so the file handle is not leaked.
        try {
            for (int i = 0; i < 10000; ++i) {
                // Field names and value types must match the schema: the three binary
                // columns take strings, the int64 timestamp takes a long.
                writer.write(
                        sfg.newGroup()
                                .append("rowkey", String.valueOf(i))
                                .append("cf:name", "wangxiaoyi" + i)
                                .append("cf:age", String.valueOf(i))
                                .append("timestamp", System.currentTimeMillis()));
            }
        } finally {
            writer.close();
        }


    }
 
开发者ID:grokcoder,项目名称:pbase,代码行数:56,代码来源:TestParquetWrite.java

示例3: writeToFile

import parquet.hadoop.ParquetWriter; //导入方法依赖的package包/类
/**
 * Writes every user in {@code users} as one record to a Parquet file at {@code f}.
 *
 * <p>Each user is converted with {@code groupFromUser} and written using the
 * {@code schema} registered on a fresh {@link Configuration}. The writer is always
 * closed, including when a conversion or write fails.
 *
 * @param f     destination file; its absolute path is used as the Parquet path
 * @param users users to write, in iteration order
 * @throws IOException if creating, writing or closing the writer fails
 */
public static void writeToFile(File f, List<User> users) throws IOException {
    Configuration conf = new Configuration();
    GroupWriteSupport.setSchema(schema, conf);

    ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getAbsolutePath()), conf, new GroupWriteSupport(null));
    // Close in finally so a failure mid-list does not leak the open file handle.
    try {
        for (User u : users) {
            writer.write(groupFromUser(u));
        }
    } finally {
        writer.close();
    }
}
 
开发者ID:grokcoder,项目名称:pbase,代码行数:11,代码来源:PhoneBookWriter.java

示例4: main

import parquet.hadoop.ParquetWriter; //导入方法依赖的package包/类
/**
 * Command-line entry point that copies every record of an Avro data file into a
 * Snappy-compressed Parquet file.
 *
 * <p>Options: {@code -i <file>} is the input Avro file (required; help is printed when
 * it is missing) and {@code -o <file>} is the output Parquet file (optional; defaults to
 * the input path with its extension replaced by {@code .parquet}).
 *
 * <p>Argument and I/O problems are reported on {@code System.err}; no exception
 * propagates to the caller.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {

		String inputFile = null;
		String outputFile = null;
		
		HelpFormatter formatter = new HelpFormatter();
		// create Options object
		Options options = new Options();

		// add the input (-i) and output (-o) options
		options.addOption("i", true, "input avro file");
		options.addOption("o", true, "output Parquet file");
		CommandLineParser parser = new DefaultParser();
		CommandLine cmd;
		try {
			cmd = parser.parse(options, args);
			inputFile = cmd.getOptionValue("i");
			if (inputFile == null) {
				formatter.printHelp("AvroToParquet", options);
				return;
			}
			outputFile = cmd.getOptionValue("o");
		} catch (ParseException exc) {
			System.err.println("Problem with command line parameters: " + exc.getMessage());
			return;
		}

		File avroFile = new File(inputFile);

		if (!avroFile.exists()) {
			System.err.println("Could not open file: " + inputFile);
			return;
		}
		try {

			DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
			DataFileReader<GenericRecord> dataFileReader;
			dataFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);
			// From here on the reader is open: release it even if the conversion fails.
			try {
				Schema avroSchema = dataFileReader.getSchema();

				// choose compression scheme
				CompressionCodecName compressionCodecName = CompressionCodecName.SNAPPY;

				// set Parquet file block size and page size values
				int blockSize = 256 * 1024 * 1024;
				int pageSize = 64 * 1024;

				// default output: input path with its extension swapped for ".parquet"
				String base = FilenameUtils.removeExtension(avroFile.getAbsolutePath()) + ".parquet";
				if(outputFile != null) {
					File file = new File(outputFile);
					base = file.getAbsolutePath();
				}

				Path outputPath = new Path("file:///"+base);

				// the ParquetWriter object that will consume Avro GenericRecords
				ParquetWriter<GenericRecord> parquetWriter;
				parquetWriter = new AvroParquetWriter<GenericRecord>(outputPath, avroSchema, compressionCodecName, blockSize, pageSize);
				// Close the writer even when a record fails to convert or write.
				try {
					for (GenericRecord record : dataFileReader) {
						parquetWriter.write(record);
					}
				} finally {
					parquetWriter.close();
				}
			} finally {
				dataFileReader.close();
			}
		} catch (IOException e) {
			System.err.println("Caught exception: " + e.getMessage());
		}
	}
 
开发者ID:CohesionForce,项目名称:avroToParquet,代码行数:68,代码来源:AvroToParquet.java


注:本文中的parquet.hadoop.ParquetWriter.write方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。