This article collects typical usage examples of the Java class io.confluent.connect.hdfs.partitioner.Partitioner. If you have been wondering what the Partitioner class is for or how to use it, the curated examples below should help.
The Partitioner class belongs to the io.confluent.connect.hdfs.partitioner package. Fifteen code examples are shown below, ordered by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
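Before diving into the examples, here is a minimal sketch of how a Partitioner is typically driven, using only the calls that appear in the examples below (configure, generatePartitionedPath, partitionFields). The class name and the expected output are assumptions for illustration, and the exact interface may vary between connector versions, so treat this as a sketch rather than the library's documented API.

import io.confluent.connect.hdfs.partitioner.DefaultPartitioner;
import io.confluent.connect.hdfs.partitioner.Partitioner;

import java.util.Collections;

public class PartitionerUsageSketch {
  public static void main(String[] args) {
    // Sketch only: DefaultPartitioner is configured with an empty map here,
    // mirroring Example 9 below.
    Partitioner partitioner = new DefaultPartitioner();
    partitioner.configure(Collections.<String, Object>emptyMap());

    // Build the directory fragment for topic "logs", Kafka partition 0,
    // the same way the tests below construct expected file paths.
    String directory = partitioner.generatePartitionedPath("logs", "partition=0");
    System.out.println(directory);                     // expected to resemble "logs/partition=0"
    System.out.println(partitioner.partitionFields()); // the Hive partition columns
  }
}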
Example 1: constructAvroTable
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
private Table constructAvroTable(String database, String tableName, Schema schema, Partitioner partitioner)
    throws HiveMetaStoreException {
  Table table = newTable(database, tableName);
  table.setTableType(TableType.EXTERNAL_TABLE);
  table.getParameters().put("EXTERNAL", "TRUE");
  String tablePath = FileUtils.hiveDirectoryName(url, topicsDir, tableName);
  table.setDataLocation(new Path(tablePath));
  table.setSerializationLib(avroSerde);
  try {
    table.setInputFormatClass(avroInputFormat);
    table.setOutputFormatClass(avroOutputFormat);
  } catch (HiveException e) {
    throw new HiveMetaStoreException("Cannot find input/output format:", e);
  }
  List<FieldSchema> columns = HiveSchemaConverter.convertSchema(schema);
  table.setFields(columns);
  table.setPartCols(partitioner.partitionFields());
  table.getParameters().put(AVRO_SCHEMA_LITERAL, avroData.fromConnectSchema(schema).toString());
  return table;
}
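Note how the Hive partition columns come directly from partitioner.partitionFields(). As a hedged illustration, inferred only from Example 12 below (which asserts that the single partition column is named "partition"), a default partitioner plausibly returns something like the following; the connector's actual implementation may differ.

import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.FieldSchema;

public class DefaultPartitionFieldsSketch {
  // Hypothetical sketch, not the library's actual source: a single Hive
  // partition column named "partition" of type string.
  public static List<FieldSchema> partitionFields() {
    return Collections.singletonList(new FieldSchema("partition", "string", ""));
  }
}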
Example 2: constructParquetTable
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
private Table constructParquetTable(String database, String tableName, Schema schema, Partitioner partitioner)
    throws HiveMetaStoreException {
  Table table = newTable(database, tableName);
  table.setTableType(TableType.EXTERNAL_TABLE);
  table.getParameters().put("EXTERNAL", "TRUE");
  String tablePath = FileUtils.hiveDirectoryName(url, topicsDir, tableName);
  table.setDataLocation(new Path(tablePath));
  table.setSerializationLib(getHiveParquetSerde());
  try {
    table.setInputFormatClass(getHiveParquetInputFormat());
    table.setOutputFormatClass(getHiveParquetOutputFormat());
  } catch (HiveException e) {
    throw new HiveMetaStoreException("Cannot find input/output format:", e);
  }
  // convert the Connect (formerly "Copycat") schema to Hive columns
  List<FieldSchema> columns = HiveSchemaConverter.convertSchema(schema);
  table.setFields(columns);
  table.setPartCols(partitioner.partitionFields());
  return table;
}
Example 3: constructAvroTable
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
private Table constructAvroTable(String database, String tableName, Schema schema, Partitioner partitioner)
    throws HiveMetaStoreException {
  Table table = new Table(database, tableName);
  table.setTableType(TableType.EXTERNAL_TABLE);
  table.getParameters().put("EXTERNAL", "TRUE");
  String tablePath = FileUtils.hiveDirectoryName(url, topicsDir, tableName);
  table.setDataLocation(new Path(tablePath));
  table.setSerializationLib(avroSerde);
  try {
    table.setInputFormatClass(avroInputFormat);
    table.setOutputFormatClass(avroOutputFormat);
  } catch (HiveException e) {
    throw new HiveMetaStoreException("Cannot find input/output format:", e);
  }
  List<FieldSchema> columns = HiveSchemaConverter.convertSchema(schema);
  table.setFields(columns);
  table.setPartCols(partitioner.partitionFields());
  table.getParameters().put(AVRO_SCHEMA_LITERAL, avroData.fromConnectSchema(schema).toString());
  return table;
}
Example 4: visible
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
@Override
public boolean visible(String name, Map<String, Object> connectorConfigs) {
  String partitionerName = (String) connectorConfigs.get(PARTITIONER_CLASS_CONFIG);
  try {
    @SuppressWarnings("unchecked")
    Class<? extends Partitioner> partitioner = (Class<? extends Partitioner>) Class.forName(partitionerName);
    if (classNameEquals(partitionerName, DefaultPartitioner.class)) {
      return false;
    } else if (FieldPartitioner.class.isAssignableFrom(partitioner)) {
      // subclass of FieldPartitioner
      return name.equals(PARTITION_FIELD_NAME_CONFIG);
    } else if (TimeBasedPartitioner.class.isAssignableFrom(partitioner)) {
      // subclass of TimeBasedPartitioner
      if (classNameEquals(partitionerName, DailyPartitioner.class)
          || classNameEquals(partitionerName, HourlyPartitioner.class)) {
        return name.equals(LOCALE_CONFIG) || name.equals(TIMEZONE_CONFIG);
      } else {
        return name.equals(PARTITION_DURATION_MS_CONFIG)
            || name.equals(PATH_FORMAT_CONFIG)
            || name.equals(LOCALE_CONFIG)
            || name.equals(TIMEZONE_CONFIG);
      }
    } else {
      throw new ConfigException("Not a valid partitioner class: " + partitionerName);
    }
  } catch (ClassNotFoundException e) {
    throw new ConfigException("Partitioner class not found: " + partitionerName);
  }
}
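The classNameEquals helper is not part of the snippet above. A plausible, hedged reconstruction is shown below; the connector's actual helper may compare the names differently.

// Hypothetical reconstruction of the classNameEquals helper used above:
// treat a config value as matching when it equals either the simple or the
// fully qualified class name.
private static boolean classNameEquals(String className, Class<?> clazz) {
  return className.equals(clazz.getSimpleName())
      || className.equals(clazz.getCanonicalName());
}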
Example 5: constructParquetTable
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
private Table constructParquetTable(String database, String tableName, Schema schema, Partitioner partitioner)
    throws HiveMetaStoreException {
  Table table = new Table(database, tableName);
  table.setTableType(TableType.EXTERNAL_TABLE);
  table.getParameters().put("EXTERNAL", "TRUE");
  String tablePath = FileUtils.hiveDirectoryName(url, topicsDir, tableName);
  table.setDataLocation(new Path(tablePath));
  table.setSerializationLib(getHiveParquetSerde());
  try {
    table.setInputFormatClass(getHiveParquetInputFormat());
    table.setOutputFormatClass(getHiveParquetOutputFormat());
  } catch (HiveException e) {
    throw new HiveMetaStoreException("Cannot find input/output format:", e);
  }
  // convert the Connect (formerly "Copycat") schema to Hive columns
  List<FieldSchema> columns = HiveSchemaConverter.convertSchema(schema);
  table.setFields(columns);
  table.setPartCols(partitioner.partitionFields());
  return table;
}
Example 6: createPartitioner
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
private Partitioner createPartitioner(HdfsSinkConnectorConfig config)
    throws ClassNotFoundException, IllegalAccessException, InstantiationException {
  @SuppressWarnings("unchecked")
  Class<? extends Partitioner> partitionerClasss = (Class<? extends Partitioner>)
      Class.forName(config.getString(HdfsSinkConnectorConfig.PARTITIONER_CLASS_CONFIG));
  Map<String, Object> map = copyConfig(config);
  Partitioner partitioner = partitionerClasss.newInstance();
  partitioner.configure(map);
  return partitioner;
}
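For context, the partitioner class is chosen purely through connector configuration and then instantiated reflectively as above. Below is a hedged sketch of the relevant properties for a time-based partitioner; the literal key names are assumptions matching the HdfsSinkConnectorConfig constants referenced in these examples, so check them against your connector version.

import java.util.HashMap;
import java.util.Map;

public class PartitionerConfigSketch {
  // Hedged sketch: connector properties that drive createPartitioner above.
  public static Map<String, String> timeBasedPartitionerProps() {
    Map<String, String> props = new HashMap<>();
    props.put("partitioner.class",
        "io.confluent.connect.hdfs.partitioner.TimeBasedPartitioner");
    props.put("partition.duration.ms", "3600000");
    props.put("path.format", "'year'=YYYY/'month'=MM/'day'=dd/'hour'=HH/");
    props.put("locale", "en");
    props.put("timezone", "UTC");
    return props;
  }
}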
Example 7: TopicPartitionWriter
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
public TopicPartitionWriter(
    TopicPartition tp,
    Storage storage,
    RecordWriterProvider writerProvider,
    Partitioner partitioner,
    HdfsSinkConnectorConfig connectorConfig,
    SinkTaskContext context,
    AvroData avroData) {
  this(tp, storage, writerProvider, partitioner, connectorConfig, context, avroData, null, null, null, null, null);
}
Example 8: testWriteRecordNonZeroInitailOffset
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
@Test
public void testWriteRecordNonZeroInitailOffset() throws Exception {
  DataWriter hdfsWriter = new DataWriter(connectorConfig, context, avroData);
  Partitioner partitioner = hdfsWriter.getPartitioner();
  hdfsWriter.recover(TOPIC_PARTITION);
  String key = "key";
  Schema schema = createSchema();
  Struct record = createRecord(schema);
  Collection<SinkRecord> sinkRecords = new ArrayList<>();
  for (long offset = 3; offset < 10; offset++) {
    SinkRecord sinkRecord =
        new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, key, schema, record, offset);
    sinkRecords.add(sinkRecord);
  }
  hdfsWriter.write(sinkRecords);
  hdfsWriter.close(assignment);
  hdfsWriter.stop();
  String directory = partitioner.generatePartitionedPath(TOPIC, "partition=" + String.valueOf(PARTITION));
  // Last file (offset 9) doesn't satisfy size requirement and gets discarded on close
  long[] validOffsets = {2, 5, 8};
  for (int i = 1; i < validOffsets.length; i++) {
    long startOffset = validOffsets[i - 1] + 1;
    long endOffset = validOffsets[i];
    Path path = new Path(FileUtils.committedFileName(url, topicsDir, directory,
                                                     TOPIC_PARTITION, startOffset, endOffset,
                                                     extension, ZERO_PAD_FMT));
    Collection<Object> records = schemaFileReader.readData(conf, path);
    long size = endOffset - startOffset + 1;
    assertEquals(size, records.size());
    for (Object avroRecord : records) {
      assertEquals(avroData.fromConnectData(schema, record), avroRecord);
    }
  }
}
Example 9: testWriteRecordDefaultWithPadding
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
@Test
public void testWriteRecordDefaultWithPadding() throws Exception {
  Partitioner partitioner = new DefaultPartitioner();
  partitioner.configure(Collections.<String, Object>emptyMap());
  connectorProps.put(HdfsSinkConnectorConfig.FILENAME_OFFSET_ZERO_PAD_WIDTH_CONFIG, "2");
  configureConnector();
  TopicPartitionWriter topicPartitionWriter = new TopicPartitionWriter(
      TOPIC_PARTITION, storage, writerProvider, partitioner, connectorConfig, context, avroData);
  String key = "key";
  Schema schema = createSchema();
  Struct[] records = createRecords(schema);
  Collection<SinkRecord> sinkRecords = createSinkRecords(records, key, schema);
  for (SinkRecord record : sinkRecords) {
    topicPartitionWriter.buffer(record);
  }
  topicPartitionWriter.recover();
  topicPartitionWriter.write();
  topicPartitionWriter.close();
  Set<Path> expectedFiles = new HashSet<>();
  expectedFiles.add(new Path(url + "/" + topicsDir + "/" + TOPIC + "/partition=" + PARTITION +
                             "/" + TOPIC + "+" + PARTITION + "+00+02" + extension));
  expectedFiles.add(new Path(url + "/" + topicsDir + "/" + TOPIC + "/partition=" + PARTITION +
                             "/" + TOPIC + "+" + PARTITION + "+03+05" + extension));
  expectedFiles.add(new Path(url + "/" + topicsDir + "/" + TOPIC + "/partition=" + PARTITION +
                             "/" + TOPIC + "+" + PARTITION + "+06+08" + extension));
  verify(expectedFiles, records, schema);
}
Example 10: testWriteRecordFieldPartitioner
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
@Test
public void testWriteRecordFieldPartitioner() throws Exception {
  Map<String, Object> config = createConfig();
  Partitioner partitioner = new FieldPartitioner();
  partitioner.configure(config);
  String partitionField = (String) config.get(HdfsSinkConnectorConfig.PARTITION_FIELD_NAME_CONFIG);
  TopicPartitionWriter topicPartitionWriter = new TopicPartitionWriter(
      TOPIC_PARTITION, storage, writerProvider, partitioner, connectorConfig, context, avroData);
  String key = "key";
  Schema schema = createSchema();
  Struct[] records = createRecords(schema);
  Collection<SinkRecord> sinkRecords = createSinkRecords(records, key, schema);
  for (SinkRecord record : sinkRecords) {
    topicPartitionWriter.buffer(record);
  }
  topicPartitionWriter.recover();
  topicPartitionWriter.write();
  topicPartitionWriter.close();
  String directory1 = partitioner.generatePartitionedPath(TOPIC, partitionField + "=" + String.valueOf(16));
  String directory2 = partitioner.generatePartitionedPath(TOPIC, partitionField + "=" + String.valueOf(17));
  String directory3 = partitioner.generatePartitionedPath(TOPIC, partitionField + "=" + String.valueOf(18));
  Set<Path> expectedFiles = new HashSet<>();
  expectedFiles.add(new Path(FileUtils.committedFileName(url, topicsDir, directory1, TOPIC_PARTITION, 0, 2, extension, ZERO_PAD_FMT)));
  expectedFiles.add(new Path(FileUtils.committedFileName(url, topicsDir, directory2, TOPIC_PARTITION, 3, 5, extension, ZERO_PAD_FMT)));
  expectedFiles.add(new Path(FileUtils.committedFileName(url, topicsDir, directory3, TOPIC_PARTITION, 6, 8, extension, ZERO_PAD_FMT)));
  verify(expectedFiles, records, schema);
}
Example 11: testWriteRecordTimeBasedPartition
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
@Test
public void testWriteRecordTimeBasedPartition() throws Exception {
  Map<String, Object> config = createConfig();
  Partitioner partitioner = new TimeBasedPartitioner();
  partitioner.configure(config);
  TopicPartitionWriter topicPartitionWriter = new TopicPartitionWriter(
      TOPIC_PARTITION, storage, writerProvider, partitioner, connectorConfig, context, avroData);
  String key = "key";
  Schema schema = createSchema();
  Struct[] records = createRecords(schema);
  Collection<SinkRecord> sinkRecords = createSinkRecords(records, key, schema);
  for (SinkRecord record : sinkRecords) {
    topicPartitionWriter.buffer(record);
  }
  topicPartitionWriter.recover();
  topicPartitionWriter.write();
  topicPartitionWriter.close();
  long partitionDurationMs = (Long) config.get(HdfsSinkConnectorConfig.PARTITION_DURATION_MS_CONFIG);
  String pathFormat = (String) config.get(HdfsSinkConnectorConfig.PATH_FORMAT_CONFIG);
  String timeZoneString = (String) config.get(HdfsSinkConnectorConfig.TIMEZONE_CONFIG);
  long timestamp = System.currentTimeMillis();
  String encodedPartition = TimeUtils.encodeTimestamp(partitionDurationMs, pathFormat, timeZoneString, timestamp);
  String directory = partitioner.generatePartitionedPath(TOPIC, encodedPartition);
  Set<Path> expectedFiles = new HashSet<>();
  expectedFiles.add(new Path(FileUtils.committedFileName(url, topicsDir, directory, TOPIC_PARTITION, 0, 2, extension, ZERO_PAD_FMT)));
  expectedFiles.add(new Path(FileUtils.committedFileName(url, topicsDir, directory, TOPIC_PARTITION, 3, 5, extension, ZERO_PAD_FMT)));
  expectedFiles.add(new Path(FileUtils.committedFileName(url, topicsDir, directory, TOPIC_PARTITION, 6, 8, extension, ZERO_PAD_FMT)));
  verify(expectedFiles, records, schema);
}
Example 12: testCreateTable
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
@Test
public void testCreateTable() throws Exception {
  prepareData(TOPIC, PARTITION);
  Partitioner partitioner = HiveTestUtils.getPartitioner();
  Schema schema = createSchema();
  hive.createTable(hiveDatabase, TOPIC, schema, partitioner);
  String location = "partition=" + String.valueOf(PARTITION);
  hiveMetaStore.addPartition(hiveDatabase, TOPIC, location);
  List<String> expectedColumnNames = new ArrayList<>();
  for (Field field : schema.fields()) {
    expectedColumnNames.add(field.name());
  }
  Table table = hiveMetaStore.getTable(hiveDatabase, TOPIC);
  List<String> actualColumnNames = new ArrayList<>();
  for (FieldSchema column : table.getSd().getCols()) {
    actualColumnNames.add(column.getName());
  }
  assertEquals(expectedColumnNames, actualColumnNames);
  List<FieldSchema> partitionCols = table.getPartitionKeys();
  assertEquals(1, partitionCols.size());
  assertEquals("partition", partitionCols.get(0).getName());
  String[] expectedResult = {"true", "12", "12", "12.2", "12.2", "12"};
  String result = HiveTestUtils.runHive(hiveExec, "SELECT * FROM " + TOPIC);
  String[] rows = result.split("\n");
  // Only 6 of the 7 records should have been delivered due to flush_size = 3
  assertEquals(6, rows.length);
  for (String row : rows) {
    String[] parts = HiveTestUtils.parseOutput(row);
    for (int j = 0; j < expectedResult.length; ++j) {
      assertEquals(expectedResult[j], parts[j]);
    }
  }
}
Example 13: testAlterSchema
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
@Test
public void testAlterSchema() throws Exception {
  prepareData(TOPIC, PARTITION);
  Partitioner partitioner = HiveTestUtils.getPartitioner();
  Schema schema = createSchema();
  hive.createTable(hiveDatabase, TOPIC, schema, partitioner);
  String location = "partition=" + String.valueOf(PARTITION);
  hiveMetaStore.addPartition(hiveDatabase, TOPIC, location);
  List<String> expectedColumnNames = new ArrayList<>();
  for (Field field : schema.fields()) {
    expectedColumnNames.add(field.name());
  }
  Table table = hiveMetaStore.getTable(hiveDatabase, TOPIC);
  List<String> actualColumnNames = new ArrayList<>();
  for (FieldSchema column : table.getSd().getCols()) {
    actualColumnNames.add(column.getName());
  }
  assertEquals(expectedColumnNames, actualColumnNames);
  Schema newSchema = createNewSchema();
  hive.alterSchema(hiveDatabase, TOPIC, newSchema);
  String[] expectedResult = {"true", "12", "12", "12.2", "12.2", "abc", "12"};
  String result = HiveTestUtils.runHive(hiveExec, "SELECT * from " + TOPIC);
  String[] rows = result.split("\n");
  // Only 6 of the 7 records should have been delivered due to flush_size = 3
  assertEquals(6, rows.length);
  for (String row : rows) {
    String[] parts = HiveTestUtils.parseOutput(row);
    for (int j = 0; j < expectedResult.length; ++j) {
      assertEquals(expectedResult[j], parts[j]);
    }
  }
}
Example 14: testAlterSchema
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
@Test
public void testAlterSchema() throws Exception {
  prepareData(TOPIC, PARTITION);
  Partitioner partitioner = HiveTestUtils.getPartitioner();
  Schema schema = createSchema();
  hive.createTable(hiveDatabase, TOPIC, schema, partitioner);
  String location = "partition=" + String.valueOf(PARTITION);
  hiveMetaStore.addPartition(hiveDatabase, TOPIC, location);
  List<String> expectedColumnNames = new ArrayList<>();
  for (Field field : schema.fields()) {
    expectedColumnNames.add(field.name());
  }
  Table table = hiveMetaStore.getTable(hiveDatabase, TOPIC);
  List<String> actualColumnNames = new ArrayList<>();
  for (FieldSchema column : table.getSd().getCols()) {
    actualColumnNames.add(column.getName());
  }
  assertEquals(expectedColumnNames, actualColumnNames);
  Schema newSchema = createNewSchema();
  hive.alterSchema(hiveDatabase, TOPIC, newSchema);
  String[] expectedResult = {"true", "12", "12", "12.2", "12.2", "NULL", "12"};
  String result = HiveTestUtils.runHive(hiveExec, "SELECT * from " + TOPIC);
  String[] rows = result.split("\n");
  // Only 6 of the 7 records should have been delivered due to flush_size = 3
  assertEquals(6, rows.length);
  for (String row : rows) {
    String[] parts = HiveTestUtils.parseOutput(row);
    for (int j = 0; j < expectedResult.length; ++j) {
      assertEquals(expectedResult[j], parts[j]);
    }
  }
}
Example 15: testWriteRecord
import io.confluent.connect.hdfs.partitioner.Partitioner; // import the required package/class
@Test
public void testWriteRecord() throws Exception {
  DataWriter hdfsWriter = new DataWriter(connectorConfig, context, avroData);
  Partitioner partitioner = hdfsWriter.getPartitioner();
  hdfsWriter.recover(TOPIC_PARTITION);
  String key = "key";
  Schema schema = createSchema();
  Struct record = createRecord(schema);
  Collection<SinkRecord> sinkRecords = new ArrayList<>();
  for (long offset = 0; offset < 7; offset++) {
    SinkRecord sinkRecord =
        new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, key, schema, record, offset);
    sinkRecords.add(sinkRecord);
  }
  hdfsWriter.write(sinkRecords);
  hdfsWriter.close(assignment);
  hdfsWriter.stop();
  String encodedPartition = "partition=" + String.valueOf(PARTITION);
  String directory = partitioner.generatePartitionedPath(TOPIC, encodedPartition);
  // Last file (offset 6) doesn't satisfy size requirement and gets discarded on close
  long[] validOffsets = {-1, 2, 5};
  for (int i = 1; i < validOffsets.length; i++) {
    long startOffset = validOffsets[i - 1] + 1;
    long endOffset = validOffsets[i];
    Path path = new Path(
        FileUtils.committedFileName(url, topicsDir, directory, TOPIC_PARTITION, startOffset,
                                    endOffset, extension, ZERO_PAD_FMT));
    Collection<Object> records = schemaFileReader.readData(conf, path);
    long size = endOffset - startOffset + 1;
    assertEquals(size, records.size());
    for (Object avroRecord : records) {
      assertEquals(avroData.fromConnectData(schema, record), avroRecord);
    }
  }
}