This article collects typical usage examples of the Java class org.apache.hadoop.hive.ql.plan.TableScanDesc. If you are wondering what the TableScanDesc class is for, how to use it, or simply want to see it in real code, the hand-picked examples below should help.
TableScanDesc belongs to the org.apache.hadoop.hive.ql.plan package. Eleven code examples are shown below, sorted by popularity by default; upvoting the ones you find useful helps the site recommend better Java examples.
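All of the examples share the same read-side pattern: the storage handler or input format looks up the filter expression that Hive pushed down under TableScanDesc.FILTER_EXPR_CONF_STR and deserializes it back into an expression tree. As a minimal sketch of that pattern (the helper method name is illustrative; older Hive releases expose the deserializer on Utilities, newer ones on SerializationUtilities, as Example 7 shows):
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical helper: return the pushed-down filter, or null if nothing was pushed.
static ExprNodeGenericFuncDesc readPushedFilter(JobConf conf) {
  String serialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (serialized == null) {
    return null; // no predicate pushdown happened for this table scan
  }
  // Hive 2.x and later; Hive 1.x would call Utilities.deserializeExpression instead.
  return SerializationUtilities.deserializeExpression(serialized);
}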
Example 1: getQueryFilter
import org.apache.hadoop.hive.ql.plan.TableScanDesc; // import the required package/class
private DynamoDBQueryFilter getQueryFilter(JobConf conf, Map<String, String>
    hiveDynamoDBMapping, Map<String, String> hiveTypeMapping) throws IOException {
  if (hiveDynamoDBMapping == null) {
    /*
     * Column mapping may be null when the user has mapped a DynamoDB item
     * onto a single Hive map<string, string> column.
     */
    return new DynamoDBQueryFilter();
  }
  DynamoDBClient client = new DynamoDBClient(conf);
  String filterExprSerialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (filterExprSerialized == null) {
    return new DynamoDBQueryFilter();
  }
  ExprNodeDesc filterExpr =
      ShimsLoader.getHiveShims().deserializeExpression(filterExprSerialized);
  DynamoDBFilterPushdown pushdown = new DynamoDBFilterPushdown();
  List<KeySchemaElement> schema =
      client.describeTable(conf.get(DynamoDBConstants.TABLE_NAME)).getKeySchema();
  DynamoDBQueryFilter queryFilter = pushdown.predicateToDynamoDBFilter(
      schema, hiveDynamoDBMapping, hiveTypeMapping, filterExpr);
  return queryFilter;
}
Example 2: SolrTable
import org.apache.hadoop.hive.ql.plan.TableScanDesc; // import the required package/class
public SolrTable(JobConf conf) {
  String filterExprSerialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (filterExprSerialized != null) {
    ExprNodeDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized, conf);
    log.debug("filterExpr=" + filterExpr.getExprString());
    SolrStorageHandler.buildQuery(filterExpr, fq, q);
  }
  this.url = ConfigurationUtil.getUrl(conf);
  this.qs = ConfigurationUtil.getQs(conf);
  this.fields = ConfigurationUtil.getAllColumns(conf.get(ConfigurationUtil.SOLR_COLUMN_MAPPING));
  this.facetType = conf.get(ConfigurationUtil.SOLR_FACET_MAPPING);
  log.info("solr.url=" + url + " solr.qs=" + qs + " fq=" + fq + " q=" + q);
  this.solrSplitSize = ConfigurationUtil.getSolrSplitSize(conf);
  this.outputBuffer = new ArrayList<SolrInputDocument>(solrSplitSize);
  this.server = new HttpSolrServer(url);
}
Example 3: pushFilters
import org.apache.hadoop.hive.ql.plan.TableScanDesc; // import the required package/class
private void pushFilters(final JobConf jobConf, final TableScanOperator tableScan) {
  final TableScanDesc scanDesc = tableScan.getConf();
  if (scanDesc == null) {
    LOG.debug("Not pushing filters because TableScanDesc is null");
    return;
  }
  // construct column name list for reference by filter push down
  Utilities.setColumnNameList(jobConf, tableScan);
  // push down filters
  final ExprNodeDesc filterExpr = scanDesc.getFilterExpr();
  if (filterExpr == null) {
    LOG.debug("Not pushing filters because FilterExpr is null");
    return;
  }
  final String filterText = filterExpr.getExprString();
  final String filterExprSerialized = Utilities.serializeExpression(filterExpr);
  jobConf.set(
      TableScanDesc.FILTER_TEXT_CONF_STR,
      filterText);
  jobConf.set(
      TableScanDesc.FILTER_EXPR_CONF_STR,
      filterExprSerialized);
}
Example 4: parseFilterPredicate
import org.apache.hadoop.hive.ql.plan.TableScanDesc; // import the required package/class
/**
 * Look for a filter predicate pushed down by the StorageHandler. If a
 * filter was pushed down, the filter expression and the list of indexed
 * columns should be set in the JobConf properties. If either is not set, we
 * can't deal with the filter here, so return null. If both are present in
 * the JobConf, translate the filter expression into a list of C*
 * IndexExpressions which we'll later use in queries. The filter expression
 * should translate exactly to IndexExpressions, as our
 * HiveStoragePredicateHandler implementation has already done this once. As
 * an additional check, if this is no longer the case and there is some
 * residual predicate after translation, throw an exception.
 *
 * @param jobConf Job Configuration
 * @return C* IndexExpressions representing the pushed-down filter, or null
 *         if pushdown is not possible
 * @throws java.io.IOException if there are problems deserializing from the
 *         JobConf
 */
private List<IndexExpression> parseFilterPredicate(JobConf jobConf) throws IOException {
  String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (filterExprSerialized == null) {
    return null;
  }
  ExprNodeDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized, jobConf);
  String encodedIndexedColumns = jobConf.get(AbstractCassandraSerDe.CASSANDRA_INDEXED_COLUMNS);
  Set<ColumnDef> indexedColumns = CqlPushdownPredicate.deserializeIndexedColumns(encodedIndexedColumns);
  if (indexedColumns.isEmpty()) {
    return null;
  }
  IndexPredicateAnalyzer analyzer = CqlPushdownPredicate.newIndexPredicateAnalyzer(indexedColumns);
  List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
  ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, searchConditions);
  // There should be no residual predicate since we already negotiated
  // that earlier in CqlStorageHandler.decomposePredicate.
  if (residualPredicate != null) {
    throw new RuntimeException("Unexpected residual predicate : " + residualPredicate.getExprString());
  }
  if (!searchConditions.isEmpty()) {
    return CqlPushdownPredicate.translateSearchConditions(searchConditions, indexedColumns);
  } else {
    throw new RuntimeException("At least one search condition expected in filter predicate");
  }
}
Example 5: parseFilterPredicate
import org.apache.hadoop.hive.ql.plan.TableScanDesc; // import the required package/class
/**
 * Look for a filter predicate pushed down by the StorageHandler. If a
 * filter was pushed down, the filter expression and the list of indexed
 * columns should be set in the JobConf properties. If either is not set, we
 * can't deal with the filter here, so return null. If both are present in
 * the JobConf, translate the filter expression into a list of C*
 * IndexExpressions which we'll later use in queries. The filter expression
 * should translate exactly to IndexExpressions, as our
 * HiveStoragePredicateHandler implementation has already done this once. As
 * an additional check, if this is no longer the case and there is some
 * residual predicate after translation, throw an exception.
 *
 * @param jobConf Job Configuration
 * @return C* IndexExpressions representing the pushed-down filter, or null
 *         if pushdown is not possible
 * @throws IOException if there are problems deserializing from the JobConf
 */
private List<IndexExpression> parseFilterPredicate(JobConf jobConf) throws IOException {
  String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (filterExprSerialized == null) {
    return null;
  }
  ExprNodeDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized, jobConf);
  String encodedIndexedColumns = jobConf.get(AbstractCassandraSerDe.CASSANDRA_INDEXED_COLUMNS);
  Set<ColumnDef> indexedColumns = CassandraPushdownPredicate.deserializeIndexedColumns(encodedIndexedColumns);
  if (indexedColumns.isEmpty()) {
    return null;
  }
  IndexPredicateAnalyzer analyzer = CassandraPushdownPredicate.newIndexPredicateAnalyzer(indexedColumns);
  List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
  ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, searchConditions);
  // There should be no residual predicate since we already negotiated
  // that earlier in CassandraStorageHandler.decomposePredicate.
  if (residualPredicate != null) {
    throw new RuntimeException("Unexpected residual predicate : " + residualPredicate.getExprString());
  }
  if (!searchConditions.isEmpty()) {
    return CassandraPushdownPredicate.translateSearchConditions(searchConditions, indexedColumns);
  } else {
    throw new RuntimeException("At least one search condition expected in filter predicate");
  }
}
Example 6: getSearchConditions
import org.apache.hadoop.hive.ql.plan.TableScanDesc; // import the required package/class
/**
 * @param conf JobConf
 * @return list of IndexSearchConditions from the filter expression.
 */
public List<IndexSearchCondition> getSearchConditions(JobConf conf) {
  List<IndexSearchCondition> sConditions = Lists.newArrayList();
  String filteredExprSerialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (filteredExprSerialized == null) {
    return sConditions;
  }
  ExprNodeDesc filterExpr = Utilities.deserializeExpression(filteredExprSerialized, conf);
  IndexPredicateAnalyzer analyzer = newAnalyzer(conf);
  ExprNodeDesc residual = analyzer.analyzePredicate(filterExpr, sConditions);
  if (residual != null) {
    throw new RuntimeException("Unexpected residual predicate: " + residual.getExprString());
  }
  return sConditions;
}
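The newAnalyzer(conf) helper in Example 6 (like newIndexPredicateAnalyzer in Examples 4 and 5) is not part of this listing. As a rough sketch under the usual assumptions, such a factory registers the comparison operators and column names the storage layer can actually evaluate, so that analyzePredicate only hands back conditions the handler knows how to push down:
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;

// Illustrative factory: accept only equality predicates on a known set of columns.
static IndexPredicateAnalyzer newEqualityAnalyzer(Iterable<String> indexedColumnNames) {
  IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
  analyzer.addComparisonOp(GenericUDFOPEqual.class.getName());
  for (String columnName : indexedColumnNames) {
    analyzer.allowColumnName(columnName);
  }
  return analyzer;
}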
Example 7: HiveReaderSetting
import org.apache.hadoop.hive.ql.plan.TableScanDesc; // import the required package/class
public HiveReaderSetting( final FileSplit split, final JobConf job ){
  config = new Configuration();
  disableSkipBlock = job.getBoolean( "mds.disable.block.skip" , false );
  disableFilterPushdown = job.getBoolean( "mds.disable.filter.pushdown" , false );
  Set<String> pathNameSet = createPathSet( split.getPath() );
  List<ExprNodeGenericFuncDesc> filterExprs = new ArrayList<ExprNodeGenericFuncDesc>();
  String filterExprSerialized = job.get( TableScanDesc.FILTER_EXPR_CONF_STR );
  if( filterExprSerialized != null ){
    filterExprs.add( SerializationUtilities.deserializeExpression( filterExprSerialized ) );
  }
  MapWork mapWork;
  try{
    mapWork = Utilities.getMapWork( job );
  }catch( Exception e ){
    mapWork = null;
  }
  if( mapWork == null ){
    node = createExpressionNode( filterExprs );
    isVectorModeFlag = false;
    return;
  }
  node = createExpressionNode( filterExprs );
  for( Map.Entry<String,PartitionDesc> pathsAndParts : mapWork.getPathToPartitionInfo().entrySet() ){
    if( ! pathNameSet.contains( pathsAndParts.getKey() ) ){
      continue;
    }
    Properties props = pathsAndParts.getValue().getTableDesc().getProperties();
    if( props.containsKey( "mds.expand" ) ){
      config.set( "spread.reader.expand.column" , props.getProperty( "mds.expand" ) );
    }
    if( props.containsKey( "mds.flatten" ) ){
      config.set( "spread.reader.flatten.column" , props.getProperty( "mds.flatten" ) );
    }
  }
  config.set( "spread.reader.read.column.names" , createReadColumnNames( job.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , null ) ) );
  // Next Hive version:
  // Utilities.getUseVectorizedInputFileFormat(job)
  isVectorModeFlag = Utilities.isVectorMode( job );
}
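Example 7 also reads the projected column names that Hive publishes under ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR. For reference, a minimal sketch of inspecting the column projection on the job configuration (the method name is illustrative; these ColumnProjectionUtils calls are the common ones, but check them against your Hive version):
import java.util.List;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.mapred.JobConf;

// Illustrative: log which columns the query actually needs from this reader.
static void logColumnProjection(JobConf job) {
  boolean readAll = ColumnProjectionUtils.isReadAllColumns(job);
  List<Integer> readColumnIds = ColumnProjectionUtils.getReadColumnIDs(job);
  String readColumnNames = job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "");
  System.out.println("readAll=" + readAll
      + ", ids=" + readColumnIds
      + ", names=" + readColumnNames);
}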
Example 8: initialize
import org.apache.hadoop.hive.ql.plan.TableScanDesc; // import the required package/class
public void initialize(final InputSplit split, final Configuration conf) throws IOException {
  this.startTime = System.currentTimeMillis();
  // batch size for reading multiple records together
  batchSize = NumberUtils.toInt(conf.get(MonarchUtils.MONARCH_BATCH_SIZE), MonarchUtils.MONARCH_BATCH_SIZE_DEFAULT);
  final MonarchSplit ms = (MonarchSplit) split;
  this.readColIds = ColumnProjectionUtils.getReadColumnIDs(conf);
  final String expression = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (expression != null && columns != null) {
    this.pushDownfilters = getPushDownFilters(expression, columns);
    if (this.pushDownfilters != null) {
      for (Filter mFilter : pushDownfilters.getFilters()) {
        logger.info("Pushing filter= {}", mFilter);
        int id = -1;
        for (int i = 0; i < columns.length; i++) {
          if (columns[i].equalsIgnoreCase(((SingleColumnValueFilter) mFilter).getColumnNameString())) {
            id = i;
            break;
          }
        }
        if (!readColIds.contains(id) && readColIds.size() > 0 && id != -1) {
          readColIds.add(id);
        }
      }
    }
  }
  // create the scan with the required parameters
  Scan scan = new Scan();
  scan.setBucketIds(ms.getBucketIds());
  scan.setBatchSize(batchSize);
  scan.setReturnKeysFlag(false);
  final String str = conf.get(MonarchUtils.READ_FILTER_ON_LATEST_VERSION);
  if (str != null) {
    scan.setFilterOnLatestVersionOnly(Boolean.getBoolean(str));
  }
  final boolean isOldestFirst = Boolean.getBoolean(conf.get(MonarchUtils.READ_OLDEST_FIRST));
  final int maxVersions = NumberUtils.toInt(conf.get(MonarchUtils.READ_MAX_VERSIONS), 1);
  scan.setMaxVersions(maxVersions, isOldestFirst);
  if (pushDownfilters != null) {
    scan.setFilter(pushDownfilters);
  }
  scan.setBucketToServerMap(ms.getBucketToServerMap());
  Collections.sort(this.readColIds);
  if (!readColIds.isEmpty()) {
    scan.setColumns(readColIds);
  }
  logger.info("Retrieving columns= {}", scan.getColumns());
  this.mResultScanner = this.anyTable.getScanner(scan);
  this.valueIterator = this.mResultScanner.iterator();
  if (logger.isDebugEnabled()) {
    logger.debug("{} - Initialize MonarchRecordReader: batchSize= {}, split= {}", new Date(), batchSize, ms);
  }
}
Example 9: pushFilters
import org.apache.hadoop.hive.ql.plan.TableScanDesc; // import the required package/class
public static void pushFilters(JobConf jobConf, TableScanOperator tableScan) {
  // ensure filters are not set from a previous pushFilters call
  jobConf.unset(TableScanDesc.FILTER_TEXT_CONF_STR);
  jobConf.unset(TableScanDesc.FILTER_EXPR_CONF_STR);
  TableScanDesc scanDesc = tableScan.getConf();
  if (scanDesc == null) {
    return;
  }
  // 2015-10-27 Added by JeongMin Ju
  //////////////////////////////////////////////////////////////////////
  setReadColumns(jobConf, tableScan);
  //////////////////////////////////////////////////////////////////////
  // construct column name list and types for reference by filter push down
  Utilities.setColumnNameList(jobConf, tableScan);
  Utilities.setColumnTypeList(jobConf, tableScan);
  // push down filters
  ExprNodeGenericFuncDesc filterExpr = (ExprNodeGenericFuncDesc) scanDesc.getFilterExpr();
  if (filterExpr == null) {
    return;
  }
  Serializable filterObject = scanDesc.getFilterObject();
  if (filterObject != null) {
    jobConf.set(TableScanDesc.FILTER_OBJECT_CONF_STR, Utilities.serializeObject(filterObject));
  }
  String filterText = filterExpr.getExprString();
  String filterExprSerialized = Utilities.serializeExpression(filterExpr);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Filter text = " + filterText);
    LOG.debug("Filter expression = " + filterExprSerialized);
  }
  jobConf.set(
      TableScanDesc.FILTER_TEXT_CONF_STR,
      filterText);
  jobConf.set(
      TableScanDesc.FILTER_EXPR_CONF_STR,
      filterExprSerialized);
}
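The filterExpr and filterObject that Example 9 serializes into the JobConf are produced earlier, when the storage handler decomposes the query predicate. A minimal sketch of such a HiveStoragePredicateHandler, assuming the analyzer is configured along the lines of the sketch after Example 6 (everything else about this class is illustrative, not any particular project's implementation):
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.mapred.JobConf;

// Illustrative predicate handler: decide what can be pushed down for a table scan.
public class ExamplePredicateHandler implements HiveStoragePredicateHandler {
  @Override
  public DecomposedPredicate decomposePredicate(JobConf jobConf, Deserializer deserializer,
      ExprNodeDesc predicate) {
    IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer(); // configure ops/columns here
    List<IndexSearchCondition> conditions = new ArrayList<IndexSearchCondition>();
    ExprNodeDesc residual = analyzer.analyzePredicate(predicate, conditions);
    if (conditions.isEmpty()) {
      return null; // nothing to push down; Hive evaluates the whole predicate itself
    }
    DecomposedPredicate decomposed = new DecomposedPredicate();
    // The part we can evaluate: this is what later shows up under FILTER_EXPR_CONF_STR.
    decomposed.pushedPredicate = analyzer.translateSearchConditions(conditions);
    // The part Hive must still apply on top of our results.
    decomposed.residualPredicate = (ExprNodeGenericFuncDesc) residual;
    return decomposed;
  }
}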
Example 10: getSplits
import org.apache.hadoop.hive.ql.plan.TableScanDesc; // import the required package/class
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
  String tableName = jobConf.get(PhoenixConfigurationUtil.INPUT_TABLE_NAME);
  // List<IndexSearchCondition> conditionList = null;
  String query = null;
  String executionEngine = jobConf.get(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.getDefaultValue());
  if (LOG.isDebugEnabled()) {
    LOG.debug("<<<<<<<<<< target table name at split phase : " + tableName + " >>>>>>>>>>");
    LOG.debug("<<<<<<<<<< whereCondition : " + jobConf.get(TableScanDesc.FILTER_TEXT_CONF_STR) + " >>>>>>>>>>");
    LOG.debug("<<<<<<<<<< " + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " : " + executionEngine + " >>>>>>>>>>");
  }
  // 2016-04-04 modified by JeongMin Ju : Changed predicate push-down processing to the Tez way.
  // if (PhoenixStorageHandlerConstants.MR.equals(executionEngine)) {
  //   String predicateKey = PhoenixStorageHandlerUtil.getTableKeyOfSession(jobConf, tableName);
  //
  //   if (LOG.isDebugEnabled()) {
  //     LOG.debug("<<<<<<<<<< predicateKey : " + predicateKey + " >>>>>>>>>>");
  //   }
  //
  //   PhoenixPredicateDecomposer predicateDecomposer = PhoenixPredicateDecomposerManager.getPredicateDecomposer(predicateKey);
  //   if (predicateDecomposer != null && predicateDecomposer.isCalledPPD()) {
  //     conditionList = predicateDecomposer.getSearchConditionList();
  //   }
  //
  //   query = PhoenixQueryBuilder.getInstance().buildQuery(jobConf, tableName, ColumnProjectionUtils.getReadColumnNames(jobConf), conditionList);
  // } else if (PhoenixStorageHandlerConstants.TEZ.equals(executionEngine)) {
  Map<String, String> columnTypeMap = PhoenixStorageHandlerUtil.createColumnTypeMap(jobConf);
  if (LOG.isDebugEnabled()) {
    LOG.debug("<<<<<<<<<< columnType : " + columnTypeMap + " >>>>>>>>>>");
  }
  String whereClause = jobConf.get(TableScanDesc.FILTER_TEXT_CONF_STR);
  query = PhoenixQueryBuilder.getInstance().buildQuery(jobConf, tableName, ColumnProjectionUtils.getReadColumnNames(jobConf), whereClause, columnTypeMap);
  // } else {
  //   throw new IOException(executionEngine + " execution engine unsupported yet.");
  // }
  final QueryPlan queryPlan = getQueryPlan(jobConf, query);
  final List<KeyRange> allSplits = queryPlan.getSplits();
  final List<InputSplit> splits = generateSplits(jobConf, queryPlan, allSplits, query);
  return splits.toArray(new InputSplit[splits.size()]);
}
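Unlike most of the other examples, this one consumes TableScanDesc.FILTER_TEXT_CONF_STR, the human-readable form of the filter that Examples 3 and 9 set alongside the serialized expression, and splices it straight into the generated query. A minimal sketch of that style of use (the method, table name, and query shape are made up for illustration):
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.mapred.JobConf;

// Illustrative: append the pushed-down filter text as a WHERE clause, if present.
static String buildSelect(JobConf jobConf, String tableName) {
  StringBuilder sql = new StringBuilder("SELECT * FROM ").append(tableName);
  String whereClause = jobConf.get(TableScanDesc.FILTER_TEXT_CONF_STR);
  if (whereClause != null && !whereClause.isEmpty()) {
    sql.append(" WHERE ").append(whereClause);
  }
  return sql.toString();
}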
Example 11: generateQuery
import org.apache.hadoop.hive.ql.plan.TableScanDesc; // import the required package/class
public static SolrQuery generateQuery(JobConf job) {
  SolrQuery solrQuery = new SolrQuery();
  String query = job.get(ExternalTableProperties.SOLR_QUERY);
  solrQuery.setQuery(query);
  String fields = StringUtils.join(new ExternalTableProperties().COLUMN_NAMES, ", ");
  solrQuery.set("fl", fields);
  // Since each mapper is going to query each shard separately,
  // we set "distrib" --> false.
  solrQuery.set("distrib", "false");
  // Pass the filter query by doing predicate pushdown.
  String filterExprSerialized = job.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (filterExprSerialized == null) {
    // No predicate pushdown is possible.
    return solrQuery;
  }
  ExprNodeDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized);
  IndexPredicateAnalyzer analyzer = PredicateAnalyzer.getPredicateAnalyzer();
  List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
  analyzer.analyzePredicate(filterExpr, searchConditions);
  for (IndexSearchCondition condition : searchConditions) {
    String fieldName = condition.getColumnDesc().getColumn();
    String value = condition.getConstantDesc().getValue().toString();
    StringBuffer fqExp = new StringBuffer();
    if (condition.getComparisonOp().equals("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual")) {
      // Formulating the filter query expression.
      fqExp.append(fieldName).append(":").append(value);
      solrQuery.addFilterQuery(fqExp.toString());
      LOG.debug("Equals comparison found, adding it to SOLR filter query");
    }
    if (condition.getComparisonOp().equals("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan")) {
      fqExp.append(fieldName).append(":").append("{").append(value)
          .append(" TO *}");
      solrQuery.addFilterQuery(fqExp.toString());
      LOG.debug("Greater than comparison found, adding it to SOLR filter query");
    }
    if (condition.getComparisonOp().equals("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan")) {
      fqExp.append(fieldName).append(":").append("[").append(value)
          .append(" TO *]");
      solrQuery.addFilterQuery(fqExp.toString());
      LOG.debug("Greater than or equals comparison found, adding it to SOLR filter query");
    }
    if (condition.getComparisonOp().equals("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan")) {
      fqExp.append(fieldName).append(":").append("{* TO ").append(value)
          .append(" }");
      solrQuery.addFilterQuery(fqExp.toString());
      LOG.debug("Less than comparison found, adding it to SOLR filter query");
    }
    if (condition.getComparisonOp().equals("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan")) {
      fqExp.append(fieldName).append(":").append("[* TO ").append(value)
          .append(" ]");
      solrQuery.addFilterQuery(fqExp.toString());
      LOG.debug("Less than or equals comparison found, adding it to SOLR filter query");
    }
  }
  return solrQuery;
}
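As a closing usage illustration (field name and constant are made up), the greater-than branch above turns a condition on a field price with constant 100 into the Solr filter query price:{100 TO *}; the exclusive {} and inclusive [] range brackets mirror the strict and non-strict comparison operators:
import org.apache.solr.client.solrj.SolrQuery;

public class FilterQueryDemo {
  public static void main(String[] args) {
    // Illustrative only: reproduce the string building of the greater-than branch.
    SolrQuery solrQuery = new SolrQuery("*:*");
    StringBuffer fqExp = new StringBuffer();
    fqExp.append("price").append(":").append("{").append("100").append(" TO *}");
    solrQuery.addFilterQuery(fqExp.toString());
    System.out.println(solrQuery); // prints the URL-encoded q and fq parameters
  }
}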