本文整理汇总了C++中VectorType::getVectorNumElements方法的典型用法代码示例。如果您正苦于以下问题:C++ VectorType::getVectorNumElements方法的具体用法?C++ VectorType::getVectorNumElements怎么用?C++ VectorType::getVectorNumElements使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类VectorType的用法示例。
在下文中一共展示了VectorType::getVectorNumElements方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: getOCLTypeName
/// Produces a textual type name for \p Ty.
///
/// \param Ty      The type to name.
/// \param Signed  When false, integer names (including the element name of a
///                vector) are prefixed with 'u'.
/// \returns "half", "float" or "double" for those floating-point types;
///          "char"/"short"/"int"/"long" for 8/16/32/64-bit integers, or
///          "i<bits>" for any other width; the element name followed by the
///          element count for vectors; "unknown" for every other type.
static std::string getOCLTypeName(Type *Ty, bool Signed) {
  const auto Kind = Ty->getTypeID();

  if (Kind == Type::HalfTyID)
    return "half";
  if (Kind == Type::FloatTyID)
    return "float";
  if (Kind == Type::DoubleTyID)
    return "double";

  if (Kind == Type::IntegerTyID) {
    // The unsigned spelling is the signed spelling with a leading 'u'.
    if (!Signed)
      return (Twine('u') + getOCLTypeName(Ty, true)).str();

    const unsigned Bits = Ty->getIntegerBitWidth();
    if (Bits == 8)
      return "char";
    if (Bits == 16)
      return "short";
    if (Bits == 32)
      return "int";
    if (Bits == 64)
      return "long";
    // No dedicated name for this width: fall back to "i<bits>".
    return (Twine('i') + Twine(Bits)).str();
  }

  if (Kind == Type::VectorTyID) {
    VectorType *VecTy = cast<VectorType>(Ty);
    const unsigned Lanes = VecTy->getVectorNumElements();
    // Signedness is forwarded so the element name carries the 'u' prefix.
    const std::string EltName = getOCLTypeName(VecTy->getElementType(), Signed);
    return (Twine(EltName) + Twine(Lanes)).str();
  }

  return "unknown";
}
示例2: getMaskedMemoryOpCost
unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
unsigned Alignment,
unsigned AddressSpace) {
VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
if (!SrcVTy)
// To calculate scalar take the regular cost, without mask
return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace);
unsigned NumElem = SrcVTy->getVectorNumElements();
VectorType *MaskTy =
VectorType::get(Type::getInt8Ty(getGlobalContext()), NumElem);
if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy, 1)) ||
(Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy, 1)) ||
!isPowerOf2_32(NumElem)) {
// Scalarization
unsigned MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
unsigned ScalarCompareCost =
getCmpSelInstrCost(Instruction::ICmp,
Type::getInt8Ty(getGlobalContext()), NULL);
unsigned BranchCost = getCFInstrCost(Instruction::Br);
unsigned MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
unsigned ValueSplitCost =
getScalarizationOverhead(SrcVTy, Opcode == Instruction::Load,
Opcode == Instruction::Store);
unsigned MemopCost =
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace);
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
}
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(SrcVTy);
unsigned Cost = 0;
if (LT.second != TLI->getValueType(SrcVTy).getSimpleVT() &&
LT.second.getVectorNumElements() == NumElem)
// Promotion requires expand/truncate for data and a shuffle for mask.
Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) +
getShuffleCost(TTI::SK_Alternate, MaskTy, 0, 0);
else if (LT.second.getVectorNumElements() > NumElem) {
VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),
LT.second.getVectorNumElements());
// Expanding requires fill mask with zeroes
Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy);
}
if (!ST->hasAVX512())
return Cost + LT.first*4; // Each maskmov costs 4
// AVX-512 masked load/store is cheapper
return Cost+LT.first;
}
示例3: lowerIntoOptimizedSequence
// Lowers this interleaved access group into X86-specific
// instructions/intrinsics.
bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
SmallVector<Instruction *, 4> DecomposedVectors;
SmallVector<Value *, 4> TransposedVectors;
VectorType *ShuffleTy = Shuffles[0]->getType();
if (isa<LoadInst>(Inst)) {
// Try to generate target-sized register(/instruction).
decompose(Inst, Factor, ShuffleTy, DecomposedVectors);
Type *ShuffleEltTy = Inst->getType();
unsigned NumSubVecElems = ShuffleEltTy->getVectorNumElements() / Factor;
// Perform matrix-transposition in order to compute interleaved
// results by generating some sort of (optimized) target-specific
// instructions.
switch (NumSubVecElems) {
default:
return false;
case 4:
transpose_4x4(DecomposedVectors, TransposedVectors);
break;
case 8:
case 16:
case 32:
deinterleave8bitStride3(DecomposedVectors, TransposedVectors,
NumSubVecElems);
break;
}
// Now replace the unoptimized-interleaved-vectors with the
// transposed-interleaved vectors.
for (unsigned i = 0, e = Shuffles.size(); i < e; ++i)
Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]);
return true;
}
Type *ShuffleEltTy = ShuffleTy->getVectorElementType();
unsigned NumSubVecElems = ShuffleTy->getVectorNumElements() / Factor;
// Lower the interleaved stores:
// 1. Decompose the interleaved wide shuffle into individual shuffle
// vectors.
decompose(Shuffles[0], Factor, VectorType::get(ShuffleEltTy, NumSubVecElems),
DecomposedVectors);
// 2. Transpose the interleaved-vectors into vectors of contiguous
// elements.
switch (NumSubVecElems) {
case 4:
transpose_4x4(DecomposedVectors, TransposedVectors);
break;
case 16:
case 32:
interleave8bitStride4(DecomposedVectors, TransposedVectors, NumSubVecElems);
break;
default:
return false;
}
// 3. Concatenate the contiguous-vectors back into a wide vector.
Value *WideVec = concatenateVectors(Builder, TransposedVectors);
// 4. Generate a store instruction for wide-vec.
StoreInst *SI = cast<StoreInst>(Inst);
Builder.CreateAlignedStore(WideVec, SI->getPointerOperand(),
SI->getAlignment());
return true;
}
示例4: replaceExtractElements
/// When an element is inserted into a vector that is wider than the vector it
/// was extracted from, widen the source vector with a shuffle so that a
/// single shufflevector can later replace one or more insert/extract pairs.
///
/// \param InsElt  The insertelement whose vector type is the target width.
/// \param ExtElt  The extractelement feeding it; its vector operand is the
///                narrow source that gets widened.
/// \param IC      Used to insert the widening shuffle and to replace the uses
///                of the old extracts.
///
/// Does nothing when the element types differ, when the source is not
/// strictly narrower, or when the shuffle would not land in the same basic
/// block as \p InsElt.
static void replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombiner &IC) {
  VectorType *InsVecType = InsElt->getType();
  VectorType *ExtVecType = ExtElt->getVectorOperandType();
  const unsigned WideCount = InsVecType->getVectorNumElements();
  const unsigned NarrowCount = ExtVecType->getVectorNumElements();

  // Both vectors must share an element type...
  if (InsVecType->getElementType() != ExtVecType->getElementType())
    return;
  // ...and the inserted-to vector must be wider than the extracted-from one.
  if (NarrowCount >= WideCount)
    return;

  // Build the widening shuffle mask: the identity over every lane of the
  // narrow vector, padded with undef lanes up to the wide vector's length.
  SmallVector<Constant *, 16> ExtendMask;
  IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
  for (unsigned Lane = 0; Lane != WideCount; ++Lane) {
    if (Lane < NarrowCount)
      ExtendMask.push_back(ConstantInt::get(IntType, Lane));
    else
      ExtendMask.push_back(UndefValue::get(IntType));
  }

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  // True when the narrow vector is defined by an instruction other than a
  // PHI, in which case the shuffle is placed right after that definition.
  const bool PlaceAfterDef = ExtVecOpInst && !isa<PHINode>(ExtVecOpInst);
  BasicBlock *InsertionBlock =
      PlaceAfterDef ? ExtVecOpInst->getParent() : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. For now, bail out so that the extractelement
  // feeding our insertelement is guaranteed to be replaced, which in turn
  // lets the insertelement become a shufflevector. If the insertelement is
  // not replaced, we can induce infinite looping: an extractelement
  // optimization deletes the widening shuffle, which triggers another attempt
  // here to create it, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
                                        ConstantVector::get(ExtendMask));

  // Place the shuffle after the definition of the narrow vector (unless that
  // is a PHI) or at the first insertion point of the extract's block, so any
  // subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (PlaceAfterDef)
    WideVec->insertAfter(ExtVecOpInst);
  else
    IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());

  // Redirect extracts of the narrow vector that live in the shuffle's block
  // to equivalent extracts of the widened vector.
  for (User *U : ExtVecOp->users()) {
    auto *OldExt = dyn_cast<ExtractElementInst>(U);
    if (OldExt && OldExt->getParent() == WideVec->getParent()) {
      auto *NewExt =
          ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
      NewExt->insertAfter(WideVec);
      IC.replaceInstUsesWith(*OldExt, NewExt);
    }
  }
}