This page collects typical usage examples of the C++ method MemCpyInst::getAlignment. If you have been wondering what MemCpyInst::getAlignment does, how it is used, and what real calls look like, the curated examples here should help. You can also explore further usage examples of the containing class, MemCpyInst.
The six code examples below demonstrate MemCpyInst::getAlignment, ordered by popularity by default.
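Before diving into the examples, here is a minimal sketch of what the method reports, assuming the classic (roughly pre-LLVM 7) API in which the memcpy intrinsic carries a single alignment operand and getAlignment() returns it, with 0 meaning "no stated alignment". Newer releases split this into per-operand accessors. The helper name below is hypothetical, not part of LLVM:

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Hypothetical helper: does this instruction copy memory with at least
// MinAlign known alignment? getAlignment() == 0 means "alignment unknown".
static bool memcpyAlignedAtLeast(const Instruction *Inst, unsigned MinAlign) {
  if (const auto *MCI = dyn_cast<MemCpyInst>(Inst)) {
    unsigned A = MCI->getAlignment();
    return A != 0 && A >= MinAlign;
  }
  return false;
}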
Example 1: processByValArgument
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
  if (TD == 0) return false;

  // Find out what feeds this byval argument.
  Value *ByValArg = CS.getArgument(ArgNo);
  Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
  uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
  MemDepResult DepInfo =
    MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
                                 true, CS.getInstruction(),
                                 CS.getInstruction()->getParent());
  if (!DepInfo.isClobber())
    return false;

  // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
  // a memcpy, see if we can byval from the source of the memcpy instead of the
  // result.
  MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
  if (MDep == 0 || MDep->isVolatile() ||
      ByValArg->stripPointerCasts() != MDep->getDest())
    return false;

  // The length of the memcpy must be larger or equal to the size of the byval.
  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
    return false;

  // Get the alignment of the byval.  If the call doesn't specify the alignment,
  // then it is some target specific value that we can't know.
  unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
  if (ByValAlign == 0) return false;

  // If it is greater than the memcpy, then we check to see if we can force the
  // source of the memcpy to the alignment we need.  If we fail, we bail out.
  if (MDep->getAlignment() < ByValAlign &&
      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, TD) < ByValAlign)
    return false;

  // Verify that the copied-from memory doesn't change in between the memcpy and
  // the byval call.
  //    memcpy(a <- b)
  //    *b = 42;
  //    foo(*a)
  // It would be invalid to transform the second memcpy into foo(*b).
  //
  // NOTE: This is conservative, it will stop on any read from the source loc,
  // not just the defining memcpy.
  MemDepResult SourceDep =
    MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
                                 false, CS.getInstruction(), MDep->getParent());
  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
    return false;

  Value *TmpCast = MDep->getSource();
  if (MDep->getSource()->getType() != ByValArg->getType())
    TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
                              "tmpcast", CS.getInstruction());

  DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
               << " " << *MDep << "\n"
               << " " << *CS.getInstruction() << "\n");

  // Otherwise we're good!  Update the byval argument.
  CS.setArgument(ArgNo, TmpCast);
  ++NumMemCpyInstr;
  return true;
}
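The alignment gate in this pass can be isolated as the following sketch, under the same pre-LLVM 7 single-alignment API assumption. The helper is ours, not the pass's, and it deliberately omits the getOrEnforceKnownAlignment fallback the pass attempts:

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Sketch of the gate above: forwarding the byval needs no extra work only
// when the memcpy's stated alignment already covers the byval requirement.
// A ByValAlign of 0 means "target default", which the pass cannot inspect.
static bool memcpyCoversByValAlign(const MemCpyInst *MDep, unsigned ByValAlign) {
  if (ByValAlign == 0)
    return false;
  return MDep->getAlignment() >= ByValAlign;
}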
Example 2: visitCallSite
void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();

  visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
                       MemRef::Callee);

  if (Function *F = dyn_cast<Function>(findValue(Callee,
                                                 /*OffsetOk=*/false))) {
    Assert(CS.getCallingConv() == F->getCallingConv(),
           "Undefined behavior: Caller and callee calling convention differ",
           &I);

    FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = CS.arg_size();

    Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
                          : FT->getNumParams() == NumActualArgs,
           "Undefined behavior: Call argument count mismatches callee "
           "argument count",
           &I);

    Assert(FT->getReturnType() == I.getType(),
           "Undefined behavior: Call return type mismatches "
           "callee return type",
           &I);

    // Check argument types (in case the callee was casted) and attributes.
    // TODO: Verify that caller and callee attributes are compatible.
    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
    for (; AI != AE; ++AI) {
      Value *Actual = *AI;
      if (PI != PE) {
        Argument *Formal = &*PI++;
        Assert(Formal->getType() == Actual->getType(),
               "Undefined behavior: Call argument type mismatches "
               "callee parameter type",
               &I);

        // Check that noalias arguments don't alias other arguments. This is
        // not fully precise because we don't know the sizes of the dereferenced
        // memory regions.
        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
            if (AI != BI && (*BI)->getType()->isPointerTy()) {
              AliasResult Result = AA->alias(*AI, *BI);
              Assert(Result != MustAlias && Result != PartialAlias,
                     "Unusual: noalias argument aliases another argument", &I);
            }

        // Check that an sret argument points to valid memory.
        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
          Type *Ty =
            cast<PointerType>(Formal->getType())->getElementType();
          visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
                               DL->getABITypeAlignment(Ty), Ty,
                               MemRef::Read | MemRef::Write);
        }
      }
    }
  }

  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
         AI != AE; ++AI) {
      Value *Obj = findValue(*AI, /*OffsetOk=*/true);
      Assert(!isa<AllocaInst>(Obj),
             "Undefined behavior: Call with \"tail\" keyword references "
             "alloca",
             &I);
    }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default: break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Read);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      uint64_t Size = 0;
      if (const ConstantInt *Len =
              dyn_cast<ConstantInt>(findValue(MCI->getLength(),
                                              /*OffsetOk=*/false)))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
                 MustAlias,
             "Undefined behavior: memcpy source and destination overlap", &I);
//......... (part of the code omitted here) .........
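The constant-length extraction in this check is a reusable idiom. A standalone sketch, without Lint's findValue value-tracking (a plain dyn_cast stands in for it here):

#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Sketch: return the memcpy length if it is a 32-bit constant, else 0,
// which the overlap check above treats as "size unknown".
static uint64_t knownMemCpyLength(const MemCpyInst *MCI) {
  if (const auto *Len = dyn_cast<ConstantInt>(MCI->getLength()))
    if (Len->getValue().isIntN(32))
      return Len->getValue().getZExtValue();
  return 0;
}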
Example 3: handleAlloca
void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  const Function &ContainingFunction = *I.getParent()->getParent();

  // FIXME: We should also try to get this value from the reqd_work_group_size
  // function attribute if it is available.
  unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);

  int AllocaSize =
    WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);

  if (AllocaSize > LocalMemAvailable) {
    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
    return;
  }

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(true);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      // FIXME: What is this for? It doesn't make sense to promote arbitrary
      // function calls. If the call is to a defined function that can also be
      // promoted, we should be able to do this once that function is also
      // rewritten.
//......... (part of the code omitted here) .........
Example 4: visitAlloca
//......... (part of the code omitted here) .........
  AttributeSet AttrSet;
  AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);

  Value *ReadLocalSizeY = Mod->getOrInsertFunction(
      "llvm.r600.read.local.size.y", FTy, AttrSet);
  Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
      "llvm.r600.read.local.size.z", FTy, AttrSet);
  Value *ReadTIDIGX = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.x", FTy, AttrSet);
  Value *ReadTIDIGY = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.y", FTy, AttrSet);
  Value *ReadTIDIGZ = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.z", FTy, AttrSet);

  Value *TCntY = Builder.CreateCall(ReadLocalSizeY);
  Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ);
  Value *TIdX = Builder.CreateCall(ReadTIDIGX);
  Value *TIdY = Builder.CreateCall(ReadTIDIGY);
  Value *TIdZ = Builder.CreateCall(ReadTIDIGZ);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  std::vector<Value*> Indices;
  Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
  Indices.push_back(TID);

  Value *Offset = Builder.CreateGEP(GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (std::vector<Value*>::iterator i = WorkList.begin(),
       e = WorkList.end(); i != e; ++i) {
    Value *V = *i;
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
           ArgIdx != ArgEnd; ++ArgIdx) {
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
      }

      Function *F = Call->getCalledFunction();
      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
                                                F->isVarArg());
      Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"),
                                             NewType, F->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    Builder.SetInsertPoint(Intr);

    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
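The memcpy case above re-emits the copy so the builder re-derives the intrinsic's pointer types after the address-space change. That step can be isolated as a small sketch, assuming the older IRBuilder::CreateMemCpy overload that takes a single unsigned alignment (later LLVM releases split alignment per operand); the helper name is ours:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Sketch: re-emit a memcpy in place, preserving its length, alignment, and
// volatility, then drop the original. Useful after mutating operand types.
static void reemitMemCpy(MemCpyInst *MC) {
  IRBuilder<> B(MC); // insert the replacement right before the original
  B.CreateMemCpy(MC->getRawDest(), MC->getRawSource(), MC->getLength(),
                 MC->getAlignment(), MC->isVolatile());
  MC->eraseFromParent();
}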
Example 5: visitCallSite
void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();

  // TODO: Check function alignment?
  visitMemoryReference(I, Callee, 0, 0);

  if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) {
    Assert1(CS.getCallingConv() == F->getCallingConv(),
            "Undefined behavior: Caller and callee calling convention differ",
            &I);

    const FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());

    Assert1(FT->isVarArg() ?
              FT->getNumParams() <= NumActualArgs :
              FT->getNumParams() == NumActualArgs,
            "Undefined behavior: Call argument count mismatches callee "
            "argument count", &I);

    // TODO: Check argument types (in case the callee was casted)
    // TODO: Check ABI-significant attributes.
    // TODO: Check noalias attribute.
    // TODO: Check sret attribute.
  }

  // TODO: Check the "tail" keyword constraints.

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default: break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      visitMemoryReference(I, MCI->getSource(), MCI->getAlignment(), 0);
      visitMemoryReference(I, MCI->getDest(), MCI->getAlignment(), 0);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      unsigned Size = 0;
      if (const ConstantInt *Len =
            dyn_cast<ConstantInt>(MCI->getLength()->stripPointerCasts()))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
                AliasAnalysis::MustAlias,
              "Undefined behavior: memcpy source and destination overlap", &I);
      break;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      visitMemoryReference(I, MMI->getSource(), MMI->getAlignment(), 0);
      visitMemoryReference(I, MMI->getDest(), MMI->getAlignment(), 0);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      visitMemoryReference(I, MSI->getDest(), MSI->getAlignment(), 0);
      break;
    }

    case Intrinsic::vastart:
      Assert1(I.getParent()->getParent()->isVarArg(),
              "Undefined behavior: va_start called in a non-varargs function",
              &I);

      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      visitMemoryReference(I, CS.getArgument(1), 0, 0);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      break;
    case Intrinsic::stackrestore:
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      break;
    }
}
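This example applies the same accessor uniformly to memcpy, memmove, and memset. In this era of the API all three derive from MemIntrinsic, so a single hedged helper (ours, not LLVM's) can cover them:

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Sketch: memcpy, memmove, and memset all expose the old single-operand
// alignment through MemIntrinsic::getAlignment(); 0 means "not a mem
// intrinsic" here as well as "no stated alignment".
static unsigned memIntrinsicAlign(const IntrinsicInst *II) {
  if (const auto *MI = dyn_cast<MemIntrinsic>(II))
    return MI->getAlignment();
  return 0;
}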
Example 6: handleAlloca
// FIXME: Should try to pick the most likely to be profitable allocas first.
void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  const Function &ContainingFunction = *I.getParent()->getParent();

  // Don't promote the alloca to LDS for shader calling conventions as the work
  // item ID intrinsics are not supported for these calling conventions.
  // Furthermore not all LDS is available for some of the stages.
  if (AMDGPU::isShader(ContainingFunction.getCallingConv()))
    return;

  // FIXME: We should also try to get this value from the reqd_work_group_size
  // function attribute if it is available.
  unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);

  const DataLayout &DL = Mod->getDataLayout();

  unsigned Align = I.getAlignment();
  if (Align == 0)
    Align = DL.getABITypeAlignment(I.getAllocatedType());

  // FIXME: This computed padding is likely wrong since it depends on inverse
  // usage order.
  //
  // FIXME: It is also possible that if we're allowed to use all of the memory
  // we could end up using more than the maximum due to alignment padding.

  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    DEBUG(dbgs() << " " << AllocSize
          << " bytes of local memory not available to promote\n");
    return;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();
//......... (part of the code omitted here) .........
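The alignment fallback near the top of this example (use the alloca's own alignment, else the ABI alignment of its allocated type) is a common idiom worth isolating. A minimal sketch, assuming the old unsigned-alignment API (roughly pre-LLVM 10, before Align was introduced); the helper name is hypothetical:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: an alloca with getAlignment() == 0 carries no explicit alignment;
// fall back to the ABI alignment of the allocated type, as handleAlloca does.
static unsigned effectiveAllocaAlign(const AllocaInst &AI, const DataLayout &DL) {
  if (unsigned A = AI.getAlignment())
    return A;
  return DL.getABITypeAlignment(AI.getAllocatedType());
}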