本文整理汇总了C++中machinebasicblock::iterator::modifiesRegister方法的典型用法代码示例。如果您正苦于以下问题:C++ iterator::modifiesRegister方法的具体用法?C++ iterator::modifiesRegister怎么用?C++ iterator::modifiesRegister使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类machinebasicblock::iterator
的用法示例。
在下文中一共展示了iterator::modifiesRegister方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: CCMask
// Try to fuse comparison instruction Compare into a later branch.
// Return true on success and if Compare is therefore redundant.
bool SystemZElimCompare::
fuseCompareAndBranch(MachineInstr *Compare,
SmallVectorImpl<MachineInstr *> &CCUsers) {
// See whether we have a comparison that can be fused.
unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(),
Compare);
if (!FusedOpcode)
return false;
// See whether we have a single branch with which to fuse.
if (CCUsers.size() != 1)
return false;
MachineInstr *Branch = CCUsers[0];
if (Branch->getOpcode() != SystemZ::BRC)
return false;
// Make sure that the operands are available at the branch.
unsigned SrcReg = Compare->getOperand(0).getReg();
unsigned SrcReg2 = (Compare->getOperand(1).isReg() ?
Compare->getOperand(1).getReg() : 0);
MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
for (++MBBI; MBBI != MBBE; ++MBBI)
if (MBBI->modifiesRegister(SrcReg, TRI) ||
(SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
return false;
// Read the branch mask and target.
MachineOperand CCMask(MBBI->getOperand(1));
MachineOperand Target(MBBI->getOperand(2));
assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
"Invalid condition-code mask for integer comparison");
// Clear out all current operands.
int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
assert(CCUse >= 0 && "BRC must use CC");
Branch->RemoveOperand(CCUse);
Branch->RemoveOperand(2);
Branch->RemoveOperand(1);
Branch->RemoveOperand(0);
// Rebuild Branch as a fused compare and branch.
Branch->setDesc(TII->get(FusedOpcode));
MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
.addOperand(Compare->getOperand(0))
.addOperand(Compare->getOperand(1))
.addOperand(CCMask)
.addOperand(Target)
.addReg(SystemZ::CC, RegState::ImplicitDefine);
// Clear any intervening kills of SrcReg and SrcReg2.
MBBI = Compare;
for (++MBBI; MBBI != MBBE; ++MBBI) {
MBBI->clearRegisterKills(SrcReg, TRI);
if (SrcReg2)
MBBI->clearRegisterKills(SrcReg2, TRI);
}
FusedComparisons += 1;
return true;
}
示例2: canBeFeederToNewValueJump
// We have identified this II could be feeder to NVJ,
// verify that it can be.
static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
const TargetRegisterInfo *TRI,
MachineBasicBlock::iterator II,
MachineBasicBlock::iterator end,
MachineBasicBlock::iterator skip,
MachineFunction &MF) {
// Predicated instruction can not be feeder to NVJ.
if (QII->isPredicated(*II))
return false;
// Bail out if feederReg is a paired register (double regs in
// our case). One would think that we can check to see if a given
// register cmpReg1 or cmpReg2 is a sub register of feederReg
// using -- if (QRI->isSubRegister(feederReg, cmpReg1) logic
// before the callsite of this function
// But we can not as it comes in the following fashion.
// %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill>
// %R0<def> = KILL %R0, %D0<imp-use,kill>
// %P0<def> = CMPEQri %R0<kill>, 0
// Hence, we need to check if it's a KILL instruction.
if (II->getOpcode() == TargetOpcode::KILL)
return false;
if (II->isImplicitDef())
return false;
// Make sure there there is no 'def' or 'use' of any of the uses of
// feeder insn between it's definition, this MI and jump, jmpInst
// skipping compare, cmpInst.
// Here's the example.
// r21=memub(r22+r24<<#0)
// p0 = cmp.eq(r21, #0)
// r4=memub(r3+r21<<#0)
// if (p0.new) jump:t .LBB29_45
// Without this check, it will be converted into
// r4=memub(r3+r21<<#0)
// r21=memub(r22+r24<<#0)
// p0 = cmp.eq(r21, #0)
// if (p0.new) jump:t .LBB29_45
// and result WAR hazards if converted to New Value Jump.
for (unsigned i = 0; i < II->getNumOperands(); ++i) {
if (II->getOperand(i).isReg() &&
(II->getOperand(i).isUse() || II->getOperand(i).isDef())) {
MachineBasicBlock::iterator localII = II;
++localII;
unsigned Reg = II->getOperand(i).getReg();
for (MachineBasicBlock::iterator localBegin = localII;
localBegin != end; ++localBegin) {
if (localBegin == skip ) continue;
// Check for Subregisters too.
if (localBegin->modifiesRegister(Reg, TRI) ||
localBegin->readsRegister(Reg, TRI))
return false;
}
}
}
return true;
}
示例3: runOnMachineFunction
// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
bool Changes = false;
ST = &MF.getSubtarget<SISubtarget>();
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
IV = getIsaVersion(ST->getFeatureBits());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
HardwareLimits.Named.VM = getVmcntBitMask(IV);
HardwareLimits.Named.EXP = getExpcntBitMask(IV);
HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
WaitedOn = ZeroCounts;
DelayedWaitOn = ZeroCounts;
LastIssued = ZeroCounts;
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
IsFlatOutstanding = false;
ReturnsVoid = MFI->returnsVoid();
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
SmallVector<MachineInstr *, 4> RemoveMI;
SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
bool HaveScalarStores = false;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
if (!HaveScalarStores && TII->isScalarStore(*I))
HaveScalarStores = true;
if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
// There is a hardware bug on CI/SI where SMRD instruction may corrupt
// vccz bit, so when we detect that an instruction may read from a
// corrupt vccz bit, we need to:
// 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
// complete.
// 2. Restore the correct value of vccz by writing the current value
// of vcc back to vcc.
if (TII->isSMRD(I->getOpcode())) {
VCCZCorrupt = true;
} else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
// FIXME: We only care about SMRD instructions here, not LDS or GDS.
// Whenever we store a value in vcc, the correct value of vccz is
// restored.
VCCZCorrupt = false;
}
// Check if we need to apply the bug work-around
if (VCCZCorrupt && readsVCCZ(*I)) {
DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');
// Wait on everything, not just LGKM. vccz reads usually come from
// terminators, and we always wait on everything at the end of the
// block, so if we only wait on LGKM here, we might end up with
// another s_waitcnt inserted right after this if there are non-LGKM
// instructions still outstanding.
insertWait(MBB, I, LastIssued);
// Restore the vccz bit. Any time a value is written to vcc, the vcc
// bit is updated, so we can restore the bit by reading the value of
// vcc and then writing it back to the register.
BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
AMDGPU::VCC)
.addReg(AMDGPU::VCC);
}
}
// Record pre-existing, explicitly requested waits
if (I->getOpcode() == AMDGPU::S_WAITCNT) {
handleExistingWait(*I);
RemoveMI.push_back(&*I);
continue;
}
Counters Required;
// Wait for everything before a barrier.
//
// S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
// but we also want to wait for any other outstanding transfers before
// signalling other hardware blocks
if ((I->getOpcode() == AMDGPU::S_BARRIER &&
ST->needWaitcntBeforeBarrier()) ||
I->getOpcode() == AMDGPU::S_SENDMSG)
Required = LastIssued;
else
//.........这里部分代码省略.........
示例4: canCompareBeNewValueJump
static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
const TargetRegisterInfo *TRI,
MachineBasicBlock::iterator II,
unsigned pReg,
bool secondReg,
bool optLocation,
MachineBasicBlock::iterator end,
MachineFunction &MF) {
MachineInstr &MI = *II;
// If the second operand of the compare is an imm, make sure it's in the
// range specified by the arch.
if (!secondReg) {
int64_t v = MI.getOperand(2).getImm();
if (!(isUInt<5>(v) || ((MI.getOpcode() == Hexagon::C2_cmpeqi ||
MI.getOpcode() == Hexagon::C2_cmpgti) &&
(v == -1))))
return false;
}
unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
cmpReg1 = MI.getOperand(1).getReg();
if (secondReg) {
cmpOp2 = MI.getOperand(2).getReg();
// Make sure that that second register is not from COPY
// At machine code level, we don't need this, but if we decide
// to move new value jump prior to RA, we would be needing this.
MachineRegisterInfo &MRI = MF.getRegInfo();
if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) {
MachineInstr *def = MRI.getVRegDef(cmpOp2);
if (def->getOpcode() == TargetOpcode::COPY)
return false;
}
}
// Walk the instructions after the compare (predicate def) to the jump,
// and satisfy the following conditions.
++II ;
for (MachineBasicBlock::iterator localII = II; localII != end;
++localII) {
// Check 1.
// If "common" checks fail, bail out.
if (!commonChecksToProhibitNewValueJump(optLocation, localII))
return false;
// Check 2.
// If there is a def or use of predicate (result of compare), bail out.
if (localII->modifiesRegister(pReg, TRI) ||
localII->readsRegister(pReg, TRI))
return false;
// Check 3.
// If there is a def of any of the use of the compare (operands of compare),
// bail out.
// Eg.
// p0 = cmp.eq(r2, r0)
// r2 = r4
// if (p0.new) jump:t .LBB28_3
if (localII->modifiesRegister(cmpReg1, TRI) ||
(secondReg && localII->modifiesRegister(cmpOp2, TRI)))
return false;
}
return true;
}
示例5: fuseCompareOperations
// Try to fuse comparison instruction Compare into a later branch.
// Return true on success and if Compare is therefore redundant.
bool SystemZElimCompare::fuseCompareOperations(
MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) {
// See whether we have a single branch with which to fuse.
if (CCUsers.size() != 1)
return false;
MachineInstr *Branch = CCUsers[0];
SystemZII::FusedCompareType Type;
switch (Branch->getOpcode()) {
case SystemZ::BRC:
Type = SystemZII::CompareAndBranch;
break;
case SystemZ::CondReturn:
Type = SystemZII::CompareAndReturn;
break;
case SystemZ::CallBCR:
Type = SystemZII::CompareAndSibcall;
break;
case SystemZ::CondTrap:
Type = SystemZII::CompareAndTrap;
break;
default:
return false;
}
// See whether we have a comparison that can be fused.
unsigned FusedOpcode =
TII->getFusedCompare(Compare.getOpcode(), Type, &Compare);
if (!FusedOpcode)
return false;
// Make sure that the operands are available at the branch.
// SrcReg2 is the register if the source operand is a register,
// 0 if the source operand is immediate, and the base register
// if the source operand is memory (index is not supported).
unsigned SrcReg = Compare.getOperand(0).getReg();
unsigned SrcReg2 =
Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0;
MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
for (++MBBI; MBBI != MBBE; ++MBBI)
if (MBBI->modifiesRegister(SrcReg, TRI) ||
(SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
return false;
// Read the branch mask, target (if applicable), regmask (if applicable).
MachineOperand CCMask(MBBI->getOperand(1));
assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
"Invalid condition-code mask for integer comparison");
// This is only valid for CompareAndBranch.
MachineOperand Target(MBBI->getOperand(
Type == SystemZII::CompareAndBranch ? 2 : 0));
const uint32_t *RegMask;
if (Type == SystemZII::CompareAndSibcall)
RegMask = MBBI->getOperand(2).getRegMask();
// Clear out all current operands.
int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
assert(CCUse >= 0 && "BRC/BCR must use CC");
Branch->RemoveOperand(CCUse);
// Remove target (branch) or regmask (sibcall).
if (Type == SystemZII::CompareAndBranch ||
Type == SystemZII::CompareAndSibcall)
Branch->RemoveOperand(2);
Branch->RemoveOperand(1);
Branch->RemoveOperand(0);
// Rebuild Branch as a fused compare and branch.
// SrcNOps is the number of MI operands of the compare instruction
// that we need to copy over.
unsigned SrcNOps = 2;
if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT)
SrcNOps = 3;
Branch->setDesc(TII->get(FusedOpcode));
MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
for (unsigned I = 0; I < SrcNOps; I++)
MIB.addOperand(Compare.getOperand(I));
MIB.addOperand(CCMask);
if (Type == SystemZII::CompareAndBranch) {
// Only conditional branches define CC, as they may be converted back
// to a non-fused branch because of a long displacement. Conditional
// returns don't have that problem.
MIB.addOperand(Target)
.addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
}
if (Type == SystemZII::CompareAndSibcall)
MIB.addRegMask(RegMask);
// Clear any intervening kills of SrcReg and SrcReg2.
MBBI = Compare;
for (++MBBI; MBBI != MBBE; ++MBBI) {
MBBI->clearRegisterKills(SrcReg, TRI);
if (SrcReg2)
MBBI->clearRegisterKills(SrcReg2, TRI);
}
FusedComparisons += 1;
return true;
}
示例6: runOnMachineFunction
bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
// Optimize sequences emitted for control flow lowering. They are originally
// emitted as the separate operations because spill code may need to be
// inserted for the saved copy of exec.
//
// x = copy exec
// z = s_<op>_b64 x, y
// exec = copy z
// =>
// x = s_<op>_saveexec_b64 y
//
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::reverse_iterator I = fixTerminators(*TII, MBB);
MachineBasicBlock::reverse_iterator E = MBB.rend();
if (I == E)
continue;
unsigned CopyToExec = isCopyToExec(*I);
if (CopyToExec == AMDGPU::NoRegister)
continue;
// Scan backwards to find the def.
auto CopyToExecInst = &*I;
auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
if (CopyFromExecInst == E) {
auto PrepareExecInst = std::next(I);
if (PrepareExecInst == E)
continue;
// Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
if (CopyToExecInst->getOperand(1).isKill() &&
isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);
PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
PrepareExecInst->getOperand(0).setIsRenamable(false);
DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
CopyToExecInst->eraseFromParent();
}
continue;
}
if (isLiveOut(MBB, CopyToExec)) {
// The copied register is live out and has a second use in another block.
DEBUG(dbgs() << "Exec copy source register is live out\n");
continue;
}
unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
MachineInstr *SaveExecInst = nullptr;
SmallVector<MachineInstr *, 4> OtherUseInsts;
for (MachineBasicBlock::iterator J
= std::next(CopyFromExecInst->getIterator()), JE = I->getIterator();
J != JE; ++J) {
if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) {
DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
// Make sure this is inserted after any VALU ops that may have been
// scheduled in between.
SaveExecInst = nullptr;
break;
}
bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI);
if (J->modifiesRegister(CopyToExec, TRI)) {
if (SaveExecInst) {
DEBUG(dbgs() << "Multiple instructions modify "
<< printReg(CopyToExec, TRI) << '\n');
SaveExecInst = nullptr;
break;
}
unsigned SaveExecOp = getSaveExecOp(J->getOpcode());
if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
break;
if (ReadsCopyFromExec) {
SaveExecInst = &*J;
DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
continue;
} else {
DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n');
break;
}
} else if (ReadsCopyFromExec && !SaveExecInst) {
// Make sure no other instruction is trying to use this copy, before it
// will be rewritten by the saveexec, i.e. hasOneUse. There may have
// been another use, such as an inserted spill. For example:
//
//.........这里部分代码省略.........
示例7: predicate
/// For a given conditional copy, predicate the definition of the source of
/// the copy under the given condition (using the same predicate register as
/// the copy).
bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) {
// TfrI - A2_tfr[tf] Instruction (not A2_tfrsi).
unsigned Opc = TfrI->getOpcode();
(void)Opc;
assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf);
DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false")
<< ": " << *TfrI);
MachineOperand &MD = TfrI->getOperand(0);
MachineOperand &MP = TfrI->getOperand(1);
MachineOperand &MS = TfrI->getOperand(2);
// The source operand should be a <kill>. This is not strictly necessary,
// but it makes things a lot simpler. Otherwise, we would need to rename
// some registers, which would complicate the transformation considerably.
if (!MS.isKill())
return false;
RegisterRef RT(MS);
unsigned PredR = MP.getReg();
MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond);
if (!DefI || !isPredicable(DefI))
return false;
DEBUG(dbgs() << "Source def: " << *DefI);
// Collect the information about registers defined and used between the
// DefI and the TfrI.
// Map: reg -> bitmask of subregs
ReferenceMap Uses, Defs;
MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI;
// Check if the predicate register is valid between DefI and TfrI.
// If it is, we can then ignore instructions predicated on the negated
// conditions when collecting def and use information.
bool PredValid = true;
for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
if (!I->modifiesRegister(PredR, 0))
continue;
PredValid = false;
break;
}
for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
MachineInstr *MI = &*I;
// If this instruction is predicated on the same register, it could
// potentially be ignored.
// By default assume that the instruction executes on the same condition
// as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
unsigned Exec = Exec_Then | Exec_Else;
if (PredValid && HII->isPredicated(*MI) && MI->readsRegister(PredR))
Exec = (Cond == HII->isPredicatedTrue(*MI)) ? Exec_Then : Exec_Else;
for (auto &Op : MI->operands()) {
if (!Op.isReg())
continue;
// We don't want to deal with physical registers. The reason is that
// they can be aliased with other physical registers. Aliased virtual
// registers must share the same register number, and can only differ
// in the subregisters, which we are keeping track of. Physical
// registers ters no longer have subregisters---their super- and
// subregisters are other physical registers, and we are not checking
// that.
RegisterRef RR = Op;
if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
return false;
ReferenceMap &Map = Op.isDef() ? Defs : Uses;
addRefToMap(RR, Map, Exec);
}
}
// The situation:
// RT = DefI
// ...
// RD = TfrI ..., RT
// If the register-in-the-middle (RT) is used or redefined between
// DefI and TfrI, we may not be able proceed with this transformation.
// We can ignore a def that will not execute together with TfrI, and a
// use that will. If there is such a use (that does execute together with
// TfrI), we will not be able to move DefI down. If there is a use that
// executed if TfrI's condition is false, then RT must be available
// unconditionally (cannot be predicated).
// Essentially, we need to be able to rename RT to RD in this segment.
if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else))
return false;
RegisterRef RD = MD;
// If the predicate register is defined between DefI and TfrI, the only
// potential thing to do would be to move the DefI down to TfrI, and then
// predicate. The reaching def (DefI) must be movable down to the location
// of the TfrI.
// If the target register of the TfrI (RD) is not used or defined between
// DefI and TfrI, consider moving TfrI up to DefI.
bool CanUp = canMoveOver(TfrI, Defs, Uses);
bool CanDown = canMoveOver(DefI, Defs, Uses);
// The TfrI does not access memory, but DefI could. Check if it's safe
// to move DefI down to TfrI.
//.........这里部分代码省略.........
示例8: runOnMachineFunction
// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
bool Changes = false;
ST = &MF.getSubtarget<SISubtarget>();
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
WaitedOn = ZeroCounts;
DelayedWaitOn = ZeroCounts;
LastIssued = ZeroCounts;
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
SmallVector<MachineInstr *, 4> RemoveMI;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
// There is a hardware bug on CI/SI where SMRD instruction may corrupt
// vccz bit, so when we detect that an instruction may read from a
// corrupt vccz bit, we need to:
// 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
// complete.
// 2. Restore the correct value of vccz by writing the current value
// of vcc back to vcc.
if (TII->isSMRD(I->getOpcode())) {
VCCZCorrupt = true;
} else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
// FIXME: We only care about SMRD instructions here, not LDS or GDS.
// Whenever we store a value in vcc, the correct value of vccz is
// restored.
VCCZCorrupt = false;
}
// Check if we need to apply the bug work-around
if (readsVCCZ(I->getOpcode()) && VCCZCorrupt) {
DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');
// Wait on everything, not just LGKM. vccz reads usually come from
// terminators, and we always wait on everything at the end of the
// block, so if we only wait on LGKM here, we might end up with
// another s_waitcnt inserted right after this if there are non-LGKM
// instructions still outstanding.
insertWait(MBB, I, LastIssued);
// Restore the vccz bit. Any time a value is written to vcc, the vcc
// bit is updated, so we can restore the bit by reading the value of
// vcc and then writing it back to the register.
BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
AMDGPU::VCC)
.addReg(AMDGPU::VCC);
}
}
// Record pre-existing, explicitly requested waits
if (I->getOpcode() == AMDGPU::S_WAITCNT) {
handleExistingWait(*I);
RemoveMI.push_back(&*I);
continue;
}
Counters Required;
// Wait for everything before a barrier.
//
// S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
// but we also want to wait for any other outstanding transfers before
// signalling other hardware blocks
if (I->getOpcode() == AMDGPU::S_BARRIER ||
I->getOpcode() == AMDGPU::S_SENDMSG)
Required = LastIssued;
else
Required = handleOperands(*I);
Counters Increment = getHwCounts(*I);
if (countersNonZero(Required) || countersNonZero(Increment))
increaseCounters(Required, DelayedWaitOn);
Changes |= insertWait(MBB, I, Required);
pushInstruction(MBB, I, Increment);
handleSendMsg(MBB, I);
}
// Wait for everything at the end of the MBB
Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
//.........这里部分代码省略.........