Skip to content

Commit 57d2a2c

Browse files
rohitaggarwal007Rohit Aggarwal
andauthored
[AggressiveInstCombine] POPCNT generation for bit-count pattern (#180917)
The proposal is to enhance LLVM by teaching it to recognize this bit-count pattern and replace it with the hardware POPCNT instruction. #177109 contains the first pattern of the popcnt fold. Co-authored-by: Rohit Aggarwal <Rohit.Aggarwal@amd.com>
1 parent 0da34b8 commit 57d2a2c

2 files changed

Lines changed: 677 additions & 0 deletions

File tree

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,98 @@ static bool tryToRecognizePopCount(Instruction &I) {
386386
return false;
387387
}
388388

389+
// Try to recognize below function as popcount intrinsic.
390+
// Ref. Hackers Delight
391+
// int popcnt(unsigned x) {
392+
// x = x - ((x >> 1) & 0x55555555);
393+
// x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
394+
// x = (x + (x >> 4)) & 0x0F0F0F0F;
395+
// x = x + (x >> 8);
396+
// x = x + (x >> 16);
397+
// return x & 0x0000003F;
398+
// }
399+
400+
// int popcnt(unsigned x) {
401+
// x = x - ((x >> 1) & 0x55555555);
402+
// x = x - 3*((x >> 2) & 0x33333333);
403+
// x = (x + (x >> 4)) & 0x0F0F0F0F;
404+
// x = x + (x >> 8);
405+
// x = x + (x >> 16);
406+
// return x & 0x0000003F;
407+
// }
408+
409+
static bool tryToRecognizePopCount2n3(Instruction &I) {
410+
if (I.getOpcode() != Instruction::And)
411+
return false;
412+
413+
Type *Ty = I.getType();
414+
if (!Ty->isIntOrIntVectorTy())
415+
return false;
416+
417+
unsigned Len = Ty->getScalarSizeInBits();
418+
419+
if (Len > 64 || Len <= 8 || Len % 8 != 0)
420+
return false;
421+
422+
// Len should be a power of 2 for the loop to work correctly
423+
if (!isPowerOf2_32(Len))
424+
return false;
425+
426+
APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
427+
APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
428+
APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
429+
430+
APInt MaskRes = APInt(Len, 2 * Len - 1);
431+
432+
Value *Add1;
433+
if (!match(&I, m_And(m_Value(Add1), m_SpecificInt(MaskRes))))
434+
return false;
435+
436+
Value *Add2;
437+
for (unsigned I = Len; I >= 16; I = I / 2) {
438+
// Matching "x = x + (x >> I/2)" for I-bit.
439+
if (!match(Add1, m_c_Add(m_LShr(m_Value(Add2), m_SpecificInt(I / 2)),
440+
m_Deferred(Add2))))
441+
return false;
442+
Add1 = Add2;
443+
}
444+
445+
Value *And1 = Add1;
446+
// Matching "x = (x + (x >> 4)) & 0x0F0F0F0F".
447+
if (!match(And1, m_And(m_c_Add(m_LShr(m_Value(Add2), m_SpecificInt(4)),
448+
m_Deferred(Add2)),
449+
m_SpecificInt(Mask0F))))
450+
return false;
451+
452+
Value *Sub1;
453+
llvm::APInt NegThree(/*BitWidth=*/Len, /*Value=*/-3,
454+
/*isSigned=*/true);
455+
// x = (x & 0x33333333) + ((x >> 2) & 0x33333333)".
456+
if (!match(Add2, m_c_Add(m_And(m_LShr(m_Value(Sub1), m_SpecificInt(2)),
457+
m_SpecificInt(Mask33)),
458+
m_And(m_Deferred(Sub1), m_SpecificInt(Mask33)))) &&
459+
// Matching "x = x - 3*((x >> 2) & 0x33333333)".
460+
!match(Add2, m_Add(m_Mul(m_And(m_LShr(m_Value(Sub1), m_SpecificInt(2)),
461+
m_SpecificInt(Mask33)),
462+
m_SpecificInt(NegThree)),
463+
m_Deferred(Sub1))))
464+
return false;
465+
466+
Value *Root;
467+
// x = x - ((x >> 1) & 0x55555555);
468+
if (!match(Sub1, m_Sub(m_Value(Root),
469+
m_And(m_LShr(m_Deferred(Root), m_SpecificInt(1)),
470+
m_SpecificInt(Mask55)))))
471+
return false;
472+
473+
LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
474+
IRBuilder<> Builder(&I);
475+
I.replaceAllUsesWith(
476+
Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
477+
++NumPopCountRecognized;
478+
return true;
479+
}
480+
389481
/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
390482
/// C2 saturate the value of the fp conversion. The transform is not reversible
391483
/// as the fptosi.sat is more defined than the input - all values produce a
@@ -2024,6 +2116,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
20242116
MadeChange |= foldAnyOrAllBitsSet(I);
20252117
MadeChange |= foldGuardedFunnelShift(I, DT);
20262118
MadeChange |= tryToRecognizePopCount(I);
2119+
MadeChange |= tryToRecognizePopCount2n3(I);
20272120
MadeChange |= tryToFPToSat(I, TTI);
20282121
MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
20292122
MadeChange |= tryToRecognizeTableBasedLog2(I, DL, TTI);

0 commit comments

Comments
 (0)