@@ -386,6 +386,98 @@ static bool tryToRecognizePopCount(Instruction &I) {
386386 return false ;
387387}
388388
389+ // Try to recognize below function as popcount intrinsic.
390+ // Ref. Hackers Delight
391+ // int popcnt(unsigned x) {
392+ // x = x - ((x >> 1) & 0x55555555);
393+ // x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
394+ // x = (x + (x >> 4)) & 0x0F0F0F0F;
395+ // x = x + (x >> 8);
396+ // x = x + (x >> 16);
397+ // return x & 0x0000003F;
398+ // }
399+
400+ // int popcnt(unsigned x) {
401+ // x = x - ((x >> 1) & 0x55555555);
402+ // x = x - 3*((x >> 2) & 0x33333333);
403+ // x = (x + (x >> 4)) & 0x0F0F0F0F;
404+ // x = x + (x >> 8);
405+ // x = x + (x >> 16);
406+ // return x & 0x0000003F;
407+ // }
408+
409+ static bool tryToRecognizePopCount2n3 (Instruction &I) {
410+ if (I.getOpcode () != Instruction::And)
411+ return false ;
412+
413+ Type *Ty = I.getType ();
414+ if (!Ty->isIntOrIntVectorTy ())
415+ return false ;
416+
417+ unsigned Len = Ty->getScalarSizeInBits ();
418+
419+ if (Len > 64 || Len <= 8 || Len % 8 != 0 )
420+ return false ;
421+
422+ // Len should be a power of 2 for the loop to work correctly
423+ if (!isPowerOf2_32 (Len))
424+ return false ;
425+
426+ APInt Mask55 = APInt::getSplat (Len, APInt (8 , 0x55 ));
427+ APInt Mask33 = APInt::getSplat (Len, APInt (8 , 0x33 ));
428+ APInt Mask0F = APInt::getSplat (Len, APInt (8 , 0x0F ));
429+
430+ APInt MaskRes = APInt (Len, 2 * Len - 1 );
431+
432+ Value *Add1;
433+ if (!match (&I, m_And (m_Value (Add1), m_SpecificInt (MaskRes))))
434+ return false ;
435+
436+ Value *Add2;
437+ for (unsigned I = Len; I >= 16 ; I = I / 2 ) {
438+ // Matching "x = x + (x >> I/2)" for I-bit.
439+ if (!match (Add1, m_c_Add (m_LShr (m_Value (Add2), m_SpecificInt (I / 2 )),
440+ m_Deferred (Add2))))
441+ return false ;
442+ Add1 = Add2;
443+ }
444+
445+ Value *And1 = Add1;
446+ // Matching "x = (x + (x >> 4)) & 0x0F0F0F0F".
447+ if (!match (And1, m_And (m_c_Add (m_LShr (m_Value (Add2), m_SpecificInt (4 )),
448+ m_Deferred (Add2)),
449+ m_SpecificInt (Mask0F))))
450+ return false ;
451+
452+ Value *Sub1;
453+ llvm::APInt NegThree (/* BitWidth=*/ Len, /* Value=*/ -3 ,
454+ /* isSigned=*/ true );
455+ // x = (x & 0x33333333) + ((x >> 2) & 0x33333333)".
456+ if (!match (Add2, m_c_Add (m_And (m_LShr (m_Value (Sub1), m_SpecificInt (2 )),
457+ m_SpecificInt (Mask33)),
458+ m_And (m_Deferred (Sub1), m_SpecificInt (Mask33)))) &&
459+ // Matching "x = x - 3*((x >> 2) & 0x33333333)".
460+ !match (Add2, m_Add (m_Mul (m_And (m_LShr (m_Value (Sub1), m_SpecificInt (2 )),
461+ m_SpecificInt (Mask33)),
462+ m_SpecificInt (NegThree)),
463+ m_Deferred (Sub1))))
464+ return false ;
465+
466+ Value *Root;
467+ // x = x - ((x >> 1) & 0x55555555);
468+ if (!match (Sub1, m_Sub (m_Value (Root),
469+ m_And (m_LShr (m_Deferred (Root), m_SpecificInt (1 )),
470+ m_SpecificInt (Mask55)))))
471+ return false ;
472+
473+ LLVM_DEBUG (dbgs () << " Recognized popcount intrinsic\n " );
474+ IRBuilder<> Builder (&I);
475+ I.replaceAllUsesWith (
476+ Builder.CreateIntrinsic (Intrinsic::ctpop, I.getType (), {Root}));
477+ ++NumPopCountRecognized;
478+ return true ;
479+ }
480+
389481// / Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
390482// / C2 saturate the value of the fp conversion. The transform is not reversible
391483// / as the fptosi.sat is more defined than the input - all values produce a
@@ -2024,6 +2116,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
20242116 MadeChange |= foldAnyOrAllBitsSet (I);
20252117 MadeChange |= foldGuardedFunnelShift (I, DT);
20262118 MadeChange |= tryToRecognizePopCount (I);
2119+ MadeChange |= tryToRecognizePopCount2n3 (I);
20272120 MadeChange |= tryToFPToSat (I, TTI);
20282121 MadeChange |= tryToRecognizeTableBasedCttz (I, DL);
20292122 MadeChange |= tryToRecognizeTableBasedLog2 (I, DL, TTI);
0 commit comments