Skip to content

Commit 2fa38f3

Browse files
committed
[AArch64][SelectionDAG] Generate subs+csel for usub.sat
Fixes #191488. This is a regression introduced by #170076, so this change adds a check, via a virtual hook in TargetLowering, that avoids the generic lowering of usub.sat(X, 1) to X - zext(X != 0) on AArch64. All other backends still receive the generic lowering as implemented in the original patch. Change-Id: I0a194bcc9e66819c12d0f9179464823301f0d7bf
1 parent 38e0a9e commit 2fa38f3

6 files changed

Lines changed: 34 additions & 4 deletions

File tree

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3595,6 +3595,10 @@ class LLVM_ABI TargetLoweringBase {
35953595
return false;
35963596
}
35973597

3598+
/// Return true if usub.sat(X, 1) should be expanded as X - zext(X != 0)
3599+
/// instead of the default overflow/select expansion. Defaults to true.
3600+
virtual bool preferSubOfZextForUsubSatOne(EVT VT) const { return true; }
3601+
35983602
/// True if target has some particular form of dealing with pointer arithmetic
35993603
/// semantics for pointers with the given value type. False if pointer
36003604
/// arithmetic should not be preserved for passes such as instruction

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11475,7 +11475,8 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
1147511475
}
1147611476

1147711477
// usub.sat(a, 1) -> sub(a, zext(a != 0))
11478-
if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS)) {
11478+
if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS) &&
11479+
preferSubOfZextForUsubSatOne(VT)) {
1147911480
LHS = DAG.getFreeze(LHS);
1148011481
SDValue Zero = DAG.getConstant(0, dl, VT);
1148111482
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30978,6 +30978,11 @@ bool AArch64TargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
3097830978
return TargetLowering::shouldConvertFpToSat(Op, FPVT, VT);
3097930979
}
3098030980

30981+
bool AArch64TargetLowering::preferSubOfZextForUsubSatOne(EVT /*VT*/) const {
30982+
// Opt out for every type so SelectionDAG emits subs+csel for usub.sat(X, 1); see https://github.com/llvm/llvm-project/issues/191488
30983+
return false;
30984+
}
30985+
3098130986
bool AArch64TargetLowering::preferSelectsOverBooleanArithmetic(EVT VT) const {
3098230987
// Expand scalar and SVE operations using selects. Neon vectors prefer sub to
3098330988
// avoid vselect becoming bsl / unrolling.

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,8 @@ class AArch64TargetLowering : public TargetLowering {
450450

451451
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
452452

453+
bool preferSubOfZextForUsubSatOne(EVT VT) const override;
454+
453455
bool preferSelectsOverBooleanArithmetic(EVT VT) const override;
454456

455457
bool isComplexDeinterleavingSupported() const override;

llvm/test/CodeGen/AArch64/and-mask-removal.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -483,9 +483,9 @@ define i64 @pr58109(i8 signext %0) {
483483
; CHECK-SD-LABEL: pr58109:
484484
; CHECK-SD: ; %bb.0:
485485
; CHECK-SD-NEXT: add w8, w0, #1
486-
; CHECK-SD-NEXT: ands w8, w8, #0xff
487-
; CHECK-SD-NEXT: cset w9, ne
488-
; CHECK-SD-NEXT: sub w0, w8, w9
486+
; CHECK-SD-NEXT: and w8, w8, #0xff
487+
; CHECK-SD-NEXT: subs w8, w8, #1
488+
; CHECK-SD-NEXT: csel w0, wzr, w8, lo
489489
; CHECK-SD-NEXT: ret
490490
;
491491
; CHECK-GI-LABEL: pr58109:

llvm/test/CodeGen/AArch64/usub_sat_plus.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,24 @@ declare i16 @llvm.usub.sat.i16(i16, i16)
88
declare i32 @llvm.usub.sat.i32(i32, i32)
99
declare i64 @llvm.usub.sat.i64(i64, i64)
1010

11+
define i32 @sat_dec_i32(i32 %x) nounwind {
12+
; CHECK-SD-LABEL: sat_dec_i32:
13+
; CHECK-SD: // %bb.0:
14+
; CHECK-SD-NEXT: subs w8, w0, #1
15+
; CHECK-SD-NEXT: csel w0, wzr, w8, lo
16+
; CHECK-SD-NEXT: ret
17+
;
18+
; CHECK-GI-LABEL: sat_dec_i32:
19+
; CHECK-GI: // %bb.0:
20+
; CHECK-GI-NEXT: subs w8, w0, #1
21+
; CHECK-GI-NEXT: cset w9, lo
22+
; CHECK-GI-NEXT: tst w9, #0x1
23+
; CHECK-GI-NEXT: csel w0, wzr, w8, ne
24+
; CHECK-GI-NEXT: ret
25+
%tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 1)
26+
ret i32 %tmp
27+
}
28+
1129
define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
1230
; CHECK-SD-LABEL: func32:
1331
; CHECK-SD: // %bb.0:

0 commit comments

Comments
 (0)