
[LegalizeIntegerTypes] Use forceExpandWideMUL in ExpandIntRes_XMULO. #123432

Merged: 1 commit merged into llvm:main on Jan 18, 2025

Conversation

topperc (Collaborator) commented on Jan 18, 2025

This generates essentially the same code with the operands commuted, but gets there in fewer legalization steps.
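
For background: when neither a wider legal type nor a usable libcall is available, forceExpandWideMUL builds the double-wide product from part products of the half-width type. The following is a minimal standalone C++ sketch of that schoolbook decomposition, assuming 64-bit operands split into 32-bit halves; the names and test values are illustrative, not LLVM's:

```cpp
#include <cassert>
#include <cstdint>

struct WideProduct { uint64_t Lo; int64_t Hi; };

// Full 128-bit product of two signed 64-bit values, built only from
// 64-bit operations -- the same shape the legalizer produces when the
// double-wide multiply has to be expanded.
WideProduct mulWide64(int64_t A, int64_t B) {
  uint64_t UA = (uint64_t)A, UB = (uint64_t)B;
  uint64_t AL = UA & 0xffffffff, AH = UA >> 32;
  uint64_t BL = UB & 0xffffffff, BH = UB >> 32;

  // Unsigned part products of the 32-bit halves (schoolbook).
  uint64_t LL = AL * BL;
  uint64_t LH = AL * BH;
  uint64_t HL = AH * BL;
  uint64_t HH = AH * BH;

  // Carry-propagate the middle column into the low and high words.
  uint64_t Mid = (LL >> 32) + (LH & 0xffffffff) + (HL & 0xffffffff);
  uint64_t Lo  = (Mid << 32) | (LL & 0xffffffff);
  uint64_t Hi  = HH + (LH >> 32) + (HL >> 32) + (Mid >> 32);

  // Signed correction: the unsigned product over-counts by B * 2^64
  // when A is negative and by A * 2^64 when B is negative.
  if (A < 0) Hi -= UB;
  if (B < 0) Hi -= UA;
  return {Lo, (int64_t)Hi};
}

int main() {
  long long Cases[][2] = {{-123456789LL, 987654321LL},
                          {INT64_MIN, -1LL},
                          {INT64_MAX, INT64_MAX}};
  for (auto &C : Cases) {
    __int128 Ref = (__int128)C[0] * C[1]; // GCC/Clang extension, as reference
    WideProduct P = mulWide64(C[0], C[1]);
    assert(P.Lo == (uint64_t)Ref && P.Hi == (int64_t)(Ref >> 64));
  }
}
```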

llvmbot (Member) commented on Jan 18, 2025

@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-loongarch
@llvm/pr-subscribers-backend-x86

Author: Craig Topper (topperc)

Changes

This generates essentially the same code with the operands commuted, but gets there in fewer legalization steps.


Full diff: https://github.com/llvm/llvm-project/pull/123432.diff

5 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (+2-6)
  • (modified) llvm/test/CodeGen/LoongArch/smul-with-overflow.ll (+5-5)
  • (modified) llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll (+1-1)
  • (modified) llvm/test/CodeGen/X86/smul-with-overflow.ll (+69-67)
  • (modified) llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll (+2-2)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index be7521f3416850..7f7a9990476b52 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -5084,13 +5084,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
   if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC) ||
       TLI.getLibcallName(LC) == DAG.getMachineFunction().getName()) {
     // FIXME: This is not an optimal expansion, but better than crashing.
-    EVT WideVT =
-        EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
-    SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(0));
-    SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(1));
-    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
     SDValue MulLo, MulHi;
-    SplitInteger(Mul, MulLo, MulHi);
+    TLI.forceExpandWideMUL(DAG, dl, /*Signed=*/true, N->getOperand(0),
+                           N->getOperand(1), MulLo, MulHi);
     SDValue SRA =
         DAG.getNode(ISD::SRA, dl, VT, MulLo,
                     DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));
diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
index 739680e6141dca..67a10d4bcbaea9 100644
--- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
@@ -191,7 +191,7 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
 ; LA32-NEXT:    sltu $s5, $s5, $s1
 ; LA32-NEXT:    sltu $s1, $s1, $s0
 ; LA32-NEXT:    sltu $s0, $s0, $t6
-; LA32-NEXT:    mul.w $t2, $a3, $t5
+; LA32-NEXT:    mul.w $t2, $t5, $a3
 ; LA32-NEXT:    st.w $a3, $sp, 24 # 4-byte Folded Spill
 ; LA32-NEXT:    sltu $t4, $fp, $t4
 ; LA32-NEXT:    mulh.wu $fp, $a5, $t3
@@ -232,10 +232,10 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
 ; LA32-NEXT:    add.w $a7, $s4, $t2
 ; LA32-NEXT:    st.w $a7, $sp, 12 # 4-byte Folded Spill
 ; LA32-NEXT:    add.w $s3, $t7, $a7
-; LA32-NEXT:    mulh.wu $a7, $a3, $t5
-; LA32-NEXT:    add.w $t4, $a7, $a0
-; LA32-NEXT:    mul.w $s2, $s6, $t5
-; LA32-NEXT:    add.w $s1, $t4, $s2
+; LA32-NEXT:    mulh.wu $a7, $t5, $a3
+; LA32-NEXT:    add.w $t4, $a0, $a7
+; LA32-NEXT:    mul.w $s2, $t5, $s6
+; LA32-NEXT:    add.w $s1, $s2, $t4
 ; LA32-NEXT:    add.w $fp, $s1, $s3
 ; LA32-NEXT:    add.w $a0, $fp, $t6
 ; LA32-NEXT:    add.w $fp, $s8, $a0
diff --git a/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll
index ac0b1128ca812a..4d6f99abc02dc4 100644
--- a/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll
@@ -114,7 +114,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind {
 ; SPARC-NEXT:    addxcc %o0, %o3, %l6
 ; SPARC-NEXT:    addcc %l2, %o1, %l2
 ; SPARC-NEXT:    sra %i4, 31, %i4
-; SPARC-NEXT:    umul %g4, %i4, %g4
+; SPARC-NEXT:    umul %i4, %g4, %g4
 ; SPARC-NEXT:    rd %y, %o0
 ; SPARC-NEXT:    addxcc %l6, %l7, %l6
 ; SPARC-NEXT:    umul %i4, %g2, %g2
diff --git a/llvm/test/CodeGen/X86/smul-with-overflow.ll b/llvm/test/CodeGen/X86/smul-with-overflow.ll
index da0e3fdc1a5272..42904ee0db90c1 100644
--- a/llvm/test/CodeGen/X86/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/smul-with-overflow.ll
@@ -435,8 +435,8 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
 ; X86-NEXT:    movl %edx, %esi
 ; X86-NEXT:    movl %eax, %ebp
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %eax, %ebx
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -822,7 +822,7 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
 ; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    movq %r9, %r15
 ; X64-NEXT:    movq %rcx, %r9
-; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq %rdx, %r10
 ; X64-NEXT:    movq %rsi, %r12
 ; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11
@@ -830,42 +830,42 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
 ; X64-NEXT:    negq %r11
 ; X64-NEXT:    andl $1, %r9d
 ; X64-NEXT:    negq %r9
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %rbp
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rax, %rdi
 ; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    addq %rdx, %rbp
+; X64-NEXT:    addq %rdx, %rdi
 ; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    addq %rax, %rbp
+; X64-NEXT:    addq %rax, %rdi
 ; X64-NEXT:    adcq %rdx, %rcx
 ; X64-NEXT:    setb %sil
-; X64-NEXT:    movzbl %sil, %edi
+; X64-NEXT:    movzbl %sil, %r14d
 ; X64-NEXT:    addq %rax, %rcx
-; X64-NEXT:    adcq %rdx, %rdi
+; X64-NEXT:    adcq %rdx, %r14
 ; X64-NEXT:    movq %r12, %rax
 ; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rdx, %rbx
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    movq %r10, %rax
 ; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    movq %rax, %r13
-; X64-NEXT:    addq %r10, %r13
-; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %rdx, %r13
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %rbx, %rbp
+; X64-NEXT:    adcq $0, %r13
 ; X64-NEXT:    movq %r12, %rax
 ; X64-NEXT:    mulq %r15
 ; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    addq %r13, %rax
+; X64-NEXT:    addq %rbp, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq %rbx, %rsi
+; X64-NEXT:    adcq %r13, %rsi
 ; X64-NEXT:    setb %r8b
-; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    movq %r10, %rax
 ; X64-NEXT:    mulq %r15
 ; X64-NEXT:    movq %rdx, %rbx
 ; X64-NEXT:    addq %rsi, %rax
@@ -873,63 +873,64 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
 ; X64-NEXT:    adcq %rdx, %rbx
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
 ; X64-NEXT:    movq %rax, %rsi
-; X64-NEXT:    adcq %rbp, %rbx
+; X64-NEXT:    adcq %rdi, %rbx
 ; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    adcq $0, %r14
 ; X64-NEXT:    movq %r11, %rax
 ; X64-NEXT:    mulq %r12
 ; X64-NEXT:    movq %rdx, %r13
-; X64-NEXT:    movq %rax, %r15
-; X64-NEXT:    movq %r11, %rax
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    movq %rax, %r14
 ; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    movq %rax, %rdi
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    addq %r13, %r14
+; X64-NEXT:    addq %r13, %r15
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    addq %r15, %r14
+; X64-NEXT:    addq %r8, %r15
 ; X64-NEXT:    adcq %r13, %rbp
 ; X64-NEXT:    setb %al
-; X64-NEXT:    addq %r8, %rbp
+; X64-NEXT:    addq %rdi, %rbp
 ; X64-NEXT:    movzbl %al, %r12d
 ; X64-NEXT:    adcq %rdx, %r12
-; X64-NEXT:    addq %r15, %rsi
+; X64-NEXT:    addq %r8, %rsi
+; X64-NEXT:    movq %r8, %r10
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq %rbx, %r14
+; X64-NEXT:    adcq %rbx, %r15
 ; X64-NEXT:    adcq $0, %rbp
 ; X64-NEXT:    adcq $0, %r12
 ; X64-NEXT:    addq %rcx, %rbp
-; X64-NEXT:    adcq %rdi, %r12
+; X64-NEXT:    adcq %r14, %r12
 ; X64-NEXT:    setb %cl
 ; X64-NEXT:    movq %r9, %rax
 ; X64-NEXT:    mulq %r11
-; X64-NEXT:    movq %rax, %r10
-; X64-NEXT:    addq %rdx, %r10
-; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    addq %rax, %r10
-; X64-NEXT:    adcq %rdx, %rdi
-; X64-NEXT:    setb %bl
-; X64-NEXT:    addq %rax, %rdi
-; X64-NEXT:    movzbl %bl, %esi
-; X64-NEXT:    adcq %rdx, %rsi
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    addq %rdx, %r8
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    addq %rax, %r8
+; X64-NEXT:    adcq %rdx, %rbx
+; X64-NEXT:    setb %r14b
+; X64-NEXT:    addq %rax, %rbx
+; X64-NEXT:    movzbl %r14b, %r14d
+; X64-NEXT:    adcq %rdx, %r14
 ; X64-NEXT:    addq %rax, %rbp
-; X64-NEXT:    adcq %r12, %r10
+; X64-NEXT:    adcq %r12, %r8
 ; X64-NEXT:    movzbl %cl, %eax
-; X64-NEXT:    adcq %rax, %rdi
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rax, %rbx
+; X64-NEXT:    adcq $0, %r14
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    movq %rsi, %r8
+; X64-NEXT:    movq %rsi, %rdi
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT:    addq %rax, %r8
+; X64-NEXT:    addq %rax, %rdi
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
-; X64-NEXT:    addq %rbx, %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    addq %r12, %rdi
 ; X64-NEXT:    adcq %rax, %rcx
 ; X64-NEXT:    setb %al
 ; X64-NEXT:    addq %rsi, %rcx
@@ -937,42 +938,43 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
 ; X64-NEXT:    adcq %rdx, %rsi
 ; X64-NEXT:    movq %r9, %rax
 ; X64-NEXT:    imulq %r11
-; X64-NEXT:    movq %rbx, %r11
+; X64-NEXT:    movq %r12, %r11
 ; X64-NEXT:    addq %rax, %r11
-; X64-NEXT:    movq %r8, %r12
+; X64-NEXT:    movq %rdi, %r12
 ; X64-NEXT:    adcq %rdx, %r12
 ; X64-NEXT:    addq %rcx, %r11
 ; X64-NEXT:    adcq %rsi, %r12
-; X64-NEXT:    movq %r15, %r9
+; X64-NEXT:    movq %r10, %r9
 ; X64-NEXT:    addq %r13, %r9
 ; X64-NEXT:    adcq $0, %r13
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    addq %rcx, %r9
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
 ; X64-NEXT:    adcq %rsi, %r13
-; X64-NEXT:    setb %bl
+; X64-NEXT:    setb %r10b
 ; X64-NEXT:    addq %rcx, %r13
-; X64-NEXT:    movzbl %bl, %ecx
+; X64-NEXT:    movzbl %r10b, %ecx
 ; X64-NEXT:    adcq %rsi, %rcx
-; X64-NEXT:    addq %r15, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    addq %rsi, %rax
 ; X64-NEXT:    adcq %r9, %rdx
 ; X64-NEXT:    addq %r13, %rax
 ; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
-; X64-NEXT:    adcq %r8, %r9
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    adcq %rdi, %r9
 ; X64-NEXT:    adcq %r11, %rax
 ; X64-NEXT:    adcq %r12, %rdx
-; X64-NEXT:    addq %rbp, %r15
-; X64-NEXT:    adcq %r10, %r9
-; X64-NEXT:    adcq %rdi, %rax
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
-; X64-NEXT:    movq %r14, %rcx
+; X64-NEXT:    addq %rbp, %rsi
+; X64-NEXT:    adcq %r8, %r9
+; X64-NEXT:    adcq %rbx, %rax
+; X64-NEXT:    adcq %r14, %rdx
+; X64-NEXT:    movq %r15, %rcx
 ; X64-NEXT:    sarq $63, %rcx
 ; X64-NEXT:    xorq %rcx, %rdx
 ; X64-NEXT:    xorq %rcx, %r9
 ; X64-NEXT:    orq %rdx, %r9
 ; X64-NEXT:    xorq %rcx, %rax
-; X64-NEXT:    xorq %r15, %rcx
+; X64-NEXT:    xorq %rsi, %rcx
 ; X64-NEXT:    orq %rax, %rcx
 ; X64-NEXT:    orq %r9, %rcx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
@@ -980,9 +982,9 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
 ; X64-NEXT:    andl $1, %esi
 ; X64-NEXT:    movq %rsi, %rdx
 ; X64-NEXT:    negq %rdx
-; X64-NEXT:    xorq %rdx, %r14
+; X64-NEXT:    xorq %rdx, %r15
 ; X64-NEXT:    xorq %rax, %rdx
-; X64-NEXT:    orq %r14, %rdx
+; X64-NEXT:    orq %r15, %rdx
 ; X64-NEXT:    orq %rcx, %rdx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
diff --git a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
index 816633b5b18ab8..15f302355784ce 100644
--- a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
@@ -505,8 +505,8 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
 ; X64-NEXT:    addq %rax, %r9
 ; X64-NEXT:    adcq %rdx, %rsi
 ; X64-NEXT:    sarq $63, %r12
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
-; X64-NEXT:    mulq %r12
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    mulq {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Folded Reload
 ; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, %rcx
 ; X64-NEXT:    movq %rax, %r14
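
For context, ExpandIntRes_XMULO derives the overflow bit from the double-wide product: signed multiplication overflows the narrow type exactly when the high half is not the sign extension of the low half, and the ISD::SRA of MulLo by VT.getScalarSizeInBits() - 1 in the context lines above computes that sign pattern. A minimal C++ model of the check, using __int128 (a GCC/Clang extension) to stand in for the wide product:

```cpp
#include <cstdint>
#include <cstdio>

// Checked signed 64x64 multiply in the legalizer's shape: form the
// full product {MulLo, MulHi}, then compare MulHi against
// MulLo >> 63 (arithmetic shift), i.e. against all the sign bits of
// the low half. Overflow iff they differ.
bool smulOverflows(int64_t A, int64_t B, int64_t &Lo) {
  __int128 Wide = (__int128)A * B;
  int64_t MulLo = (int64_t)Wide;
  int64_t MulHi = (int64_t)(Wide >> 64);
  Lo = MulLo;
  int64_t SRA = MulLo >> 63; // 0 or -1: the sign extension of MulLo
  return MulHi != SRA;
}

int main() {
  int64_t Lo;
  printf("%d\n", smulOverflows(INT64_MAX, 2, Lo)); // 1: overflows
  printf("%d\n", smulOverflows(-4, 5, Lo));        // 0: fits in i64
}
```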

topperc changed the title from "[LegalizeInterTypes] Use forceExpandWideMUL in ExpandIntRes_XMULO." to "[LegalizeIntegerTypes] Use forceExpandWideMUL in ExpandIntRes_XMULO." on Jan 18, 2025
RKSimon (Collaborator) left a comment:

LGTM

topperc merged commit 9f7c85f into llvm:main on Jan 18, 2025
12 checks passed
topperc deleted the pr/forceexpandmul branch on January 18, 2025 at 16:37