[LegalizeIntegerTypes] Use forceExpandWideMUL in ExpandIntRes_XMULO. #123432
Conversation
This generates basically the same code with the operands commuted, but gets there with fewer legalization steps.
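For context, a minimal reduced input (a hypothetical sketch for illustration, not one of the PR's test cases) of the kind that reaches ExpandIntRes_XMULO's fallback expansion: a 128-bit signed multiply-with-overflow on a target where the wide overflow-multiply libcall is unavailable, as on the 32-bit LoongArch and SPARC targets whose tests are updated below.

; Hypothetical reduced example: llvm.smul.with.overflow at a width whose
; MULO libcall is unavailable forces the manual FIXME expansion path that
; this patch changes to call TLI.forceExpandWideMUL.
define { i128, i1 } @smul_ovf_i128(i128 %a, i128 %b) nounwind {
  %r = call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %a, i128 %b)
  ret { i128, i1 } %r
}
declare { i128, i1 } @llvm.smul.with.overflow.i128(i128, i128)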
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86

Author: Craig Topper (topperc)

Changes

This generates basically the same code with the operands commuted, but gets there with fewer legalization steps.

Full diff: https://github.com/llvm/llvm-project/pull/123432.diff

5 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index be7521f3416850..7f7a9990476b52 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -5084,13 +5084,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC) ||
TLI.getLibcallName(LC) == DAG.getMachineFunction().getName()) {
// FIXME: This is not an optimal expansion, but better than crashing.
- EVT WideVT =
- EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
- SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(0));
- SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(1));
- SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
SDValue MulLo, MulHi;
- SplitInteger(Mul, MulLo, MulHi);
+ TLI.forceExpandWideMUL(DAG, dl, /*Signed=*/true, N->getOperand(0),
+ N->getOperand(1), MulLo, MulHi);
SDValue SRA =
DAG.getNode(ISD::SRA, dl, VT, MulLo,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));
diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
index 739680e6141dca..67a10d4bcbaea9 100644
--- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
@@ -191,7 +191,7 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; LA32-NEXT: sltu $s5, $s5, $s1
; LA32-NEXT: sltu $s1, $s1, $s0
; LA32-NEXT: sltu $s0, $s0, $t6
-; LA32-NEXT: mul.w $t2, $a3, $t5
+; LA32-NEXT: mul.w $t2, $t5, $a3
; LA32-NEXT: st.w $a3, $sp, 24 # 4-byte Folded Spill
; LA32-NEXT: sltu $t4, $fp, $t4
; LA32-NEXT: mulh.wu $fp, $a5, $t3
@@ -232,10 +232,10 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; LA32-NEXT: add.w $a7, $s4, $t2
; LA32-NEXT: st.w $a7, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT: add.w $s3, $t7, $a7
-; LA32-NEXT: mulh.wu $a7, $a3, $t5
-; LA32-NEXT: add.w $t4, $a7, $a0
-; LA32-NEXT: mul.w $s2, $s6, $t5
-; LA32-NEXT: add.w $s1, $t4, $s2
+; LA32-NEXT: mulh.wu $a7, $t5, $a3
+; LA32-NEXT: add.w $t4, $a0, $a7
+; LA32-NEXT: mul.w $s2, $t5, $s6
+; LA32-NEXT: add.w $s1, $s2, $t4
; LA32-NEXT: add.w $fp, $s1, $s3
; LA32-NEXT: add.w $a0, $fp, $t6
; LA32-NEXT: add.w $fp, $s8, $a0
diff --git a/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll
index ac0b1128ca812a..4d6f99abc02dc4 100644
--- a/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll
@@ -114,7 +114,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind {
; SPARC-NEXT: addxcc %o0, %o3, %l6
; SPARC-NEXT: addcc %l2, %o1, %l2
; SPARC-NEXT: sra %i4, 31, %i4
-; SPARC-NEXT: umul %g4, %i4, %g4
+; SPARC-NEXT: umul %i4, %g4, %g4
; SPARC-NEXT: rd %y, %o0
; SPARC-NEXT: addxcc %l6, %l7, %l6
; SPARC-NEXT: umul %i4, %g2, %g2
diff --git a/llvm/test/CodeGen/X86/smul-with-overflow.ll b/llvm/test/CodeGen/X86/smul-with-overflow.ll
index da0e3fdc1a5272..42904ee0db90c1 100644
--- a/llvm/test/CodeGen/X86/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/smul-with-overflow.ll
@@ -435,8 +435,8 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -822,7 +822,7 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %r9, %r15
; X64-NEXT: movq %rcx, %r9
-; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rdx, %r10
; X64-NEXT: movq %rsi, %r12
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
@@ -830,42 +830,42 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X64-NEXT: negq %r11
; X64-NEXT: andl $1, %r9d
; X64-NEXT: negq %r9
-; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %r8
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %r9
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rax, %rdi
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: addq %rdx, %rbp
+; X64-NEXT: addq %rdx, %rdi
; X64-NEXT: adcq $0, %rcx
-; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %r15
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %r9
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: addq %rax, %rbp
+; X64-NEXT: addq %rax, %rdi
; X64-NEXT: adcq %rdx, %rcx
; X64-NEXT: setb %sil
-; X64-NEXT: movzbl %sil, %edi
+; X64-NEXT: movzbl %sil, %r14d
; X64-NEXT: addq %rax, %rcx
-; X64-NEXT: adcq %rdx, %rdi
+; X64-NEXT: adcq %rdx, %r14
; X64-NEXT: movq %r12, %rax
; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r14, %rax
+; X64-NEXT: movq %r10, %rax
; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rdx, %rbx
-; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %r10, %r13
-; X64-NEXT: adcq $0, %rbx
+; X64-NEXT: movq %rdx, %r13
+; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: addq %rbx, %rbp
+; X64-NEXT: adcq $0, %r13
; X64-NEXT: movq %r12, %rax
; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: addq %r13, %rax
+; X64-NEXT: addq %rbp, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %rbx, %rsi
+; X64-NEXT: adcq %r13, %rsi
; X64-NEXT: setb %r8b
-; X64-NEXT: movq %r14, %rax
+; X64-NEXT: movq %r10, %rax
; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: addq %rsi, %rax
@@ -873,63 +873,64 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X64-NEXT: adcq %rdx, %rbx
; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: adcq %rbp, %rbx
+; X64-NEXT: adcq %rdi, %rbx
; X64-NEXT: adcq $0, %rcx
-; X64-NEXT: adcq $0, %rdi
+; X64-NEXT: adcq $0, %r14
; X64-NEXT: movq %r11, %rax
; X64-NEXT: mulq %r12
; X64-NEXT: movq %rdx, %r13
-; X64-NEXT: movq %rax, %r15
-; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %r14
-; X64-NEXT: movq %rax, %r14
; X64-NEXT: movq %rax, %r8
+; X64-NEXT: movq %r11, %rax
+; X64-NEXT: mulq %r10
+; X64-NEXT: movq %rax, %r15
+; X64-NEXT: movq %rax, %rdi
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: addq %r13, %r14
+; X64-NEXT: addq %r13, %r15
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq $0, %rbp
-; X64-NEXT: addq %r15, %r14
+; X64-NEXT: addq %r8, %r15
; X64-NEXT: adcq %r13, %rbp
; X64-NEXT: setb %al
-; X64-NEXT: addq %r8, %rbp
+; X64-NEXT: addq %rdi, %rbp
; X64-NEXT: movzbl %al, %r12d
; X64-NEXT: adcq %rdx, %r12
-; X64-NEXT: addq %r15, %rsi
+; X64-NEXT: addq %r8, %rsi
+; X64-NEXT: movq %r8, %r10
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %rbx, %r14
+; X64-NEXT: adcq %rbx, %r15
; X64-NEXT: adcq $0, %rbp
; X64-NEXT: adcq $0, %r12
; X64-NEXT: addq %rcx, %rbp
-; X64-NEXT: adcq %rdi, %r12
+; X64-NEXT: adcq %r14, %r12
; X64-NEXT: setb %cl
; X64-NEXT: movq %r9, %rax
; X64-NEXT: mulq %r11
-; X64-NEXT: movq %rax, %r10
-; X64-NEXT: addq %rdx, %r10
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: addq %rax, %r10
-; X64-NEXT: adcq %rdx, %rdi
-; X64-NEXT: setb %bl
-; X64-NEXT: addq %rax, %rdi
-; X64-NEXT: movzbl %bl, %esi
-; X64-NEXT: adcq %rdx, %rsi
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %rdx, %r8
+; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: adcq $0, %rbx
+; X64-NEXT: addq %rax, %r8
+; X64-NEXT: adcq %rdx, %rbx
+; X64-NEXT: setb %r14b
+; X64-NEXT: addq %rax, %rbx
+; X64-NEXT: movzbl %r14b, %r14d
+; X64-NEXT: adcq %rdx, %r14
; X64-NEXT: addq %rax, %rbp
-; X64-NEXT: adcq %r12, %r10
+; X64-NEXT: adcq %r12, %r8
; X64-NEXT: movzbl %cl, %eax
-; X64-NEXT: adcq %rax, %rdi
-; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %rax, %rbx
+; X64-NEXT: adcq $0, %r14
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT: movq %rsi, %r8
+; X64-NEXT: movq %rsi, %rdi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: addq %rax, %r8
+; X64-NEXT: addq %rax, %rdi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: adcq $0, %rcx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
-; X64-NEXT: addq %rbx, %r8
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT: addq %r12, %rdi
; X64-NEXT: adcq %rax, %rcx
; X64-NEXT: setb %al
; X64-NEXT: addq %rsi, %rcx
@@ -937,42 +938,43 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X64-NEXT: adcq %rdx, %rsi
; X64-NEXT: movq %r9, %rax
; X64-NEXT: imulq %r11
-; X64-NEXT: movq %rbx, %r11
+; X64-NEXT: movq %r12, %r11
; X64-NEXT: addq %rax, %r11
-; X64-NEXT: movq %r8, %r12
+; X64-NEXT: movq %rdi, %r12
; X64-NEXT: adcq %rdx, %r12
; X64-NEXT: addq %rcx, %r11
; X64-NEXT: adcq %rsi, %r12
-; X64-NEXT: movq %r15, %r9
+; X64-NEXT: movq %r10, %r9
; X64-NEXT: addq %r13, %r9
; X64-NEXT: adcq $0, %r13
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: addq %rcx, %r9
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; X64-NEXT: adcq %rsi, %r13
-; X64-NEXT: setb %bl
+; X64-NEXT: setb %r10b
; X64-NEXT: addq %rcx, %r13
-; X64-NEXT: movzbl %bl, %ecx
+; X64-NEXT: movzbl %r10b, %ecx
; X64-NEXT: adcq %rsi, %rcx
-; X64-NEXT: addq %r15, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: addq %rsi, %rax
; X64-NEXT: adcq %r9, %rdx
; X64-NEXT: addq %r13, %rax
; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
-; X64-NEXT: adcq %r8, %r9
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT: adcq %rdi, %r9
; X64-NEXT: adcq %r11, %rax
; X64-NEXT: adcq %r12, %rdx
-; X64-NEXT: addq %rbp, %r15
-; X64-NEXT: adcq %r10, %r9
-; X64-NEXT: adcq %rdi, %rax
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
-; X64-NEXT: movq %r14, %rcx
+; X64-NEXT: addq %rbp, %rsi
+; X64-NEXT: adcq %r8, %r9
+; X64-NEXT: adcq %rbx, %rax
+; X64-NEXT: adcq %r14, %rdx
+; X64-NEXT: movq %r15, %rcx
; X64-NEXT: sarq $63, %rcx
; X64-NEXT: xorq %rcx, %rdx
; X64-NEXT: xorq %rcx, %r9
; X64-NEXT: orq %rdx, %r9
; X64-NEXT: xorq %rcx, %rax
-; X64-NEXT: xorq %r15, %rcx
+; X64-NEXT: xorq %rsi, %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: orq %r9, %rcx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
@@ -980,9 +982,9 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X64-NEXT: andl $1, %esi
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: negq %rdx
-; X64-NEXT: xorq %rdx, %r14
+; X64-NEXT: xorq %rdx, %r15
; X64-NEXT: xorq %rax, %rdx
-; X64-NEXT: orq %r14, %rdx
+; X64-NEXT: orq %r15, %rdx
; X64-NEXT: orq %rcx, %rdx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
diff --git a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
index 816633b5b18ab8..15f302355784ce 100644
--- a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
@@ -505,8 +505,8 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X64-NEXT: addq %rax, %r9
; X64-NEXT: adcq %rdx, %rsi
; X64-NEXT: sarq $63, %r12
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
-; X64-NEXT: mulq %r12
+; X64-NEXT: movq %r12, %rax
+; X64-NEXT: mulq {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Folded Reload
; X64-NEXT: movq %rdx, %rdi
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: movq %rax, %r14
LGTM