mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
PR target/110551: Tweak mulx register allocation using peephole2.
This patch is a follow-up to my previous PR target/110551 patch, this time to address the additional move after mulx, seen on TARGET_BMI2 architectures (such as -march=haswell). The complication here is that the flexible multiple-set mulx instruction is introduced into RTL after reload, by split2, and therefore can't benefit from register preferencing. This results in RTL like the following: (insn 32 31 17 2 (parallel [ (set (reg:DI 4 si [orig:101 r ] [101]) (mult:DI (reg:DI 1 dx [109]) (reg:DI 5 di [109]))) (set (reg:DI 5 di [ r+8 ]) (umul_highpart:DI (reg:DI 1 dx [109]) (reg:DI 5 di [109]))) ]) "pr110551-2.c":8:17 -1 (nil)) (insn 17 32 9 2 (set (reg:DI 0 ax [107]) (reg:DI 5 di [ r+8 ])) "pr110551-2.c":9:40 90 {*movdi_internal} (expr_list:REG_DEAD (reg:DI 5 di [ r+8 ]) (nil))) Here insn 32, the mulx instruction, places its results in si and di, and then immediately after decides to move di to ax, with di now dead. This can be trivially cleaned up by a peephole2. I've added an additional constraint that the two SET_DESTs can't be the same register to avoid confusing the middle-end, but this has well-defined behaviour on x86_64/BMI2, encoding a umul_highpart. For the new test case, compiled on x86_64 with -O2 -march=haswell: Before: mulx64: movabsq $-7046029254386353131, %rdx mulx %rdi, %rsi, %rdi movq %rdi, %rax xorq %rsi, %rax ret After: mulx64: movabsq $-7046029254386353131, %rdx mulx %rdi, %rsi, %rax xorq %rsi, %rax ret 2023-11-01 Roger Sayle <roger@nextmovesoftware.com> gcc/ChangeLog PR target/110551 * config/i386/i386.md (*bmi2_umul<mode><dwi>3_1): Tidy condition as operands[2] with predicate register_operand must be !MEM_P. (peephole2): Optimize a mulx followed by a register-to-register move, to place result in the correct destination if possible. gcc/testsuite/ChangeLog PR target/110551 * gcc.target/i386/pr110551-2.c: New test case.
This commit is contained in:
parent
274c904471
commit
80b1a37100
@ -9764,13 +9764,37 @@
|
||||
(match_operand:DWIH 3 "nonimmediate_operand" "rm")))
|
||||
(set (match_operand:DWIH 1 "register_operand" "=r")
|
||||
(umul_highpart:DWIH (match_dup 2) (match_dup 3)))]
|
||||
"TARGET_BMI2
|
||||
&& !(MEM_P (operands[2]) && MEM_P (operands[3]))"
|
||||
"TARGET_BMI2"
|
||||
"mulx\t{%3, %0, %1|%1, %0, %3}"
|
||||
[(set_attr "type" "imulx")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; Tweak *bmi2_umul<mode><dwi>3_1 to eliminate following mov.
|
||||
(define_peephole2
|
||||
[(parallel [(set (match_operand:DWIH 0 "general_reg_operand")
|
||||
(mult:DWIH (match_operand:DWIH 2 "register_operand")
|
||||
(match_operand:DWIH 3 "nonimmediate_operand")))
|
||||
(set (match_operand:DWIH 1 "general_reg_operand")
|
||||
(umul_highpart:DWIH (match_dup 2) (match_dup 3)))])
|
||||
(set (match_operand:DWIH 4 "general_reg_operand")
|
||||
(match_operand:DWIH 5 "general_reg_operand"))]
|
||||
"TARGET_BMI2
|
||||
&& ((REGNO (operands[5]) == REGNO (operands[0])
|
||||
&& REGNO (operands[1]) != REGNO (operands[4]))
|
||||
|| (REGNO (operands[5]) == REGNO (operands[1])
|
||||
&& REGNO (operands[0]) != REGNO (operands[4])))
|
||||
&& peep2_reg_dead_p (2, operands[5])"
|
||||
[(parallel [(set (match_dup 0) (mult:DWIH (match_dup 2) (match_dup 3)))
|
||||
(set (match_dup 1)
|
||||
(umul_highpart:DWIH (match_dup 2) (match_dup 3)))])]
|
||||
{
|
||||
if (REGNO (operands[5]) == REGNO (operands[0]))
|
||||
operands[0] = operands[4];
|
||||
else
|
||||
operands[1] = operands[4];
|
||||
})
|
||||
|
||||
(define_insn "*umul<mode><dwi>3_1"
|
||||
[(set (match_operand:<DWI> 0 "register_operand" "=r,A")
|
||||
(mult:<DWI>
|
||||
|
12
gcc/testsuite/gcc.target/i386/pr110551-2.c
Normal file
12
gcc/testsuite/gcc.target/i386/pr110551-2.c
Normal file
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile { target int128 } } */
|
||||
/* { dg-options "-O2 -march=haswell" } */
|
||||
|
||||
typedef unsigned long long uint64_t;
|
||||
|
||||
uint64_t mulx64(uint64_t x)
|
||||
{
|
||||
__uint128_t r = (__uint128_t)x * 0x9E3779B97F4A7C15ull;
|
||||
return (uint64_t)r ^ (uint64_t)( r >> 64 );
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "movq" } } */
|
Loading…
Reference in New Issue
Block a user