[PATCH 1/3] RISC-V: Improve codegen for negative repeating large constants

Improve handling of constants whose upper and lower 32-bit halves are
identical and negative when viewed as 32-bit values (bit 31 is set).

e.g. for:

unsigned long f (void) { return 0xf0f0f0f0f0f0f0f0UL; }

Without the patch:

li      a0,-252645376
addi    a0,a0,240
li      a5,-252645376
addi    a5,a5,241
slli    a5,a5,32
add     a0,a5,a0

With the patch:

li      a5,252645376
addi    a5,a5,-241
slli    a0,a5,32
add     a0,a0,a5
xori    a0,a0,-1

gcc/ChangeLog:
	* config/riscv/riscv.cc (riscv_split_integer_cost): Adjust the
	cost of negative repeating constants.
	(riscv_split_integer): Handle negative repeating constants.

gcc/testsuite/ChangeLog:
	* gcc.target/riscv/synthesis-11.c: New test.
This commit is contained in:
Raphael Moreira Zinsly 2024-09-04 17:21:24 -06:00 committed by Jeff Law
parent 5326306e7d
commit cbea72b265
2 changed files with 49 additions and 8 deletions

View File

@ -1242,18 +1242,20 @@ static int
riscv_split_integer_cost (HOST_WIDE_INT val)
{
/* Estimate the cost of synthesizing VAL from its two 32-bit halves:
the riscv_build_integer cost of each half that has to be built, plus a
fixed number of instructions for combining them.  */
int cost;
unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
/* Zero-extended low and high 32-bit halves of VAL.  Masking rather than
sign-extending lets identical halves compare equal even when bit 31 is
set.  */
unsigned HOST_WIDE_INT loval = val & 0xffffffff;
unsigned HOST_WIDE_INT hival = (val & ~loval) >> 32;
struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
/* This routine isn't used by pattern conditions, so whether or
not to allow new pseudos can be a function of where we are in the
RTL pipeline. We shouldn't need scratch pseudos for this case
anyway. */
RTL pipeline. */
bool allow_new_pseudos = can_create_pseudo_p ();
/* Baseline: build the low half, plus 2 for combining the halves
(presumably the shift and add that riscv_split_integer emits --
NOTE(review): confirm).  */
cost = 2 + riscv_build_integer (codes, loval, VOIDmode, allow_new_pseudos);
/* Distinct halves: the high half must be built as well.  */
if (loval != hival)
cost += riscv_build_integer (codes, hival, VOIDmode, allow_new_pseudos);
/* Identical halves with bit 31 set: cost the bitwise-inverted 32-bit
half instead, using 3 rather than 2 to account for the extra XOR with -1
that riscv_split_integer emits to invert the combined result back.  */
else if ((loval & 0x80000000) != 0)
cost = 3 + riscv_build_integer (codes, ~loval & 0xffffffff,
VOIDmode, allow_new_pseudos);
return cost;
}
@ -1276,11 +1278,16 @@ riscv_integer_cost (HOST_WIDE_INT val, bool allow_new_pseudos)
static rtx
riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
{
unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
unsigned HOST_WIDE_INT loval = val & 0xffffffff;
unsigned HOST_WIDE_INT hival = (val & ~loval) >> 32;
rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);
rtx x = gen_reg_rtx (mode);
bool eq_neg = (loval == hival) && ((loval & 0x80000000) != 0);
riscv_move_integer (lo, lo, loval, mode);
if (eq_neg)
riscv_move_integer (lo, lo, ~loval & 0xffffffff, mode);
else
riscv_move_integer (lo, lo, loval, mode);
if (loval == hival)
hi = gen_rtx_ASHIFT (mode, lo, GEN_INT (32));
@ -1291,7 +1298,13 @@ riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
}
hi = force_reg (mode, hi);
return gen_rtx_PLUS (mode, hi, lo);
x = gen_rtx_PLUS (mode, hi, lo);
if (eq_neg)
{
x = force_reg (mode, x);
x = gen_rtx_XOR (mode, x, GEN_INT (-1));
}
return x;
}
/* Return true if X is a thread-local symbol. */

View File

@ -0,0 +1,28 @@
/* { dg-do compile } */
/* { dg-require-effective-target rv64 } */
/* We aggressively skip as we really just need to test the basic synthesis
which shouldn't vary based on the optimization level. -O1 seems to work
and eliminates the usual sources of extraneous dead code that would throw
off the counts. */
/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */
/* { dg-options "-march=rv64gc" } */
/* Rather than test for a specific synthesis of all these constants or
having thousands of tests each testing one variant, we just test the
total number of instructions.
This isn't expected to change much and any change is worthy of a look. */
/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori)" 60 } } */
unsigned long foo_0xf857f2def857f2de(void) { return 0xf857f2def857f2deUL; }
unsigned long foo_0x99660e6399660e63(void) { return 0x99660e6399660e63UL; }
unsigned long foo_0x937f1b75937f1b75(void) { return 0x937f1b75937f1b75UL; }
unsigned long foo_0xb5019fa0b5019fa0(void) { return 0xb5019fa0b5019fa0UL; }
unsigned long foo_0xb828e6c1b828e6c1(void) { return 0xb828e6c1b828e6c1UL; }
unsigned long foo_0x839d87e9839d87e9(void) { return 0x839d87e9839d87e9UL; }
unsigned long foo_0xc29617c1c29617c1(void) { return 0xc29617c1c29617c1UL; }
unsigned long foo_0xa4118119a4118119(void) { return 0xa4118119a4118119UL; }
unsigned long foo_0x8c01df7d8c01df7d(void) { return 0x8c01df7d8c01df7dUL; }
unsigned long foo_0xf0e23d6bf0e23d6b(void) { return 0xf0e23d6bf0e23d6bUL; }