mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
Improve ext-dce's ability to eliminate more extensions
I was looking at a regression in ext-dce's behavior just before Cauldron. Essentially a bugfix in ext-dce ended up causing us to fail to eliminate some useless extensions. When we have a SUBREG object with SUBREG_PROMOTED_VAR* flags set, we generally have to be more conservative in how we process bit group liveness, making bits live that wouldn't obviously be live otherwise. That's not always necessary though. For example, if we're storing a promoted subreg into memory, we may not care about those extra live bits on this instance of the subreg object (remember subregs are not shared!). Essentially if the mode of the memory reference is not wider than the mode of the inner REG, then we can clear the promoted state which in turn may allow more extension elimination. So at the start of ext-dce we do a simple pass over the IL and remove promoted subreg state when it's obviously safe to do so (memory stores when the modes allow it). That prevents extra bits from being live and ultimately allows us to remove more useless extensions. The testcase is in theory generic, but many targets won't have an opportunity to optimize this case. So rather than build out a large inclusion/exclusion list, I've just made the test RISC-V specific. Bootstrapped and regression tested on aarch64, riscv64, s390x, etc. in my tester. gcc/ * ext-dce.cc (maybe_clear_subreg_promoted_p): New function. (ext_dce_execute): Call it. gcc/testsuite * gcc.target/riscv/ext-dce-1.c: New test.
This commit is contained in:
parent
4a8eb5c6d8
commit
beec291225
@ -941,6 +941,38 @@ ext_dce_process_bb (basic_block bb)
|
||||
}
|
||||
}
|
||||
|
||||
/* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
|
||||
is usually helpful. However, in some cases setting the value when
|
||||
it is not strictly needed can cause this pass to miss optimizations.
|
||||
|
||||
Specifically consider (set (mem) (subreg (reg))). If set in that
|
||||
case it will cause more bit groups to be live for REG than would
|
||||
be strictly necessary which in turn can inhibit extension removal.
|
||||
|
||||
So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
|
||||
is obviously not needed. */
|
||||
|
||||
static void
|
||||
maybe_clear_subreg_promoted_p (void)
|
||||
{
|
||||
for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
|
||||
{
|
||||
if (!NONDEBUG_INSN_P (insn))
|
||||
continue;
|
||||
|
||||
rtx set = single_set (insn);
|
||||
if (!set)
|
||||
continue;
|
||||
|
||||
/* There may be other cases where we should clear, but for
|
||||
now, this is the only known case where it causes problems. */
|
||||
if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
|
||||
&& GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
|
||||
SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* We optimize away sign/zero extensions in this pass and replace
|
||||
them with SUBREGs indicating certain bits are don't cares.
|
||||
|
||||
@ -1077,6 +1109,9 @@ static bool ext_dce_rd_confluence_n (edge) { return true; }
|
||||
void
|
||||
ext_dce_execute (void)
|
||||
{
|
||||
/* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
|
||||
to this pass. Clear it for those cases. */
|
||||
maybe_clear_subreg_promoted_p ();
|
||||
df_analyze ();
|
||||
ext_dce_init ();
|
||||
|
||||
|
44
gcc/testsuite/gcc.target/riscv/ext-dce-1.c
Normal file
44
gcc/testsuite/gcc.target/riscv/ext-dce-1.c
Normal file
@ -0,0 +1,44 @@
|
||||
/* { dg-options "-O2 -fdump-rtl-ext_dce" } */
|
||||
/* The test includes no headers, so the fixed-width type names it uses
   are declared here.  */
typedef unsigned char __uint8_t;
|
||||
typedef unsigned int __uint32_t;
|
||||
typedef __uint8_t uint8_t;
|
||||
typedef __uint32_t uint32_t;
|
||||
/* Store NUM into BUF[0..3] one byte at a time, least significant byte
   first.  Each assignment converts the (shifted) 32-bit value to
   uint8_t, so only its low 8 bits are stored.  */
static inline void
|
||||
unaligned_write32le(uint8_t *buf, uint32_t num)
|
||||
{
|
||||
buf[0] = num;
|
||||
buf[1] = num >> 8;
|
||||
buf[2] = num >> 16;
|
||||
buf[3] = num >> 24;
|
||||
return;
|
||||
}
|
||||
/* Encoder options; dict_size is the only field the test reads.  */
typedef struct {
|
||||
uint32_t dict_size;
|
||||
} lzma_options_lzma;
|
||||
/* The coder is opaque at this level: lzma_next_coder_s holds it as a
   void pointer and alone_encoder_init casts it to struct lzma_coder_s.  */
typedef void lzma_coder;
|
||||
typedef struct lzma_next_coder_s lzma_next_coder;
|
||||
struct lzma_next_coder_s {
|
||||
lzma_coder *coder;
|
||||
};
|
||||
/* Holds a 13-byte (1 + 4 + 8) header buffer; the test stores four bytes
   starting at header + 1.  */
struct lzma_coder_s {
|
||||
uint8_t header[(1 + 4 + 8)];
|
||||
};
|
||||
|
||||
/* Function under test.  Computes D from OPTIONS->dict_size - 1 with
   D >> 2 OR-ed in, increments it unless it is already 4294967295U, and
   stores the result as four bytes, least significant first, at offset 1
   of the header of the coder that NEXT points to.  */
void
|
||||
alone_encoder_init(lzma_next_coder *next, const lzma_options_lzma *options)
|
||||
{
|
||||
uint32_t d = options->dict_size - 1;
|
||||
d |= d >> 2;
|
||||
/* The remaining shift/OR steps are compiled out.
   NOTE(review): presumably disabled while reducing the testcase --
   confirm before re-enabling.  */
#if 0
|
||||
d |= d >> 3;
|
||||
d |= d >> 4;
|
||||
d |= d >> 8;
|
||||
d |= d >> 16;
|
||||
#endif
|
||||
/* Skip the increment when D is already the largest 32-bit value, so
   that it does not wrap around to 0.  */
if (d != (4294967295U))
|
||||
++d;
|
||||
/* The untyped coder pointer is cast to struct lzma_coder_s to reach
   its header buffer.  */
unaligned_write32le(((struct lzma_coder_s*)next->coder)->header + 1, d);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-rtl-dump "Successfully transformed to:" "ext_dce" } } */
|
||||
|
Loading…
Reference in New Issue
Block a user