PR target/31985: Improve memory operand use with doubleword add.

This patch addresses the last remaining issue in PR target/31985: GCC
could make better use of memory addressing modes when implementing
doubleword addition.  This is achieved by adding a define_insn_and_split
that combines an *add<dwi>3_doubleword with a *concat<mode><dwi>3, so
that the components of the concat can be used directly, without first
being loaded into a doubleword register.

For test_c in the bugzilla PR:

Before:
        pushl   %ebx
        subl    $16, %esp
        movl    28(%esp), %eax
        movl    36(%esp), %ecx
        movl    32(%esp), %ebx
        movl    24(%esp), %edx
        addl    %ecx, %eax
        adcl    %ebx, %edx
        movl    %eax, 8(%esp)
        movl    %edx, 12(%esp)
        addl    $16, %esp
        popl    %ebx
        ret

After:
test_c:
        subl    $20, %esp
        movl    36(%esp), %eax
        movl    32(%esp), %edx
        addl    28(%esp), %eax
        adcl    24(%esp), %edx
        movl    %eax, 8(%esp)
        movl    %edx, 12(%esp)
        addl    $20, %esp
        ret

2023-06-16  Roger Sayle  <roger@nextmovesoftware.com>
	    Uros Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog
	PR target/31985
	* config/i386/i386.md (*add<dwi>3_doubleword_concat): New
	define_insn_and_split that combines *add<dwi>3_doubleword with
	a *concat<mode><dwi>3 for more efficient lowering after reload.

gcc/testsuite/ChangeLog
	PR target/31985
	* gcc.target/i386/pr31985.c: New test case.
This commit is contained in:
Roger Sayle 2023-06-16 16:18:27 +01:00
parent 154c690395
commit 95020d33ad
2 changed files with 44 additions and 0 deletions

View File

@ -6172,6 +6172,36 @@
(clobber (reg:CC FLAGS_REG))])]
"split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
;; Double-word addition in which one addend is built from two word-mode
;; (DWIH) operands: operand 2 zero-extended and shifted left by operand 3,
;; combined via any_or_plus with zero-extended operand 4.  The other addend,
;; operand 1, is a double-word register tied to the output by constraint "0".
;; Operands 2 and 4 accept register or memory ("rm"), so the two components
;; can be used directly instead of first being loaded into a double-word
;; register.
(define_insn_and_split "*add<dwi>3_doubleword_concat"
[(set (match_operand:<DWI> 0 "register_operand" "=&r")
(plus:<DWI>
(any_or_plus:<DWI>
;; Shifted component of the concatenation: zero_extend (operand 2) << operand 3.
(ashift:<DWI>
(zero_extend:<DWI>
(match_operand:DWIH 2 "nonimmediate_operand" "rm"))
(match_operand:QI 3 "const_int_operand"))
;; Unshifted component of the concatenation: zero_extend (operand 4).
(zero_extend:<DWI>
(match_operand:DWIH 4 "nonimmediate_operand" "rm")))
(match_operand:<DWI> 1 "register_operand" "0")))
(clobber (reg:CC FLAGS_REG))]
;; Insn condition: the shift count must equal <MODE_SIZE> * BITS_PER_UNIT.
"INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
;; Output template "#": the insn has no assembler text of its own and is
;; expected to be split.
"#"
;; Split condition, in addition to the insn condition: reload has completed.
"&& reload_completed"
;; First insn of the split: a DWIH-mode add of operand 4 into operand 1 that
;; stores the sum in operand 0 and sets the CCC flags from comparing the sum
;; against operand 1.
[(parallel [(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:DWIH (match_dup 1) (match_dup 4))
(match_dup 1)))
(set (match_dup 0)
(plus:DWIH (match_dup 1) (match_dup 4)))])
;; Second insn of the split: operand 5 = (ltu on the flags) + operand 6
;; + operand 2, clobbering the flags.  Operands 5 and 6 are not matched by
;; the insn pattern; they are filled in by the preparation statement below.
(parallel [(set (match_dup 5)
(plus:DWIH
(plus:DWIH
(ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
(match_dup 6))
(match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
;; Preparation statement: calls split_double_mode on the 2 operands starting
;; at operands[0], passing &operands[0] and &operands[5] as output arrays.
;; NOTE(review): presumably the low halves replace operands[0..1] and the
;; high halves land in operands[5..6] -- confirm against the definition of
;; split_double_mode, which is not visible here.
"split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[5]);")
(define_insn "*add<mode>_1"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
(plus:SWI48

View File

@ -0,0 +1,14 @@
/* { dg-do compile { target ia32 } } */
/* { dg-options "-O2" } */
/* Form two 64-bit values from pairs of 32-bit arguments (a:b and c:d, with
   the first of each pair shifted into the upper 32 bits), add them, and
   store the low and high 32-bit halves of the sum into volatile locals.  */
void test_c (unsigned int a, unsigned int b, unsigned int c, unsigned int d)
{
/* Declared volatile so the two stores at the end are accesses that must be
   performed.  */
volatile unsigned int x, y;
/* __a = ((unsigned long long) a << 32) | b.  */
unsigned long long __a = b | ((unsigned long long)a << 32);
/* __b = ((unsigned long long) c << 32) | d.  */
unsigned long long __b = d | ((unsigned long long)c << 32);
/* 64-bit addition of the two concatenated values.  */
unsigned long long __c = __a + __b;
/* Low 32 bits of the sum.  */
x = (unsigned int)(__c & 0xffffffff);
/* High 32 bits of the sum.  */
y = (unsigned int)(__c >> 32);
}
/* { dg-final { scan-assembler-times "movl" 4 } } */