RISC-V: Support combine extend and reduce sum to widen reduce sum

This patch adds combine patterns that combine an extend followed by a
reduce sum into a widening reduce sum. The patterns in autovec.md were
adjusted as needed. Note that the current vectorizer cannot generate a
reduction whose operand is LMUL=M8, because that would require LMUL=M16
for the extended operand, which is currently not possible. So I've
added the VI_QHS_NO_M8 and VF_HS_NO_M8 mode iterators, which exclude
the modes with LMUL=M8.

	PR target/111381

gcc/ChangeLog:

	* config/riscv/autovec-opt.md (*reduc_plus_scal_<mode>):
	New combine pattern.
	(*fold_left_widen_plus_<mode>): Ditto.
	(*mask_len_fold_left_widen_plus_<mode>): Ditto.
	* config/riscv/autovec.md (reduc_plus_scal_<mode>):
	Change from define_expand to define_insn_and_split.
	(fold_left_plus_<mode>): Ditto.
	(mask_len_fold_left_plus_<mode>): Ditto.
	* config/riscv/riscv-v.cc (expand_reduction):
	Support widen reduction.
	* config/riscv/vector-iterators.md (UNSPEC_WREDUC_SUM):
	Add new iterators and attrs.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/widen/widen_reduc-1.c: New test.
	* gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-1.c: New test.
	* gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-2.c: New test.
	* gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/widen/widen_reduc_run-1.c: New test.
This commit is contained in:
Lehua Ding 2023-09-14 23:35:42 +08:00
parent 05cb873005
commit 68cb873fd3
10 changed files with 321 additions and 27 deletions

View File

@ -1196,6 +1196,88 @@
}
[(set_attr "type" "vfwmul")])
;; Combine extend + vredsum to vwredsum[u]
;;
;; Matches an UNSPEC_REDUC_SUM whose only input is an any_extend of a
;; narrow integer vector (operand 1, a VI_QHS_NO_M8 mode) to the
;; double-width vector mode <V_DOUBLE_EXTEND>; the scalar result
;; (operand 0) has the double-width element mode <V_DOUBLE_EXTEND_VEL>.
;; The insn has no assembly of its own ("#") and is always split
;; ("&& 1") by calling riscv_vector::expand_reduction with the unspec
;; selected by <WREDUC_UNSPEC> and a zero initial value.
;; The can_create_pseudo_p () condition restricts the pattern to passes
;; where new pseudo registers may still be created.
(define_insn_and_split "*reduc_plus_scal_<mode>"
[(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
(unspec:<V_DOUBLE_EXTEND_VEL> [
(any_extend:<V_DOUBLE_EXTEND>
(match_operand:VI_QHS_NO_M8 1 "register_operand"))
] UNSPEC_REDUC_SUM))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
riscv_vector::expand_reduction (<WREDUC_UNSPEC>, operands,
CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
DONE;
}
[(set_attr "type" "vector")])
;; Combine extend + vfredusum to vfwredusum
;;
;; Floating-point counterpart of the integer widening pattern: matches an
;; UNSPEC_REDUC_SUM_UNORDERED whose only input is a float_extend of a
;; narrow FP vector (operand 1, a VF_HS_NO_M8 mode) to <V_DOUBLE_EXTEND>.
;; The scalar result (operand 0) has mode <V_DOUBLE_EXTEND_VEL>.
;; Always split ("&& 1") into riscv_vector::expand_reduction with
;; UNSPEC_WREDUC_SUM_UNORDERED and a zero initial value.
(define_insn_and_split "*reduc_plus_scal_<mode>"
[(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
(unspec:<V_DOUBLE_EXTEND_VEL> [
(float_extend:<V_DOUBLE_EXTEND>
(match_operand:VF_HS_NO_M8 1 "register_operand"))
] UNSPEC_REDUC_SUM_UNORDERED))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, operands,
CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
DONE;
}
[(set_attr "type" "vector")])
;; Combine extend + vfredosum to vfwredosum
;;
;; Matches an UNSPEC_REDUC_SUM_ORDERED whose vector input (operand 2) is a
;; float_extend of a narrow FP vector (a VF_HS_NO_M8 mode) and whose
;; second input (operand 1) is a scalar of the wide element mode
;; <V_DOUBLE_EXTEND_VEL>.  The result (operand 0) has that same mode.
;; Always split ("&& 1") into riscv_vector::expand_reduction with
;; UNSPEC_WREDUC_SUM_ORDERED, passing operand 1 as the initial value and
;; reduction_type::FOLD_LEFT as the reduction kind.
(define_insn_and_split "*fold_left_widen_plus_<mode>"
[(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
(unspec:<V_DOUBLE_EXTEND_VEL> [
(float_extend:<V_DOUBLE_EXTEND>
(match_operand:VF_HS_NO_M8 2 "register_operand"))
(match_operand:<V_DOUBLE_EXTEND_VEL> 1 "register_operand")
] UNSPEC_REDUC_SUM_ORDERED))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED, operands,
operands[1],
riscv_vector::reduction_type::FOLD_LEFT);
DONE;
}
[(set_attr "type" "vector")])
;; Combine extend + mask vfredosum to mask vfwredosum
;;
;; Masked/length-controlled variant of the widening fold-left pattern.
;; Operands inside the UNSPEC_REDUC_SUM_ORDERED:
;;   2 - narrow FP vector (VF_HS_NO_M8), wrapped in a float_extend
;;   1 - scalar initial value in the wide element mode
;;   3 - vector mask (mode <VM>)
;;   4 - length operand (autovec_length_operand)
;;   5 - must be constant zero (const_0_operand); presumably the bias
;;       argument of the mask_len_* optab -- TODO confirm
;; When the length operand is the constant 0 the split simply copies the
;; initial value (operand 1) into the result; otherwise it calls
;; riscv_vector::expand_reduction with UNSPEC_WREDUC_SUM_ORDERED and
;; reduction_type::MASK_LEN_FOLD_LEFT.
(define_insn_and_split "*mask_len_fold_left_widen_plus_<mode>"
[(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
(unspec:<V_DOUBLE_EXTEND_VEL> [
(float_extend:<V_DOUBLE_EXTEND>
(match_operand:VF_HS_NO_M8 2 "register_operand"))
(match_operand:<V_DOUBLE_EXTEND_VEL> 1 "register_operand")
(match_operand:<VM> 3 "vector_mask_operand")
(match_operand 4 "autovec_length_operand")
(match_operand 5 "const_0_operand")
] UNSPEC_REDUC_SUM_ORDERED))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
if (rtx_equal_p (operands[4], const0_rtx))
emit_move_insn (operands[0], operands[1]);
else
riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED, operands,
operands[1],
riscv_vector::reduction_type::MASK_LEN_FOLD_LEFT);
DONE;
}
[(set_attr "type" "vector")])
;; =============================================================================
;; Misc combine patterns
;; =============================================================================

View File

@ -2086,14 +2086,20 @@
;; - vredxor.vs
;; -------------------------------------------------------------------------
(define_expand "reduc_plus_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
(match_operand:VI 1 "register_operand")]
"TARGET_VECTOR"
(define_insn_and_split "reduc_plus_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand")
(unspec:<VEL> [
(match_operand:VI 1 "register_operand")
] UNSPEC_REDUC_SUM))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
riscv_vector::expand_reduction (UNSPEC_REDUC_SUM, operands, CONST0_RTX (<VEL>mode));
DONE;
})
}
[(set_attr "type" "vector")])
(define_expand "reduc_smax_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
@ -2173,15 +2179,21 @@
;; - vfredmin.vs
;; -------------------------------------------------------------------------
(define_expand "reduc_plus_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
(match_operand:VF 1 "register_operand")]
"TARGET_VECTOR"
(define_insn_and_split "reduc_plus_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand")
(unspec:<VEL> [
(match_operand:VF 1 "register_operand")
] UNSPEC_REDUC_SUM_UNORDERED))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_UNORDERED, operands,
CONST0_RTX (<VEL>mode));
DONE;
})
}
[(set_attr "type" "vector")])
(define_expand "reduc_smax_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
@ -2215,27 +2227,38 @@
;; -------------------------------------------------------------------------
;; Unpredicated in-order FP reductions.
(define_expand "fold_left_plus_<mode>"
[(match_operand:<VEL> 0 "register_operand")
(match_operand:<VEL> 1 "register_operand")
(match_operand:VF 2 "register_operand")]
"TARGET_VECTOR"
(define_insn_and_split "fold_left_plus_<mode>"
[(set (match_operand:<VEL> 0 "register_operand")
(unspec:<VEL> [
(match_operand:VF 2 "register_operand")
(match_operand:<VEL> 1 "register_operand")
] UNSPEC_REDUC_SUM_ORDERED))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED, operands,
operands[1],
riscv_vector::reduction_type::FOLD_LEFT);
DONE;
})
}
[(set_attr "type" "vector")])
;; Predicated in-order FP reductions.
(define_expand "mask_len_fold_left_plus_<mode>"
[(match_operand:<VEL> 0 "register_operand")
(match_operand:<VEL> 1 "register_operand")
(match_operand:VF 2 "register_operand")
(match_operand:<VM> 3 "vector_mask_operand")
(match_operand 4 "autovec_length_operand")
(match_operand 5 "const_0_operand")]
"TARGET_VECTOR"
(define_insn_and_split "mask_len_fold_left_plus_<mode>"
[(set (match_operand:<VEL> 0 "register_operand")
(unspec:<VEL> [
(match_operand:VF 2 "register_operand")
(match_operand:<VEL> 1 "register_operand")
(match_operand:<VM> 3 "vector_mask_operand")
(match_operand 4 "autovec_length_operand")
(match_operand 5 "const_0_operand")
] UNSPEC_REDUC_SUM_ORDERED))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
if (rtx_equal_p (operands[4], const0_rtx))
emit_move_insn (operands[0], operands[1]);
@ -2244,7 +2267,8 @@
operands[1],
riscv_vector::reduction_type::MASK_LEN_FOLD_LEFT);
DONE;
})
}
[(set_attr "type" "vector")])
;; -------------------------------------------------------------------------
;; ---- [INT,FP] Extract active element

View File

@ -3212,7 +3212,8 @@ expand_reduction (unsigned unspec, rtx *ops, rtx init, reduction_type type)
{
rtx vector = type == reduction_type::UNORDERED ? ops[1] : ops[2];
machine_mode vmode = GET_MODE (vector);
machine_mode m1_mode = get_m1_mode (vmode).require ();
machine_mode vel_mode = GET_MODE (ops[0]);
machine_mode m1_mode = get_m1_mode (vel_mode).require ();
rtx m1_tmp = gen_reg_rtx (m1_mode);
rtx scalar_move_ops[] = {m1_tmp, init};
@ -3225,7 +3226,9 @@ expand_reduction (unsigned unspec, rtx *ops, rtx init, reduction_type type)
rtx reduc_ops[] = {m1_tmp2, vector, m1_tmp};
if (unspec == UNSPEC_REDUC_SUM_ORDERED
|| unspec == UNSPEC_REDUC_SUM_UNORDERED)
|| unspec == UNSPEC_WREDUC_SUM_ORDERED
|| unspec == UNSPEC_REDUC_SUM_UNORDERED
|| unspec == UNSPEC_WREDUC_SUM_UNORDERED)
{
insn_code icode = code_for_pred (unspec, vmode);
if (type == reduction_type::MASK_LEN_FOLD_LEFT)

View File

@ -686,6 +686,14 @@
RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
])
;; QI/HI/SI integer vector modes with the LMUL=8 (RVVM8*) entries left
;; out.  Used as the narrow source of the widening reduction patterns.
(define_mode_iterator VI_QHS_NO_M8 [
RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32")
RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32")
RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
])
(define_mode_iterator VF_HS [
(RVVM8HF "TARGET_ZVFH") (RVVM4HF "TARGET_ZVFH") (RVVM2HF "TARGET_ZVFH")
(RVVM1HF "TARGET_ZVFH") (RVVMF2HF "TARGET_ZVFH")
@ -695,6 +703,23 @@
(RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
])
;; HF/SF floating-point vector modes with the LMUL=8 (RVVM8*) entries left
;; out.  HF modes require TARGET_ZVFH and SF modes require
;; TARGET_VECTOR_ELEN_FP_32; the smallest fractional mode of each element
;; size additionally requires TARGET_MIN_VLEN > 32.
(define_mode_iterator VF_HS_NO_M8 [
(RVVM4HF "TARGET_ZVFH")
(RVVM2HF "TARGET_ZVFH")
(RVVM1HF "TARGET_ZVFH")
(RVVMF2HF "TARGET_ZVFH")
(RVVMF4HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32")
(RVVM4SF "TARGET_VECTOR_ELEN_FP_32")
(RVVM2SF "TARGET_VECTOR_ELEN_FP_32")
(RVVM1SF "TARGET_VECTOR_ELEN_FP_32")
(RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
])
(define_mode_iterator VF_HS_M8 [
(RVVM8HF "TARGET_ZVFH")
(RVVM8SF "TARGET_VECTOR_ELEN_FP_32")
])
(define_mode_iterator V_VLSI_QHS [
RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32")
@ -1319,6 +1344,8 @@
(UNSPEC_WREDUC_SUM_ORDERED "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED "wredusum")
])
;; Maps the extension code to the widening integer sum-reduction unspec:
;; zero_extend selects the unsigned form, sign_extend the signed form.
(define_code_attr WREDUC_UNSPEC [(zero_extend "UNSPEC_WREDUC_SUMU") (sign_extend "UNSPEC_WREDUC_SUM")])
(define_mode_attr VINDEX [
(RVVM8QI "RVVM8QI") (RVVM4QI "RVVM4QI") (RVVM2QI "RVVM2QI") (RVVM1QI "RVVM1QI")
(RVVMF2QI "RVVMF2QI") (RVVMF4QI "RVVMF4QI") (RVVMF8QI "RVVMF8QI")
@ -1743,6 +1770,18 @@
(V1DF "DF") (V2DF "DF") (V4DF "DF") (V8DF "DF") (V16DF "DF") (V32DF "DF") (V64DF "DF") (V128DF "DF") (V256DF "DF") (V512DF "DF")
])
;; Scalar mode of one element after doubling the element width of the
;; given vector mode (QI -> HI, HI -> SI, SI -> DI, HF -> SF, SF -> DF).
;; Only non-M8 modes are listed.
(define_mode_attr V_DOUBLE_EXTEND_VEL [
(RVVM4QI "HI") (RVVM2QI "HI") (RVVM1QI "HI") (RVVMF2QI "HI") (RVVMF4QI "HI") (RVVMF8QI "HI")
(RVVM4HI "SI") (RVVM2HI "SI") (RVVM1HI "SI") (RVVMF2HI "SI") (RVVMF4HI "SI")
(RVVM4SI "DI") (RVVM2SI "DI") (RVVM1SI "DI") (RVVMF2SI "DI")
(RVVM4HF "SF") (RVVM2HF "SF") (RVVM1HF "SF") (RVVMF2HF "SF") (RVVMF4HF "SF")
(RVVM4SF "DF") (RVVM2SF "DF") (RVVM1SF "DF") (RVVMF2SF "DF")
])
(define_mode_attr vel [
(RVVM8QI "qi") (RVVM4QI "qi") (RVVM2QI "qi") (RVVM1QI "qi") (RVVMF2QI "qi") (RVVMF4QI "qi") (RVVMF8QI "qi")
@ -2101,6 +2140,18 @@
(RVVM1QI "64") (RVVMF2QI "64") (RVVMF4QI "64") (RVVMF8QI "64")
])
;; Vector mode obtained by doubling the element width of the given mode;
;; the LMUL in the mode name doubles as well (e.g. RVVM4QI -> RVVM8HI,
;; RVVMF2QI -> RVVM1HI).  There are no M8 sources, since no mode with
;; twice that LMUL is listed.
(define_mode_attr V_DOUBLE_EXTEND [
(RVVM4QI "RVVM8HI") (RVVM2QI "RVVM4HI") (RVVM1QI "RVVM2HI") (RVVMF2QI "RVVM1HI") (RVVMF4QI "RVVMF2HI") (RVVMF8QI "RVVMF4HI")
(RVVM4HI "RVVM8SI") (RVVM2HI "RVVM4SI") (RVVM1HI "RVVM2SI") (RVVMF2HI "RVVM1SI") (RVVMF4HI "RVVMF2SI")
(RVVM4SI "RVVM8DI") (RVVM2SI "RVVM4DI") (RVVM1SI "RVVM2DI") (RVVMF2SI "RVVM1DI")
(RVVM4HF "RVVM8SF") (RVVM2HF "RVVM4SF") (RVVM1HF "RVVM2SF") (RVVMF2HF "RVVM1SF") (RVVMF4HF "RVVMF2SF")
(RVVM4SF "RVVM8DF") (RVVM2SF "RVVM4DF") (RVVM1SF "RVVM2DF") (RVVMF2SF "RVVM1DF")
])
(define_mode_attr V_DOUBLE_TRUNC [
(RVVM8HI "RVVM4QI") (RVVM4HI "RVVM2QI") (RVVM2HI "RVVM1QI") (RVVM1HI "RVVMF2QI") (RVVMF2HI "RVVMF4QI") (RVVMF4HI "RVVMF8QI")

View File

@ -0,0 +1,27 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
/* Compile-only test: each function sums N narrow elements (TYPE2) into an
   accumulator of twice the element width (TYPE1).  The scan directives at
   the end expect one widening reduction per function: three signed
   integer, three unsigned integer and two floating-point.  */
#include <stdint-gcc.h>
/* Defines reduc_<TYPE1>_<TYPE2>, which returns the sum of a[0..N-1]
   accumulated in TYPE1.  */
#define TEST_TYPE(TYPE1, TYPE2, N) \
__attribute__((noipa)) TYPE1 reduc_##TYPE1##_##TYPE2(TYPE2 *restrict a) { \
TYPE1 sum = 0; \
for (int i = 0; i < N; i += 1) \
sum += a[i]; \
return sum; \
}
/* Invokes TEST once for every (wide type, narrow type, element count)
   combination under test.  */
#define TEST_ALL(TEST) \
TEST(int16_t, int8_t, 16) \
TEST(int32_t, int16_t, 8) \
TEST(int64_t, int32_t, 4) \
TEST(uint16_t, uint8_t, 16) \
TEST(uint32_t, uint16_t, 8) \
TEST(uint64_t, uint32_t, 4) \
TEST(float, _Float16, 8) \
TEST(double, float, 4)
TEST_ALL(TEST_TYPE)
/* { dg-final { scan-assembler-times {\tvfwredusum\.vs} 2 } } */
/* { dg-final { scan-assembler-times {\tvwredsum\.vs} 3 } } */
/* { dg-final { scan-assembler-times {\tvwredsumu\.vs} 3 } } */

View File

@ -0,0 +1,20 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d --param riscv-autovec-preference=scalable -fno-vect-cost-model" } */
/* Compile-only test: sums a runtime-length array of narrow FP elements
   into a wider FP accumulator.  -ffast-math is not passed, and the scan
   directive expects an ordered widening reduction (vfwredosum.vs) for
   each of the two functions.  */
#include <stdint-gcc.h>
/* Defines reduc_<TYPE1>_<TYPE2>, which returns the sum of a[0..n-1]
   accumulated in TYPE1.  */
#define TEST_TYPE(TYPE1, TYPE2) \
__attribute__((noipa)) \
TYPE1 reduc_##TYPE1##_##TYPE2(TYPE2 *restrict a, int n) { \
TYPE1 sum = 0; \
for (int i = 0; i < n; i += 1) \
sum += a[i]; \
return sum; \
}
/* Invokes TEST for each (wide type, narrow type) pair under test.  */
#define TEST_ALL(TEST) \
TEST(float, _Float16) \
TEST(double, float)
TEST_ALL(TEST_TYPE)
/* { dg-final { scan-assembler-times {\tvfwredosum\.vs} 2 } } */

View File

@ -0,0 +1,19 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m2 -fno-vect-cost-model" } */
/* Compile-only test: like the runtime-length ordered test, but with a
   compile-time element count N and LMUL forced to m2.  Expects one
   ordered widening reduction (vfwredosum.vs) per function.  */
#include <stdint-gcc.h>
/* Defines reduc_<TYPE1>_<TYPE2>, which returns the sum of a[0..N-1]
   accumulated in TYPE1.  */
#define TEST_TYPE(TYPE1, TYPE2, N) \
__attribute__((noipa)) TYPE1 reduc_##TYPE1##_##TYPE2(TYPE2 *restrict a) { \
TYPE1 sum = 0; \
for (int i = 0; i < N; i += 1) \
sum += a[i]; \
return sum; \
}
/* Invokes TEST for each (wide type, narrow type, element count) triple.  */
#define TEST_ALL(TEST) \
TEST(float, _Float16, 8) \
TEST(double, float, 4)
TEST_ALL(TEST_TYPE)
/* { dg-final { scan-assembler-times {\tvfwredosum\.vs} 2 } } */

View File

@ -0,0 +1,24 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
/* Execution test for the functions defined in widen_reduc_order-1.c:
   each reduction result must equal a reference sum computed in main.  */
#include "widen_reduc_order-1.c"
#define N 99
/* For one (TYPE1, TYPE2) pair: fill a[] with values of alternating sign,
   accumulate the reference sum r in the same element order, then abort
   if the function under test returns a different value.  The empty asm
   with a "memory" clobber is presumably there to keep the reference loop
   from being vectorized -- TODO confirm.  */
#define RUN(TYPE1, TYPE2) \
{ \
TYPE2 a[N]; \
TYPE1 r = 0; \
for (int i = 0; i < N; i++) { \
a[i] = (i * 0.1) * (i & 1 ? 1 : -1); \
r += a[i]; \
asm volatile("" ::: "memory"); \
} \
if (r != reduc_##TYPE1##_##TYPE2(a, N)) \
__builtin_abort(); \
}
/* main is compiled with optimize(1) and runs RUN for every type pair
   listed by TEST_ALL.  */
int __attribute__((optimize(1))) main() {
TEST_ALL(RUN)
return 0;
}

View File

@ -0,0 +1,22 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
/* Execution test for the functions defined in widen_reduc_order-2.c:
   each reduction result must equal a reference sum computed in main.  */
#include "widen_reduc_order-2.c"
/* For one (TYPE1, TYPE2, N) triple: fill a[] with N values of alternating
   sign, accumulate the reference sum r in the same element order, then
   abort if the function under test returns a different value.  The empty
   asm with a "memory" clobber is presumably there to keep the reference
   loop from being vectorized -- TODO confirm.  */
#define RUN(TYPE1, TYPE2, N) \
{ \
TYPE2 a[N]; \
TYPE1 r = 0; \
for (int i = 0; i < N; i++) { \
a[i] = (i * 0.1) * (i & 1 ? 1 : -1); \
r += a[i]; \
asm volatile("" ::: "memory"); \
} \
if (r != reduc_##TYPE1##_##TYPE2(a)) \
__builtin_abort(); \
}
/* main is compiled with optimize(1) and runs RUN for every triple listed
   by TEST_ALL.  */
int __attribute__((optimize(1))) main() {
TEST_ALL(RUN)
return 0;
}

View File

@ -0,0 +1,22 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
/* Execution test for the functions defined in widen_reduc-1.c (integer
   and floating-point widening sums): each result must equal a reference
   sum computed in main.  */
#include "widen_reduc-1.c"
/* For one (TYPE1, TYPE2, N) triple: fill a[] with N values of alternating
   sign (converted to TYPE2 on assignment), accumulate the reference sum r
   in the same element order, then abort if the function under test
   returns a different value.  The empty asm with a "memory" clobber is
   presumably there to keep the reference loop from being vectorized --
   TODO confirm.  */
#define RUN(TYPE1, TYPE2, N) \
{ \
TYPE2 a[N]; \
TYPE1 r = 0; \
for (int i = 0; i < N; i++) { \
a[i] = (i * 0.1) * (i & 1 ? 1 : -1); \
r += a[i]; \
asm volatile("" ::: "memory"); \
} \
if (r != reduc_##TYPE1##_##TYPE2(a)) \
__builtin_abort(); \
}
/* main is compiled with optimize(1) and runs RUN for every triple listed
   by TEST_ALL.  */
int __attribute__((optimize(1))) main() {
TEST_ALL(RUN)
return 0;
}