mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
aarch64: Optimise calls to ldexp with SVE FSCALE instruction [PR111733]
This patch uses the FSCALE instruction provided by SVE to implement the standard ldexp family of functions. Currently, with '-Ofast -mcpu=neoverse-v2', GCC generates libcalls for the following code: float test_ldexpf (float x, int i) { return __builtin_ldexpf (x, i); } double test_ldexp (double x, int i) { return __builtin_ldexp(x, i); } GCC Output: test_ldexpf: b ldexpf test_ldexp: b ldexp Since SVE supports an FSCALE instruction, we can use it to process scalar floats by moving them to a vector register and issuing an FSCALE instruction, similar to how LLVM handles the ldexp builtin. New Output: test_ldexpf: fmov s31, w0 ptrue p7.b, vl4 fscale z0.s, p7/m, z0.s, z31.s ret test_ldexp: sxtw x0, w0 ptrue p7.b, vl8 fmov d31, x0 fscale z0.d, p7/m, z0.d, z31.d ret This is a revision of an earlier patch, and now uses the extended definition of aarch64_ptrue_reg to generate predicate registers with the appropriate set bits. The patch was bootstrapped and regtested on aarch64-linux-gnu with no regressions. OK for mainline? Signed-off-by: Soumya AR <soumyaa@nvidia.com> gcc/ChangeLog: PR target/111733 * config/aarch64/aarch64-sve.md (ldexp<mode>3): Added a new pattern to match ldexp calls with scalar floating modes and expand to the existing pattern for FSCALE. * config/aarch64/iterators.md (SVE_FULL_F_SCALAR): Added an iterator to match all FP SVE modes as well as their scalar equivalents. (VPRED): Extended the attribute to handle GPF_HF modes. * internal-fn.def (LDEXP): Changed macro to incorporate ldexpf16. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/fscale.c: New test.
This commit is contained in:
parent
445d8bb6a8
commit
9b2915d95d
@ -5088,6 +5088,21 @@
|
||||
;; - FTSSEL
|
||||
;; -------------------------------------------------------------------------
|
||||
|
||||
(define_expand "ldexp<mode>3"
|
||||
[(set (match_operand:GPF_HF 0 "register_operand")
|
||||
(unspec:GPF_HF
|
||||
[(match_dup 3)
|
||||
(const_int SVE_STRICT_GP)
|
||||
(match_operand:GPF_HF 1 "register_operand")
|
||||
(match_operand:<V_INT_EQUIV> 2 "register_operand")]
|
||||
UNSPEC_COND_FSCALE))]
|
||||
"TARGET_SVE"
|
||||
{
|
||||
operands[3] = aarch64_ptrue_reg (<VPRED>mode,
|
||||
GET_MODE_UNIT_SIZE (<MODE>mode));
|
||||
}
|
||||
)
|
||||
|
||||
;; Unpredicated floating-point binary operations that take an integer as
|
||||
;; their second operand.
|
||||
(define_insn "@aarch64_sve_<optab><mode>"
|
||||
@ -5103,17 +5118,17 @@
|
||||
;; Predicated floating-point binary operations that take an integer
|
||||
;; as their second operand.
|
||||
(define_insn "@aarch64_pred_<optab><mode>"
|
||||
[(set (match_operand:SVE_FULL_F 0 "register_operand")
|
||||
(unspec:SVE_FULL_F
|
||||
[(set (match_operand:SVE_FULL_F_SCALAR 0 "register_operand")
|
||||
(unspec:SVE_FULL_F_SCALAR
|
||||
[(match_operand:<VPRED> 1 "register_operand")
|
||||
(match_operand:SI 4 "aarch64_sve_gp_strictness")
|
||||
(match_operand:SVE_FULL_F 2 "register_operand")
|
||||
(match_operand:SVE_FULL_F_SCALAR 2 "register_operand")
|
||||
(match_operand:<V_INT_EQUIV> 3 "register_operand")]
|
||||
SVE_COND_FP_BINARY_INT))]
|
||||
"TARGET_SVE"
|
||||
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
|
||||
[ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
|
||||
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
|
||||
[ w , Upl , 0 , w ; * ] <sve_fp_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
|
||||
[ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_fp_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -452,6 +452,9 @@
|
||||
;; All fully-packed SVE floating-point vector modes.
|
||||
(define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF])
|
||||
|
||||
;; Fully-packed SVE floating-point vector modes and their scalar equivalents.
|
||||
(define_mode_iterator SVE_FULL_F_SCALAR [SVE_FULL_F GPF_HF])
|
||||
|
||||
;; Fully-packed SVE integer vector modes that have 8-bit or 16-bit elements.
|
||||
(define_mode_iterator SVE_FULL_BHI [VNx16QI VNx8HI])
|
||||
|
||||
@ -2354,7 +2357,8 @@
|
||||
(VNx8DI "VNx2BI") (VNx8DF "VNx2BI")
|
||||
(V8QI "VNx8BI") (V16QI "VNx16BI")
|
||||
(V4HI "VNx4BI") (V8HI "VNx8BI") (V2SI "VNx2BI")
|
||||
(V4SI "VNx4BI") (V2DI "VNx2BI") (V1DI "VNx2BI")])
|
||||
(V4SI "VNx4BI") (V2DI "VNx2BI") (V1DI "VNx2BI")
|
||||
(HF "VNx8BI") (SF "VNx4BI") (DF "VNx2BI")])
|
||||
|
||||
;; ...and again in lower case.
|
||||
(define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi")
|
||||
|
@ -441,7 +441,7 @@ DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
|
||||
DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
|
||||
|
||||
/* FP scales. */
|
||||
DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary)
|
||||
DEF_INTERNAL_FLT_FLOATN_FN (LDEXP, ECF_CONST, ldexp, binary)
|
||||
|
||||
/* Ternary math functions. */
|
||||
DEF_INTERNAL_FLT_FLOATN_FN (FMA, ECF_CONST, fma, ternary)
|
||||
|
46
gcc/testsuite/gcc.target/aarch64/sve/fscale.c
Normal file
46
gcc/testsuite/gcc.target/aarch64/sve/fscale.c
Normal file
@ -0,0 +1,46 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-Ofast" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
/*
|
||||
** test_ldexpf16:
|
||||
** ...
|
||||
** ptrue (p[0-7])\.b, vl2
|
||||
** ...
|
||||
** fscale z[0-9]+\.h, \1/m, z[0-9]+\.h, z[0-9]+\.h
|
||||
** ret
|
||||
*/
|
||||
_Float16
|
||||
test_ldexpf16 (_Float16 x, int i)
|
||||
{
|
||||
return __builtin_ldexpf16 (x, i);
|
||||
}
|
||||
|
||||
/*
|
||||
** test_ldexpf:
|
||||
** ...
|
||||
** ptrue (p[0-7])\.b, vl4
|
||||
** ...
|
||||
** fscale z[0-9]+\.s, \1/m, z[0-9]+\.s, z[0-9]+\.s
|
||||
** ret
|
||||
*/
|
||||
float
|
||||
test_ldexpf (float x, int i)
|
||||
{
|
||||
return __builtin_ldexpf (x, i);
|
||||
}
|
||||
|
||||
/*
|
||||
** test_ldexp:
|
||||
** ...
|
||||
** ptrue (p[0-7])\.b, vl8
|
||||
** ...
|
||||
** fscale z[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d
|
||||
** ret
|
||||
*/
|
||||
double
|
||||
test_ldexp (double x, int i)
|
||||
{
|
||||
return __builtin_ldexp (x, i);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user