SVE intrinsics: Fold svdiv with all-zero operands to zero vector
This patch folds svdiv where one of the operands is all-zeros to a zero
vector, if one of the following conditions holds:
- the dividend is all zeros, or
- the divisor is all zeros, and the predicate is ptrue or the predication
  is _x or _z.

This case was not covered by the recent patch that implemented constant
folding, because that covered only cases where both operands are constant
vectors. Here, the operation is folded as soon as one of the operands is a
constant zero vector. Folding of division by 0 to return 0 is in accordance
with the semantics of sdiv and udiv.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no
regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/
	* config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl::fold):
	Add folding of all-zero operands to zero vector.

gcc/testsuite/
	* gcc.target/aarch64/sve/fold_div_zero.c: New test.
	* gcc.target/aarch64/sve/const_fold_div_1.c: Adjust expected
	outcome.
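As an illustration of the user-visible effect (a sketch distilled from the
test cases below, not part of the patch itself): with the fold in place,
both of the following calls compile to a single zero move instead of a
division sequence.

#include <arm_sve.h>

/* Dividend is all zeros: always foldable, so this becomes
   "mov z0.b, #0; ret".  */
svint64_t zero_dividend (svbool_t pg, svint64_t x)
{
  return svdiv_x (pg, svdup_s64 (0), x);
}

/* Divisor is all zeros: foldable here because _x predication is used,
   so inactive lanes may take any value.  */
svint64_t zero_divisor (svbool_t pg, svint64_t x)
{
  return svdiv_x (pg, x, svdup_s64 (0));
}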
commit e311dd13a9 (parent 008f4510d7)
gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -758,30 +758,41 @@ public:
     if (auto *res = f.fold_const_binary (TRUNC_DIV_EXPR))
       return res;
 
+    /* If the dividend is all zeros, fold to zero vector.  */
+    tree op1 = gimple_call_arg (f.call, 1);
+    if (integer_zerop (op1))
+      return gimple_build_assign (f.lhs, op1);
+
+    /* If the divisor is all zeros, fold to zero vector.  */
+    tree pg = gimple_call_arg (f.call, 0);
+    tree op2 = gimple_call_arg (f.call, 2);
+    if (integer_zerop (op2)
+	&& (f.pred != PRED_m
+	    || is_ptrue (pg, f.type_suffix (0).element_bytes)))
+      return gimple_build_assign (f.lhs, build_zero_cst (TREE_TYPE (f.lhs)));
+
     /* If the divisor is a uniform power of 2, fold to a shift
        instruction.  */
-    tree op2 = gimple_call_arg (f.call, 2);
-    tree divisor_cst = uniform_integer_cst_p (op2);
+    tree op2_cst = uniform_integer_cst_p (op2);
 
-    if (!divisor_cst || !integer_pow2p (divisor_cst))
+    if (!op2_cst || !integer_pow2p (op2_cst))
       return NULL;
 
     tree new_divisor;
     gcall *call;
 
-    if (f.type_suffix (0).unsigned_p && tree_to_uhwi (divisor_cst) != 1)
+    if (f.type_suffix (0).unsigned_p && tree_to_uhwi (op2_cst) != 1)
       {
 	function_instance instance ("svlsr", functions::svlsr,
 				    shapes::binary_uint_opt_n, MODE_n,
 				    f.type_suffix_ids, GROUP_none, f.pred);
 	call = f.redirect_call (instance);
-	tree d = INTEGRAL_TYPE_P (TREE_TYPE (op2)) ? op2 : divisor_cst;
+	tree d = INTEGRAL_TYPE_P (TREE_TYPE (op2)) ? op2 : op2_cst;
 	new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d));
       }
     else
       {
-	if (tree_int_cst_sign_bit (divisor_cst)
-	    || tree_to_shwi (divisor_cst) == 1)
+	if (tree_int_cst_sign_bit (op2_cst)
+	    || tree_to_shwi (op2_cst) == 1)
 	  return NULL;
 
 	function_instance instance ("svasrd", functions::svasrd,
@@ -789,7 +800,7 @@ public:
 				    f.type_suffix_ids, GROUP_none, f.pred);
 	call = f.redirect_call (instance);
 	new_divisor = wide_int_to_tree (scalar_types[VECTOR_TYPE_svuint64_t],
-					tree_log2 (divisor_cst));
+					tree_log2 (op2_cst));
       }
 
     gimple_call_set_arg (call, 2, new_divisor);
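A note on the predication guard above: with _m predication, inactive lanes
of the result keep the value of the first data operand, so folding the whole
result to zero is only valid when every lane is known to be active, hence
the `f.pred != PRED_m || is_ptrue (...)` condition. A sketch of the case
that must keep the division (mirroring s64_m_pg_op2 in the new test):

#include <arm_sve.h>

/* pg may have inactive lanes; those lanes of the result come from op1,
   so svdiv_m (pg, op1, 0) is not necessarily an all-zero vector and the
   sdiv instruction must be kept.  */
svint64_t keep_division (svbool_t pg, svint64_t op1)
{
  return svdiv_m (pg, op1, svdup_s64 (0));
}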
gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c
@@ -45,7 +45,7 @@ svint64_t s64_z_pg (svbool_t pg)
 
 /*
 ** s64_z_pg_0:
-**	mov	z[0-9]+\.d, p[0-7]/z, #0
+**	mov	z[0-9]+\.b, #0
 **	ret
 */
 svint64_t s64_z_pg_0 (svbool_t pg)
@@ -55,9 +55,7 @@ svint64_t s64_z_pg_0 (svbool_t pg)
 
 /*
 ** s64_z_pg_by0:
-**	mov	(z[0-9]+\.d), #5
-**	mov	(z[0-9]+)\.b, #0
-**	sdivr	\2\.d, p[0-7]/m, \2\.d, \1
+**	mov	z[0-9]+\.b, #0
 **	ret
 */
 svint64_t s64_z_pg_by0 (svbool_t pg)
@@ -149,7 +147,7 @@ svint64_t s64_z_pg_n (svbool_t pg)
 
 /*
 ** s64_z_pg_n_s64_0:
-**	mov	z[0-9]+\.d, p[0-7]/z, #0
+**	mov	z[0-9]+\.b, #0
 **	ret
 */
 svint64_t s64_z_pg_n_s64_0 (svbool_t pg)
@@ -159,9 +157,7 @@ svint64_t s64_z_pg_n_s64_0 (svbool_t pg)
 
 /*
 ** s64_z_pg_n_s64_by0:
-**	mov	(z[0-9]+\.d), #5
-**	mov	(z[0-9]+)\.b, #0
-**	sdivr	\2\.d, p[0-7]/m, \2\.d, \1
+**	mov	z[0-9]+\.b, #0
 **	ret
 */
 svint64_t s64_z_pg_n_s64_by0 (svbool_t pg)
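The adjusted expectations reflect that these functions now fold to a single
unpredicated zero move. A hypothetical additional case in the same
check-function-bodies style (an assumption for illustration, not part of
this patch) would look like:

/*
** s32_x_pg_by0:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint32_t s32_x_pg_by0 (svbool_t pg, svint32_t op1)
{
  return svdiv_x (pg, op1, svdup_s32 (0));
}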
gcc/testsuite/gcc.target/aarch64/sve/fold_div_zero.c (new file, 369 lines)
@@ -0,0 +1,369 @@
/* { dg-final { check-function-bodies "**" "" } } */
/* { dg-options "-O2" } */

#include "arm_sve.h"

/*
** s64_x_pg_op1:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_x_pg_op1 (svbool_t pg, svint64_t op2)
{
  return svdiv_x (pg, svdup_s64 (0), op2);
}

/*
** s64_z_pg_op1:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_z_pg_op1 (svbool_t pg, svint64_t op2)
{
  return svdiv_z (pg, svdup_s64 (0), op2);
}

/*
** s64_m_pg_op1:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_m_pg_op1 (svbool_t pg, svint64_t op2)
{
  return svdiv_m (pg, svdup_s64 (0), op2);
}

/*
** s64_x_ptrue_op1:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_x_ptrue_op1 (svint64_t op2)
{
  return svdiv_x (svptrue_b64 (), svdup_s64 (0), op2);
}

/*
** s64_z_ptrue_op1:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_z_ptrue_op1 (svint64_t op2)
{
  return svdiv_z (svptrue_b64 (), svdup_s64 (0), op2);
}

/*
** s64_m_ptrue_op1:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_m_ptrue_op1 (svint64_t op2)
{
  return svdiv_m (svptrue_b64 (), svdup_s64 (0), op2);
}

/*
** s64_x_pg_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_x_pg_op2 (svbool_t pg, svint64_t op1)
{
  return svdiv_x (pg, op1, svdup_s64 (0));
}

/*
** s64_z_pg_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_z_pg_op2 (svbool_t pg, svint64_t op1)
{
  return svdiv_z (pg, op1, svdup_s64 (0));
}

/*
** s64_m_pg_op2:
**	mov	(z[0-9]+)\.b, #0
**	sdiv	(z[0-9]+\.d), p[0-7]/m, \2, \1\.d
**	ret
*/
svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1)
{
  return svdiv_m (pg, op1, svdup_s64 (0));
}

/*
** s64_x_ptrue_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_x_ptrue_op2 (svint64_t op1)
{
  return svdiv_x (svptrue_b64 (), op1, svdup_s64 (0));
}

/*
** s64_z_ptrue_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_z_ptrue_op2 (svint64_t op1)
{
  return svdiv_z (svptrue_b64 (), op1, svdup_s64 (0));
}

/*
** s64_m_ptrue_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_m_ptrue_op2 (svint64_t op1)
{
  return svdiv_m (svptrue_b64 (), op1, svdup_s64 (0));
}

/*
** s64_n_x_pg_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_n_x_pg_op2 (svbool_t pg, svint64_t op1)
{
  return svdiv_n_s64_x (pg, op1, 0);
}

/*
** s64_n_z_pg_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_n_z_pg_op2 (svbool_t pg, svint64_t op1)
{
  return svdiv_n_s64_z (pg, op1, 0);
}

/*
** s64_n_m_pg_op2:
**	mov	(z[0-9]+)\.b, #0
**	sdiv	(z[0-9]+\.d), p[0-7]/m, \2, \1\.d
**	ret
*/
svint64_t s64_n_m_pg_op2 (svbool_t pg, svint64_t op1)
{
  return svdiv_n_s64_m (pg, op1, 0);
}

/*
** s64_n_x_ptrue_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_n_x_ptrue_op2 (svint64_t op1)
{
  return svdiv_n_s64_x (svptrue_b64 (), op1, 0);
}

/*
** s64_n_z_ptrue_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_n_z_ptrue_op2 (svint64_t op1)
{
  return svdiv_n_s64_z (svptrue_b64 (), op1, 0);
}

/*
** s64_n_m_ptrue_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svint64_t s64_n_m_ptrue_op2 (svint64_t op1)
{
  return svdiv_n_s64_m (svptrue_b64 (), op1, 0);
}

/*
** u64_x_pg_op1:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_x_pg_op1 (svbool_t pg, svuint64_t op2)
{
  return svdiv_x (pg, svdup_u64 (0), op2);
}

/*
** u64_z_pg_op1:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_z_pg_op1 (svbool_t pg, svuint64_t op2)
{
  return svdiv_z (pg, svdup_u64 (0), op2);
}

/*
** u64_m_pg_op1:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_m_pg_op1 (svbool_t pg, svuint64_t op2)
{
  return svdiv_m (pg, svdup_u64 (0), op2);
}

/*
** u64_x_ptrue_op1:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_x_ptrue_op1 (svuint64_t op2)
{
  return svdiv_x (svptrue_b64 (), svdup_u64 (0), op2);
}

/*
** u64_z_ptrue_op1:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_z_ptrue_op1 (svuint64_t op2)
{
  return svdiv_z (svptrue_b64 (), svdup_u64 (0), op2);
}

/*
** u64_m_ptrue_op1:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_m_ptrue_op1 (svuint64_t op2)
{
  return svdiv_m (svptrue_b64 (), svdup_u64 (0), op2);
}

/*
** u64_x_pg_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_x_pg_op2 (svbool_t pg, svuint64_t op1)
{
  return svdiv_x (pg, op1, svdup_u64 (0));
}

/*
** u64_z_pg_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_z_pg_op2 (svbool_t pg, svuint64_t op1)
{
  return svdiv_z (pg, op1, svdup_u64 (0));
}

/*
** u64_m_pg_op2:
**	mov	(z[0-9]+)\.b, #0
**	udiv	(z[0-9]+\.d), p[0-7]/m, \2, \1\.d
**	ret
*/
svuint64_t u64_m_pg_op2 (svbool_t pg, svuint64_t op1)
{
  return svdiv_m (pg, op1, svdup_u64 (0));
}

/*
** u64_x_ptrue_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_x_ptrue_op2 (svuint64_t op1)
{
  return svdiv_x (svptrue_b64 (), op1, svdup_u64 (0));
}

/*
** u64_z_ptrue_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_z_ptrue_op2 (svuint64_t op1)
{
  return svdiv_z (svptrue_b64 (), op1, svdup_u64 (0));
}

/*
** u64_m_ptrue_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_m_ptrue_op2 (svuint64_t op1)
{
  return svdiv_m (svptrue_b64 (), op1, svdup_u64 (0));
}

/*
** u64_n_x_pg_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_n_x_pg_op2 (svbool_t pg, svuint64_t op1)
{
  return svdiv_n_u64_x (pg, op1, 0);
}

/*
** u64_n_z_pg_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_n_z_pg_op2 (svbool_t pg, svuint64_t op1)
{
  return svdiv_n_u64_z (pg, op1, 0);
}

/*
** u64_n_m_pg_op2:
**	mov	(z[0-9]+)\.b, #0
**	udiv	(z[0-9]+\.d), p[0-7]/m, \2, \1\.d
**	ret
*/
svuint64_t u64_n_m_pg_op2 (svbool_t pg, svuint64_t op1)
{
  return svdiv_n_u64_m (pg, op1, 0);
}

/*
** u64_n_x_ptrue_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_n_x_ptrue_op2 (svuint64_t op1)
{
  return svdiv_n_u64_x (svptrue_b64 (), op1, 0);
}

/*
** u64_n_z_ptrue_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_n_z_ptrue_op2 (svuint64_t op1)
{
  return svdiv_n_u64_z (svptrue_b64 (), op1, 0);
}

/*
** u64_n_m_ptrue_op2:
**	mov	z[0-9]+\.b, #0
**	ret
*/
svuint64_t u64_n_m_ptrue_op2 (svuint64_t op1)
{
  return svdiv_n_u64_m (svptrue_b64 (), op1, 0);
}