mirror of https://github.com/gcc-mirror/gcc.git
SVE intrinsics: Fold svmul with all-zero operands to zero vector
As recently implemented for svdiv, this patch folds svmul to a zero
vector if one of the operands is a zero vector. This transformation is
applied if at least one of the following conditions is met:
- the first operand is all zeros, or
- the second operand is all zeros, and the predicate is ptrue or the
  predication is _x or _z.

In contrast to constant folding, which was implemented in a previous
patch, this transformation is applied as soon as one of the operands is
a zero vector, while the other operand can be a variable.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no
regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/
	* config/aarch64/aarch64-sve-builtins-base.cc (svmul_impl::fold):
	Add folding of all-zero operands to zero vector.

gcc/testsuite/
	* gcc.target/aarch64/sve/const_fold_mul_1.c: Adjust expected
	outcome.
	* gcc.target/aarch64/sve/fold_mul_zero.c: New test.
parent 9a99559a47
commit 08aba2dd8c
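For context, a minimal sketch (illustration only, not part of the patch;
function names are hypothetical) of the ACLE predication semantics that
motivate the conditions above: _z zeroes inactive lanes, while _m keeps
the first operand in inactive lanes.

#include <arm_sve.h>

/* Illustration only: _z zeroes inactive lanes, so with op2 == 0 every
   lane of the result is zero regardless of pg, and the call can fold
   to a zero vector.  */
svint64_t fold_ok_z (svbool_t pg, svint64_t op1)
{
  return svmul_n_s64_z (pg, op1, 0);	/* always the zero vector */
}

/* Illustration only: _m keeps op1 in inactive lanes, so with op2 == 0
   the result is only all-zero if the predicate is known to be
   all-true.  */
svint64_t no_fold_m (svbool_t pg, svint64_t op1)
{
  return svmul_n_s64_m (pg, op1, 0);	/* inactive lanes = op1 */
}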
gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2020,7 +2020,22 @@ public:
   gimple *
   fold (gimple_folder &f) const override
   {
-    return f.fold_const_binary (MULT_EXPR);
+    if (auto *res = f.fold_const_binary (MULT_EXPR))
+      return res;
+
+    /* If one of the operands is all zeros, fold to zero vector.  */
+    tree op1 = gimple_call_arg (f.call, 1);
+    if (integer_zerop (op1))
+      return gimple_build_assign (f.lhs, op1);
+
+    tree pg = gimple_call_arg (f.call, 0);
+    tree op2 = gimple_call_arg (f.call, 2);
+    if (integer_zerop (op2)
+        && (f.pred != PRED_m
+            || is_ptrue (pg, f.type_suffix (0).element_bytes)))
+      return gimple_build_assign (f.lhs, build_zero_cst (TREE_TYPE (f.lhs)));
+
+    return NULL;
   }
 };
 
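A note on the hunk above: the op1 == 0 case needs no predicate check
because even under _m predication the inactive lanes take their value
from op1, which is itself the zero vector. A minimal sketch of that
reasoning, using the same intrinsics as the new test (the function name
is illustrative only):

#include <arm_sve.h>

/* Illustration only: active lanes compute 0 * op2 = 0, and inactive
   lanes keep op1 = 0, so the result is the zero vector for any pg.
   This is why the fold can simply return op1.  */
svint64_t op1_zero_any_pred (svbool_t pg, svint64_t op2)
{
  return svmul_m (pg, svdup_s64 (0), op2);
}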
gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
@@ -35,7 +35,7 @@ svint64_t s64_z_pg (svbool_t pg)
 
 /*
 ** s64_z_pg_0:
-** mov z[0-9]+\.d, p[0-7]/z, #0
+** mov z[0-9]+\.b, #0
 ** ret
 */
 svint64_t s64_z_pg_0 (svbool_t pg)
@@ -117,7 +117,7 @@ svint64_t s64_z_pg_n (svbool_t pg)
 
 /*
 ** s64_z_pg_n_s64_0:
-** mov z[0-9]+\.d, p[0-7]/z, #0
+** mov z[0-9]+\.b, #0
 ** ret
 */
 svint64_t s64_z_pg_n_s64_0 (svbool_t pg)
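The adjusted expectations reflect the new fold: a multiply by zero under
_z predication is now folded at gimple level to a plain zero vector, so
the compiler emits an unpredicated zero splat (mov z[0-9]+\.b, #0)
rather than the predicated zeroing move it produced before.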
365	gcc/testsuite/gcc.target/aarch64/sve/fold_mul_zero.c (new file)
@@ -0,0 +1,365 @@
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-O2" } */
+
+#include "arm_sve.h"
+
+/*
+** s64_x_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_x_pg_op1 (svbool_t pg, svint64_t op2)
+{
+  return svmul_x (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_z_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_z_pg_op1 (svbool_t pg, svint64_t op2)
+{
+  return svmul_z (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_m_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_m_pg_op1 (svbool_t pg, svint64_t op2)
+{
+  return svmul_m (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_x_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_x_ptrue_op1 (svint64_t op2)
+{
+  return svmul_x (svptrue_b64 (), svdup_s64 (0), op2);
+}
+
+/*
+** s64_z_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_z_ptrue_op1 (svint64_t op2)
+{
+  return svmul_z (svptrue_b64 (), svdup_s64 (0), op2);
+}
+
+/*
+** s64_m_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_m_ptrue_op1 (svint64_t op2)
+{
+  return svmul_m (svptrue_b64 (), svdup_s64 (0), op2);
+}
+
+/*
+** s64_x_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_x_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_x (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_z_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_z_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_z (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_m_pg_op2:
+** mov z[0-9]+\.d, p0/m, #0
+** ret
+*/
+svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_m (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_x_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_x_ptrue_op2 (svint64_t op1)
+{
+  return svmul_x (svptrue_b64 (), op1, svdup_s64 (0));
+}
+
+/*
+** s64_z_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_z_ptrue_op2 (svint64_t op1)
+{
+  return svmul_z (svptrue_b64 (), op1, svdup_s64 (0));
+}
+
+/*
+** s64_m_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_m_ptrue_op2 (svint64_t op1)
+{
+  return svmul_m (svptrue_b64 (), op1, svdup_s64 (0));
+}
+
+/*
+** s64_n_x_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_x_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_n_s64_x (pg, op1, 0);
+}
+
+/*
+** s64_n_z_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_z_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_n_s64_z (pg, op1, 0);
+}
+
+/*
+** s64_n_m_pg_op2:
+** mov z[0-9]+\.d, p0/m, #0
+** ret
+*/
+svint64_t s64_n_m_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_n_s64_m (pg, op1, 0);
+}
+
+/*
+** s64_n_x_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_x_ptrue_op2 (svint64_t op1)
+{
+  return svmul_n_s64_x (svptrue_b64 (), op1, 0);
+}
+
+/*
+** s64_n_z_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_z_ptrue_op2 (svint64_t op1)
+{
+  return svmul_n_s64_z (svptrue_b64 (), op1, 0);
+}
+
+/*
+** s64_n_m_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_m_ptrue_op2 (svint64_t op1)
+{
+  return svmul_n_s64_m (svptrue_b64 (), op1, 0);
+}
+
+/*
+** u64_x_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_x_pg_op1 (svbool_t pg, svuint64_t op2)
+{
+  return svmul_x (pg, svdup_u64 (0), op2);
+}
+
+/*
+** u64_z_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_z_pg_op1 (svbool_t pg, svuint64_t op2)
+{
+  return svmul_z (pg, svdup_u64 (0), op2);
+}
+
+/*
+** u64_m_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_m_pg_op1 (svbool_t pg, svuint64_t op2)
+{
+  return svmul_m (pg, svdup_u64 (0), op2);
+}
+
+/*
+** u64_x_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_x_ptrue_op1 (svuint64_t op2)
+{
+  return svmul_x (svptrue_b64 (), svdup_u64 (0), op2);
+}
+
+/*
+** u64_z_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_z_ptrue_op1 (svuint64_t op2)
+{
+  return svmul_z (svptrue_b64 (), svdup_u64 (0), op2);
+}
+
+/*
+** u64_m_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_m_ptrue_op1 (svuint64_t op2)
+{
+  return svmul_m (svptrue_b64 (), svdup_u64 (0), op2);
+}
+
+/*
+** u64_x_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_x_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svmul_x (pg, op1, svdup_u64 (0));
+}
+
+/*
+** u64_z_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_z_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svmul_z (pg, op1, svdup_u64 (0));
+}
+
+/*
+** u64_m_pg_op2:
+** mov z[0-9]+\.d, p0/m, #0
+** ret
+*/
+svuint64_t u64_m_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svmul_m (pg, op1, svdup_u64 (0));
+}
+
+/*
+** u64_x_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_x_ptrue_op2 (svuint64_t op1)
+{
+  return svmul_x (svptrue_b64 (), op1, svdup_u64 (0));
+}
+
+/*
+** u64_z_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_z_ptrue_op2 (svuint64_t op1)
+{
+  return svmul_z (svptrue_b64 (), op1, svdup_u64 (0));
+}
+
+/*
+** u64_m_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_m_ptrue_op2 (svuint64_t op1)
+{
+  return svmul_m (svptrue_b64 (), op1, svdup_u64 (0));
+}
+
+/*
+** u64_n_x_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_x_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svmul_n_u64_x (pg, op1, 0);
+}
+
+/*
+** u64_n_z_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_z_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svmul_n_u64_z (pg, op1, 0);
+}
+
+/*
+** u64_n_m_pg_op2:
+** mov z[0-9]+\.d, p0/m, #0
+** ret
+*/
+svuint64_t u64_n_m_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svmul_n_u64_m (pg, op1, 0);
+}
+
+/*
+** u64_n_x_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_x_ptrue_op2 (svuint64_t op1)
+{
+  return svmul_n_u64_x (svptrue_b64 (), op1, 0);
+}
+
+/*
+** u64_n_z_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_z_ptrue_op2 (svuint64_t op1)
+{
+  return svmul_n_u64_z (svptrue_b64 (), op1, 0);
+}
+
+/*
+** u64_n_m_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_m_ptrue_op2 (svuint64_t op1)
+{
+  return svmul_n_u64_m (svptrue_b64 (), op1, 0);
+}
+
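The templates between the ** markers are matched against the compiled
body of each named function by the check-function-bodies dejagnu helper.
Assuming the usual aarch64-sve.exp driver for this test directory, the
new test alone can be run from the build tree with something like:

make check-gcc RUNTESTFLAGS="aarch64-sve.exp=fold_mul_zero.c"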