SVE intrinsics: Fold svmul with all-zero operands to zero vector

As recently implemented for svdiv, this patch folds svmul to a zero
vector if one of the operands is a zero vector. This transformation is
applied if at least one of the following conditions is met:
- the first operand is all zeros, or
- the second operand is all zeros, and the predicate is ptrue or the
predication is _x or _z.

In contrast to constant folding, which was implemented in a previous
patch, this transformation applies whenever one of the operands is a
zero vector, even if the other operand is a variable rather than a
constant.
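For illustration only (this snippet is not part of the patch or its
testsuite, and the function name is made up), a call like the following
is now folded to an all-zeros vector at gimple time, so a single zeroing
move remains:

  #include <arm_sve.h>

  svint64_t
  mul_by_zero (svbool_t pg, svint64_t x)
  {
    /* op1 is all zeros, so the whole multiplication folds to zero.  */
    return svmul_x (pg, svdup_s64 (0), x);
  }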

The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/
	* config/aarch64/aarch64-sve-builtins-base.cc (svmul_impl::fold):
	Add folding of all-zero operands to zero vector.

gcc/testsuite/
	* gcc.target/aarch64/sve/const_fold_mul_1.c: Adjust expected
	outcome.
	* gcc.target/aarch64/sve/fold_mul_zero.c: New test.
Author: Jennifer Schmitz <jschmitz@nvidia.com>
Date:   2024-09-17 00:15:38 -07:00
Commit: 08aba2dd8c (parent: 9a99559a47)
3 changed files with 383 additions and 3 deletions

gcc/config/aarch64/aarch64-sve-builtins-base.cc

@@ -2020,7 +2020,22 @@ public:
   gimple *
   fold (gimple_folder &f) const override
   {
-    return f.fold_const_binary (MULT_EXPR);
+    if (auto *res = f.fold_const_binary (MULT_EXPR))
+      return res;
+
+    /* If one of the operands is all zeros, fold to zero vector.  */
+    tree op1 = gimple_call_arg (f.call, 1);
+    if (integer_zerop (op1))
+      return gimple_build_assign (f.lhs, op1);
+
+    tree pg = gimple_call_arg (f.call, 0);
+    tree op2 = gimple_call_arg (f.call, 2);
+    if (integer_zerop (op2)
+        && (f.pred != PRED_m
+            || is_ptrue (pg, f.type_suffix (0).element_bytes)))
+      return gimple_build_assign (f.lhs, build_zero_cst (TREE_TYPE (f.lhs)));
+
+    return NULL;
   }
 };
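As context for the PRED_m check above, here is a rough scalar model of
the per-lane predication semantics (illustration only, not GCC
internals; the helper name and fixed lane count are assumptions). With
_m predication, inactive lanes keep the value of op1, so a zero op2 only
guarantees an all-zero result when every lane is active, whereas a zero
op1 yields zero in active lanes (0 * x) and inactive lanes (op1) alike:

  #include <stdbool.h>
  #include <stdint.h>

  #define N_LANES 4

  static void
  svmul_m_model (int64_t res[N_LANES], const bool pg[N_LANES],
                 const int64_t op1[N_LANES], const int64_t op2[N_LANES])
  {
    for (int i = 0; i < N_LANES; i++)
      /* _m predication: inactive lanes take the value of op1.  */
      res[i] = pg[i] ? op1[i] * op2[i] : op1[i];
  }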

gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c

@@ -35,7 +35,7 @@ svint64_t s64_z_pg (svbool_t pg)
 /*
 ** s64_z_pg_0:
-** mov z[0-9]+\.d, p[0-7]/z, #0
+** mov z[0-9]+\.b, #0
 ** ret
 */
 svint64_t s64_z_pg_0 (svbool_t pg)
@@ -117,7 +117,7 @@ svint64_t s64_z_pg_n (svbool_t pg)
 /*
 ** s64_z_pg_n_s64_0:
-** mov z[0-9]+\.d, p[0-7]/z, #0
+** mov z[0-9]+\.b, #0
 ** ret
 */
 svint64_t s64_z_pg_n_s64_0 (svbool_t pg)

gcc/testsuite/gcc.target/aarch64/sve/fold_mul_zero.c (new file)

@@ -0,0 +1,365 @@
/* { dg-final { check-function-bodies "**" "" } } */
/* { dg-options "-O2" } */
#include "arm_sve.h"
/*
** s64_x_pg_op1:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_x_pg_op1 (svbool_t pg, svint64_t op2)
{
return svmul_x (pg, svdup_s64 (0), op2);
}
/*
** s64_z_pg_op1:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_z_pg_op1 (svbool_t pg, svint64_t op2)
{
return svmul_z (pg, svdup_s64 (0), op2);
}
/*
** s64_m_pg_op1:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_m_pg_op1 (svbool_t pg, svint64_t op2)
{
return svmul_m (pg, svdup_s64 (0), op2);
}
/*
** s64_x_ptrue_op1:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_x_ptrue_op1 (svint64_t op2)
{
return svmul_x (svptrue_b64 (), svdup_s64 (0), op2);
}
/*
** s64_z_ptrue_op1:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_z_ptrue_op1 (svint64_t op2)
{
return svmul_z (svptrue_b64 (), svdup_s64 (0), op2);
}
/*
** s64_m_ptrue_op1:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_m_ptrue_op1 (svint64_t op2)
{
return svmul_m (svptrue_b64 (), svdup_s64 (0), op2);
}
/*
** s64_x_pg_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_x_pg_op2 (svbool_t pg, svint64_t op1)
{
return svmul_x (pg, op1, svdup_s64 (0));
}
/*
** s64_z_pg_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_z_pg_op2 (svbool_t pg, svint64_t op1)
{
return svmul_z (pg, op1, svdup_s64 (0));
}
/*
** s64_m_pg_op2:
** mov z[0-9]+\.d, p0/m, #0
** ret
*/
svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1)
{
return svmul_m (pg, op1, svdup_s64 (0));
}
/*
** s64_x_ptrue_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_x_ptrue_op2 (svint64_t op1)
{
return svmul_x (svptrue_b64 (), op1, svdup_s64 (0));
}
/*
** s64_z_ptrue_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_z_ptrue_op2 (svint64_t op1)
{
return svmul_z (svptrue_b64 (), op1, svdup_s64 (0));
}
/*
** s64_m_ptrue_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_m_ptrue_op2 (svint64_t op1)
{
return svmul_m (svptrue_b64 (), op1, svdup_s64 (0));
}
/*
** s64_n_x_pg_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_n_x_pg_op2 (svbool_t pg, svint64_t op1)
{
return svmul_n_s64_x (pg, op1, 0);
}
/*
** s64_n_z_pg_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_n_z_pg_op2 (svbool_t pg, svint64_t op1)
{
return svmul_n_s64_z (pg, op1, 0);
}
/*
** s64_n_m_pg_op2:
** mov z[0-9]+\.d, p0/m, #0
** ret
*/
svint64_t s64_n_m_pg_op2 (svbool_t pg, svint64_t op1)
{
return svmul_n_s64_m (pg, op1, 0);
}
/*
** s64_n_x_ptrue_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_n_x_ptrue_op2 (svint64_t op1)
{
return svmul_n_s64_x (svptrue_b64 (), op1, 0);
}
/*
** s64_n_z_ptrue_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_n_z_ptrue_op2 (svint64_t op1)
{
return svmul_n_s64_z (svptrue_b64 (), op1, 0);
}
/*
** s64_n_m_ptrue_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svint64_t s64_n_m_ptrue_op2 (svint64_t op1)
{
return svmul_n_s64_m (svptrue_b64 (), op1, 0);
}
/*
** u64_x_pg_op1:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_x_pg_op1 (svbool_t pg, svuint64_t op2)
{
return svmul_x (pg, svdup_u64 (0), op2);
}
/*
** u64_z_pg_op1:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_z_pg_op1 (svbool_t pg, svuint64_t op2)
{
return svmul_z (pg, svdup_u64 (0), op2);
}
/*
** u64_m_pg_op1:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_m_pg_op1 (svbool_t pg, svuint64_t op2)
{
return svmul_m (pg, svdup_u64 (0), op2);
}
/*
** u64_x_ptrue_op1:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_x_ptrue_op1 (svuint64_t op2)
{
return svmul_x (svptrue_b64 (), svdup_u64 (0), op2);
}
/*
** u64_z_ptrue_op1:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_z_ptrue_op1 (svuint64_t op2)
{
return svmul_z (svptrue_b64 (), svdup_u64 (0), op2);
}
/*
** u64_m_ptrue_op1:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_m_ptrue_op1 (svuint64_t op2)
{
return svmul_m (svptrue_b64 (), svdup_u64 (0), op2);
}
/*
** u64_x_pg_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_x_pg_op2 (svbool_t pg, svuint64_t op1)
{
return svmul_x (pg, op1, svdup_u64 (0));
}
/*
** u64_z_pg_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_z_pg_op2 (svbool_t pg, svuint64_t op1)
{
return svmul_z (pg, op1, svdup_u64 (0));
}
/*
** u64_m_pg_op2:
** mov z[0-9]+\.d, p0/m, #0
** ret
*/
svuint64_t u64_m_pg_op2 (svbool_t pg, svuint64_t op1)
{
return svmul_m (pg, op1, svdup_u64 (0));
}
/*
** u64_x_ptrue_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_x_ptrue_op2 (svuint64_t op1)
{
return svmul_x (svptrue_b64 (), op1, svdup_u64 (0));
}
/*
** u64_z_ptrue_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_z_ptrue_op2 (svuint64_t op1)
{
return svmul_z (svptrue_b64 (), op1, svdup_u64 (0));
}
/*
** u64_m_ptrue_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_m_ptrue_op2 (svuint64_t op1)
{
return svmul_m (svptrue_b64 (), op1, svdup_u64 (0));
}
/*
** u64_n_x_pg_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_n_x_pg_op2 (svbool_t pg, svuint64_t op1)
{
return svmul_n_u64_x (pg, op1, 0);
}
/*
** u64_n_z_pg_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_n_z_pg_op2 (svbool_t pg, svuint64_t op1)
{
return svmul_n_u64_z (pg, op1, 0);
}
/*
** u64_n_m_pg_op2:
** mov z[0-9]+\.d, p0/m, #0
** ret
*/
svuint64_t u64_n_m_pg_op2 (svbool_t pg, svuint64_t op1)
{
return svmul_n_u64_m (pg, op1, 0);
}
/*
** u64_n_x_ptrue_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_n_x_ptrue_op2 (svuint64_t op1)
{
return svmul_n_u64_x (svptrue_b64 (), op1, 0);
}
/*
** u64_n_z_ptrue_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_n_z_ptrue_op2 (svuint64_t op1)
{
return svmul_n_u64_z (svptrue_b64 (), op1, 0);
}
/*
** u64_n_m_ptrue_op2:
** mov z[0-9]+\.b, #0
** ret
*/
svuint64_t u64_n_m_ptrue_op2 (svuint64_t op1)
{
return svmul_n_u64_m (svptrue_b64 (), op1, 0);
}