vect: Don't apply masks to operations on invariants [PR108979]

The loop body in the testcase contains an operation on invariants.
SLP detects this and can hoist/schedule the operation outside of
the loop.  However, after the fix for PR96373, we would try to
apply a loop mask to this operation, even though the mask is
defined in the loop.

The patch does what Richi suggested in the PR: suppress the
masking for externs and constants.

gcc/
	PR tree-optimization/108979
	* tree-vect-stmts.cc (vectorizable_operation): Don't mask
	operations on invariants.

gcc/testsuite/
	PR tree-optimization/108979
	* gfortran.dg/vect/pr108979.f90: New test.
This commit is contained in:
Richard Sandiford 2023-03-02 16:39:50 +00:00
parent 71afd06284
commit 076d309e36
2 changed files with 40 additions and 6 deletions

View File

@ -0,0 +1,21 @@
! { dg-do compile }
! { dg-additional-options "-fnon-call-exceptions" }
! { dg-additional-options "-march=armv8.2-a+sve" { target aarch64*-*-* } }
MODULE hfx_contract_block
INTEGER, PARAMETER :: dp=8
CONTAINS
SUBROUTINE block_2_1_2_1(kbd,kbc,kad,kac,pbd,pbc,pad,pac,prim,scale)
REAL(KIND=dp) :: kbd(1*1), kbc(1*2), kad(2*1), kac(2*2), pbd(1*1), &
pbc(1*2), pad(2*1), pac(2*2), prim(2*1*2*1), scale
DO md = 1,1
DO mc = 1,2
DO mb = 1,1
DO ma = 1,2
kac((mc-1)*2+ma) = kac((mc-1)*2+ma)-tmp*p_bd
END DO
END DO
END DO
END DO
END SUBROUTINE block_2_1_2_1
END MODULE hfx_contract_block

View File

@ -6254,6 +6254,8 @@ vectorizable_operation (vec_info *vinfo,
"use not simple.\n");
return false;
}
bool is_invariant = (dt[0] == vect_external_def
|| dt[0] == vect_constant_def);
/* If op0 is an external or constant def, infer the vector type
from the scalar type. */
if (!vectype)
@ -6307,6 +6309,8 @@ vectorizable_operation (vec_info *vinfo,
"use not simple.\n");
return false;
}
is_invariant &= (dt[1] == vect_external_def
|| dt[1] == vect_constant_def);
if (vectype2
&& maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
return false;
@ -6321,6 +6325,8 @@ vectorizable_operation (vec_info *vinfo,
"use not simple.\n");
return false;
}
is_invariant &= (dt[2] == vect_external_def
|| dt[2] == vect_constant_def);
if (vectype3
&& maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
return false;
@ -6426,16 +6432,23 @@ vectorizable_operation (vec_info *vinfo,
int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
internal_fn cond_fn = get_conditional_internal_fn (code);
bool could_trap = gimple_could_trap_p (stmt);
/* If operating on inactive elements could generate spurious traps,
we need to restrict the operation to active lanes. Note that this
specifically doesn't apply to unhoisted invariants, since they
operate on the same value for every lane.
Similarly, if this operation is part of a reduction, a fully-masked
loop should only change the active lanes of the reduction chain,
keeping the inactive lanes as-is. */
bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
|| reduc_idx >= 0);
if (!vec_stmt) /* transformation not required. */
{
/* If this operation is part of a reduction, a fully-masked loop
should only change the active lanes of the reduction chain,
keeping the inactive lanes as-is. */
if (loop_vinfo
&& LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
&& (could_trap || reduc_idx >= 0))
&& mask_out_inactive)
{
if (cond_fn == IFN_LAST
|| !direct_internal_fn_supported_p (cond_fn, vectype,
@ -6578,7 +6591,7 @@ vectorizable_operation (vec_info *vinfo,
vop1 = ((op_type == binary_op || op_type == ternary_op)
? vec_oprnds1[i] : NULL_TREE);
vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
if (masked_loop_p && (reduc_idx >= 0 || could_trap))
if (masked_loop_p && mask_out_inactive)
{
tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
vectype, i);