vect: Add maskload else value support.

This patch adds an else operand to vectorized masked load calls.
The implementation adds else-value arguments to the respective
target-querying functions, which are used to supply the vectorizer
with the proper else value.
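
As a rough illustration, the query-and-select pattern amounts to the
following standalone sketch (plain C++, not the GCC API; the enum and
the two helpers are hypothetical stand-ins for the patch's
MASK_LOAD_ELSE_* constants, the target query, and the vectorizer's
choice):

  #include <vector>
  #include <algorithm>

  /* Stand-ins for the patch's MASK_LOAD_ELSE_* constants.  */
  enum else_value { ELSE_ZERO, ELSE_M1, ELSE_UNDEFINED };

  /* Hypothetical target query: the else values the maskload pattern
     accepts.  */
  std::vector<else_value> supported_else_values ()
  {
    return {ELSE_ZERO, ELSE_UNDEFINED};
  }

  /* Pick the else value for the vectorized load; zero is tried first,
     so it is chosen whenever the target supports it.  */
  else_value choose_else_value ()
  {
    std::vector<else_value> els = supported_else_values ();
    if (std::find (els.begin (), els.end (), ELSE_ZERO) != els.end ())
      return ELSE_ZERO;
    return els.front ();
  }

  int main () { return choose_else_value () == ELSE_ZERO ? 0 : 1; }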

We query the target for its supported else operand and use that for the
maskload call.  If necessary, i.e. if the mode has padding bits and if
the else operand is nonzero, a VEC_COND enforcing a zero else value is
emitted.
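
For intuition, here is a minimal standalone sketch (again plain C++,
not GIMPLE or GCC internals) of how a 4-lane masked load with an else
operand, plus the optional zeroing VEC_COND, behaves; the helper names
are made up for the example:

  #include <array>
  #include <cstdio>

  /* MASK_LOAD (ptr, mask, else): active lanes read memory, inactive
     lanes take the target-supported else value.  */
  template <typename T, std::size_t N>
  std::array<T, N> mask_load (const T *ptr,
                              const std::array<bool, N> &mask, T els)
  {
    std::array<T, N> res{};
    for (std::size_t i = 0; i < N; ++i)
      res[i] = mask[i] ? ptr[i] : els;
    return res;
  }

  /* The select emitted when the mode has padding bits and the else
     value is nonzero: force inactive lanes to zero.  */
  template <typename T, std::size_t N>
  std::array<T, N> zero_inactive (const std::array<T, N> &v,
                                  const std::array<bool, N> &mask)
  {
    std::array<T, N> res{};
    for (std::size_t i = 0; i < N; ++i)
      res[i] = mask[i] ? v[i] : T{0};
    return res;
  }

  int main ()
  {
    int mem[4] = {1, 2, 3, 4};
    std::array<bool, 4> mask = {true, false, true, false};
    auto v = mask_load<int, 4> (mem, mask, -1);  /* all-ones else value */
    auto z = zero_inactive (v, mask);            /* yields 1 0 3 0 */
    std::printf ("%d %d %d %d\n", z[0], z[1], z[2], z[3]);
    return 0;
  }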

gcc/ChangeLog:

	* optabs-query.cc (supports_vec_convert_optab_p): Return icode.
	(get_supported_else_val): Return supported else value for
	optab's operand at index.
	(supports_vec_gather_load_p): Add else argument.
	(supports_vec_scatter_store_p): Ditto.
	* optabs-query.h (supports_vec_gather_load_p): Ditto.
	(get_supported_else_val): Ditto.
	* optabs-tree.cc (target_supports_mask_load_store_p): Ditto.
	(can_vec_mask_load_store_p): Ditto.
	(target_supports_len_load_store_p): Ditto.
	(get_len_load_store_mode): Ditto.
	* optabs-tree.h (target_supports_mask_load_store_p): Ditto.
	(can_vec_mask_load_store_p): Ditto.
	* tree-vect-data-refs.cc (vect_lanes_optab_supported_p): Ditto.
	(vect_gather_scatter_fn_p): Ditto.
	(vect_check_gather_scatter): Ditto.
	(vect_load_lanes_supported): Ditto.
	* tree-vect-patterns.cc (vect_recog_gather_scatter_pattern):
	Ditto.
	* tree-vect-slp.cc (vect_get_operand_map): Adjust indices for
	else operand.
	(vect_slp_analyze_node_operations): Skip undefined else operand.
	* tree-vect-stmts.cc (exist_non_indexing_operands_for_use_p):
	Add else operand handling.
	(vect_get_vec_defs_for_operand): Handle undefined else operand.
	(check_load_store_for_partial_vectors): Add else argument.
	(vect_truncate_gather_scatter_offset): Ditto.
	(vect_use_strided_gather_scatters_p): Ditto.
	(get_group_load_store_type): Ditto.
	(get_load_store_type): Ditto.
	(vect_get_mask_load_else): Ditto.
	(vect_get_else_val_from_tree): Ditto.
	(vect_build_one_gather_load_call): Add zero else operand.
	(vectorizable_load): Use else operand.
	* tree-vectorizer.h (vect_gather_scatter_fn_p): Add else
	argument.
	(vect_load_lanes_supported): Ditto.
	(vect_get_mask_load_else): Ditto.
	(vect_get_else_val_from_tree): Ditto.
Robin Dapp 2024-08-08 14:29:05 +02:00
parent 6b6bd53619
commit 634ae740f5
9 changed files with 466 additions and 125 deletions

gcc/optabs-query.cc

@ -29,6 +29,9 @@ along with GCC; see the file COPYING3. If not see
#include "rtl.h"
#include "recog.h"
#include "vec-perm-indices.h"
#include "internal-fn.h"
#include "memmodel.h"
#include "optabs.h"
struct target_optabs default_target_optabs;
struct target_optabs *this_fn_optabs = &default_target_optabs;
@ -672,34 +675,57 @@ lshift_cheap_p (bool speed_p)
that mode, given that the second mode is always an integer vector.
If MODE is VOIDmode, return true if OP supports any vector mode. */
static bool
supports_vec_convert_optab_p (optab op, machine_mode mode)
static enum insn_code
supported_vec_convert_optab (optab op, machine_mode mode)
{
int start = mode == VOIDmode ? 0 : mode;
int end = mode == VOIDmode ? MAX_MACHINE_MODE - 1 : mode;
enum insn_code icode = CODE_FOR_nothing;
for (int i = start; i <= end; ++i)
if (VECTOR_MODE_P ((machine_mode) i))
for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j)
if (convert_optab_handler (op, (machine_mode) i,
(machine_mode) j) != CODE_FOR_nothing)
return true;
{
if ((icode
= convert_optab_handler (op, (machine_mode) i,
(machine_mode) j)) != CODE_FOR_nothing)
return icode;
}
return false;
return icode;
}
/* If MODE is not VOIDmode, return true if vec_gather_load is available for
that mode. If MODE is VOIDmode, return true if gather_load is available
for at least one vector mode. */
for at least one vector mode.
In that case, and if ELSVALS is nonzero, store the supported else values
into the vector it points to. */
bool
supports_vec_gather_load_p (machine_mode mode)
supports_vec_gather_load_p (machine_mode mode, vec<int> *elsvals)
{
if (!this_fn_optabs->supports_vec_gather_load[mode])
enum insn_code icode = CODE_FOR_nothing;
if (!this_fn_optabs->supports_vec_gather_load[mode] || elsvals)
{
/* Try the masked variants first. In case we later decide that we
need a mask after all (thus requiring an else operand) we need
to query it below and we cannot do that when using the
non-masked optab. */
icode = supported_vec_convert_optab (mask_gather_load_optab, mode);
if (icode == CODE_FOR_nothing)
icode = supported_vec_convert_optab (mask_len_gather_load_optab, mode);
if (icode == CODE_FOR_nothing)
icode = supported_vec_convert_optab (gather_load_optab, mode);
this_fn_optabs->supports_vec_gather_load[mode]
= (supports_vec_convert_optab_p (gather_load_optab, mode)
|| supports_vec_convert_optab_p (mask_gather_load_optab, mode)
|| supports_vec_convert_optab_p (mask_len_gather_load_optab, mode)
? 1 : -1);
= (icode != CODE_FOR_nothing) ? 1 : -1;
}
/* For gather the optab's operand indices do not match the IFN's because
the latter does not have the extension operand (operand 3). It is
implicitly added during expansion so we use the IFN's else index + 1.
*/
if (elsvals && icode != CODE_FOR_nothing)
get_supported_else_vals
(icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
return this_fn_optabs->supports_vec_gather_load[mode] > 0;
}
@ -711,12 +737,18 @@ supports_vec_gather_load_p (machine_mode mode)
bool
supports_vec_scatter_store_p (machine_mode mode)
{
enum insn_code icode;
if (!this_fn_optabs->supports_vec_scatter_store[mode])
{
icode = supported_vec_convert_optab (scatter_store_optab, mode);
if (icode == CODE_FOR_nothing)
icode = supported_vec_convert_optab (mask_scatter_store_optab, mode);
if (icode == CODE_FOR_nothing)
icode = supported_vec_convert_optab (mask_len_scatter_store_optab,
mode);
this_fn_optabs->supports_vec_scatter_store[mode]
= (supports_vec_convert_optab_p (scatter_store_optab, mode)
|| supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
|| supports_vec_convert_optab_p (mask_len_scatter_store_optab, mode)
? 1 : -1);
= (icode != CODE_FOR_nothing) ? 1 : -1;
}
return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
}

gcc/optabs-query.h

@ -168,7 +168,8 @@ bool can_compare_and_swap_p (machine_mode, bool);
bool can_atomic_exchange_p (machine_mode, bool);
bool can_atomic_load_p (machine_mode);
bool lshift_cheap_p (bool);
bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
bool supports_vec_gather_load_p (machine_mode = E_VOIDmode,
vec<int> * = nullptr);
bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
bool can_vec_extract (machine_mode, machine_mode);

gcc/optabs-tree.cc

@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see
#include "optabs.h"
#include "optabs-tree.h"
#include "stor-layout.h"
#include "internal-fn.h"
/* Return the optab used for computing the operation given by the tree code,
CODE and the tree EXP. This function is not always usable (for example, it
@ -512,24 +513,38 @@ target_supports_op_p (tree type, enum tree_code code,
or mask_len_{load,store}.
This helper function checks whether target supports masked
load/store and return corresponding IFN in the last argument
(IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). */
(IFN_MASK_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).
If there is support and ELSVALS is nonzero store the possible else values
in the vector it points to. */
static bool
bool
target_supports_mask_load_store_p (machine_mode mode, machine_mode mask_mode,
bool is_load, internal_fn *ifn)
bool is_load, internal_fn *ifn,
vec<int> *elsvals)
{
optab op = is_load ? maskload_optab : maskstore_optab;
optab len_op = is_load ? mask_len_load_optab : mask_len_store_optab;
if (convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing)
enum insn_code icode;
if ((icode = convert_optab_handler (op, mode, mask_mode))
!= CODE_FOR_nothing)
{
if (ifn)
*ifn = is_load ? IFN_MASK_LOAD : IFN_MASK_STORE;
if (elsvals && is_load)
get_supported_else_vals (icode,
internal_fn_else_index (IFN_MASK_LOAD),
*elsvals);
return true;
}
else if (convert_optab_handler (len_op, mode, mask_mode) != CODE_FOR_nothing)
else if ((icode = convert_optab_handler (len_op, mode, mask_mode))
!= CODE_FOR_nothing)
{
if (ifn)
*ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
if (elsvals && is_load)
get_supported_else_vals (icode,
internal_fn_else_index (IFN_MASK_LEN_LOAD),
*elsvals);
return true;
}
return false;
@ -538,19 +553,23 @@ target_supports_mask_load_store_p (machine_mode mode, machine_mode mask_mode,
/* Return true if target supports vector masked load/store for mode.
An additional output in the last argument which is the IFN pointer.
We set IFN as MASK_{LOAD,STORE} or MASK_LEN_{LOAD,STORE} according
which optab is supported in the target. */
which optab is supported in the target.
If there is support and ELSVALS is nonzero store the possible else values
in the vector it points to. */
bool
can_vec_mask_load_store_p (machine_mode mode,
machine_mode mask_mode,
bool is_load,
internal_fn *ifn)
internal_fn *ifn,
vec<int> *elsvals)
{
machine_mode vmode;
/* If mode is vector mode, check it directly. */
if (VECTOR_MODE_P (mode))
return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn);
return target_supports_mask_load_store_p (mode, mask_mode, is_load, ifn,
elsvals);
/* Otherwise, return true if there is some vector mode with
the mask load/store supported. */
@ -564,7 +583,8 @@ can_vec_mask_load_store_p (machine_mode mode,
vmode = targetm.vectorize.preferred_simd_mode (smode);
if (VECTOR_MODE_P (vmode)
&& targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
&& target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn))
&& target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn,
elsvals))
return true;
auto_vector_modes vector_modes;
@ -572,7 +592,8 @@ can_vec_mask_load_store_p (machine_mode mode,
for (machine_mode base_mode : vector_modes)
if (related_vector_mode (base_mode, smode).exists (&vmode)
&& targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
&& target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn))
&& target_supports_mask_load_store_p (vmode, mask_mode, is_load, ifn,
elsvals))
return true;
return false;
}
@ -582,11 +603,13 @@ can_vec_mask_load_store_p (machine_mode mode,
or mask_len_{load,store}.
This helper function checks whether target supports len
load/store and return corresponding IFN in the last argument
(IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}). */
(IFN_LEN_{LOAD,STORE} or IFN_MASK_LEN_{LOAD,STORE}).
If there is support and ELSVALS is nonzero store the possible
else values in the vector it points to. */
static bool
target_supports_len_load_store_p (machine_mode mode, bool is_load,
internal_fn *ifn)
internal_fn *ifn, vec<int> *elsvals)
{
optab op = is_load ? len_load_optab : len_store_optab;
optab masked_op = is_load ? mask_len_load_optab : mask_len_store_optab;
@ -598,11 +621,17 @@ target_supports_len_load_store_p (machine_mode mode, bool is_load,
return true;
}
machine_mode mask_mode;
enum insn_code icode;
if (targetm.vectorize.get_mask_mode (mode).exists (&mask_mode)
&& convert_optab_handler (masked_op, mode, mask_mode) != CODE_FOR_nothing)
&& ((icode = convert_optab_handler (masked_op, mode, mask_mode))
!= CODE_FOR_nothing))
{
if (ifn)
*ifn = is_load ? IFN_MASK_LEN_LOAD : IFN_MASK_LEN_STORE;
if (elsvals && is_load)
get_supported_else_vals (icode,
internal_fn_else_index (IFN_MASK_LEN_LOAD),
*elsvals);
return true;
}
return false;
@ -616,22 +645,25 @@ target_supports_len_load_store_p (machine_mode mode, bool is_load,
VnQI to wrap the other supportable same size vector modes.
An additional output in the last argument which is the IFN pointer.
We set IFN as LEN_{LOAD,STORE} or MASK_LEN_{LOAD,STORE} according
which optab is supported in the target. */
which optab is supported in the target.
If there is support and ELSVALS is nonzero store the possible else values
in the vector it points to. */
opt_machine_mode
get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn)
get_len_load_store_mode (machine_mode mode, bool is_load, internal_fn *ifn,
vec<int> *elsvals)
{
gcc_assert (VECTOR_MODE_P (mode));
/* Check if length in lanes supported for this mode directly. */
if (target_supports_len_load_store_p (mode, is_load, ifn))
if (target_supports_len_load_store_p (mode, is_load, ifn, elsvals))
return mode;
/* Check if length in bytes supported for same vector size VnQI. */
machine_mode vmode;
poly_uint64 nunits = GET_MODE_SIZE (mode);
if (related_vector_mode (mode, QImode, nunits).exists (&vmode)
&& target_supports_len_load_store_p (vmode, is_load, ifn))
&& target_supports_len_load_store_p (vmode, is_load, ifn, elsvals))
return vmode;
return opt_machine_mode ();

gcc/optabs-tree.h

@ -47,9 +47,13 @@ bool expand_vec_cond_expr_p (tree, tree);
void init_tree_optimization_optabs (tree);
bool target_supports_op_p (tree, enum tree_code,
enum optab_subtype = optab_default);
bool target_supports_mask_load_store_p (machine_mode, machine_mode,
bool, internal_fn *, vec<int> *);
bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool,
internal_fn * = nullptr);
internal_fn * = nullptr,
vec<int> * = nullptr);
opt_machine_mode get_len_load_store_mode (machine_mode, bool,
internal_fn * = nullptr);
internal_fn * = nullptr,
vec<int> * = nullptr);
#endif

gcc/tree-vect-data-refs.cc

@ -55,13 +55,18 @@ along with GCC; see the file COPYING3. If not see
#include "vec-perm-indices.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "optabs-query.h"
/* Return true if load- or store-lanes optab OPTAB is implemented for
COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
COUNT vectors of type VECTYPE. NAME is the name of OPTAB.
If it is implemented and ELSVALS is nonzero store the possible else
values in the vector it points to. */
static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
tree vectype, unsigned HOST_WIDE_INT count)
tree vectype, unsigned HOST_WIDE_INT count,
vec<int> *elsvals = nullptr)
{
machine_mode mode, array_mode;
bool limit_p;
@ -81,7 +86,9 @@ vect_lanes_optab_supported_p (const char *name, convert_optab optab,
}
}
if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
enum insn_code icode;
if ((icode = convert_optab_handler (optab, array_mode, mode))
== CODE_FOR_nothing)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@ -95,6 +102,11 @@ vect_lanes_optab_supported_p (const char *name, convert_optab optab,
"can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
GET_MODE_NAME (mode));
if (elsvals)
get_supported_else_vals (icode,
internal_fn_else_index (IFN_MASK_LEN_LOAD_LANES),
*elsvals);
return true;
}
@ -4184,13 +4196,15 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
be multiplied *after* it has been converted to address width.
Return true if the function is supported, storing the function id in
*IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. */
*IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
If we can use gather and store the possible else values in ELSVALS. */
bool
vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
tree vectype, tree memory_type, tree offset_type,
int scale, internal_fn *ifn_out,
tree *offset_vectype_out)
tree *offset_vectype_out, vec<int> *elsvals)
{
unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
unsigned int element_bits = vector_element_bits (vectype);
@ -4228,7 +4242,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
/* Test whether the target supports this combination. */
if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
offset_vectype, scale))
offset_vectype, scale,
elsvals))
{
*ifn_out = ifn;
*offset_vectype_out = offset_vectype;
@ -4238,7 +4253,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
&& internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
memory_type,
offset_vectype,
scale))
scale, elsvals))
{
*ifn_out = alt_ifn;
*offset_vectype_out = offset_vectype;
@ -4246,7 +4261,8 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
}
else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype,
memory_type,
offset_vectype, scale))
offset_vectype, scale,
elsvals))
{
*ifn_out = alt_ifn2;
*offset_vectype_out = offset_vectype;
@ -4285,11 +4301,13 @@ vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
}
/* Return true if a non-affine read or write in STMT_INFO is suitable for a
gather load or scatter store. Describe the operation in *INFO if so. */
gather load or scatter store. Describe the operation in *INFO if so.
If it is suitable and ELSVALS is nonzero store the supported else values
in the vector it points to. */
bool
vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
gather_scatter_info *info)
gather_scatter_info *info, vec<int> *elsvals)
{
HOST_WIDE_INT scale = 1;
poly_int64 pbitpos, pbitsize;
@ -4314,6 +4332,13 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
if (internal_gather_scatter_fn_p (ifn))
{
vect_describe_gather_scatter_call (stmt_info, info);
/* In pattern recog we simply used a ZERO else value that
we need to correct here. To that end just re-use the
(already successful) check if we support a gather IFN
and have it populate the else values. */
if (DR_IS_READ (dr) && internal_fn_mask_index (ifn) >= 0 && elsvals)
supports_vec_gather_load_p (TYPE_MODE (vectype), elsvals);
return true;
}
masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
@ -4329,7 +4354,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
/* True if we should aim to use internal functions rather than
built-in functions. */
bool use_ifn_p = (DR_IS_READ (dr)
? supports_vec_gather_load_p (TYPE_MODE (vectype))
? supports_vec_gather_load_p (TYPE_MODE (vectype),
elsvals)
: supports_vec_scatter_store_p (TYPE_MODE (vectype)));
base = DR_REF (dr);
@ -4486,12 +4512,14 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
masked_p, vectype, memory_type,
signed_char_type_node,
new_scale, &ifn,
&offset_vectype)
&offset_vectype,
elsvals)
&& !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
unsigned_char_type_node,
new_scale, &ifn,
&offset_vectype))
&offset_vectype,
elsvals))
break;
scale = new_scale;
off = op0;
@ -4514,7 +4542,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
&& vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
TREE_TYPE (off), scale, &ifn,
&offset_vectype))
&offset_vectype, elsvals))
break;
if (TYPE_PRECISION (TREE_TYPE (op0))
@ -4568,7 +4596,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
{
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
vectype, memory_type, offtype, scale,
&ifn, &offset_vectype))
&ifn, &offset_vectype, elsvals))
ifn = IFN_LAST;
decl = NULL_TREE;
}
@ -6405,27 +6433,29 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
}
/* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT vectors
of type VECTYPE. MASKED_P says whether the masked form is needed. */
of type VECTYPE. MASKED_P says whether the masked form is needed.
If it is available and ELSVALS is nonzero store the possible else values
in the vector it points to. */
internal_fn
vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
bool masked_p)
bool masked_p, vec<int> *elsvals)
{
if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
vec_mask_len_load_lanes_optab, vectype,
count))
count, elsvals))
return IFN_MASK_LEN_LOAD_LANES;
else if (masked_p)
{
if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
vec_mask_load_lanes_optab, vectype,
count))
count, elsvals))
return IFN_MASK_LOAD_LANES;
}
else
{
if (vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
vectype, count))
vectype, count, elsvals))
return IFN_LOAD_LANES;
}
return IFN_LAST;

gcc/tree-vect-patterns.cc

@ -6021,12 +6021,20 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
/* Build the new pattern statement. */
tree scale = size_int (gs_info.scale);
gcall *pattern_stmt;
if (DR_IS_READ (dr))
{
tree zero = build_zero_cst (gs_info.element_type);
if (mask != NULL)
pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
offset, scale, zero, mask);
{
int elsval = MASK_LOAD_ELSE_ZERO;
tree vec_els
= vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,
offset, scale, zero, mask,
vec_els);
}
else
pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
offset, scale, zero);

gcc/tree-vect-slp.cc

@ -511,15 +511,15 @@ static const int cond_expr_maps[3][5] = {
static const int no_arg_map[] = { 0 };
static const int arg0_map[] = { 1, 0 };
static const int arg1_map[] = { 1, 1 };
static const int arg2_map[] = { 1, 2 };
static const int arg2_arg3_map[] = { 2, 2, 3 };
static const int arg1_arg3_map[] = { 2, 1, 3 };
static const int arg1_arg4_map[] = { 2, 1, 4 };
static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 };
static const int arg1_arg3_arg4_map[] = { 3, 1, 3, 4 };
static const int arg3_arg2_map[] = { 2, 3, 2 };
static const int op1_op0_map[] = { 2, 1, 0 };
static const int off_map[] = { 1, -3 };
static const int off_op0_map[] = { 2, -3, 0 };
static const int off_arg2_map[] = { 2, -3, 2 };
static const int off_arg2_arg3_map[] = { 3, -3, 2, 3 };
static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 };
static const int mask_call_maps[6][7] = {
{ 1, 1, },
@ -566,14 +566,14 @@ vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false,
switch (gimple_call_internal_fn (call))
{
case IFN_MASK_LOAD:
return gather_scatter_p ? off_arg2_map : arg2_map;
return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map;
case IFN_GATHER_LOAD:
return arg1_map;
case IFN_MASK_GATHER_LOAD:
case IFN_MASK_LEN_GATHER_LOAD:
return arg1_arg4_map;
return arg1_arg4_arg5_map;
case IFN_SCATTER_STORE:
return arg1_arg3_map;
@ -8000,6 +8000,18 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
tree vector_type = SLP_TREE_VECTYPE (child);
if (!vector_type)
{
/* Masked loads can have an undefined (default SSA definition)
else operand. We do not need to cost it. */
vec<tree> ops = SLP_TREE_SCALAR_OPS (child);
if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
== load_vec_info_type)
&& ((ops.length ()
&& TREE_CODE (ops[0]) == SSA_NAME
&& SSA_NAME_IS_DEFAULT_DEF (ops[0])
&& VAR_P (SSA_NAME_VAR (ops[0])))
|| SLP_TREE_DEF_TYPE (child) == vect_constant_def))
continue;
/* For shifts with a scalar argument we don't need
to cost or code-generate anything.
??? Represent this more explicitly. */

gcc/tree-vect-stmts.cc

@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see
#include "regs.h"
#include "attribs.h"
#include "optabs-libfuncs.h"
#include "tree-dfa.h"
/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
@ -157,28 +158,45 @@ create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
/* ARRAY is an array of vectors created by create_vector_array.
Return an SSA_NAME for the vector in index N. The reference
is part of the vectorization of STMT_INFO and the vector is associated
with scalar destination SCALAR_DEST. */
with scalar destination SCALAR_DEST.
If we need to ensure that inactive elements are set to zero,
NEED_ZEROING is true, MASK contains the loop mask to be used. */
static tree
read_vector_array (vec_info *vinfo,
stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
tree scalar_dest, tree array, unsigned HOST_WIDE_INT n,
bool need_zeroing, tree mask)
{
tree vect_type, vect, vect_name, array_ref;
tree vect_type, vect, vect_name, tmp, tmp_name, array_ref;
gimple *new_stmt;
gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
vect_type = TREE_TYPE (TREE_TYPE (array));
tmp = vect_create_destination_var (scalar_dest, vect_type);
vect = vect_create_destination_var (scalar_dest, vect_type);
array_ref = build4 (ARRAY_REF, vect_type, array,
build_int_cst (size_type_node, n),
NULL_TREE, NULL_TREE);
new_stmt = gimple_build_assign (vect, array_ref);
vect_name = make_ssa_name (vect, new_stmt);
gimple_assign_set_lhs (new_stmt, vect_name);
new_stmt = gimple_build_assign (tmp, array_ref);
tmp_name = make_ssa_name (vect, new_stmt);
gimple_assign_set_lhs (new_stmt, tmp_name);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
if (need_zeroing)
{
tree vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
vect_type);
vect_name = make_ssa_name (vect, new_stmt);
new_stmt
= gimple_build_assign (vect_name, VEC_COND_EXPR,
mask, tmp_name, vec_els);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
else
vect_name = tmp_name;
return vect_name;
}
@ -469,6 +487,10 @@ exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
if (mask_index >= 0
&& use == gimple_call_arg (call, mask_index))
return true;
int els_index = internal_fn_else_index (ifn);
if (els_index >= 0
&& use == gimple_call_arg (call, els_index))
return true;
int stored_value_index = internal_fn_stored_value_index (ifn);
if (stored_value_index >= 0
&& use == gimple_call_arg (call, stored_value_index))
@ -1280,7 +1302,17 @@ vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
gcc_assert (vector_type);
tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
/* A masked load can have a default SSA definition as else operand.
We should "vectorize" this instead of creating a duplicate from the
scalar default. */
tree vop;
if (TREE_CODE (op) == SSA_NAME
&& SSA_NAME_IS_DEFAULT_DEF (op)
&& VAR_P (SSA_NAME_VAR (op)))
vop = get_or_create_ssa_default_def (cfun,
create_tmp_var (vector_type));
else
vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
while (ncopies--)
vec_oprnds->quick_push (vop);
}
@ -1492,7 +1524,10 @@ static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
vectors is not supported, otherwise record the required rgroup control
types. */
types.
If partial vectors can be used and ELSVALS is nonzero the supported
else values will be added to the vector ELSVALS points to. */
static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
@ -1502,7 +1537,8 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
vect_memory_access_type
memory_access_type,
gather_scatter_info *gs_info,
tree scalar_mask)
tree scalar_mask,
vec<int> *elsvals = nullptr)
{
/* Invariant loads need no special support. */
if (memory_access_type == VMAT_INVARIANT)
@ -1518,7 +1554,8 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
if (slp_node)
nvectors /= group_size;
internal_fn ifn
= (is_load ? vect_load_lanes_supported (vectype, group_size, true)
= (is_load ? vect_load_lanes_supported (vectype, group_size, true,
elsvals)
: vect_store_lanes_supported (vectype, group_size, true));
if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
@ -1548,12 +1585,14 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
gs_info->memory_type,
gs_info->offset_vectype,
gs_info->scale))
gs_info->scale,
elsvals))
vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
gs_info->memory_type,
gs_info->offset_vectype,
gs_info->scale))
gs_info->scale,
elsvals))
vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
scalar_mask);
else
@ -1607,7 +1646,8 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
machine_mode mask_mode;
machine_mode vmode;
bool using_partial_vectors_p = false;
if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
if (get_len_load_store_mode
(vecmode, is_load, nullptr, elsvals).exists (&vmode))
{
nvectors = group_memory_nvectors (group_size * vf, nunits);
unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
@ -1615,7 +1655,8 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
using_partial_vectors_p = true;
}
else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
&& can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
&& can_vec_mask_load_store_p (vecmode, mask_mode, is_load, NULL,
elsvals))
{
nvectors = group_memory_nvectors (group_size * vf, nunits);
vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
@ -1672,12 +1713,16 @@ prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
without loss of precision, where X is STMT_INFO's DR_STEP.
Return true if this is possible, describing the gather load or scatter
store in GS_INFO. MASKED_P is true if the load or store is conditional. */
store in GS_INFO. MASKED_P is true if the load or store is conditional.
If we can use gather/scatter and ELSVALS is nonzero the supported
else values will be stored in the vector ELSVALS points to. */
static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
loop_vec_info loop_vinfo, bool masked_p,
gather_scatter_info *gs_info)
gather_scatter_info *gs_info,
vec<int> *elsvals)
{
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
data_reference *dr = dr_info->dr;
@ -1734,7 +1779,8 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
tree memory_type = TREE_TYPE (DR_REF (dr));
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
vectype, memory_type, offset_type, scale,
&gs_info->ifn, &gs_info->offset_vectype)
&gs_info->ifn, &gs_info->offset_vectype,
elsvals)
|| gs_info->ifn == IFN_LAST)
continue;
@ -1762,17 +1808,21 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
vectorize STMT_INFO, which is a grouped or strided load or store.
MASKED_P is true if load or store is conditional. When returning
true, fill in GS_INFO with the information required to perform the
operation. */
operation.
If we can use gather/scatter and ELSVALS is nonzero the supported
else values will be stored in the vector ELSVALS points to. */
static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
loop_vec_info loop_vinfo, bool masked_p,
gather_scatter_info *gs_info)
gather_scatter_info *gs_info,
vec<int> *elsvals)
{
if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals)
|| gs_info->ifn == IFN_LAST)
return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
masked_p, gs_info);
masked_p, gs_info, elsvals);
tree old_offset_type = TREE_TYPE (gs_info->offset);
tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
@ -1974,7 +2024,11 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
For stores, the statements in the group are all consecutive
and there is no gap at the end. For loads, the statements in the
group might not be consecutive; there can be gaps between statements
as well as at the end. */
as well as at the end.
If we can use gather/scatter and ELSVALS is nonzero the supported
else values will be stored in the vector ELSVALS points to.
*/
static bool
get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
@ -1985,7 +2039,8 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
dr_alignment_support *alignment_support_scheme,
int *misalignment,
gather_scatter_info *gs_info,
internal_fn *lanes_ifn)
internal_fn *lanes_ifn,
vec<int> *elsvals)
{
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
@ -2074,7 +2129,8 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
else if (slp_node->ldst_lanes
&& (*lanes_ifn
= (vls_type == VLS_LOAD
? vect_load_lanes_supported (vectype, group_size, masked_p)
? vect_load_lanes_supported (vectype, group_size,
masked_p, elsvals)
: vect_store_lanes_supported (vectype, group_size,
masked_p))) != IFN_LAST)
*memory_access_type = VMAT_LOAD_STORE_LANES;
@ -2282,7 +2338,8 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
/* Otherwise try using LOAD/STORE_LANES. */
*lanes_ifn
= vls_type == VLS_LOAD
? vect_load_lanes_supported (vectype, group_size, masked_p)
? vect_load_lanes_supported (vectype, group_size, masked_p,
elsvals)
: vect_store_lanes_supported (vectype, group_size,
masked_p);
if (*lanes_ifn != IFN_LAST)
@ -2318,7 +2375,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
&& (!slp_node || SLP_TREE_LANES (slp_node) == 1)
&& loop_vinfo
&& vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
masked_p, gs_info))
masked_p, gs_info, elsvals))
*memory_access_type = VMAT_GATHER_SCATTER;
if (*memory_access_type == VMAT_GATHER_SCATTER
@ -2380,7 +2437,10 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
SLP says whether we're performing SLP rather than loop vectorization.
MASKED_P is true if the statement is conditional on a vectorized mask.
VECTYPE is the vector type that the vectorized statements will use.
NCOPIES is the number of vector statements that will be needed. */
NCOPIES is the number of vector statements that will be needed.
If ELSVALS is nonzero the supported else values will be stored in the
vector ELSVALS points to. */
static bool
get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
@ -2392,7 +2452,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
dr_alignment_support *alignment_support_scheme,
int *misalignment,
gather_scatter_info *gs_info,
internal_fn *lanes_ifn)
internal_fn *lanes_ifn,
vec<int> *elsvals = nullptr)
{
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
@ -2401,7 +2462,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
*memory_access_type = VMAT_GATHER_SCATTER;
if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
elsvals))
gcc_unreachable ();
/* When using internal functions, we rely on pattern recognition
to convert the type of the offset to the type that the target
@ -2455,7 +2517,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
masked_p,
vls_type, memory_access_type, poffset,
alignment_support_scheme,
misalignment, gs_info, lanes_ifn))
misalignment, gs_info, lanes_ifn,
elsvals))
return false;
}
else if (STMT_VINFO_STRIDED_P (stmt_info))
@ -2463,7 +2526,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
gcc_assert (!slp_node);
if (loop_vinfo
&& vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
masked_p, gs_info))
masked_p, gs_info, elsvals))
*memory_access_type = VMAT_GATHER_SCATTER;
else
*memory_access_type = VMAT_ELEMENTWISE;
@ -2732,6 +2795,30 @@ vect_build_zero_merge_argument (vec_info *vinfo,
return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
}
/* Return the corresponding else value for an else value constant
ELSVAL with type TYPE. */
tree
vect_get_mask_load_else (int elsval, tree type)
{
tree els;
if (elsval == MASK_LOAD_ELSE_UNDEFINED)
{
tree tmp = create_tmp_var (type);
/* No need to warn about anything. */
TREE_NO_WARNING (tmp) = 1;
els = get_or_create_ssa_default_def (cfun, tmp);
}
else if (elsval == MASK_LOAD_ELSE_M1)
els = build_minus_one_cst (type);
else if (elsval == MASK_LOAD_ELSE_ZERO)
els = build_zero_cst (type);
else
gcc_unreachable ();
return els;
}
/* Build a gather load call while vectorizing STMT_INFO. Insert new
instructions before GSI and add them to VEC_STMT. GS_INFO describes
the gather load operation. If the load is conditional, MASK is the
@ -10031,6 +10118,7 @@ vectorizable_load (vec_info *vinfo,
gather_scatter_info gs_info;
tree ref_type;
enum vect_def_type mask_dt = vect_unknown_def_type;
enum vect_def_type els_dt = vect_unknown_def_type;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
@ -10043,8 +10131,12 @@ vectorizable_load (vec_info *vinfo,
return false;
tree mask = NULL_TREE, mask_vectype = NULL_TREE;
tree els = NULL_TREE; tree els_vectype = NULL_TREE;
int mask_index = -1;
int els_index = -1;
slp_tree slp_op = NULL;
slp_tree els_op = NULL;
if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
{
scalar_dest = gimple_assign_lhs (assign);
@ -10084,6 +10176,15 @@ vectorizable_load (vec_info *vinfo,
&& !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
&mask, &slp_op, &mask_dt, &mask_vectype))
return false;
els_index = internal_fn_else_index (ifn);
if (els_index >= 0 && slp_node)
els_index = vect_slp_child_index_for_operand
(call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
if (els_index >= 0
&& !vect_is_simple_use (vinfo, stmt_info, slp_node, els_index,
&els, &els_op, &els_dt, &els_vectype))
return false;
}
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
@ -10186,12 +10287,23 @@ vectorizable_load (vec_info *vinfo,
int misalignment;
poly_int64 poffset;
internal_fn lanes_ifn;
auto_vec<int> elsvals;
int maskload_elsval = 0;
bool need_zeroing = false;
if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
ncopies, &memory_access_type, &poffset,
&alignment_support_scheme, &misalignment, &gs_info,
&lanes_ifn))
&lanes_ifn, &elsvals))
return false;
/* We might need to explicitly zero inactive elements if there are
padding bits in the type that might leak otherwise.
Refer to PR115336. */
tree scalar_type = TREE_TYPE (scalar_dest);
bool type_mode_padding_p
= TYPE_PRECISION (scalar_type) < GET_MODE_PRECISION (GET_MODE_INNER (mode));
/* ??? The following checks should really be part of
get_group_load_store_type. */
if (slp
@ -10255,7 +10367,8 @@ vectorizable_load (vec_info *vinfo,
machine_mode vec_mode = TYPE_MODE (vectype);
if (!VECTOR_MODE_P (vec_mode)
|| !can_vec_mask_load_store_p (vec_mode,
TYPE_MODE (mask_vectype), true))
TYPE_MODE (mask_vectype),
true, NULL, &elsvals))
return false;
}
else if (memory_access_type != VMAT_LOAD_STORE_LANES
@ -10310,7 +10423,7 @@ vectorizable_load (vec_info *vinfo,
check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
VLS_LOAD, group_size,
memory_access_type, &gs_info,
mask);
mask, &elsvals);
if (dump_enabled_p ()
&& memory_access_type != VMAT_ELEMENTWISE
@ -10324,6 +10437,36 @@ vectorizable_load (vec_info *vinfo,
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
}
else
{
/* Here just get the else values. */
if (loop_vinfo
&& LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
VLS_LOAD, group_size,
memory_access_type, &gs_info,
mask, &elsvals);
}
/* If the type needs padding we must zero inactive elements.
Check if we can do that with a VEC_COND_EXPR and store the
elsval we choose in MASKLOAD_ELSVAL. */
if (elsvals.length ()
&& type_mode_padding_p
&& !elsvals.contains (MASK_LOAD_ELSE_ZERO)
&& !expand_vec_cond_expr_p (vectype, truth_type_for (vectype)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"cannot zero inactive elements.\n");
return false;
}
/* For now just use the first available else value.
get_supported_else_vals tries MASK_LOAD_ELSE_ZERO first so we will
select it here if it is supported. */
if (elsvals.length ())
maskload_elsval = *elsvals.begin ();
if (!slp)
gcc_assert (memory_access_type
@ -10994,6 +11137,7 @@ vectorizable_load (vec_info *vinfo,
}
tree vec_mask = NULL_TREE;
tree vec_els = NULL_TREE;
if (memory_access_type == VMAT_LOAD_STORE_LANES)
{
gcc_assert (alignment_support_scheme == dr_aligned
@ -11084,6 +11228,14 @@ vectorizable_load (vec_info *vinfo,
}
}
if (final_mask)
{
vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
if (type_mode_padding_p
&& maskload_elsval != MASK_LOAD_ELSE_ZERO)
need_zeroing = true;
}
gcall *call;
if (final_len && final_mask)
{
@ -11092,9 +11244,10 @@ vectorizable_load (vec_info *vinfo,
VEC_MASK, LEN, BIAS). */
unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
tree alias_ptr = build_int_cst (ref_type, align);
call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 6,
dataref_ptr, alias_ptr,
final_mask, final_len, bias);
final_mask, vec_els,
final_len, bias);
}
else if (final_mask)
{
@ -11103,9 +11256,9 @@ vectorizable_load (vec_info *vinfo,
VEC_MASK). */
unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
tree alias_ptr = build_int_cst (ref_type, align);
call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4,
dataref_ptr, alias_ptr,
final_mask);
final_mask, vec_els);
}
else
{
@ -11124,7 +11277,8 @@ vectorizable_load (vec_info *vinfo,
for (unsigned i = 0; i < group_size; i++)
{
new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
vec_array, i);
vec_array, i, need_zeroing,
final_mask);
if (slp)
slp_node->push_vec_def (new_temp);
else
@ -11254,25 +11408,36 @@ vectorizable_load (vec_info *vinfo,
}
}
if (final_mask)
{
vec_els = vect_get_mask_load_else
(maskload_elsval, vectype);
if (type_mode_padding_p
&& maskload_elsval != MASK_LOAD_ELSE_ZERO)
need_zeroing = true;
}
gcall *call;
if (final_len && final_mask)
{
if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
call = gimple_build_call_internal (
IFN_MASK_LEN_GATHER_LOAD, 7, dataref_ptr, vec_offset,
scale, zero, final_mask, final_len, bias);
IFN_MASK_LEN_GATHER_LOAD, 8, dataref_ptr, vec_offset,
scale, zero, final_mask, vec_els, final_len, bias);
else
/* Non-vector offset indicates that prefer to take
MASK_LEN_STRIDED_LOAD instead of the
MASK_LEN_GATHER_LOAD with direct stride arg. */
call = gimple_build_call_internal (
IFN_MASK_LEN_STRIDED_LOAD, 6, dataref_ptr, vec_offset,
zero, final_mask, final_len, bias);
IFN_MASK_LEN_STRIDED_LOAD, 7, dataref_ptr, vec_offset,
zero, final_mask, vec_els, final_len, bias);
}
else if (final_mask)
call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
dataref_ptr, vec_offset,
scale, zero, final_mask);
call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
6, dataref_ptr,
vec_offset, scale,
zero, final_mask,
vec_els);
else
call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
dataref_ptr, vec_offset,
@ -11483,10 +11648,28 @@ vectorizable_load (vec_info *vinfo,
vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
new_stmt = gimple_build_assign (vec_dest, data_ref);
}
new_temp = make_ssa_name (vec_dest, new_stmt);
new_temp = need_zeroing
? make_ssa_name (vectype)
: make_ssa_name (vec_dest, new_stmt);
gimple_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
/* If we need to explicitly zero inactive elements emit a
VEC_COND_EXPR that does so. */
if (need_zeroing)
{
vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
vectype);
tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
new_stmt
= gimple_build_assign (new_temp2, VEC_COND_EXPR,
final_mask, new_temp, vec_els);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
gsi);
new_temp = new_temp2;
}
/* Store vector loads in the corresponding SLP_NODE. */
if (slp)
slp_node->push_vec_def (new_stmt);
@ -11586,6 +11769,7 @@ vectorizable_load (vec_info *vinfo,
tree final_mask = NULL_TREE;
tree final_len = NULL_TREE;
tree bias = NULL_TREE;
if (!costing_p)
{
if (mask)
@ -11678,15 +11862,24 @@ vectorizable_load (vec_info *vinfo,
bias = build_int_cst (intQI_type_node, biasval);
}
tree vec_els;
if (final_len)
{
tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
gcall *call;
if (partial_ifn == IFN_MASK_LEN_LOAD)
call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
dataref_ptr, ptr,
final_mask, final_len,
bias);
{
vec_els = vect_get_mask_load_else
(maskload_elsval, vectype);
if (type_mode_padding_p
&& maskload_elsval != MASK_LOAD_ELSE_ZERO)
need_zeroing = true;
call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
6, dataref_ptr, ptr,
final_mask, vec_els,
final_len, bias);
}
else
call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
dataref_ptr, ptr,
@ -11713,9 +11906,15 @@ vectorizable_load (vec_info *vinfo,
else if (final_mask)
{
tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
vec_els = vect_get_mask_load_else
(maskload_elsval, vectype);
if (type_mode_padding_p
&& maskload_elsval != MASK_LOAD_ELSE_ZERO)
need_zeroing = true;
gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
dataref_ptr, ptr,
final_mask);
final_mask,
vec_els);
gimple_call_set_nothrow (call, true);
new_stmt = call;
data_ref = NULL_TREE;
@ -11996,9 +12195,28 @@ vectorizable_load (vec_info *vinfo,
vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
new_stmt = gimple_build_assign (vec_dest, data_ref);
}
new_temp = make_ssa_name (vec_dest, new_stmt);
new_temp = need_zeroing
? make_ssa_name (vectype)
: make_ssa_name (vec_dest, new_stmt);
gimple_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
/* If we need to explicitly zero inactive elements emit a
VEC_COND_EXPR that does so. */
if (need_zeroing)
{
vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
vectype);
tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
new_stmt
= gimple_build_assign (new_temp2, VEC_COND_EXPR,
final_mask, new_temp, vec_els);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
gsi);
new_temp = new_temp2;
}
}
/* 3. Handle explicit realignment if necessary/supported.

gcc/tree-vectorizer.h

@ -2447,9 +2447,11 @@ extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
tree, int, internal_fn *, tree *);
tree, int, internal_fn *, tree *,
vec<int> * = nullptr);
extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
gather_scatter_info *);
gather_scatter_info *,
vec<int> * = nullptr);
extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
vec<data_reference_p> *,
vec<int> *, int);
@ -2467,7 +2469,8 @@ extern tree vect_create_destination_var (tree, tree);
extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT,
bool, vec<int> * = nullptr);
extern void vect_permute_store_chain (vec_info *, vec<tree> &,
unsigned int, stmt_vec_info,
gimple_stmt_iterator *, vec<tree> *);
@ -2613,6 +2616,7 @@ extern int vect_slp_child_index_for_operand (const gimple *, int op, bool);
extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree,
gimple_stmt_iterator *);
extern tree vect_get_mask_load_else (int, tree);
/* In tree-vect-patterns.cc. */
extern void