From 42d99f63cfccabe1d19177993abf4f1219d6f967 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 4 Nov 2024 12:58:41 +0100 Subject: [PATCH] Add LOOP_VINFO_MAIN_LOOP_INFO The following introduces LOOP_VINFO_MAIN_LOOP_INFO alongside LOOP_VINFO_ORIG_LOOP_INFO so one can have both access to the main vectorized loop info and the preceeding vectorized epilogue. This is critical for correctness as we need to disallow never executed epilogues by costing in vect_analyze_loop_costing as we assume those do not exist when deciding to add a skip-vector edge during peeling. The patch also changes how multiple vector epilogues are handled - instead of the epilogue_vinfos array in the main loop info we now record the single epilogue_vinfo there and further epilogues in the epilogue_vinfo member of the epilogue info. This simplifies code. * tree-vectorizer.h (_loop_vec_info::main_loop_info): New. (LOOP_VINFO_MAIN_LOOP_INFO): Likewise. (_loop_vec_info::epilogue_vinfo): Change from epilogue_vinfos from array to single element. * tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Initialize main_loop_info and epilogue_vinfo. Remove epilogue_vinfos allocation. (_loop_vec_info::~_loop_vec_info): Do not release epilogue_vinfos. (vect_create_loop_vinfo): Rename parameter, set LOOP_VINFO_MAIN_LOOP_INFO. (vect_analyze_loop_1): Rename parameter. (vect_analyze_loop_costing): Properly distinguish between the main vector loop and the preceeding epilogue. (vect_analyze_loop): Change for epilogue_vinfos no longer being a vector. * tree-vect-loop-manip.cc (vect_do_peeling): Simplify and thereby handle a vector epilogue of a vector epilogue. --- gcc/tree-vect-loop-manip.cc | 22 +++++------- gcc/tree-vect-loop.cc | 67 ++++++++++++++++++++----------------- gcc/tree-vectorizer.h | 12 +++++-- 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 5bbeeddd854..c8dc7153298 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -3100,12 +3100,12 @@ vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value, The analysis resulting in this epilogue loop's loop_vec_info was performed in the same vect_analyze_loop call as the main loop's. At that time vect_analyze_loop constructs a list of accepted loop_vec_info's for lower - vectorization factors than the main loop. This list is stored in the main - loop's loop_vec_info in the 'epilogue_vinfos' member. Everytime we decide to - vectorize the epilogue loop for a lower vectorization factor, the - loop_vec_info sitting at the top of the epilogue_vinfos list is removed, - updated and linked to the epilogue loop. This is later used to vectorize - the epilogue. The reason the loop_vec_info needs updating is that it was + vectorization factors than the main loop. This list is chained in the + loop's loop_vec_info in the 'epilogue_vinfo' member. When we decide to + vectorize the epilogue loop for a lower vectorization factor, the + loop_vec_info in epilogue_vinfo is updated and linked to the epilogue loop. + This is later used to vectorize the epilogue. + The reason the loop_vec_info needs updating is that it was constructed based on the original main loop, and the epilogue loop is a copy of this loop, so all links pointing to statements in the original loop need updating. Furthermore, these loop_vec_infos share the @@ -3128,7 +3128,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, profile_probability prob_prolog, prob_vector, prob_epilog; int estimated_vf; int prolog_peeling = 0; - bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0; + bool vect_epilogues = loop_vinfo->epilogue_vinfo != NULL; /* We currently do not support prolog peeling if the target alignment is not known at compile time. 'vect_gen_prolog_loop_niters' depends on the target alignment being constant. */ @@ -3255,13 +3255,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, else niters_prolog = build_int_cst (type, 0); - loop_vec_info epilogue_vinfo = NULL; - if (vect_epilogues) - { - epilogue_vinfo = loop_vinfo->epilogue_vinfos[0]; - loop_vinfo->epilogue_vinfos.ordered_remove (0); - } - + loop_vec_info epilogue_vinfo = loop_vinfo->epilogue_vinfo; tree niters_vector_mult_vf = NULL_TREE; /* Saving NITERs before the loop, as this may be changed by prologue. */ tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo); diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 6059ce031d1..f4f16fc07a2 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -1071,7 +1071,9 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) has_mask_store (false), scalar_loop_scaling (profile_probability::uninitialized ()), scalar_loop (NULL), + main_loop_info (NULL), orig_loop_info (NULL), + epilogue_vinfo (NULL), drs_advanced_by (NULL_TREE), vec_loop_iv_exit (NULL), vec_epilogue_loop_iv_exit (NULL), @@ -1128,8 +1130,6 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) } } } - - epilogue_vinfos.create (6); } /* Free all levels of rgroup CONTROLS. */ @@ -1155,7 +1155,6 @@ _loop_vec_info::~_loop_vec_info () release_vec_loop_controls (&lens); delete ivexpr_map; delete scan_map; - epilogue_vinfos.release (); delete scalar_costs; delete vector_costs; @@ -1936,15 +1935,20 @@ vect_analyze_loop_form (class loop *loop, gimple *loop_vectorized_call, loop_vec_info vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared, const vect_loop_form_info *info, - loop_vec_info main_loop_info) + loop_vec_info orig_loop_info) { loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared); LOOP_VINFO_NITERSM1 (loop_vinfo) = info->number_of_iterationsm1; LOOP_VINFO_NITERS (loop_vinfo) = info->number_of_iterations; LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = info->number_of_iterations; - LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = main_loop_info; + LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_info; + if (orig_loop_info && LOOP_VINFO_EPILOGUE_P (orig_loop_info)) + LOOP_VINFO_MAIN_LOOP_INFO (loop_vinfo) + = LOOP_VINFO_MAIN_LOOP_INFO (orig_loop_info); + else + LOOP_VINFO_MAIN_LOOP_INFO (loop_vinfo) = orig_loop_info; /* Also record the assumptions for versioning. */ - if (!integer_onep (info->assumptions) && !main_loop_info) + if (!integer_onep (info->assumptions) && !orig_loop_info) LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = info->assumptions; for (gcond *cond : info->conds) @@ -2314,17 +2318,19 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo, { loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); + loop_vec_info main_loop_vinfo + = LOOP_VINFO_MAIN_LOOP_INFO (loop_vinfo); unsigned lowest_vf = constant_lower_bound (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)); int prolog_peeling = 0; - if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) - prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo); + if (!vect_use_loop_mask_for_alignment_p (main_loop_vinfo)) + prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (main_loop_vinfo); if (prolog_peeling >= 0 && known_eq (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo), lowest_vf)) { unsigned gap - = LOOP_VINFO_PEELING_FOR_GAPS (orig_loop_vinfo) ? 1 : 0; + = LOOP_VINFO_PEELING_FOR_GAPS (main_loop_vinfo) ? 1 : 0; scalar_niters = ((scalar_niters - gap - prolog_peeling) % lowest_vf + gap); } @@ -3440,7 +3446,7 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, return true; } -/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if MAIN_LOOP_VINFO is +/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if ORIG_LOOP_VINFO is not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance MODE_I to the next mode useful to analyze. Return the loop_vinfo on success and wrapped null on failure. */ @@ -3448,13 +3454,13 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, static opt_loop_vec_info vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, const vect_loop_form_info *loop_form_info, - loop_vec_info main_loop_vinfo, + loop_vec_info orig_loop_vinfo, const vector_modes &vector_modes, unsigned &mode_i, machine_mode &autodetected_vector_mode, bool &fatal) { loop_vec_info loop_vinfo - = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo); + = vect_create_loop_vinfo (loop, shared, loop_form_info, orig_loop_vinfo); machine_mode vector_mode = vector_modes[mode_i]; loop_vinfo->vector_mode = vector_mode; @@ -3471,7 +3477,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, res ? "succeeded" : "failed", GET_MODE_NAME (loop_vinfo->vector_mode)); - if (res && !main_loop_vinfo && suggested_unroll_factor > 1) + if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) && suggested_unroll_factor > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -3480,7 +3486,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, suggested_unroll_factor, slp_done_for_suggested_uf ? "on" : "off"); loop_vec_info unroll_vinfo - = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo); + = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL); unroll_vinfo->vector_mode = vector_mode; unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor; opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL, @@ -3534,7 +3540,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, { delete loop_vinfo; if (fatal) - gcc_checking_assert (main_loop_vinfo == NULL); + gcc_checking_assert (orig_loop_vinfo == NULL); return opt_loop_vec_info::propagate_failure (res); } @@ -3746,21 +3752,17 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, if (loop_vinfo) { - if (pick_lowest_cost_p) + if (pick_lowest_cost_p + && first_loop_vinfo->epilogue_vinfo + && vect_joust_loop_vinfos (loop_vinfo, + first_loop_vinfo->epilogue_vinfo)) { - /* Keep trying to roll back vectorization attempts while the - loop_vec_infos they produced were worse than this one. */ - vec &vinfos = first_loop_vinfo->epilogue_vinfos; - while (!vinfos.is_empty () - && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ())) - { - gcc_assert (vect_epilogues); - delete vinfos.pop (); - } + gcc_assert (vect_epilogues); + delete first_loop_vinfo->epilogue_vinfo; + first_loop_vinfo->epilogue_vinfo = nullptr; } - /* For now only allow one epilogue loop. */ - if (first_loop_vinfo->epilogue_vinfos.is_empty ()) - first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo); + if (!first_loop_vinfo->epilogue_vinfo) + first_loop_vinfo->epilogue_vinfo = loop_vinfo; else { delete loop_vinfo; @@ -3779,11 +3781,12 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, } - if (!first_loop_vinfo->epilogue_vinfos.is_empty ()) + if (first_loop_vinfo->epilogue_vinfo) { poly_uint64 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo); - for (auto epilog_vinfo : first_loop_vinfo->epilogue_vinfos) + loop_vec_info epilog_vinfo = first_loop_vinfo->epilogue_vinfo; + do { poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (epilog_vinfo); gcc_assert (!LOOP_REQUIRES_VERSIONING (epilog_vinfo) @@ -3791,13 +3794,15 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, /* Keep track of the known smallest versioning threshold. */ if (ordered_p (lowest_th, th)) lowest_th = ordered_min (lowest_th, th); + epilog_vinfo = epilog_vinfo->epilogue_vinfo; } + while (epilog_vinfo); LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "***** Choosing epilogue vector mode %s\n", GET_MODE_NAME - (first_loop_vinfo->epilogue_vinfos[0]->vector_mode)); + (first_loop_vinfo->epilogue_vinfo->vector_mode)); } return first_loop_vinfo; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 5a1bd237beb..88db34b87b6 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -987,12 +987,17 @@ public: class loop *scalar_loop; /* For loops being epilogues of already vectorized loops - this points to the original vectorized loop. Otherwise NULL. */ + this points to the main vectorized loop. Otherwise NULL. */ + _loop_vec_info *main_loop_info; + + /* For loops being epilogues of already vectorized loops + this points to the preceeding vectorized (possibly epilogue) loop. + Otherwise NULL. */ _loop_vec_info *orig_loop_info; - /* Used to store loop_vec_infos of epilogues of this loop during + /* Used to store loop_vec_infos of the epilogue of this loop during analysis. */ - vec<_loop_vec_info *> epilogue_vinfos; + _loop_vec_info *epilogue_vinfo; /* If this is an epilogue loop the DR advancement applied. */ tree drs_advanced_by; @@ -1096,6 +1101,7 @@ public: #define LOOP_VINFO_SCALAR_LOOP_SCALING(L) (L)->scalar_loop_scaling #define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec +#define LOOP_VINFO_MAIN_LOOP_INFO(L) (L)->main_loop_info #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor