diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 5bbeeddd854..c8dc7153298 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -3100,12 +3100,12 @@ vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value, The analysis resulting in this epilogue loop's loop_vec_info was performed in the same vect_analyze_loop call as the main loop's. At that time vect_analyze_loop constructs a list of accepted loop_vec_info's for lower - vectorization factors than the main loop. This list is stored in the main - loop's loop_vec_info in the 'epilogue_vinfos' member. Everytime we decide to - vectorize the epilogue loop for a lower vectorization factor, the - loop_vec_info sitting at the top of the epilogue_vinfos list is removed, - updated and linked to the epilogue loop. This is later used to vectorize - the epilogue. The reason the loop_vec_info needs updating is that it was + vectorization factors than the main loop. This list is chained in the + loop's loop_vec_info in the 'epilogue_vinfo' member. When we decide to + vectorize the epilogue loop for a lower vectorization factor, the + loop_vec_info in epilogue_vinfo is updated and linked to the epilogue loop. + This is later used to vectorize the epilogue. + The reason the loop_vec_info needs updating is that it was constructed based on the original main loop, and the epilogue loop is a copy of this loop, so all links pointing to statements in the original loop need updating. Furthermore, these loop_vec_infos share the @@ -3128,7 +3128,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, profile_probability prob_prolog, prob_vector, prob_epilog; int estimated_vf; int prolog_peeling = 0; - bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0; + bool vect_epilogues = loop_vinfo->epilogue_vinfo != NULL; /* We currently do not support prolog peeling if the target alignment is not known at compile time. 'vect_gen_prolog_loop_niters' depends on the target alignment being constant. */ @@ -3255,13 +3255,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, else niters_prolog = build_int_cst (type, 0); - loop_vec_info epilogue_vinfo = NULL; - if (vect_epilogues) - { - epilogue_vinfo = loop_vinfo->epilogue_vinfos[0]; - loop_vinfo->epilogue_vinfos.ordered_remove (0); - } - + loop_vec_info epilogue_vinfo = loop_vinfo->epilogue_vinfo; tree niters_vector_mult_vf = NULL_TREE; /* Saving NITERs before the loop, as this may be changed by prologue. */ tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo); diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 6059ce031d1..f4f16fc07a2 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -1071,7 +1071,9 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) has_mask_store (false), scalar_loop_scaling (profile_probability::uninitialized ()), scalar_loop (NULL), + main_loop_info (NULL), orig_loop_info (NULL), + epilogue_vinfo (NULL), drs_advanced_by (NULL_TREE), vec_loop_iv_exit (NULL), vec_epilogue_loop_iv_exit (NULL), @@ -1128,8 +1130,6 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) } } } - - epilogue_vinfos.create (6); } /* Free all levels of rgroup CONTROLS. */ @@ -1155,7 +1155,6 @@ _loop_vec_info::~_loop_vec_info () release_vec_loop_controls (&lens); delete ivexpr_map; delete scan_map; - epilogue_vinfos.release (); delete scalar_costs; delete vector_costs; @@ -1936,15 +1935,20 @@ vect_analyze_loop_form (class loop *loop, gimple *loop_vectorized_call, loop_vec_info vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared, const vect_loop_form_info *info, - loop_vec_info main_loop_info) + loop_vec_info orig_loop_info) { loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared); LOOP_VINFO_NITERSM1 (loop_vinfo) = info->number_of_iterationsm1; LOOP_VINFO_NITERS (loop_vinfo) = info->number_of_iterations; LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = info->number_of_iterations; - LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = main_loop_info; + LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_info; + if (orig_loop_info && LOOP_VINFO_EPILOGUE_P (orig_loop_info)) + LOOP_VINFO_MAIN_LOOP_INFO (loop_vinfo) + = LOOP_VINFO_MAIN_LOOP_INFO (orig_loop_info); + else + LOOP_VINFO_MAIN_LOOP_INFO (loop_vinfo) = orig_loop_info; /* Also record the assumptions for versioning. */ - if (!integer_onep (info->assumptions) && !main_loop_info) + if (!integer_onep (info->assumptions) && !orig_loop_info) LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = info->assumptions; for (gcond *cond : info->conds) @@ -2314,17 +2318,19 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo, { loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); + loop_vec_info main_loop_vinfo + = LOOP_VINFO_MAIN_LOOP_INFO (loop_vinfo); unsigned lowest_vf = constant_lower_bound (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)); int prolog_peeling = 0; - if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) - prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo); + if (!vect_use_loop_mask_for_alignment_p (main_loop_vinfo)) + prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (main_loop_vinfo); if (prolog_peeling >= 0 && known_eq (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo), lowest_vf)) { unsigned gap - = LOOP_VINFO_PEELING_FOR_GAPS (orig_loop_vinfo) ? 1 : 0; + = LOOP_VINFO_PEELING_FOR_GAPS (main_loop_vinfo) ? 1 : 0; scalar_niters = ((scalar_niters - gap - prolog_peeling) % lowest_vf + gap); } @@ -3440,7 +3446,7 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, return true; } -/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if MAIN_LOOP_VINFO is +/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if ORIG_LOOP_VINFO is not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance MODE_I to the next mode useful to analyze. Return the loop_vinfo on success and wrapped null on failure. */ @@ -3448,13 +3454,13 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, static opt_loop_vec_info vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, const vect_loop_form_info *loop_form_info, - loop_vec_info main_loop_vinfo, + loop_vec_info orig_loop_vinfo, const vector_modes &vector_modes, unsigned &mode_i, machine_mode &autodetected_vector_mode, bool &fatal) { loop_vec_info loop_vinfo - = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo); + = vect_create_loop_vinfo (loop, shared, loop_form_info, orig_loop_vinfo); machine_mode vector_mode = vector_modes[mode_i]; loop_vinfo->vector_mode = vector_mode; @@ -3471,7 +3477,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, res ? "succeeded" : "failed", GET_MODE_NAME (loop_vinfo->vector_mode)); - if (res && !main_loop_vinfo && suggested_unroll_factor > 1) + if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) && suggested_unroll_factor > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -3480,7 +3486,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, suggested_unroll_factor, slp_done_for_suggested_uf ? "on" : "off"); loop_vec_info unroll_vinfo - = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo); + = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL); unroll_vinfo->vector_mode = vector_mode; unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor; opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL, @@ -3534,7 +3540,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, { delete loop_vinfo; if (fatal) - gcc_checking_assert (main_loop_vinfo == NULL); + gcc_checking_assert (orig_loop_vinfo == NULL); return opt_loop_vec_info::propagate_failure (res); } @@ -3746,21 +3752,17 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, if (loop_vinfo) { - if (pick_lowest_cost_p) + if (pick_lowest_cost_p + && first_loop_vinfo->epilogue_vinfo + && vect_joust_loop_vinfos (loop_vinfo, + first_loop_vinfo->epilogue_vinfo)) { - /* Keep trying to roll back vectorization attempts while the - loop_vec_infos they produced were worse than this one. */ - vec &vinfos = first_loop_vinfo->epilogue_vinfos; - while (!vinfos.is_empty () - && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ())) - { - gcc_assert (vect_epilogues); - delete vinfos.pop (); - } + gcc_assert (vect_epilogues); + delete first_loop_vinfo->epilogue_vinfo; + first_loop_vinfo->epilogue_vinfo = nullptr; } - /* For now only allow one epilogue loop. */ - if (first_loop_vinfo->epilogue_vinfos.is_empty ()) - first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo); + if (!first_loop_vinfo->epilogue_vinfo) + first_loop_vinfo->epilogue_vinfo = loop_vinfo; else { delete loop_vinfo; @@ -3779,11 +3781,12 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, } - if (!first_loop_vinfo->epilogue_vinfos.is_empty ()) + if (first_loop_vinfo->epilogue_vinfo) { poly_uint64 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo); - for (auto epilog_vinfo : first_loop_vinfo->epilogue_vinfos) + loop_vec_info epilog_vinfo = first_loop_vinfo->epilogue_vinfo; + do { poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (epilog_vinfo); gcc_assert (!LOOP_REQUIRES_VERSIONING (epilog_vinfo) @@ -3791,13 +3794,15 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, /* Keep track of the known smallest versioning threshold. */ if (ordered_p (lowest_th, th)) lowest_th = ordered_min (lowest_th, th); + epilog_vinfo = epilog_vinfo->epilogue_vinfo; } + while (epilog_vinfo); LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "***** Choosing epilogue vector mode %s\n", GET_MODE_NAME - (first_loop_vinfo->epilogue_vinfos[0]->vector_mode)); + (first_loop_vinfo->epilogue_vinfo->vector_mode)); } return first_loop_vinfo; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 5a1bd237beb..88db34b87b6 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -987,12 +987,17 @@ public: class loop *scalar_loop; /* For loops being epilogues of already vectorized loops - this points to the original vectorized loop. Otherwise NULL. */ + this points to the main vectorized loop. Otherwise NULL. */ + _loop_vec_info *main_loop_info; + + /* For loops being epilogues of already vectorized loops + this points to the preceeding vectorized (possibly epilogue) loop. + Otherwise NULL. */ _loop_vec_info *orig_loop_info; - /* Used to store loop_vec_infos of epilogues of this loop during + /* Used to store loop_vec_infos of the epilogue of this loop during analysis. */ - vec<_loop_vec_info *> epilogue_vinfos; + _loop_vec_info *epilogue_vinfo; /* If this is an epilogue loop the DR advancement applied. */ tree drs_advanced_by; @@ -1096,6 +1101,7 @@ public: #define LOOP_VINFO_SCALAR_LOOP_SCALING(L) (L)->scalar_loop_scaling #define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec +#define LOOP_VINFO_MAIN_LOOP_INFO(L) (L)->main_loop_info #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor