mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
Add X86_TUNE_AVX512_TWO_EPILOGUES, enable for Zen4 and Zen5
The following adds X86_TUNE_AVX512_TWO_EPILOGUES tuning and directs the vectorizer to produce both a vector AVX2 and a vector SSE epilogue for AVX512 vectorized loops when set. The tuning is enabled by default for Zen4 and Zen5, where I benchmarked it to be overall positive on SPEC CPU 2017 in both performance and overall code size. In particular it speeds up 525.x264_r, which with only an AVX2 epilogue ends up in unvectorized code at the moment.

* config/i386/i386.cc (ix86_vector_costs::finish_cost): Set m_suggested_epilogue_mode according to X86_TUNE_AVX512_TWO_EPILOGUES.
* config/i386/x86-tune.def (X86_TUNE_AVX512_TWO_EPILOGUES): Add. Enable for znver4 and znver5.
This commit is contained in:
parent
82d955b0a8
commit
9a62c14958
@ -25353,6 +25353,18 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
|
||||
&& TARGET_AVX256_AVOID_VEC_PERM)
|
||||
m_costs[i] = INT_MAX;
|
||||
|
||||
/* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
|
||||
an AVX2 and an SSE epilogue for AVX512 vectorized loops. */
|
||||
if (loop_vinfo
|
||||
&& ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
|
||||
{
|
||||
if (GET_MODE_SIZE (loop_vinfo->vector_mode) == 64)
|
||||
m_suggested_epilogue_mode = V32QImode;
|
||||
else if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
|
||||
&& GET_MODE_SIZE (loop_vinfo->vector_mode) == 32)
|
||||
m_suggested_epilogue_mode = V16QImode;
|
||||
}
|
||||
|
||||
vector_costs::finish_cost (scalar_costs);
|
||||
}
|
||||
|
||||
|
@ -597,6 +597,11 @@ DEF_TUNE (X86_TUNE_AVX512_MOVE_BY_PIECES, "avx512_move_by_pieces",
|
||||
DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces",
|
||||
m_SAPPHIRERAPIDS | m_ZNVER4 | m_ZNVER5)
|
||||
|
||||
/* X86_TUNE_AVX512_TWO_EPILOGUES: Use two vector epilogues for 512-bit
|
||||
vectorized loops. */
|
||||
DEF_TUNE (X86_TUNE_AVX512_TWO_EPILOGUES, "avx512_two_epilogues",
|
||||
m_ZNVER4 | m_ZNVER5)
|
||||
|
||||
/*****************************************************************************/
|
||||
/*****************************************************************************/
|
||||
/* Historical relics: tuning flags that help specific old CPU designs */
|
||||
|
Loading…
Reference in New Issue
Block a user