mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
PR target/106060: Improved SSE vector constant materialization on x86.
This patch resolves PR target/106060 by providing efficient methods for materializing/synthesizing special "vector" constants on x86. Currently there are three methods of materializing a vector constant; the most general is to load a vector from the constant pool, secondly "duplicated" constants can be synthesized by moving an integer between units and broadcasting (of shuffling it), and finally the special cases of the all-zeros vector and all-ones vectors can be loaded via a single SSE instruction. This patch handle additional cases that can be synthesized in two instructions, loading an all-ones vector followed by another SSE instruction. Following my recent patch for PR target/112992, there's conveniently a single place in i386-expand.cc where these special cases can be handled. Two examples are given in the original bugzilla PR for 106060. __m256i should_be_cmpeq_abs () { return _mm256_set1_epi8 (1); } is now generated (with -O3 -march=x86-64-v3) as: vpcmpeqd %ymm0, %ymm0, %ymm0 vpabsb %ymm0, %ymm0 ret and __m256i should_be_cmpeq_add () { return _mm256_set1_epi8 (-2); } is now generated as: vpcmpeqd %ymm0, %ymm0, %ymm0 vpaddb %ymm0, %ymm0, %ymm0 ret 2024-05-07 Roger Sayle <roger@nextmovesoftware.com> Hongtao Liu <hongtao.liu@intel.com> gcc/ChangeLog PR target/106060 * config/i386/i386-expand.cc (enum ix86_vec_bcast_alg): New. (struct ix86_vec_bcast_map_simode_t): New type for table below. (ix86_vec_bcast_map_simode): Table of SImode constants that may be efficiently synthesized by a ix86_vec_bcast_alg method. (ix86_vec_bcast_map_simode_cmp): New comparator for bsearch. (ix86_vector_duplicate_simode_const): Efficiently synthesize V4SImode and V8SImode constants that duplicate special constants. (ix86_vector_duplicate_value): Attempt to synthesize "special" vector constants using ix86_vector_duplicate_simode_const. * config/i386/i386.cc (ix86_rtx_costs) <case ABS>: ABS of a vector integer mode costs with a single SSE instruction. gcc/testsuite/ChangeLog PR target/106060 * gcc.target/i386/auto-init-8.c: Update test case. * gcc.target/i386/avx512fp16-13.c: Likewise. * gcc.target/i386/pr100865-9a.c: Likewise. * gcc.target/i386/pr101796-1.c: Likewise. * gcc.target/i386/pr106060-1.c: New test case. * gcc.target/i386/pr106060-2.c: Likewise. * gcc.target/i386/pr106060-3.c: Likewise. * gcc.target/i386/pr70314.c: Update test case. * gcc.target/i386/vect-shiftv4qi.c: Likewise. * gcc.target/i386/vect-shiftv8qi.c: Likewise.
This commit is contained in:
parent
0c43c673b0
commit
79649a5dcd
@ -15696,6 +15696,332 @@ s4fma_expand:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* See below where shifts are handled for explanation of this enum. */
|
||||
enum ix86_vec_bcast_alg
|
||||
{
|
||||
VEC_BCAST_PXOR,
|
||||
VEC_BCAST_PCMPEQ,
|
||||
VEC_BCAST_PABSB,
|
||||
VEC_BCAST_PADDB,
|
||||
VEC_BCAST_PSRLW,
|
||||
VEC_BCAST_PSRLD,
|
||||
VEC_BCAST_PSLLW,
|
||||
VEC_BCAST_PSLLD
|
||||
};
|
||||
|
||||
struct ix86_vec_bcast_map_simode_t
|
||||
{
|
||||
unsigned int key;
|
||||
enum ix86_vec_bcast_alg alg;
|
||||
unsigned int arg;
|
||||
};
|
||||
|
||||
/* This table must be kept sorted as values are looked-up using bsearch. */
|
||||
static const ix86_vec_bcast_map_simode_t ix86_vec_bcast_map_simode[] = {
|
||||
{ 0x00000000, VEC_BCAST_PXOR, 0 },
|
||||
{ 0x00000001, VEC_BCAST_PSRLD, 31 },
|
||||
{ 0x00000003, VEC_BCAST_PSRLD, 30 },
|
||||
{ 0x00000007, VEC_BCAST_PSRLD, 29 },
|
||||
{ 0x0000000f, VEC_BCAST_PSRLD, 28 },
|
||||
{ 0x0000001f, VEC_BCAST_PSRLD, 27 },
|
||||
{ 0x0000003f, VEC_BCAST_PSRLD, 26 },
|
||||
{ 0x0000007f, VEC_BCAST_PSRLD, 25 },
|
||||
{ 0x000000ff, VEC_BCAST_PSRLD, 24 },
|
||||
{ 0x000001ff, VEC_BCAST_PSRLD, 23 },
|
||||
{ 0x000003ff, VEC_BCAST_PSRLD, 22 },
|
||||
{ 0x000007ff, VEC_BCAST_PSRLD, 21 },
|
||||
{ 0x00000fff, VEC_BCAST_PSRLD, 20 },
|
||||
{ 0x00001fff, VEC_BCAST_PSRLD, 19 },
|
||||
{ 0x00003fff, VEC_BCAST_PSRLD, 18 },
|
||||
{ 0x00007fff, VEC_BCAST_PSRLD, 17 },
|
||||
{ 0x0000ffff, VEC_BCAST_PSRLD, 16 },
|
||||
{ 0x00010001, VEC_BCAST_PSRLW, 15 },
|
||||
{ 0x0001ffff, VEC_BCAST_PSRLD, 15 },
|
||||
{ 0x00030003, VEC_BCAST_PSRLW, 14 },
|
||||
{ 0x0003ffff, VEC_BCAST_PSRLD, 14 },
|
||||
{ 0x00070007, VEC_BCAST_PSRLW, 13 },
|
||||
{ 0x0007ffff, VEC_BCAST_PSRLD, 13 },
|
||||
{ 0x000f000f, VEC_BCAST_PSRLW, 12 },
|
||||
{ 0x000fffff, VEC_BCAST_PSRLD, 12 },
|
||||
{ 0x001f001f, VEC_BCAST_PSRLW, 11 },
|
||||
{ 0x001fffff, VEC_BCAST_PSRLD, 11 },
|
||||
{ 0x003f003f, VEC_BCAST_PSRLW, 10 },
|
||||
{ 0x003fffff, VEC_BCAST_PSRLD, 10 },
|
||||
{ 0x007f007f, VEC_BCAST_PSRLW, 9 },
|
||||
{ 0x007fffff, VEC_BCAST_PSRLD, 9 },
|
||||
{ 0x00ff00ff, VEC_BCAST_PSRLW, 8 },
|
||||
{ 0x00ffffff, VEC_BCAST_PSRLD, 8 },
|
||||
{ 0x01010101, VEC_BCAST_PABSB, 0 },
|
||||
{ 0x01ff01ff, VEC_BCAST_PSRLW, 7 },
|
||||
{ 0x01ffffff, VEC_BCAST_PSRLD, 7 },
|
||||
{ 0x03ff03ff, VEC_BCAST_PSRLW, 6 },
|
||||
{ 0x03ffffff, VEC_BCAST_PSRLD, 6 },
|
||||
{ 0x07ff07ff, VEC_BCAST_PSRLW, 5 },
|
||||
{ 0x07ffffff, VEC_BCAST_PSRLD, 5 },
|
||||
{ 0x0fff0fff, VEC_BCAST_PSRLW, 4 },
|
||||
{ 0x0fffffff, VEC_BCAST_PSRLD, 4 },
|
||||
{ 0x1fff1fff, VEC_BCAST_PSRLW, 3 },
|
||||
{ 0x1fffffff, VEC_BCAST_PSRLD, 3 },
|
||||
{ 0x3fff3fff, VEC_BCAST_PSRLW, 2 },
|
||||
{ 0x3fffffff, VEC_BCAST_PSRLD, 2 },
|
||||
{ 0x7fff7fff, VEC_BCAST_PSRLW, 1 },
|
||||
{ 0x7fffffff, VEC_BCAST_PSRLD, 1 },
|
||||
{ 0x80000000, VEC_BCAST_PSLLD, 31 },
|
||||
{ 0x80008000, VEC_BCAST_PSLLW, 15 },
|
||||
{ 0xc0000000, VEC_BCAST_PSLLD, 30 },
|
||||
{ 0xc000c000, VEC_BCAST_PSLLW, 14 },
|
||||
{ 0xe0000000, VEC_BCAST_PSLLD, 29 },
|
||||
{ 0xe000e000, VEC_BCAST_PSLLW, 13 },
|
||||
{ 0xf0000000, VEC_BCAST_PSLLD, 28 },
|
||||
{ 0xf000f000, VEC_BCAST_PSLLW, 12 },
|
||||
{ 0xf8000000, VEC_BCAST_PSLLD, 27 },
|
||||
{ 0xf800f800, VEC_BCAST_PSLLW, 11 },
|
||||
{ 0xfc000000, VEC_BCAST_PSLLD, 26 },
|
||||
{ 0xfc00fc00, VEC_BCAST_PSLLW, 10 },
|
||||
{ 0xfe000000, VEC_BCAST_PSLLD, 25 },
|
||||
{ 0xfe00fe00, VEC_BCAST_PSLLW, 9 },
|
||||
{ 0xfefefefe, VEC_BCAST_PADDB, 0 },
|
||||
{ 0xff000000, VEC_BCAST_PSLLD, 24 },
|
||||
{ 0xff00ff00, VEC_BCAST_PSLLW, 8 },
|
||||
{ 0xff800000, VEC_BCAST_PSLLD, 23 },
|
||||
{ 0xff80ff80, VEC_BCAST_PSLLW, 7 },
|
||||
{ 0xffc00000, VEC_BCAST_PSLLD, 22 },
|
||||
{ 0xffc0ffc0, VEC_BCAST_PSLLW, 6 },
|
||||
{ 0xffe00000, VEC_BCAST_PSLLD, 21 },
|
||||
{ 0xffe0ffe0, VEC_BCAST_PSLLW, 5 },
|
||||
{ 0xfff00000, VEC_BCAST_PSLLD, 20 },
|
||||
{ 0xfff0fff0, VEC_BCAST_PSLLW, 4 },
|
||||
{ 0xfff80000, VEC_BCAST_PSLLD, 19 },
|
||||
{ 0xfff8fff8, VEC_BCAST_PSLLW, 3 },
|
||||
{ 0xfffc0000, VEC_BCAST_PSLLD, 18 },
|
||||
{ 0xfffcfffc, VEC_BCAST_PSLLW, 2 },
|
||||
{ 0xfffe0000, VEC_BCAST_PSLLD, 17 },
|
||||
{ 0xfffefffe, VEC_BCAST_PSLLW, 1 },
|
||||
{ 0xffff0000, VEC_BCAST_PSLLD, 16 },
|
||||
{ 0xffff8000, VEC_BCAST_PSLLD, 15 },
|
||||
{ 0xffffc000, VEC_BCAST_PSLLD, 14 },
|
||||
{ 0xffffe000, VEC_BCAST_PSLLD, 13 },
|
||||
{ 0xfffff000, VEC_BCAST_PSLLD, 12 },
|
||||
{ 0xfffff800, VEC_BCAST_PSLLD, 11 },
|
||||
{ 0xfffffc00, VEC_BCAST_PSLLD, 10 },
|
||||
{ 0xfffffe00, VEC_BCAST_PSLLD, 9 },
|
||||
{ 0xffffff00, VEC_BCAST_PSLLD, 8 },
|
||||
{ 0xffffff80, VEC_BCAST_PSLLD, 7 },
|
||||
{ 0xffffffc0, VEC_BCAST_PSLLD, 6 },
|
||||
{ 0xffffffe0, VEC_BCAST_PSLLD, 5 },
|
||||
{ 0xfffffff0, VEC_BCAST_PSLLD, 4 },
|
||||
{ 0xfffffff8, VEC_BCAST_PSLLD, 3 },
|
||||
{ 0xfffffffc, VEC_BCAST_PSLLD, 2 },
|
||||
{ 0xfffffffe, VEC_BCAST_PSLLD, 1 },
|
||||
{ 0xffffffff, VEC_BCAST_PCMPEQ, 0 }
|
||||
};
|
||||
|
||||
/* Comparator for bsearch on ix86_vec_bcast_map. */
|
||||
static int
|
||||
ix86_vec_bcast_map_simode_cmp (const void *key, const void *entry)
|
||||
{
|
||||
return (*(const unsigned int*)key)
|
||||
- ((const ix86_vec_bcast_map_simode_t*)entry)->key;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_vector_duplicate_value. Tries to efficiently
|
||||
materialize V4SImode, V8SImode and V16SImode vectors from SImode
|
||||
integer constants. */
|
||||
static bool
|
||||
ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
|
||||
unsigned int val)
|
||||
{
|
||||
const ix86_vec_bcast_map_simode_t *entry;
|
||||
rtx tmp1, tmp2;
|
||||
|
||||
entry = (const ix86_vec_bcast_map_simode_t*)
|
||||
bsearch(&val, ix86_vec_bcast_map_simode,
|
||||
ARRAY_SIZE (ix86_vec_bcast_map_simode),
|
||||
sizeof (ix86_vec_bcast_map_simode_t),
|
||||
ix86_vec_bcast_map_simode_cmp);
|
||||
if (!entry)
|
||||
return false;
|
||||
|
||||
switch (entry->alg)
|
||||
{
|
||||
case VEC_BCAST_PXOR:
|
||||
if ((mode == V8SImode && !TARGET_AVX2)
|
||||
|| (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512)))
|
||||
return false;
|
||||
emit_move_insn (target, CONST0_RTX (mode));
|
||||
return true;
|
||||
|
||||
case VEC_BCAST_PCMPEQ:
|
||||
if ((mode == V4SImode && !TARGET_SSE2)
|
||||
|| (mode == V8SImode && !TARGET_AVX2)
|
||||
|| (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512)))
|
||||
return false;
|
||||
emit_move_insn (target, CONSTM1_RTX (mode));
|
||||
return true;
|
||||
|
||||
case VEC_BCAST_PABSB:
|
||||
if (mode == V4SImode && TARGET_SSE2)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V16QImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V16QImode));
|
||||
tmp2 = gen_reg_rtx (V16QImode);
|
||||
emit_insn (gen_absv16qi2 (tmp2, tmp1));
|
||||
}
|
||||
else if (mode == V8SImode && TARGET_AVX2)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V32QImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V32QImode));
|
||||
tmp2 = gen_reg_rtx (V32QImode);
|
||||
emit_insn (gen_absv32qi2 (tmp2, tmp1));
|
||||
}
|
||||
else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V64QImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V64QImode));
|
||||
tmp2 = gen_reg_rtx (V64QImode);
|
||||
emit_insn (gen_absv64qi2 (tmp2, tmp1));
|
||||
}
|
||||
else
|
||||
return false;
|
||||
break;
|
||||
|
||||
case VEC_BCAST_PADDB:
|
||||
if (mode == V4SImode && TARGET_SSE2)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V16QImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V16QImode));
|
||||
tmp2 = gen_reg_rtx (V16QImode);
|
||||
emit_insn (gen_addv16qi3 (tmp2, tmp1, tmp1));
|
||||
}
|
||||
else if (mode == V8SImode && TARGET_AVX2)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V32QImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V32QImode));
|
||||
tmp2 = gen_reg_rtx (V32QImode);
|
||||
emit_insn (gen_addv32qi3 (tmp2, tmp1, tmp1));
|
||||
}
|
||||
else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V64QImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V64QImode));
|
||||
tmp2 = gen_reg_rtx (V64QImode);
|
||||
emit_insn (gen_addv64qi3 (tmp2, tmp1, tmp1));
|
||||
}
|
||||
else
|
||||
return false;
|
||||
break;
|
||||
|
||||
case VEC_BCAST_PSRLW:
|
||||
if (mode == V4SImode && TARGET_SSE2)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V8HImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V8HImode));
|
||||
tmp2 = gen_reg_rtx (V8HImode);
|
||||
emit_insn (gen_lshrv8hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
|
||||
}
|
||||
else if (mode == V8SImode && TARGET_AVX2)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V16HImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V16HImode));
|
||||
tmp2 = gen_reg_rtx (V16HImode);
|
||||
emit_insn (gen_lshrv16hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
|
||||
}
|
||||
else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V32HImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V32HImode));
|
||||
tmp2 = gen_reg_rtx (V32HImode);
|
||||
emit_insn (gen_lshrv32hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
|
||||
}
|
||||
else
|
||||
return false;
|
||||
break;
|
||||
|
||||
case VEC_BCAST_PSRLD:
|
||||
if (mode == V4SImode && TARGET_SSE2)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V4SImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V4SImode));
|
||||
emit_insn (gen_lshrv4si3 (target, tmp1, GEN_INT (entry->arg)));
|
||||
return true;
|
||||
}
|
||||
else if (mode == V8SImode && TARGET_AVX2)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V8SImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V8SImode));
|
||||
emit_insn (gen_lshrv8si3 (target, tmp1, GEN_INT (entry->arg)));
|
||||
return true;
|
||||
}
|
||||
else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V16SImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V16SImode));
|
||||
emit_insn (gen_lshrv16si3 (target, tmp1, GEN_INT (entry->arg)));
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
break;
|
||||
|
||||
case VEC_BCAST_PSLLW:
|
||||
if (mode == V4SImode && TARGET_SSE2)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V8HImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V8HImode));
|
||||
tmp2 = gen_reg_rtx (V8HImode);
|
||||
emit_insn (gen_ashlv8hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
|
||||
}
|
||||
else if (mode == V8SImode && TARGET_AVX2)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V16HImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V16HImode));
|
||||
tmp2 = gen_reg_rtx (V16HImode);
|
||||
emit_insn (gen_ashlv16hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
|
||||
}
|
||||
else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V32HImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V32HImode));
|
||||
tmp2 = gen_reg_rtx (V32HImode);
|
||||
emit_insn (gen_ashlv32hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
|
||||
}
|
||||
else
|
||||
return false;
|
||||
break;
|
||||
|
||||
case VEC_BCAST_PSLLD:
|
||||
if (mode == V4SImode && TARGET_SSE2)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V4SImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V4SImode));
|
||||
emit_insn (gen_ashlv4si3 (target, tmp1, GEN_INT (entry->arg)));
|
||||
return true;
|
||||
}
|
||||
else if (mode == V8SImode && TARGET_AVX2)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V8SImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V8SImode));
|
||||
emit_insn (gen_ashlv8si3 (target, tmp1, GEN_INT (entry->arg)));
|
||||
return true;
|
||||
}
|
||||
else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512)
|
||||
{
|
||||
tmp1 = gen_reg_rtx (V16SImode);
|
||||
emit_move_insn (tmp1, CONSTM1_RTX (V16SImode));
|
||||
emit_insn (gen_ashlv16si3 (target, tmp1, GEN_INT (entry->arg)));
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
emit_move_insn (target, gen_lowpart (mode, tmp2));
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vector_init_duplicate. Tries to
|
||||
fill target with val via vec_duplicate. */
|
||||
|
||||
@ -15705,6 +16031,12 @@ ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
|
||||
bool ok;
|
||||
rtx_insn *insn;
|
||||
rtx dup;
|
||||
|
||||
if ((mode == V4SImode || mode == V8SImode || mode == V16SImode)
|
||||
&& CONST_INT_P (val)
|
||||
&& ix86_vector_duplicate_simode_const (mode, target, INTVAL (val)))
|
||||
return true;
|
||||
|
||||
/* Save/restore recog_data in case this is called from splitters
|
||||
or other routines where recog_data needs to stay valid across
|
||||
force_reg. See PR106577. */
|
||||
@ -15801,6 +16133,24 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
|
||||
}
|
||||
return ix86_vector_duplicate_value (mode, target, val);
|
||||
|
||||
case E_V8DImode:
|
||||
if (CONST_INT_P (val))
|
||||
{
|
||||
int tmp = (int)INTVAL (val);
|
||||
if (tmp == (int)(INTVAL (val) >> 32))
|
||||
{
|
||||
rtx reg = gen_reg_rtx (V16SImode);
|
||||
ok = ix86_vector_duplicate_value (V16SImode, reg,
|
||||
GEN_INT (tmp));
|
||||
if (ok)
|
||||
{
|
||||
emit_move_insn (target, gen_lowpart (V8DImode, reg));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ix86_vector_duplicate_value (mode, target, val);
|
||||
|
||||
case E_V2SImode:
|
||||
case E_V2SFmode:
|
||||
if (!mmx_ok)
|
||||
@ -15814,7 +16164,6 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
|
||||
case E_V4SFmode:
|
||||
case E_V4SImode:
|
||||
case E_V16SImode:
|
||||
case E_V8DImode:
|
||||
case E_V16SFmode:
|
||||
case E_V8DFmode:
|
||||
return ix86_vector_duplicate_value (mode, target, val);
|
||||
@ -16019,9 +16368,13 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
|
||||
return true;
|
||||
|
||||
case E_V32HImode:
|
||||
case E_V64QImode:
|
||||
if (CONST_INT_P (val))
|
||||
goto widen;
|
||||
/* FALLTHRU */
|
||||
|
||||
case E_V32HFmode:
|
||||
case E_V32BFmode:
|
||||
case E_V64QImode:
|
||||
gcc_assert (TARGET_EVEX512);
|
||||
if (TARGET_AVX512BW)
|
||||
return ix86_vector_duplicate_value (mode, target, val);
|
||||
@ -17021,6 +17374,13 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
|
||||
all_same = false;
|
||||
}
|
||||
|
||||
/* Handle the zero vector as special case. */
|
||||
if (n_var == 0 && all_const_zero)
|
||||
{
|
||||
emit_move_insn (target, CONST0_RTX (mode));
|
||||
return;
|
||||
}
|
||||
|
||||
/* If all values are identical, broadcast the value. */
|
||||
if (all_same
|
||||
&& ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
|
||||
|
@ -22134,6 +22134,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
*total = cost->fabs;
|
||||
else if (FLOAT_MODE_P (mode))
|
||||
*total = ix86_vec_cost (mode, cost->sse_op);
|
||||
else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
|
||||
*total = cost->sse_op;
|
||||
return false;
|
||||
|
||||
case SQRT:
|
||||
|
@ -29,7 +29,7 @@ double foo()
|
||||
return result;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-rtl-dump-times "0xfffffffffefefefe" 3 "expand" } } */
|
||||
/* { dg-final { scan-rtl-dump-times "0xfffffffffefefefe" 1 "expand" } } */
|
||||
/* { dg-final { scan-rtl-dump-times "\\\[0xfefefefefefefefe\\\]" 2 "expand" } } */
|
||||
/* { dg-final { scan-rtl-dump-times "0xfffffffffffffffe\\\]\\\) repeated x16" 2 "expand" } } */
|
||||
|
||||
|
@ -116,7 +116,6 @@ abs512_ph (__m512h a)
|
||||
return _mm512_abs_ph (a);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "vpandd\[^\n\]*%zmm\[0-9\]+" 1 } } */
|
||||
|
||||
__m256h
|
||||
@ -126,7 +125,6 @@ abs256_ph (__m256h a)
|
||||
return _mm256_abs_ph (a);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "vpand\[^\n\]*%ymm\[0-9\]+" 1 } } */
|
||||
|
||||
__m128h
|
||||
@ -136,5 +134,4 @@ abs_ph (__m128h a)
|
||||
return _mm_abs_ph (a);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%xmm\[0-9\]+" 1 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "vpand\[^\n\]*%xmm\[0-9\]+" 1 } } */
|
||||
|
@ -18,7 +18,7 @@ foo (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < sizeof (array) / sizeof (array[0]); i++)
|
||||
array[i] = MK_CONST128_BROADCAST (0x1fff);
|
||||
array[i] = MK_CONST128_BROADCAST (0x1234);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" 1 } } */
|
||||
|
@ -15,7 +15,7 @@ volatile __m512i a, b;
|
||||
void
|
||||
foo()
|
||||
{
|
||||
b = _mm512_srlv_epi16 (a, _mm512_set1_epi16 (3));
|
||||
b = _mm512_sllv_epi16 (a, _mm512_set1_epi16 (4));
|
||||
b = _mm512_srav_epi16 (a, _mm512_set1_epi16 (5));
|
||||
b = _mm512_srlv_epi16 (a, _mm512_set1_epi16 (4));
|
||||
b = _mm512_sllv_epi16 (a, _mm512_set1_epi16 (5));
|
||||
b = _mm512_srav_epi16 (a, _mm512_set1_epi16 (6));
|
||||
}
|
||||
|
12
gcc/testsuite/gcc.target/i386/pr106060-1.c
Normal file
12
gcc/testsuite/gcc.target/i386/pr106060-1.c
Normal file
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -march=x86-64-v3" } */
|
||||
#include <immintrin.h>
|
||||
|
||||
__m256i
|
||||
foo ()
|
||||
{
|
||||
/* shouldnt_have_movabs */
|
||||
return _mm256_set1_epi8 (123);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "movabs" } } */
|
13
gcc/testsuite/gcc.target/i386/pr106060-2.c
Normal file
13
gcc/testsuite/gcc.target/i386/pr106060-2.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -march=x86-64-v3" } */
|
||||
#include <immintrin.h>
|
||||
|
||||
__m256i
|
||||
foo ()
|
||||
{
|
||||
/* should_be_cmpeq_abs */
|
||||
return _mm256_set1_epi8 (1);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "pcmpeq" } } */
|
||||
/* { dg-final { scan-assembler "pabsb" } } */
|
14
gcc/testsuite/gcc.target/i386/pr106060-3.c
Normal file
14
gcc/testsuite/gcc.target/i386/pr106060-3.c
Normal file
@ -0,0 +1,14 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -march=x86-64-v3" } */
|
||||
#include <immintrin.h>
|
||||
|
||||
__m256i
|
||||
foo ()
|
||||
{
|
||||
/* should_be_cmpeq_add */
|
||||
return _mm256_set1_epi8 (-2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "pcmpeq" } } */
|
||||
/* { dg-final { scan-assembler "paddb" } } */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=skylake-avx512 -O2" } */
|
||||
/* { dg-final { scan-assembler-times "cmp" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "cmp\[dq\]" 2 } } */
|
||||
/* { dg-final { scan-assembler-not "and" } } */
|
||||
|
||||
typedef long vec __attribute__((vector_size(16)));
|
||||
|
@ -28,7 +28,7 @@ __vu srl_c (__vu a)
|
||||
return a >> 5;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "psrlw" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "psrlw" 5 } } */
|
||||
|
||||
__vi sra (__vi a, int n)
|
||||
{
|
||||
|
@ -28,7 +28,7 @@ __vu srl_c (__vu a)
|
||||
return a >> 5;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "psrlw" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "psrlw" 5 } } */
|
||||
|
||||
__vi sra (__vi a, int n)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user