mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
forwprop: Try to blend two isomorphic VEC_PERM sequences
This extends forwprop by yet another VEC_PERM optimization: It attempts to blend two isomorphic vector sequences by using the redundancy in the lane utilization in these sequences. This redundancy in lane utilization comes from the way how specific scalar statements end up vectorized: two VEC_PERMs on top, binary operations on both of them, and a final VEC_PERM to create the result. Here is an example of this sequence: v_in = {e0, e1, e2, e3} v_1 = VEC_PERM <v_in, v_in, {0, 2, 0, 2}> // v_1 = {e0, e2, e0, e2} v_2 = VEC_PERM <v_in, v_in, {1, 3, 1, 3}> // v_2 = {e1, e3, e1, e3} v_x = v_1 + v_2 // v_x = {e0+e1, e2+e3, e0+e1, e2+e3} v_y = v_1 - v_2 // v_y = {e0-e1, e2-e3, e0-e1, e2-e3} v_out = VEC_PERM <v_x, v_y, {0, 1, 6, 7}> // v_out = {e0+e1, e2+e3, e0-e1, e2-e3} To remove the redundancy, lanes 2 and 3 can be freed, which allows to change the last statement into: v_out' = VEC_PERM <v_x, v_y, {0, 1, 4, 5}> // v_out' = {e0+e1, e2+e3, e0-e1, e2-e3} The cost of eliminating the redundancy in the lane utilization is that lowering the VEC PERM expression could get more expensive because of tighter packing of the lanes. Therefore this optimization is not done alone, but in only in case we identify two such sequences that can be blended. Once all candidate sequences have been identified, we try to blend them, so that we can use the freed lanes for the second sequence. On success we convert 2x (2x BINOP + 1x VEC_PERM) to 2x VEC_PERM + 2x BINOP + 2x VEC_PERM traded for 4x VEC_PERM + 2x BINOP. The implemented transformation reuses (rewrites) the statements of the first sequence and the last VEC_PERM of the second sequence. The remaining four statements of the second statment are left untouched and will be eliminated by DCE later. This targets x264_pixel_satd_8x4, which calculates the sum of absolute transformed differences (SATD) using Hadamard transformation. We have seen 8% speedup on SPEC's x264 on a 5950X (x86-64) and 7% speedup on an AArch64 machine. Bootstrapped and reg-tested on x86-64 and AArch64 (all languages). gcc/ChangeLog: * tree-ssa-forwprop.cc (struct _vec_perm_simplify_seq): New data structure to store analysis results of a vec perm simplify sequence. (get_vect_selector_index_map): Helper to get an index map from the provided vector permute selector. (recognise_vec_perm_simplify_seq): Helper to recognise a vec perm simplify sequence. (narrow_vec_perm_simplify_seq): Helper to pack the lanes more tight. (can_blend_vec_perm_simplify_seqs_p): Test if two vec perm sequences can be blended. (calc_perm_vec_perm_simplify_seqs): Helper to calculate the new permutation indices. (blend_vec_perm_simplify_seqs): Helper to blend two vec perm simplify sequences. (process_vec_perm_simplify_seq_list): Helper to process a list of vec perm simplify sequences. (append_vec_perm_simplify_seq_list): Helper to add a vec perm simplify sequence to the list. (pass_forwprop::execute): Integrate new functionality. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/satd-hadamard.c: New test. * gcc.dg/tree-ssa/vector-10.c: New test. * gcc.dg/tree-ssa/vector-8.c: New test. * gcc.dg/tree-ssa/vector-9.c: New test. * gcc.target/aarch64/sve/satd-hadamard.c: New test. Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
This commit is contained in:
parent
42a8005c63
commit
1c4d39ada3
43
gcc/testsuite/gcc.dg/tree-ssa/satd-hadamard.c
Normal file
43
gcc/testsuite/gcc.dg/tree-ssa/satd-hadamard.c
Normal file
@ -0,0 +1,43 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-O3 -fdump-tree-forwprop4-details" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) {\
|
||||
int t0 = s0 + s1;\
|
||||
int t1 = s0 - s1;\
|
||||
int t2 = s2 + s3;\
|
||||
int t3 = s2 - s3;\
|
||||
d0 = t0 + t2;\
|
||||
d1 = t1 + t3;\
|
||||
d2 = t0 - t2;\
|
||||
d3 = t1 - t3;\
|
||||
}
|
||||
|
||||
int
|
||||
x264_pixel_satd_8x4_simplified (uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2)
|
||||
{
|
||||
uint32_t tmp[4][4];
|
||||
uint32_t a0, a1, a2, a3;
|
||||
int sum = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++, pix1 += i_pix1, pix2 += i_pix2)
|
||||
{
|
||||
a0 = (pix1[0] - pix2[0]) + ((pix1[4] - pix2[4]) << 16);
|
||||
a1 = (pix1[1] - pix2[1]) + ((pix1[5] - pix2[5]) << 16);
|
||||
a2 = (pix1[2] - pix2[2]) + ((pix1[6] - pix2[6]) << 16);
|
||||
a3 = (pix1[3] - pix2[3]) + ((pix1[7] - pix2[7]) << 16);
|
||||
HADAMARD4(tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], a0, a1, a2, a3);
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]);
|
||||
sum += a0 + a1 + a2 + a3;
|
||||
}
|
||||
|
||||
return (((uint16_t)sum) + ((uint32_t)sum>>16)) >> 1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 3, 6, 7 }" "forwprop4" } } */
|
122
gcc/testsuite/gcc.dg/tree-ssa/vector-10.c
Normal file
122
gcc/testsuite/gcc.dg/tree-ssa/vector-10.c
Normal file
@ -0,0 +1,122 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-O3 -fdump-tree-forwprop1-details" } */
|
||||
|
||||
typedef int vec __attribute__((vector_size (4 * sizeof (int))));
|
||||
|
||||
void f1 (vec *p_v_in_1, vec *p_v_in_2, vec *p_v_out_1, vec *p_v_out_2)
|
||||
{
|
||||
vec sel0 = { 0, 2, 0, 2 };
|
||||
vec sel1 = { 1, 3, 1, 3 };
|
||||
vec sel = { 0, 1, 6, 7 };
|
||||
vec v_1, v_2, v_x, v_y, v_out_1, v_out_2;
|
||||
vec v_in_1 = *p_v_in_1;
|
||||
vec v_in_2;
|
||||
|
||||
/* First vec perm sequence. */
|
||||
v_1 = __builtin_shuffle (v_in_1, v_in_1, sel0);
|
||||
v_2 = __builtin_shuffle (v_in_1, v_in_1, sel1);
|
||||
v_x = v_1 + v_2;
|
||||
v_y = v_1 - v_2;
|
||||
v_out_1 = __builtin_shuffle (v_x, v_y, sel);
|
||||
|
||||
/* Won't blend because v_in_2 is defined after v_1 above. */
|
||||
v_in_2 = *p_v_in_2;
|
||||
/* Second vec perm sequence. */
|
||||
v_1 = __builtin_shuffle (v_in_2, v_in_2, sel0);
|
||||
v_2 = __builtin_shuffle (v_in_2, v_in_2, sel1);
|
||||
v_x = v_1 + v_2;
|
||||
v_y = v_1 - v_2;
|
||||
v_out_2 = __builtin_shuffle (v_x, v_y, sel);
|
||||
|
||||
*p_v_out_1 = v_out_1;
|
||||
*p_v_out_2 = v_out_2;
|
||||
}
|
||||
|
||||
void f2 (vec *p_v_in_1, vec *p_v_in_2, vec *p_v_out_1, vec *p_v_out_2)
|
||||
{
|
||||
vec sel0 = { 0, 2, 0, 2 };
|
||||
vec sel1 = { 1, 3, 1, 3 };
|
||||
vec sel = { 0, 1, 6, 7 };
|
||||
vec v_1, v_2, v_x, v_y, v_out_1, v_out_2;
|
||||
vec v_in_1 = *p_v_in_1;
|
||||
vec v_in_2 = *p_v_in_2;
|
||||
|
||||
/* First vec perm sequence. */
|
||||
v_1 = __builtin_shuffle (v_in_1, v_in_1, sel0);
|
||||
v_2 = __builtin_shuffle (v_in_1, v_in_1, sel1);
|
||||
v_x = v_1 + v_2;
|
||||
v_y = v_1 - v_2;
|
||||
v_out_1 = __builtin_shuffle (v_x, v_y, sel);
|
||||
/* Won't blend because of this store between the sequences. */
|
||||
*p_v_out_1 = v_out_1;
|
||||
|
||||
/* Second vec perm sequence. */
|
||||
v_1 = __builtin_shuffle (v_in_2, v_in_2, sel0);
|
||||
v_2 = __builtin_shuffle (v_in_2, v_in_2, sel1);
|
||||
v_x = v_1 + v_2;
|
||||
v_y = v_1 - v_2;
|
||||
v_out_2 = __builtin_shuffle (v_x, v_y, sel);
|
||||
|
||||
*p_v_out_2 = v_out_2;
|
||||
}
|
||||
|
||||
void f3 (vec *p_v_in_1, vec *p_v_in_2, vec *p_v_out_1, vec *p_v_out_2)
|
||||
{
|
||||
vec sel0 = { 0, 2, 0, 2 };
|
||||
vec sel1 = { 1, 3, 1, 3 };
|
||||
vec sel = { 0, 1, 6, 7 };
|
||||
vec v_1, v_2, v_x, v_y, v_out_1, v_out_2;
|
||||
vec v_in_1 = *p_v_in_1;
|
||||
vec v_in_2 = *p_v_in_2;
|
||||
|
||||
/* First vec perm sequence. */
|
||||
v_1 = __builtin_shuffle (v_in_1, v_in_1, sel0);
|
||||
v_2 = __builtin_shuffle (v_in_1, v_in_1, sel1);
|
||||
v_x = v_1 + v_2;
|
||||
v_y = v_1 - v_2;
|
||||
v_out_1 = __builtin_shuffle (v_x, v_y, sel);
|
||||
|
||||
/* Second vec perm sequence. */
|
||||
v_1 = __builtin_shuffle (v_in_2, v_in_2, sel0);
|
||||
v_2 = __builtin_shuffle (v_in_2, v_in_2, sel1);
|
||||
v_x = v_1 + v_2;
|
||||
/* Won't blend because v_2 is RHS1 here. */
|
||||
v_y = v_2 - v_1;
|
||||
v_out_2 = __builtin_shuffle (v_x, v_y, sel);
|
||||
|
||||
*p_v_out_1 = v_out_1;
|
||||
*p_v_out_2 = v_out_2;
|
||||
}
|
||||
|
||||
extern vec foo (vec v);
|
||||
void f4 (vec *p_v_in_1, vec *p_v_out_1, vec *p_v_out_2)
|
||||
{
|
||||
vec sel0 = { 0, 2, 0, 2 };
|
||||
vec sel1 = { 1, 3, 1, 3 };
|
||||
vec sel = { 0, 1, 6, 7 };
|
||||
vec v_1, v_2, v_x, v_y, v_out_1, v_out_2;
|
||||
vec v_in_1 = *p_v_in_1;
|
||||
vec v_in_2;
|
||||
|
||||
/* First vec perm sequence. */
|
||||
v_1 = __builtin_shuffle (v_in_1, v_in_1, sel0);
|
||||
v_2 = __builtin_shuffle (v_in_1, v_in_1, sel1);
|
||||
v_x = v_1 + v_2;
|
||||
v_y = v_1 - v_2;
|
||||
v_out_1 = __builtin_shuffle (v_x, v_y, sel);
|
||||
|
||||
/* Won't merge because of dependency. */
|
||||
v_in_2 = foo (v_out_1);
|
||||
|
||||
/* Second vec perm sequence. */
|
||||
v_1 = __builtin_shuffle (v_in_2, v_in_2, sel0);
|
||||
v_2 = __builtin_shuffle (v_in_2, v_in_2, sel1);
|
||||
v_x = v_1 + v_2;
|
||||
v_y = v_1 - v_2;
|
||||
v_out_2 = __builtin_shuffle (v_x, v_y, sel);
|
||||
|
||||
*p_v_out_1 = v_out_1;
|
||||
*p_v_out_2 = v_out_2;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "Vec perm simplify sequences have been merged" "forwprop1" } } */
|
34
gcc/testsuite/gcc.dg/tree-ssa/vector-8.c
Normal file
34
gcc/testsuite/gcc.dg/tree-ssa/vector-8.c
Normal file
@ -0,0 +1,34 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-O3 -fdump-tree-forwprop1-details" } */
|
||||
|
||||
typedef int vec __attribute__((vector_size (4 * sizeof (int))));
|
||||
|
||||
void f (vec *p_v_in_1, vec *p_v_in_2, vec *p_v_out_1, vec *p_v_out_2)
|
||||
{
|
||||
vec sel0 = { 0, 2, 0, 2 };
|
||||
vec sel1 = { 1, 3, 1, 3 };
|
||||
vec sel = { 0, 1, 6, 7 };
|
||||
vec v_1, v_2, v_x, v_y, v_out_1, v_out_2;
|
||||
vec v_in_1 = *p_v_in_1;
|
||||
vec v_in_2 = *p_v_in_2;
|
||||
|
||||
/* First vec perm sequence. */
|
||||
v_1 = __builtin_shuffle (v_in_1, v_in_1, sel0);
|
||||
v_2 = __builtin_shuffle (v_in_1, v_in_1, sel1);
|
||||
v_x = v_1 + v_2;
|
||||
v_y = v_1 - v_2;
|
||||
v_out_1 = __builtin_shuffle (v_x, v_y, sel);
|
||||
|
||||
/* Second vec perm sequence. */
|
||||
v_1 = __builtin_shuffle (v_in_2, v_in_2, sel0);
|
||||
v_2 = __builtin_shuffle (v_in_2, v_in_2, sel1);
|
||||
v_x = v_1 + v_2;
|
||||
v_y = v_1 - v_2;
|
||||
v_out_2 = __builtin_shuffle (v_x, v_y, sel);
|
||||
|
||||
*p_v_out_1 = v_out_1;
|
||||
*p_v_out_2 = v_out_2;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "Vec perm simplify sequences have been blended" "forwprop1" } } */
|
||||
/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 3, 6, 7 }" "forwprop1" } } */
|
34
gcc/testsuite/gcc.dg/tree-ssa/vector-9.c
Normal file
34
gcc/testsuite/gcc.dg/tree-ssa/vector-9.c
Normal file
@ -0,0 +1,34 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-O3 -fdump-tree-forwprop1-details" } */
|
||||
|
||||
typedef int vec __attribute__((vector_size (4 * sizeof (int))));
|
||||
|
||||
void f (vec *p_v_in_1, vec *p_v_in_2, vec *p_v_out_1, vec *p_v_out_2)
|
||||
{
|
||||
vec sel0 = { 0, 2, 0, 2 };
|
||||
vec sel1 = { 1, 3, 1, 3 };
|
||||
vec sel = { 0, 1, 6, 7 };
|
||||
vec v_1, v_2, v_x, v_y, v_out_1, v_out_2;
|
||||
vec v_in_1 = *p_v_in_1;
|
||||
vec v_in_2 = *p_v_in_2;
|
||||
|
||||
/* First vec perm sequence. */
|
||||
v_1 = __builtin_shuffle (v_in_1, v_in_1, sel0);
|
||||
v_2 = __builtin_shuffle (v_in_1, v_in_1, sel1);
|
||||
v_x = v_1 * v_2;
|
||||
v_y = v_2 - v_1;
|
||||
v_out_1 = __builtin_shuffle (v_x, v_y, sel);
|
||||
|
||||
/* Second vec perm sequence. */
|
||||
v_1 = __builtin_shuffle (v_in_2, v_in_2, sel0);
|
||||
v_2 = __builtin_shuffle (v_in_2, v_in_2, sel1);
|
||||
v_x = v_2 * v_1;
|
||||
v_y = v_2 - v_1;
|
||||
v_out_2 = __builtin_shuffle (v_x, v_y, sel);
|
||||
|
||||
*p_v_out_1 = v_out_1;
|
||||
*p_v_out_2 = v_out_2;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "Vec perm simplify sequences have been blended" "forwprop1" } } */
|
||||
/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 3, 6, 7 }" "forwprop1" } } */
|
3
gcc/testsuite/gcc.target/aarch64/sve/satd-hadamard.c
Normal file
3
gcc/testsuite/gcc.target/aarch64/sve/satd-hadamard.c
Normal file
@ -0,0 +1,3 @@
|
||||
#include "../../../gcc.dg/tree-ssa/satd-hadamard.c"
|
||||
|
||||
/* { dg-final { scan-assembler-not {\ttbl\t} } } */
|
@ -50,6 +50,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "optabs-tree.h"
|
||||
#include "insn-config.h"
|
||||
#include "recog.h"
|
||||
#include "cfgloop.h"
|
||||
#include "tree-vectorizer.h"
|
||||
#include "tree-vector-builder.h"
|
||||
#include "vec-perm-indices.h"
|
||||
#include "internal-fn.h"
|
||||
@ -184,6 +186,25 @@ along with GCC; see the file COPYING3. If not see
|
||||
|
||||
This will (of course) be extended as other needs arise. */
|
||||
|
||||
/* Data structure that contains simplifiable vectorized permute sequences.
|
||||
See recognise_vec_perm_simplify_seq () for a description of the sequence. */
|
||||
|
||||
struct _vec_perm_simplify_seq
|
||||
{
|
||||
/* Defining stmts of vectors in the sequence. */
|
||||
gassign *v_1_stmt;
|
||||
gassign *v_2_stmt;
|
||||
gassign *v_x_stmt;
|
||||
gassign *v_y_stmt;
|
||||
/* Final permute statment. */
|
||||
gassign *stmt;
|
||||
/* New selector indices for stmt. */
|
||||
tree new_sel;
|
||||
/* Elements of each vector and selector. */
|
||||
unsigned int nelts;
|
||||
};
|
||||
typedef struct _vec_perm_simplify_seq *vec_perm_simplify_seq;
|
||||
|
||||
static bool forward_propagate_addr_expr (tree, tree, bool);
|
||||
|
||||
/* Set to true if we delete dead edges during the optimization. */
|
||||
@ -225,7 +246,6 @@ fwprop_invalidate_lattice (tree name)
|
||||
lattice[SSA_NAME_VERSION (name)] = NULL_TREE;
|
||||
}
|
||||
|
||||
|
||||
/* Get the statement we can propagate from into NAME skipping
|
||||
trivial copies. Returns the statement which defines the
|
||||
propagation source or NULL_TREE if there is no such one.
|
||||
@ -3460,6 +3480,546 @@ fwprop_ssa_val (tree name)
|
||||
return name;
|
||||
}
|
||||
|
||||
/* Get an index map from the provided vector permute selector
|
||||
and return the number of unique indices.
|
||||
E.g.: { 1, 3, 1, 3 } -> <0, 1, 0, 1>, 2
|
||||
{ 0, 2, 0, 2 } -> <0, 1, 0, 1>, 2
|
||||
{ 3, 2, 1, 0 } -> <0, 1, 2, 3>, 4. */
|
||||
|
||||
static unsigned int
|
||||
get_vect_selector_index_map (tree sel, vec<unsigned int> *index_map)
|
||||
{
|
||||
gcc_assert (VECTOR_CST_NELTS (sel).is_constant ());
|
||||
unsigned int nelts = VECTOR_CST_NELTS (sel).to_constant ();
|
||||
unsigned int n = 0;
|
||||
|
||||
for (unsigned int i = 0; i < nelts; i++)
|
||||
{
|
||||
/* Extract the i-th value from the selector. */
|
||||
tree sel_cst_tree = VECTOR_CST_ELT (sel, i);
|
||||
unsigned int sel_cst = TREE_INT_CST_LOW (sel_cst_tree);
|
||||
|
||||
unsigned int j = 0;
|
||||
for (; j <= i; j++)
|
||||
{
|
||||
tree prev_sel_cst_tree = VECTOR_CST_ELT (sel, j);
|
||||
unsigned int prev_sel_cst
|
||||
= TREE_INT_CST_LOW (prev_sel_cst_tree);
|
||||
if (prev_sel_cst == sel_cst)
|
||||
break;
|
||||
}
|
||||
index_map->quick_push (j);
|
||||
n += (i == j) ? 1 : 0;
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Search for opportunities to free half of the lanes in the following pattern:
|
||||
|
||||
v_in = {e0, e1, e2, e3}
|
||||
v_1 = VEC_PERM <v_in, v_in, {0, 2, 0, 2}>
|
||||
// v_1 = {e0, e2, e0, e2}
|
||||
v_2 = VEC_PERM <v_in, v_in, {1, 3, 1, 3}>
|
||||
// v_2 = {e1, e3, e1, e3}
|
||||
|
||||
v_x = v_1 + v_2
|
||||
// v_x = {e0+e1, e2+e3, e0+e1, e2+e3}
|
||||
v_y = v_1 - v_2
|
||||
// v_y = {e0-e1, e2-e3, e0-e1, e2-e3}
|
||||
|
||||
v_out = VEC_PERM <v_x, v_y, {0, 1, 6, 7}>
|
||||
// v_out = {e0+e1, e2+e3, e0-e1, e2-e3}
|
||||
|
||||
The last statement could be simplified to:
|
||||
v_out' = VEC_PERM <v_x, v_y, {0, 1, 4, 5}>
|
||||
// v_out' = {e0+e1, e2+e3, e0-e1, e2-e3}
|
||||
|
||||
Characteristic properties:
|
||||
- v_1 and v_2 are created from the same input vector v_in and introduce the
|
||||
lane duplication (in the selection operand) that we can eliminate.
|
||||
- v_x and v_y are results from lane-preserving operations that use v_1 and
|
||||
v_2 as inputs.
|
||||
- v_out is created by selecting from duplicated lanes. */
|
||||
|
||||
static bool
|
||||
recognise_vec_perm_simplify_seq (gassign *stmt, vec_perm_simplify_seq *seq)
|
||||
{
|
||||
gcc_checking_assert (stmt);
|
||||
gcc_checking_assert (gimple_assign_rhs_code (stmt) == VEC_PERM_EXPR);
|
||||
basic_block bb = gimple_bb (stmt);
|
||||
|
||||
/* Decompose the final vec permute statement. */
|
||||
tree v_x = gimple_assign_rhs1 (stmt);
|
||||
tree v_y = gimple_assign_rhs2 (stmt);
|
||||
tree sel = gimple_assign_rhs3 (stmt);
|
||||
|
||||
if (!VECTOR_CST_NELTS (sel).is_constant ()
|
||||
|| TREE_CODE (v_x) != SSA_NAME
|
||||
|| TREE_CODE (v_y) != SSA_NAME
|
||||
|| !has_single_use (v_x)
|
||||
|| !has_single_use (v_y))
|
||||
return false;
|
||||
|
||||
unsigned int nelts = VECTOR_CST_NELTS (sel).to_constant ();
|
||||
|
||||
/* Lookup the definition of v_x and v_y. */
|
||||
gassign *v_x_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (v_x));
|
||||
gassign *v_y_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (v_y));
|
||||
if (!v_x_stmt || gimple_bb (v_x_stmt) != bb
|
||||
|| !v_y_stmt || gimple_bb (v_y_stmt) != bb)
|
||||
return false;
|
||||
|
||||
/* Check the operations that define v_x and v_y. */
|
||||
if (TREE_CODE_CLASS (gimple_assign_rhs_code (v_x_stmt)) != tcc_binary
|
||||
|| TREE_CODE_CLASS (gimple_assign_rhs_code (v_y_stmt)) != tcc_binary)
|
||||
return false;
|
||||
|
||||
tree v_x_1 = gimple_assign_rhs1 (v_x_stmt);
|
||||
tree v_x_2 = gimple_assign_rhs2 (v_x_stmt);
|
||||
tree v_y_1 = gimple_assign_rhs1 (v_y_stmt);
|
||||
tree v_y_2 = gimple_assign_rhs2 (v_y_stmt);
|
||||
|
||||
if (v_x_stmt == v_y_stmt
|
||||
|| TREE_CODE (v_x_1) != SSA_NAME
|
||||
|| TREE_CODE (v_x_2) != SSA_NAME
|
||||
|| num_imm_uses (v_x_1) != 2
|
||||
|| num_imm_uses (v_x_2) != 2)
|
||||
return false;
|
||||
|
||||
if (v_x_1 != v_y_1 || v_x_2 != v_y_2)
|
||||
{
|
||||
/* Allow operands of commutative operators to swap. */
|
||||
if (commutative_tree_code (gimple_assign_rhs_code (v_x_stmt)))
|
||||
{
|
||||
/* Keep v_x_1 the first operand for non-commutative operators. */
|
||||
v_x_1 = gimple_assign_rhs2 (v_x_stmt);
|
||||
v_x_2 = gimple_assign_rhs1 (v_x_stmt);
|
||||
if (v_x_1 != v_y_1 || v_x_2 != v_y_2)
|
||||
return false;
|
||||
}
|
||||
else if (commutative_tree_code (gimple_assign_rhs_code (v_y_stmt)))
|
||||
{
|
||||
if (v_x_1 != v_y_2 || v_x_2 != v_y_1)
|
||||
return false;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
gassign *v_1_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (v_x_1));
|
||||
gassign *v_2_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (v_x_2));
|
||||
if (!v_1_stmt || gimple_bb (v_1_stmt) != bb
|
||||
|| !v_2_stmt || gimple_bb (v_2_stmt) != bb)
|
||||
return false;
|
||||
|
||||
if (gimple_assign_rhs_code (v_1_stmt) != VEC_PERM_EXPR
|
||||
|| gimple_assign_rhs_code (v_2_stmt) != VEC_PERM_EXPR)
|
||||
return false;
|
||||
|
||||
/* Decompose initial VEC_PERM_EXPRs. */
|
||||
tree v_in = gimple_assign_rhs1 (v_1_stmt);
|
||||
tree v_1_sel = gimple_assign_rhs3 (v_1_stmt);
|
||||
tree v_2_sel = gimple_assign_rhs3 (v_2_stmt);
|
||||
if (v_in != gimple_assign_rhs2 (v_1_stmt)
|
||||
|| v_in != gimple_assign_rhs1 (v_2_stmt)
|
||||
|| v_in != gimple_assign_rhs2 (v_2_stmt))
|
||||
return false;
|
||||
|
||||
if (!VECTOR_CST_NELTS (v_1_sel).is_constant ()
|
||||
|| !VECTOR_CST_NELTS (v_2_sel).is_constant ())
|
||||
return false;
|
||||
|
||||
if (nelts != VECTOR_CST_NELTS (v_1_sel).to_constant ()
|
||||
|| nelts != VECTOR_CST_NELTS (v_2_sel).to_constant ())
|
||||
return false;
|
||||
|
||||
/* Now check permutation selection operands. */
|
||||
auto_vec<unsigned int> v_1_lane_map, v_2_lane_map;
|
||||
v_1_lane_map.reserve (nelts);
|
||||
v_2_lane_map.reserve (nelts);
|
||||
unsigned int v_1_lanes, v_2_lanes;
|
||||
v_1_lanes = get_vect_selector_index_map (v_1_sel, &v_1_lane_map);
|
||||
v_2_lanes = get_vect_selector_index_map (v_2_sel, &v_2_lane_map);
|
||||
|
||||
/* Check if we could free up half of the lanes. */
|
||||
if (v_1_lanes != v_2_lanes || v_1_lanes > (nelts / 2))
|
||||
return false;
|
||||
|
||||
/* Create the new selector. */
|
||||
vec_perm_builder new_sel_perm (nelts, nelts, 1);
|
||||
for (unsigned int i = 0; i < nelts; i++)
|
||||
{
|
||||
/* Extract the i-th value from the selector. */
|
||||
tree sel_cst_tree = VECTOR_CST_ELT (sel, i);
|
||||
unsigned int sel_cst = TREE_INT_CST_LOW (sel_cst_tree);
|
||||
|
||||
unsigned int j;
|
||||
if (sel_cst < nelts)
|
||||
j = v_1_lane_map[sel_cst];
|
||||
else
|
||||
j = v_2_lane_map[sel_cst - nelts] + nelts;
|
||||
|
||||
new_sel_perm.quick_push (j);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "%u", j);
|
||||
if (i != (nelts -1))
|
||||
fprintf (dump_file, ", ");
|
||||
}
|
||||
}
|
||||
|
||||
vec_perm_indices new_indices (new_sel_perm, 2, nelts);
|
||||
tree vectype = TREE_TYPE (gimple_assign_lhs (stmt));
|
||||
machine_mode vmode = TYPE_MODE (vectype);
|
||||
if (!can_vec_perm_const_p (vmode, vmode, new_indices, false))
|
||||
return false;
|
||||
|
||||
*seq = XNEW (struct _vec_perm_simplify_seq);
|
||||
(*seq)->stmt = stmt;
|
||||
(*seq)->v_1_stmt = v_1_stmt;
|
||||
(*seq)->v_2_stmt = v_2_stmt;
|
||||
(*seq)->v_x_stmt = v_x_stmt;
|
||||
(*seq)->v_y_stmt = v_y_stmt;
|
||||
(*seq)->nelts = nelts;
|
||||
(*seq)->new_sel = vect_gen_perm_mask_checked (vectype, new_indices);
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "Found vec perm simplify sequence ending with: ");
|
||||
print_gimple_stmt (dump_file, stmt, 0);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Reduce the lane consumption of a simplifiable vec perm sequence. */
|
||||
|
||||
static void
|
||||
narrow_vec_perm_simplify_seq (const vec_perm_simplify_seq &seq)
|
||||
{
|
||||
gassign *stmt = seq->stmt;
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "Updating VEC_PERM statment:\n");
|
||||
fprintf (dump_file, "Old stmt: ");
|
||||
print_gimple_stmt (dump_file, stmt, 0);
|
||||
}
|
||||
|
||||
/* Update the last VEC_PERM statement. */
|
||||
gimple_assign_set_rhs3 (stmt, seq->new_sel);
|
||||
update_stmt (stmt);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "New stmt: ");
|
||||
print_gimple_stmt (dump_file, stmt, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Test if we can blend two simplifiable vec permute sequences.
|
||||
NEED_SWAP will be set, if sequences must be swapped for blending. */
|
||||
|
||||
static bool
|
||||
can_blend_vec_perm_simplify_seqs_p (vec_perm_simplify_seq seq1,
|
||||
vec_perm_simplify_seq seq2,
|
||||
bool *need_swap)
|
||||
{
|
||||
unsigned int nelts = seq1->nelts;
|
||||
basic_block bb = gimple_bb (seq1->stmt);
|
||||
|
||||
gcc_assert (gimple_bb (seq2->stmt) == bb);
|
||||
|
||||
/* BBs and number of elements must be equal. */
|
||||
if (gimple_bb (seq2->stmt) != bb || seq2->nelts != nelts)
|
||||
return false;
|
||||
|
||||
/* We need vectors of the same type. */
|
||||
if (TREE_TYPE (gimple_assign_lhs (seq1->stmt))
|
||||
!= TREE_TYPE (gimple_assign_lhs (seq2->stmt)))
|
||||
return false;
|
||||
|
||||
/* We require isomorphic operators. */
|
||||
if (((gimple_assign_rhs_code (seq1->v_x_stmt)
|
||||
!= gimple_assign_rhs_code (seq2->v_x_stmt))
|
||||
|| (gimple_assign_rhs_code (seq1->v_y_stmt)
|
||||
!= gimple_assign_rhs_code (seq2->v_y_stmt))))
|
||||
return false;
|
||||
|
||||
/* We cannot have any dependencies between the sequences.
|
||||
|
||||
For merging, we will reuse seq1->v_1_stmt and seq1->v_2_stmt.
|
||||
seq1's v_in is defined before these statements, but we need
|
||||
to check if seq2's v_in is defined before them as well.
|
||||
|
||||
Further, we will reuse seq2->stmt. We need to ensure that
|
||||
seq1->v_x_stmt and seq1->v_y_stmt are before it.
|
||||
|
||||
Note, that we don't need to check the BBs here, because all
|
||||
statements of both sequences have to be in the same BB.
|
||||
*/
|
||||
|
||||
tree seq2_v_in = gimple_assign_rhs1 (seq2->v_1_stmt);
|
||||
if (TREE_CODE (seq2_v_in) != SSA_NAME)
|
||||
return false;
|
||||
|
||||
gassign *seq2_v_in_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (seq2_v_in));
|
||||
if (!seq2_v_in_stmt || gimple_bb (seq2_v_in_stmt) != bb
|
||||
|| (gimple_uid (seq2_v_in_stmt) > gimple_uid (seq1->v_1_stmt))
|
||||
|| (gimple_uid (seq1->v_x_stmt) > gimple_uid (seq2->stmt))
|
||||
|| (gimple_uid (seq1->v_y_stmt) > gimple_uid (seq2->stmt)))
|
||||
{
|
||||
tree seq1_v_in = gimple_assign_rhs1 (seq1->v_1_stmt);
|
||||
if (TREE_CODE (seq1_v_in) != SSA_NAME)
|
||||
return false;
|
||||
|
||||
gassign *seq1_v_in_stmt
|
||||
= dyn_cast<gassign *> (SSA_NAME_DEF_STMT (seq1_v_in));
|
||||
/* Let's try to see if we succeed when swapping the sequences. */
|
||||
if (!seq1_v_in_stmt || gimple_bb (seq1_v_in_stmt)
|
||||
|| (gimple_uid (seq1_v_in_stmt) > gimple_uid (seq2->v_1_stmt))
|
||||
|| (gimple_uid (seq2->v_x_stmt) > gimple_uid (seq1->stmt))
|
||||
|| (gimple_uid (seq2->v_y_stmt) > gimple_uid (seq1->stmt)))
|
||||
return false;
|
||||
*need_swap = true;
|
||||
}
|
||||
else
|
||||
*need_swap = false;
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "Found vec perm simplify sequence pair.\n");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Calculate the permutations for blending the two given vec permute
|
||||
sequences. This may fail if the resulting permutation is not
|
||||
supported. */
|
||||
|
||||
static bool
|
||||
calc_perm_vec_perm_simplify_seqs (vec_perm_simplify_seq seq1,
|
||||
vec_perm_simplify_seq seq2,
|
||||
vec_perm_indices *seq2_stmt_indices,
|
||||
vec_perm_indices *seq1_v_1_stmt_indices,
|
||||
vec_perm_indices *seq1_v_2_stmt_indices)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int nelts = seq1->nelts;
|
||||
auto_vec<int> lane_assignment;
|
||||
lane_assignment.create (2 * nelts);
|
||||
|
||||
/* Mark all lanes as free. */
|
||||
lane_assignment.quick_grow_cleared (2 * nelts);
|
||||
|
||||
/* Reserve lanes for seq1. */
|
||||
for (i = 0; i < nelts; i++)
|
||||
{
|
||||
unsigned int l = TREE_INT_CST_LOW (VECTOR_CST_ELT (seq1->new_sel, i));
|
||||
lane_assignment[l] = 1;
|
||||
}
|
||||
|
||||
/* Reserve lanes for seq2 and calculate selector for seq2->stmt. */
|
||||
vec_perm_builder seq2_stmt_sel_perm (nelts, nelts, 1);
|
||||
for (i = 0; i < nelts; i++)
|
||||
{
|
||||
unsigned int l = TREE_INT_CST_LOW (VECTOR_CST_ELT (seq2->new_sel, i));
|
||||
while (lane_assignment[l] != 0)
|
||||
l++;
|
||||
lane_assignment[l] = 2;
|
||||
seq2_stmt_sel_perm.quick_push (l);
|
||||
}
|
||||
|
||||
seq2_stmt_indices->new_vector (seq2_stmt_sel_perm, 2, nelts);
|
||||
tree vectype = TREE_TYPE (gimple_assign_lhs (seq2->stmt));
|
||||
machine_mode vmode = TYPE_MODE (vectype);
|
||||
if (!can_vec_perm_const_p (vmode, vmode, *seq2_stmt_indices, false))
|
||||
return false;
|
||||
|
||||
/* Calculate selectors for seq1->v_1_stmt and seq1->v_2_stmt. */
|
||||
vec_perm_builder seq1_v_1_stmt_sel_perm (nelts, nelts, 1);
|
||||
vec_perm_builder seq1_v_2_stmt_sel_perm (nelts, nelts, 1);
|
||||
for (i = 0; i < nelts; i++)
|
||||
{
|
||||
bool use_seq1 = lane_assignment[i] == 1;
|
||||
tree s1 = gimple_assign_rhs3 (use_seq1 ? seq1->v_1_stmt
|
||||
: seq2->v_1_stmt);
|
||||
tree s2 = gimple_assign_rhs3 (use_seq1 ? seq1->v_2_stmt
|
||||
: seq2->v_2_stmt);
|
||||
unsigned int l1 = TREE_INT_CST_LOW (VECTOR_CST_ELT (s1, i)) % nelts;
|
||||
unsigned int l2 = TREE_INT_CST_LOW (VECTOR_CST_ELT (s2, i)) % nelts;
|
||||
|
||||
seq1_v_1_stmt_sel_perm.quick_push (l1 + (use_seq1 ? 0 : nelts));
|
||||
seq1_v_2_stmt_sel_perm.quick_push (l2 + (use_seq1 ? 0 : nelts));
|
||||
}
|
||||
|
||||
seq1_v_1_stmt_indices->new_vector (seq1_v_1_stmt_sel_perm, 2, nelts);
|
||||
vectype = TREE_TYPE (gimple_assign_lhs (seq1->v_1_stmt));
|
||||
vmode = TYPE_MODE (vectype);
|
||||
if (!can_vec_perm_const_p (vmode, vmode, *seq1_v_1_stmt_indices, false))
|
||||
return false;
|
||||
|
||||
seq1_v_2_stmt_indices->new_vector (seq1_v_2_stmt_sel_perm, 2, nelts);
|
||||
vectype = TREE_TYPE (gimple_assign_lhs (seq1->v_2_stmt));
|
||||
vmode = TYPE_MODE (vectype);
|
||||
if (!can_vec_perm_const_p (vmode, vmode, *seq1_v_2_stmt_indices, false))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Blend the two given simplifiable vec permute sequences using the
|
||||
given permutations. */
|
||||
|
||||
static void
|
||||
blend_vec_perm_simplify_seqs (vec_perm_simplify_seq seq1,
|
||||
vec_perm_simplify_seq seq2,
|
||||
const vec_perm_indices &seq2_stmt_indices,
|
||||
const vec_perm_indices &seq1_v_1_stmt_indices,
|
||||
const vec_perm_indices &seq1_v_2_stmt_indices)
|
||||
{
|
||||
/* We don't need to adjust seq1->stmt because its lanes consumption
|
||||
was already narrowed before entering this function. */
|
||||
|
||||
/* Adjust seq2->stmt: copy RHS1/RHS2 from seq1->stmt and set new sel. */
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "Updating VEC_PERM statment:\n");
|
||||
fprintf (dump_file, "Old stmt: ");
|
||||
print_gimple_stmt (dump_file, seq2->stmt, 0);
|
||||
}
|
||||
|
||||
gimple_assign_set_rhs1 (seq2->stmt, gimple_assign_rhs1 (seq1->stmt));
|
||||
gimple_assign_set_rhs2 (seq2->stmt, gimple_assign_rhs2 (seq1->stmt));
|
||||
tree vectype = TREE_TYPE (gimple_assign_lhs (seq2->stmt));
|
||||
tree sel = vect_gen_perm_mask_checked (vectype, seq2_stmt_indices);
|
||||
gimple_assign_set_rhs3 (seq2->stmt, sel);
|
||||
update_stmt (seq2->stmt);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "New stmt: ");
|
||||
print_gimple_stmt (dump_file, seq2->stmt, 0);
|
||||
}
|
||||
|
||||
/* Adjust seq1->v_1_stmt: copy RHS2 from seq2->v_1_stmt and set new sel. */
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "Updating VEC_PERM statment:\n");
|
||||
fprintf (dump_file, "Old stmt: ");
|
||||
print_gimple_stmt (dump_file, seq1->v_1_stmt, 0);
|
||||
}
|
||||
|
||||
gimple_assign_set_rhs2 (seq1->v_1_stmt, gimple_assign_rhs1 (seq2->v_1_stmt));
|
||||
vectype = TREE_TYPE (gimple_assign_lhs (seq1->v_1_stmt));
|
||||
sel = vect_gen_perm_mask_checked (vectype, seq1_v_1_stmt_indices);
|
||||
gimple_assign_set_rhs3 (seq1->v_1_stmt, sel);
|
||||
update_stmt (seq1->v_1_stmt);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "New stmt: ");
|
||||
print_gimple_stmt (dump_file, seq1->v_1_stmt, 0);
|
||||
}
|
||||
|
||||
/* Adjust seq1->v_2_stmt: copy RHS2 from seq2->v_2_stmt and set new sel. */
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "Updating VEC_PERM statment:\n");
|
||||
fprintf (dump_file, "Old stmt: ");
|
||||
print_gimple_stmt (dump_file, seq1->v_2_stmt, 0);
|
||||
}
|
||||
|
||||
gimple_assign_set_rhs2 (seq1->v_2_stmt, gimple_assign_rhs1 (seq2->v_2_stmt));
|
||||
vectype = TREE_TYPE (gimple_assign_lhs (seq1->v_2_stmt));
|
||||
sel = vect_gen_perm_mask_checked (vectype, seq1_v_2_stmt_indices);
|
||||
gimple_assign_set_rhs3 (seq1->v_2_stmt, sel);
|
||||
update_stmt (seq1->v_2_stmt);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "New stmt: ");
|
||||
print_gimple_stmt (dump_file, seq1->v_2_stmt, 0);
|
||||
}
|
||||
|
||||
/* At this point, we have four unmodified seq2 stmts, which will be
|
||||
eliminated by DCE. */
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "Vec perm simplify sequences have been blended.\n\n");
|
||||
}
|
||||
|
||||
/* Try to blend narrowed vec_perm_simplify_seqs pairwise.
|
||||
The provided list will be empty after this call. */
|
||||
|
||||
static void
|
||||
process_vec_perm_simplify_seq_list (vec<vec_perm_simplify_seq> *l)
|
||||
{
|
||||
unsigned int i, j;
|
||||
vec_perm_simplify_seq seq1, seq2;
|
||||
|
||||
if (l->is_empty ())
|
||||
return;
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "Processing %u vec perm simplify sequences.\n",
|
||||
l->length ());
|
||||
|
||||
FOR_EACH_VEC_ELT (*l, i, seq1)
|
||||
{
|
||||
if (i + 1 < l->length ())
|
||||
{
|
||||
FOR_EACH_VEC_ELT_FROM (*l, j, seq2, i + 1)
|
||||
{
|
||||
bool swap = false;
|
||||
if (can_blend_vec_perm_simplify_seqs_p (seq1, seq2, &swap))
|
||||
{
|
||||
vec_perm_indices seq2_stmt_indices;
|
||||
vec_perm_indices seq1_v_1_stmt_indices;
|
||||
vec_perm_indices seq1_v_2_stmt_indices;
|
||||
if (calc_perm_vec_perm_simplify_seqs (swap ? seq2 : seq1,
|
||||
swap ? seq1 : seq2,
|
||||
&seq2_stmt_indices,
|
||||
&seq1_v_1_stmt_indices,
|
||||
&seq1_v_2_stmt_indices))
|
||||
{
|
||||
/* Narrow lane usage. */
|
||||
narrow_vec_perm_simplify_seq (seq1);
|
||||
narrow_vec_perm_simplify_seq (seq2);
|
||||
|
||||
/* Blend sequences. */
|
||||
blend_vec_perm_simplify_seqs (swap ? seq2 : seq1,
|
||||
swap ? seq1 : seq2,
|
||||
seq2_stmt_indices,
|
||||
seq1_v_1_stmt_indices,
|
||||
seq1_v_2_stmt_indices);
|
||||
|
||||
/* We can use unordered_remove as we break the loop. */
|
||||
l->unordered_remove (j);
|
||||
XDELETE (seq2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* We don't need to call l->remove for seq1. */
|
||||
XDELETE (seq1);
|
||||
}
|
||||
|
||||
l->truncate (0);
|
||||
}
|
||||
|
||||
static void
|
||||
append_vec_perm_simplify_seq_list (vec<vec_perm_simplify_seq> *l,
|
||||
const vec_perm_simplify_seq &seq)
|
||||
{
|
||||
/* If no space on list left, then process the list. */
|
||||
if (!l->space (1))
|
||||
process_vec_perm_simplify_seq_list (l);
|
||||
|
||||
l->quick_push (seq);
|
||||
}
|
||||
|
||||
/* Main entry point for the forward propagation and statement combine
|
||||
optimizer. */
|
||||
|
||||
@ -3526,6 +4086,7 @@ pass_forwprop::execute (function *fun)
|
||||
auto_bitmap simple_dce_worklist;
|
||||
auto_bitmap need_ab_cleanup;
|
||||
to_purge = BITMAP_ALLOC (NULL);
|
||||
auto_vec<vec_perm_simplify_seq, 8> vec_perm_simplify_seq_list;
|
||||
for (int i = 0; i < postorder_num; ++i)
|
||||
{
|
||||
gimple_stmt_iterator gsi;
|
||||
@ -3605,14 +4166,18 @@ pass_forwprop::execute (function *fun)
|
||||
|
||||
/* Apply forward propagation to all stmts in the basic-block.
|
||||
Note we update GSI within the loop as necessary. */
|
||||
unsigned int uid = 1;
|
||||
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
|
||||
{
|
||||
gimple *stmt = gsi_stmt (gsi);
|
||||
tree lhs, rhs;
|
||||
enum tree_code code;
|
||||
|
||||
gimple_set_uid (stmt, uid++);
|
||||
|
||||
if (!is_gimple_assign (stmt))
|
||||
{
|
||||
process_vec_perm_simplify_seq_list (&vec_perm_simplify_seq_list);
|
||||
gsi_next (&gsi);
|
||||
continue;
|
||||
}
|
||||
@ -3620,9 +4185,11 @@ pass_forwprop::execute (function *fun)
|
||||
lhs = gimple_assign_lhs (stmt);
|
||||
rhs = gimple_assign_rhs1 (stmt);
|
||||
code = gimple_assign_rhs_code (stmt);
|
||||
|
||||
if (TREE_CODE (lhs) != SSA_NAME
|
||||
|| has_zero_uses (lhs))
|
||||
{
|
||||
process_vec_perm_simplify_seq_list (&vec_perm_simplify_seq_list);
|
||||
gsi_next (&gsi);
|
||||
continue;
|
||||
}
|
||||
@ -3901,10 +4468,25 @@ pass_forwprop::execute (function *fun)
|
||||
else
|
||||
gsi_next (&gsi);
|
||||
}
|
||||
else if (code == VEC_PERM_EXPR)
|
||||
{
|
||||
/* Find vectorized sequences, where we can reduce the lane
|
||||
utilization. The narrowing will be donw later and only
|
||||
if we find a pair of sequences that can be blended. */
|
||||
gassign *assign = dyn_cast <gassign *> (stmt);
|
||||
vec_perm_simplify_seq seq;
|
||||
if (recognise_vec_perm_simplify_seq (assign, &seq))
|
||||
append_vec_perm_simplify_seq_list (&vec_perm_simplify_seq_list,
|
||||
seq);
|
||||
|
||||
gsi_next (&gsi);
|
||||
}
|
||||
else
|
||||
gsi_next (&gsi);
|
||||
}
|
||||
|
||||
process_vec_perm_simplify_seq_list (&vec_perm_simplify_seq_list);
|
||||
|
||||
/* Combine stmts with the stmts defining their operands.
|
||||
Note we update GSI within the loop as necessary. */
|
||||
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||
|
Loading…
Reference in New Issue
Block a user