mirror of
https://github.com/LIV2/bebbo-gcc.git
synced 2025-12-06 00:23:32 +00:00
2018-06-21 Richard Biener <rguenther@suse.de>
Backport from mainline 2017-09-06 Richard Biener <rguenther@suse.de> PR tree-optimization/82108 * tree-vect-stmts.c (vectorizable_load): Fix pointer adjustment for gap in the non-permutation SLP case. * gcc.dg/vect/pr82108.c: New testcase. 2017-06-18 Richard Biener <rguenther@suse.de> PR tree-optimization/81410 * tree-vect-stmts.c (vectorizable_load): Properly adjust for the gap in the ! slp_perm SLP case after each group. * gcc.dg/vect/pr81410.c: New testcase. 2017-03-08 Richard Biener <rguenther@suse.de> PR tree-optimization/79920 * tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline with ncopies == 1 to ... (vect_transform_slp_perm_load): ... here. Properly compute all element loads by iterating VF times over the group. Do not handle ncopies (computed in a broken way) in vect_create_mask_and_perm. * gcc.dg/vect/pr79920.c: New testcase. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-6-branch@261842 138bc75d-0d04-0410-961f-82ee72b054a4
This commit is contained in:
parent
564d3da38c
commit
e6a9f2bf1e
@ -1,3 +1,28 @@
|
||||
2018-06-21 Richard Biener <rguenther@suse.de>
|
||||
|
||||
Backport from mainline
|
||||
2017-09-06 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/82108
|
||||
* tree-vect-stmts.c (vectorizable_load): Fix pointer adjustment
|
||||
for gap in the non-permutation SLP case.
|
||||
|
||||
2017-06-18 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/81410
|
||||
* tree-vect-stmts.c (vectorizable_load): Properly adjust for
|
||||
the gap in the ! slp_perm SLP case after each group.
|
||||
|
||||
2017-03-08 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/79920
|
||||
* tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline
|
||||
with ncopies == 1 to ...
|
||||
(vect_transform_slp_perm_load): ... here. Properly compute
|
||||
all element loads by iterating VF times over the group. Do
|
||||
not handle ncopies (computed in a broken way) in
|
||||
vect_create_mask_and_perm.
|
||||
|
||||
2018-06-21 Richard Biener <rguenther@suse.de>
|
||||
|
||||
Backport from mainline
|
||||
|
||||
@ -1,3 +1,21 @@
|
||||
2018-06-21 Richard Biener <rguenther@suse.de>
|
||||
|
||||
Backport from mainline
|
||||
2017-09-06 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/82108
|
||||
* gcc.dg/vect/pr82108.c: New testcase.
|
||||
|
||||
2017-06-18 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/81410
|
||||
* gcc.dg/vect/pr81410.c: New testcase.
|
||||
|
||||
2017-03-08 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/79920
|
||||
* gcc.dg/vect/pr79920.c: New testcase.
|
||||
|
||||
2018-06-21 Richard Biener <rguenther@suse.de>
|
||||
|
||||
Backport from mainline
|
||||
|
||||
44
gcc/testsuite/gcc.dg/vect/pr79920.c
Normal file
44
gcc/testsuite/gcc.dg/vect/pr79920.c
Normal file
@ -0,0 +1,44 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-additional-options "-O3" } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
/* Fill T33 from the first and third element of each consecutive triple of
   W_1, weighted by T32, and return the sum of T43[1] * T33[1][i] over the
   six columns.  The noinline and noclone attributes are requested so the
   function is compiled as written rather than folded into its caller.  */
double __attribute__((noinline,noclone))
|
||||
compute_integral (double w_1[18])
|
||||
{
|
||||
double A = 0;
|
||||
double t33[2][6] = {{0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
|
||||
{0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
|
||||
double t43[2] = {0.0, 0.0};
|
||||
/* t31 is initialized but never read in this function.  */
double t31[2][2] = {{1.0, 1.0}, {1.0, 1.0}};
|
||||
double t32[2][3] = {{0.0, 0.0, 1.0}, {0.0, 0.0, 1.0}};
|
||||
|
||||
for (int ip_1 = 0; ip_1 < 2; ++ip_1)
|
||||
{
|
||||
for (int i_0 = 0; i_0 < 6; ++i_0)
|
||||
/* Only elements 0 and 2 of each triple are loaded; element 1 is a
   gap in the access pattern.  */
t33[ip_1][i_0] = ((w_1[i_0*3] * t32[ip_1][0])
|
||||
+ (w_1[i_0*3+2] * t32[ip_1][2]));
|
||||
t43[ip_1] = 2.0;
|
||||
}
|
||||
/* Only row 1 of T33 and T43 contributes to the result.  */
for (int i_0 = 0; i_0 < 6; ++i_0)
|
||||
A += t43[1]*t33[1][i_0];
|
||||
return A;
|
||||
}
|
||||
|
||||
/* Run compute_integral on six {0, 1, 1} triples and abort unless the
   result is exactly 12.0 (six terms of 2.0 * 1.0).  */
int main()
|
||||
{
|
||||
/* check_vect is declared in tree-vect.h, which is not shown here;
   presumably it verifies that the target can run vector code -- confirm.  */
check_vect ();
|
||||
|
||||
double w_1[18] = {0., 1.0, 1.0,
|
||||
0., 1.0, 1.0,
|
||||
0., 1.0, 1.0,
|
||||
0., 1.0, 1.0,
|
||||
0., 1.0, 1.0,
|
||||
0., 1.0, 1.0};
|
||||
double A = compute_integral(w_1);
|
||||
if (A != 12.0)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_double && { vect_perm && vect_hw_misalign } } } } } */
|
||||
37
gcc/testsuite/gcc.dg/vect/pr81410.c
Normal file
37
gcc/testsuite/gcc.dg/vect/pr81410.c
Normal file
@ -0,0 +1,37 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target vect_long_long } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
/* Test data: foo reads x with stride 3, writes y in pairs (stride 2) and
   writes z contiguously.  */
long long x[24];
|
||||
long long y[16];
|
||||
long long z[8];
|
||||
|
||||
/* Copy the first two elements of each group of three in x into consecutive
   pairs of y, and set every element of z to 1.  The third element of each
   x group is never read, leaving a gap after each loaded pair.  */
void __attribute__((noinline)) foo()
|
||||
{
|
||||
for (int i = 0; i < 8; ++i)
|
||||
{
|
||||
y[2*i] = x[3*i];
|
||||
y[2*i + 1] = x[3*i + 1];
|
||||
z[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize x[i] = i, run foo, and abort unless each pair in y holds
   3*i and 3*i + 1.  The contents of z are not checked.  */
int main()
|
||||
{
|
||||
/* check_vect is declared in tree-vect.h, which is not shown here;
   presumably it verifies that the target can run vector code -- confirm.  */
check_vect ();
|
||||
|
||||
for (int i = 0; i < 24; ++i)
|
||||
{
|
||||
x[i] = i;
|
||||
/* Empty asm with a "memory" clobber; presumably here to keep this
   initialization loop from being vectorized -- confirm.  */
__asm__ volatile ("" : : : "memory");
|
||||
}
|
||||
foo ();
|
||||
for (int i = 0; i < 8; ++i)
|
||||
if (y[2*i] != 3*i || y[2*i+1] != 3*i + 1)
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
|
||||
47
gcc/testsuite/gcc.dg/vect/pr82108.c
Normal file
47
gcc/testsuite/gcc.dg/vect/pr82108.c
Normal file
@ -0,0 +1,47 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target vect_float } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
/* For i = 0, 2, ... below SRC_N: average the four floats at SRC with the
   four floats at SRC + 4 and store the four results at DST.  Each
   iteration advances SRC by eight floats and DST by four.  */
void __attribute__((noinline,noclone))
|
||||
downscale_2 (const float* src, int src_n, float* dst)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < src_n; i += 2) {
|
||||
/* A and B are two adjacent four-float rows of the input.  */
const float* a = src;
|
||||
const float* b = src + 4;
|
||||
|
||||
dst[0] = (a[0] + b[0]) / 2;
|
||||
dst[1] = (a[1] + b[1]) / 2;
|
||||
dst[2] = (a[2] + b[2]) / 2;
|
||||
dst[3] = (a[3] + b[3]) / 2;
|
||||
|
||||
/* Skip both rows just consumed.  */
src += 2 * 4;
|
||||
dst += 4;
|
||||
}
|
||||
}
|
||||
|
||||
/* Run downscale_2 over four rows of four floats (two loop iterations, as
   SRC_N is 4) and abort unless both output rows equal {3, 4, 5, 6}, the
   element-wise average of {1, 2, 3, 4} and {5, 6, 7, 8}.  */
int main ()
|
||||
{
|
||||
const float in[4 * 4] = {
|
||||
1, 2, 3, 4,
|
||||
5, 6, 7, 8,
|
||||
|
||||
1, 2, 3, 4,
|
||||
5, 6, 7, 8
|
||||
};
|
||||
float out[2 * 4];
|
||||
|
||||
/* check_vect is declared in tree-vect.h, which is not shown here;
   presumably it verifies that the target can run vector code -- confirm.  */
check_vect ();
|
||||
|
||||
downscale_2 (in, 4, out);
|
||||
|
||||
if (out[0] != 3 || out[1] != 4 || out[2] != 5 || out[3] != 6
|
||||
|| out[4] != 3 || out[5] != 4 || out[6] != 5 || out[7] != 6)
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
@ -3288,66 +3288,6 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
|
||||
building a vector of type MASK_TYPE from it) and two input vectors placed in
|
||||
DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
|
||||
shifting by STRIDE elements of DR_CHAIN for every copy.
|
||||
(STRIDE is the number of vectorized stmts for NODE divided by the number of
|
||||
copies).
|
||||
VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
|
||||
the created stmts must be inserted. */
|
||||
|
||||
static inline void
|
||||
vect_create_mask_and_perm (gimple *stmt,
|
||||
tree mask, int first_vec_indx, int second_vec_indx,
|
||||
gimple_stmt_iterator *gsi, slp_tree node,
|
||||
tree vectype, vec<tree> dr_chain,
|
||||
int ncopies, int vect_stmts_counter)
|
||||
{
|
||||
tree perm_dest;
|
||||
gimple *perm_stmt = NULL;
|
||||
int i, stride_in, stride_out;
|
||||
tree first_vec, second_vec, data_ref;
|
||||
|
||||
/* Per-copy strides: STRIDE_OUT steps through NODE's vector stmt slots,
   STRIDE_IN steps through the input vectors in DR_CHAIN.  */
stride_out = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
|
||||
stride_in = dr_chain.length () / ncopies;
|
||||
|
||||
/* Initialize the vect stmts of NODE to properly insert the generated
|
||||
stmts later. */
|
||||
for (i = SLP_TREE_VEC_STMTS (node).length ();
|
||||
i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
|
||||
SLP_TREE_VEC_STMTS (node).quick_push (NULL);
|
||||
|
||||
/* One destination variable is created up front; each copy below gets its
   own SSA name derived from it.  */
perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
|
||||
for (i = 0; i < ncopies; i++)
|
||||
{
|
||||
first_vec = dr_chain[first_vec_indx];
|
||||
second_vec = dr_chain[second_vec_indx];
|
||||
|
||||
/* Generate the permute statement if necessary. */
|
||||
if (mask)
|
||||
{
|
||||
perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
|
||||
first_vec, second_vec, mask);
|
||||
data_ref = make_ssa_name (perm_dest, perm_stmt);
|
||||
gimple_set_lhs (perm_stmt, data_ref);
|
||||
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
|
||||
}
|
||||
else
|
||||
/* If mask was NULL_TREE generate the requested identity transform. */
|
||||
/* No new statement is emitted; the statement defining FIRST_VEC is
   recorded instead.  */
perm_stmt = SSA_NAME_DEF_STMT (first_vec);
|
||||
|
||||
/* Store the vector statement in NODE. */
|
||||
SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter]
|
||||
= perm_stmt;
|
||||
|
||||
/* Move on to the input vectors of the next copy.  */
first_vec_indx += stride_in;
|
||||
second_vec_indx += stride_in;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Generate vector permute statements from a list of loads in DR_CHAIN.
|
||||
If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
|
||||
permute statements for the SLP node NODE of the SLP instance
|
||||
@ -3364,7 +3304,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
||||
int nunits, vec_index = 0;
|
||||
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||
int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
|
||||
int unroll_factor, mask_element, ncopies;
|
||||
int mask_element;
|
||||
unsigned char *mask;
|
||||
machine_mode mode;
|
||||
|
||||
@ -3382,11 +3322,13 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
||||
mask_type = get_vectype_for_scalar_type (mask_element_type);
|
||||
nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
mask = XALLOCAVEC (unsigned char, nunits);
|
||||
unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
|
||||
|
||||
/* Number of copies is determined by the final vectorization factor
|
||||
relatively to SLP_NODE_INSTANCE unrolling factor. */
|
||||
ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
|
||||
/* Initialize the vect stmts of NODE to properly insert the generated
|
||||
stmts later. */
|
||||
if (! analyze_only)
|
||||
for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
|
||||
i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
|
||||
SLP_TREE_VEC_STMTS (node).quick_push (NULL);
|
||||
|
||||
/* Generate permutation masks for every NODE. Number of masks for each NODE
|
||||
is equal to GROUP_SIZE.
|
||||
@ -3412,7 +3354,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
||||
int second_vec_index = -1;
|
||||
bool noop_p = true;
|
||||
|
||||
for (int j = 0; j < unroll_factor; j++)
|
||||
for (int j = 0; j < vf; j++)
|
||||
{
|
||||
for (int k = 0; k < group_size; k++)
|
||||
{
|
||||
@ -3483,10 +3425,30 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
||||
|
||||
if (second_vec_index == -1)
|
||||
second_vec_index = first_vec_index;
|
||||
vect_create_mask_and_perm (stmt, mask_vec, first_vec_index,
|
||||
second_vec_index,
|
||||
gsi, node, vectype, dr_chain,
|
||||
ncopies, vect_stmts_counter++);
|
||||
|
||||
/* Generate the permute statement if necessary. */
|
||||
tree first_vec = dr_chain[first_vec_index];
|
||||
tree second_vec = dr_chain[second_vec_index];
|
||||
gimple *perm_stmt;
|
||||
if (! noop_p)
|
||||
{
|
||||
tree perm_dest
|
||||
= vect_create_destination_var (gimple_assign_lhs (stmt),
|
||||
vectype);
|
||||
perm_dest = make_ssa_name (perm_dest);
|
||||
perm_stmt = gimple_build_assign (perm_dest,
|
||||
VEC_PERM_EXPR,
|
||||
first_vec, second_vec,
|
||||
mask_vec);
|
||||
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
|
||||
}
|
||||
else
|
||||
/* If mask was NULL_TREE generate the requested
|
||||
identity transform. */
|
||||
perm_stmt = SSA_NAME_DEF_STMT (first_vec);
|
||||
|
||||
/* Store the vector statement in NODE. */
|
||||
SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
|
||||
}
|
||||
|
||||
index = 0;
|
||||
|
||||
@ -6937,10 +6937,16 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
||||
not only the number of vector stmts the permutation result
|
||||
fits in. */
|
||||
if (slp_perm)
|
||||
vec_num = (group_size * vf + nunits - 1) / nunits;
|
||||
{
|
||||
vec_num = (group_size * vf + nunits - 1) / nunits;
|
||||
group_gap_adj = vf * group_size - nunits * vec_num;
|
||||
}
|
||||
else
|
||||
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
|
||||
group_gap_adj = vf * group_size - nunits * vec_num;
|
||||
{
|
||||
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
|
||||
group_gap_adj
|
||||
= group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
|
||||
}
|
||||
}
|
||||
else
|
||||
vec_num = group_size;
|
||||
@ -7101,6 +7107,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
||||
aggr_type = vectype;
|
||||
|
||||
prev_stmt_info = NULL;
|
||||
int group_elt = 0;
|
||||
for (j = 0; j < ncopies; j++)
|
||||
{
|
||||
/* 1. Create the vector or array pointer update chain. */
|
||||
@ -7392,10 +7399,27 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|
||||
/* Store vector loads in the corresponding SLP_NODE. */
|
||||
if (slp && !slp_perm)
|
||||
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
|
||||
|
||||
/* With SLP permutation we load the gaps as well; without it
|
||||
we need to skip the gaps once all elements of the group have
|
||||
been loaded. group_gap_adj is GROUP_SIZE minus the SLP instance group size here. */
|
||||
group_elt += nunits;
|
||||
if (group_gap_adj != 0 && ! slp_perm
|
||||
&& group_elt == group_size - group_gap_adj)
|
||||
{
|
||||
bool ovf;
|
||||
tree bump
|
||||
= wide_int_to_tree (sizetype,
|
||||
wi::smul (TYPE_SIZE_UNIT (elem_type),
|
||||
group_gap_adj, &ovf));
|
||||
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
|
||||
stmt, bump);
|
||||
group_elt = 0;
|
||||
}
|
||||
}
|
||||
/* Bump the vector pointer to account for a gap or for excess
|
||||
elements loaded for a permuted SLP load. */
|
||||
if (group_gap_adj != 0)
|
||||
if (group_gap_adj != 0 && slp_perm)
|
||||
{
|
||||
bool ovf;
|
||||
tree bump
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user