    @Run(test = {"test1"})
    @Warmup(0)
    public void runTests() throws Exception {
        int[] data = new int[RANGE];

        init(data);
        for (int i = 0; i < ITER; i++) {
            long r1 = test1(data, i);
            long r2 = ref1(data, i);
            if (r1 != r2) {
                throw new RuntimeException("Wrong result test1: " + r1 + " != " + r2);
            }
        }
    }

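    // The IR rules below check that the int loads and the int->long casts are
    // vectorized at the common width min(max_int, max_long), i.e. the number of
    // long lanes per vector, and that a vectorized OR reduction is present.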
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.VECTOR_CAST_I2L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.OR_REDUCTION_V, "> 0"},
        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true"})
    static long test1(int[] data, long sum) {
        for (int i = 0; i < data.length; i += 2) {
            // Mixing int and long ops means we only allow half of the int loads
            // in one pack, so we get two int packs. The first pack is missing one
            // of its pairs because of the store below, which creates a dependency.
            // That first pack is rejected and left scalar, while the second pack
            // vectorizes successfully. This gives us a mixed scalar/vector
            // reduction chain, where a vector reduction can have a scalar
            // reduction as input, i.e. one that is neither a phi nor a vector
            // reduction. In such a case, we must bail out of the optimization in
            // PhaseIdealLoop::move_unordered_reduction_out_of_loop.
            int v = data[i];  // int read
            data[0] = 0;      // ruin the first pack
            sum |= v;         // long reduction (and implicit cast from int to long)
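            // Note: "sum |= v" first widens v, i.e. sum = sum | (long) v, which
            // is where the VectorCastI2L nodes required by the IR rule come from.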

            // This example used to rely on the fact that reductions were ignored
            // in SuperWord::unrolling_analysis, so the largest data type seen in
            // the loop was the ints. The longs were then unrolled to twice the
            // vector length, which gave us twice as many packs. Because of the
            // store "data[0] = 0", the first packs were destroyed, since they did
            // not have power-of-2 size.
            // Now that we no longer ignore reductions, we unroll half as much
            // before SuperWord. That would leave only one pack per operation,
            // which would get ruined, and we would have no vectorization. We
            // therefore ensure there are again 2 packs per operation with a
            // 2x hand unroll.
            int v2 = data[i + 1];
            sum |= v2;
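            // This second load/reduction pair provides the second pack, which is
            // the one that survives and vectorizes (see the comment above).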

            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            //     UseCompactObjectHeaders=false           UseCompactObjectHeaders=true
            //     adr = base + 16 + 8*i  ->  always       adr = base + 12 + 8*i  ->  never
            //     -> vectorize                            -> no vectorization
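            // (The arithmetic: with standard headers the int[] payload starts at
            // offset 16, and 16 + 8*i is always divisible by 8; with compact
            // headers it starts at offset 12, and 12 + 8*i never is.)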
        }
        return sum;
    }

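    // Scalar reference for runTests: it OR-reduces the same elements and
    // performs the same store as test1, so the results must match.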
    static long ref1(int[] data, long sum) {
        for (int i = 0; i < data.length; i++) {
            int v = data[i];
            data[0] = 0;
            sum |= v;
        }
        return sum;
    }

    static void init(int[] data) {
        for (int i = 0; i < RANGE; i++) {
            data[i] = i + 1;
        }
    }
}
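
// For context, a minimal sketch of the class preamble such an IR-framework
// test needs (not shown in this excerpt). The class name and constant values
// here are assumptions, not taken from the excerpt:
//
//     import compiler.lib.ir_framework.*;
//
//     public class TestUnorderedReductionPartialVectorization {
//         static final int RANGE = 1024;  // assumed array size
//         static final int ITER  = 100;   // assumed iteration count
//
//         public static void main(String[] args) {
//             TestFramework.run();  // compiles test1 and checks the @IR rules
//         }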