/*
 * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
24
/*
 * @test
 * @bug 8304042
 * @summary Test some examples with independent packs with cyclic dependency
 *          between the packs.
 * @modules java.base/jdk.internal.misc
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency nCOH_nAV
 * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency nCOH_yAV
 * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency yCOH_nAV
 * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency yCOH_yAV
 */
37
38 package compiler.loopopts.superword;
39
40 import jdk.internal.misc.Unsafe;
41 import jdk.test.lib.Asserts;
42 import compiler.lib.ir_framework.*;
43
44 public class TestIndependentPacksWithCyclicDependency {
45 static final int RANGE = 1024;
46 static final int ITER = 10_000;
47 static Unsafe unsafe = Unsafe.getUnsafe();
48
49 int[] goldI0 = new int[RANGE];
50 float[] goldF0 = new float[RANGE];
51 int[] goldI1 = new int[RANGE];
52 float[] goldF1 = new float[RANGE];
53 int[] goldI2 = new int[RANGE];
54 float[] goldF2 = new float[RANGE];
55 int[] goldI3 = new int[RANGE];
56 float[] goldF3 = new float[RANGE];
57 int[] goldI4 = new int[RANGE];
58 float[] goldF4 = new float[RANGE];
59 int[] goldI5 = new int[RANGE];
60 float[] goldF5 = new float[RANGE];
61 int[] goldI6 = new int[RANGE];
62 float[] goldF6 = new float[RANGE];
63 long[] goldL6 = new long[RANGE];
64 int[] goldI7 = new int[RANGE];
65 float[] goldF7 = new float[RANGE];
66 long[] goldL7 = new long[RANGE];
67 int[] goldI8 = new int[RANGE];
68 float[] goldF8 = new float[RANGE];
69 long[] goldL8 = new long[RANGE];
70 int[] goldI9 = new int[RANGE];
71 float[] goldF9 = new float[RANGE];
72 long[] goldL9 = new long[RANGE];
73 int[] goldI10 = new int[RANGE];
74 float[] goldF10 = new float[RANGE];
75 long[] goldL10 = new long[RANGE];
76
77 public static void main(String args[]) {
78 TestFramework framework = new TestFramework(TestIndependentPacksWithCyclicDependency.class);
79 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
80 "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::test*",
81 "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::verify",
82 "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::init",
83 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
84 switch (args[0]) {
85 case "nCOH_nAV" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
86 case "nCOH_yAV" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
87 case "yCOH_nAV" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
88 case "yCOH_yAV" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"); }
89 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
90 };
91 framework.start();
92 }
93
94 TestIndependentPacksWithCyclicDependency() {
95 // compute the gold standard in interpreter mode
96 init(goldI0, goldF0);
97 test0(goldI0, goldI0, goldF0, goldF0);
98 init(goldI1, goldF1);
99 test1(goldI1, goldI1, goldF1, goldF1);
100 init(goldI2, goldF2);
101 test2(goldI2, goldI2, goldF2, goldF2);
102 init(goldI3, goldF3);
103 test3(goldI3, goldI3, goldF3, goldF3);
104 init(goldI4, goldF4);
105 test4(goldI4, goldI4, goldF4, goldF4);
106 init(goldI5, goldF5);
107 test5(goldI5, goldI5, goldF5, goldF5);
108 init(goldI6, goldF6, goldL6);
109 test6(goldI6, goldI6, goldF6, goldF6, goldL6, goldL6);
110 init(goldI7, goldF7, goldL7);
111 test7(goldI7, goldI7, goldF7, goldF7, goldL7, goldL7);
112 init(goldI8, goldF8, goldL8);
113 test8(goldI8, goldI8, goldF8, goldF8, goldL8, goldL8);
114 init(goldI9, goldF9, goldL9);
115 test9(goldI9, goldI9, goldF9, goldF9, goldL9, goldL9);
116 init(goldI10, goldF10, goldL10);
117 test10(goldI10, goldI10, goldF10, goldF10, goldL10, goldL10);
118 }
119
120 @Run(test = "test0")
121 @Warmup(100)
122 public void runTest0() {
123 int[] dataI = new int[RANGE];
124 float[] dataF = new float[RANGE];
125 init(dataI, dataF);
126 test0(dataI, dataI, dataF, dataF);
127 verify("test0", dataI, goldI0);
128 verify("test0", dataF, goldF0);
129 }
130
131 @Test
132 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
133 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
134 applyIfPlatform = {"64-bit", "true"},
135 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
136 static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
137 for (int i = 0; i < RANGE; i+=2) {
138 // Hand-unrolled 2x. Int and Float slice are completely separate.
139 dataIb[i+0] = dataIa[i+0] + 3;
140 dataIb[i+1] = dataIa[i+1] + 3;
141 dataFb[i+0] = dataFa[i+0] * 1.3f;
142 dataFb[i+1] = dataFa[i+1] * 1.3f;
143 // With AlignVector, we need 8-byte alignment of vector loads/stores.
144 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
145 // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never
146 // -> vectorize -> no vectorization
147 }
148 }
149
150 @Run(test = "test1")
151 @Warmup(100)
152 public void runTest1() {
153 int[] dataI = new int[RANGE];
154 float[] dataF = new float[RANGE];
155 init(dataI, dataF);
156 test1(dataI, dataI, dataF, dataF);
157 verify("test1", dataI, goldI1);
158 verify("test1", dataF, goldF1);
159 }
160
161 @Test
162 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0", IRNode.VECTOR_CAST_F2I, "> 0", IRNode.VECTOR_CAST_I2F, "> 0"},
163 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
164 applyIfPlatform = {"64-bit", "true"},
165 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
166 static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
167 for (int i = 0; i < RANGE; i+=2) {
168 // Hand-unrolled 2x. Converst to and from. StoreF -> LoadF dependency.
169 dataFa[i+0] = dataIa[i+0] + 3;
170 dataFa[i+1] = dataIa[i+1] + 3;
171 dataIb[i+0] = (int)(dataFb[i+0] * 1.3f);
172 dataIb[i+1] = (int)(dataFb[i+1] * 1.3f);
173 // With AlignVector, we need 8-byte alignment of vector loads/stores.
174 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
175 // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never
176 // -> vectorize -> no vectorization
177 }
178 }
179
180 @Run(test = "test2")
181 public void runTest2() {
182 int[] dataI = new int[RANGE];
183 float[] dataF = new float[RANGE];
184 init(dataI, dataF);
185 test2(dataI, dataI, dataF, dataF);
186 verify("test2", dataI, goldI2);
187 verify("test2", dataF, goldF2);
188 }
189
190 @Test
191 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0"},
192 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
193 applyIfPlatform = {"64-bit", "true"},
194 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
195 static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
196 for (int i = 0; i < RANGE; i+=2) {
197 // int and float arrays are two slices. But we pretend both are of type int.
198 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1);
199 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1);
200 dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0);
201 dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4);
202 // With AlignVector, we need 8-byte alignment of vector loads/stores.
203 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
204 // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never
205 // -> vectorize -> no vectorization
206 }
207 }
208
209 @Run(test = "test3")
210 @Warmup(100)
211 public void runTest3() {
212 int[] dataI = new int[RANGE];
213 float[] dataF = new float[RANGE];
214 init(dataI, dataF);
215 test3(dataI, dataI, dataF, dataF);
216 verify("test3", dataI, goldI3);
217 verify("test3", dataF, goldF3);
218 }
219
220 @Test
221 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
222 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
223 applyIfPlatform = {"64-bit", "true"},
224 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
225 static void test3(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
226 for (int i = 0; i < RANGE; i+=2) {
227 // Inversion of orders. But because we operate on separate slices, this should
228 // safely vectorize. It should detect that each line is independent, so it can
229 // reorder them.
230 dataIb[i+0] = dataIa[i+0] + 3;
231 dataFb[i+1] = dataFa[i+1] * 1.3f;
232 dataFb[i+0] = dataFa[i+0] * 1.3f;
233 dataIb[i+1] = dataIa[i+1] + 3;
234 // With AlignVector, we need 8-byte alignment of vector loads/stores.
235 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
236 // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never
237 // -> vectorize -> no vectorization
238 }
239 }
240
241 @Run(test = "test4")
242 @Warmup(100)
243 public void runTest4() {
244 int[] dataI = new int[RANGE];
245 float[] dataF = new float[RANGE];
246 init(dataI, dataF);
247 test4(dataI, dataI, dataF, dataF);
248 verify("test4", dataI, goldI4);
249 verify("test4", dataF, goldF4);
250 }
251
252 @Test
253 static void test4(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
254 for (int i = 0; i < RANGE; i+=2) {
255 // same as test1, except that reordering leads to different semantics
256 // [A,B] and [X,Y] are both packs that are internally independent
257 // But we have dependencies A -> X (StoreF -> LoadF)
258 // and Y -> B (StoreI -> LoadI)
259 // Hence the two packs have a cyclic dependency, we cannot schedule
260 // one before the other.
261 dataFa[i+0] = dataIa[i+0] + 3; // A
262 dataIb[i+0] = (int)(dataFb[i+0] * 1.3f); // X
263 dataIb[i+1] = (int)(dataFb[i+1] * 1.3f); // Y
264 dataFa[i+1] = dataIa[i+1] + 3; // B
265 }
266 }
267
268 @Run(test = "test5")
269 public void runTest5() {
270 int[] dataI = new int[RANGE];
271 float[] dataF = new float[RANGE];
272 init(dataI, dataF);
273 test5(dataI, dataI, dataF, dataF);
274 verify("test5", dataI, goldI5);
275 verify("test5", dataF, goldF5);
276 }
277
278 @Test
279 static void test5(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
280 for (int i = 0; i < RANGE; i+=2) {
281 // same as test2, except that reordering leads to different semantics
282 // explanation analogue to test4
283 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); // A
284 dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); // X
285 dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); // Y
286 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1); // B
287 }
288 }
289
290 @Run(test = "test6")
291 public void runTest6() {
292 int[] dataI = new int[RANGE];
293 float[] dataF = new float[RANGE];
294 long[] dataL = new long[RANGE];
295 init(dataI, dataF, dataL);
296 test6(dataI, dataI, dataF, dataF, dataL, dataL);
297 verify("test6", dataI, goldI6);
298 verify("test6", dataF, goldF6);
299 verify("test6", dataL, goldL6);
300 }
301
302 @Test
303 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0", IRNode.ADD_VF, "> 0"},
304 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
305 applyIfPlatform = {"64-bit", "true"},
306 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
307 static void test6(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
308 long[] dataLa, long[] dataLb) {
309 for (int i = 0; i < RANGE; i+=2) {
310 // Chain of parallelizable op and conversion
311 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
312 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
313 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
314 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
315 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
316 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
317 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
318 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
319 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
320 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
321 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
322 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
323 // With AlignVector, we need 8-byte alignment of vector loads/stores.
324 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
325 // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never
326 // -> vectorize -> no vectorization
327 }
328 }
329
330 @Run(test = "test7")
331 public void runTest7() {
332 int[] dataI = new int[RANGE];
333 float[] dataF = new float[RANGE];
334 long[] dataL = new long[RANGE];
335 init(dataI, dataF, dataL);
336 test7(dataI, dataI, dataF, dataF, dataL, dataL);
337 verify("test7", dataI, goldI7);
338 verify("test7", dataF, goldF7);
339 verify("test7", dataL, goldL7);
340 }
341
342 @Test
343 static void test7(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
344 long[] dataLa, long[] dataLb) {
345 for (int i = 0; i < RANGE; i+=2) {
346 // Cycle involving 3 memory slices
347 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
348 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
349 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
350 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
351 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
352 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
353 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
354 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
355 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
356 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
357 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // moved down
358 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
359 }
360 }
361
362
363 @Run(test = "test8")
364 public void runTest8() {
365 int[] dataI = new int[RANGE];
366 float[] dataF = new float[RANGE];
367 long[] dataL = new long[RANGE];
368 init(dataI, dataF, dataL);
369 test8(dataI, dataI, dataF, dataF, dataL, dataL);
370 verify("test8", dataI, goldI8);
371 verify("test8", dataF, goldF8);
372 verify("test8", dataL, goldL8);
373 }
374
375 @Test
376 static void test8(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
377 long[] dataLa, long[] dataLb) {
378 for (int i = 0; i < RANGE; i+=2) {
379 // 2-cycle, with more ops after
380 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
381 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
382 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
383 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
384 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
385 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
386 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
387 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
388 // more stuff after
389 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
390 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
391 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
392 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
393 }
394 }
395
396 @Run(test = "test9")
397 public void runTest9() {
398 int[] dataI = new int[RANGE];
399 float[] dataF = new float[RANGE];
400 long[] dataL = new long[RANGE];
401 init(dataI, dataF, dataL);
402 test9(dataI, dataI, dataF, dataF, dataL, dataL);
403 verify("test9", dataI, goldI9);
404 verify("test9", dataF, goldF9);
405 verify("test9", dataL, goldL9);
406 }
407
408 @Test
409 static void test9(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
410 long[] dataLa, long[] dataLb) {
411 for (int i = 0; i < RANGE; i+=2) {
412 // 2-cycle, with more stuff before
413 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
414 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
415 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
416 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
417 // 2-cycle
418 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
419 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
420 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
421 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
422 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
423 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
424 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
425 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
426 }
427 }
428
429 @Run(test = "test10")
430 public void runTest10() {
431 int[] dataI = new int[RANGE];
432 float[] dataF = new float[RANGE];
433 long[] dataL = new long[RANGE];
434 init(dataI, dataF, dataL);
435 test10(dataI, dataI, dataF, dataF, dataL, dataL);
436 verify("test10", dataI, goldI10);
437 verify("test10", dataF, goldF10);
438 verify("test10", dataL, goldL10);
439 }
440
441 @Test
442 static void test10(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
443 long[] dataLa, long[] dataLb) {
444 for (int i = 0; i < RANGE; i+=2) {
445 // This creates the following graph before SuperWord:
446 //
447 // A -> R -> U
448 // S -> V -> B
449 //
450 // SuperWord analyzes the graph, and sees that [A,B] and [U,V]
451 // are adjacent, isomorphic and independent packs. However,
452 // [R,S] are not isomorphic (R mul, S add).
453 // So it vectorizes [A,B] and [U,V] this gives us this graph:
454 //
455 // -> R
456 // [A,B] -> [U,V] -+
457 // ^ -> S |
458 // | |
459 // +------------------+
460 //
461 // The cycle thus does not only go via packs, but also scalar ops.
462 //
463 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; // A
464 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
465 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; // R: constant mismatch
466 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) + 43; // S
467 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
468 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
469 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; // U
470 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; // V
471 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
472 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
473 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // B: moved down
474 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
475 }
476 }
477
478 static void init(int[] dataI, float[] dataF) {
479 for (int i = 0; i < RANGE; i++) {
480 dataI[i] = i + 1;
481 dataF[i] = i + 0.1f;
482 }
483 }
484
485 static void init(int[] dataI, float[] dataF, long[] dataL) {
486 for (int i = 0; i < RANGE; i++) {
487 dataI[i] = i + 1;
488 dataF[i] = i + 0.1f;
489 dataL[i] = i + 1;
490 }
491 }
492
493 static void verify(String name, int[] data, int[] gold) {
494 for (int i = 0; i < RANGE; i++) {
495 if (data[i] != gold[i]) {
496 throw new RuntimeException(" Invalid " + name + " result: dataI[" + i + "]: " + data[i] + " != " + gold[i]);
497 }
498 }
499 }
500
501 static void verify(String name, float[] data, float[] gold) {
502 for (int i = 0; i < RANGE; i++) {
503 int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
504 int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
505 if (datav != goldv) {
506 throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
507 }
508 }
509 }
510
511 static void verify(String name, long[] data, long[] gold) {
512 for (int i = 0; i < RANGE; i++) {
513 if (data[i] != gold[i]) {
514 throw new RuntimeException(" Invalid " + name + " result: dataL[" + i + "]: " + data[i] + " != " + gold[i]);
515 }
516 }
517 }
518 }
519