1 /*
  2  * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 /*
 26  * @test
 27  * @bug 8304042
 28  * @summary Test some examples with independent packs with cyclic dependency
 29  *          between the packs.
 30  * @modules java.base/jdk.internal.misc
 31  * @library /test/lib /
 32  * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency nCOH_nAV
 33  * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency nCOH_yAV
 34  * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency yCOH_nAV
 35  * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency yCOH_yAV
 36  */
 37 
 38 package compiler.loopopts.superword;
 39 
 40 import jdk.internal.misc.Unsafe;
 41 import jdk.test.lib.Asserts;
 42 import compiler.lib.ir_framework.*;
 43 
 44 public class TestIndependentPacksWithCyclicDependency {
 45     static final int RANGE = 1024;
 46     static final int ITER  = 10_000;
 47     static Unsafe unsafe = Unsafe.getUnsafe();
 48 
 49     int[]   goldI0 = new int[RANGE];
 50     float[] goldF0 = new float[RANGE];
 51     int[]   goldI1 = new int[RANGE];
 52     float[] goldF1 = new float[RANGE];
 53     int[]   goldI2 = new int[RANGE];
 54     float[] goldF2 = new float[RANGE];
 55     int[]   goldI3 = new int[RANGE];
 56     float[] goldF3 = new float[RANGE];
 57     int[]   goldI4 = new int[RANGE];
 58     float[] goldF4 = new float[RANGE];
 59     int[]   goldI5 = new int[RANGE];
 60     float[] goldF5 = new float[RANGE];
 61     int[]   goldI6 = new int[RANGE];
 62     float[] goldF6 = new float[RANGE];
 63     long[]  goldL6 = new long[RANGE];
 64     int[]   goldI7 = new int[RANGE];
 65     float[] goldF7 = new float[RANGE];
 66     long[]  goldL7 = new long[RANGE];
 67     int[]   goldI8 = new int[RANGE];
 68     float[] goldF8 = new float[RANGE];
 69     long[]  goldL8 = new long[RANGE];
 70     int[]   goldI9 = new int[RANGE];
 71     float[] goldF9 = new float[RANGE];
 72     long[]  goldL9 = new long[RANGE];
 73     int[]   goldI10 = new int[RANGE];
 74     float[] goldF10 = new float[RANGE];
 75     long[]  goldL10 = new long[RANGE];
 76 
 77     public static void main(String args[]) {
 78         TestFramework framework = new TestFramework(TestIndependentPacksWithCyclicDependency.class);
 79         framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
 80                            "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::test*",
 81                            "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::verify",
 82                            "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::init",
 83                            "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
 84         switch (args[0]) {
 85             case "nCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
 86             case "nCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
 87             case "yCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
 88             case "yCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"); }
 89             default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
 90         };
 91         framework.start();
 92     }
 93 
 94     TestIndependentPacksWithCyclicDependency() {
 95         // compute the gold standard in interpreter mode
 96         init(goldI0, goldF0);
 97         test0(goldI0, goldI0, goldF0, goldF0);
 98         init(goldI1, goldF1);
 99         test1(goldI1, goldI1, goldF1, goldF1);
100         init(goldI2, goldF2);
101         test2(goldI2, goldI2, goldF2, goldF2);
102         init(goldI3, goldF3);
103         test3(goldI3, goldI3, goldF3, goldF3);
104         init(goldI4, goldF4);
105         test4(goldI4, goldI4, goldF4, goldF4);
106         init(goldI5, goldF5);
107         test5(goldI5, goldI5, goldF5, goldF5);
108         init(goldI6, goldF6, goldL6);
109         test6(goldI6, goldI6, goldF6, goldF6, goldL6, goldL6);
110         init(goldI7, goldF7, goldL7);
111         test7(goldI7, goldI7, goldF7, goldF7, goldL7, goldL7);
112         init(goldI8, goldF8, goldL8);
113         test8(goldI8, goldI8, goldF8, goldF8, goldL8, goldL8);
114         init(goldI9, goldF9, goldL9);
115         test9(goldI9, goldI9, goldF9, goldF9, goldL9, goldL9);
116         init(goldI10, goldF10, goldL10);
117         test10(goldI10, goldI10, goldF10, goldF10, goldL10, goldL10);
118     }
119 
120     @Run(test = "test0")
121     @Warmup(100)
122     public void runTest0() {
123         int[] dataI = new int[RANGE];
124         float[] dataF = new float[RANGE];
125         init(dataI, dataF);
126         test0(dataI, dataI, dataF, dataF);
127         verify("test0", dataI, goldI0);
128         verify("test0", dataF, goldF0);
129     }
130 
    /**
     * Kernel 0: per iteration, adds 3 to two adjacent ints and multiplies two adjacent
     * floats by 1.3f. The int statements only touch the int arrays, the float statements
     * only touch the float arrays.
     *
     * The IR rule expects at least one vector int add and one vector float mul. It is
     * applied if UseCompactObjectHeaders or AlignVector is off, on 64-bit platforms with
     * sse4.1 or asimd.
     *
     * @param dataIa int source array
     * @param dataIb int destination array (the runner passes the same array as dataIa)
     * @param dataFa float source array
     * @param dataFb float destination array (the runner passes the same array as dataFa)
     */
    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Hand-unrolled 2x. The int and float slices are completely separate.
            dataIb[i+0] = dataIa[i+0] + 3;
            dataIb[i+1] = dataIa[i+1] + 3;
            dataFb[i+0] = dataFa[i+0] * 1.3f;
            dataFb[i+1] = dataFa[i+1] * 1.3f;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 8*i   ->  always            adr = base + 12 + 8*i   ->  never
            // -> vectorize                                  -> no vectorization
        }
    }
149 
150     @Run(test = "test1")
151     @Warmup(100)
152     public void runTest1() {
153         int[] dataI = new int[RANGE];
154         float[] dataF = new float[RANGE];
155         init(dataI, dataF);
156         test1(dataI, dataI, dataF, dataF);
157         verify("test1", dataI, goldI1);
158         verify("test1", dataF, goldF1);
159     }
160 
    /**
     * Kernel 1: per iteration, first stores {@code int + 3} (converted to float) into two
     * adjacent float elements, then stores {@code (int)(float * 1.3f)} into two adjacent
     * int elements. Because the runner passes the same arrays for "a" and "b", the float
     * loads read what the float stores just wrote.
     *
     * The IR rule expects vector int add, vector float mul and both vector casts. It is
     * applied if UseCompactObjectHeaders or AlignVector is off, on 64-bit platforms with
     * avx2 or asimd.
     *
     * @param dataIa int source array
     * @param dataIb int destination array (the runner passes the same array as dataIa)
     * @param dataFa float destination array of the first two statements
     * @param dataFb float source array of the last two statements (the runner passes the same array as dataFa)
     */
    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0", IRNode.VECTOR_CAST_F2I, "> 0", IRNode.VECTOR_CAST_I2F, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Hand-unrolled 2x. Converts int -> float and float -> int. StoreF -> LoadF dependency.
            dataFa[i+0] = dataIa[i+0] + 3;
            dataFa[i+1] = dataIa[i+1] + 3;
            dataIb[i+0] = (int)(dataFb[i+0] * 1.3f);
            dataIb[i+1] = (int)(dataFb[i+1] * 1.3f);
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 8*i   ->  always            adr = base + 12 + 8*i   ->  never
            // -> vectorize                                  -> no vectorization
        }
    }
179 
180     @Run(test = "test2")
181     public void runTest2() {
182         int[] dataI = new int[RANGE];
183         float[] dataF = new float[RANGE];
184         init(dataI, dataF);
185         test2(dataI, dataI, dataF, dataF);
186         verify("test2", dataI, goldI2);
187         verify("test2", dataF, goldF2);
188     }
189 
    /**
     * Kernel 2: like test1 but without conversions. The float arrays are accessed through
     * Unsafe as 4-byte ints: {@code int + 1} is stored into two adjacent float slots, then
     * {@code 11 * (int bits of the float slot)} is stored into two adjacent int elements.
     *
     * The IR rule expects at least one vector int add and one vector int mul. It is
     * applied if UseCompactObjectHeaders or AlignVector is off, on 64-bit platforms with
     * sse4.1 or asimd.
     *
     * @param dataIa int source array
     * @param dataIb int destination array (the runner passes the same array as dataIa)
     * @param dataFa float array written as ints via Unsafe
     * @param dataFb float array read as ints via Unsafe (the runner passes the same array as dataFa)
     */
    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // int and float arrays are two slices. But we pretend both are of type int.
            // NOTE(review): the reads from the float[] dataFb use ARRAY_INT_BASE_OFFSET while the
            // writes to dataFa use ARRAY_FLOAT_BASE_OFFSET; presumably both base offsets are equal - confirm.
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1);
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1);
            dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0);
            dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4);
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 8*i   ->  always            adr = base + 12 + 8*i   ->  never
            // -> vectorize                                  -> no vectorization
        }
    }
208 
209     @Run(test = "test3")
210     @Warmup(100)
211     public void runTest3() {
212         int[] dataI = new int[RANGE];
213         float[] dataF = new float[RANGE];
214         init(dataI, dataF);
215         test3(dataI, dataI, dataF, dataF);
216         verify("test3", dataI, goldI3);
217         verify("test3", dataF, goldF3);
218     }
219 
    /**
     * Kernel 3: the same four statements as test0, but interleaved so that the two float
     * statements sit between the two int statements.
     *
     * The IR rule expects at least one vector int add and one vector float mul. It is
     * applied if UseCompactObjectHeaders or AlignVector is off, on 64-bit platforms with
     * sse4.1 or asimd.
     *
     * @param dataIa int source array
     * @param dataIb int destination array (the runner passes the same array as dataIa)
     * @param dataFa float source array
     * @param dataFb float destination array (the runner passes the same array as dataFa)
     */
    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static void test3(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Inversion of orders. But because we operate on separate slices, this should
            // safely vectorize. It should detect that each line is independent, so it can
            // reorder them.
            dataIb[i+0] = dataIa[i+0] + 3;
            dataFb[i+1] = dataFa[i+1] * 1.3f;
            dataFb[i+0] = dataFa[i+0] * 1.3f;
            dataIb[i+1] = dataIa[i+1] + 3;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 8*i   ->  always            adr = base + 12 + 8*i   ->  never
            // -> vectorize                                  -> no vectorization
        }
    }
240 
241     @Run(test = "test4")
242     @Warmup(100)
243     public void runTest4() {
244         int[] dataI = new int[RANGE];
245         float[] dataF = new float[RANGE];
246         init(dataI, dataF);
247         test4(dataI, dataI, dataF, dataF);
248         verify("test4", dataI, goldI4);
249         verify("test4", dataF, goldF4);
250     }
251 
    /**
     * Kernel 4: the statements of test1 in the order A, X, Y, B, which creates a cyclic
     * dependency between the candidate packs [A,B] and [X,Y] when the runner passes the
     * same arrays for "a" and "b".
     *
     * There is no IR rule on this kernel; only its result is checked by the runner
     * against the gold arrays.
     *
     * @param dataIa int array read by A and B
     * @param dataIb int array written by X and Y (the runner passes the same array as dataIa)
     * @param dataFa float array written by A and B
     * @param dataFb float array read by X and Y (the runner passes the same array as dataFa)
     */
    @Test
    static void test4(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // same as test1, except that reordering leads to different semantics
            // [A,B] and [X,Y] are both packs that are internally independent
            // But we have dependencies A -> X (StoreF -> LoadF)
            //                      and Y -> B (StoreI -> LoadI)
            // Hence the two packs have a cyclic dependency, we cannot schedule
            // one before the other.
            dataFa[i+0] = dataIa[i+0] + 3;            // A
            dataIb[i+0] = (int)(dataFb[i+0] * 1.3f);  // X
            dataIb[i+1] = (int)(dataFb[i+1] * 1.3f);  // Y
            dataFa[i+1] = dataIa[i+1] + 3;            // B
        }
    }
267 
268     @Run(test = "test5")
269     public void runTest5() {
270         int[] dataI = new int[RANGE];
271         float[] dataF = new float[RANGE];
272         init(dataI, dataF);
273         test5(dataI, dataI, dataF, dataF);
274         verify("test5", dataI, goldI5);
275         verify("test5", dataF, goldF5);
276     }
277 
    /**
     * Kernel 5: the statements of test2 (float arrays accessed as 4-byte ints through
     * Unsafe) in the order A, X, Y, B, giving the same cyclic pack dependency as test4.
     *
     * There is no IR rule on this kernel; only its result is checked by the runner
     * against the gold arrays.
     *
     * @param dataIa int array read by A and B
     * @param dataIb int array written by X and Y (the runner passes the same array as dataIa)
     * @param dataFa float array written as ints by A and B
     * @param dataFb float array read as ints by X and Y (the runner passes the same array as dataFa)
     */
    @Test
    static void test5(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // same as test2, except that reordering leads to different semantics
            // explanation analogous to test4
            // NOTE(review): X and Y read the float[] dataFb using ARRAY_INT_BASE_OFFSET;
            // presumably equal to ARRAY_FLOAT_BASE_OFFSET - confirm.
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); // A
            dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); // X
            dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); // Y
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1); // B
        }
    }
289 
290     @Run(test = "test6")
291     public void runTest6() {
292         int[]   dataI = new int[RANGE];
293         float[] dataF = new float[RANGE];
294         long[]  dataL = new long[RANGE];
295         init(dataI, dataF, dataL);
296         test6(dataI, dataI, dataF, dataF, dataL, dataL);
297         verify("test6", dataI, goldI6);
298         verify("test6", dataF, goldF6);
299         verify("test6", dataL, goldL6);
300     }
301 
    /**
     * Kernel 6: a three-stage chain in which every access is a 4-byte Unsafe access,
     * regardless of the declared element type of the array:
     * <pre>
     *   dataIa --(int + 3)-->        dataFa
     *   dataFb --(int * 45)-->       dataLa
     *   dataLb --(float + 0.55f)-->  dataIb
     * </pre>
     * All offsets use a 4-byte stride, so only the first {@code 4 * RANGE} bytes of the
     * long arrays are touched.
     *
     * The IR rule expects vector int add, vector int mul and vector float add. It is
     * applied if UseCompactObjectHeaders or AlignVector is off, on 64-bit platforms with
     * sse4.1 or asimd.
     *
     * @param dataIa int array read as ints
     * @param dataIb int array written as floats (the runner passes the same array as dataIa)
     * @param dataFa float array written as ints
     * @param dataFb float array read as ints (the runner passes the same array as dataFa)
     * @param dataLa long array written as ints
     * @param dataLb long array read as floats (the runner passes the same array as dataLa)
     */
    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0", IRNode.ADD_VF, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static void test6(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Chain of parallelizable op and conversion
            int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
            int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
            int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
            int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
            float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
            float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 8*i   ->  always            adr = base + 12 + 8*i   ->  never
            // -> vectorize                                  -> no vectorization
        }
    }
329 
330     @Run(test = "test7")
331     public void runTest7() {
332         int[]   dataI = new int[RANGE];
333         float[] dataF = new float[RANGE];
334         long[]  dataL = new long[RANGE];
335         init(dataI, dataF, dataL);
336         test7(dataI, dataI, dataF, dataF, dataL, dataL);
337         verify("test7", dataI, goldI7);
338         verify("test7", dataF, goldF7);
339         verify("test7", dataL, goldL7);
340     }
341 
    /**
     * Kernel 7: the statements of test6, except that the second int -> float-array
     * statement pair (v01) is moved to the end of the loop body. With the aliased arrays
     * passed by the runner this forms a dependency cycle through the int, float and long
     * arrays.
     *
     * There is no IR rule on this kernel; only its result is checked by the runner
     * against the gold arrays.
     *
     * @param dataIa int array read as ints
     * @param dataIb int array written as floats (the runner passes the same array as dataIa)
     * @param dataFa float array written as ints
     * @param dataFb float array read as ints (the runner passes the same array as dataFa)
     * @param dataLa long array written as ints
     * @param dataLb long array read as floats (the runner passes the same array as dataLa)
     */
    @Test
    static void test7(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Cycle involving 3 memory slices
            int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
            int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
            int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
            float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
            float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
            int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // moved down
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
        }
    }
361 
362 
363     @Run(test = "test8")
364     public void runTest8() {
365         int[]   dataI = new int[RANGE];
366         float[] dataF = new float[RANGE];
367         long[]  dataL = new long[RANGE];
368         init(dataI, dataF, dataL);
369         test8(dataI, dataI, dataF, dataF, dataL, dataL);
370         verify("test8", dataI, goldI8);
371         verify("test8", dataF, goldF8);
372         verify("test8", dataL, goldL8);
373     }
374 
    /**
     * Kernel 8: a 2-cycle between the int -> float-array statements (v00, v01) and the
     * float-array -> long-array statements (v10, v11), followed by the independent
     * long-array -> int-array statements (v20, v21). All accesses are 4-byte Unsafe
     * accesses.
     *
     * There is no IR rule on this kernel; only its result is checked by the runner
     * against the gold arrays.
     *
     * @param dataIa int array read as ints
     * @param dataIb int array written as floats (the runner passes the same array as dataIa)
     * @param dataFa float array written as ints
     * @param dataFb float array read as ints (the runner passes the same array as dataFa)
     * @param dataLa long array written as ints
     * @param dataLb long array read as floats (the runner passes the same array as dataLa)
     */
    @Test
    static void test8(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // 2-cycle, with more ops after
            int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
            int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
            int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
            int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
            // more stuff after
            float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
            float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
        }
    }
395 
396     @Run(test = "test9")
397     public void runTest9() {
398         int[]   dataI = new int[RANGE];
399         float[] dataF = new float[RANGE];
400         long[]  dataL = new long[RANGE];
401         init(dataI, dataF, dataL);
402         test9(dataI, dataI, dataF, dataF, dataL, dataL);
403         verify("test9", dataI, goldI9);
404         verify("test9", dataF, goldF9);
405         verify("test9", dataL, goldL9);
406     }
407 
    /**
     * Kernel 9: the statements of test8 with the long-array -> int-array statements
     * (v20, v21) placed before the 2-cycle instead of after it. All accesses are 4-byte
     * Unsafe accesses.
     *
     * There is no IR rule on this kernel; only its result is checked by the runner
     * against the gold arrays.
     *
     * @param dataIa int array read as ints
     * @param dataIb int array written as floats (the runner passes the same array as dataIa)
     * @param dataFa float array written as ints
     * @param dataFb float array read as ints (the runner passes the same array as dataFa)
     * @param dataLa long array written as ints
     * @param dataLb long array read as floats (the runner passes the same array as dataLa)
     */
    @Test
    static void test9(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // 2-cycle, with more stuff before
            float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
            float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
            // 2-cycle
            int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
            int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
            int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
            int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
        }
    }
428 
429     @Run(test = "test10")
430     public void runTest10() {
431         int[]   dataI = new int[RANGE];
432         float[] dataF = new float[RANGE];
433         long[]  dataL = new long[RANGE];
434         init(dataI, dataF, dataL);
435         test10(dataI, dataI, dataF, dataF, dataL, dataL);
436         verify("test10", dataI, goldI10);
437         verify("test10", dataF, goldF10);
438         verify("test10", dataL, goldL10);
439     }
440 
    /**
     * Kernel 10: like test7, but the two middle operations differ (R multiplies by 45,
     * S adds 43), so they cannot form a pack. The dependency cycle between the packs
     * [A,B] and [U,V] therefore runs through scalar operations as well.
     *
     * There is no IR rule on this kernel; only its result is checked by the runner
     * against the gold arrays.
     *
     * @param dataIa int array read as ints
     * @param dataIb int array written as floats (the runner passes the same array as dataIa)
     * @param dataFa float array written as ints
     * @param dataFb float array read as ints (the runner passes the same array as dataFa)
     * @param dataLa long array written as ints
     * @param dataLb long array read as floats (the runner passes the same array as dataLa)
     */
    @Test
    static void test10(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // This creates the following graph before SuperWord:
            //
            // A -> R -> U
            //      S -> V -> B
            //
            // SuperWord analyzes the graph, and sees that [A,B] and [U,V]
            // are adjacent, isomorphic and independent packs. However,
            // [R,S] are not isomorphic (R mul, S add).
            // So it vectorizes [A,B] and [U,V] this gives us this graph:
            //
            //        -> R
            //  [A,B]      -> [U,V] -+
            //    ^   -> S           |
            //    |                  |
            //    +------------------+
            //
            // The cycle thus does not only go via packs, but also scalar ops.
            //
            int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; // A
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
            int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; // R: mul, operation differs from S
            int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) + 43; // S: add
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
            float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; // U
            float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; // V
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
            int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // B: moved down
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
        }
    }
477 
478     static void init(int[] dataI, float[] dataF) {
479         for (int i = 0; i < RANGE; i++) {
480             dataI[i] = i + 1;
481             dataF[i] = i + 0.1f;
482         }
483     }
484 
485     static void init(int[] dataI, float[] dataF, long[] dataL) {
486         for (int i = 0; i < RANGE; i++) {
487             dataI[i] = i + 1;
488             dataF[i] = i + 0.1f;
489             dataL[i] = i + 1;
490         }
491     }
492 
493     static void verify(String name, int[] data, int[] gold) {
494         for (int i = 0; i < RANGE; i++) {
495             if (data[i] != gold[i]) {
496                 throw new RuntimeException(" Invalid " + name + " result: dataI[" + i + "]: " + data[i] + " != " + gold[i]);
497             }
498         }
499     }
500 
501     static void verify(String name, float[] data, float[] gold) {
502         for (int i = 0; i < RANGE; i++) {
503             int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
504             int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
505             if (datav != goldv) {
506                 throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
507             }
508         }
509     }
510 
511     static void verify(String name, long[] data, long[] gold) {
512         for (int i = 0; i < RANGE; i++) {
513             if (data[i] != gold[i]) {
514                 throw new RuntimeException(" Invalid " + name + " result: dataL[" + i + "]: " + data[i] + " != " + gold[i]);
515             }
516         }
517     }
518 }
519