1 /*
  2  * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 /*
 26  * @test
 27  * @bug 8304042
 28  * @summary Test some examples with independent packs with cyclic dependency
 29  *          between the packs.
 30  * @requires vm.bits == 64
 31  * @requires vm.compiler2.enabled
 32  * @modules java.base/jdk.internal.misc
 33  * @library /test/lib /
 34  * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency
 35  */
 36 
 37 package compiler.loopopts.superword;
 38 
 39 import jdk.internal.misc.Unsafe;
 40 import jdk.test.lib.Asserts;
 41 import compiler.lib.ir_framework.*;
 42 
 43 public class TestIndependentPacksWithCyclicDependency {
 44     static final int RANGE = 1024;
 45     static final int ITER  = 10_000;
 46     static Unsafe unsafe = Unsafe.getUnsafe();
 47 
 48     int[]   goldI0 = new int[RANGE];
 49     float[] goldF0 = new float[RANGE];
 50     int[]   goldI1 = new int[RANGE];
 51     float[] goldF1 = new float[RANGE];
 52     int[]   goldI2 = new int[RANGE];
 53     float[] goldF2 = new float[RANGE];
 54     int[]   goldI3 = new int[RANGE];
 55     float[] goldF3 = new float[RANGE];
 56     int[]   goldI4 = new int[RANGE];
 57     float[] goldF4 = new float[RANGE];
 58     int[]   goldI5 = new int[RANGE];
 59     float[] goldF5 = new float[RANGE];
 60     int[]   goldI6 = new int[RANGE];
 61     float[] goldF6 = new float[RANGE];
 62     long[]  goldL6 = new long[RANGE];
 63     int[]   goldI7 = new int[RANGE];
 64     float[] goldF7 = new float[RANGE];
 65     long[]  goldL7 = new long[RANGE];
 66     int[]   goldI8 = new int[RANGE];
 67     float[] goldF8 = new float[RANGE];
 68     long[]  goldL8 = new long[RANGE];
 69     int[]   goldI9 = new int[RANGE];
 70     float[] goldF9 = new float[RANGE];
 71     long[]  goldL9 = new long[RANGE];
 72     int[]   goldI10 = new int[RANGE];
 73     float[] goldF10 = new float[RANGE];
 74     long[]  goldL10 = new long[RANGE];
 75 
 76     public static void main(String args[]) {
 77         TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
 78                                    "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::test*",
 79                                    "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::verify",
 80                                    "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::init",
 81                                    "-XX:LoopUnrollLimit=1000");
 82     }
 83 
 84     TestIndependentPacksWithCyclicDependency() {
 85         // compute the gold standard in interpreter mode
 86         init(goldI0, goldF0);
 87         test0(goldI0, goldI0, goldF0, goldF0);
 88         init(goldI1, goldF1);
 89         test1(goldI1, goldI1, goldF1, goldF1);
 90         init(goldI2, goldF2);
 91         test2(goldI2, goldI2, goldF2, goldF2);
 92         init(goldI3, goldF3);
 93         test3(goldI3, goldI3, goldF3, goldF3);
 94         init(goldI4, goldF4);
 95         test4(goldI4, goldI4, goldF4, goldF4);
 96         init(goldI5, goldF5);
 97         test5(goldI5, goldI5, goldF5, goldF5);
 98         init(goldI6, goldF6, goldL6);
 99         test6(goldI6, goldI6, goldF6, goldF6, goldL6, goldL6);
100         init(goldI7, goldF7, goldL7);
101         test7(goldI7, goldI7, goldF7, goldF7, goldL7, goldL7);
102         init(goldI8, goldF8, goldL8);
103         test8(goldI8, goldI8, goldF8, goldF8, goldL8, goldL8);
104         init(goldI9, goldF9, goldL9);
105         test9(goldI9, goldI9, goldF9, goldF9, goldL9, goldL9);
106         init(goldI10, goldF10, goldL10);
107         test10(goldI10, goldI10, goldF10, goldF10, goldL10, goldL10);
108     }
109 
110     @Run(test = "test0")
111     @Warmup(100)
112     public void runTest0() {
113         int[] dataI = new int[RANGE];
114         float[] dataF = new float[RANGE];
115         init(dataI, dataF);
116         test0(dataI, dataI, dataF, dataF);
117         verify("test0", dataI, goldI0);
118         verify("test0", dataF, goldF0);
119     }
120 
    /**
     * Baseline kernel: adds 3 to every int and multiplies every float by 1.3f.
     * The int statements only touch the int arrays and the float statements only
     * touch the float arrays. Callers in this file pass the same array for
     * dataIa/dataIb and for dataFa/dataFb.
     * The IR rule asks for at least one ADD_VI and one MUL_VF node when the CPU
     * has sse4.1 or asimd.
     */
    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Hand-unrolled 2x. Int and Float slice are completely separate.
            dataIb[i+0] = dataIa[i+0] + 3;
            dataIb[i+1] = dataIa[i+1] + 3;
            dataFb[i+0] = dataFa[i+0] * 1.3f;
            dataFb[i+1] = dataFa[i+1] * 1.3f;
        }
    }
133 
134     @Run(test = "test1")
135     @Warmup(100)
136     public void runTest1() {
137         int[] dataI = new int[RANGE];
138         float[] dataF = new float[RANGE];
139         init(dataI, dataF);
140         test1(dataI, dataI, dataF, dataF);
141         verify("test1", dataI, goldI1);
142         verify("test1", dataF, goldF1);
143     }
144 
    /**
     * Conversion kernel: stores (int + 3) converted to float into the float
     * array, then reads the float array, multiplies by 1.3f and stores the
     * value cast to int into the int array. Callers in this file pass the same
     * array for dataFa/dataFb, so the float loads read what the float stores of
     * the same iteration wrote.
     * The IR rule asks for ADD_VI, MUL_VF and both vector cast nodes when the
     * CPU has avx2 or asimd.
     */
    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0", IRNode.VECTOR_CAST_F2I, "> 0", IRNode.VECTOR_CAST_I2F, "> 0"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Hand-unrolled 2x. Converts to and from. StoreF -> LoadF dependency.
            dataFa[i+0] = dataIa[i+0] + 3;
            dataFa[i+1] = dataIa[i+1] + 3;
            dataIb[i+0] = (int)(dataFb[i+0] * 1.3f);
            dataIb[i+1] = (int)(dataFb[i+1] * 1.3f);
        }
    }
157 
158     @Run(test = "test2")
159     public void runTest2() {
160         int[] dataI = new int[RANGE];
161         float[] dataF = new float[RANGE];
162         init(dataI, dataF);
163         test2(dataI, dataI, dataF, dataF);
164         verify("test2", dataI, goldI2);
165         verify("test2", dataF, goldF2);
166     }
167 
168     @Test
169     @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0"},
170         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
171     static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
172         for (int i = 0; i < RANGE; i+=2) {
173             // int and float arrays are two slices. But we pretend both are of type int.
174             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1);
175             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1);
176             dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0);
177             dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4);
178         }
179     }
180 
181     @Run(test = "test3")
182     @Warmup(100)
183     public void runTest3() {
184         int[] dataI = new int[RANGE];
185         float[] dataF = new float[RANGE];
186         init(dataI, dataF);
187         test3(dataI, dataI, dataF, dataF);
188         verify("test3", dataI, goldI3);
189         verify("test3", dataF, goldF3);
190     }
191 
    /**
     * Same operations as test0 (int + 3, float * 1.3f), but the four statements
     * are interleaved: the two int statements enclose the two float statements,
     * and the float statements are in reverse element order.
     * The IR rule asks for at least one ADD_VI and one MUL_VF node when the CPU
     * has sse4.1 or asimd.
     */
    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static void test3(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Inversion of orders. But because we operate on separate slices, this should
            // safely vectorize. It should detect that each line is independent, so it can
            // reorder them.
            dataIb[i+0] = dataIa[i+0] + 3;
            dataFb[i+1] = dataFa[i+1] * 1.3f;
            dataFb[i+0] = dataFa[i+0] * 1.3f;
            dataIb[i+1] = dataIa[i+1] + 3;
        }
    }
206 
207     @Run(test = "test4")
208     @Warmup(100)
209     public void runTest4() {
210         int[] dataI = new int[RANGE];
211         float[] dataF = new float[RANGE];
212         init(dataI, dataF);
213         test4(dataI, dataI, dataF, dataF);
214         verify("test4", dataI, goldI4);
215         verify("test4", dataF, goldF4);
216     }
217 
    /**
     * Same statements as test1, but with B moved behind X and Y. Callers in
     * this file pass the same array for dataIa/dataIb and for dataFa/dataFb,
     * which creates the cross-statement dependencies described below.
     * No @IR rule is attached; the result is checked against the gold arrays
     * by runTest4.
     */
    @Test
    static void test4(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // same as test1, except that reordering leads to different semantics
            // [A,B] and [X,Y] are both packs that are internally independent
            // But we have dependencies A -> X (StoreF -> LoadF)
            //                      and Y -> B (StoreI -> LoadI)
            // Hence the two packs have a cyclic dependency, we cannot schedule
            // one before the other.
            dataFa[i+0] = dataIa[i+0] + 3;            // A
            dataIb[i+0] = (int)(dataFb[i+0] * 1.3f);  // X
            dataIb[i+1] = (int)(dataFb[i+1] * 1.3f);  // Y
            dataFa[i+1] = dataIa[i+1] + 3;            // B
        }
    }
233 
234     @Run(test = "test5")
235     public void runTest5() {
236         int[] dataI = new int[RANGE];
237         float[] dataF = new float[RANGE];
238         init(dataI, dataF);
239         test5(dataI, dataI, dataF, dataF);
240         verify("test5", dataI, goldI5);
241         verify("test5", dataF, goldF5);
242     }
243 
244     @Test
245     static void test5(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
246         for (int i = 0; i < RANGE; i+=2) {
247             // same as test2, except that reordering leads to different semantics
248             // explanation analogue to test4
249             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); // A
250             dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); // X
251             dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); // Y
252             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1); // B
253         }
254     }
255 
256     @Run(test = "test6")
257     public void runTest6() {
258         int[]   dataI = new int[RANGE];
259         float[] dataF = new float[RANGE];
260         long[]  dataL = new long[RANGE];
261         init(dataI, dataF, dataL);
262         test6(dataI, dataI, dataF, dataF, dataL, dataL);
263         verify("test6", dataI, goldI6);
264         verify("test6", dataF, goldF6);
265         verify("test6", dataL, goldL6);
266     }
267 
    /**
     * Three-stage chain over the int, float and long arrays, all accessed
     * through Unsafe with 4-byte accesses:
     *   1. read int from the int array, add 3, store as int bits into the float array;
     *   2. read int bits from the float array, multiply by 45, store into the long array;
     *   3. read float bits from the long array, add 0.55f, store into the int array.
     * All offsets advance by 4 bytes per index, including those into the long
     * array. Each pair of statements is in element order (i, then i+1).
     * The IR rule asks for ADD_VI, MUL_VI and ADD_VF when the CPU has sse4.1 or
     * asimd and UseCompactObjectHeaders is false.
     */
    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0", IRNode.ADD_VF, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIf = {"UseCompactObjectHeaders", "false"})
    static void test6(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Chain of parallelizable op and conversion
            int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
            int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
            int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
            int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
            float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
            float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
        }
    }
290 
291     @Run(test = "test7")
292     public void runTest7() {
293         int[]   dataI = new int[RANGE];
294         float[] dataF = new float[RANGE];
295         long[]  dataL = new long[RANGE];
296         init(dataI, dataF, dataL);
297         test7(dataI, dataI, dataF, dataF, dataL, dataL);
298         verify("test7", dataI, goldI7);
299         verify("test7", dataF, goldF7);
300         verify("test7", dataL, goldL7);
301     }
302 
    /**
     * Same three-stage chain as test6 (int -> float array -> long array -> int
     * array, all via 4-byte Unsafe accesses), except that the second half of
     * stage 1 (v01 and its store) is moved to the end of the loop body, after
     * the stage-3 stores into the int array.
     * No @IR rule is attached; the result is checked against the gold arrays
     * by runTest7.
     */
    @Test
    static void test7(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Cycle involving 3 memory slices
            int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
            int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
            int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
            float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
            float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
            int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // moved down
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
        }
    }
322 
323 
324     @Run(test = "test8")
325     public void runTest8() {
326         int[]   dataI = new int[RANGE];
327         float[] dataF = new float[RANGE];
328         long[]  dataL = new long[RANGE];
329         init(dataI, dataF, dataL);
330         test8(dataI, dataI, dataF, dataF, dataL, dataL);
331         verify("test8", dataI, goldI8);
332         verify("test8", dataF, goldF8);
333         verify("test8", dataL, goldL8);
334     }
335 
    /**
     * Variant of the test6 chain: the second half of stage 1 (v01 and its
     * store into the float array) is placed after the stage-2 stores into the
     * long array, and the stage-3 statements (long array -> int array) follow
     * afterwards in element order.
     * No @IR rule is attached; the result is checked against the gold arrays
     * by runTest8.
     */
    @Test
    static void test8(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // 2-cycle, with more ops after
            int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
            int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
            int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
            int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
            // more stuff after
            float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
            float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
        }
    }
356 
357     @Run(test = "test9")
358     public void runTest9() {
359         int[]   dataI = new int[RANGE];
360         float[] dataF = new float[RANGE];
361         long[]  dataL = new long[RANGE];
362         init(dataI, dataF, dataL);
363         test9(dataI, dataI, dataF, dataF, dataL, dataL);
364         verify("test9", dataI, goldI9);
365         verify("test9", dataF, goldF9);
366         verify("test9", dataL, goldL9);
367     }
368 
    /**
     * Same statements as test8, but the long array -> int array statements
     * (v20/v21 and their stores) come first, followed by the statements that
     * form the 2-cycle.
     * No @IR rule is attached; the result is checked against the gold arrays
     * by runTest9.
     */
    @Test
    static void test9(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // 2-cycle, with more stuff before
            float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
            float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
            // 2-cycle
            int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
            int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
            int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
            int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
        }
    }
389 
390     @Run(test = "test10")
391     public void runTest10() {
392         int[]   dataI = new int[RANGE];
393         float[] dataF = new float[RANGE];
394         long[]  dataL = new long[RANGE];
395         init(dataI, dataF, dataL);
396         test10(dataI, dataI, dataF, dataF, dataL, dataL);
397         verify("test10", dataI, goldI10);
398         verify("test10", dataF, goldF10);
399         verify("test10", dataL, goldL10);
400     }
401 
    /**
     * Same statement order as test7 (second half of stage 1 moved to the end),
     * but the two stage-2 operations differ: element i is multiplied by 45
     * while element i+1 has 43 added.
     * No @IR rule is attached; the result is checked against the gold arrays
     * by runTest10.
     */
    @Test
    static void test10(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // This creates the following graph before SuperWord:
            //
            // A -> R -> U
            //      S -> V -> B
            //
            // SuperWord analyzes the graph, and sees that [A,B] and [U,V]
            // are adjacent, isomorphic and independent packs. However,
            // [R,S] are not isomorphic (R mul, S add).
            // So it vectorizes [A,B] and [U,V] this gives us this graph:
            //
            //        -> R
            //  [A,B]      -> [U,V] -+
            //    ^   -> S           |
            //    |                  |
            //    +------------------+
            //
            // The cycle thus does not only go via packs, but also scalar ops.
            //
            int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; // A
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
            int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; // R: mul, operation differs from S
            int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) + 43; // S: add
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
            float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; // U
            float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; // V
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
            int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // B: moved down
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
        }
    }
438 
439     static void init(int[] dataI, float[] dataF) {
440         for (int i = 0; i < RANGE; i++) {
441             dataI[i] = i + 1;
442             dataF[i] = i + 0.1f;
443         }
444     }
445 
446     static void init(int[] dataI, float[] dataF, long[] dataL) {
447         for (int i = 0; i < RANGE; i++) {
448             dataI[i] = i + 1;
449             dataF[i] = i + 0.1f;
450             dataL[i] = i + 1;
451         }
452     }
453 
454     static void verify(String name, int[] data, int[] gold) {
455         for (int i = 0; i < RANGE; i++) {
456             if (data[i] != gold[i]) {
457                 throw new RuntimeException(" Invalid " + name + " result: dataI[" + i + "]: " + data[i] + " != " + gold[i]);
458             }
459         }
460     }
461 
462     static void verify(String name, float[] data, float[] gold) {
463         for (int i = 0; i < RANGE; i++) {
464             int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
465             int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
466             if (datav != goldv) {
467                 throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
468             }
469         }
470     }
471 
472     static void verify(String name, long[] data, long[] gold) {
473         for (int i = 0; i < RANGE; i++) {
474             if (data[i] != gold[i]) {
475                 throw new RuntimeException(" Invalid " + name + " result: dataL[" + i + "]: " + data[i] + " != " + gold[i]);
476             }
477         }
478     }
479 }
480