1 /*
  2  * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
// Note Lilliput:
// These tests rely on array members starting at the same offset in all arrays involved;
// otherwise vectorization does not kick in. That does not hold with Lilliput (compact
// object headers), so for now the test is always run with -XX:-UseCompactObjectHeaders.
 29 
 30 /*
 31  * @test
 32  * @bug 8304042
 33  * @summary Test some examples with independent packs with cyclic dependency
 34  *          between the packs.
 35  * @requires vm.bits == 64
 36  * @requires vm.compiler2.enabled
 37  * @modules java.base/jdk.internal.misc
 38  * @library /test/lib /
 39  * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency
 40  */
 41 
 42 package compiler.loopopts.superword;
 43 
 44 import jdk.internal.misc.Unsafe;
 45 import jdk.test.lib.Asserts;
 46 import compiler.lib.ir_framework.*;
 47 
 48 public class TestIndependentPacksWithCyclicDependency {
 49     static final int RANGE = 1024;
 50     static final int ITER  = 10_000;
 51     static Unsafe unsafe = Unsafe.getUnsafe();
 52 
 53     int[]   goldI0 = new int[RANGE];
 54     float[] goldF0 = new float[RANGE];
 55     int[]   goldI1 = new int[RANGE];
 56     float[] goldF1 = new float[RANGE];
 57     int[]   goldI2 = new int[RANGE];
 58     float[] goldF2 = new float[RANGE];
 59     int[]   goldI3 = new int[RANGE];
 60     float[] goldF3 = new float[RANGE];
 61     int[]   goldI4 = new int[RANGE];
 62     float[] goldF4 = new float[RANGE];
 63     int[]   goldI5 = new int[RANGE];
 64     float[] goldF5 = new float[RANGE];
 65     int[]   goldI6 = new int[RANGE];
 66     float[] goldF6 = new float[RANGE];
 67     long[]  goldL6 = new long[RANGE];
 68     int[]   goldI7 = new int[RANGE];
 69     float[] goldF7 = new float[RANGE];
 70     long[]  goldL7 = new long[RANGE];
 71     int[]   goldI8 = new int[RANGE];
 72     float[] goldF8 = new float[RANGE];
 73     long[]  goldL8 = new long[RANGE];
 74     int[]   goldI9 = new int[RANGE];
 75     float[] goldF9 = new float[RANGE];
 76     long[]  goldL9 = new long[RANGE];
 77     int[]   goldI10 = new int[RANGE];
 78     float[] goldF10 = new float[RANGE];
 79     long[]  goldL10 = new long[RANGE];
 80 
 81     public static void main(String args[]) {
 82         TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
 83                                    "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::test*",
 84                                    "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::verify",
 85                                    "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::init",
 86                                    "-XX:LoopUnrollLimit=1000", "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders");
 87     }
 88 
 89     TestIndependentPacksWithCyclicDependency() {
 90         // compute the gold standard in interpreter mode
 91         init(goldI0, goldF0);
 92         test0(goldI0, goldI0, goldF0, goldF0);
 93         init(goldI1, goldF1);
 94         test1(goldI1, goldI1, goldF1, goldF1);
 95         init(goldI2, goldF2);
 96         test2(goldI2, goldI2, goldF2, goldF2);
 97         init(goldI3, goldF3);
 98         test3(goldI3, goldI3, goldF3, goldF3);
 99         init(goldI4, goldF4);
100         test4(goldI4, goldI4, goldF4, goldF4);
101         init(goldI5, goldF5);
102         test5(goldI5, goldI5, goldF5, goldF5);
103         init(goldI6, goldF6, goldL6);
104         test6(goldI6, goldI6, goldF6, goldF6, goldL6, goldL6);
105         init(goldI7, goldF7, goldL7);
106         test7(goldI7, goldI7, goldF7, goldF7, goldL7, goldL7);
107         init(goldI8, goldF8, goldL8);
108         test8(goldI8, goldI8, goldF8, goldF8, goldL8, goldL8);
109         init(goldI9, goldF9, goldL9);
110         test9(goldI9, goldI9, goldF9, goldF9, goldL9, goldL9);
111         init(goldI10, goldF10, goldL10);
112         test10(goldI10, goldI10, goldF10, goldF10, goldL10, goldL10);
113     }
114 
115     @Run(test = "test0")
116     @Warmup(100)
117     public void runTest0() {
118         int[] dataI = new int[RANGE];
119         float[] dataF = new float[RANGE];
120         init(dataI, dataF);
121         test0(dataI, dataI, dataF, dataF);
122         verify("test0", dataI, goldI0);
123         verify("test0", dataF, goldF0);
124     }
125 
126     @Test
127     @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
128         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
129     static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
130         for (int i = 0; i < RANGE; i+=2) {
131             // Hand-unrolled 2x. Int and Float slice are completely separate.
132             dataIb[i+0] = dataIa[i+0] + 3;
133             dataIb[i+1] = dataIa[i+1] + 3;
134             dataFb[i+0] = dataFa[i+0] * 1.3f;
135             dataFb[i+1] = dataFa[i+1] * 1.3f;
136         }
137     }
138 
139     @Run(test = "test1")
140     @Warmup(100)
141     public void runTest1() {
142         int[] dataI = new int[RANGE];
143         float[] dataF = new float[RANGE];
144         init(dataI, dataF);
145         test1(dataI, dataI, dataF, dataF);
146         verify("test1", dataI, goldI1);
147         verify("test1", dataF, goldF1);
148     }
149 
150     @Test
151     @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0", IRNode.VECTOR_CAST_F2I, "> 0", IRNode.VECTOR_CAST_I2F, "> 0"},
152         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
153     static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
154         for (int i = 0; i < RANGE; i+=2) {
155             // Hand-unrolled 2x. Converst to and from. StoreF -> LoadF dependency.
156             dataFa[i+0] = dataIa[i+0] + 3;
157             dataFa[i+1] = dataIa[i+1] + 3;
158             dataIb[i+0] = (int)(dataFb[i+0] * 1.3f);
159             dataIb[i+1] = (int)(dataFb[i+1] * 1.3f);
160         }
161     }
162 
163     @Run(test = "test2")
164     public void runTest2() {
165         int[] dataI = new int[RANGE];
166         float[] dataF = new float[RANGE];
167         init(dataI, dataF);
168         test2(dataI, dataI, dataF, dataF);
169         verify("test2", dataI, goldI2);
170         verify("test2", dataF, goldF2);
171     }
172 
173     @Test
174     @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0"},
175         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
176     static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
177         for (int i = 0; i < RANGE; i+=2) {
178             // int and float arrays are two slices. But we pretend both are of type int.
179             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1);
180             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] + 1);
181             dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0);
182             dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4);
183         }
184     }
185 
186     @Run(test = "test3")
187     @Warmup(100)
188     public void runTest3() {
189         int[] dataI = new int[RANGE];
190         float[] dataF = new float[RANGE];
191         init(dataI, dataF);
192         test3(dataI, dataI, dataF, dataF);
193         verify("test3", dataI, goldI3);
194         verify("test3", dataF, goldF3);
195     }
196 
197     @Test
198     @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
199         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
200     static void test3(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
201         for (int i = 0; i < RANGE; i+=2) {
202             // Inversion of orders. But because we operate on separate slices, this should
203             // safely vectorize. It should detect that each line is independent, so it can
204             // reorder them.
205             dataIb[i+0] = dataIa[i+0] + 3;
206             dataFb[i+1] = dataFa[i+1] * 1.3f;
207             dataFb[i+0] = dataFa[i+0] * 1.3f;
208             dataIb[i+1] = dataIa[i+1] + 3;
209         }
210     }
211 
212     @Run(test = "test4")
213     @Warmup(100)
214     public void runTest4() {
215         int[] dataI = new int[RANGE];
216         float[] dataF = new float[RANGE];
217         init(dataI, dataF);
218         test4(dataI, dataI, dataF, dataF);
219         verify("test4", dataI, goldI4);
220         verify("test4", dataF, goldF4);
221     }
222 
223     @Test
224     static void test4(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
225         for (int i = 0; i < RANGE; i+=2) {
226             // same as test1, except that reordering leads to different semantics
227             // [A,B] and [X,Y] are both packs that are internally independent
228             // But we have dependencies A -> X (StoreF -> LoadF)
229             //                      and Y -> B (StoreI -> LoadI)
230             // Hence the two packs have a cyclic dependency, we cannot schedule
231             // one before the other.
232             dataFa[i+0] = dataIa[i+0] + 3;            // A
233             dataIb[i+0] = (int)(dataFb[i+0] * 1.3f);  // X
234             dataIb[i+1] = (int)(dataFb[i+1] * 1.3f);  // Y
235             dataFa[i+1] = dataIa[i+1] + 3;            // B
236         }
237     }
238 
239     @Run(test = "test5")
240     public void runTest5() {
241         int[] dataI = new int[RANGE];
242         float[] dataF = new float[RANGE];
243         init(dataI, dataF);
244         test5(dataI, dataI, dataF, dataF);
245         verify("test5", dataI, goldI5);
246         verify("test5", dataF, goldF5);
247     }
248 
249     @Test
250     static void test5(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
251         for (int i = 0; i < RANGE; i+=2) {
252             // same as test2, except that reordering leads to different semantics
253             // explanation analogue to test4
254             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1); // A
255             dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0); // X
256             dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4); // Y
257             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] + 1); // B
258         }
259     }
260 
261     @Run(test = "test6")
262     public void runTest6() {
263         int[]   dataI = new int[RANGE];
264         float[] dataF = new float[RANGE];
265         long[]  dataL = new long[RANGE];
266         init(dataI, dataF, dataL);
267         test6(dataI, dataI, dataF, dataF, dataL, dataL);
268         verify("test6", dataI, goldI6);
269         verify("test6", dataF, goldF6);
270         verify("test6", dataL, goldL6);
271     }
272 
273     @Test
274     @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0", IRNode.ADD_VF, "> 0"},
275         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
276     static void test6(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
277                       long[] dataLa, long[] dataLb) {
278         for (int i = 0; i < RANGE; i+=2) {
279             // Chain of parallelizable op and conversion
280             int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
281             int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
282             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
283             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
284             int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
285             int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
286             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
287             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
288             float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
289             float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
290             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
291             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
292         }
293     }
294 
295     @Run(test = "test7")
296     public void runTest7() {
297         int[]   dataI = new int[RANGE];
298         float[] dataF = new float[RANGE];
299         long[]  dataL = new long[RANGE];
300         init(dataI, dataF, dataL);
301         test7(dataI, dataI, dataF, dataF, dataL, dataL);
302         verify("test7", dataI, goldI7);
303         verify("test7", dataF, goldF7);
304         verify("test7", dataL, goldL7);
305     }
306 
307     @Test
308     static void test7(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
309                       long[] dataLa, long[] dataLb) {
310         for (int i = 0; i < RANGE; i+=2) {
311             // Cycle involving 3 memory slices
312             int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
313             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
314             int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
315             int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
316             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
317             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
318             float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
319             float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
320             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
321             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
322             int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // moved down
323             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
324         }
325     }
326 
327 
328     @Run(test = "test8")
329     public void runTest8() {
330         int[]   dataI = new int[RANGE];
331         float[] dataF = new float[RANGE];
332         long[]  dataL = new long[RANGE];
333         init(dataI, dataF, dataL);
334         test8(dataI, dataI, dataF, dataF, dataL, dataL);
335         verify("test8", dataI, goldI8);
336         verify("test8", dataF, goldF8);
337         verify("test8", dataL, goldL8);
338     }
339 
340     @Test
341     static void test8(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
342                       long[] dataLa, long[] dataLb) {
343         for (int i = 0; i < RANGE; i+=2) {
344             // 2-cycle, with more ops after
345             int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
346             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
347             int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
348             int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
349             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
350             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
351             int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
352             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
353             // more stuff after
354             float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
355             float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
356             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
357             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
358         }
359     }
360 
361     @Run(test = "test9")
362     public void runTest9() {
363         int[]   dataI = new int[RANGE];
364         float[] dataF = new float[RANGE];
365         long[]  dataL = new long[RANGE];
366         init(dataI, dataF, dataL);
367         test9(dataI, dataI, dataF, dataF, dataL, dataL);
368         verify("test9", dataI, goldI9);
369         verify("test9", dataF, goldF9);
370         verify("test9", dataL, goldL9);
371     }
372 
373     @Test
374     static void test9(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
375                       long[] dataLa, long[] dataLb) {
376         for (int i = 0; i < RANGE; i+=2) {
377             // 2-cycle, with more stuff before
378             float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
379             float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
380             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
381             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
382             // 2-cycle
383             int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
384             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
385             int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
386             int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
387             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
388             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
389             int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
390             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
391         }
392     }
393 
394     @Run(test = "test10")
395     public void runTest10() {
396         int[]   dataI = new int[RANGE];
397         float[] dataF = new float[RANGE];
398         long[]  dataL = new long[RANGE];
399         init(dataI, dataF, dataL);
400         test10(dataI, dataI, dataF, dataF, dataL, dataL);
401         verify("test10", dataI, goldI10);
402         verify("test10", dataF, goldF10);
403         verify("test10", dataL, goldL10);
404     }
405 
406     @Test
407     static void test10(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
408                       long[] dataLa, long[] dataLb) {
409         for (int i = 0; i < RANGE; i+=2) {
410             // This creates the following graph before SuperWord:
411             //
412             // A -> R -> U
413             //      S -> V -> B
414             //
415             // SuperWord analyzes the graph, and sees that [A,B] and [U,V]
416             // are adjacent, isomorphic and independent packs. However,
417             // [R,S] are not isomorphic (R mul, S add).
418             // So it vectorizes [A,B] and [U,V] this gives us this graph:
419             //
420             //        -> R
421             //  [A,B]      -> [U,V] -+
422             //    ^   -> S           |
423             //    |                  |
424             //    +------------------+
425             //
426             // The cycle thus does not only go via packs, but also scalar ops.
427             //
428             int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3; // A
429             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
430             int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45; // R: constant mismatch
431             int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) + 43; // S
432             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
433             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
434             float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f; // U
435             float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f; // V
436             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
437             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
438             int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // B: moved down
439             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
440         }
441     }
442 
443     static void init(int[] dataI, float[] dataF) {
444         for (int i = 0; i < RANGE; i++) {
445             dataI[i] = i + 1;
446             dataF[i] = i + 0.1f;
447         }
448     }
449 
450     static void init(int[] dataI, float[] dataF, long[] dataL) {
451         for (int i = 0; i < RANGE; i++) {
452             dataI[i] = i + 1;
453             dataF[i] = i + 0.1f;
454             dataL[i] = i + 1;
455         }
456     }
457 
458     static void verify(String name, int[] data, int[] gold) {
459         for (int i = 0; i < RANGE; i++) {
460             if (data[i] != gold[i]) {
461                 throw new RuntimeException(" Invalid " + name + " result: dataI[" + i + "]: " + data[i] + " != " + gold[i]);
462             }
463         }
464     }
465 
466     static void verify(String name, float[] data, float[] gold) {
467         for (int i = 0; i < RANGE; i++) {
468             int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
469             int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
470             if (datav != goldv) {
471                 throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
472             }
473         }
474     }
475 
476     static void verify(String name, long[] data, long[] gold) {
477         for (int i = 0; i < RANGE; i++) {
478             if (data[i] != gold[i]) {
479                 throw new RuntimeException(" Invalid " + name + " result: dataL[" + i + "]: " + data[i] + " != " + gold[i]);
480             }
481         }
482     }
483 }
484