1 /*
  2  * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 /*
 26  * @test
 27  * @bug 8304042
 28  * @summary Test some examples with independent packs with cyclic dependency
 29  *          between the packs.
 30  * @modules java.base/jdk.internal.misc
 31  * @library /test/lib /
 32  * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency
 33  */
 34 
 35 package compiler.loopopts.superword;
 36 
 37 import jdk.internal.misc.Unsafe;
 38 import jdk.test.lib.Asserts;
 39 import compiler.lib.ir_framework.*;
 40 
 41 public class TestIndependentPacksWithCyclicDependency {
 42     static final int RANGE = 1024;
 43     static final int ITER  = 10_000;
 44     static Unsafe unsafe = Unsafe.getUnsafe();
 45 
 46     int[]   goldI0 = new int[RANGE];
 47     float[] goldF0 = new float[RANGE];
 48     int[]   goldI1 = new int[RANGE];
 49     float[] goldF1 = new float[RANGE];
 50     int[]   goldI2 = new int[RANGE];
 51     float[] goldF2 = new float[RANGE];
 52     int[]   goldI3 = new int[RANGE];
 53     float[] goldF3 = new float[RANGE];
 54     int[]   goldI4 = new int[RANGE];
 55     float[] goldF4 = new float[RANGE];
 56     int[]   goldI5 = new int[RANGE];
 57     float[] goldF5 = new float[RANGE];
 58     int[]   goldI6 = new int[RANGE];
 59     float[] goldF6 = new float[RANGE];
 60     long[]  goldL6 = new long[RANGE];
 61     int[]   goldI7 = new int[RANGE];
 62     float[] goldF7 = new float[RANGE];
 63     long[]  goldL7 = new long[RANGE];
 64     int[]   goldI8 = new int[RANGE];
 65     float[] goldF8 = new float[RANGE];
 66     long[]  goldL8 = new long[RANGE];
 67     int[]   goldI9 = new int[RANGE];
 68     float[] goldF9 = new float[RANGE];
 69     long[]  goldL9 = new long[RANGE];
 70     int[]   goldI10 = new int[RANGE];
 71     float[] goldF10 = new float[RANGE];
 72     long[]  goldL10 = new long[RANGE];
 73 
 74     public static void main(String args[]) {
 75         TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
 76                                    "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::test*",
 77                                    "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::verify",
 78                                    "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::init",
 79                                    "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
 80     }
 81 
 82     TestIndependentPacksWithCyclicDependency() {
 83         // compute the gold standard in interpreter mode
 84         init(goldI0, goldF0);
 85         test0(goldI0, goldI0, goldF0, goldF0);
 86         init(goldI1, goldF1);
 87         test1(goldI1, goldI1, goldF1, goldF1);
 88         init(goldI2, goldF2);
 89         test2(goldI2, goldI2, goldF2, goldF2);
 90         init(goldI3, goldF3);
 91         test3(goldI3, goldI3, goldF3, goldF3);
 92         init(goldI4, goldF4);
 93         test4(goldI4, goldI4, goldF4, goldF4);
 94         init(goldI5, goldF5);
 95         test5(goldI5, goldI5, goldF5, goldF5);
 96         init(goldI6, goldF6, goldL6);
 97         test6(goldI6, goldI6, goldF6, goldF6, goldL6, goldL6);
 98         init(goldI7, goldF7, goldL7);
 99         test7(goldI7, goldI7, goldF7, goldF7, goldL7, goldL7);
100         init(goldI8, goldF8, goldL8);
101         test8(goldI8, goldI8, goldF8, goldF8, goldL8, goldL8);
102         init(goldI9, goldF9, goldL9);
103         test9(goldI9, goldI9, goldF9, goldF9, goldL9, goldL9);
104         init(goldI10, goldF10, goldL10);
105         test10(goldI10, goldI10, goldF10, goldF10, goldL10, goldL10);
106     }
107 
108     @Run(test = "test0")
109     @Warmup(100)
110     public void runTest0() {
111         int[] dataI = new int[RANGE];
112         float[] dataF = new float[RANGE];
113         init(dataI, dataF);
114         test0(dataI, dataI, dataF, dataF);
115         verify("test0", dataI, goldI0);
116         verify("test0", dataF, goldF0);
117     }
118 
119     @Test
120     @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
121         applyIfPlatform = {"64-bit", "true"},
122         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
123     static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
124         for (int i = 0; i < RANGE; i+=2) {
125             // Hand-unrolled 2x. Int and Float slice are completely separate.
126             dataIb[i+0] = dataIa[i+0] + 3;
127             dataIb[i+1] = dataIa[i+1] + 3;
128             dataFb[i+0] = dataFa[i+0] * 1.3f;
129             dataFb[i+1] = dataFa[i+1] * 1.3f;
130         }
131     }
132 
133     @Run(test = "test1")
134     @Warmup(100)
135     public void runTest1() {
136         int[] dataI = new int[RANGE];
137         float[] dataF = new float[RANGE];
138         init(dataI, dataF);
139         test1(dataI, dataI, dataF, dataF);
140         verify("test1", dataI, goldI1);
141         verify("test1", dataF, goldF1);
142     }
143 
144     @Test
145     @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0", IRNode.VECTOR_CAST_F2I, "> 0", IRNode.VECTOR_CAST_I2F, "> 0"},
146         applyIfPlatform = {"64-bit", "true"},
147         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
148     static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
149         for (int i = 0; i < RANGE; i+=2) {
150             // Hand-unrolled 2x. Converst to and from. StoreF -> LoadF dependency.
151             dataFa[i+0] = dataIa[i+0] + 3;
152             dataFa[i+1] = dataIa[i+1] + 3;
153             dataIb[i+0] = (int)(dataFb[i+0] * 1.3f);
154             dataIb[i+1] = (int)(dataFb[i+1] * 1.3f);
155         }
156     }
157 
158     @Run(test = "test2")
159     public void runTest2() {
160         int[] dataI = new int[RANGE];
161         float[] dataF = new float[RANGE];
162         init(dataI, dataF);
163         test2(dataI, dataI, dataF, dataF);
164         verify("test2", dataI, goldI2);
165         verify("test2", dataF, goldF2);
166     }
167 
168     @Test
169     @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0"},
170         applyIfPlatform = {"64-bit", "true"},
171         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
172     static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
173         for (int i = 0; i < RANGE; i+=2) {
174             // int and float arrays are two slices. But we pretend both are of type int.
175             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1);
176             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] + 1);
177             dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0);
178             dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4);
179         }
180     }
181 
182     @Run(test = "test3")
183     @Warmup(100)
184     public void runTest3() {
185         int[] dataI = new int[RANGE];
186         float[] dataF = new float[RANGE];
187         init(dataI, dataF);
188         test3(dataI, dataI, dataF, dataF);
189         verify("test3", dataI, goldI3);
190         verify("test3", dataF, goldF3);
191     }
192 
193     @Test
194     @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
195         applyIfPlatform = {"64-bit", "true"},
196         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
197     static void test3(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
198         for (int i = 0; i < RANGE; i+=2) {
199             // Inversion of orders. But because we operate on separate slices, this should
200             // safely vectorize. It should detect that each line is independent, so it can
201             // reorder them.
202             dataIb[i+0] = dataIa[i+0] + 3;
203             dataFb[i+1] = dataFa[i+1] * 1.3f;
204             dataFb[i+0] = dataFa[i+0] * 1.3f;
205             dataIb[i+1] = dataIa[i+1] + 3;
206         }
207     }
208 
209     @Run(test = "test4")
210     @Warmup(100)
211     public void runTest4() {
212         int[] dataI = new int[RANGE];
213         float[] dataF = new float[RANGE];
214         init(dataI, dataF);
215         test4(dataI, dataI, dataF, dataF);
216         verify("test4", dataI, goldI4);
217         verify("test4", dataF, goldF4);
218     }
219 
220     @Test
221     static void test4(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
222         for (int i = 0; i < RANGE; i+=2) {
223             // same as test1, except that reordering leads to different semantics
224             // [A,B] and [X,Y] are both packs that are internally independent
225             // But we have dependencies A -> X (StoreF -> LoadF)
226             //                      and Y -> B (StoreI -> LoadI)
227             // Hence the two packs have a cyclic dependency, we cannot schedule
228             // one before the other.
229             dataFa[i+0] = dataIa[i+0] + 3;            // A
230             dataIb[i+0] = (int)(dataFb[i+0] * 1.3f);  // X
231             dataIb[i+1] = (int)(dataFb[i+1] * 1.3f);  // Y
232             dataFa[i+1] = dataIa[i+1] + 3;            // B
233         }
234     }
235 
236     @Run(test = "test5")
237     public void runTest5() {
238         int[] dataI = new int[RANGE];
239         float[] dataF = new float[RANGE];
240         init(dataI, dataF);
241         test5(dataI, dataI, dataF, dataF);
242         verify("test5", dataI, goldI5);
243         verify("test5", dataF, goldF5);
244     }
245 
246     @Test
247     static void test5(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
248         for (int i = 0; i < RANGE; i+=2) {
249             // same as test2, except that reordering leads to different semantics
250             // explanation analogue to test4
251             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1); // A
252             dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0); // X
253             dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4); // Y
254             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] + 1); // B
255         }
256     }
257 
258     @Run(test = "test6")
259     public void runTest6() {
260         int[]   dataI = new int[RANGE];
261         float[] dataF = new float[RANGE];
262         long[]  dataL = new long[RANGE];
263         init(dataI, dataF, dataL);
264         test6(dataI, dataI, dataF, dataF, dataL, dataL);
265         verify("test6", dataI, goldI6);
266         verify("test6", dataF, goldF6);
267         verify("test6", dataL, goldL6);
268     }
269 
270     @Test
271     @IR(applyIf = {"UseCompactObjectHeaders", "false"},
272         counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0", IRNode.ADD_VF, "> 0"},
273         applyIfPlatform = {"64-bit", "true"},
274         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
275     static void test6(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
276                       long[] dataLa, long[] dataLb) {
277         for (int i = 0; i < RANGE; i+=2) {
278             // Chain of parallelizable op and conversion
279             int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
280             int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
281             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
282             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
283             int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
284             int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
285             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
286             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
287             float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
288             float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
289             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
290             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
291         }
292     }
293 
294     @Run(test = "test7")
295     public void runTest7() {
296         int[]   dataI = new int[RANGE];
297         float[] dataF = new float[RANGE];
298         long[]  dataL = new long[RANGE];
299         init(dataI, dataF, dataL);
300         test7(dataI, dataI, dataF, dataF, dataL, dataL);
301         verify("test7", dataI, goldI7);
302         verify("test7", dataF, goldF7);
303         verify("test7", dataL, goldL7);
304     }
305 
306     @Test
307     static void test7(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
308                       long[] dataLa, long[] dataLb) {
309         for (int i = 0; i < RANGE; i+=2) {
310             // Cycle involving 3 memory slices
311             int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
312             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
313             int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
314             int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
315             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
316             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
317             float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
318             float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
319             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
320             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
321             int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // moved down
322             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
323         }
324     }
325 
326 
327     @Run(test = "test8")
328     public void runTest8() {
329         int[]   dataI = new int[RANGE];
330         float[] dataF = new float[RANGE];
331         long[]  dataL = new long[RANGE];
332         init(dataI, dataF, dataL);
333         test8(dataI, dataI, dataF, dataF, dataL, dataL);
334         verify("test8", dataI, goldI8);
335         verify("test8", dataF, goldF8);
336         verify("test8", dataL, goldL8);
337     }
338 
339     @Test
340     static void test8(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
341                       long[] dataLa, long[] dataLb) {
342         for (int i = 0; i < RANGE; i+=2) {
343             // 2-cycle, with more ops after
344             int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
345             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
346             int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
347             int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
348             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
349             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
350             int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
351             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
352             // more stuff after
353             float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
354             float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
355             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
356             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
357         }
358     }
359 
360     @Run(test = "test9")
361     public void runTest9() {
362         int[]   dataI = new int[RANGE];
363         float[] dataF = new float[RANGE];
364         long[]  dataL = new long[RANGE];
365         init(dataI, dataF, dataL);
366         test9(dataI, dataI, dataF, dataF, dataL, dataL);
367         verify("test9", dataI, goldI9);
368         verify("test9", dataF, goldF9);
369         verify("test9", dataL, goldL9);
370     }
371 
372     @Test
373     static void test9(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
374                       long[] dataLa, long[] dataLb) {
375         for (int i = 0; i < RANGE; i+=2) {
376             // 2-cycle, with more stuff before
377             float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
378             float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
379             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
380             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
381             // 2-cycle
382             int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
383             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
384             int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
385             int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
386             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
387             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
388             int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
389             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
390         }
391     }
392 
393     @Run(test = "test10")
394     public void runTest10() {
395         int[]   dataI = new int[RANGE];
396         float[] dataF = new float[RANGE];
397         long[]  dataL = new long[RANGE];
398         init(dataI, dataF, dataL);
399         test10(dataI, dataI, dataF, dataF, dataL, dataL);
400         verify("test10", dataI, goldI10);
401         verify("test10", dataF, goldF10);
402         verify("test10", dataL, goldL10);
403     }
404 
405     @Test
406     static void test10(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
407                       long[] dataLa, long[] dataLb) {
408         for (int i = 0; i < RANGE; i+=2) {
409             // This creates the following graph before SuperWord:
410             //
411             // A -> R -> U
412             //      S -> V -> B
413             //
414             // SuperWord analyzes the graph, and sees that [A,B] and [U,V]
415             // are adjacent, isomorphic and independent packs. However,
416             // [R,S] are not isomorphic (R mul, S add).
417             // So it vectorizes [A,B] and [U,V] this gives us this graph:
418             //
419             //        -> R
420             //  [A,B]      -> [U,V] -+
421             //    ^   -> S           |
422             //    |                  |
423             //    +------------------+
424             //
425             // The cycle thus does not only go via packs, but also scalar ops.
426             //
427             int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3; // A
428             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
429             int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45; // R: constant mismatch
430             int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) + 43; // S
431             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
432             unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
433             float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f; // U
434             float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f; // V
435             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
436             unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
437             int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // B: moved down
438             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
439         }
440     }
441 
442     static void init(int[] dataI, float[] dataF) {
443         for (int i = 0; i < RANGE; i++) {
444             dataI[i] = i + 1;
445             dataF[i] = i + 0.1f;
446         }
447     }
448 
449     static void init(int[] dataI, float[] dataF, long[] dataL) {
450         for (int i = 0; i < RANGE; i++) {
451             dataI[i] = i + 1;
452             dataF[i] = i + 0.1f;
453             dataL[i] = i + 1;
454         }
455     }
456 
457     static void verify(String name, int[] data, int[] gold) {
458         for (int i = 0; i < RANGE; i++) {
459             if (data[i] != gold[i]) {
460                 throw new RuntimeException(" Invalid " + name + " result: dataI[" + i + "]: " + data[i] + " != " + gold[i]);
461             }
462         }
463     }
464 
465     static void verify(String name, float[] data, float[] gold) {
466         for (int i = 0; i < RANGE; i++) {
467             int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
468             int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
469             if (datav != goldv) {
470                 throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
471             }
472         }
473     }
474 
475     static void verify(String name, long[] data, long[] gold) {
476         for (int i = 0; i < RANGE; i++) {
477             if (data[i] != gold[i]) {
478                 throw new RuntimeException(" Invalid " + name + " result: dataL[" + i + "]: " + data[i] + " != " + gold[i]);
479             }
480         }
481     }
482 }
483