1 /*
  2  * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 package hat.test;
 26 
 27 import hat.Accelerator;
 28 import hat.ComputeContext;
 29 import hat.KernelContext;
 30 import hat.NDRange;
 31 import hat.backend.Backend;
 32 import hat.buffer.F32ArrayPadded;
 33 import hat.types.Float2;
 34 import hat.device.DeviceSchema;
 35 import hat.device.NonMappableIface;
 36 import optkl.ifacemapper.MappableIface.*;
 37 import hat.test.annotation.HatTest;
 38 import hat.test.exceptions.HATAsserts;
 39 import jdk.incubator.code.Reflect;
 40 
 41 import java.lang.invoke.MethodHandles;
 42 import java.util.Random;
 43 
 44 public class TestFloat2 {
 45 
    /**
     * Kernel: adds two Float2 values with the static {@code Float2.add} form.
     *
     * <p>Each work-item that passes the guard reads one Float2 from {@code a} and one
     * from {@code b} at float offset {@code gix * 2}, adds them, and stores the result
     * into {@code c} at the same offset.
     *
     * @param kernelContext supplies {@code gix} and {@code gsx} (presumably the global
     *                      thread index and global size; confirm against KernelContext)
     * @param a first input buffer
     * @param b second input buffer
     * @param c output buffer
     */
    @Reflect
    public static void vectorOps01(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c) {
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            // A Float2 spans two consecutive floats, hence the "* 2" on every offset
            Float2 vA = a.float2View(index * 2);
            Float2 vB = b.float2View(index * 2);
            Float2 vC = Float2.add(vA, vB);
            c.storeFloat2View(vC, index * 2);
        }
    }
 56 
    /**
     * Kernel: scales only the x lane of each Float2 by 10 through the mutable view.
     *
     * <p>The value loaded from {@code a} is held as a {@code Float2.MutableImpl}, its x
     * lane is overwritten with {@code x * 10.0f}, and the value is stored into {@code b}
     * at the same offset. The y lane is not touched by this kernel.
     *
     * @param kernelContext supplies {@code gix} and {@code gsx}
     * @param a input buffer
     * @param b output buffer
     */
    @Reflect
    public static void vectorOps02(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @WO F32ArrayPadded b) {
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            Float2.MutableImpl vA = a.float2View(index * 2);
            float scaleX = vA.x() * 10.0f;
            // Setter form of x(): replaces the x lane of the loaded value
            vA.x(scaleX);
            b.storeFloat2View(vA, index * 2);
        }
    }
 67 
    /**
     * Kernel: scales x by 10 and y by 20, building the result with {@code Float2.of}.
     *
     * @param kernelContext supplies {@code gix} and {@code gsx}
     * @param a input buffer
     * @param b output buffer
     */
    @Reflect
    public static void vectorOps03(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @WO F32ArrayPadded b) {
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;

            // Obtain a view of the input data as a float2 and
            // store that view in private memory
            Float2 vA = a.float2View(index * 2);

            // Operate on the individual lanes of the float2
            float scaleX = vA.x() * 10.0f;
            float scaleY = vA.y() * 20.0f;

            // Create a new float2 within the device code
            Float2 vResult = Float2.of(scaleX, scaleY);

            // Store the float2 from private memory to global memory
            b.storeFloat2View(vResult, index * 2);
        }
    }
 88 
    /**
     * Kernel: scales x by 10 and y by 20 in place on a mutable Float2, then stores it.
     *
     * <p>Same arithmetic as {@code vectorOps03}, but the lanes are updated through the
     * {@code Float2.MutableImpl} setters instead of building a new value.
     *
     * @param kernelContext supplies {@code gix} and {@code gsx}
     * @param a input buffer
     * @param b output buffer
     */
    @Reflect
    public static void vectorOps04(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @WO F32ArrayPadded b) {
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            Float2.MutableImpl vA = a.float2View(index * 2);
            // Getter nested inside setter: read the lane, scale it, write it back
            vA.x(vA.x() * 10.0f);
            vA.y(vA.y() * 20.0f);
            b.storeFloat2View(vA, index * 2);
        }
    }
 99 
    /**
     * Kernel: chained instance-method additions, {@code c = (a + b) + b}.
     *
     * @param kernelContext supplies {@code gix} and {@code gsx}
     * @param a first input buffer
     * @param b second input buffer (added twice)
     * @param c output buffer
     */
    @Reflect
    public static void vectorOps05(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c) {
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            Float2 vA = a.float2View(index * 2);
            Float2 vB = b.float2View(index * 2);
            // Two add() calls chained on the intermediate result
            Float2 vC = vA.add(vB).add(vB);
            c.storeFloat2View(vC, index * 2);
        }
    }
110 
    /**
     * Kernel: subtraction with the instance method, {@code c = a - b}.
     *
     * @param kernelContext supplies {@code gix} and {@code gsx}
     * @param a minuend buffer
     * @param b subtrahend buffer
     * @param c output buffer
     */
    @Reflect
    public static void vectorOps06(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c) {
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            Float2 vA = a.float2View(index * 2);
            Float2 vB = b.float2View(index * 2);
            // Instance form vA.sub(vB); the static Float2.sub(vA, vB) form is not used in this kernel
            Float2 vC = vA.sub(vB);
            c.storeFloat2View(vC, index * 2);
        }
    }
122 
    /**
     * Kernel: add followed by sub of the same operand, {@code c = (a + b) - b}.
     *
     * @param kernelContext supplies {@code gix} and {@code gsx}
     * @param a first input buffer
     * @param b second input buffer (added, then subtracted)
     * @param c output buffer
     */
    @Reflect
    public static void vectorOps07(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c) {
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            Float2 vA = a.float2View(index * 2);
            Float2 vB = b.float2View(index * 2);
            // Mixes two different operations in one chain
            Float2 vC = vA.add(vB).sub(vB);
            c.storeFloat2View(vC, index * 2);
        }
    }
133 
    /**
     * Kernel: three chained operations, {@code c = ((a + b) * a) / b}.
     *
     * @param kernelContext supplies {@code gix} and {@code gsx}
     * @param a first input buffer
     * @param b second input buffer (also the divisor)
     * @param c output buffer
     */
    @Reflect
    public static void vectorOps08(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c) {
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            Float2 vA = a.float2View(index * 2);
            Float2 vB = b.float2View(index * 2);
            // Evaluated left to right: add, then mul, then div
            Float2 vC = vA.add(vB).mul(vA).div(vB);
            c.storeFloat2View(vC, index * 2);
        }
    }
144 
    /**
     * Kernel: nested (composed) operations, {@code c = a + (a * b)}.
     *
     * @param kernelContext supplies {@code gix} and {@code gsx}
     * @param a first input buffer
     * @param b second input buffer
     * @param c output buffer
     */
    @Reflect
    public static void vectorOps09(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c) {
        // Checking composition: the result of one vector operation is passed
        // directly as the argument of another
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            Float2 vA = a.float2View(index * 2);
            Float2 vB = b.float2View(index * 2);
            Float2 vC = vA.add(vA.mul(vB));
            c.storeFloat2View(vC, index * 2);
        }
    }
156 
    /**
     * Device-side array of 1024 floats, described by a {@link DeviceSchema}.
     *
     * <p>Used by {@code vectorOps10} and {@code vectorOps12}, which obtain it through
     * {@link #createLocal()}.
     *
     * <p>NOTE(review): every factory and Float2 accessor below is a stub on the Java
     * side (returns {@code null} or has an empty body). Presumably the HAT backend
     * replaces these calls when it generates device code; confirm before relying on
     * them from host code.
     */
    private interface SharedArray extends NonMappableIface {
        // Scalar element setter and getter for the "array" member of the schema
        void array(long index, float value);
        float array(long index);
        // Schema: one member named "array" with 1024 elements
        DeviceSchema<SharedArray> schema = DeviceSchema.of(SharedArray.class,
                arr -> arr.withArray("array", 1024));
        static SharedArray create(Accelerator accelerator) {
            return null;
        }
        static SharedArray createLocal() {
            return null;
        }
        // Reads a Float2 starting at the given float index (stub)
        default Float2 float2View(int index) {
            return null;
        }
        // Writes a Float2 starting at the given float index (stub);
        // note the parameter is a Float2 despite being named "float4"
        default void storeFloat2View(Float2 float4, int index) {
        }
    }
174 
    /**
     * Kernel: copies each Float2 from {@code a} to {@code b}, staging it through a
     * {@code SharedArray}.
     *
     * <p>The value is stored into the shared array at offset {@code lix * 2}, a barrier
     * is issued, and the value is read back from the same offset before being written
     * to {@code b}.
     *
     * @param kernelContext supplies {@code gix}, {@code gsx}, {@code lix} (presumably the
     *                      index within the work-group; confirm) and {@code barrier()}
     * @param a input buffer
     * @param b output buffer
     */
    @Reflect
    public static void vectorOps10(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @WO F32ArrayPadded b) {
        SharedArray sm = SharedArray.createLocal();
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            int lix = kernelContext.lix;
            Float2 vA = a.float2View(index * 2);
            // SharedArray holds 1024 floats, so lix * 2 + 1 must stay below 1024
            sm.storeFloat2View(vA, lix * 2);
            kernelContext.barrier();
            Float2 r = sm.float2View(lix * 2);
            b.storeFloat2View(r, index * 2);
        }
    }
188 
    /**
     * Device-side array of 4 floats, described by a {@link DeviceSchema}.
     *
     * <p>Used by {@code vectorOps11}, which obtains it through {@link #createPrivate()}.
     *
     * <p>NOTE(review): every factory and Float2 accessor below is a stub on the Java
     * side (returns {@code null} or has an empty body). Presumably the HAT backend
     * replaces these calls when it generates device code; confirm before relying on
     * them from host code.
     */
    private interface PrivateMemory extends NonMappableIface {
        // Scalar element setter and getter for the "array" member of the schema
        void array(long index, float value);
        float array(long index);
        // Schema: one member named "array" with 4 elements
        DeviceSchema<PrivateMemory> schema = DeviceSchema.of(PrivateMemory.class,
                arr -> arr.withArray("array", 4));
        static PrivateMemory create(Accelerator accelerator) {
            return null;
        }
        static PrivateMemory createPrivate() {
            return null;
        }
        // Reads a Float2 starting at the given float index (stub)
        default Float2 float2View(int index) {
            return null;
        }
        // Writes a Float2 starting at the given float index (stub);
        // note the parameter is a Float2 despite being named "float4"
        default void storeFloat2View(Float2 float4, int index) {
        }
    }
206 
    /**
     * Kernel: copies each Float2 from {@code a} to {@code b}, staging it through a
     * {@code PrivateMemory} instance.
     *
     * <p>The value is stored at offset 0 of the private array, a barrier is issued, and
     * the value is read back from offset 0 before being written to {@code b}.
     *
     * @param kernelContext supplies {@code gix}, {@code gsx} and {@code barrier()}
     * @param a input buffer
     * @param b output buffer
     */
    @Reflect
    public static void vectorOps11(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @WO F32ArrayPadded b) {
        PrivateMemory pm = PrivateMemory.createPrivate();
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            Float2 vA = a.float2View(index * 2);
            // Always slot 0: the offset does not depend on the thread index
            pm.storeFloat2View(vA, 0);
            kernelContext.barrier();
            Float2 r = pm.float2View(0);
            b.storeFloat2View(r, index * 2);
        }
    }
219 
    /**
     * Kernel: copies each Float2 from {@code a} to {@code b} through a
     * {@code SharedArray}, writing the lanes as scalars and reading them back as a
     * Float2.
     *
     * <p>Unlike {@code vectorOps10}, the shared array is filled with two scalar
     * {@code array(index, value)} stores (x, then y) rather than one
     * {@code storeFloat2View}; the read-back still uses {@code float2View}.
     *
     * @param kernelContext supplies {@code gix}, {@code gsx}, {@code lix} and {@code barrier()}
     * @param a input buffer
     * @param b output buffer
     */
    @Reflect
    public static void vectorOps12(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @WO F32ArrayPadded b) {
        SharedArray sm = SharedArray.createLocal();
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            int lix = kernelContext.lix;
            Float2 vA = a.float2View(index * 2);
            // Scalar stores: x goes to the even slot, y to the odd slot that follows
            sm.array(lix * 2 + 0, vA.x());
            sm.array(lix * 2 + 1, vA.y());
            kernelContext.barrier();
            Float2 r = sm.float2View(lix * 2);
            b.storeFloat2View(r, index * 2);
        }
    }
234 
    /**
     * Kernel: in-place update of {@code a} that sets the x lane of every Float2 to 10.
     *
     * <p>The loaded Float2 is converted with {@code Float2.makeMutable}, its x lane is
     * set to {@code 10.0f}, and the value is stored back to the offset it was read from.
     *
     * @param kernelContext supplies {@code gix} and {@code gsx}
     * @param a buffer that is both read and written
     */
    @Reflect
    public static void vectorOps14(@RO KernelContext kernelContext, @RW F32ArrayPadded a) {
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            Float2 vA = a.float2View(index * 2);
            // Obtain a mutable Float2 from the immutable-typed load
            Float2.MutableImpl vB = Float2.makeMutable(vA);
            vB.x(10.0f);
            a.storeFloat2View(vB, index * 2);
        }
    }
245 
246 
    /**
     * Kernel: stores the constant Float2 {@code (1.0f, 2.0f)} at every Float2 slot of
     * {@code a}, without reading the buffer first.
     *
     * @param kernelContext supplies {@code gix} and {@code gsx}
     * @param a output buffer
     */
    @Reflect
    public static void vectorOps15(@RO KernelContext kernelContext, @WO F32ArrayPadded a) {
        // In this sample we do not perform a vector load; the vector store is issued
        // directly from a newly created float2.
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            Float2 result = Float2.of(1.0f, 2.0f);
            a.storeFloat2View(result, index * 2);
        }
    }
257 
    /**
     * Compute entry point that dispatches {@code vectorOps01}.
     *
     * <p>This is the only graph in the class that passes a second argument (128) to
     * {@code NDRange.of1D}; presumably it is the local work-group size (confirm
     * against NDRange).
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    first input buffer
     * @param b    second input buffer
     * @param c    output buffer
     * @param size number of floats per buffer
     */
    @Reflect
    public static void computeGraph01(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c, int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2,128), kernelContext -> TestFloat2.vectorOps01(kernelContext, a, b, c));
    }
263 
    /**
     * Compute entry point that dispatches {@code vectorOps02} over {@code size / 2} threads.
     *
     * <p>NOTE(review): {@code a} is annotated {@code @RW} here but {@code @RO} on
     * {@code vectorOps02} itself; confirm whether the wider access is intentional.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    input buffer
     * @param b    output buffer
     * @param size number of floats per buffer
     */
    @Reflect
    public static void computeGraph02(@RO ComputeContext cc, @RW F32ArrayPadded a, @WO F32ArrayPadded b, int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps02(kernelContext, a, b));
    }
269 
    /**
     * Compute entry point that dispatches {@code vectorOps03} over {@code size / 2} threads.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    input buffer
     * @param b    output buffer
     * @param size number of floats per buffer
     */
    @Reflect
    public static void computeGraph03(@RO ComputeContext cc, @RO F32ArrayPadded a, @WO F32ArrayPadded b, int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps03(kernelContext, a, b));
    }
275 
    /**
     * Compute entry point that dispatches {@code vectorOps04} over {@code size / 2} threads.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    input buffer
     * @param b    output buffer
     * @param size number of floats per buffer
     */
    @Reflect
    public static void computeGraph04(@RO ComputeContext cc, @RO F32ArrayPadded a, @WO F32ArrayPadded b, int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps04(kernelContext, a, b));
    }
281 
    /**
     * Compute entry point that dispatches {@code vectorOps05} over {@code size / 2} threads.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    first input buffer
     * @param b    second input buffer
     * @param c    output buffer
     * @param size number of floats per buffer
     */
    @Reflect
    public static void computeGraph05(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c,  int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps05(kernelContext, a, b, c));
    }
287 
    /**
     * Compute entry point that dispatches {@code vectorOps06} over {@code size / 2} threads.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    first input buffer
     * @param b    second input buffer
     * @param c    output buffer
     * @param size number of floats per buffer
     */
    @Reflect
    public static void computeGraph06(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c,  int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps06(kernelContext, a, b, c));
    }
293 
294 
    /**
     * Compute entry point that dispatches {@code vectorOps07} over {@code size / 2} threads.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    first input buffer
     * @param b    second input buffer
     * @param c    output buffer
     * @param size number of floats per buffer
     */
    @Reflect
    public static void computeGraph07(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c,  int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps07(kernelContext, a, b, c));
    }
300 
    /**
     * Compute entry point that dispatches {@code vectorOps08} over {@code size / 2} threads.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    first input buffer
     * @param b    second input buffer
     * @param c    output buffer
     * @param size number of floats per buffer
     */
    @Reflect
    public static void computeGraph08(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c,  int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps08(kernelContext, a, b, c));
    }
306 
    /**
     * Compute entry point that dispatches {@code vectorOps09} over {@code size / 2} threads.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    first input buffer
     * @param b    second input buffer
     * @param c    output buffer
     * @param size number of floats per buffer
     */
    @Reflect
    public static void computeGraph09(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c,  int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps09(kernelContext, a, b, c));
    }
312 
    /**
     * Compute entry point that dispatches {@code vectorOps10} over {@code size / 2} threads.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    input buffer
     * @param b    output buffer
     * @param size number of floats per buffer
     */
    @Reflect
    public static void computeGraph10(@RO ComputeContext cc, @RO F32ArrayPadded a,  @WO F32ArrayPadded b, int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps10(kernelContext, a, b));
    }
318 
    /**
     * Compute entry point that dispatches {@code vectorOps11} over {@code size / 2} threads.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    input buffer
     * @param b    output buffer
     * @param size number of floats per buffer
     */
    @Reflect
    public static void computeGraph11(@RO ComputeContext cc, @RO F32ArrayPadded a,  @WO F32ArrayPadded b, int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps11(kernelContext, a, b));
    }
324 
    /**
     * Compute entry point that dispatches {@code vectorOps12} over {@code size / 2} threads.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    input buffer
     * @param b    output buffer
     * @param size number of floats per buffer
     */
    @Reflect
    public static void computeGraph12(@RO ComputeContext cc, @RO F32ArrayPadded a,  @WO F32ArrayPadded b, int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps12(kernelContext, a, b));
    }
330 
    /**
     * Compute entry point that dispatches {@code vectorOps14} over {@code size / 2} threads.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    buffer updated in place by the kernel
     * @param size number of floats in the buffer
     */
    @Reflect
    public static void computeGraph14(@RO ComputeContext cc, @RW F32ArrayPadded a, int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps14(kernelContext, a));
    }
336 
    /**
     * Compute entry point that dispatches {@code vectorOps15} over {@code size / 2} threads.
     *
     * @param cc   compute context used to dispatch the kernel
     * @param a    output buffer
     * @param size number of floats in the buffer
     */
    @Reflect
    public static void computeGraph15(@RO ComputeContext cc, @WO F32ArrayPadded a, int size) {
        // Note: each thread handles one Float2 (two floats), so we launch
        // N threads / vectorWidth -> size / 2 for this example
        cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps15(kernelContext, a));
    }
342 
343 
344     @HatTest
345     @Reflect
346     public void testFloat2_01() {
347         final int size = 1024;
348         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
349         var arrayA = F32ArrayPadded.create(accelerator, size);
350         var arrayB = F32ArrayPadded.create(accelerator, size);
351         var arrayC = F32ArrayPadded.create(accelerator, size);
352 
353         Random r = new Random(19);
354         for (int i = 0; i < size; i++) {
355             arrayA.array(i, r.nextFloat());
356             arrayB.array(i, r.nextFloat());
357         }
358 
359         accelerator.compute(cc -> TestFloat2.computeGraph01(cc, arrayA, arrayB, arrayC, size));
360 
361         for (int i = 0; i < size; i++) {
362             HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
363         }
364 
365     }
366 
367     @HatTest
368     @Reflect
369     public void testFloat2_02() {
370         final int size = 1024;
371         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
372         var arrayA = F32ArrayPadded.create(accelerator, size);
373         var arrayB = F32ArrayPadded.create(accelerator, size);
374 
375         Random r = new Random(19);
376         for (int i = 0; i < size; i++) {
377             arrayA.array(i, r.nextFloat());
378         }
379 
380         accelerator.compute(cc -> TestFloat2.computeGraph02(cc, arrayA, arrayB, size));
381 
382         for (int i = 0; i < size; i += 2) {
383             HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
384             HATAsserts.assertEquals((arrayA.array(i + 1)), arrayB.array(i + 1), 0.001f);
385         }
386     }
387 
388     @HatTest
389     @Reflect
390     public void testFloat2_03() {
391         final int size = 1024;
392         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
393         var arrayA = F32ArrayPadded.create(accelerator, size);
394         var arrayB = F32ArrayPadded.create(accelerator, size);
395 
396         Random r = new Random(19);
397         for (int i = 0; i < size; i++) {
398             arrayA.array(i, r.nextFloat());
399         }
400 
401         accelerator.compute(cc -> TestFloat2.computeGraph03(cc, arrayA, arrayB, size));
402 
403         for (int i = 0; i < size; i += 2) {
404             HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
405             HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
406         }
407     }
408 
409     @HatTest
410     @Reflect
411     public void testFloat2_04() {
412         final int size = 1024;
413         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
414         var arrayA = F32ArrayPadded.create(accelerator, size);
415         var arrayB = F32ArrayPadded.create(accelerator, size);
416 
417         Random r = new Random(19);
418         for (int i = 0; i < size; i++) {
419             arrayA.array(i, r.nextFloat());
420         }
421 
422         accelerator.compute(cc -> TestFloat2.computeGraph04(cc, arrayA, arrayB, size));
423 
424         for (int i = 0; i < size; i += 2) {
425             HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
426             HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
427         }
428     }
429 
430     @HatTest
431     @Reflect
432     public void testFloat2_05() {
433         final int size = 1024;
434         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
435         var arrayA = F32ArrayPadded.create(accelerator, size);
436         var arrayB = F32ArrayPadded.create(accelerator, size);
437         var arrayC = F32ArrayPadded.create(accelerator, size);
438 
439         Random r = new Random(19);
440         for (int i = 0; i < size; i++) {
441             arrayA.array(i, r.nextFloat());
442             arrayB.array(i, r.nextFloat());
443         }
444 
445         accelerator.compute(cc -> TestFloat2.computeGraph05(cc, arrayA, arrayB, arrayC, size));
446 
447         for (int i = 0; i < size; i++) {
448             HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
449         }
450     }
451 
452     @HatTest
453     @Reflect
454     public void testFloat2_06() {
455         final int size = 1024;
456         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
457         var arrayA = F32ArrayPadded.create(accelerator, size);
458         var arrayB = F32ArrayPadded.create(accelerator, size);
459         var arrayC = F32ArrayPadded.create(accelerator, size);
460 
461         Random r = new Random(19);
462         for (int i = 0; i < size; i++) {
463             arrayA.array(i, r.nextFloat());
464             arrayB.array(i, r.nextFloat());
465         }
466 
467         accelerator.compute(cc -> TestFloat2.computeGraph06(cc, arrayA, arrayB, arrayC, size));
468 
469         for (int i = 0; i < size; i++) {
470             HATAsserts.assertEquals((arrayA.array(i) - arrayB.array(i)), arrayC.array(i), 0.001f);
471         }
472     }
473 
474     @HatTest
475     @Reflect
476     public void testFloat2_07() {
477         final int size = 1024;
478         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
479         var arrayA = F32ArrayPadded.create(accelerator, size);
480         var arrayB = F32ArrayPadded.create(accelerator, size);
481         var arrayC = F32ArrayPadded.create(accelerator, size);
482 
483         Random r = new Random(19);
484         for (int i = 0; i < size; i++) {
485             arrayA.array(i, r.nextFloat());
486             arrayB.array(i, r.nextFloat());
487         }
488 
489         accelerator.compute(cc -> TestFloat2.computeGraph07(cc, arrayA, arrayB, arrayC, size));
490 
491         for (int i = 0; i < size; i++) {
492             HATAsserts.assertEquals(arrayA.array(i), arrayC.array(i), 0.001f);
493         }
494     }
495 
496     @HatTest
497     @Reflect
498     public void testFloat2_08() {
499         final int size = 1024;
500         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
501         var arrayA = F32ArrayPadded.create(accelerator, size);
502         var arrayB = F32ArrayPadded.create(accelerator, size);
503         var arrayC = F32ArrayPadded.create(accelerator, size);
504 
505         Random r = new Random(19);
506         for (int i = 0; i < size; i++) {
507             arrayA.array(i, r.nextFloat());
508             arrayB.array(i, r.nextFloat());
509         }
510 
511         accelerator.compute(cc -> TestFloat2.computeGraph08(cc, arrayA, arrayB, arrayC, size));
512 
513         for (int i = 0; i < size; i++) {
514             float val = (((arrayA.array(i) + arrayB.array(i)) * arrayA.array(i)) / arrayB.array(i));
515             HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
516         }
517     }
518 
519     @HatTest
520     @Reflect
521     public void testFloat2_09() {
522         final int size = 1024;
523         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
524         var arrayA = F32ArrayPadded.create(accelerator, size);
525         var arrayB = F32ArrayPadded.create(accelerator, size);
526         var arrayC = F32ArrayPadded.create(accelerator, size);
527 
528         Random r = new Random(19);
529         for (int i = 0; i < size; i++) {
530             arrayA.array(i, r.nextFloat());
531             arrayB.array(i, r.nextFloat());
532         }
533 
534         accelerator.compute(cc -> TestFloat2.computeGraph09(cc, arrayA, arrayB, arrayC, size));
535 
536         for (int i = 0; i < size; i++) {
537             float val = (arrayA.array(i) + (arrayB.array(i)) * arrayA.array(i));
538             HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
539         }
540     }
541 
542     @HatTest
543     @Reflect
544     public void testFloat2_10() {
545         final int size = 1024;
546         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
547         var arrayA = F32ArrayPadded.create(accelerator, size);
548         var arrayB = F32ArrayPadded.create(accelerator, size);
549 
550         Random r = new Random(19);
551         for (int i = 0; i < size; i++) {
552             arrayA.array(i, r.nextFloat());
553             arrayB.array(i, r.nextFloat());
554         }
555 
556         accelerator.compute(cc -> TestFloat2.computeGraph10(cc, arrayA, arrayB, size));
557 
558         for (int i = 0; i < size; i++) {
559             HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
560         }
561     }
562 
563     @HatTest
564     @Reflect
565     public void testFloat2_11() {
566         final int size = 1024;
567         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
568         var arrayA = F32ArrayPadded.create(accelerator, size);
569         var arrayB = F32ArrayPadded.create(accelerator, size);
570 
571         Random r = new Random(19);
572         for (int i = 0; i < size; i++) {
573             arrayA.array(i, r.nextFloat());
574             arrayB.array(i, r.nextFloat());
575         }
576 
577         accelerator.compute(cc -> TestFloat2.computeGraph11(cc, arrayA, arrayB, size));
578 
579         for (int i = 0; i < size; i++) {
580             HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
581         }
582     }
583 
584     @HatTest
585     @Reflect
586     public void testFloat2_12() {
587         final int size = 1024;
588         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
589         var arrayA = F32ArrayPadded.create(accelerator, size);
590         var arrayB = F32ArrayPadded.create(accelerator, size);
591 
592         Random r = new Random(19);
593         for (int i = 0; i < size; i++) {
594             arrayA.array(i, r.nextFloat());
595             arrayB.array(i, r.nextFloat());
596         }
597 
598         accelerator.compute(cc -> TestFloat2.computeGraph12(cc, arrayA, arrayB, size));
599 
600         for (int i = 0; i < size; i++) {
601             HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
602         }
603     }
604 
605     @HatTest
606     @Reflect
607     public void testFloat2_13() {
608         // Test the CPU implementation of Float4
609         Float2 vA = Float2.of(1, 2);
610         Float2 vB = Float2.of(3, 4);
611         Float2 vC = Float2.add(vA, vB);
612         Float2 expectedSum = Float2.of(
613                 vA.x() + vB.x(),
614                 vA.y() + vB.y());
615 
616         HATAsserts.assertEquals(expectedSum, vC, 0.001f);
617 
618         Float2 vD = Float2.sub(vA, vB);
619         Float2 expectedSub = Float2.of(
620                 vA.x() - vB.x(),
621                 vA.y() - vB.y());
622         HATAsserts.assertEquals(expectedSub, vD, 0.001f);
623 
624         Float2 vE = Float2.mul(vA, vB);
625         Float2 expectedMul = Float2.of(
626                 vA.x() * vB.x(),
627                 vA.y() * vB.y());
628         HATAsserts.assertEquals(expectedMul, vE, 0.001f);
629 
630         Float2 vF = Float2.div(vA, vB);
631         Float2 expectedDiv = Float2.of(
632                 vA.x() / vB.x(),
633                 vA.y() / vB.y());
634         HATAsserts.assertEquals(expectedDiv, vF, 0.001f);
635     }
636 
637     @HatTest
638     @Reflect
639     public void testFloat2_14() {
640         final int size = 1024;
641         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
642         var arrayA = F32ArrayPadded.create(accelerator, size);
643 
644         Random r = new Random(73);
645         for (int i = 0; i < size; i++) {
646             arrayA.array(i, r.nextFloat());
647         }
648 
649         accelerator.compute(cc -> TestFloat2.computeGraph14(cc, arrayA, size));
650 
651         for (int i = 0; i < size; i += 2) {
652             HATAsserts.assertEquals(10.0f, arrayA.array(i), 0.001f);
653         }
654     }
655 
656     @HatTest
657     @Reflect
658     public void testFloat2_15() {
659         final int size = 2048;
660         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
661         var arrayA = F32ArrayPadded.create(accelerator, size);
662 
663         Random r = new Random(73);
664         for (int i = 0; i < size; i++) {
665             arrayA.array(i, r.nextFloat());
666         }
667 
668         accelerator.compute(cc -> TestFloat2.computeGraph15(cc, arrayA, size));
669 
670         Float2 v = Float2.of(1.0f, 2.0f);
671         for (int i = 0; i < size; i += 2) {
672             HATAsserts.assertEquals(v.x(), arrayA.array(i), 0.001f);
673             HATAsserts.assertEquals(v.y(), arrayA.array(i + 1), 0.001f);
674         }
675     }
676 }