1 /*
2 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 package hat.test;
26
27 import hat.Accelerator;
28 import hat.ComputeContext;
29 import hat.KernelContext;
30 import hat.NDRange;
31 import hat.backend.Backend;
32 import hat.buffer.F32ArrayPadded;
33 import hat.types.Float2;
34 import hat.device.DeviceSchema;
35 import hat.device.NonMappableIface;
36 import optkl.ifacemapper.MappableIface.*;
37 import hat.test.annotation.HatTest;
38 import hat.test.exceptions.HATAsserts;
39 import jdk.incubator.code.Reflect;
40
41 import java.lang.invoke.MethodHandles;
42 import java.util.Random;
43
44 public class TestFloat2 {
45
46 @Reflect
47 public static void vectorOps01(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c) {
48 if (kernelContext.gix < kernelContext.gsx) {
49 int index = kernelContext.gix;
50 Float2 vA = a.float2View(index * 2);
51 Float2 vB = b.float2View(index * 2);
52 Float2 vC = Float2.add(vA, vB);
53 c.storeFloat2View(vC, index * 2);
54 }
55 }
56
57 @Reflect
58 public static void vectorOps02(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @WO F32ArrayPadded b) {
59 if (kernelContext.gix < kernelContext.gsx) {
60 int index = kernelContext.gix;
61 Float2.MutableImpl vA = a.float2View(index * 2);
62 float scaleX = vA.x() * 10.0f;
63 vA.x(scaleX);
64 b.storeFloat2View(vA, index * 2);
65 }
66 }
67
68 @Reflect
69 public static void vectorOps03(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @WO F32ArrayPadded b) {
70 if (kernelContext.gix < kernelContext.gsx) {
71 int index = kernelContext.gix;
72
73 // Obtain a view of the input data as a float4 and
74 // store that view in private memory
75 Float2 vA = a.float2View(index * 2);
76
77 // operate with the float4
78 float scaleX = vA.x() * 10.0f;
79 float scaleY = vA.y() * 20.0f;
80
81 // Create a float4 within the device code
82 Float2 vResult = Float2.of(scaleX, scaleY);
83
84 // store the float4 from private memory to global memory
85 b.storeFloat2View(vResult, index * 2);
86 }
87 }
88
89 @Reflect
90 public static void vectorOps04(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @WO F32ArrayPadded b) {
91 if (kernelContext.gix < kernelContext.gsx) {
92 int index = kernelContext.gix;
93 Float2.MutableImpl vA = a.float2View(index * 2);
94 vA.x(vA.x() * 10.0f);
95 vA.y(vA.y() * 20.0f);
96 b.storeFloat2View(vA, index * 2);
97 }
98 }
99
100 @Reflect
101 public static void vectorOps05(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c) {
102 if (kernelContext.gix < kernelContext.gsx) {
103 int index = kernelContext.gix;
104 Float2 vA = a.float2View(index * 2);
105 Float2 vB = b.float2View(index * 2);
106 Float2 vC = vA.add(vB).add(vB);
107 c.storeFloat2View(vC, index * 2);
108 }
109 }
110
111 @Reflect
112 public static void vectorOps06(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c) {
113 if (kernelContext.gix < kernelContext.gsx) {
114 int index = kernelContext.gix;
115 Float2 vA = a.float2View(index * 2);
116 Float2 vB = b.float2View(index * 2);
117 // Float2 vD = Float2.sub(vA, vB);
118 Float2 vC = vA.sub(vB);
119 c.storeFloat2View(vC, index * 2);
120 }
121 }
122
123 @Reflect
124 public static void vectorOps07(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c) {
125 if (kernelContext.gix < kernelContext.gsx) {
126 int index = kernelContext.gix;
127 Float2 vA = a.float2View(index * 2);
128 Float2 vB = b.float2View(index * 2);
129 Float2 vC = vA.add(vB).sub(vB);
130 c.storeFloat2View(vC, index * 2);
131 }
132 }
133
134 @Reflect
135 public static void vectorOps08(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c) {
136 if (kernelContext.gix < kernelContext.gsx) {
137 int index = kernelContext.gix;
138 Float2 vA = a.float2View(index * 2);
139 Float2 vB = b.float2View(index * 2);
140 Float2 vC = vA.add(vB).mul(vA).div(vB);
141 c.storeFloat2View(vC, index * 2);
142 }
143 }
144
145 @Reflect
146 public static void vectorOps09(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c) {
147 // Checking composition
148 if (kernelContext.gix < kernelContext.gsx) {
149 int index = kernelContext.gix;
150 Float2 vA = a.float2View(index * 2);
151 Float2 vB = b.float2View(index * 2);
152 Float2 vC = vA.add(vA.mul(vB));
153 c.storeFloat2View(vC, index * 2);
154 }
155 }
156
157 private interface SharedArray extends NonMappableIface {
158 void array(long index, float value);
159 float array(long index);
160 DeviceSchema<SharedArray> schema = DeviceSchema.of(SharedArray.class,
161 arr -> arr.withArray("array", 1024));
162 static SharedArray create(Accelerator accelerator) {
163 return null;
164 }
165 static SharedArray createLocal() {
166 return null;
167 }
168 default Float2 float2View(int index) {
169 return null;
170 }
171 default void storeFloat2View(Float2 float4, int index) {
172 }
173 }
174
175 @Reflect
176 public static void vectorOps10(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @WO F32ArrayPadded b) {
177 SharedArray sm = SharedArray.createLocal();
178 if (kernelContext.gix < kernelContext.gsx) {
179 int index = kernelContext.gix;
180 int lix = kernelContext.lix;
181 Float2 vA = a.float2View(index * 2);
182 sm.storeFloat2View(vA, lix * 2);
183 kernelContext.barrier();
184 Float2 r = sm.float2View(lix * 2);
185 b.storeFloat2View(r, index * 2);
186 }
187 }
188
189 private interface PrivateMemory extends NonMappableIface {
190 void array(long index, float value);
191 float array(long index);
192 DeviceSchema<PrivateMemory> schema = DeviceSchema.of(PrivateMemory.class,
193 arr -> arr.withArray("array", 4));
194 static PrivateMemory create(Accelerator accelerator) {
195 return null;
196 }
197 static PrivateMemory createPrivate() {
198 return null;
199 }
200 default Float2 float2View(int index) {
201 return null;
202 }
203 default void storeFloat2View(Float2 float4, int index) {
204 }
205 }
206
207 @Reflect
208 public static void vectorOps11(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @WO F32ArrayPadded b) {
209 PrivateMemory pm = PrivateMemory.createPrivate();
210 if (kernelContext.gix < kernelContext.gsx) {
211 int index = kernelContext.gix;
212 Float2 vA = a.float2View(index * 2);
213 pm.storeFloat2View(vA, 0);
214 kernelContext.barrier();
215 Float2 r = pm.float2View(0);
216 b.storeFloat2View(r, index * 2);
217 }
218 }
219
220 @Reflect
221 public static void vectorOps12(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @WO F32ArrayPadded b) {
222 SharedArray sm = SharedArray.createLocal();
223 if (kernelContext.gix < kernelContext.gsx) {
224 int index = kernelContext.gix;
225 int lix = kernelContext.lix;
226 Float2 vA = a.float2View(index * 2);
227 sm.array(lix * 2 + 0, vA.x());
228 sm.array(lix * 2 + 1, vA.y());
229 kernelContext.barrier();
230 Float2 r = sm.float2View(lix * 2);
231 b.storeFloat2View(r, index * 2);
232 }
233 }
234
235 @Reflect
236 public static void vectorOps14(@RO KernelContext kernelContext, @RW F32ArrayPadded a) {
237 if (kernelContext.gix < kernelContext.gsx) {
238 int index = kernelContext.gix;
239 Float2 vA = a.float2View(index * 2);
240 Float2.MutableImpl vB = Float2.makeMutable(vA);
241 vB.x(10.0f);
242 a.storeFloat2View(vB, index * 2);
243 }
244 }
245
246
247 @Reflect
248 public static void vectorOps15(@RO KernelContext kernelContext, @WO F32ArrayPadded a) {
249 // in this sample, we don't perform the vload, but rather the vstore directly
250 // from a new float2.
251 if (kernelContext.gix < kernelContext.gsx) {
252 int index = kernelContext.gix;
253 Float2 result = Float2.of(1.0f, 2.0f);
254 a.storeFloat2View(result, index * 2);
255 }
256 }
257
258 @Reflect
259 public static void computeGraph01(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c, int size) {
260 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
261 cc.dispatchKernel(NDRange.of1D(size/2,128), kernelContext -> TestFloat2.vectorOps01(kernelContext, a, b, c));
262 }
263
264 @Reflect
265 public static void computeGraph02(@RO ComputeContext cc, @RW F32ArrayPadded a, @WO F32ArrayPadded b, int size) {
266 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
267 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps02(kernelContext, a, b));
268 }
269
270 @Reflect
271 public static void computeGraph03(@RO ComputeContext cc, @RO F32ArrayPadded a, @WO F32ArrayPadded b, int size) {
272 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
273 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps03(kernelContext, a, b));
274 }
275
276 @Reflect
277 public static void computeGraph04(@RO ComputeContext cc, @RO F32ArrayPadded a, @WO F32ArrayPadded b, int size) {
278 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
279 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps04(kernelContext, a, b));
280 }
281
282 @Reflect
283 public static void computeGraph05(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c, int size) {
284 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
285 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps05(kernelContext, a, b, c));
286 }
287
288 @Reflect
289 public static void computeGraph06(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c, int size) {
290 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
291 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps06(kernelContext, a, b, c));
292 }
293
294
295 @Reflect
296 public static void computeGraph07(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c, int size) {
297 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
298 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps07(kernelContext, a, b, c));
299 }
300
301 @Reflect
302 public static void computeGraph08(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c, int size) {
303 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
304 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps08(kernelContext, a, b, c));
305 }
306
307 @Reflect
308 public static void computeGraph09(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @WO F32ArrayPadded c, int size) {
309 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
310 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps09(kernelContext, a, b, c));
311 }
312
313 @Reflect
314 public static void computeGraph10(@RO ComputeContext cc, @RO F32ArrayPadded a, @WO F32ArrayPadded b, int size) {
315 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
316 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps10(kernelContext, a, b));
317 }
318
319 @Reflect
320 public static void computeGraph11(@RO ComputeContext cc, @RO F32ArrayPadded a, @WO F32ArrayPadded b, int size) {
321 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
322 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps11(kernelContext, a, b));
323 }
324
325 @Reflect
326 public static void computeGraph12(@RO ComputeContext cc, @RO F32ArrayPadded a, @WO F32ArrayPadded b, int size) {
327 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
328 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps12(kernelContext, a, b));
329 }
330
331 @Reflect
332 public static void computeGraph14(@RO ComputeContext cc, @RW F32ArrayPadded a, int size) {
333 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
334 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps14(kernelContext, a));
335 }
336
337 @Reflect
338 public static void computeGraph15(@RO ComputeContext cc, @WO F32ArrayPadded a, int size) {
339 // Note: we need to launch N threads / vectorWidth -> size / 2 for this example
340 cc.dispatchKernel(NDRange.of1D(size/2), kernelContext -> TestFloat2.vectorOps15(kernelContext, a));
341 }
342
343
344 @HatTest
345 @Reflect
346 public void testFloat2_01() {
347 final int size = 1024;
348 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
349 var arrayA = F32ArrayPadded.create(accelerator, size);
350 var arrayB = F32ArrayPadded.create(accelerator, size);
351 var arrayC = F32ArrayPadded.create(accelerator, size);
352
353 Random r = new Random(19);
354 for (int i = 0; i < size; i++) {
355 arrayA.array(i, r.nextFloat());
356 arrayB.array(i, r.nextFloat());
357 }
358
359 accelerator.compute(cc -> TestFloat2.computeGraph01(cc, arrayA, arrayB, arrayC, size));
360
361 for (int i = 0; i < size; i++) {
362 HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
363 }
364
365 }
366
367 @HatTest
368 @Reflect
369 public void testFloat2_02() {
370 final int size = 1024;
371 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
372 var arrayA = F32ArrayPadded.create(accelerator, size);
373 var arrayB = F32ArrayPadded.create(accelerator, size);
374
375 Random r = new Random(19);
376 for (int i = 0; i < size; i++) {
377 arrayA.array(i, r.nextFloat());
378 }
379
380 accelerator.compute(cc -> TestFloat2.computeGraph02(cc, arrayA, arrayB, size));
381
382 for (int i = 0; i < size; i += 2) {
383 HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
384 HATAsserts.assertEquals((arrayA.array(i + 1)), arrayB.array(i + 1), 0.001f);
385 }
386 }
387
388 @HatTest
389 @Reflect
390 public void testFloat2_03() {
391 final int size = 1024;
392 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
393 var arrayA = F32ArrayPadded.create(accelerator, size);
394 var arrayB = F32ArrayPadded.create(accelerator, size);
395
396 Random r = new Random(19);
397 for (int i = 0; i < size; i++) {
398 arrayA.array(i, r.nextFloat());
399 }
400
401 accelerator.compute(cc -> TestFloat2.computeGraph03(cc, arrayA, arrayB, size));
402
403 for (int i = 0; i < size; i += 2) {
404 HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
405 HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
406 }
407 }
408
409 @HatTest
410 @Reflect
411 public void testFloat2_04() {
412 final int size = 1024;
413 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
414 var arrayA = F32ArrayPadded.create(accelerator, size);
415 var arrayB = F32ArrayPadded.create(accelerator, size);
416
417 Random r = new Random(19);
418 for (int i = 0; i < size; i++) {
419 arrayA.array(i, r.nextFloat());
420 }
421
422 accelerator.compute(cc -> TestFloat2.computeGraph04(cc, arrayA, arrayB, size));
423
424 for (int i = 0; i < size; i += 2) {
425 HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
426 HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
427 }
428 }
429
430 @HatTest
431 @Reflect
432 public void testFloat2_05() {
433 final int size = 1024;
434 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
435 var arrayA = F32ArrayPadded.create(accelerator, size);
436 var arrayB = F32ArrayPadded.create(accelerator, size);
437 var arrayC = F32ArrayPadded.create(accelerator, size);
438
439 Random r = new Random(19);
440 for (int i = 0; i < size; i++) {
441 arrayA.array(i, r.nextFloat());
442 arrayB.array(i, r.nextFloat());
443 }
444
445 accelerator.compute(cc -> TestFloat2.computeGraph05(cc, arrayA, arrayB, arrayC, size));
446
447 for (int i = 0; i < size; i++) {
448 HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
449 }
450 }
451
452 @HatTest
453 @Reflect
454 public void testFloat2_06() {
455 final int size = 1024;
456 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
457 var arrayA = F32ArrayPadded.create(accelerator, size);
458 var arrayB = F32ArrayPadded.create(accelerator, size);
459 var arrayC = F32ArrayPadded.create(accelerator, size);
460
461 Random r = new Random(19);
462 for (int i = 0; i < size; i++) {
463 arrayA.array(i, r.nextFloat());
464 arrayB.array(i, r.nextFloat());
465 }
466
467 accelerator.compute(cc -> TestFloat2.computeGraph06(cc, arrayA, arrayB, arrayC, size));
468
469 for (int i = 0; i < size; i++) {
470 HATAsserts.assertEquals((arrayA.array(i) - arrayB.array(i)), arrayC.array(i), 0.001f);
471 }
472 }
473
474 @HatTest
475 @Reflect
476 public void testFloat2_07() {
477 final int size = 1024;
478 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
479 var arrayA = F32ArrayPadded.create(accelerator, size);
480 var arrayB = F32ArrayPadded.create(accelerator, size);
481 var arrayC = F32ArrayPadded.create(accelerator, size);
482
483 Random r = new Random(19);
484 for (int i = 0; i < size; i++) {
485 arrayA.array(i, r.nextFloat());
486 arrayB.array(i, r.nextFloat());
487 }
488
489 accelerator.compute(cc -> TestFloat2.computeGraph07(cc, arrayA, arrayB, arrayC, size));
490
491 for (int i = 0; i < size; i++) {
492 HATAsserts.assertEquals(arrayA.array(i), arrayC.array(i), 0.001f);
493 }
494 }
495
496 @HatTest
497 @Reflect
498 public void testFloat2_08() {
499 final int size = 1024;
500 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
501 var arrayA = F32ArrayPadded.create(accelerator, size);
502 var arrayB = F32ArrayPadded.create(accelerator, size);
503 var arrayC = F32ArrayPadded.create(accelerator, size);
504
505 Random r = new Random(19);
506 for (int i = 0; i < size; i++) {
507 arrayA.array(i, r.nextFloat());
508 arrayB.array(i, r.nextFloat());
509 }
510
511 accelerator.compute(cc -> TestFloat2.computeGraph08(cc, arrayA, arrayB, arrayC, size));
512
513 for (int i = 0; i < size; i++) {
514 float val = (((arrayA.array(i) + arrayB.array(i)) * arrayA.array(i)) / arrayB.array(i));
515 HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
516 }
517 }
518
519 @HatTest
520 @Reflect
521 public void testFloat2_09() {
522 final int size = 1024;
523 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
524 var arrayA = F32ArrayPadded.create(accelerator, size);
525 var arrayB = F32ArrayPadded.create(accelerator, size);
526 var arrayC = F32ArrayPadded.create(accelerator, size);
527
528 Random r = new Random(19);
529 for (int i = 0; i < size; i++) {
530 arrayA.array(i, r.nextFloat());
531 arrayB.array(i, r.nextFloat());
532 }
533
534 accelerator.compute(cc -> TestFloat2.computeGraph09(cc, arrayA, arrayB, arrayC, size));
535
536 for (int i = 0; i < size; i++) {
537 float val = (arrayA.array(i) + (arrayB.array(i)) * arrayA.array(i));
538 HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
539 }
540 }
541
542 @HatTest
543 @Reflect
544 public void testFloat2_10() {
545 final int size = 1024;
546 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
547 var arrayA = F32ArrayPadded.create(accelerator, size);
548 var arrayB = F32ArrayPadded.create(accelerator, size);
549
550 Random r = new Random(19);
551 for (int i = 0; i < size; i++) {
552 arrayA.array(i, r.nextFloat());
553 arrayB.array(i, r.nextFloat());
554 }
555
556 accelerator.compute(cc -> TestFloat2.computeGraph10(cc, arrayA, arrayB, size));
557
558 for (int i = 0; i < size; i++) {
559 HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
560 }
561 }
562
563 @HatTest
564 @Reflect
565 public void testFloat2_11() {
566 final int size = 1024;
567 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
568 var arrayA = F32ArrayPadded.create(accelerator, size);
569 var arrayB = F32ArrayPadded.create(accelerator, size);
570
571 Random r = new Random(19);
572 for (int i = 0; i < size; i++) {
573 arrayA.array(i, r.nextFloat());
574 arrayB.array(i, r.nextFloat());
575 }
576
577 accelerator.compute(cc -> TestFloat2.computeGraph11(cc, arrayA, arrayB, size));
578
579 for (int i = 0; i < size; i++) {
580 HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
581 }
582 }
583
584 @HatTest
585 @Reflect
586 public void testFloat2_12() {
587 final int size = 1024;
588 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
589 var arrayA = F32ArrayPadded.create(accelerator, size);
590 var arrayB = F32ArrayPadded.create(accelerator, size);
591
592 Random r = new Random(19);
593 for (int i = 0; i < size; i++) {
594 arrayA.array(i, r.nextFloat());
595 arrayB.array(i, r.nextFloat());
596 }
597
598 accelerator.compute(cc -> TestFloat2.computeGraph12(cc, arrayA, arrayB, size));
599
600 for (int i = 0; i < size; i++) {
601 HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
602 }
603 }
604
605 @HatTest
606 @Reflect
607 public void testFloat2_13() {
608 // Test the CPU implementation of Float4
609 Float2 vA = Float2.of(1, 2);
610 Float2 vB = Float2.of(3, 4);
611 Float2 vC = Float2.add(vA, vB);
612 Float2 expectedSum = Float2.of(
613 vA.x() + vB.x(),
614 vA.y() + vB.y());
615
616 HATAsserts.assertEquals(expectedSum, vC, 0.001f);
617
618 Float2 vD = Float2.sub(vA, vB);
619 Float2 expectedSub = Float2.of(
620 vA.x() - vB.x(),
621 vA.y() - vB.y());
622 HATAsserts.assertEquals(expectedSub, vD, 0.001f);
623
624 Float2 vE = Float2.mul(vA, vB);
625 Float2 expectedMul = Float2.of(
626 vA.x() * vB.x(),
627 vA.y() * vB.y());
628 HATAsserts.assertEquals(expectedMul, vE, 0.001f);
629
630 Float2 vF = Float2.div(vA, vB);
631 Float2 expectedDiv = Float2.of(
632 vA.x() / vB.x(),
633 vA.y() / vB.y());
634 HATAsserts.assertEquals(expectedDiv, vF, 0.001f);
635 }
636
637 @HatTest
638 @Reflect
639 public void testFloat2_14() {
640 final int size = 1024;
641 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
642 var arrayA = F32ArrayPadded.create(accelerator, size);
643
644 Random r = new Random(73);
645 for (int i = 0; i < size; i++) {
646 arrayA.array(i, r.nextFloat());
647 }
648
649 accelerator.compute(cc -> TestFloat2.computeGraph14(cc, arrayA, size));
650
651 for (int i = 0; i < size; i += 2) {
652 HATAsserts.assertEquals(10.0f, arrayA.array(i), 0.001f);
653 }
654 }
655
656 @HatTest
657 @Reflect
658 public void testFloat2_15() {
659 final int size = 2048;
660 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
661 var arrayA = F32ArrayPadded.create(accelerator, size);
662
663 Random r = new Random(73);
664 for (int i = 0; i < size; i++) {
665 arrayA.array(i, r.nextFloat());
666 }
667
668 accelerator.compute(cc -> TestFloat2.computeGraph15(cc, arrayA, size));
669
670 Float2 v = Float2.of(1.0f, 2.0f);
671 for (int i = 0; i < size; i += 2) {
672 HATAsserts.assertEquals(v.x(), arrayA.array(i), 0.001f);
673 HATAsserts.assertEquals(v.y(), arrayA.array(i + 1), 0.001f);
674 }
675 }
676 }