1 /*
  2  * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 package hat.test;
 26 
 27 import hat.device.DeviceSchema;
 28 import hat.device.DeviceType;
 29 import hat.types.Float4;
 30 import jdk.incubator.code.Reflect;
 31 
 32 import hat.*;
 33 import hat.backend.Backend;
 34 import hat.buffer.*;
 35 import optkl.ifacemapper.MappableIface.RO;
 36 import optkl.ifacemapper.MappableIface.RW;
 37 import hat.test.annotation.HatTest;
 38 import hat.test.exceptions.HATAsserts;
 39 
 40 import java.lang.invoke.MethodHandles;
 41 import java.util.Random;
 42 
 43 public class TestVectorArrayView {
 44 
 45     @Reflect
        // Kernel: loads one Float4 from the float4 views of a and b, adds them with the
        // static Float4.add, and stores the sum into the float4 view of c.
        // Shape under test: both operands and the result are held in named locals.
 46     public static void vectorOps01(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Only work-items whose gix is below gsx do any work.
 47         if (kernelContext.gix < kernelContext.gsx) {
 48             int index = kernelContext.gix;
 49 
 50             Float4[] vA = a.float4ArrayView();
 51             Float4[] vB = b.float4ArrayView();
 52             Float4[] vC = c.float4ArrayView();
                // index * 4: presumably the view is addressed in float units, so this selects
                // the index-th group of four floats - confirm against float4ArrayView().
 53             Float4 floatA = vA[index * 4];
 54             Float4 floatB = vB[index * 4];
 55             Float4 res = Float4.add(floatA, floatB);
 56             vC[index * 4] = res;
 57         }
 58     }
 59 
 60     @Reflect
        // Kernel: same computation as vectorOps01 (c = a + b on one Float4 per work-item).
        // Shape under test: the result of Float4.add is assigned straight into the view
        // element, with no intermediate local for the sum.
 61     public static void vectorOps01WithFloat4s(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Only work-items whose gix is below gsx do any work.
 62         if (kernelContext.gix < kernelContext.gsx) {
 63             int index = kernelContext.gix;
 64 
 65             Float4[] vA = a.float4ArrayView();
 66             Float4[] vB = b.float4ArrayView();
 67             Float4[] vC = c.float4ArrayView();
 68             Float4 vAFloat = vA[index * 4];
 69             Float4 vBFloat = vB[index * 4];
                // Store directly from the call expression.
 70             vC[index * 4] = Float4.add(vAFloat, vBFloat);
 71         }
 72     }
 73 
 74     @Reflect
        // Kernel: same computation as vectorOps01 (c = a + b on one Float4 per work-item).
        // Shape under test: the two view loads appear directly as arguments of Float4.add;
        // only the sum is held in a local before being stored.
 75     public static void vectorOps01WithSeparateAdd(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Only work-items whose gix is below gsx do any work.
 76         if (kernelContext.gix < kernelContext.gsx) {
 77             int index = kernelContext.gix;
 78 
 79             Float4[] vA = a.float4ArrayView();
 80             Float4[] vB = b.float4ArrayView();
 81             Float4[] vC = c.float4ArrayView();
                // Loads are inlined into the add call.
 82             Float4 res = Float4.add(vA[index * 4], vB[index * 4]);
 83             vC[index * 4] = res;
 84         }
 85     }
 86 
 87     @Reflect
        // Kernel: loads one mutable Float4 from a, multiplies its x lane by 10, and stores
        // the vector into b. The y, z and w lanes are not touched.
        // TestVectorArrayView02 computes its expected values from a after the launch, so it
        // relies on a itself not being modified by the x(...) setter call here.
 88     public static void vectorOps02(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
            // Only work-items whose gix is below gsx do any work.
 89         if (kernelContext.gix < kernelContext.gsx) {
 90             int index = kernelContext.gix;
 91 
 92             Float4.MutableImpl[] vArr = a.float4ArrayView();
 93             Float4.MutableImpl[] bArr = b.float4ArrayView();
 94             Float4.MutableImpl vA = vArr[index * 4];
                // Scaled value goes through a float local before the setter call.
 95             float scaleX = vA.x() * 10.0f;
 96             vA.x(scaleX);
 97             bArr[index * 4] = vA;
 98         }
 99     }
100 
101     @Reflect
        // Kernel: loads one mutable Float4 from a, scales its x, y, z, w lanes by
        // 10, 20, 30 and 40 respectively, and stores the vector into b.
        // Shape under test: every scaled lane is first computed into a float local, and only
        // afterwards are the four setters called.
102     public static void vectorOps03(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
            // Only work-items whose gix is below gsx do any work.
103         if (kernelContext.gix < kernelContext.gsx) {
104             int index = kernelContext.gix;
105 
106             Float4.MutableImpl[] vA = a.float4ArrayView();
107             Float4.MutableImpl[] vB = b.float4ArrayView();
108             Float4.MutableImpl vAFloat = vA[index * 4];
                // Read all four lanes before writing any of them.
109             float scaleX = vAFloat.x() * 10.0f;
110             float scaleY = vAFloat.y() * 20.0f;
111             float scaleZ = vAFloat.z() * 30.0f;
112             float scaleW = vAFloat.w() * 40.0f;
113             vAFloat.x(scaleX);
114             vAFloat.y(scaleY);
115             vAFloat.z(scaleZ);
116             vAFloat.w(scaleW);
117             vB[index * 4] = vAFloat;
118         }
119     }
120 
121     @Reflect
        // Kernel: same lane scaling as vectorOps03 (x*10, y*20, z*30, w*40, result into b).
        // Shape under test: each getter call and multiplication is nested inside the
        // matching setter call, with no intermediate float locals.
122     public static void vectorOps04(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
            // Only work-items whose gix is below gsx do any work.
123         if (kernelContext.gix < kernelContext.gsx) {
124             int index = kernelContext.gix;
125 
126             Float4.MutableImpl[] vA = a.float4ArrayView();
127             Float4.MutableImpl[] vB = b.float4ArrayView();
128             Float4.MutableImpl vAFloat = vA[index * 4];
                // Getter-in-setter form, one lane per statement.
129             vAFloat.x(vAFloat.x() * 10.0f);
130             vAFloat.y(vAFloat.y() * 20.0f);
131             vAFloat.z(vAFloat.z() * 30.0f);
132             vAFloat.w(vAFloat.w() * 40.0f);
133             vB[index * 4] = vAFloat;
134         }
135     }
136 
137     @Reflect
        // Kernel: computes (a + b) + b on one Float4 per work-item using the instance
        // method add twice, and stores the result into c.
138     public static void vectorOps05(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Only work-items whose gix is below gsx do any work.
139         if (kernelContext.gix < kernelContext.gsx) {
140             int index = kernelContext.gix;
141 
142             Float4[] vA = a.float4ArrayView();
143             Float4[] vB = b.float4ArrayView();
144             Float4[] vC = c.float4ArrayView();
145             Float4 floatA = vA[index * 4];
146             Float4 floatB = vB[index * 4];
                // The first sum feeds the second add as its receiver.
147             Float4 temp = floatA.add(floatB);
148             Float4 res = temp.add(floatB);
149             vC[index * 4] = res;
150         }
151     }
152 
153     @Reflect
        // Kernel: computes a - b on one Float4 per work-item with the static Float4.sub and
        // stores the difference into c.
154     public static void vectorOps06(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Only work-items whose gix is below gsx do any work.
155         if (kernelContext.gix < kernelContext.gsx) {
156             int index = kernelContext.gix;
157 
158             Float4[] vA = a.float4ArrayView();
159             Float4[] vB = b.float4ArrayView();
160             Float4[] vC = c.float4ArrayView();
161             Float4 floatA = vA[index * 4];
162             Float4 floatB = vB[index * 4];
163           //  Float4 vD = Float4.sub(floatA, floatB);
164             Float4 vE = Float4.sub(floatA, floatB);
165             vC[index * 4] = vE;
166         }
167     }
168 
169     @Reflect
        // Kernel: computes (a + b) - b on one Float4 per work-item with the instance methods
        // add and sub, and stores the result into c. TestVectorArrayView07 expects the
        // result to equal a within its tolerance.
170     public static void vectorOps07(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Only work-items whose gix is below gsx do any work.
171         if (kernelContext.gix < kernelContext.gsx) {
172             int index = kernelContext.gix;
173 
174             Float4[] vAArray = a.float4ArrayView();
175             Float4[] vBArray = b.float4ArrayView();
176             Float4[] vCArray = c.float4ArrayView();
177 
178             Float4 vA = vAArray[index * 4];
179             Float4 vB = vBArray[index * 4];
                // Add, then subtract the same operand again.
180             Float4 vC = vA.add(vB);
181             Float4 vD = vC.sub(vB);
182             vCArray[index * 4] = vD;
183         }
184     }
185 
186     @Reflect
        // Kernel: computes ((a + b) * a) / b on one Float4 per work-item by chaining the
        // instance methods add, mul and div, and stores the result into c.
187     public static void vectorOps08(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Only work-items whose gix is below gsx do any work.
188         if (kernelContext.gix < kernelContext.gsx) {
189             int index = kernelContext.gix;
190 
191             Float4[] vAArray = a.float4ArrayView();
192             Float4[] vBArray = b.float4ArrayView();
193             Float4[] vCArray = c.float4ArrayView();
194 
195             Float4 vA = vAArray[index * 4];
196             Float4 vB = vBArray[index * 4];
                // Each step consumes the previous step's result as its receiver.
197             Float4 vC = vA.add(vB);
198             Float4 vD = vC.mul(vA);
199             Float4 vE = vD.div(vB);
200             vCArray[index * 4] = vE;
201         }
202     }
203 
204     @Reflect
        // Kernel: computes a + (a * b) on one Float4 per work-item and stores it into c.
205     public static void vectorOps09(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
206         // Checking composition: the product is used as the argument of add, not as its receiver.
207         if (kernelContext.gix < kernelContext.gsx) {
208             int index = kernelContext.gix;
209             Float4[] vAArray = a.float4ArrayView();
210             Float4[] vBArray = b.float4ArrayView();
211             Float4[] vCArray = c.float4ArrayView();
212 
213             Float4 vA = vAArray[index * 4];
214             Float4 vB = vBArray[index * 4];
215             Float4 temp = vA.mul(vB);
216             Float4 vC = vA.add(temp);
217             vCArray[index * 4] = vC;
218         }
219     }
220 
221     private interface SharedMemory extends DeviceType {
            // Test-local DeviceType with a float accessor pair named "array" and a schema that
            // declares that array with 1024 elements.
            // NOTE(review): presumably backs work-group local/shared memory, going by the
            // names createLocal / float4LocalArrayView - confirm against the HAT docs.
222         void array(long index, float value);
223         float array(long index);
224         DeviceSchema<SharedMemory> schema = DeviceSchema.of(SharedMemory.class,
225                 arr -> arr.withArray("array", 1024));
            // Returns null on the Java side; presumably the call is replaced when the kernel is
            // compiled for the device - TODO confirm.
226         static SharedMemory createLocal() {
227             return null;
228         }
            // Also returns null on the Java side; used by kernels as a Float4 view of "array".
229         default Float4.MutableImpl[] float4LocalArrayView() {
230             return null;
231         }
232     }
233 
234     @Reflect
        // Kernel: copies one Float4 from a into the SharedMemory float4 view at lix * 4,
        // calls barrier(), reads the same slot back and stores it into b.
        // TestVectorArrayView10 expects b to equal a afterwards.
235     public static void vectorOps10(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
            // Created before the bounds guard.
236         SharedMemory sm = SharedMemory.createLocal();
237         if (kernelContext.gix < kernelContext.gsx) {
238             int index = kernelContext.gix;
239             int lix = kernelContext.lix;
240 
241             Float4[] aArr = a.float4ArrayView();
242             Float4[] bArr = b.float4ArrayView();
                // float4LocalArrayView() is declared to return Float4.MutableImpl[]; it is held
                // here as Float4[].
243             Float4[] smArr = sm.float4LocalArrayView();
244 
245             Float4 vA = aArr[index * 4];
                // The SharedMemory slot is indexed by lix, the global buffers by gix.
246             smArr[lix * 4] = vA;
247             kernelContext.barrier();
248             Float4 r = smArr[lix * 4];
249             bArr[index * 4] = r;
250         }
251     }
252 
253     private interface PrivateMemory extends DeviceType {
            // Test-local DeviceType with a float accessor pair named "array" and a schema that
            // declares that array with 4 elements (one Float4's worth of floats).
            // NOTE(review): presumably backs per-work-item private memory, going by the names
            // createPrivate / float4PrivateArrayView - confirm against the HAT docs.
254         void array(long index, float value);
255         float array(long index);
256         DeviceSchema<PrivateMemory> schema = DeviceSchema.of(PrivateMemory.class,
257                 arr -> arr.withArray("array", 4));
            // Returns null on the Java side; presumably the call is replaced when the kernel is
            // compiled for the device - TODO confirm.
258         static PrivateMemory createPrivate() {
259             return null;
260         }
            // Also returns null on the Java side; used by kernels as a Float4 view of "array".
261         default Float4[] float4PrivateArrayView() {
262             return null;
263         }
264     }
265 
266     @Reflect
        // Kernel: copies one Float4 from a into element 0 of the PrivateMemory float4 view,
        // calls barrier(), reads element 0 back and stores it into b.
        // TestVectorArrayView11 expects b to equal a afterwards.
267     public static void vectorOps11(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
            // Created before the bounds guard.
268         PrivateMemory pm = PrivateMemory.createPrivate();
269         if (kernelContext.gix < kernelContext.gsx) {
270             int index = kernelContext.gix;
271 
272             Float4[] aArr = a.float4ArrayView();
273             Float4[] bArr = b.float4ArrayView();
274             Float4[] pmArr = pm.float4PrivateArrayView();
275 
276             Float4 vA = aArr[index * 4];
                // Always slot 0: the schema declares only 4 floats.
277             pmArr[0] = vA;
278             kernelContext.barrier();
279             Float4 r = pmArr[0];
280             bArr[index * 4] = r;
281         }
282     }
283 
284     @Reflect
        // Kernel: like vectorOps10, but the SharedMemory slot is filled lane by lane through
        // the Float4.MutableImpl accessors instead of by assigning a whole Float4.
        // TestVectorArrayView12 expects b to equal a afterwards.
285     public static void vectorOps12(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
            // Created before the bounds guard.
286         SharedMemory sm = SharedMemory.createLocal();
287         if (kernelContext.gix < kernelContext.gsx) {
288             int index = kernelContext.gix;
289             int lix = kernelContext.lix;
290             Float4.MutableImpl[] aArr = a.float4ArrayView();
291             Float4.MutableImpl[] bArr = b.float4ArrayView();
292             Float4.MutableImpl[] smArr = sm.float4LocalArrayView();
293 
294             Float4.MutableImpl vA = aArr[index * 4];
                // Load the current slot contents, overwrite all four lanes from vA, store back.
295             Float4.MutableImpl smVector = smArr[lix * 4];
296             smVector.x(vA.x());
297             smVector.y(vA.y());
298             smVector.z(vA.z());
299             smVector.w(vA.w());
300             smArr[lix * 4] = smVector;
301             kernelContext.barrier();
302             Float4.MutableImpl r = smArr[lix * 4];
303             bArr[index * 4] = r;
304         }
305     }
306 
307     @Reflect
        // Compute graph: dispatches vectorOps01 over a 1D range of size / 4 work-items.
        // The second NDRange.of1D argument (128) is presumably the local work-group size -
        // confirm against NDRange.of1D.
308     public static void computeGraph01(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
309         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
310         cc.dispatchKernel( NDRange.of1D(size/4,128), kernelContext -> vectorOps01(kernelContext, a, b, c));
311     }
312 
313     @Reflect
        // Compute graph: dispatches vectorOps01WithFloat4s over a 1D range of size / 4
        // work-items. The second NDRange.of1D argument (128) is presumably the local
        // work-group size - confirm against NDRange.of1D.
        // The only test that called this graph is currently commented out in this file.
314     public static void computeGraph01WithFloat4s(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
315         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
316         cc.dispatchKernel(NDRange.of1D(size/4,128), kernelContext -> vectorOps01WithFloat4s(kernelContext, a, b, c));
317     }
318 
319     @Reflect
        // Compute graph: dispatches vectorOps01WithSeparateAdd over a 1D range of size / 4
        // work-items. The second NDRange.of1D argument (128) is presumably the local
        // work-group size - confirm against NDRange.of1D.
        // The only test that called this graph is currently commented out in this file.
320     public static void computeGraph01WithSeparateAdd(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
321         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
322         cc.dispatchKernel(NDRange.of1D(size/4,128), kernelContext -> vectorOps01WithSeparateAdd(kernelContext, a, b, c));
323     }
324 
325     @Reflect
        // Compute graph: dispatches vectorOps02 over a 1D range of size / 4 work-items.
        // NOTE(review): a is annotated @RW here, but @RO in vectorOps02 itself and in the
        // sibling graphs computeGraph03 / computeGraph04 - confirm which is intended.
326     public static void computeGraph02(@RO ComputeContext cc, @RW F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
327         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
328         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps02(kernelContext, a, b));
329     }
330 
331     @Reflect
        // Compute graph: dispatches vectorOps03 over a 1D range of size / 4 work-items.
332     public static void computeGraph03(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
333         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
334         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps03(kernelContext, a, b));
335     }
336 
337     @Reflect
        // Compute graph: dispatches vectorOps04 over a 1D range of size / 4 work-items.
338     public static void computeGraph04(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
339         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
340         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps04(kernelContext, a, b));
341     }
342 
343     @Reflect
        // Compute graph: dispatches vectorOps05 over a 1D range of size / 4 work-items.
344     public static void computeGraph05(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
345         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
346         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps05(kernelContext, a, b, c));
347     }
348 
349     @Reflect
        // Compute graph: dispatches vectorOps06 over a 1D range of size / 4 work-items.
350     public static void computeGraph06(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
351         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
352         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps06(kernelContext, a, b, c));
353     }
354 
355     @Reflect
        // Compute graph: dispatches vectorOps07 over a 1D range of size / 4 work-items.
356     public static void computeGraph07(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
357         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
358         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps07(kernelContext, a, b, c));
359     }
360 
361     @Reflect
        // Compute graph: dispatches vectorOps08 over a 1D range of size / 4 work-items.
362     public static void computeGraph08(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
363         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
364         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps08(kernelContext, a, b, c));
365     }
366 
367     @Reflect
        // Compute graph: dispatches vectorOps09 over a 1D range of size / 4 work-items.
368     public static void computeGraph09(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
369         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
370         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps09(kernelContext, a, b, c));
371     }
372 
373     @Reflect
        // Compute graph: dispatches vectorOps10 (the SharedMemory round trip) over a 1D range
        // of size / 4 work-items. No second argument is passed to NDRange.of1D here.
374     public static void computeGraph10(@RO ComputeContext cc, @RO F32ArrayPadded a,  @RW F32ArrayPadded b, int size) {
375         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
376         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps10(kernelContext, a, b));
377     }
378 
379     @Reflect
        // Compute graph: dispatches vectorOps11 (the PrivateMemory round trip) over a 1D
        // range of size / 4 work-items.
380     public static void computeGraph11(@RO ComputeContext cc, @RO F32ArrayPadded a,  @RW F32ArrayPadded b, int size) {
381         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
382         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps11(kernelContext, a, b));
383     }
384 
385     @Reflect
        // Compute graph: dispatches vectorOps12 (the lane-by-lane SharedMemory round trip)
        // over a 1D range of size / 4 work-items. No second argument is passed to
        // NDRange.of1D here.
386     public static void computeGraph12(@RO ComputeContext cc, @RO F32ArrayPadded a,  @RW F32ArrayPadded b, int size) {
387         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
388         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps12(kernelContext, a, b));
389     }
390 
391     @HatTest
392     @Reflect
        // Test: runs computeGraph01 on two 1024-element inputs filled from Random(19) and
        // checks c[i] == a[i] + b[i] for every element, with an absolute tolerance of 0.001.
393     public void TestVectorArrayView01() {
394         final int size = 1024;
395         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
396         var arrayA = F32ArrayPadded.create(accelerator, size);
397         var arrayB = F32ArrayPadded.create(accelerator, size);
398         var arrayC = F32ArrayPadded.create(accelerator, size);
399 
            // Fixed seed keeps the inputs reproducible; a and b draw alternately from one stream.
400         Random r = new Random(19);
401         for (int i = 0; i < size; i++) {
402             arrayA.array(i, r.nextFloat());
403             arrayB.array(i, r.nextFloat());
404         }
405 
406         accelerator.compute(cc -> computeGraph01(cc, arrayA, arrayB, arrayC, size));
407 
            // Checked per float, not per Float4.
408         for (int i = 0; i < size; i++) {
409             HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
410         }
411 
412     }
413 
414     // @HatTest
415     // @Reflect
416     // public void TestVectorArrayView01WithFloat4s() {
417     //     final int size = 1024;
418     //     var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
419     //     var arrayA = F32ArrayPadded.create(accelerator, size);
420     //     var arrayB = F32ArrayPadded.create(accelerator, size);
421     //     var arrayC = F32ArrayPadded.create(accelerator, size);
422     //
423     //     Random r = new Random(19);
424     //     for (int i = 0; i < size; i++) {
425     //         arrayA.array(i, r.nextFloat());
426     //         arrayB.array(i, r.nextFloat());
427     //     }
428     //
429     //     accelerator.compute(cc -> computeGraph01WithFloat4s(cc, arrayA, arrayB, arrayC, size));
430     //
431     //     for (int i = 0; i < size; i++) {
432     //         HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
433     //     }
434     //
435     // }
436     //
437     // @HatTest
438     // @Reflect
439     // public void TestVectorArrayView01WithSeparateAdd() {
440     //     final int size = 1024;
441     //     var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
442     //     var arrayA = F32ArrayPadded.create(accelerator, size);
443     //     var arrayB = F32ArrayPadded.create(accelerator, size);
444     //     var arrayC = F32ArrayPadded.create(accelerator, size);
445     //
446     //     Random r = new Random(19);
447     //     for (int i = 0; i < size; i++) {
448     //         arrayA.array(i, r.nextFloat());
449     //         arrayB.array(i, r.nextFloat());
450     //     }
451     //
452     //     accelerator.compute(cc -> computeGraph01WithSeparateAdd(cc, arrayA, arrayB, arrayC, size));
453     //
454     //     for (int i = 0; i < size; i++) {
455     //         HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
456     //     }
457     //
458     // }
459 
460     @HatTest
461     @Reflect
        // Test: runs computeGraph02 and checks that, in each group of four floats, b holds
        // a's first lane multiplied by 10 and the other three lanes copied unchanged.
        // Expected values are computed from a after the launch, so a must be unmodified.
462     public void TestVectorArrayView02() {
463         final int size = 1024;
464         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
465         var arrayA = F32ArrayPadded.create(accelerator, size);
466         var arrayB = F32ArrayPadded.create(accelerator, size);
467 
            // Only a is initialised; b is written by the kernel.
468         Random r = new Random(19);
469         for (int i = 0; i < size; i++) {
470             arrayA.array(i, r.nextFloat());
471         }
472 
473         accelerator.compute(cc -> computeGraph02(cc, arrayA, arrayB, size));
474 
            // Step by 4: offsets 0..3 are the x, y, z, w lanes written by vectorOps02.
475         for (int i = 0; i < size; i += 4) {
476             HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
477             HATAsserts.assertEquals((arrayA.array(i + 1)), arrayB.array(i + 1), 0.001f);
478             HATAsserts.assertEquals((arrayA.array(i + 2)), arrayB.array(i + 2), 0.001f);
479             HATAsserts.assertEquals((arrayA.array(i + 3)), arrayB.array(i + 3), 0.001f);
480         }
481     }
482 
483     @HatTest
484     @Reflect
        // Test: runs computeGraph03 and checks that, in each group of four floats, b holds
        // a's lanes multiplied by 10, 20, 30 and 40 respectively (absolute tolerance 0.001).
485     public void TestVectorArrayView03() {
486         final int size = 1024;
487         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
488         var arrayA = F32ArrayPadded.create(accelerator, size);
489         var arrayB = F32ArrayPadded.create(accelerator, size);
490 
            // Only a is initialised; b is written by the kernel.
491         Random r = new Random(19);
492         for (int i = 0; i < size; i++) {
493             arrayA.array(i, r.nextFloat());
494         }
495 
496         accelerator.compute(cc -> computeGraph03(cc, arrayA, arrayB, size));
497 
            // Step by 4: offsets 0..3 are the x, y, z, w lanes written by vectorOps03.
498         for (int i = 0; i < size; i += 4) {
499             HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
500             HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
501             HATAsserts.assertEquals((arrayA.array(i + 2) * 30.0f), arrayB.array(i + 2), 0.001f);
502             HATAsserts.assertEquals((arrayA.array(i + 3) * 40.0f), arrayB.array(i + 3), 0.001f);
503         }
504     }
505 
506     @HatTest
507     @Reflect
        // Test: runs computeGraph04 and applies the same per-lane expectations as
        // TestVectorArrayView03 (x*10, y*20, z*30, w*40), here against vectorOps04.
508     public void TestVectorArrayView04() {
509         final int size = 1024;
510         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
511         var arrayA = F32ArrayPadded.create(accelerator, size);
512         var arrayB = F32ArrayPadded.create(accelerator, size);
513 
            // Only a is initialised; b is written by the kernel.
514         Random r = new Random(19);
515         for (int i = 0; i < size; i++) {
516             arrayA.array(i, r.nextFloat());
517         }
518 
519         accelerator.compute(cc -> computeGraph04(cc, arrayA, arrayB, size));
520 
            // Step by 4: offsets 0..3 are the x, y, z, w lanes written by vectorOps04.
521         for (int i = 0; i < size; i += 4) {
522             HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
523             HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
524             HATAsserts.assertEquals((arrayA.array(i + 2) * 30.0f), arrayB.array(i + 2), 0.001f);
525             HATAsserts.assertEquals((arrayA.array(i + 3) * 40.0f), arrayB.array(i + 3), 0.001f);
526         }
527     }
528 
529     @HatTest
530     @Reflect
        // Test: runs computeGraph05 and checks c[i] == a[i] + b[i] + b[i] for every element,
        // with an absolute tolerance of 0.001.
531     public void TestVectorArrayView05() {
532         final int size = 1024;
533         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
534         var arrayA = F32ArrayPadded.create(accelerator, size);
535         var arrayB = F32ArrayPadded.create(accelerator, size);
536         var arrayC = F32ArrayPadded.create(accelerator, size);
537 
            // Fixed seed keeps the inputs reproducible.
538         Random r = new Random(19);
539         for (int i = 0; i < size; i++) {
540             arrayA.array(i, r.nextFloat());
541             arrayB.array(i, r.nextFloat());
542         }
543 
544         accelerator.compute(cc -> computeGraph05(cc, arrayA, arrayB, arrayC, size));
545 
            // Same evaluation order as the kernel: (a + b) + b.
546         for (int i = 0; i < size; i ++) {
547             HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
548         }
549     }
550 
551     @HatTest
552     @Reflect
        // Test: runs computeGraph06 and checks c[i] == a[i] - b[i] for every element, with an
        // absolute tolerance of 0.001.
553     public void TestVectorArrayView06() {
554         final int size = 1024;
555         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
556         var arrayA = F32ArrayPadded.create(accelerator, size);
557         var arrayB = F32ArrayPadded.create(accelerator, size);
558         var arrayC = F32ArrayPadded.create(accelerator, size);
559 
            // Fixed seed keeps the inputs reproducible.
560         Random r = new Random(19);
561         for (int i = 0; i < size; i++) {
562             arrayA.array(i, r.nextFloat());
563             arrayB.array(i, r.nextFloat());
564         }
565 
566         accelerator.compute(cc -> computeGraph06(cc, arrayA, arrayB, arrayC, size));
567 
568         for (int i = 0; i < size; i ++) {
569             HATAsserts.assertEquals((arrayA.array(i) - arrayB.array(i)), arrayC.array(i), 0.001f);
570         }
571     }
572 
573     @HatTest
574     @Reflect
        // Test: runs computeGraph07, whose kernel computes (a + b) - b, and checks that c
        // equals a for every element within an absolute tolerance of 0.001.
575     public void TestVectorArrayView07() {
576         final int size = 1024;
577         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
578         var arrayA = F32ArrayPadded.create(accelerator, size);
579         var arrayB = F32ArrayPadded.create(accelerator, size);
580         var arrayC = F32ArrayPadded.create(accelerator, size);
581 
            // Fixed seed keeps the inputs reproducible.
582         Random r = new Random(19);
583         for (int i = 0; i < size; i++) {
584             arrayA.array(i, r.nextFloat());
585             arrayB.array(i, r.nextFloat());
586         }
587 
588         accelerator.compute(cc -> computeGraph07(cc, arrayA, arrayB, arrayC, size));
589 
            // The tolerance absorbs any rounding from the add/sub round trip.
590         for (int i = 0; i < size; i ++) {
591             HATAsserts.assertEquals(arrayA.array(i), arrayC.array(i), 0.001f);
592         }
593     }
594 
595     @HatTest
596     @Reflect
        // Test: runs computeGraph08 and checks c[i] == ((a[i] + b[i]) * a[i]) / b[i] for
        // every element, with an absolute tolerance of 0.001.
597     public void TestVectorArrayView08() {
598         final int size = 1024;
599         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
600         var arrayA = F32ArrayPadded.create(accelerator, size);
601         var arrayB = F32ArrayPadded.create(accelerator, size);
602         var arrayC = F32ArrayPadded.create(accelerator, size);
603 
            // NOTE(review): b is the divisor and nextFloat() yields values in [0, 1); a very
            // small b[i] could amplify backend rounding past the absolute tolerance. The fixed
            // seed makes the inputs deterministic - confirm this seed avoids such values.
604         Random r = new Random(19);
605         for (int i = 0; i < size; i++) {
606             arrayA.array(i, r.nextFloat());
607             arrayB.array(i, r.nextFloat());
608         }
609 
610         accelerator.compute(cc -> computeGraph08(cc, arrayA, arrayB, arrayC, size));
611 
            // The reference value is computed in the same operation order as the kernel.
612         for (int i = 0; i < size; i ++) {
613             float val = (((arrayA.array(i) + arrayB.array(i)) * arrayA.array(i)) / arrayB.array(i));
614             HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
615         }
616     }
617 
618     @HatTest
619     @Reflect
        // Test: runs computeGraph09 and checks c[i] == a[i] + b[i] * a[i] for every element,
        // with an absolute tolerance of 0.001.
620     public void TestVectorArrayView09() {
621         final int size = 1024;
622         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
623         var arrayA = F32ArrayPadded.create(accelerator, size);
624         var arrayB = F32ArrayPadded.create(accelerator, size);
625         var arrayC = F32ArrayPadded.create(accelerator, size);
626 
            // Fixed seed keeps the inputs reproducible.
627         Random r = new Random(19);
628         for (int i = 0; i < size; i++) {
629             arrayA.array(i, r.nextFloat());
630             arrayB.array(i, r.nextFloat());
631         }
632 
633         accelerator.compute(cc -> computeGraph09(cc, arrayA, arrayB, arrayC, size));
634 
            // Operator precedence makes this a + (b * a), matching the kernel's a.add(a.mul(b)).
635         for (int i = 0; i < size; i ++) {
636             float val = (arrayA.array(i) + (arrayB.array(i)) * arrayA.array(i));
637             HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
638         }
639     }
640 
641     @HatTest
642     @Reflect
        // Test: runs computeGraph10 (copy of a to b through the SharedMemory view) and checks
        // b[i] == a[i] for every element, with an absolute tolerance of 0.001.
643     public void TestVectorArrayView10() {
644         final int size = 1024;
645         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
646         var arrayA = F32ArrayPadded.create(accelerator, size);
647         var arrayB = F32ArrayPadded.create(accelerator, size);
648 
            // b is pre-filled with different random values, so the check can only pass if
            // the kernel overwrites every element.
649         Random r = new Random(19);
650         for (int i = 0; i < size; i++) {
651             arrayA.array(i, r.nextFloat());
652             arrayB.array(i, r.nextFloat());
653         }
654 
655         accelerator.compute(cc -> computeGraph10(cc, arrayA, arrayB, size));
656 
657         for (int i = 0; i < size; i ++) {
658             HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
659         }
660     }
661 
662     @HatTest
663     @Reflect
        // Test: runs computeGraph11 (copy of a to b through the PrivateMemory view) and
        // checks b[i] == a[i] for every element, with an absolute tolerance of 0.001.
664     public void TestVectorArrayView11() {
665         final int size = 1024;
666         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
667         var arrayA = F32ArrayPadded.create(accelerator, size);
668         var arrayB = F32ArrayPadded.create(accelerator, size);
669 
            // b is pre-filled with different random values, so the check can only pass if
            // the kernel overwrites every element.
670         Random r = new Random(19);
671         for (int i = 0; i < size; i++) {
672             arrayA.array(i, r.nextFloat());
673             arrayB.array(i, r.nextFloat());
674         }
675 
676         accelerator.compute(cc -> computeGraph11(cc, arrayA, arrayB, size));
677 
678         for (int i = 0; i < size; i ++) {
679             HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
680         }
681     }
682 
683     @HatTest
684     @Reflect
        // Test: runs computeGraph12 (lane-by-lane copy of a to b through the SharedMemory
        // view) and checks b[i] == a[i] for every element, with an absolute tolerance of 0.001.
685     public void TestVectorArrayView12() {
686         final int size = 1024;
687         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
688         var arrayA = F32ArrayPadded.create(accelerator, size);
689         var arrayB = F32ArrayPadded.create(accelerator, size);
690 
            // b is pre-filled with different random values, so the check can only pass if
            // the kernel overwrites every element.
691         Random r = new Random(19);
692         for (int i = 0; i < size; i++) {
693             arrayA.array(i, r.nextFloat());
694             arrayB.array(i, r.nextFloat());
695         }
696 
697         accelerator.compute(cc -> computeGraph12(cc, arrayA, arrayB, size));
698 
699         for (int i = 0; i < size; i ++) {
700             HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
701         }
702     }
703 }