1 /*
  2  * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 package hat.test;
 26 
 27 import jdk.incubator.code.Reflect;
 28 
 29 import hat.*;
 30 import hat.backend.Backend;
 31 import hat.buffer.*;
 32 import hat.ifacemapper.MappableIface.RO;
 33 import hat.ifacemapper.MappableIface.RW;
 34 import hat.ifacemapper.Schema;
 35 import hat.test.annotation.HatTest;
 36 import hat.test.engine.HATAsserts;
 37 import jdk.incubator.code.Reflect;
 38 
 39 import java.lang.invoke.MethodHandles;
 40 import java.util.Random;
 41 
 42 public class TestVectorArrayView {
 43 
    /**
     * Kernel: element-wise {@code Float4} addition, {@code c = a + b}, through array views.
     *
     * <p>Both operands are first loaded into locals, added with the static
     * {@code Float4.add}, and the sum is held in a local before it is stored.
     *
     * <p>NOTE(review): the view index is {@code index * 4}, which suggests the
     * {@code Float4[]} view is addressed in float elements rather than in Float4 slots —
     * confirm against {@code F32ArrayPadded.float4ArrayView()}.
     *
     * @param kernelContext supplies {@code gix} (this work-item's index) and {@code gsx} (the bound it is checked against)
     * @param a first input buffer
     * @param b second input buffer
     * @param c output buffer receiving the sum
     */
    @Reflect
    public static void vectorOps01(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
        // Work-items whose index is not below gsx do nothing.
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;

            Float4[] vA = a.float4ArrayView();
            Float4[] vB = b.float4ArrayView();
            Float4[] vC = c.float4ArrayView();
            Float4 floatA = vA[index * 4];
            Float4 floatB = vB[index * 4];
            Float4 res = Float4.add(floatA, floatB);
            vC[index * 4] = res;
        }
    }
 58 
    /**
     * Kernel: same {@code c = a + b} computation as {@code vectorOps01}, but the result of
     * the static {@code Float4.add} is stored straight into the output view instead of
     * going through an intermediate local.
     *
     * @param kernelContext supplies {@code gix} (this work-item's index) and {@code gsx} (the bound it is checked against)
     * @param a first input buffer
     * @param b second input buffer
     * @param c output buffer receiving the sum
     */
    @Reflect
    public static void vectorOps01WithFloat4s(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
        // Work-items whose index is not below gsx do nothing.
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;

            Float4[] vA = a.float4ArrayView();
            Float4[] vB = b.float4ArrayView();
            Float4[] vC = c.float4ArrayView();
            Float4 vAFloat = vA[index * 4];
            Float4 vBFloat = vB[index * 4];
            // The add result is assigned to the view element directly (no temporary).
            vC[index * 4] = Float4.add(vAFloat, vBFloat);
        }
    }
 72 
    /**
     * Kernel: same {@code c = a + b} computation as {@code vectorOps01}, but the two view
     * loads appear inline as the arguments of the static {@code Float4.add}; only the sum
     * is held in a local before the store.
     *
     * @param kernelContext supplies {@code gix} (this work-item's index) and {@code gsx} (the bound it is checked against)
     * @param a first input buffer
     * @param b second input buffer
     * @param c output buffer receiving the sum
     */
    @Reflect
    public static void vectorOps01WithSeparateAdd(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
        // Work-items whose index is not below gsx do nothing.
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;

            Float4[] vA = a.float4ArrayView();
            Float4[] vB = b.float4ArrayView();
            Float4[] vC = c.float4ArrayView();
            // Operands are read from the views inside the call expression itself.
            Float4 res = Float4.add(vA[index * 4], vB[index * 4]);
            vC[index * 4] = res;
        }
    }
 85 
    /**
     * Kernel: loads a mutable {@code Float4} from {@code a}, multiplies its {@code x}
     * component by 10 via the component setter, and stores the vector into {@code b}.
     * The {@code y}, {@code z} and {@code w} components are not modified here.
     *
     * <p>NOTE(review): {@code vA.x(...)} mutates the loaded {@code Float4.MutableImpl};
     * the host-side check in {@code TestVectorArrayView02} expects {@code a} itself to be
     * unchanged afterwards, so the load is presumably a copy — confirm.
     *
     * @param kernelContext supplies {@code gix} (this work-item's index) and {@code gsx} (the bound it is checked against)
     * @param a input buffer
     * @param b output buffer
     */
    @Reflect
    public static void vectorOps02(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
        // Work-items whose index is not below gsx do nothing.
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;

            Float4.MutableImpl[] vArr = a.float4ArrayView();
            Float4.MutableImpl[] bArr = b.float4ArrayView();
            Float4.MutableImpl vA = vArr[index * 4];
            // Scaled value goes through a float local before being written back.
            float scaleX = vA.x() * 10.0f;
            vA.x(scaleX);
            bArr[index * 4] = vA;
        }
    }
 99 
    /**
     * Kernel: loads a mutable {@code Float4} from {@code a}, scales its components
     * ({@code x*10}, {@code y*20}, {@code z*30}, {@code w*40}) and stores the vector into
     * {@code b}. All four products are computed into float locals first, then all four
     * setters are called.
     *
     * @param kernelContext supplies {@code gix} (this work-item's index) and {@code gsx} (the bound it is checked against)
     * @param a input buffer
     * @param b output buffer
     */
    @Reflect
    public static void vectorOps03(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
        // Work-items whose index is not below gsx do nothing.
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;

            Float4.MutableImpl[] vA = a.float4ArrayView();
            Float4.MutableImpl[] vB = b.float4ArrayView();
            Float4.MutableImpl vAFloat = vA[index * 4];
            // Read every component and compute the products before any write.
            float scaleX = vAFloat.x() * 10.0f;
            float scaleY = vAFloat.y() * 20.0f;
            float scaleZ = vAFloat.z() * 30.0f;
            float scaleW = vAFloat.w() * 40.0f;
            vAFloat.x(scaleX);
            vAFloat.y(scaleY);
            vAFloat.z(scaleZ);
            vAFloat.w(scaleW);
            vB[index * 4] = vAFloat;
        }
    }
119 
    /**
     * Kernel: same per-component scaling as {@code vectorOps03}
     * ({@code x*10}, {@code y*20}, {@code z*30}, {@code w*40}), but each component is read,
     * multiplied and written back in a single nested getter/setter expression.
     *
     * @param kernelContext supplies {@code gix} (this work-item's index) and {@code gsx} (the bound it is checked against)
     * @param a input buffer
     * @param b output buffer
     */
    @Reflect
    public static void vectorOps04(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
        // Work-items whose index is not below gsx do nothing.
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;

            Float4.MutableImpl[] vA = a.float4ArrayView();
            Float4.MutableImpl[] vB = b.float4ArrayView();
            Float4.MutableImpl vAFloat = vA[index * 4];
            // Getter call nested inside the setter call, one component at a time.
            vAFloat.x(vAFloat.x() * 10.0f);
            vAFloat.y(vAFloat.y() * 20.0f);
            vAFloat.z(vAFloat.z() * 30.0f);
            vAFloat.w(vAFloat.w() * 40.0f);
            vB[index * 4] = vAFloat;
        }
    }
135 
    /**
     * Kernel: computes {@code c = (a + b) + b} using the instance method
     * {@code Float4.add} twice, with the intermediate sum held in a local.
     *
     * @param kernelContext supplies {@code gix} (this work-item's index) and {@code gsx} (the bound it is checked against)
     * @param a first input buffer
     * @param b second input buffer (added twice)
     * @param c output buffer
     */
    @Reflect
    public static void vectorOps05(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
        // Work-items whose index is not below gsx do nothing.
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;

            Float4[] vA = a.float4ArrayView();
            Float4[] vB = b.float4ArrayView();
            Float4[] vC = c.float4ArrayView();
            Float4 floatA = vA[index * 4];
            Float4 floatB = vB[index * 4];
            // Instance-method form of add, chained through a temporary.
            Float4 temp = floatA.add(floatB);
            Float4 res = temp.add(floatB);
            vC[index * 4] = res;
        }
    }
151 
    /**
     * Kernel: computes {@code c = a - b} with the static {@code Float4.sub}.
     *
     * <p>The subtraction is evaluated twice into {@code vD} and {@code vE}; only
     * {@code vE} is stored.
     *
     * @param kernelContext supplies {@code gix} (this work-item's index) and {@code gsx} (the bound it is checked against)
     * @param a minuend buffer
     * @param b subtrahend buffer
     * @param c output buffer receiving the difference
     */
    @Reflect
    public static void vectorOps06(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
        // Work-items whose index is not below gsx do nothing.
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;

            Float4[] vA = a.float4ArrayView();
            Float4[] vB = b.float4ArrayView();
            Float4[] vC = c.float4ArrayView();
            Float4 floatA = vA[index * 4];
            Float4 floatB = vB[index * 4];
            // NOTE(review): vD is never read. Presumably kept on purpose to exercise an
            // unused vector value in the generated code (or one of the two calls was meant
            // to be the instance form floatA.sub(floatB)) — confirm before changing.
            Float4 vD = Float4.sub(floatA, floatB);
            Float4 vE = Float4.sub(floatA, floatB);
            vC[index * 4] = vE;
        }
    }
167 
    /**
     * Kernel: computes {@code c = (a + b) - b} using the instance methods
     * {@code Float4.add} followed by {@code Float4.sub}.
     *
     * @param kernelContext supplies {@code gix} (this work-item's index) and {@code gsx} (the bound it is checked against)
     * @param a first input buffer
     * @param b second input buffer (added, then subtracted again)
     * @param c output buffer
     */
    @Reflect
    public static void vectorOps07(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
        // Work-items whose index is not below gsx do nothing.
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;

            Float4[] vAArray = a.float4ArrayView();
            Float4[] vBArray = b.float4ArrayView();
            Float4[] vCArray = c.float4ArrayView();

            Float4 vA = vAArray[index * 4];
            Float4 vB = vBArray[index * 4];
            // add then sub of the same operand: the stored value is (a + b) - b.
            Float4 vC = vA.add(vB);
            Float4 vD = vC.sub(vB);
            vCArray[index * 4] = vD;
        }
    }
184 
    /**
     * Kernel: computes {@code c = ((a + b) * a) / b} using the instance methods
     * {@code Float4.add}, {@code Float4.mul} and {@code Float4.div}, each result held in
     * its own local.
     *
     * @param kernelContext supplies {@code gix} (this work-item's index) and {@code gsx} (the bound it is checked against)
     * @param a first input buffer
     * @param b second input buffer (also the divisor)
     * @param c output buffer
     */
    @Reflect
    public static void vectorOps08(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
        // Work-items whose index is not below gsx do nothing.
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;

            Float4[] vAArray = a.float4ArrayView();
            Float4[] vBArray = b.float4ArrayView();
            Float4[] vCArray = c.float4ArrayView();

            Float4 vA = vAArray[index * 4];
            Float4 vB = vBArray[index * 4];
            Float4 vC = vA.add(vB);   // a + b
            Float4 vD = vC.mul(vA);   // (a + b) * a
            Float4 vE = vD.div(vB);   // ((a + b) * a) / b
            vCArray[index * 4] = vE;
        }
    }
202 
    /**
     * Kernel: computes {@code c = a + (a * b)} using the instance methods
     * {@code Float4.mul} and {@code Float4.add}, where the product feeds the add as its
     * argument.
     *
     * @param kernelContext supplies {@code gix} (this work-item's index) and {@code gsx} (the bound it is checked against)
     * @param a first input buffer
     * @param b second input buffer
     * @param c output buffer
     */
    @Reflect
    public static void vectorOps09(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
        // Checking composition
        // Work-items whose index is not below gsx do nothing.
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            Float4[] vAArray = a.float4ArrayView();
            Float4[] vBArray = b.float4ArrayView();
            Float4[] vCArray = c.float4ArrayView();

            Float4 vA = vAArray[index * 4];
            Float4 vB = vBArray[index * 4];
            Float4 temp = vA.mul(vB);   // a * b
            Float4 vC = vA.add(temp);   // a + (a * b)
            vCArray[index * 4] = vC;
        }
    }
219 
    /**
     * Buffer interface with a single float array field named {@code "array"}, used by
     * {@code vectorOps10} and {@code vectorOps12} as scratch storage obtained through
     * {@link #createLocal()}.
     *
     * <p>NOTE(review): the names ({@code SharedMemory}, {@code createLocal},
     * {@code float4LocalArrayView}) suggest this models work-group local memory on the
     * device — confirm against the HAT backend.
     */
    private interface SharedMemory extends Buffer {
        // Element setter / getter for the "array" field.
        void array(long index, float value);
        float array(long index);
        // Layout: one field "array" declared with 1024 (presumably the float element count — confirm).
        Schema<SharedMemory> schema = Schema.of(SharedMemory.class,
                arr -> arr.array("array", 1024));
        /** Allocates an instance from {@link #schema} on the given accelerator. */
        static SharedMemory create(Accelerator accelerator) {
            return schema.allocate(accelerator);
        }
        /** Allocates an instance from {@link #schema} on a freshly created {@code Backend.FIRST} accelerator. */
        static SharedMemory createLocal() {
            return schema.allocate(new Accelerator(MethodHandles.lookup(), Backend.FIRST));
        }
        // The three defaults below have placeholder bodies (null / no-op).
        // NOTE(review): presumably they are recognised and replaced by HAT's kernel
        // code generation rather than executed as written — confirm.
        default Float4 float4View(int index) {
            return null;
        }
        default void storeFloat4View(Float4 float4, int index) {
        }
        default Float4.MutableImpl[] float4LocalArrayView() {
            return null;
        }
    }
240 
    /**
     * Kernel: copies a {@code Float4} from {@code a} to {@code b} by way of a
     * {@code SharedMemory} array view: store at {@code lix * 4}, barrier, load from the
     * same slot, store to {@code b}.
     *
     * <p>NOTE(review): {@code kernelContext.barrier()} is called inside the bounds guard;
     * that is only uniform if every launched work-item passes the guard — confirm.
     *
     * @param kernelContext supplies {@code gix}, {@code gsx}, {@code lix} and {@code barrier()}
     * @param a input buffer
     * @param b output buffer
     */
    @Reflect
    public static void vectorOps10(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
        // Scratch buffer is obtained before the guard, i.e. by every work-item.
        SharedMemory sm = SharedMemory.createLocal();
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            int lix = kernelContext.lix;

            Float4[] aArr = a.float4ArrayView();
            Float4[] bArr = b.float4ArrayView();
            // The Float4.MutableImpl[] view is held through a Float4[] reference here.
            Float4[] smArr = sm.float4LocalArrayView();

            Float4 vA = aArr[index * 4];
            // Scratch slot is addressed with lix, the global buffers with gix.
            smArr[lix * 4] = vA;
            kernelContext.barrier();
            Float4 r = smArr[lix * 4];
            bArr[index * 4] = r;
        }
    }
259 
    /**
     * Buffer interface with a single float array field named {@code "array"}, used by
     * {@code vectorOps11} as scratch storage obtained through {@link #createPrivate()}.
     *
     * <p>NOTE(review): the names ({@code PrivateMemory}, {@code createPrivate},
     * {@code float4PrivateArrayView}) suggest this models per-work-item private memory on
     * the device — confirm against the HAT backend.
     */
    private interface PrivateMemory extends Buffer {
        // Element setter / getter for the "array" field.
        void array(long index, float value);
        float array(long index);
        // Layout: one field "array" declared with 4 (presumably the float element count, i.e. one Float4 — confirm).
        Schema<PrivateMemory> schema = Schema.of(PrivateMemory.class,
                arr -> arr.array("array", 4));
        /** Allocates an instance from {@link #schema} on the given accelerator. */
        static PrivateMemory create(Accelerator accelerator) {
            return schema.allocate(accelerator);
        }
        /** Allocates an instance from {@link #schema} on a freshly created {@code Backend.FIRST} accelerator. */
        static PrivateMemory createPrivate() {
            return schema.allocate(new Accelerator(MethodHandles.lookup(), Backend.FIRST));
        }
        // The three defaults below have placeholder bodies (null / no-op).
        // NOTE(review): presumably they are recognised and replaced by HAT's kernel
        // code generation rather than executed as written — confirm.
        default Float4 float4View(int index) {
            return null;
        }
        default void storeFloat4View(Float4 float4, int index) {
        }
        default Float4[] float4PrivateArrayView() {
            return null;
        }
    }
280 
    /**
     * Kernel: copies a {@code Float4} from {@code a} to {@code b} by way of a
     * {@code PrivateMemory} array view: store at slot 0, barrier, load from slot 0, store
     * to {@code b}.
     *
     * <p>NOTE(review): {@code kernelContext.barrier()} is called inside the bounds guard;
     * that is only uniform if every launched work-item passes the guard — confirm.
     *
     * @param kernelContext supplies {@code gix}, {@code gsx} and {@code barrier()}
     * @param a input buffer
     * @param b output buffer
     */
    @Reflect
    public static void vectorOps11(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
        // Scratch buffer is obtained before the guard, i.e. by every work-item.
        PrivateMemory pm = PrivateMemory.createPrivate();
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;

            Float4[] aArr = a.float4ArrayView();
            Float4[] bArr = b.float4ArrayView();
            Float4[] pmArr = pm.float4PrivateArrayView();

            Float4 vA = aArr[index * 4];
            // Every work-item uses index 0 of its scratch view.
            pmArr[0] = vA;
            kernelContext.barrier();
            Float4 r = pmArr[0];
            bArr[index * 4] = r;
        }
    }
298 
    /**
     * Kernel: copies a {@code Float4} from {@code a} to {@code b} by way of a
     * {@code SharedMemory} array view, transferring the value component by component:
     * a mutable vector is loaded from the scratch slot, its {@code x/y/z/w} are set from
     * the input vector, it is stored back, and after a barrier it is reloaded and written
     * to {@code b}.
     *
     * <p>NOTE(review): {@code kernelContext.barrier()} is called inside the bounds guard;
     * that is only uniform if every launched work-item passes the guard — confirm.
     *
     * @param kernelContext supplies {@code gix}, {@code gsx}, {@code lix} and {@code barrier()}
     * @param a input buffer
     * @param b output buffer
     */
    @Reflect
    public static void vectorOps12(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
        // Scratch buffer is obtained before the guard, i.e. by every work-item.
        SharedMemory sm = SharedMemory.createLocal();
        if (kernelContext.gix < kernelContext.gsx) {
            int index = kernelContext.gix;
            int lix = kernelContext.lix;
            Float4.MutableImpl[] aArr = a.float4ArrayView();
            Float4.MutableImpl[] bArr = b.float4ArrayView();
            Float4.MutableImpl[] smArr = sm.float4LocalArrayView();

            Float4.MutableImpl vA = aArr[index * 4];
            // Load the scratch vector, overwrite all four components, then store it back.
            Float4.MutableImpl smVector = smArr[lix * 4];
            smVector.x(vA.x());
            smVector.y(vA.y());
            smVector.z(vA.z());
            smVector.w(vA.w());
            smArr[lix * 4] = smVector;
            kernelContext.barrier();
            Float4.MutableImpl r = smArr[lix * 4];
            bArr[index * 4] = r;
        }
    }
321 
322     @Reflect
323     public static void computeGraph01(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
324         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
325         cc.dispatchKernel( NDRange.of1D(size/4,128), kernelContext -> vectorOps01(kernelContext, a, b, c));
326     }
327 
328     @Reflect
329     public static void computeGraph01WithFloat4s(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
330         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
331         cc.dispatchKernel(NDRange.of1D(size/4,128), kernelContext -> vectorOps01WithFloat4s(kernelContext, a, b, c));
332     }
333 
334     @Reflect
335     public static void computeGraph01WithSeparateAdd(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
336         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
337         cc.dispatchKernel(NDRange.of1D(size/4,128), kernelContext -> vectorOps01WithSeparateAdd(kernelContext, a, b, c));
338     }
339 
340     @Reflect
341     public static void computeGraph02(@RO ComputeContext cc, @RW F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
342         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
343         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps02(kernelContext, a, b));
344     }
345 
346     @Reflect
347     public static void computeGraph03(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
348         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
349         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps03(kernelContext, a, b));
350     }
351 
352     @Reflect
353     public static void computeGraph04(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
354         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
355         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps04(kernelContext, a, b));
356     }
357 
358     @Reflect
359     public static void computeGraph05(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
360         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
361         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps05(kernelContext, a, b, c));
362     }
363 
364     @Reflect
365     public static void computeGraph06(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
366         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
367         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps06(kernelContext, a, b, c));
368     }
369 
370     @Reflect
371     public static void computeGraph07(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
372         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
373         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps07(kernelContext, a, b, c));
374     }
375 
376     @Reflect
377     public static void computeGraph08(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
378         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
379         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps08(kernelContext, a, b, c));
380     }
381 
382     @Reflect
383     public static void computeGraph09(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
384         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
385         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps09(kernelContext, a, b, c));
386     }
387 
388     @Reflect
389     public static void computeGraph10(@RO ComputeContext cc, @RO F32ArrayPadded a,  @RW F32ArrayPadded b, int size) {
390         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
391         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps10(kernelContext, a, b));
392     }
393 
394     @Reflect
395     public static void computeGraph11(@RO ComputeContext cc, @RO F32ArrayPadded a,  @RW F32ArrayPadded b, int size) {
396         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
397         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps11(kernelContext, a, b));
398     }
399 
400     @Reflect
401     public static void computeGraph12(@RO ComputeContext cc, @RO F32ArrayPadded a,  @RW F32ArrayPadded b, int size) {
402         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
403         cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps12(kernelContext, a, b));
404     }
405 
406     @HatTest
407     @Reflect
408     public void TestVectorArrayView01() {
409         final int size = 1024;
410         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
411         var arrayA = F32ArrayPadded.create(accelerator, size);
412         var arrayB = F32ArrayPadded.create(accelerator, size);
413         var arrayC = F32ArrayPadded.create(accelerator, size);
414 
415         Random r = new Random(19);
416         for (int i = 0; i < size; i++) {
417             arrayA.array(i, r.nextFloat());
418             arrayB.array(i, r.nextFloat());
419         }
420 
421         accelerator.compute(cc -> computeGraph01(cc, arrayA, arrayB, arrayC, size));
422 
423         for (int i = 0; i < size; i++) {
424             HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
425         }
426 
427     }
428 
429     // @HatTest
430     // @Reflect
431     // public void TestVectorArrayView01WithFloat4s() {
432     //     final int size = 1024;
433     //     var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
434     //     var arrayA = F32ArrayPadded.create(accelerator, size);
435     //     var arrayB = F32ArrayPadded.create(accelerator, size);
436     //     var arrayC = F32ArrayPadded.create(accelerator, size);
437     //
438     //     Random r = new Random(19);
439     //     for (int i = 0; i < size; i++) {
440     //         arrayA.array(i, r.nextFloat());
441     //         arrayB.array(i, r.nextFloat());
442     //     }
443     //
444     //     accelerator.compute(cc -> computeGraph01WithFloat4s(cc, arrayA, arrayB, arrayC, size));
445     //
446     //     for (int i = 0; i < size; i++) {
447     //         HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
448     //     }
449     //
450     // }
451     //
452     // @HatTest
453     // @Reflect
454     // public void TestVectorArrayView01WithSeparateAdd() {
455     //     final int size = 1024;
456     //     var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
457     //     var arrayA = F32ArrayPadded.create(accelerator, size);
458     //     var arrayB = F32ArrayPadded.create(accelerator, size);
459     //     var arrayC = F32ArrayPadded.create(accelerator, size);
460     //
461     //     Random r = new Random(19);
462     //     for (int i = 0; i < size; i++) {
463     //         arrayA.array(i, r.nextFloat());
464     //         arrayB.array(i, r.nextFloat());
465     //     }
466     //
467     //     accelerator.compute(cc -> computeGraph01WithSeparateAdd(cc, arrayA, arrayB, arrayC, size));
468     //
469     //     for (int i = 0; i < size; i++) {
470     //         HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
471     //     }
472     //
473     // }
474 
475     @HatTest
476     @Reflect
477     public void TestVectorArrayView02() {
478         final int size = 1024;
479         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
480         var arrayA = F32ArrayPadded.create(accelerator, size);
481         var arrayB = F32ArrayPadded.create(accelerator, size);
482 
483         Random r = new Random(19);
484         for (int i = 0; i < size; i++) {
485             arrayA.array(i, r.nextFloat());
486         }
487 
488         accelerator.compute(cc -> computeGraph02(cc, arrayA, arrayB, size));
489 
490         for (int i = 0; i < size; i += 4) {
491             HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
492             HATAsserts.assertEquals((arrayA.array(i + 1)), arrayB.array(i + 1), 0.001f);
493             HATAsserts.assertEquals((arrayA.array(i + 2)), arrayB.array(i + 2), 0.001f);
494             HATAsserts.assertEquals((arrayA.array(i + 3)), arrayB.array(i + 3), 0.001f);
495         }
496     }
497 
498     @HatTest
499     @Reflect
500     public void TestVectorArrayView03() {
501         final int size = 1024;
502         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
503         var arrayA = F32ArrayPadded.create(accelerator, size);
504         var arrayB = F32ArrayPadded.create(accelerator, size);
505 
506         Random r = new Random(19);
507         for (int i = 0; i < size; i++) {
508             arrayA.array(i, r.nextFloat());
509         }
510 
511         accelerator.compute(cc -> computeGraph03(cc, arrayA, arrayB, size));
512 
513         for (int i = 0; i < size; i += 4) {
514             HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
515             HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
516             HATAsserts.assertEquals((arrayA.array(i + 2) * 30.0f), arrayB.array(i + 2), 0.001f);
517             HATAsserts.assertEquals((arrayA.array(i + 3) * 40.0f), arrayB.array(i + 3), 0.001f);
518         }
519     }
520 
521     @HatTest
522     @Reflect
523     public void TestVectorArrayView04() {
524         final int size = 1024;
525         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
526         var arrayA = F32ArrayPadded.create(accelerator, size);
527         var arrayB = F32ArrayPadded.create(accelerator, size);
528 
529         Random r = new Random(19);
530         for (int i = 0; i < size; i++) {
531             arrayA.array(i, r.nextFloat());
532         }
533 
534         accelerator.compute(cc -> computeGraph04(cc, arrayA, arrayB, size));
535 
536         for (int i = 0; i < size; i += 4) {
537             HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
538             HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
539             HATAsserts.assertEquals((arrayA.array(i + 2) * 30.0f), arrayB.array(i + 2), 0.001f);
540             HATAsserts.assertEquals((arrayA.array(i + 3) * 40.0f), arrayB.array(i + 3), 0.001f);
541         }
542     }
543 
544     @HatTest
545     @Reflect
546     public void TestVectorArrayView05() {
547         final int size = 1024;
548         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
549         var arrayA = F32ArrayPadded.create(accelerator, size);
550         var arrayB = F32ArrayPadded.create(accelerator, size);
551         var arrayC = F32ArrayPadded.create(accelerator, size);
552 
553         Random r = new Random(19);
554         for (int i = 0; i < size; i++) {
555             arrayA.array(i, r.nextFloat());
556             arrayB.array(i, r.nextFloat());
557         }
558 
559         accelerator.compute(cc -> computeGraph05(cc, arrayA, arrayB, arrayC, size));
560 
561         for (int i = 0; i < size; i ++) {
562             HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
563         }
564     }
565 
566     @HatTest
567     @Reflect
568     public void TestVectorArrayView06() {
569         final int size = 1024;
570         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
571         var arrayA = F32ArrayPadded.create(accelerator, size);
572         var arrayB = F32ArrayPadded.create(accelerator, size);
573         var arrayC = F32ArrayPadded.create(accelerator, size);
574 
575         Random r = new Random(19);
576         for (int i = 0; i < size; i++) {
577             arrayA.array(i, r.nextFloat());
578             arrayB.array(i, r.nextFloat());
579         }
580 
581         accelerator.compute(cc -> computeGraph06(cc, arrayA, arrayB, arrayC, size));
582 
583         for (int i = 0; i < size; i ++) {
584             HATAsserts.assertEquals((arrayA.array(i) - arrayB.array(i)), arrayC.array(i), 0.001f);
585         }
586     }
587 
588     @HatTest
589     @Reflect
590     public void TestVectorArrayView07() {
591         final int size = 1024;
592         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
593         var arrayA = F32ArrayPadded.create(accelerator, size);
594         var arrayB = F32ArrayPadded.create(accelerator, size);
595         var arrayC = F32ArrayPadded.create(accelerator, size);
596 
597         Random r = new Random(19);
598         for (int i = 0; i < size; i++) {
599             arrayA.array(i, r.nextFloat());
600             arrayB.array(i, r.nextFloat());
601         }
602 
603         accelerator.compute(cc -> computeGraph07(cc, arrayA, arrayB, arrayC, size));
604 
605         for (int i = 0; i < size; i ++) {
606             HATAsserts.assertEquals(arrayA.array(i), arrayC.array(i), 0.001f);
607         }
608     }
609 
610     @HatTest
611     @Reflect
612     public void TestVectorArrayView08() {
613         final int size = 1024;
614         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
615         var arrayA = F32ArrayPadded.create(accelerator, size);
616         var arrayB = F32ArrayPadded.create(accelerator, size);
617         var arrayC = F32ArrayPadded.create(accelerator, size);
618 
619         Random r = new Random(19);
620         for (int i = 0; i < size; i++) {
621             arrayA.array(i, r.nextFloat());
622             arrayB.array(i, r.nextFloat());
623         }
624 
625         accelerator.compute(cc -> computeGraph08(cc, arrayA, arrayB, arrayC, size));
626 
627         for (int i = 0; i < size; i ++) {
628             float val = (((arrayA.array(i) + arrayB.array(i)) * arrayA.array(i)) / arrayB.array(i));
629             HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
630         }
631     }
632 
633     @HatTest
634     @Reflect
635     public void TestVectorArrayView09() {
636         final int size = 1024;
637         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
638         var arrayA = F32ArrayPadded.create(accelerator, size);
639         var arrayB = F32ArrayPadded.create(accelerator, size);
640         var arrayC = F32ArrayPadded.create(accelerator, size);
641 
642         Random r = new Random(19);
643         for (int i = 0; i < size; i++) {
644             arrayA.array(i, r.nextFloat());
645             arrayB.array(i, r.nextFloat());
646         }
647 
648         accelerator.compute(cc -> computeGraph09(cc, arrayA, arrayB, arrayC, size));
649 
650         for (int i = 0; i < size; i ++) {
651             float val = (arrayA.array(i) + (arrayB.array(i)) * arrayA.array(i));
652             HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
653         }
654     }
655 
656     @HatTest
657     @Reflect
658     public void TestVectorArrayView10() {
659         final int size = 1024;
660         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
661         var arrayA = F32ArrayPadded.create(accelerator, size);
662         var arrayB = F32ArrayPadded.create(accelerator, size);
663 
664         Random r = new Random(19);
665         for (int i = 0; i < size; i++) {
666             arrayA.array(i, r.nextFloat());
667             arrayB.array(i, r.nextFloat());
668         }
669 
670         accelerator.compute(cc -> computeGraph10(cc, arrayA, arrayB, size));
671 
672         for (int i = 0; i < size; i ++) {
673             HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
674         }
675     }
676 
677     @HatTest
678     @Reflect
679     public void TestVectorArrayView11() {
680         final int size = 1024;
681         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
682         var arrayA = F32ArrayPadded.create(accelerator, size);
683         var arrayB = F32ArrayPadded.create(accelerator, size);
684 
685         Random r = new Random(19);
686         for (int i = 0; i < size; i++) {
687             arrayA.array(i, r.nextFloat());
688             arrayB.array(i, r.nextFloat());
689         }
690 
691         accelerator.compute(cc -> computeGraph11(cc, arrayA, arrayB, size));
692 
693         for (int i = 0; i < size; i ++) {
694             HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
695         }
696     }
697 
698     @HatTest
699     @Reflect
700     public void TestVectorArrayView12() {
701         final int size = 1024;
702         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
703         var arrayA = F32ArrayPadded.create(accelerator, size);
704         var arrayB = F32ArrayPadded.create(accelerator, size);
705 
706         Random r = new Random(19);
707         for (int i = 0; i < size; i++) {
708             arrayA.array(i, r.nextFloat());
709             arrayB.array(i, r.nextFloat());
710         }
711 
712         accelerator.compute(cc -> computeGraph12(cc, arrayA, arrayB, size));
713 
714         for (int i = 0; i < size; i ++) {
715             HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
716         }
717     }
718 }