1 /*
  2  * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 package hat.test;
 26 
 27 import hat.*;
 28 import hat.backend.Backend;
 29 import hat.buffer.*;
 30 import hat.ifacemapper.MappableIface.RO;
 31 import hat.ifacemapper.MappableIface.RW;
 32 import hat.ifacemapper.Schema;
 33 import hat.test.annotation.HatTest;
 34 import hat.test.engine.HATAsserts;
 35 import jdk.incubator.code.CodeReflection;
 36 
 37 import java.lang.invoke.MethodHandles;
 38 import java.util.Random;
 39 
 40 public class TestVectorArrayView {
 41 
 42     @CodeReflection
 43     public static void vectorOps01(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Kernel dispatched by computeGraph01: reads one Float4 from a and one from b at
            // offset gix * 4, adds them with the static Float4.add and writes the sum to c at
            // the same offset. Both loads and the sum are each held in their own local.
 44         if (kernelContext.gix < kernelContext.gsx) {
 45             int index = kernelContext.gix;
 46 
                // Float4 array views over the three float buffers.
 47             Float4[] vA = a.float4ArrayView();
 48             Float4[] vB = b.float4ArrayView();
 49             Float4[] vC = c.float4ArrayView();
                // The view is indexed with index * 4; the host test checks every float of c,
                // so each work-item presumably covers four consecutive floats - confirm.
 50             Float4 floatA = vA[index * 4];
 51             Float4 floatB = vB[index * 4];
 52             Float4 res = Float4.add(floatA, floatB);
 53             vC[index * 4] = res;
 54         }
 55     }
 56 
 57     @CodeReflection
 58     public static void vectorOps01WithFloat4s(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Variant of vectorOps01: the two loads still go through locals, but the result of
            // Float4.add is assigned straight into the c view without a temporary.
            // The host test that drives this kernel is currently commented out in this file.
 59         if (kernelContext.gix < kernelContext.gsx) {
 60             int index = kernelContext.gix;
 61 
 62             Float4[] vA = a.float4ArrayView();
 63             Float4[] vB = b.float4ArrayView();
 64             Float4[] vC = c.float4ArrayView();
 65             Float4 vAFloat = vA[index * 4];
 66             Float4 vBFloat = vB[index * 4];
                // Store of the add result directly into the array view element.
 67             vC[index * 4] = Float4.add(vAFloat, vBFloat);
 68         }
 69     }
 70 
 71     @CodeReflection
 72     public static void vectorOps01WithSeparateAdd(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Variant of vectorOps01: the array-view loads are passed directly as arguments to
            // Float4.add (no locals for the operands); the sum is stored through a local.
            // The host test that drives this kernel is currently commented out in this file.
 73         if (kernelContext.gix < kernelContext.gsx) {
 74             int index = kernelContext.gix;
 75 
 76             Float4[] vA = a.float4ArrayView();
 77             Float4[] vB = b.float4ArrayView();
 78             Float4[] vC = c.float4ArrayView();
                // Both operands are loaded inline inside the call expression.
 79             Float4 res = Float4.add(vA[index * 4], vB[index * 4]);
 80             vC[index * 4] = res;
 81         }
 82     }
 83 
 84     @CodeReflection
 85     public static void vectorOps02(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
            // Kernel dispatched by computeGraph02: loads a mutable Float4 from a, multiplies
            // only its x lane by 10 and stores the vector into b at the same offset.
            // The host test compares b against a read after the run, i.e. it expects a itself
            // to stay unchanged by the x(...) setter.
 86         if (kernelContext.gix < kernelContext.gsx) {
 87             int index = kernelContext.gix;
 88 
 89             Float4.MutableImpl[] vArr = a.float4ArrayView();
 90             Float4.MutableImpl[] bArr = b.float4ArrayView();
 91             Float4.MutableImpl vA = vArr[index * 4];
                // Scaled value goes through a float local before being written back to the lane.
 92             float scaleX = vA.x() * 10.0f;
 93             vA.x(scaleX);
 94             bArr[index * 4] = vA;
 95         }
 96     }
 97 
 98     @CodeReflection
 99     public static void vectorOps03(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
            // Kernel dispatched by computeGraph03: loads a mutable Float4 from a, scales the
            // x, y, z and w lanes by 10, 20, 30 and 40 respectively, and stores the vector into b.
            // All four products are computed into float locals first, then written to the lanes.
100         if (kernelContext.gix < kernelContext.gsx) {
101             int index = kernelContext.gix;
102 
103             Float4.MutableImpl[] vA = a.float4ArrayView();
104             Float4.MutableImpl[] vB = b.float4ArrayView();
105             Float4.MutableImpl vAFloat = vA[index * 4];
                // Read phase: every lane is read before any lane is written.
106             float scaleX = vAFloat.x() * 10.0f;
107             float scaleY = vAFloat.y() * 20.0f;
108             float scaleZ = vAFloat.z() * 30.0f;
109             float scaleW = vAFloat.w() * 40.0f;
                // Write phase.
110             vAFloat.x(scaleX);
111             vAFloat.y(scaleY);
112             vAFloat.z(scaleZ);
113             vAFloat.w(scaleW);
114             vB[index * 4] = vAFloat;
115         }
116     }
117 
118     @CodeReflection
119     public static void vectorOps04(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
            // Kernel dispatched by computeGraph04: same lane scaling as vectorOps03
            // (x*10, y*20, z*30, w*40), but each lane is read, scaled and written back in a
            // single nested call instead of going through float locals.
120         if (kernelContext.gix < kernelContext.gsx) {
121             int index = kernelContext.gix;
122 
123             Float4.MutableImpl[] vA = a.float4ArrayView();
124             Float4.MutableImpl[] vB = b.float4ArrayView();
125             Float4.MutableImpl vAFloat = vA[index * 4];
                // Getter call nested inside the setter call for the same lane.
126             vAFloat.x(vAFloat.x() * 10.0f);
127             vAFloat.y(vAFloat.y() * 20.0f);
128             vAFloat.z(vAFloat.z() * 30.0f);
129             vAFloat.w(vAFloat.w() * 40.0f);
130             vB[index * 4] = vAFloat;
131         }
132     }
133 
134     @CodeReflection
135     public static void vectorOps05(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Kernel dispatched by computeGraph05: computes (a + b) + b per Float4 using the
            // instance method add(...) twice, and stores the result into c.
136         if (kernelContext.gix < kernelContext.gsx) {
137             int index = kernelContext.gix;
138 
139             Float4[] vA = a.float4ArrayView();
140             Float4[] vB = b.float4ArrayView();
141             Float4[] vC = c.float4ArrayView();
142             Float4 floatA = vA[index * 4];
143             Float4 floatB = vB[index * 4];
                // First add produces an intermediate vector that feeds the second add.
144             Float4 temp = floatA.add(floatB);
145             Float4 res = temp.add(floatB);
146             vC[index * 4] = res;
147         }
148     }
149 
150     @CodeReflection
151     public static void vectorOps06(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Kernel dispatched by computeGraph06: computes a - b per Float4 with the static
            // Float4.sub and stores the difference into c.
152         if (kernelContext.gix < kernelContext.gsx) {
153             int index = kernelContext.gix;
154 
155             Float4[] vA = a.float4ArrayView();
156             Float4[] vB = b.float4ArrayView();
157             Float4[] vC = c.float4ArrayView();
158             Float4 floatA = vA[index * 4];
159             Float4 floatB = vB[index * 4];
                // NOTE(review): vD and vE are the same a - b expression; vD is never read and only
                // vE is stored. Confirm whether the duplicate is intentional for this test.
160             Float4 vD = Float4.sub(floatA, floatB);
161             Float4 vE = Float4.sub(floatA, floatB);
162             vC[index * 4] = vE;
163         }
164     }
165 
166     @CodeReflection
167     public static void vectorOps07(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Kernel dispatched by computeGraph07: computes (a + b) - b per Float4 with the
            // instance methods add(...) and sub(...), and stores the result into c.
            // The host test expects c to match a within its tolerance.
168         if (kernelContext.gix < kernelContext.gsx) {
169             int index = kernelContext.gix;
170 
171             Float4[] vAArray = a.float4ArrayView();
172             Float4[] vBArray = b.float4ArrayView();
173             Float4[] vCArray = c.float4ArrayView();
174 
175             Float4 vA = vAArray[index * 4];
176             Float4 vB = vBArray[index * 4];
                // Add followed by a subtract of the same operand.
177             Float4 vC = vA.add(vB);
178             Float4 vD = vC.sub(vB);
179             vCArray[index * 4] = vD;
180         }
181     }
182 
183     @CodeReflection
184     public static void vectorOps08(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Kernel dispatched by computeGraph08: computes ((a + b) * a) / b per Float4 by
            // chaining the instance methods add, mul and div, and stores the result into c.
185         if (kernelContext.gix < kernelContext.gsx) {
186             int index = kernelContext.gix;
187 
188             Float4[] vAArray = a.float4ArrayView();
189             Float4[] vBArray = b.float4ArrayView();
190             Float4[] vCArray = c.float4ArrayView();
191 
192             Float4 vA = vAArray[index * 4];
193             Float4 vB = vBArray[index * 4];
                // Each step consumes the previous step's result as the receiver.
194             Float4 vC = vA.add(vB);
195             Float4 vD = vC.mul(vA);
196             Float4 vE = vD.div(vB);
197             vCArray[index * 4] = vE;
198         }
199     }
200 
201     @CodeReflection
202     public static void vectorOps09(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
            // Kernel dispatched by computeGraph09: computes a + (a * b) per Float4 and stores
            // the result into c.
203         // Checking composition
204         if (kernelContext.gix < kernelContext.gsx) {
205             int index = kernelContext.gix;
206             Float4[] vAArray = a.float4ArrayView();
207             Float4[] vBArray = b.float4ArrayView();
208             Float4[] vCArray = c.float4ArrayView();
209 
210             Float4 vA = vAArray[index * 4];
211             Float4 vB = vBArray[index * 4];
                // The product is an argument (not the receiver) of the following add.
212             Float4 temp = vA.mul(vB);
213             Float4 vC = vA.add(temp);
214             vCArray[index * 4] = vC;
215         }
216     }
217 
218     private interface SharedMemory extends Buffer {
            // Buffer interface used by vectorOps10 and vectorOps12 (via createLocal()) as the
            // staging area between the input and output buffers.
            // Element accessors for the float array declared in the schema below.
219         void array(long index, float value);
220         float array(long index);
            // Schema: a single array member named "array" with 1024 elements.
221         Schema<SharedMemory> schema = Schema.of(SharedMemory.class,
222                 arr -> arr.array("array", 1024));
            // Allocates an instance through the schema on the given accelerator.
            // Not called anywhere in this file.
223         static SharedMemory create(Accelerator accelerator) {
224             return schema.allocate(accelerator);
225         }
            // Allocates an instance on a freshly constructed Accelerator (Backend.FIRST).
            // This is the factory the kernels call; presumably the backend maps it to
            // work-group local memory - confirm against the HAT code generator.
226         static SharedMemory createLocal() {
227             return schema.allocate(new Accelerator(MethodHandles.lookup(), Backend.FIRST));
228         }
            // The three default methods below are stubs (return null / do nothing) on the Java
            // side. Presumably they are recognised and replaced during kernel code generation
            // - TODO confirm. Only float4LocalArrayView() is used in this file.
229         default Float4 float4View(int index) {
230             return null;
231         }
232         default void storeFloat4View(Float4 float4, int index) {
233         }
234         default Float4.MutableImpl[] float4LocalArrayView() {
235             return null;
236         }
237     }
238 
239     @CodeReflection
240     public static void vectorOps10(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
            // Kernel dispatched by computeGraph10: copies one Float4 from a to b, staging it
            // through a SharedMemory array view indexed by lix * 4.
241         SharedMemory sm = SharedMemory.createLocal();
242         if (kernelContext.gix < kernelContext.gsx) {
243             int index = kernelContext.gix;
244             int lix = kernelContext.lix;
245 
246             Float4[] aArr = a.float4ArrayView();
247             Float4[] bArr = b.float4ArrayView();
                // float4LocalArrayView() is declared to return Float4.MutableImpl[]; it is held
                // as Float4[] here.
248             Float4[] smArr = sm.float4LocalArrayView();
249 
250             Float4 vA = aArr[index * 4];
251             smArr[lix * 4] = vA;
                // NOTE(review): the barrier sits inside the gix < gsx guard; this presumably
                // relies on the guard being true for every launched work-item - confirm.
252             kernelContext.barrier();
253             Float4 r = smArr[lix * 4];
254             bArr[index * 4] = r;
255         }
256     }
257 
258     private interface PrivateMemory extends Buffer {
            // Buffer interface used by vectorOps11 (via createPrivate()) as a staging slot.
            // Element accessors for the float array declared in the schema below.
259         void array(long index, float value);
260         float array(long index);
            // Schema: a single array member named "array" with 4 elements (one Float4's worth
            // of floats).
261         Schema<PrivateMemory> schema = Schema.of(PrivateMemory.class,
262                 arr -> arr.array("array", 4));
            // Allocates an instance through the schema on the given accelerator.
            // Not called anywhere in this file.
263         static PrivateMemory create(Accelerator accelerator) {
264             return schema.allocate(accelerator);
265         }
            // Allocates an instance on a freshly constructed Accelerator (Backend.FIRST).
            // This is the factory the kernel calls; presumably the backend maps it to
            // per-work-item private memory - confirm against the HAT code generator.
266         static PrivateMemory createPrivate() {
267             return schema.allocate(new Accelerator(MethodHandles.lookup(), Backend.FIRST));
268         }
            // The three default methods below are stubs (return null / do nothing) on the Java
            // side. Presumably they are recognised and replaced during kernel code generation
            // - TODO confirm. Only float4PrivateArrayView() is used in this file.
269         default Float4 float4View(int index) {
270             return null;
271         }
272         default void storeFloat4View(Float4 float4, int index) {
273         }
274         default Float4[] float4PrivateArrayView() {
275             return null;
276         }
277     }
278 
279     @CodeReflection
280     public static void vectorOps11(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
            // Kernel dispatched by computeGraph11: copies one Float4 from a to b, staging it
            // through element 0 of a PrivateMemory array view.
281         PrivateMemory pm = PrivateMemory.createPrivate();
282         if (kernelContext.gix < kernelContext.gsx) {
283             int index = kernelContext.gix;
284 
285             Float4[] aArr = a.float4ArrayView();
286             Float4[] bArr = b.float4ArrayView();
287             Float4[] pmArr = pm.float4PrivateArrayView();
288 
289             Float4 vA = aArr[index * 4];
                // The staging slot is always index 0, independent of the work-item index.
290             pmArr[0] = vA;
                // NOTE(review): barrier is called inside the gix < gsx guard, between the
                // store and the load of the staging slot - confirm it is needed here.
291             kernelContext.barrier();
292             Float4 r = pmArr[0];
293             bArr[index * 4] = r;
294         }
295     }
296 
297     @CodeReflection
298     public static void vectorOps12(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
            // Kernel dispatched by computeGraph12: copies one Float4 from a to b through the
            // SharedMemory view, like vectorOps10, but fills the staged vector lane by lane
            // using the mutable x/y/z/w setters instead of assigning the whole vector.
299         SharedMemory sm = SharedMemory.createLocal();
300         if (kernelContext.gix < kernelContext.gsx) {
301             int index = kernelContext.gix;
302             int lix = kernelContext.lix;
303             Float4.MutableImpl[] aArr = a.float4ArrayView();
304             Float4.MutableImpl[] bArr = b.float4ArrayView();
305             Float4.MutableImpl[] smArr = sm.float4LocalArrayView();
306 
307             Float4.MutableImpl vA = aArr[index * 4];
                // Load the current staged vector, overwrite all four lanes from vA, store it back.
308             Float4.MutableImpl smVector = smArr[lix * 4];
309             smVector.x(vA.x());
310             smVector.y(vA.y());
311             smVector.z(vA.z());
312             smVector.w(vA.w());
313             smArr[lix * 4] = smVector;
                // NOTE(review): the barrier sits inside the gix < gsx guard; this presumably
                // relies on the guard being true for every launched work-item - confirm.
314             kernelContext.barrier();
315             Float4.MutableImpl r = smArr[lix * 4];
316             bArr[index * 4] = r;
317         }
318     }
319 
320     @CodeReflection
321     public static void computeGraph01(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
            // Compute entry point for vectorOps01: builds a 1D range with a global size of
            // size / 4 and a local size of 128, then dispatches the kernel with a, b and c.
322         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
323         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4), NDRange.Local1D.of(128));
324         cc.dispatchKernel(ndRange, kernelContext -> vectorOps01(kernelContext, a, b, c));
325     }
326 
327     @CodeReflection
328     public static void computeGraph01WithFloat4s(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
            // Compute entry point for vectorOps01WithFloat4s: global size of size / 4, local
            // size of 128. Its only caller in this file is inside a commented-out test.
329         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
330         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4), NDRange.Local1D.of(128));
331         cc.dispatchKernel(ndRange, kernelContext -> vectorOps01WithFloat4s(kernelContext, a, b, c));
332     }
333 
334     @CodeReflection
335     public static void computeGraph01WithSeparateAdd(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
            // Compute entry point for vectorOps01WithSeparateAdd: global size of size / 4,
            // local size of 128. Its only caller in this file is inside a commented-out test.
336         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
337         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4), NDRange.Local1D.of(128));
338         cc.dispatchKernel(ndRange, kernelContext -> vectorOps01WithSeparateAdd(kernelContext, a, b, c));
339     }
340 
341     @CodeReflection
342     public static void computeGraph02(@RO ComputeContext cc, @RW F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
            // Compute entry point for vectorOps02: 1D global range of size / 4, no explicit
            // local size.
            // NOTE(review): a is annotated @RW here, while vectorOps02 and the otherwise
            // parallel computeGraph03/04 annotate it @RO - confirm which one is intended.
343         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
344         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
345         cc.dispatchKernel(ndRange, kernelContext -> vectorOps02(kernelContext, a, b));
346     }
347 
348     @CodeReflection
349     public static void computeGraph03(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
            // Compute entry point for vectorOps03: 1D global range of size / 4, no explicit
            // local size; dispatches the kernel with a as input and b as output.
350         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
351         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
352         cc.dispatchKernel(ndRange, kernelContext -> vectorOps03(kernelContext, a, b));
353     }
354 
355     @CodeReflection
356     public static void computeGraph04(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
            // Compute entry point for vectorOps04: 1D global range of size / 4, no explicit
            // local size; dispatches the kernel with a as input and b as output.
357         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
358         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
359         cc.dispatchKernel(ndRange, kernelContext -> vectorOps04(kernelContext, a, b));
360     }
361 
362     @CodeReflection
363     public static void computeGraph05(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
            // Compute entry point for vectorOps05: 1D global range of size / 4, no explicit
            // local size; dispatches the kernel with a and b as inputs and c as output.
364         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
365         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
366         cc.dispatchKernel(ndRange, kernelContext -> vectorOps05(kernelContext, a, b, c));
367     }
368 
369     @CodeReflection
370     public static void computeGraph06(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
            // Compute entry point for vectorOps06: 1D global range of size / 4, no explicit
            // local size; dispatches the kernel with a and b as inputs and c as output.
371         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
372         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
373         cc.dispatchKernel(ndRange, kernelContext -> vectorOps06(kernelContext, a, b, c));
374     }
375 
376     @CodeReflection
377     public static void computeGraph07(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
            // Compute entry point for vectorOps07: 1D global range of size / 4, no explicit
            // local size; dispatches the kernel with a and b as inputs and c as output.
378         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
379         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
380         cc.dispatchKernel(ndRange, kernelContext -> vectorOps07(kernelContext, a, b, c));
381     }
382 
383     @CodeReflection
384     public static void computeGraph08(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
            // Compute entry point for vectorOps08: 1D global range of size / 4, no explicit
            // local size; dispatches the kernel with a and b as inputs and c as output.
385         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
386         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
387         cc.dispatchKernel(ndRange, kernelContext -> vectorOps08(kernelContext, a, b, c));
388     }
389 
390     @CodeReflection
391     public static void computeGraph09(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c,  int size) {
            // Compute entry point for vectorOps09: 1D global range of size / 4, no explicit
            // local size; dispatches the kernel with a and b as inputs and c as output.
392         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
393         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
394         cc.dispatchKernel(ndRange, kernelContext -> vectorOps09(kernelContext, a, b, c));
395     }
396 
397     @CodeReflection
398     public static void computeGraph10(@RO ComputeContext cc, @RO F32ArrayPadded a,  @RW F32ArrayPadded b, int size) {
            // Compute entry point for vectorOps10: 1D global range of size / 4 with no explicit
            // local size.
            // NOTE(review): the kernel indexes a 1024-float SharedMemory with lix * 4, so it
            // presumably assumes a local size of at most 256 - confirm what the backend picks.
399         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
400         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
401         cc.dispatchKernel(ndRange, kernelContext -> vectorOps10(kernelContext, a, b));
402     }
403 
404     @CodeReflection
405     public static void computeGraph11(@RO ComputeContext cc, @RO F32ArrayPadded a,  @RW F32ArrayPadded b, int size) {
            // Compute entry point for vectorOps11: 1D global range of size / 4, no explicit
            // local size; dispatches the kernel with a as input and b as output.
406         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
407         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
408         cc.dispatchKernel(ndRange, kernelContext -> vectorOps11(kernelContext, a, b));
409     }
410 
411     @CodeReflection
412     public static void computeGraph12(@RO ComputeContext cc, @RO F32ArrayPadded a,  @RW F32ArrayPadded b, int size) {
            // Compute entry point for vectorOps12: 1D global range of size / 4 with no explicit
            // local size.
            // NOTE(review): the kernel indexes a 1024-float SharedMemory with lix * 4, so it
            // presumably assumes a local size of at most 256 - confirm what the backend picks.
413         // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
414         NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
415         cc.dispatchKernel(ndRange, kernelContext -> vectorOps12(kernelContext, a, b));
416     }
417 
418     @HatTest
419     public void TestVectorArrayView01() {
            // Runs computeGraph01 on two seeded random inputs and checks that every float of
            // the output equals the element-wise sum of the inputs (tolerance 0.001).
420         final int size = 1024;
421         Accelerator acc = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
422         var inputA = F32ArrayPadded.create(acc, size);
423         var inputB = F32ArrayPadded.create(acc, size);
424         var output = F32ArrayPadded.create(acc, size);
425 
426         Random rng = new Random(19);
427         for (int k = 0; k < size; k++) {
428             inputA.array(k, rng.nextFloat());
429             inputB.array(k, rng.nextFloat());
430         }
431 
432         acc.compute(cc -> computeGraph01(cc, inputA, inputB, output, size));
433 
434         for (int k = 0; k < size; k++) {
435             float expected = inputA.array(k) + inputB.array(k);
436             HATAsserts.assertEquals(expected, output.array(k), 0.001f);
437         }
438     }
439 
440     // @HatTest
441     // public void TestVectorArrayView01WithFloat4s() {
442     //     final int size = 1024;
443     //     var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
444     //     var arrayA = F32ArrayPadded.create(accelerator, size);
445     //     var arrayB = F32ArrayPadded.create(accelerator, size);
446     //     var arrayC = F32ArrayPadded.create(accelerator, size);
447     //
448     //     Random r = new Random(19);
449     //     for (int i = 0; i < size; i++) {
450     //         arrayA.array(i, r.nextFloat());
451     //         arrayB.array(i, r.nextFloat());
452     //     }
453     //
454     //     accelerator.compute(cc -> computeGraph01WithFloat4s(cc, arrayA, arrayB, arrayC, size));
455     //
456     //     for (int i = 0; i < size; i++) {
457     //         HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
458     //     }
459     //
460     // }
461     //
462     // @HatTest
463     // public void TestVectorArrayView01WithSeparateAdd() {
464     //     final int size = 1024;
465     //     var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
466     //     var arrayA = F32ArrayPadded.create(accelerator, size);
467     //     var arrayB = F32ArrayPadded.create(accelerator, size);
468     //     var arrayC = F32ArrayPadded.create(accelerator, size);
469     //
470     //     Random r = new Random(19);
471     //     for (int i = 0; i < size; i++) {
472     //         arrayA.array(i, r.nextFloat());
473     //         arrayB.array(i, r.nextFloat());
474     //     }
475     //
476     //     accelerator.compute(cc -> computeGraph01WithSeparateAdd(cc, arrayA, arrayB, arrayC, size));
477     //
478     //     for (int i = 0; i < size; i++) {
479     //         HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
480     //     }
481     //
482     // }
483 
484     @HatTest
485     public void TestVectorArrayView02() {
            // Runs computeGraph02 and checks, per group of four floats, that lane 0 of the
            // result is the source value times 10 and lanes 1..3 are copied unchanged.
486         final int size = 1024;
487         Accelerator acc = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
488         var source = F32ArrayPadded.create(acc, size);
489         var result = F32ArrayPadded.create(acc, size);
490 
491         Random rng = new Random(19);
492         for (int k = 0; k < size; k++) {
493             source.array(k, rng.nextFloat());
494         }
495 
496         acc.compute(cc -> computeGraph02(cc, source, result, size));
497 
498         for (int base = 0; base < size; base += 4) {
499             HATAsserts.assertEquals(source.array(base) * 10.0f, result.array(base), 0.001f);
500             for (int lane = 1; lane < 4; lane++) {
501                 HATAsserts.assertEquals(source.array(base + lane), result.array(base + lane), 0.001f);
502             }
503         }
504     }
505 
506     @HatTest
507     public void TestVectorArrayView03() {
            // Runs computeGraph03 and checks, per group of four floats, that lanes 0..3 of the
            // result are the source values times 10, 20, 30 and 40.
508         final int size = 1024;
509         Accelerator acc = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
510         var source = F32ArrayPadded.create(acc, size);
511         var result = F32ArrayPadded.create(acc, size);
512 
513         Random rng = new Random(19);
514         for (int k = 0; k < size; k++) {
515             source.array(k, rng.nextFloat());
516         }
517 
518         acc.compute(cc -> computeGraph03(cc, source, result, size));
519 
520         for (int base = 0; base < size; base += 4) {
521             for (int lane = 0; lane < 4; lane++) {
522                 float factor = (lane + 1) * 10.0f;
523                 HATAsserts.assertEquals(source.array(base + lane) * factor, result.array(base + lane), 0.001f);
524             }
525         }
526     }
527 
528     @HatTest
529     public void TestVectorArrayView04() {
            // Runs computeGraph04 and checks the same per-lane scaling as the vectorOps03 test:
            // lanes 0..3 of each group of four are multiplied by 10, 20, 30 and 40.
530         final int size = 1024;
531         Accelerator acc = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
532         var source = F32ArrayPadded.create(acc, size);
533         var result = F32ArrayPadded.create(acc, size);
534 
535         Random rng = new Random(19);
536         for (int k = 0; k < size; k++) {
537             source.array(k, rng.nextFloat());
538         }
539 
540         acc.compute(cc -> computeGraph04(cc, source, result, size));
541 
542         final float[] laneScale = {10.0f, 20.0f, 30.0f, 40.0f};
543         for (int base = 0; base < size; base += 4) {
544             for (int lane = 0; lane < 4; lane++) {
545                 HATAsserts.assertEquals(source.array(base + lane) * laneScale[lane], result.array(base + lane), 0.001f);
546             }
547         }
548     }
549 
550     @HatTest
551     public void TestVectorArrayView05() {
            // Runs computeGraph05 and checks that every output float equals (a + b) + b.
552         final int size = 1024;
553         Accelerator acc = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
554         var inputA = F32ArrayPadded.create(acc, size);
555         var inputB = F32ArrayPadded.create(acc, size);
556         var output = F32ArrayPadded.create(acc, size);
557         Random rng = new Random(19);
558         for (int k = 0; k < size; k++) {
559             inputA.array(k, rng.nextFloat());
560             inputB.array(k, rng.nextFloat());
561         }
562 
563         acc.compute(cc -> computeGraph05(cc, inputA, inputB, output, size));
564 
565         for (int k = 0; k < size; k++) {
566             float expected = inputA.array(k) + inputB.array(k) + inputB.array(k);
567             HATAsserts.assertEquals(expected, output.array(k), 0.001f);
568         }
569     }
570 
571     @HatTest
572     public void TestVectorArrayView06() {
            // Runs computeGraph06 and checks that every output float equals a - b.
573         final int size = 1024;
574         Accelerator acc = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
575         var minuend = F32ArrayPadded.create(acc, size);
576         var subtrahend = F32ArrayPadded.create(acc, size);
577         var difference = F32ArrayPadded.create(acc, size);
578         Random rng = new Random(19);
579         for (int k = 0; k < size; k++) {
580             minuend.array(k, rng.nextFloat());
581             subtrahend.array(k, rng.nextFloat());
582         }
583 
584         acc.compute(cc -> computeGraph06(cc, minuend, subtrahend, difference, size));
585 
586         for (int k = 0; k < size; k++) {
587             float expected = minuend.array(k) - subtrahend.array(k);
588             HATAsserts.assertEquals(expected, difference.array(k), 0.001f);
589         }
590     }
591 
592     @HatTest
593     public void TestVectorArrayView07() {
            // Runs computeGraph07, whose kernel computes (a + b) - b, and checks that every
            // output float matches the first input within the tolerance.
594         final int size = 1024;
595         Accelerator acc = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
596         var inputA = F32ArrayPadded.create(acc, size);
597         var inputB = F32ArrayPadded.create(acc, size);
598         var output = F32ArrayPadded.create(acc, size);
599 
600         Random rng = new Random(19);
601         for (int k = 0; k < size; k++) {
602             inputA.array(k, rng.nextFloat());
603             inputB.array(k, rng.nextFloat());
604         }
605 
606         acc.compute(cc -> computeGraph07(cc, inputA, inputB, output, size));
607 
608         for (int k = 0; k < size; k++) {
609             HATAsserts.assertEquals(inputA.array(k), output.array(k), 0.001f);
610         }
611     }
612 
613     @HatTest
614     public void TestVectorArrayView08() {
            // Runs computeGraph08 and checks that every output float equals ((a + b) * a) / b.
615         final int size = 1024;
616         Accelerator acc = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
617         var inputA = F32ArrayPadded.create(acc, size);
618         var inputB = F32ArrayPadded.create(acc, size);
619         var output = F32ArrayPadded.create(acc, size);
620         Random rng = new Random(19);
621         for (int k = 0; k < size; k++) {
622             inputA.array(k, rng.nextFloat());
623             inputB.array(k, rng.nextFloat());
624         }
625 
626         acc.compute(cc -> computeGraph08(cc, inputA, inputB, output, size));
627 
628         for (int k = 0; k < size; k++) {
629             float sum = inputA.array(k) + inputB.array(k);
630             float expected = sum * inputA.array(k) / inputB.array(k);
631             HATAsserts.assertEquals(expected, output.array(k), 0.001f);
632         }
633     }
634 
635     @HatTest
636     public void TestVectorArrayView09() {
            // Runs computeGraph09 and checks that every output float equals a + (b * a).
637         final int size = 1024;
638         Accelerator acc = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
639         var inputA = F32ArrayPadded.create(acc, size);
640         var inputB = F32ArrayPadded.create(acc, size);
641         var output = F32ArrayPadded.create(acc, size);
642 
643         Random rng = new Random(19);
644         for (int k = 0; k < size; k++) {
645             inputA.array(k, rng.nextFloat());
646             inputB.array(k, rng.nextFloat());
647         }
648 
649         acc.compute(cc -> computeGraph09(cc, inputA, inputB, output, size));
650 
651         for (int k = 0; k < size; k++) {
652             float expected = inputA.array(k) + inputB.array(k) * inputA.array(k);
653             HATAsserts.assertEquals(expected, output.array(k), 0.001f);
654         }
655     }
656 
657     @HatTest
658     public void TestVectorArrayView10() {
            // Runs computeGraph10 (copy staged through SharedMemory) and checks that the
            // destination equals the source. The destination is pre-filled with different
            // random values, so a kernel that wrote nothing would fail the check.
659         final int size = 1024;
660         Accelerator acc = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
661         var source = F32ArrayPadded.create(acc, size);
662         var destination = F32ArrayPadded.create(acc, size);
663         Random rng = new Random(19);
664         for (int k = 0; k < size; k++) {
665             source.array(k, rng.nextFloat());
666             destination.array(k, rng.nextFloat());
667         }
668 
669         acc.compute(cc -> computeGraph10(cc, source, destination, size));
670 
671         for (int k = 0; k < size; k++) {
672             float expected = source.array(k);
673             HATAsserts.assertEquals(expected, destination.array(k), 0.001f);
674         }
675     }
676 
677     @HatTest
678     public void TestVectorArrayView11() {
            // Runs computeGraph11 (copy staged through PrivateMemory) and checks that the
            // destination, pre-filled with other random values, ends up equal to the source.
679         final int size = 1024;
680         Accelerator acc = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
681         var source = F32ArrayPadded.create(acc, size);
682         var destination = F32ArrayPadded.create(acc, size);
683 
684         Random rng = new Random(19);
685         for (int k = 0; k < size; k++) {
686             source.array(k, rng.nextFloat());
687             destination.array(k, rng.nextFloat());
688         }
689 
690         acc.compute(cc -> computeGraph11(cc, source, destination, size));
691 
692         for (int k = 0; k < size; k++) {
693             HATAsserts.assertEquals(source.array(k), destination.array(k), 0.001f);
694         }
695     }
696 
697     @HatTest
698     public void TestVectorArrayView12() {
            // Runs computeGraph12 (lane-by-lane copy staged through SharedMemory) and checks
            // that the destination, pre-filled with other random values, equals the source.
699         final int size = 1024;
700         Accelerator acc = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
701         var source = F32ArrayPadded.create(acc, size);
702         var destination = F32ArrayPadded.create(acc, size);
703         Random rng = new Random(19);
704         for (int k = 0; k < size; k++) {
705             source.array(k, rng.nextFloat());
706             destination.array(k, rng.nextFloat());
707         }
708 
709         acc.compute(cc -> computeGraph12(cc, source, destination, size));
710 
711         for (int k = 0; k < size; k++) {
712             float expected = source.array(k);
713             HATAsserts.assertEquals(expected, destination.array(k), 0.001f);
714         }
715     }
716 }