1 /*
2 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 package hat.test;
26
27 import jdk.incubator.code.Reflect;
28
29 import hat.*;
30 import hat.backend.Backend;
31 import hat.buffer.*;
32 import hat.ifacemapper.MappableIface.RO;
33 import hat.ifacemapper.MappableIface.RW;
34 import hat.ifacemapper.Schema;
35 import hat.test.annotation.HatTest;
36 import hat.test.engine.HATAsserts;
37 import jdk.incubator.code.Reflect;
38
39 import java.lang.invoke.MethodHandles;
40 import java.util.Random;
41
42 public class TestVectorArrayView {
43
/**
 * Kernel: loads one {@code Float4} from {@code a} and one from {@code b}, adds them with
 * the static {@code Float4.add}, and stores the sum into {@code c}.
 *
 * <p>Shape under test: both operands and the result are each held in a named local
 * before the store. Each thread addresses the array views at {@code gix * 4}.
 *
 * @param kernelContext supplies the thread's global index ({@code gix}) and the global size ({@code gsx})
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer that receives the sum
 */
@Reflect
public static void vectorOps01(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Only threads whose global index is below the global size do any work.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        Float4 floatA = vA[index * 4];
        Float4 floatB = vB[index * 4];
        Float4 res = Float4.add(floatA, floatB);
        vC[index * 4] = res;
    }
}
58
/**
 * Kernel: same addition as {@code vectorOps01}, but the result of {@code Float4.add}
 * is assigned straight into the output view instead of going through a local.
 *
 * @param kernelContext supplies the thread's global index ({@code gix}) and the global size ({@code gsx})
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer that receives the sum
 */
@Reflect
public static void vectorOps01WithFloat4s(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Only threads whose global index is below the global size do any work.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        Float4 vAFloat = vA[index * 4];
        Float4 vBFloat = vB[index * 4];
        // No intermediate local: the add expression is the right-hand side of the store.
        vC[index * 4] = Float4.add(vAFloat, vBFloat);
    }
}
72
/**
 * Kernel: same addition as {@code vectorOps01}, but the two array-view loads appear
 * directly as the arguments of {@code Float4.add}; only the sum is held in a local.
 *
 * @param kernelContext supplies the thread's global index ({@code gix}) and the global size ({@code gsx})
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer that receives the sum
 */
@Reflect
public static void vectorOps01WithSeparateAdd(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Only threads whose global index is below the global size do any work.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        // Loads are inlined into the call; there are no per-operand locals.
        Float4 res = Float4.add(vA[index * 4], vB[index * 4]);
        vC[index * 4] = res;
    }
}
85
/**
 * Kernel: loads a mutable {@code Float4} from {@code a}, multiplies its {@code x}
 * component by 10, and stores the whole vector into {@code b}.
 *
 * <p>The matching host test expects {@code b}'s first lane of every group of four to be
 * ten times {@code a}'s, and the other three lanes to equal {@code a}'s.
 *
 * @param kernelContext supplies the thread's global index ({@code gix}) and the global size ({@code gsx})
 * @param a input buffer
 * @param b output buffer
 */
@Reflect
public static void vectorOps02(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Only threads whose global index is below the global size do any work.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4.MutableImpl[] vArr = a.float4ArrayView();
        Float4.MutableImpl[] bArr = b.float4ArrayView();
        Float4.MutableImpl vA = vArr[index * 4];
        // Read x, scale it in a float local, then write it back through the setter.
        float scaleX = vA.x() * 10.0f;
        vA.x(scaleX);
        bArr[index * 4] = vA;
    }
}
99
/**
 * Kernel: loads a mutable {@code Float4} from {@code a}, scales its x, y, z and w
 * components by 10, 20, 30 and 40 respectively, and stores the vector into {@code b}.
 *
 * <p>Shape under test: every scaled component is first computed into its own float
 * local, and only afterwards written back through the component setters.
 *
 * @param kernelContext supplies the thread's global index ({@code gix}) and the global size ({@code gsx})
 * @param a input buffer
 * @param b output buffer
 */
@Reflect
public static void vectorOps03(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Only threads whose global index is below the global size do any work.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4.MutableImpl[] vA = a.float4ArrayView();
        Float4.MutableImpl[] vB = b.float4ArrayView();
        Float4.MutableImpl vAFloat = vA[index * 4];
        // All four reads happen before any of the four writes.
        float scaleX = vAFloat.x() * 10.0f;
        float scaleY = vAFloat.y() * 20.0f;
        float scaleZ = vAFloat.z() * 30.0f;
        float scaleW = vAFloat.w() * 40.0f;
        vAFloat.x(scaleX);
        vAFloat.y(scaleY);
        vAFloat.z(scaleZ);
        vAFloat.w(scaleW);
        vB[index * 4] = vAFloat;
    }
}
119
/**
 * Kernel: same per-component scaling as {@code vectorOps03} (x*10, y*20, z*30, w*40),
 * but each getter call and multiplication is nested directly inside the setter call.
 *
 * @param kernelContext supplies the thread's global index ({@code gix}) and the global size ({@code gsx})
 * @param a input buffer
 * @param b output buffer
 */
@Reflect
public static void vectorOps04(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Only threads whose global index is below the global size do any work.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4.MutableImpl[] vA = a.float4ArrayView();
        Float4.MutableImpl[] vB = b.float4ArrayView();
        Float4.MutableImpl vAFloat = vA[index * 4];
        // Read-scale-write per component, with no intermediate float locals.
        vAFloat.x(vAFloat.x() * 10.0f);
        vAFloat.y(vAFloat.y() * 20.0f);
        vAFloat.z(vAFloat.z() * 30.0f);
        vAFloat.w(vAFloat.w() * 40.0f);
        vB[index * 4] = vAFloat;
    }
}
135
/**
 * Kernel: computes {@code (a + b) + b} on one {@code Float4} using the instance method
 * {@code add} twice, and stores the result into {@code c}.
 *
 * @param kernelContext supplies the thread's global index ({@code gix}) and the global size ({@code gsx})
 * @param a first input buffer
 * @param b second input buffer (added twice)
 * @param c output buffer
 */
@Reflect
public static void vectorOps05(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Only threads whose global index is below the global size do any work.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        Float4 floatA = vA[index * 4];
        Float4 floatB = vB[index * 4];
        // The result of the first add is the receiver of the second add.
        Float4 temp = floatA.add(floatB);
        Float4 res = temp.add(floatB);
        vC[index * 4] = res;
    }
}
151
/**
 * Kernel: computes {@code a - b} on one {@code Float4} with the static
 * {@code Float4.sub} and stores the result into {@code c}.
 *
 * @param kernelContext supplies the thread's global index ({@code gix}) and the global size ({@code gsx})
 * @param a minuend buffer
 * @param b subtrahend buffer
 * @param c output buffer
 */
@Reflect
public static void vectorOps06(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Only threads whose global index is below the global size do any work.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        Float4 floatA = vA[index * 4];
        Float4 floatB = vB[index * 4];
        // NOTE(review): vD is computed but never read; only vE is stored. Presumably the
        // duplicate subtraction is deliberate (two identical ops in one kernel) - confirm
        // before removing it.
        Float4 vD = Float4.sub(floatA, floatB);
        Float4 vE = Float4.sub(floatA, floatB);
        vC[index * 4] = vE;
    }
}
167
/**
 * Kernel: computes {@code (a + b) - b} on one {@code Float4} using the instance methods
 * {@code add} and {@code sub}, and stores the result into {@code c}.
 *
 * <p>The matching host test expects {@code c} to equal {@code a} within its tolerance.
 *
 * @param kernelContext supplies the thread's global index ({@code gix}) and the global size ({@code gsx})
 * @param a first input buffer
 * @param b second input buffer (added, then subtracted)
 * @param c output buffer
 */
@Reflect
public static void vectorOps07(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Only threads whose global index is below the global size do any work.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vAArray = a.float4ArrayView();
        Float4[] vBArray = b.float4ArrayView();
        Float4[] vCArray = c.float4ArrayView();

        Float4 vA = vAArray[index * 4];
        Float4 vB = vBArray[index * 4];
        // Add then subtract the same operand.
        Float4 vC = vA.add(vB);
        Float4 vD = vC.sub(vB);
        vCArray[index * 4] = vD;
    }
}
184
/**
 * Kernel: computes {@code ((a + b) * a) / b} on one {@code Float4} by chaining the
 * instance methods {@code add}, {@code mul} and {@code div}, and stores the result
 * into {@code c}.
 *
 * @param kernelContext supplies the thread's global index ({@code gix}) and the global size ({@code gsx})
 * @param a first input buffer
 * @param b second input buffer (also the divisor)
 * @param c output buffer
 */
@Reflect
public static void vectorOps08(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Only threads whose global index is below the global size do any work.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vAArray = a.float4ArrayView();
        Float4[] vBArray = b.float4ArrayView();
        Float4[] vCArray = c.float4ArrayView();

        Float4 vA = vAArray[index * 4];
        Float4 vB = vBArray[index * 4];
        // Each step consumes the previous step's result as its receiver.
        Float4 vC = vA.add(vB);
        Float4 vD = vC.mul(vA);
        Float4 vE = vD.div(vB);
        vCArray[index * 4] = vE;
    }
}
202
/**
 * Kernel: computes {@code a + (a * b)} on one {@code Float4} and stores the result
 * into {@code c}. The product is held in a local and then used as the argument of
 * {@code add}.
 *
 * @param kernelContext supplies the thread's global index ({@code gix}) and the global size ({@code gsx})
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 */
@Reflect
public static void vectorOps09(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Checking composition
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;
        Float4[] vAArray = a.float4ArrayView();
        Float4[] vBArray = b.float4ArrayView();
        Float4[] vCArray = c.float4ArrayView();

        Float4 vA = vAArray[index * 4];
        Float4 vB = vBArray[index * 4];
        // Multiply first, then add the product to the original vA.
        Float4 temp = vA.mul(vB);
        Float4 vC = vA.add(temp);
        vCArray[index * 4] = vC;
    }
}
219
/**
 * Buffer interface with a single float array member named {@code "array"} of 1024
 * elements, used by {@code vectorOps10} and {@code vectorOps12} as scratch storage.
 *
 * <p>NOTE(review): the name and the {@code createLocal}/{@code float4LocalArrayView}
 * methods suggest this is mapped to work-group local (shared) memory by the HAT
 * backend - confirm against the backend documentation.
 */
private interface SharedMemory extends Buffer {
    /** Writes {@code value} at position {@code index} of the float array. */
    void array(long index, float value);
    /** Reads the float at position {@code index} of the float array. */
    float array(long index);
    /** Layout descriptor: one array member called "array" with 1024 entries. */
    Schema<SharedMemory> schema = Schema.of(SharedMemory.class,
            arr -> arr.array("array", 1024));
    /** Allocates an instance through {@link #schema} on the given accelerator. */
    static SharedMemory create(Accelerator accelerator) {
        return schema.allocate(accelerator);
    }
    /** Allocates an instance through {@link #schema} on a newly constructed accelerator. */
    static SharedMemory createLocal() {
        return schema.allocate(new Accelerator(MethodHandles.lookup(), Backend.FIRST));
    }
    // The three default methods below have placeholder bodies (null / no-op).
    // Presumably they are recognised by name and replaced during kernel code
    // generation - TODO confirm.
    default Float4 float4View(int index) {
        return null;
    }
    default void storeFloat4View(Float4 float4, int index) {
    }
    default Float4.MutableImpl[] float4LocalArrayView() {
        return null;
    }
}
240
/**
 * Kernel: copies one {@code Float4} from {@code a} to {@code b} by way of a
 * {@code SharedMemory} scratch buffer.
 *
 * <p>The vector is stored into the scratch view at {@code lix * 4}, a barrier is
 * issued, and the same slot is read back and written to {@code b} at {@code gix * 4}.
 * The matching host test expects {@code b} to equal {@code a}.
 *
 * @param kernelContext supplies the global index ({@code gix}), local index ({@code lix}),
 *                      global size ({@code gsx}) and the {@code barrier()} call
 * @param a input buffer
 * @param b output buffer
 */
@Reflect
public static void vectorOps10(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Scratch buffer is created before the bounds guard.
    SharedMemory sm = SharedMemory.createLocal();
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;
        int lix = kernelContext.lix;

        Float4[] aArr = a.float4ArrayView();
        Float4[] bArr = b.float4ArrayView();
        Float4[] smArr = sm.float4LocalArrayView();

        Float4 vA = aArr[index * 4];
        // Scratch slot is addressed with the local index, the buffers with the global index.
        smArr[lix * 4] = vA;
        kernelContext.barrier();
        Float4 r = smArr[lix * 4];
        bArr[index * 4] = r;
    }
}
259
/**
 * Buffer interface with a single float array member named {@code "array"} of 4
 * elements, used by {@code vectorOps11} as scratch storage for one {@code Float4}.
 *
 * <p>NOTE(review): the name and the {@code createPrivate}/{@code float4PrivateArrayView}
 * methods suggest this is mapped to per-thread private memory by the HAT backend -
 * confirm against the backend documentation.
 */
private interface PrivateMemory extends Buffer {
    /** Writes {@code value} at position {@code index} of the float array. */
    void array(long index, float value);
    /** Reads the float at position {@code index} of the float array. */
    float array(long index);
    /** Layout descriptor: one array member called "array" with 4 entries. */
    Schema<PrivateMemory> schema = Schema.of(PrivateMemory.class,
            arr -> arr.array("array", 4));
    /** Allocates an instance through {@link #schema} on the given accelerator. */
    static PrivateMemory create(Accelerator accelerator) {
        return schema.allocate(accelerator);
    }
    /** Allocates an instance through {@link #schema} on a newly constructed accelerator. */
    static PrivateMemory createPrivate() {
        return schema.allocate(new Accelerator(MethodHandles.lookup(), Backend.FIRST));
    }
    // The three default methods below have placeholder bodies (null / no-op).
    // Presumably they are recognised by name and replaced during kernel code
    // generation - TODO confirm.
    default Float4 float4View(int index) {
        return null;
    }
    default void storeFloat4View(Float4 float4, int index) {
    }
    default Float4[] float4PrivateArrayView() {
        return null;
    }
}
280
/**
 * Kernel: copies one {@code Float4} from {@code a} to {@code b} by way of a
 * {@code PrivateMemory} scratch buffer.
 *
 * <p>The vector is stored into slot 0 of the scratch view, a barrier is issued, and
 * slot 0 is read back and written to {@code b} at {@code gix * 4}. The matching host
 * test expects {@code b} to equal {@code a}.
 *
 * @param kernelContext supplies the global index ({@code gix}), global size ({@code gsx})
 *                      and the {@code barrier()} call
 * @param a input buffer
 * @param b output buffer
 */
@Reflect
public static void vectorOps11(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Scratch buffer is created before the bounds guard.
    PrivateMemory pm = PrivateMemory.createPrivate();
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] aArr = a.float4ArrayView();
        Float4[] bArr = b.float4ArrayView();
        Float4[] pmArr = pm.float4PrivateArrayView();

        Float4 vA = aArr[index * 4];
        // Every thread uses slot 0 of its scratch view.
        pmArr[0] = vA;
        kernelContext.barrier();
        Float4 r = pmArr[0];
        bArr[index * 4] = r;
    }
}
298
/**
 * Kernel: copies one {@code Float4} from {@code a} to {@code b} through a
 * {@code SharedMemory} scratch buffer, transferring the vector component by component.
 *
 * <p>A mutable vector is loaded from the scratch view at {@code lix * 4}, its x, y, z
 * and w are overwritten with those of the vector loaded from {@code a}, and it is stored
 * back to the same scratch slot. After a barrier the slot is read again and written to
 * {@code b} at {@code gix * 4}. The matching host test expects {@code b} to equal {@code a}.
 *
 * @param kernelContext supplies the global index ({@code gix}), local index ({@code lix}),
 *                      global size ({@code gsx}) and the {@code barrier()} call
 * @param a input buffer
 * @param b output buffer
 */
@Reflect
public static void vectorOps12(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Scratch buffer is created before the bounds guard.
    SharedMemory sm = SharedMemory.createLocal();
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;
        int lix = kernelContext.lix;
        Float4.MutableImpl[] aArr = a.float4ArrayView();
        Float4.MutableImpl[] bArr = b.float4ArrayView();
        Float4.MutableImpl[] smArr = sm.float4LocalArrayView();

        Float4.MutableImpl vA = aArr[index * 4];
        Float4.MutableImpl smVector = smArr[lix * 4];
        // Component-wise copy through getters and setters rather than a vector assignment.
        smVector.x(vA.x());
        smVector.y(vA.y());
        smVector.z(vA.z());
        smVector.w(vA.w());
        smArr[lix * 4] = smVector;
        kernelContext.barrier();
        Float4.MutableImpl r = smArr[lix * 4];
        bArr[index * 4] = r;
    }
}
321
/**
 * Compute graph: dispatches {@code vectorOps01} over a 1D range built from
 * {@code size / 4} and 128.
 *
 * <p>NOTE(review): the second argument of {@code NDRange.of1D} is presumably the
 * local (work-group) size - confirm against the NDRange API.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    first input buffer
 * @param b    second input buffer
 * @param c    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph01(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel( NDRange.of1D(size/4,128), kernelContext -> vectorOps01(kernelContext, a, b, c));
}
327
/**
 * Compute graph: dispatches {@code vectorOps01WithFloat4s} over a 1D range built from
 * {@code size / 4} and 128.
 *
 * <p>NOTE(review): the second argument of {@code NDRange.of1D} is presumably the
 * local (work-group) size - confirm against the NDRange API.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    first input buffer
 * @param b    second input buffer
 * @param c    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph01WithFloat4s(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4,128), kernelContext -> vectorOps01WithFloat4s(kernelContext, a, b, c));
}
333
/**
 * Compute graph: dispatches {@code vectorOps01WithSeparateAdd} over a 1D range built
 * from {@code size / 4} and 128.
 *
 * <p>NOTE(review): the second argument of {@code NDRange.of1D} is presumably the
 * local (work-group) size - confirm against the NDRange API.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    first input buffer
 * @param b    second input buffer
 * @param c    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph01WithSeparateAdd(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4,128), kernelContext -> vectorOps01WithSeparateAdd(kernelContext, a, b, c));
}
339
/**
 * Compute graph: dispatches {@code vectorOps02} over a 1D range of {@code size / 4}.
 *
 * <p>NOTE(review): {@code a} is annotated {@code @RW} here but {@code @RO} on
 * {@code vectorOps02} itself, and the sibling graphs declare it {@code @RO}. Confirm
 * whether the difference is intentional before aligning the annotations.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    input buffer
 * @param b    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph02(@RO ComputeContext cc, @RW F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps02(kernelContext, a, b));
}
345
/**
 * Compute graph: dispatches {@code vectorOps03} over a 1D range of {@code size / 4}.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    input buffer
 * @param b    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph03(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps03(kernelContext, a, b));
}
351
/**
 * Compute graph: dispatches {@code vectorOps04} over a 1D range of {@code size / 4}.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    input buffer
 * @param b    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph04(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps04(kernelContext, a, b));
}
357
/**
 * Compute graph: dispatches {@code vectorOps05} over a 1D range of {@code size / 4}.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    first input buffer
 * @param b    second input buffer
 * @param c    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph05(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps05(kernelContext, a, b, c));
}
363
/**
 * Compute graph: dispatches {@code vectorOps06} over a 1D range of {@code size / 4}.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    first input buffer
 * @param b    second input buffer
 * @param c    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph06(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps06(kernelContext, a, b, c));
}
369
/**
 * Compute graph: dispatches {@code vectorOps07} over a 1D range of {@code size / 4}.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    first input buffer
 * @param b    second input buffer
 * @param c    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph07(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps07(kernelContext, a, b, c));
}
375
/**
 * Compute graph: dispatches {@code vectorOps08} over a 1D range of {@code size / 4}.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    first input buffer
 * @param b    second input buffer
 * @param c    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph08(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps08(kernelContext, a, b, c));
}
381
/**
 * Compute graph: dispatches {@code vectorOps09} over a 1D range of {@code size / 4}.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    first input buffer
 * @param b    second input buffer
 * @param c    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph09(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps09(kernelContext, a, b, c));
}
387
/**
 * Compute graph: dispatches {@code vectorOps10} over a 1D range of {@code size / 4}.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    input buffer
 * @param b    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph10(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps10(kernelContext, a, b));
}
393
/**
 * Compute graph: dispatches {@code vectorOps11} over a 1D range of {@code size / 4}.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    input buffer
 * @param b    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph11(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps11(kernelContext, a, b));
}
399
/**
 * Compute graph: dispatches {@code vectorOps12} over a 1D range of {@code size / 4}.
 *
 * @param cc   compute context used to dispatch the kernel
 * @param a    input buffer
 * @param b    output buffer
 * @param size number of float elements in each buffer
 */
@Reflect
public static void computeGraph12(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps12(kernelContext, a, b));
}
405
406 @HatTest
407 @Reflect
408 public void TestVectorArrayView01() {
409 final int size = 1024;
410 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
411 var arrayA = F32ArrayPadded.create(accelerator, size);
412 var arrayB = F32ArrayPadded.create(accelerator, size);
413 var arrayC = F32ArrayPadded.create(accelerator, size);
414
415 Random r = new Random(19);
416 for (int i = 0; i < size; i++) {
417 arrayA.array(i, r.nextFloat());
418 arrayB.array(i, r.nextFloat());
419 }
420
421 accelerator.compute(cc -> computeGraph01(cc, arrayA, arrayB, arrayC, size));
422
423 for (int i = 0; i < size; i++) {
424 HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
425 }
426
427 }
428
429 // @HatTest
430 // @Reflect
431 // public void TestVectorArrayView01WithFloat4s() {
432 // final int size = 1024;
433 // var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
434 // var arrayA = F32ArrayPadded.create(accelerator, size);
435 // var arrayB = F32ArrayPadded.create(accelerator, size);
436 // var arrayC = F32ArrayPadded.create(accelerator, size);
437 //
438 // Random r = new Random(19);
439 // for (int i = 0; i < size; i++) {
440 // arrayA.array(i, r.nextFloat());
441 // arrayB.array(i, r.nextFloat());
442 // }
443 //
444 // accelerator.compute(cc -> computeGraph01WithFloat4s(cc, arrayA, arrayB, arrayC, size));
445 //
446 // for (int i = 0; i < size; i++) {
447 // HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
448 // }
449 //
450 // }
451 //
452 // @HatTest
453 // @Reflect
454 // public void TestVectorArrayView01WithSeparateAdd() {
455 // final int size = 1024;
456 // var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
457 // var arrayA = F32ArrayPadded.create(accelerator, size);
458 // var arrayB = F32ArrayPadded.create(accelerator, size);
459 // var arrayC = F32ArrayPadded.create(accelerator, size);
460 //
461 // Random r = new Random(19);
462 // for (int i = 0; i < size; i++) {
463 // arrayA.array(i, r.nextFloat());
464 // arrayB.array(i, r.nextFloat());
465 // }
466 //
467 // accelerator.compute(cc -> computeGraph01WithSeparateAdd(cc, arrayA, arrayB, arrayC, size));
468 //
469 // for (int i = 0; i < size; i++) {
470 // HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
471 // }
472 //
473 // }
474
475 @HatTest
476 @Reflect
477 public void TestVectorArrayView02() {
478 final int size = 1024;
479 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
480 var arrayA = F32ArrayPadded.create(accelerator, size);
481 var arrayB = F32ArrayPadded.create(accelerator, size);
482
483 Random r = new Random(19);
484 for (int i = 0; i < size; i++) {
485 arrayA.array(i, r.nextFloat());
486 }
487
488 accelerator.compute(cc -> computeGraph02(cc, arrayA, arrayB, size));
489
490 for (int i = 0; i < size; i += 4) {
491 HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
492 HATAsserts.assertEquals((arrayA.array(i + 1)), arrayB.array(i + 1), 0.001f);
493 HATAsserts.assertEquals((arrayA.array(i + 2)), arrayB.array(i + 2), 0.001f);
494 HATAsserts.assertEquals((arrayA.array(i + 3)), arrayB.array(i + 3), 0.001f);
495 }
496 }
497
498 @HatTest
499 @Reflect
500 public void TestVectorArrayView03() {
501 final int size = 1024;
502 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
503 var arrayA = F32ArrayPadded.create(accelerator, size);
504 var arrayB = F32ArrayPadded.create(accelerator, size);
505
506 Random r = new Random(19);
507 for (int i = 0; i < size; i++) {
508 arrayA.array(i, r.nextFloat());
509 }
510
511 accelerator.compute(cc -> computeGraph03(cc, arrayA, arrayB, size));
512
513 for (int i = 0; i < size; i += 4) {
514 HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
515 HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
516 HATAsserts.assertEquals((arrayA.array(i + 2) * 30.0f), arrayB.array(i + 2), 0.001f);
517 HATAsserts.assertEquals((arrayA.array(i + 3) * 40.0f), arrayB.array(i + 3), 0.001f);
518 }
519 }
520
521 @HatTest
522 @Reflect
523 public void TestVectorArrayView04() {
524 final int size = 1024;
525 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
526 var arrayA = F32ArrayPadded.create(accelerator, size);
527 var arrayB = F32ArrayPadded.create(accelerator, size);
528
529 Random r = new Random(19);
530 for (int i = 0; i < size; i++) {
531 arrayA.array(i, r.nextFloat());
532 }
533
534 accelerator.compute(cc -> computeGraph04(cc, arrayA, arrayB, size));
535
536 for (int i = 0; i < size; i += 4) {
537 HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
538 HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
539 HATAsserts.assertEquals((arrayA.array(i + 2) * 30.0f), arrayB.array(i + 2), 0.001f);
540 HATAsserts.assertEquals((arrayA.array(i + 3) * 40.0f), arrayB.array(i + 3), 0.001f);
541 }
542 }
543
544 @HatTest
545 @Reflect
546 public void TestVectorArrayView05() {
547 final int size = 1024;
548 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
549 var arrayA = F32ArrayPadded.create(accelerator, size);
550 var arrayB = F32ArrayPadded.create(accelerator, size);
551 var arrayC = F32ArrayPadded.create(accelerator, size);
552
553 Random r = new Random(19);
554 for (int i = 0; i < size; i++) {
555 arrayA.array(i, r.nextFloat());
556 arrayB.array(i, r.nextFloat());
557 }
558
559 accelerator.compute(cc -> computeGraph05(cc, arrayA, arrayB, arrayC, size));
560
561 for (int i = 0; i < size; i ++) {
562 HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
563 }
564 }
565
566 @HatTest
567 @Reflect
568 public void TestVectorArrayView06() {
569 final int size = 1024;
570 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
571 var arrayA = F32ArrayPadded.create(accelerator, size);
572 var arrayB = F32ArrayPadded.create(accelerator, size);
573 var arrayC = F32ArrayPadded.create(accelerator, size);
574
575 Random r = new Random(19);
576 for (int i = 0; i < size; i++) {
577 arrayA.array(i, r.nextFloat());
578 arrayB.array(i, r.nextFloat());
579 }
580
581 accelerator.compute(cc -> computeGraph06(cc, arrayA, arrayB, arrayC, size));
582
583 for (int i = 0; i < size; i ++) {
584 HATAsserts.assertEquals((arrayA.array(i) - arrayB.array(i)), arrayC.array(i), 0.001f);
585 }
586 }
587
588 @HatTest
589 @Reflect
590 public void TestVectorArrayView07() {
591 final int size = 1024;
592 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
593 var arrayA = F32ArrayPadded.create(accelerator, size);
594 var arrayB = F32ArrayPadded.create(accelerator, size);
595 var arrayC = F32ArrayPadded.create(accelerator, size);
596
597 Random r = new Random(19);
598 for (int i = 0; i < size; i++) {
599 arrayA.array(i, r.nextFloat());
600 arrayB.array(i, r.nextFloat());
601 }
602
603 accelerator.compute(cc -> computeGraph07(cc, arrayA, arrayB, arrayC, size));
604
605 for (int i = 0; i < size; i ++) {
606 HATAsserts.assertEquals(arrayA.array(i), arrayC.array(i), 0.001f);
607 }
608 }
609
610 @HatTest
611 @Reflect
612 public void TestVectorArrayView08() {
613 final int size = 1024;
614 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
615 var arrayA = F32ArrayPadded.create(accelerator, size);
616 var arrayB = F32ArrayPadded.create(accelerator, size);
617 var arrayC = F32ArrayPadded.create(accelerator, size);
618
619 Random r = new Random(19);
620 for (int i = 0; i < size; i++) {
621 arrayA.array(i, r.nextFloat());
622 arrayB.array(i, r.nextFloat());
623 }
624
625 accelerator.compute(cc -> computeGraph08(cc, arrayA, arrayB, arrayC, size));
626
627 for (int i = 0; i < size; i ++) {
628 float val = (((arrayA.array(i) + arrayB.array(i)) * arrayA.array(i)) / arrayB.array(i));
629 HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
630 }
631 }
632
633 @HatTest
634 @Reflect
635 public void TestVectorArrayView09() {
636 final int size = 1024;
637 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
638 var arrayA = F32ArrayPadded.create(accelerator, size);
639 var arrayB = F32ArrayPadded.create(accelerator, size);
640 var arrayC = F32ArrayPadded.create(accelerator, size);
641
642 Random r = new Random(19);
643 for (int i = 0; i < size; i++) {
644 arrayA.array(i, r.nextFloat());
645 arrayB.array(i, r.nextFloat());
646 }
647
648 accelerator.compute(cc -> computeGraph09(cc, arrayA, arrayB, arrayC, size));
649
650 for (int i = 0; i < size; i ++) {
651 float val = (arrayA.array(i) + (arrayB.array(i)) * arrayA.array(i));
652 HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
653 }
654 }
655
656 @HatTest
657 @Reflect
658 public void TestVectorArrayView10() {
659 final int size = 1024;
660 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
661 var arrayA = F32ArrayPadded.create(accelerator, size);
662 var arrayB = F32ArrayPadded.create(accelerator, size);
663
664 Random r = new Random(19);
665 for (int i = 0; i < size; i++) {
666 arrayA.array(i, r.nextFloat());
667 arrayB.array(i, r.nextFloat());
668 }
669
670 accelerator.compute(cc -> computeGraph10(cc, arrayA, arrayB, size));
671
672 for (int i = 0; i < size; i ++) {
673 HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
674 }
675 }
676
677 @HatTest
678 @Reflect
679 public void TestVectorArrayView11() {
680 final int size = 1024;
681 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
682 var arrayA = F32ArrayPadded.create(accelerator, size);
683 var arrayB = F32ArrayPadded.create(accelerator, size);
684
685 Random r = new Random(19);
686 for (int i = 0; i < size; i++) {
687 arrayA.array(i, r.nextFloat());
688 arrayB.array(i, r.nextFloat());
689 }
690
691 accelerator.compute(cc -> computeGraph11(cc, arrayA, arrayB, size));
692
693 for (int i = 0; i < size; i ++) {
694 HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
695 }
696 }
697
698 @HatTest
699 @Reflect
700 public void TestVectorArrayView12() {
701 final int size = 1024;
702 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
703 var arrayA = F32ArrayPadded.create(accelerator, size);
704 var arrayB = F32ArrayPadded.create(accelerator, size);
705
706 Random r = new Random(19);
707 for (int i = 0; i < size; i++) {
708 arrayA.array(i, r.nextFloat());
709 arrayB.array(i, r.nextFloat());
710 }
711
712 accelerator.compute(cc -> computeGraph12(cc, arrayA, arrayB, size));
713
714 for (int i = 0; i < size; i ++) {
715 HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
716 }
717 }
718 }