1 /*
2 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 package hat.test;
26
27 import hat.device.DeviceSchema;
28 import hat.device.DeviceType;
29 import hat.types.Float4;
30 import jdk.incubator.code.Reflect;
31
32 import hat.*;
33 import hat.backend.Backend;
34 import hat.buffer.*;
35 import optkl.ifacemapper.MappableIface.RO;
36 import optkl.ifacemapper.MappableIface.RW;
37 import hat.test.annotation.HatTest;
38 import hat.test.exceptions.HATAsserts;
39
40 import java.lang.invoke.MethodHandles;
41 import java.util.Random;
42
43 public class TestVectorArrayView {
44
/**
 * Kernel: loads one {@link Float4} from {@code a} and one from {@code b}, adds them with the
 * static {@code Float4.add}, and stores the sum into {@code c}.
 *
 * <p>Every step is bound to its own local (two loads, the sum, then the store). The sibling
 * kernels {@code vectorOps01WithFloat4s} and {@code vectorOps01WithSeparateAdd} compute the same
 * result with different expression shapes.
 *
 * @param kernelContext source of {@code gix} and {@code gsx}
 *                      (presumably global index and global size in x; confirm)
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 */
@Reflect
public static void vectorOps01(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        // The view is indexed at index * 4; presumably a float offset, so each index value
        // addresses its own group of four floats (TODO confirm against float4ArrayView).
        Float4 floatA = vA[index * 4];
        Float4 floatB = vB[index * 4];
        Float4 res = Float4.add(floatA, floatB);
        vC[index * 4] = res;
    }
}
59
/**
 * Kernel: same addition as {@code vectorOps01}, but the result of {@code Float4.add} is
 * assigned straight into the output view instead of going through a local first.
 *
 * @param kernelContext source of {@code gix} and {@code gsx}
 *                      (presumably global index and global size in x; confirm)
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 */
@Reflect
public static void vectorOps01WithFloat4s(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        Float4 vAFloat = vA[index * 4];
        Float4 vBFloat = vB[index * 4];
        // Sum is stored directly; no intermediate result variable.
        vC[index * 4] = Float4.add(vAFloat, vBFloat);
    }
}
73
/**
 * Kernel: same addition as {@code vectorOps01}, but both view loads appear inline as the
 * arguments of {@code Float4.add}; only the sum is bound to a local before being stored.
 *
 * @param kernelContext source of {@code gix} and {@code gsx}
 *                      (presumably global index and global size in x; confirm)
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 */
@Reflect
public static void vectorOps01WithSeparateAdd(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        // Loads are passed directly to add; there are no per-operand locals.
        Float4 res = Float4.add(vA[index * 4], vB[index * 4]);
        vC[index * 4] = res;
    }
}
86
/**
 * Kernel: loads a mutable {@code Float4} from {@code a}, multiplies its {@code x} lane by
 * {@code 10.0f} via the {@code x(float)} setter, and stores the vector into {@code b}. The
 * {@code y}, {@code z} and {@code w} lanes are stored as loaded.
 *
 * <p>NOTE(review): {@code TestVectorArrayView02} compares {@code b} against the values read from
 * {@code a} after the run, which suggests the setter does not write through to {@code a};
 * confirm against the {@code Float4.MutableImpl} contract.
 *
 * @param kernelContext source of {@code gix} and {@code gsx}
 *                      (presumably global index and global size in x; confirm)
 * @param a input buffer
 * @param b output buffer
 */
@Reflect
public static void vectorOps02(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4.MutableImpl[] vArr = a.float4ArrayView();
        Float4.MutableImpl[] bArr = b.float4ArrayView();
        Float4.MutableImpl vA = vArr[index * 4];
        // Scaled value is computed into a local first, then written back to the x lane.
        float scaleX = vA.x() * 10.0f;
        vA.x(scaleX);
        bArr[index * 4] = vA;
    }
}
100
/**
 * Kernel: loads a mutable {@code Float4} from {@code a}, scales its lanes
 * ({@code x*10}, {@code y*20}, {@code z*30}, {@code w*40}) and stores the vector into {@code b}.
 *
 * <p>All four products are computed into float locals before any setter is called;
 * {@code vectorOps04} performs the same scaling with the products passed inline to the setters.
 *
 * @param kernelContext source of {@code gix} and {@code gsx}
 *                      (presumably global index and global size in x; confirm)
 * @param a input buffer
 * @param b output buffer
 */
@Reflect
public static void vectorOps03(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4.MutableImpl[] vA = a.float4ArrayView();
        Float4.MutableImpl[] vB = b.float4ArrayView();
        Float4.MutableImpl vAFloat = vA[index * 4];
        // Read and scale every lane first ...
        float scaleX = vAFloat.x() * 10.0f;
        float scaleY = vAFloat.y() * 20.0f;
        float scaleZ = vAFloat.z() * 30.0f;
        float scaleW = vAFloat.w() * 40.0f;
        // ... then write the scaled values back lane by lane.
        vAFloat.x(scaleX);
        vAFloat.y(scaleY);
        vAFloat.z(scaleZ);
        vAFloat.w(scaleW);
        vB[index * 4] = vAFloat;
    }
}
120
/**
 * Kernel: same lane scaling as {@code vectorOps03} ({@code x*10}, {@code y*20}, {@code z*30},
 * {@code w*40}), but each product is passed inline to its setter rather than held in a local.
 *
 * @param kernelContext source of {@code gix} and {@code gsx}
 *                      (presumably global index and global size in x; confirm)
 * @param a input buffer
 * @param b output buffer
 */
@Reflect
public static void vectorOps04(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4.MutableImpl[] vA = a.float4ArrayView();
        Float4.MutableImpl[] vB = b.float4ArrayView();
        Float4.MutableImpl vAFloat = vA[index * 4];
        // Each lane is read, scaled and written back in a single statement.
        vAFloat.x(vAFloat.x() * 10.0f);
        vAFloat.y(vAFloat.y() * 20.0f);
        vAFloat.z(vAFloat.z() * 30.0f);
        vAFloat.w(vAFloat.w() * 40.0f);
        vB[index * 4] = vAFloat;
    }
}
136
/**
 * Kernel: computes {@code (a + b) + b} using the instance method {@code Float4.add} twice and
 * stores the result into {@code c}.
 *
 * @param kernelContext source of {@code gix} and {@code gsx}
 *                      (presumably global index and global size in x; confirm)
 * @param a first input buffer
 * @param b second input buffer (added twice)
 * @param c output buffer
 */
@Reflect
public static void vectorOps05(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        Float4 floatA = vA[index * 4];
        Float4 floatB = vB[index * 4];
        // Two chained instance adds, each result held in its own local.
        Float4 temp = floatA.add(floatB);
        Float4 res = temp.add(floatB);
        vC[index * 4] = res;
    }
}
152
/**
 * Kernel: computes {@code a - b} with the static {@code Float4.sub} and stores the result
 * into {@code c}.
 *
 * @param kernelContext source of {@code gix} and {@code gsx}
 *                      (presumably global index and global size in x; confirm)
 * @param a minuend buffer
 * @param b subtrahend buffer
 * @param c output buffer
 */
@Reflect
public static void vectorOps06(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        Float4 floatA = vA[index * 4];
        Float4 floatB = vB[index * 4];
        // Float4 vD = Float4.sub(floatA, floatB);
        Float4 vE = Float4.sub(floatA, floatB);
        vC[index * 4] = vE;
    }
}
168
/**
 * Kernel: computes {@code (a + b) - b} using the instance methods {@code add} and {@code sub}
 * and stores the result into {@code c}. {@code TestVectorArrayView07} expects the output to
 * match {@code a} within its tolerance.
 *
 * @param kernelContext source of {@code gix} and {@code gsx}
 *                      (presumably global index and global size in x; confirm)
 * @param a first input buffer
 * @param b second input buffer (added, then subtracted)
 * @param c output buffer
 */
@Reflect
public static void vectorOps07(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vAArray = a.float4ArrayView();
        Float4[] vBArray = b.float4ArrayView();
        Float4[] vCArray = c.float4ArrayView();

        Float4 vA = vAArray[index * 4];
        Float4 vB = vBArray[index * 4];
        // Add then subtract the same operand.
        Float4 vC = vA.add(vB);
        Float4 vD = vC.sub(vB);
        vCArray[index * 4] = vD;
    }
}
185
/**
 * Kernel: computes {@code ((a + b) * a) / b} using the instance methods {@code add},
 * {@code mul} and {@code div}, and stores the result into {@code c}.
 *
 * @param kernelContext source of {@code gix} and {@code gsx}
 *                      (presumably global index and global size in x; confirm)
 * @param a first input buffer
 * @param b second input buffer (also the divisor)
 * @param c output buffer
 */
@Reflect
public static void vectorOps08(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vAArray = a.float4ArrayView();
        Float4[] vBArray = b.float4ArrayView();
        Float4[] vCArray = c.float4ArrayView();

        Float4 vA = vAArray[index * 4];
        Float4 vB = vBArray[index * 4];
        // Three chained operations, each result held in its own local.
        Float4 vC = vA.add(vB);
        Float4 vD = vC.mul(vA);
        Float4 vE = vD.div(vB);
        vCArray[index * 4] = vE;
    }
}
203
/**
 * Kernel: computes {@code a + (a * b)} using the instance methods {@code mul} and {@code add},
 * and stores the result into {@code c}. The product is an operand of the following add, so the
 * kernel covers composition of vector operations.
 *
 * @param kernelContext source of {@code gix} and {@code gsx}
 *                      (presumably global index and global size in x; confirm)
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 */
@Reflect
public static void vectorOps09(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Checking composition
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;
        Float4[] vAArray = a.float4ArrayView();
        Float4[] vBArray = b.float4ArrayView();
        Float4[] vCArray = c.float4ArrayView();

        Float4 vA = vAArray[index * 4];
        Float4 vB = vBArray[index * 4];
        // temp = a * b, then a + temp.
        Float4 temp = vA.mul(vB);
        Float4 vC = vA.add(temp);
        vCArray[index * 4] = vC;
    }
}
220
/**
 * Device-side type with a single float array of length 1024 named {@code "array"}, as declared
 * by {@link #schema}. Used by {@code vectorOps10} and {@code vectorOps12}.
 *
 * <p>NOTE(review): {@code createLocal()} and {@code float4LocalArrayView()} both return
 * {@code null} here; presumably they are placeholders that the HAT backend replaces during
 * code generation (the names suggest local/shared memory). Confirm before calling them from
 * plain Java.
 */
private interface SharedMemory extends DeviceType {
    // Element setter and getter for the float array.
    void array(long index, float value);
    float array(long index);
    // Layout description: one array field called "array" holding 1024 elements.
    DeviceSchema<SharedMemory> schema = DeviceSchema.of(SharedMemory.class,
            arr -> arr.withArray("array", 1024));
    // Returns null on the Java side (see the note on the interface).
    static SharedMemory createLocal() {
        return null;
    }
    // Float4 view of the array; returns null on the Java side (see the note on the interface).
    default Float4.MutableImpl[] float4LocalArrayView() {
        return null;
    }
}
233
/**
 * Kernel: copies one {@code Float4} from {@code a} to {@code b}, routing it through the
 * {@code Float4} view of a {@code SharedMemory} instance.
 *
 * <p>NOTE(review): {@code SharedMemory.createLocal()} returns {@code null} in this file, so the
 * kernel body presumably only works once translated by a HAT backend; confirm.
 *
 * @param kernelContext source of {@code gix}, {@code gsx} and {@code lix}
 *                      (presumably global index, global size and local index in x; confirm)
 * @param a input buffer
 * @param b output buffer
 */
@Reflect
public static void vectorOps10(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    SharedMemory sm = SharedMemory.createLocal();
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;
        int lix = kernelContext.lix;

        Float4[] aArr = a.float4ArrayView();
        Float4[] bArr = b.float4ArrayView();
        Float4[] smArr = sm.float4LocalArrayView();

        Float4 vA = aArr[index * 4];
        // Stage the vector in the SharedMemory view, addressed by lix rather than gix.
        smArr[lix * 4] = vA;
        // Barrier sits between the staged store and the read-back.
        kernelContext.barrier();
        Float4 r = smArr[lix * 4];
        bArr[index * 4] = r;
    }
}
252
/**
 * Device-side type with a single float array of length 4 named {@code "array"}, as declared by
 * {@link #schema}. Used by {@code vectorOps11}.
 *
 * <p>NOTE(review): {@code createPrivate()} and {@code float4PrivateArrayView()} both return
 * {@code null} here; presumably they are placeholders that the HAT backend replaces during
 * code generation (the names suggest per-thread private memory). Confirm before calling them
 * from plain Java.
 */
private interface PrivateMemory extends DeviceType {
    // Element setter and getter for the float array.
    void array(long index, float value);
    float array(long index);
    // Layout description: one array field called "array" holding 4 elements.
    DeviceSchema<PrivateMemory> schema = DeviceSchema.of(PrivateMemory.class,
            arr -> arr.withArray("array", 4));
    // Returns null on the Java side (see the note on the interface).
    static PrivateMemory createPrivate() {
        return null;
    }
    // Float4 view of the array; returns null on the Java side (see the note on the interface).
    default Float4[] float4PrivateArrayView() {
        return null;
    }
}
265
/**
 * Kernel: copies one {@code Float4} from {@code a} to {@code b}, routing it through element 0
 * of the {@code Float4} view of a {@code PrivateMemory} instance.
 *
 * <p>NOTE(review): {@code PrivateMemory.createPrivate()} returns {@code null} in this file, so
 * the kernel body presumably only works once translated by a HAT backend; confirm.
 *
 * @param kernelContext source of {@code gix} and {@code gsx}
 *                      (presumably global index and global size in x; confirm)
 * @param a input buffer
 * @param b output buffer
 */
@Reflect
public static void vectorOps11(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    PrivateMemory pm = PrivateMemory.createPrivate();
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] aArr = a.float4ArrayView();
        Float4[] bArr = b.float4ArrayView();
        Float4[] pmArr = pm.float4PrivateArrayView();

        Float4 vA = aArr[index * 4];
        // Always slot 0: the PrivateMemory schema declares only four floats.
        pmArr[0] = vA;
        // Barrier sits between the staged store and the read-back.
        kernelContext.barrier();
        Float4 r = pmArr[0];
        bArr[index * 4] = r;
    }
}
283
/**
 * Kernel: copies one {@code Float4} from {@code a} to {@code b} through the {@code SharedMemory}
 * view, filling the staged vector lane by lane with the {@code Float4.MutableImpl} setters
 * instead of assigning the whole vector as {@code vectorOps10} does.
 *
 * <p>NOTE(review): {@code SharedMemory.createLocal()} returns {@code null} in this file, so the
 * kernel body presumably only works once translated by a HAT backend; confirm.
 *
 * @param kernelContext source of {@code gix}, {@code gsx} and {@code lix}
 *                      (presumably global index, global size and local index in x; confirm)
 * @param a input buffer
 * @param b output buffer
 */
@Reflect
public static void vectorOps12(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    SharedMemory sm = SharedMemory.createLocal();
    // Guard: only run while gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;
        int lix = kernelContext.lix;
        Float4.MutableImpl[] aArr = a.float4ArrayView();
        Float4.MutableImpl[] bArr = b.float4ArrayView();
        Float4.MutableImpl[] smArr = sm.float4LocalArrayView();

        Float4.MutableImpl vA = aArr[index * 4];
        // Load the current staged vector, then overwrite each lane with the input's lane.
        Float4.MutableImpl smVector = smArr[lix * 4];
        smVector.x(vA.x());
        smVector.y(vA.y());
        smVector.z(vA.z());
        smVector.w(vA.w());
        // Store the updated vector back into the view explicitly.
        smArr[lix * 4] = smVector;
        // Barrier sits between the staged store and the read-back.
        kernelContext.barrier();
        Float4.MutableImpl r = smArr[lix * 4];
        bArr[index * 4] = r;
    }
}
306
/**
 * Compute entry point: dispatches {@code vectorOps01} over a 1D range of {@code size / 4}.
 * The second {@code of1D} argument is {@code 128} (presumably the local group size; confirm
 * against {@code NDRange.of1D}).
 *
 * @param cc compute context used to dispatch the kernel
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph01(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel( NDRange.of1D(size/4,128), kernelContext -> vectorOps01(kernelContext, a, b, c));
}
312
/**
 * Compute entry point: dispatches {@code vectorOps01WithFloat4s} over a 1D range of
 * {@code size / 4}. The second {@code of1D} argument is {@code 128} (presumably the local
 * group size; confirm against {@code NDRange.of1D}).
 *
 * @param cc compute context used to dispatch the kernel
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph01WithFloat4s(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4,128), kernelContext -> vectorOps01WithFloat4s(kernelContext, a, b, c));
}
318
/**
 * Compute entry point: dispatches {@code vectorOps01WithSeparateAdd} over a 1D range of
 * {@code size / 4}. The second {@code of1D} argument is {@code 128} (presumably the local
 * group size; confirm against {@code NDRange.of1D}).
 *
 * @param cc compute context used to dispatch the kernel
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph01WithSeparateAdd(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4,128), kernelContext -> vectorOps01WithSeparateAdd(kernelContext, a, b, c));
}
324
325 @Reflect
326 public static void computeGraph02(@RO ComputeContext cc, @RW F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
327 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
328 cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps02(kernelContext, a, b));
329 }
330
/**
 * Compute entry point: dispatches {@code vectorOps03} over a 1D range of {@code size / 4}.
 *
 * @param cc compute context used to dispatch the kernel
 * @param a input buffer
 * @param b output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph03(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps03(kernelContext, a, b));
}
336
/**
 * Compute entry point: dispatches {@code vectorOps04} over a 1D range of {@code size / 4}.
 *
 * @param cc compute context used to dispatch the kernel
 * @param a input buffer
 * @param b output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph04(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps04(kernelContext, a, b));
}
342
/**
 * Compute entry point: dispatches {@code vectorOps05} over a 1D range of {@code size / 4}.
 *
 * @param cc compute context used to dispatch the kernel
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph05(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps05(kernelContext, a, b, c));
}
348
/**
 * Compute entry point: dispatches {@code vectorOps06} over a 1D range of {@code size / 4}.
 *
 * @param cc compute context used to dispatch the kernel
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph06(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps06(kernelContext, a, b, c));
}
354
/**
 * Compute entry point: dispatches {@code vectorOps07} over a 1D range of {@code size / 4}.
 *
 * @param cc compute context used to dispatch the kernel
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph07(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps07(kernelContext, a, b, c));
}
360
/**
 * Compute entry point: dispatches {@code vectorOps08} over a 1D range of {@code size / 4}.
 *
 * @param cc compute context used to dispatch the kernel
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph08(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps08(kernelContext, a, b, c));
}
366
/**
 * Compute entry point: dispatches {@code vectorOps09} over a 1D range of {@code size / 4}.
 *
 * @param cc compute context used to dispatch the kernel
 * @param a first input buffer
 * @param b second input buffer
 * @param c output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph09(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps09(kernelContext, a, b, c));
}
372
/**
 * Compute entry point: dispatches {@code vectorOps10} over a 1D range of {@code size / 4}.
 *
 * @param cc compute context used to dispatch the kernel
 * @param a input buffer
 * @param b output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph10(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps10(kernelContext, a, b));
}
378
/**
 * Compute entry point: dispatches {@code vectorOps11} over a 1D range of {@code size / 4}.
 *
 * @param cc compute context used to dispatch the kernel
 * @param a input buffer
 * @param b output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph11(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps11(kernelContext, a, b));
}
384
/**
 * Compute entry point: dispatches {@code vectorOps12} over a 1D range of {@code size / 4}.
 *
 * @param cc compute context used to dispatch the kernel
 * @param a input buffer
 * @param b output buffer
 * @param size number of float elements; divided by 4 to get the range
 */
@Reflect
public static void computeGraph12(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
    // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
    cc.dispatchKernel(NDRange.of1D(size/4), kernelContext -> vectorOps12(kernelContext, a, b));
}
390
391 @HatTest
392 @Reflect
393 public void TestVectorArrayView01() {
394 final int size = 1024;
395 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
396 var arrayA = F32ArrayPadded.create(accelerator, size);
397 var arrayB = F32ArrayPadded.create(accelerator, size);
398 var arrayC = F32ArrayPadded.create(accelerator, size);
399
400 Random r = new Random(19);
401 for (int i = 0; i < size; i++) {
402 arrayA.array(i, r.nextFloat());
403 arrayB.array(i, r.nextFloat());
404 }
405
406 accelerator.compute(cc -> computeGraph01(cc, arrayA, arrayB, arrayC, size));
407
408 for (int i = 0; i < size; i++) {
409 HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
410 }
411
412 }
413
414 // @HatTest
415 // @Reflect
416 // public void TestVectorArrayView01WithFloat4s() {
417 // final int size = 1024;
418 // var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
419 // var arrayA = F32ArrayPadded.create(accelerator, size);
420 // var arrayB = F32ArrayPadded.create(accelerator, size);
421 // var arrayC = F32ArrayPadded.create(accelerator, size);
422 //
423 // Random r = new Random(19);
424 // for (int i = 0; i < size; i++) {
425 // arrayA.array(i, r.nextFloat());
426 // arrayB.array(i, r.nextFloat());
427 // }
428 //
429 // accelerator.compute(cc -> computeGraph01WithFloat4s(cc, arrayA, arrayB, arrayC, size));
430 //
431 // for (int i = 0; i < size; i++) {
432 // HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
433 // }
434 //
435 // }
436 //
437 // @HatTest
438 // @Reflect
439 // public void TestVectorArrayView01WithSeparateAdd() {
440 // final int size = 1024;
441 // var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
442 // var arrayA = F32ArrayPadded.create(accelerator, size);
443 // var arrayB = F32ArrayPadded.create(accelerator, size);
444 // var arrayC = F32ArrayPadded.create(accelerator, size);
445 //
446 // Random r = new Random(19);
447 // for (int i = 0; i < size; i++) {
448 // arrayA.array(i, r.nextFloat());
449 // arrayB.array(i, r.nextFloat());
450 // }
451 //
452 // accelerator.compute(cc -> computeGraph01WithSeparateAdd(cc, arrayA, arrayB, arrayC, size));
453 //
454 // for (int i = 0; i < size; i++) {
455 // HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
456 // }
457 //
458 // }
459
460 @HatTest
461 @Reflect
462 public void TestVectorArrayView02() {
463 final int size = 1024;
464 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
465 var arrayA = F32ArrayPadded.create(accelerator, size);
466 var arrayB = F32ArrayPadded.create(accelerator, size);
467
468 Random r = new Random(19);
469 for (int i = 0; i < size; i++) {
470 arrayA.array(i, r.nextFloat());
471 }
472
473 accelerator.compute(cc -> computeGraph02(cc, arrayA, arrayB, size));
474
475 for (int i = 0; i < size; i += 4) {
476 HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
477 HATAsserts.assertEquals((arrayA.array(i + 1)), arrayB.array(i + 1), 0.001f);
478 HATAsserts.assertEquals((arrayA.array(i + 2)), arrayB.array(i + 2), 0.001f);
479 HATAsserts.assertEquals((arrayA.array(i + 3)), arrayB.array(i + 3), 0.001f);
480 }
481 }
482
483 @HatTest
484 @Reflect
485 public void TestVectorArrayView03() {
486 final int size = 1024;
487 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
488 var arrayA = F32ArrayPadded.create(accelerator, size);
489 var arrayB = F32ArrayPadded.create(accelerator, size);
490
491 Random r = new Random(19);
492 for (int i = 0; i < size; i++) {
493 arrayA.array(i, r.nextFloat());
494 }
495
496 accelerator.compute(cc -> computeGraph03(cc, arrayA, arrayB, size));
497
498 for (int i = 0; i < size; i += 4) {
499 HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
500 HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
501 HATAsserts.assertEquals((arrayA.array(i + 2) * 30.0f), arrayB.array(i + 2), 0.001f);
502 HATAsserts.assertEquals((arrayA.array(i + 3) * 40.0f), arrayB.array(i + 3), 0.001f);
503 }
504 }
505
506 @HatTest
507 @Reflect
508 public void TestVectorArrayView04() {
509 final int size = 1024;
510 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
511 var arrayA = F32ArrayPadded.create(accelerator, size);
512 var arrayB = F32ArrayPadded.create(accelerator, size);
513
514 Random r = new Random(19);
515 for (int i = 0; i < size; i++) {
516 arrayA.array(i, r.nextFloat());
517 }
518
519 accelerator.compute(cc -> computeGraph04(cc, arrayA, arrayB, size));
520
521 for (int i = 0; i < size; i += 4) {
522 HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
523 HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
524 HATAsserts.assertEquals((arrayA.array(i + 2) * 30.0f), arrayB.array(i + 2), 0.001f);
525 HATAsserts.assertEquals((arrayA.array(i + 3) * 40.0f), arrayB.array(i + 3), 0.001f);
526 }
527 }
528
529 @HatTest
530 @Reflect
531 public void TestVectorArrayView05() {
532 final int size = 1024;
533 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
534 var arrayA = F32ArrayPadded.create(accelerator, size);
535 var arrayB = F32ArrayPadded.create(accelerator, size);
536 var arrayC = F32ArrayPadded.create(accelerator, size);
537
538 Random r = new Random(19);
539 for (int i = 0; i < size; i++) {
540 arrayA.array(i, r.nextFloat());
541 arrayB.array(i, r.nextFloat());
542 }
543
544 accelerator.compute(cc -> computeGraph05(cc, arrayA, arrayB, arrayC, size));
545
546 for (int i = 0; i < size; i ++) {
547 HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
548 }
549 }
550
551 @HatTest
552 @Reflect
553 public void TestVectorArrayView06() {
554 final int size = 1024;
555 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
556 var arrayA = F32ArrayPadded.create(accelerator, size);
557 var arrayB = F32ArrayPadded.create(accelerator, size);
558 var arrayC = F32ArrayPadded.create(accelerator, size);
559
560 Random r = new Random(19);
561 for (int i = 0; i < size; i++) {
562 arrayA.array(i, r.nextFloat());
563 arrayB.array(i, r.nextFloat());
564 }
565
566 accelerator.compute(cc -> computeGraph06(cc, arrayA, arrayB, arrayC, size));
567
568 for (int i = 0; i < size; i ++) {
569 HATAsserts.assertEquals((arrayA.array(i) - arrayB.array(i)), arrayC.array(i), 0.001f);
570 }
571 }
572
573 @HatTest
574 @Reflect
575 public void TestVectorArrayView07() {
576 final int size = 1024;
577 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
578 var arrayA = F32ArrayPadded.create(accelerator, size);
579 var arrayB = F32ArrayPadded.create(accelerator, size);
580 var arrayC = F32ArrayPadded.create(accelerator, size);
581
582 Random r = new Random(19);
583 for (int i = 0; i < size; i++) {
584 arrayA.array(i, r.nextFloat());
585 arrayB.array(i, r.nextFloat());
586 }
587
588 accelerator.compute(cc -> computeGraph07(cc, arrayA, arrayB, arrayC, size));
589
590 for (int i = 0; i < size; i ++) {
591 HATAsserts.assertEquals(arrayA.array(i), arrayC.array(i), 0.001f);
592 }
593 }
594
595 @HatTest
596 @Reflect
597 public void TestVectorArrayView08() {
598 final int size = 1024;
599 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
600 var arrayA = F32ArrayPadded.create(accelerator, size);
601 var arrayB = F32ArrayPadded.create(accelerator, size);
602 var arrayC = F32ArrayPadded.create(accelerator, size);
603
604 Random r = new Random(19);
605 for (int i = 0; i < size; i++) {
606 arrayA.array(i, r.nextFloat());
607 arrayB.array(i, r.nextFloat());
608 }
609
610 accelerator.compute(cc -> computeGraph08(cc, arrayA, arrayB, arrayC, size));
611
612 for (int i = 0; i < size; i ++) {
613 float val = (((arrayA.array(i) + arrayB.array(i)) * arrayA.array(i)) / arrayB.array(i));
614 HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
615 }
616 }
617
618 @HatTest
619 @Reflect
620 public void TestVectorArrayView09() {
621 final int size = 1024;
622 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
623 var arrayA = F32ArrayPadded.create(accelerator, size);
624 var arrayB = F32ArrayPadded.create(accelerator, size);
625 var arrayC = F32ArrayPadded.create(accelerator, size);
626
627 Random r = new Random(19);
628 for (int i = 0; i < size; i++) {
629 arrayA.array(i, r.nextFloat());
630 arrayB.array(i, r.nextFloat());
631 }
632
633 accelerator.compute(cc -> computeGraph09(cc, arrayA, arrayB, arrayC, size));
634
635 for (int i = 0; i < size; i ++) {
636 float val = (arrayA.array(i) + (arrayB.array(i)) * arrayA.array(i));
637 HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
638 }
639 }
640
641 @HatTest
642 @Reflect
643 public void TestVectorArrayView10() {
644 final int size = 1024;
645 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
646 var arrayA = F32ArrayPadded.create(accelerator, size);
647 var arrayB = F32ArrayPadded.create(accelerator, size);
648
649 Random r = new Random(19);
650 for (int i = 0; i < size; i++) {
651 arrayA.array(i, r.nextFloat());
652 arrayB.array(i, r.nextFloat());
653 }
654
655 accelerator.compute(cc -> computeGraph10(cc, arrayA, arrayB, size));
656
657 for (int i = 0; i < size; i ++) {
658 HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
659 }
660 }
661
662 @HatTest
663 @Reflect
664 public void TestVectorArrayView11() {
665 final int size = 1024;
666 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
667 var arrayA = F32ArrayPadded.create(accelerator, size);
668 var arrayB = F32ArrayPadded.create(accelerator, size);
669
670 Random r = new Random(19);
671 for (int i = 0; i < size; i++) {
672 arrayA.array(i, r.nextFloat());
673 arrayB.array(i, r.nextFloat());
674 }
675
676 accelerator.compute(cc -> computeGraph11(cc, arrayA, arrayB, size));
677
678 for (int i = 0; i < size; i ++) {
679 HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
680 }
681 }
682
683 @HatTest
684 @Reflect
685 public void TestVectorArrayView12() {
686 final int size = 1024;
687 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
688 var arrayA = F32ArrayPadded.create(accelerator, size);
689 var arrayB = F32ArrayPadded.create(accelerator, size);
690
691 Random r = new Random(19);
692 for (int i = 0; i < size; i++) {
693 arrayA.array(i, r.nextFloat());
694 arrayB.array(i, r.nextFloat());
695 }
696
697 accelerator.compute(cc -> computeGraph12(cc, arrayA, arrayB, size));
698
699 for (int i = 0; i < size; i ++) {
700 HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
701 }
702 }
703 }