1 /*
2 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 package hat.test;
26
27 import hat.*;
28 import hat.backend.Backend;
29 import hat.buffer.*;
30 import hat.ifacemapper.MappableIface.RO;
31 import hat.ifacemapper.MappableIface.RW;
32 import hat.ifacemapper.Schema;
33 import hat.test.annotation.HatTest;
34 import hat.test.engine.HATAsserts;
35 import jdk.incubator.code.CodeReflection;
36
37 import java.lang.invoke.MethodHandles;
38 import java.util.Random;
39
40 public class TestVectorArrayView {
41
/**
 * Kernel that adds one {@code Float4} taken from {@code a} to one {@code Float4} taken from
 * {@code b} and stores the sum in {@code c}, going through the {@code float4ArrayView()} views.
 *
 * <p>In this variant both operands and the sum are held in named locals before the store.
 *
 * @param kernelContext supplies {@code gix} (used as this thread's index) and {@code gsx}
 * @param a first input buffer
 * @param b second input buffer
 * @param c buffer that receives the sum
 */
@CodeReflection
public static void vectorOps01(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: the body is skipped unless gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        // The views are addressed at index * 4.
        // NOTE(review): presumably the view index is a float offset, so each thread covers the
        // four floats starting at 4 * gix — confirm against float4ArrayView().
        Float4 floatA = vA[index * 4];
        Float4 floatB = vB[index * 4];
        Float4 res = Float4.add(floatA, floatB);
        vC[index * 4] = res;
    }
}
56
/**
 * Variant of the {@code Float4} addition kernel: the operands are loaded into locals, but the
 * result of {@code Float4.add} is stored straight into the output view without an intermediate
 * local.
 *
 * <p>The only test in this file that dispatches this kernel is currently commented out.
 *
 * @param kernelContext supplies {@code gix} (used as this thread's index) and {@code gsx}
 * @param a first input buffer
 * @param b second input buffer
 * @param c buffer that receives the sum
 */
@CodeReflection
public static void vectorOps01WithFloat4s(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: the body is skipped unless gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        Float4 vAFloat = vA[index * 4];
        Float4 vBFloat = vB[index * 4];
        // Add and store in a single statement (no local for the sum).
        vC[index * 4] = Float4.add(vAFloat, vBFloat);
    }
}
70
/**
 * Variant of the {@code Float4} addition kernel: the two view loads are passed directly as
 * arguments to {@code Float4.add}, and the sum is kept in a local before being stored.
 *
 * <p>The only test in this file that dispatches this kernel is currently commented out.
 *
 * @param kernelContext supplies {@code gix} (used as this thread's index) and {@code gsx}
 * @param a first input buffer
 * @param b second input buffer
 * @param c buffer that receives the sum
 */
@CodeReflection
public static void vectorOps01WithSeparateAdd(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: the body is skipped unless gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        // Operands are read inline, without named locals.
        Float4 res = Float4.add(vA[index * 4], vB[index * 4]);
        vC[index * 4] = res;
    }
}
83
/**
 * Kernel that loads a {@code Float4.MutableImpl} from {@code a}, multiplies its {@code x}
 * component by 10 and stores the modified vector into {@code b}.
 *
 * <p>The setter is called on the loaded local {@code vA}. The host test for this kernel compares
 * {@code b} against {@code a * 10} after the run, so it relies on {@code a} itself not changing.
 *
 * @param kernelContext supplies {@code gix} (used as this thread's index) and {@code gsx}
 * @param a input buffer
 * @param b buffer that receives the modified vector
 */
@CodeReflection
public static void vectorOps02(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Guard: the body is skipped unless gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4.MutableImpl[] vArr = a.float4ArrayView();
        Float4.MutableImpl[] bArr = b.float4ArrayView();
        Float4.MutableImpl vA = vArr[index * 4];
        // Only x is scaled; y, z and w are passed through untouched.
        float scaleX = vA.x() * 10.0f;
        vA.x(scaleX);
        bArr[index * 4] = vA;
    }
}
97
/**
 * Kernel that loads a {@code Float4.MutableImpl} from {@code a}, scales its {@code x}, {@code y},
 * {@code z} and {@code w} components by 10, 20, 30 and 40 respectively, and stores the result
 * into {@code b}.
 *
 * <p>In this variant every scaled value is first computed into a {@code float} local and the
 * setters are called afterwards.
 *
 * @param kernelContext supplies {@code gix} (used as this thread's index) and {@code gsx}
 * @param a input buffer
 * @param b buffer that receives the scaled vector
 */
@CodeReflection
public static void vectorOps03(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Guard: the body is skipped unless gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4.MutableImpl[] vA = a.float4ArrayView();
        Float4.MutableImpl[] vB = b.float4ArrayView();
        Float4.MutableImpl vAFloat = vA[index * 4];
        // Read all four lanes before any of them is written.
        float scaleX = vAFloat.x() * 10.0f;
        float scaleY = vAFloat.y() * 20.0f;
        float scaleZ = vAFloat.z() * 30.0f;
        float scaleW = vAFloat.w() * 40.0f;
        vAFloat.x(scaleX);
        vAFloat.y(scaleY);
        vAFloat.z(scaleZ);
        vAFloat.w(scaleW);
        vB[index * 4] = vAFloat;
    }
}
117
/**
 * Kernel that loads a {@code Float4.MutableImpl} from {@code a}, scales its {@code x}, {@code y},
 * {@code z} and {@code w} components by 10, 20, 30 and 40 respectively, and stores the result
 * into {@code b}.
 *
 * <p>In this variant each getter call is nested directly inside the matching setter call, with
 * no intermediate {@code float} locals.
 *
 * @param kernelContext supplies {@code gix} (used as this thread's index) and {@code gsx}
 * @param a input buffer
 * @param b buffer that receives the scaled vector
 */
@CodeReflection
public static void vectorOps04(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Guard: the body is skipped unless gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4.MutableImpl[] vA = a.float4ArrayView();
        Float4.MutableImpl[] vB = b.float4ArrayView();
        Float4.MutableImpl vAFloat = vA[index * 4];
        // Each lane is read, scaled and written back in one expression.
        vAFloat.x(vAFloat.x() * 10.0f);
        vAFloat.y(vAFloat.y() * 20.0f);
        vAFloat.z(vAFloat.z() * 30.0f);
        vAFloat.w(vAFloat.w() * 40.0f);
        vB[index * 4] = vAFloat;
    }
}
133
/**
 * Kernel that computes {@code (a + b) + b} on one {@code Float4} per thread using the instance
 * method {@code add}, and stores the result in {@code c}.
 *
 * @param kernelContext supplies {@code gix} (used as this thread's index) and {@code gsx}
 * @param a first input buffer
 * @param b second input buffer (added twice)
 * @param c buffer that receives the result
 */
@CodeReflection
public static void vectorOps05(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: the body is skipped unless gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        Float4 floatA = vA[index * 4];
        Float4 floatB = vB[index * 4];
        // Two chained instance-method additions; the first sum feeds the second.
        Float4 temp = floatA.add(floatB);
        Float4 res = temp.add(floatB);
        vC[index * 4] = res;
    }
}
149
/**
 * Kernel that computes {@code a - b} on one {@code Float4} per thread using the static method
 * {@code Float4.sub}, and stores the result in {@code c}.
 *
 * @param kernelContext supplies {@code gix} (used as this thread's index) and {@code gsx}
 * @param a minuend buffer
 * @param b subtrahend buffer
 * @param c buffer that receives the difference
 */
@CodeReflection
public static void vectorOps06(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: the body is skipped unless gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vA = a.float4ArrayView();
        Float4[] vB = b.float4ArrayView();
        Float4[] vC = c.float4ArrayView();
        Float4 floatA = vA[index * 4];
        Float4 floatB = vB[index * 4];
        // NOTE(review): vD is computed but never read; vE repeats the same subtraction and is
        // the value that gets stored. Unclear whether the duplicate is intentional — confirm.
        Float4 vD = Float4.sub(floatA, floatB);
        Float4 vE = Float4.sub(floatA, floatB);
        vC[index * 4] = vE;
    }
}
165
/**
 * Kernel that computes {@code (a + b) - b} on one {@code Float4} per thread using the instance
 * methods {@code add} and {@code sub}, and stores the result in {@code c}.
 *
 * <p>The host test for this kernel expects the output to match {@code a} within its tolerance.
 *
 * @param kernelContext supplies {@code gix} (used as this thread's index) and {@code gsx}
 * @param a first input buffer
 * @param b second input buffer (added, then subtracted)
 * @param c buffer that receives the result
 */
@CodeReflection
public static void vectorOps07(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: the body is skipped unless gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vAArray = a.float4ArrayView();
        Float4[] vBArray = b.float4ArrayView();
        Float4[] vCArray = c.float4ArrayView();

        Float4 vA = vAArray[index * 4];
        Float4 vB = vBArray[index * 4];
        // Add followed by the inverse subtraction.
        Float4 vC = vA.add(vB);
        Float4 vD = vC.sub(vB);
        vCArray[index * 4] = vD;
    }
}
182
/**
 * Kernel that computes {@code ((a + b) * a) / b} on one {@code Float4} per thread by chaining the
 * instance methods {@code add}, {@code mul} and {@code div}, and stores the result in {@code c}.
 *
 * @param kernelContext supplies {@code gix} (used as this thread's index) and {@code gsx}
 * @param a first input buffer
 * @param b second input buffer (also the divisor)
 * @param c buffer that receives the result
 */
@CodeReflection
public static void vectorOps08(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Guard: the body is skipped unless gix is below gsx.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] vAArray = a.float4ArrayView();
        Float4[] vBArray = b.float4ArrayView();
        Float4[] vCArray = c.float4ArrayView();

        Float4 vA = vAArray[index * 4];
        Float4 vB = vBArray[index * 4];
        // Each step consumes the previous step's result.
        Float4 vC = vA.add(vB);
        Float4 vD = vC.mul(vA);
        Float4 vE = vD.div(vB);
        vCArray[index * 4] = vE;
    }
}
200
/**
 * Kernel that computes {@code a + (a * b)} on one {@code Float4} per thread and stores the result
 * in {@code c}.
 *
 * @param kernelContext supplies {@code gix} (used as this thread's index) and {@code gsx}
 * @param a first input buffer
 * @param b second input buffer
 * @param c buffer that receives the result
 */
@CodeReflection
public static void vectorOps09(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c) {
    // Checking composition: the product of one operation is used as an operand of the next.
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;
        Float4[] vAArray = a.float4ArrayView();
        Float4[] vBArray = b.float4ArrayView();
        Float4[] vCArray = c.float4ArrayView();

        Float4 vA = vAArray[index * 4];
        Float4 vB = vBArray[index * 4];
        Float4 temp = vA.mul(vB);
        Float4 vC = vA.add(temp);
        vCArray[index * 4] = vC;
    }
}
217
/**
 * Buffer interface used by the {@code vectorOps10} and {@code vectorOps12} kernels, which obtain
 * an instance through {@code createLocal()} and access it through
 * {@code float4LocalArrayView()}.
 *
 * <p>The schema declares one array field named {@code "array"} with 1024 elements.
 */
private interface SharedMemory extends Buffer {
    // Accessor pair for the "array" field: write form, then read form.
    void array(long index, float value);
    float array(long index);
    // Layout description: a single array field called "array" of length 1024.
    Schema<SharedMemory> schema = Schema.of(SharedMemory.class,
            arr -> arr.array("array", 1024));
    // Allocates an instance from the schema on the given accelerator.
    static SharedMemory create(Accelerator accelerator) {
        return schema.allocate(accelerator);
    }
    // Allocates an instance on a newly constructed Accelerator that uses Backend.FIRST.
    // NOTE(review): presumably calls to this method inside a kernel are recognized by name and
    // turned into a local-memory declaration by the HAT code generator — confirm.
    static SharedMemory createLocal() {
        return schema.allocate(new Accelerator(MethodHandles.lookup(), Backend.FIRST));
    }
    // Placeholder body that returns null; not called anywhere in this file.
    default Float4 float4View(int index) {
        return null;
    }
    // Placeholder with an empty body; not called anywhere in this file.
    default void storeFloat4View(Float4 float4, int index) {
    }
    // Placeholder body that returns null. The kernels index the returned array, so
    // NOTE(review): presumably the HAT code generator replaces this call — confirm.
    default Float4.MutableImpl[] float4LocalArrayView() {
        return null;
    }
}
238
/**
 * Kernel that copies one {@code Float4} per thread from {@code a} to {@code b}, staging it in a
 * {@code SharedMemory} instance on the way: store at {@code lix * 4}, barrier, load from the same
 * slot, then store into {@code b}.
 *
 * @param kernelContext supplies {@code gix}, {@code gsx}, {@code lix} and {@code barrier()}
 * @param a input buffer
 * @param b buffer that receives the copy
 */
@CodeReflection
public static void vectorOps10(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Created before the guard, so this statement is reached regardless of the gix check.
    SharedMemory sm = SharedMemory.createLocal();
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;
        int lix = kernelContext.lix;

        Float4[] aArr = a.float4ArrayView();
        Float4[] bArr = b.float4ArrayView();
        // float4LocalArrayView() is declared to return Float4.MutableImpl[]; it is held as Float4[].
        Float4[] smArr = sm.float4LocalArrayView();

        Float4 vA = aArr[index * 4];
        smArr[lix * 4] = vA;
        // NOTE(review): the barrier sits inside the gix < gsx guard. If the guard could be false
        // for some threads of a group, not every thread would reach it — confirm this cannot
        // happen for the ranges this kernel is dispatched with.
        kernelContext.barrier();
        Float4 r = smArr[lix * 4];
        bArr[index * 4] = r;
    }
}
257
/**
 * Buffer interface used by the {@code vectorOps11} kernel, which obtains an instance through
 * {@code createPrivate()} and accesses it through {@code float4PrivateArrayView()}.
 *
 * <p>The schema declares one array field named {@code "array"} with 4 elements.
 */
private interface PrivateMemory extends Buffer {
    // Accessor pair for the "array" field: write form, then read form.
    void array(long index, float value);
    float array(long index);
    // Layout description: a single array field called "array" of length 4.
    Schema<PrivateMemory> schema = Schema.of(PrivateMemory.class,
            arr -> arr.array("array", 4));
    // Allocates an instance from the schema on the given accelerator.
    static PrivateMemory create(Accelerator accelerator) {
        return schema.allocate(accelerator);
    }
    // Allocates an instance on a newly constructed Accelerator that uses Backend.FIRST.
    // NOTE(review): presumably calls to this method inside a kernel are recognized by name and
    // turned into a private-memory declaration by the HAT code generator — confirm.
    static PrivateMemory createPrivate() {
        return schema.allocate(new Accelerator(MethodHandles.lookup(), Backend.FIRST));
    }
    // Placeholder body that returns null; not called anywhere in this file.
    default Float4 float4View(int index) {
        return null;
    }
    // Placeholder with an empty body; not called anywhere in this file.
    default void storeFloat4View(Float4 float4, int index) {
    }
    // Placeholder body that returns null. The kernel indexes the returned array, so
    // NOTE(review): presumably the HAT code generator replaces this call — confirm.
    default Float4[] float4PrivateArrayView() {
        return null;
    }
}
278
/**
 * Kernel that copies one {@code Float4} per thread from {@code a} to {@code b}, staging it in
 * slot 0 of a {@code PrivateMemory} instance on the way: store, barrier, load, then store into
 * {@code b}.
 *
 * @param kernelContext supplies {@code gix}, {@code gsx} and {@code barrier()}
 * @param a input buffer
 * @param b buffer that receives the copy
 */
@CodeReflection
public static void vectorOps11(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Created before the guard, so this statement is reached regardless of the gix check.
    PrivateMemory pm = PrivateMemory.createPrivate();
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;

        Float4[] aArr = a.float4ArrayView();
        Float4[] bArr = b.float4ArrayView();
        Float4[] pmArr = pm.float4PrivateArrayView();

        Float4 vA = aArr[index * 4];
        // Every thread uses index 0 of its PrivateMemory view.
        pmArr[0] = vA;
        // NOTE(review): the barrier sits inside the gix < gsx guard — confirm every thread of a
        // group reaches it for the ranges this kernel is dispatched with.
        kernelContext.barrier();
        Float4 r = pmArr[0];
        bArr[index * 4] = r;
    }
}
296
/**
 * Kernel that copies one vector per thread from {@code a} to {@code b} through a
 * {@code SharedMemory} instance, using {@code Float4.MutableImpl} and component-wise setters.
 *
 * <p>The vector at {@code lix * 4} of the shared view is loaded, its four components are
 * overwritten with those of the input vector, and it is stored back to the same slot. After the
 * barrier the slot is read again and written to {@code b}.
 *
 * @param kernelContext supplies {@code gix}, {@code gsx}, {@code lix} and {@code barrier()}
 * @param a input buffer
 * @param b buffer that receives the copy
 */
@CodeReflection
public static void vectorOps12(@RO KernelContext kernelContext, @RO F32ArrayPadded a, @RW F32ArrayPadded b) {
    // Created before the guard, so this statement is reached regardless of the gix check.
    SharedMemory sm = SharedMemory.createLocal();
    if (kernelContext.gix < kernelContext.gsx) {
        int index = kernelContext.gix;
        int lix = kernelContext.lix;
        Float4.MutableImpl[] aArr = a.float4ArrayView();
        Float4.MutableImpl[] bArr = b.float4ArrayView();
        Float4.MutableImpl[] smArr = sm.float4LocalArrayView();

        Float4.MutableImpl vA = aArr[index * 4];
        Float4.MutableImpl smVector = smArr[lix * 4];
        // Copy lane by lane through the getter/setter pairs.
        smVector.x(vA.x());
        smVector.y(vA.y());
        smVector.z(vA.z());
        smVector.w(vA.w());
        smArr[lix * 4] = smVector;
        // NOTE(review): the barrier sits inside the gix < gsx guard — confirm every thread of a
        // group reaches it for the ranges this kernel is dispatched with.
        kernelContext.barrier();
        Float4.MutableImpl r = smArr[lix * 4];
        bArr[index * 4] = r;
    }
}
319
320 @CodeReflection
321 public static void computeGraph01(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
322 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
323 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4), NDRange.Local1D.of(128));
324 cc.dispatchKernel(ndRange, kernelContext -> vectorOps01(kernelContext, a, b, c));
325 }
326
327 @CodeReflection
328 public static void computeGraph01WithFloat4s(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
329 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
330 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4), NDRange.Local1D.of(128));
331 cc.dispatchKernel(ndRange, kernelContext -> vectorOps01WithFloat4s(kernelContext, a, b, c));
332 }
333
334 @CodeReflection
335 public static void computeGraph01WithSeparateAdd(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
336 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
337 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4), NDRange.Local1D.of(128));
338 cc.dispatchKernel(ndRange, kernelContext -> vectorOps01WithSeparateAdd(kernelContext, a, b, c));
339 }
340
341 @CodeReflection
342 public static void computeGraph02(@RO ComputeContext cc, @RW F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
343 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
344 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
345 cc.dispatchKernel(ndRange, kernelContext -> vectorOps02(kernelContext, a, b));
346 }
347
348 @CodeReflection
349 public static void computeGraph03(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
350 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
351 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
352 cc.dispatchKernel(ndRange, kernelContext -> vectorOps03(kernelContext, a, b));
353 }
354
355 @CodeReflection
356 public static void computeGraph04(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
357 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
358 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
359 cc.dispatchKernel(ndRange, kernelContext -> vectorOps04(kernelContext, a, b));
360 }
361
362 @CodeReflection
363 public static void computeGraph05(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
364 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
365 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
366 cc.dispatchKernel(ndRange, kernelContext -> vectorOps05(kernelContext, a, b, c));
367 }
368
369 @CodeReflection
370 public static void computeGraph06(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
371 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
372 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
373 cc.dispatchKernel(ndRange, kernelContext -> vectorOps06(kernelContext, a, b, c));
374 }
375
376 @CodeReflection
377 public static void computeGraph07(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
378 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
379 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
380 cc.dispatchKernel(ndRange, kernelContext -> vectorOps07(kernelContext, a, b, c));
381 }
382
383 @CodeReflection
384 public static void computeGraph08(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
385 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
386 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
387 cc.dispatchKernel(ndRange, kernelContext -> vectorOps08(kernelContext, a, b, c));
388 }
389
390 @CodeReflection
391 public static void computeGraph09(@RO ComputeContext cc, @RO F32ArrayPadded a, @RO F32ArrayPadded b, @RW F32ArrayPadded c, int size) {
392 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
393 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
394 cc.dispatchKernel(ndRange, kernelContext -> vectorOps09(kernelContext, a, b, c));
395 }
396
397 @CodeReflection
398 public static void computeGraph10(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
399 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
400 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
401 cc.dispatchKernel(ndRange, kernelContext -> vectorOps10(kernelContext, a, b));
402 }
403
404 @CodeReflection
405 public static void computeGraph11(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
406 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
407 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
408 cc.dispatchKernel(ndRange, kernelContext -> vectorOps11(kernelContext, a, b));
409 }
410
411 @CodeReflection
412 public static void computeGraph12(@RO ComputeContext cc, @RO F32ArrayPadded a, @RW F32ArrayPadded b, int size) {
413 // Note: we need to launch N threads / vectorWidth -> size / 4 for this example
414 NDRange ndRange = NDRange.of(NDRange.Global1D.of(size/4));
415 cc.dispatchKernel(ndRange, kernelContext -> vectorOps12(kernelContext, a, b));
416 }
417
418 @HatTest
419 public void TestVectorArrayView01() {
420 final int size = 1024;
421 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
422 var arrayA = F32ArrayPadded.create(accelerator, size);
423 var arrayB = F32ArrayPadded.create(accelerator, size);
424 var arrayC = F32ArrayPadded.create(accelerator, size);
425
426 Random r = new Random(19);
427 for (int i = 0; i < size; i++) {
428 arrayA.array(i, r.nextFloat());
429 arrayB.array(i, r.nextFloat());
430 }
431
432 accelerator.compute(cc -> computeGraph01(cc, arrayA, arrayB, arrayC, size));
433
434 for (int i = 0; i < size; i++) {
435 HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
436 }
437
438 }
439
440 // @HatTest
441 // public void TestVectorArrayView01WithFloat4s() {
442 // final int size = 1024;
443 // var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
444 // var arrayA = F32ArrayPadded.create(accelerator, size);
445 // var arrayB = F32ArrayPadded.create(accelerator, size);
446 // var arrayC = F32ArrayPadded.create(accelerator, size);
447 //
448 // Random r = new Random(19);
449 // for (int i = 0; i < size; i++) {
450 // arrayA.array(i, r.nextFloat());
451 // arrayB.array(i, r.nextFloat());
452 // }
453 //
454 // accelerator.compute(cc -> computeGraph01WithFloat4s(cc, arrayA, arrayB, arrayC, size));
455 //
456 // for (int i = 0; i < size; i++) {
457 // HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
458 // }
459 //
460 // }
461 //
462 // @HatTest
463 // public void TestVectorArrayView01WithSeparateAdd() {
464 // final int size = 1024;
465 // var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
466 // var arrayA = F32ArrayPadded.create(accelerator, size);
467 // var arrayB = F32ArrayPadded.create(accelerator, size);
468 // var arrayC = F32ArrayPadded.create(accelerator, size);
469 //
470 // Random r = new Random(19);
471 // for (int i = 0; i < size; i++) {
472 // arrayA.array(i, r.nextFloat());
473 // arrayB.array(i, r.nextFloat());
474 // }
475 //
476 // accelerator.compute(cc -> computeGraph01WithSeparateAdd(cc, arrayA, arrayB, arrayC, size));
477 //
478 // for (int i = 0; i < size; i++) {
479 // HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
480 // }
481 //
482 // }
483
484 @HatTest
485 public void TestVectorArrayView02() {
486 final int size = 1024;
487 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
488 var arrayA = F32ArrayPadded.create(accelerator, size);
489 var arrayB = F32ArrayPadded.create(accelerator, size);
490
491 Random r = new Random(19);
492 for (int i = 0; i < size; i++) {
493 arrayA.array(i, r.nextFloat());
494 }
495
496 accelerator.compute(cc -> computeGraph02(cc, arrayA, arrayB, size));
497
498 for (int i = 0; i < size; i += 4) {
499 HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
500 HATAsserts.assertEquals((arrayA.array(i + 1)), arrayB.array(i + 1), 0.001f);
501 HATAsserts.assertEquals((arrayA.array(i + 2)), arrayB.array(i + 2), 0.001f);
502 HATAsserts.assertEquals((arrayA.array(i + 3)), arrayB.array(i + 3), 0.001f);
503 }
504 }
505
506 @HatTest
507 public void TestVectorArrayView03() {
508 final int size = 1024;
509 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
510 var arrayA = F32ArrayPadded.create(accelerator, size);
511 var arrayB = F32ArrayPadded.create(accelerator, size);
512
513 Random r = new Random(19);
514 for (int i = 0; i < size; i++) {
515 arrayA.array(i, r.nextFloat());
516 }
517
518 accelerator.compute(cc -> computeGraph03(cc, arrayA, arrayB, size));
519
520 for (int i = 0; i < size; i += 4) {
521 HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
522 HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
523 HATAsserts.assertEquals((arrayA.array(i + 2) * 30.0f), arrayB.array(i + 2), 0.001f);
524 HATAsserts.assertEquals((arrayA.array(i + 3) * 40.0f), arrayB.array(i + 3), 0.001f);
525 }
526 }
527
528 @HatTest
529 public void TestVectorArrayView04() {
530 final int size = 1024;
531 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
532 var arrayA = F32ArrayPadded.create(accelerator, size);
533 var arrayB = F32ArrayPadded.create(accelerator, size);
534
535 Random r = new Random(19);
536 for (int i = 0; i < size; i++) {
537 arrayA.array(i, r.nextFloat());
538 }
539
540 accelerator.compute(cc -> computeGraph04(cc, arrayA, arrayB, size));
541
542 for (int i = 0; i < size; i += 4) {
543 HATAsserts.assertEquals((arrayA.array(i + 0) * 10.0f), arrayB.array(i + 0), 0.001f);
544 HATAsserts.assertEquals((arrayA.array(i + 1) * 20.0f), arrayB.array(i + 1), 0.001f);
545 HATAsserts.assertEquals((arrayA.array(i + 2) * 30.0f), arrayB.array(i + 2), 0.001f);
546 HATAsserts.assertEquals((arrayA.array(i + 3) * 40.0f), arrayB.array(i + 3), 0.001f);
547 }
548 }
549
550 @HatTest
551 public void TestVectorArrayView05() {
552 final int size = 1024;
553 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
554 var arrayA = F32ArrayPadded.create(accelerator, size);
555 var arrayB = F32ArrayPadded.create(accelerator, size);
556 var arrayC = F32ArrayPadded.create(accelerator, size);
557
558 Random r = new Random(19);
559 for (int i = 0; i < size; i++) {
560 arrayA.array(i, r.nextFloat());
561 arrayB.array(i, r.nextFloat());
562 }
563
564 accelerator.compute(cc -> computeGraph05(cc, arrayA, arrayB, arrayC, size));
565
566 for (int i = 0; i < size; i ++) {
567 HATAsserts.assertEquals((arrayA.array(i) + arrayB.array(i) + arrayB.array(i)), arrayC.array(i), 0.001f);
568 }
569 }
570
571 @HatTest
572 public void TestVectorArrayView06() {
573 final int size = 1024;
574 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
575 var arrayA = F32ArrayPadded.create(accelerator, size);
576 var arrayB = F32ArrayPadded.create(accelerator, size);
577 var arrayC = F32ArrayPadded.create(accelerator, size);
578
579 Random r = new Random(19);
580 for (int i = 0; i < size; i++) {
581 arrayA.array(i, r.nextFloat());
582 arrayB.array(i, r.nextFloat());
583 }
584
585 accelerator.compute(cc -> computeGraph06(cc, arrayA, arrayB, arrayC, size));
586
587 for (int i = 0; i < size; i ++) {
588 HATAsserts.assertEquals((arrayA.array(i) - arrayB.array(i)), arrayC.array(i), 0.001f);
589 }
590 }
591
592 @HatTest
593 public void TestVectorArrayView07() {
594 final int size = 1024;
595 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
596 var arrayA = F32ArrayPadded.create(accelerator, size);
597 var arrayB = F32ArrayPadded.create(accelerator, size);
598 var arrayC = F32ArrayPadded.create(accelerator, size);
599
600 Random r = new Random(19);
601 for (int i = 0; i < size; i++) {
602 arrayA.array(i, r.nextFloat());
603 arrayB.array(i, r.nextFloat());
604 }
605
606 accelerator.compute(cc -> computeGraph07(cc, arrayA, arrayB, arrayC, size));
607
608 for (int i = 0; i < size; i ++) {
609 HATAsserts.assertEquals(arrayA.array(i), arrayC.array(i), 0.001f);
610 }
611 }
612
613 @HatTest
614 public void TestVectorArrayView08() {
615 final int size = 1024;
616 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
617 var arrayA = F32ArrayPadded.create(accelerator, size);
618 var arrayB = F32ArrayPadded.create(accelerator, size);
619 var arrayC = F32ArrayPadded.create(accelerator, size);
620
621 Random r = new Random(19);
622 for (int i = 0; i < size; i++) {
623 arrayA.array(i, r.nextFloat());
624 arrayB.array(i, r.nextFloat());
625 }
626
627 accelerator.compute(cc -> computeGraph08(cc, arrayA, arrayB, arrayC, size));
628
629 for (int i = 0; i < size; i ++) {
630 float val = (((arrayA.array(i) + arrayB.array(i)) * arrayA.array(i)) / arrayB.array(i));
631 HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
632 }
633 }
634
635 @HatTest
636 public void TestVectorArrayView09() {
637 final int size = 1024;
638 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
639 var arrayA = F32ArrayPadded.create(accelerator, size);
640 var arrayB = F32ArrayPadded.create(accelerator, size);
641 var arrayC = F32ArrayPadded.create(accelerator, size);
642
643 Random r = new Random(19);
644 for (int i = 0; i < size; i++) {
645 arrayA.array(i, r.nextFloat());
646 arrayB.array(i, r.nextFloat());
647 }
648
649 accelerator.compute(cc -> computeGraph09(cc, arrayA, arrayB, arrayC, size));
650
651 for (int i = 0; i < size; i ++) {
652 float val = (arrayA.array(i) + (arrayB.array(i)) * arrayA.array(i));
653 HATAsserts.assertEquals(val, arrayC.array(i), 0.001f);
654 }
655 }
656
657 @HatTest
658 public void TestVectorArrayView10() {
659 final int size = 1024;
660 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
661 var arrayA = F32ArrayPadded.create(accelerator, size);
662 var arrayB = F32ArrayPadded.create(accelerator, size);
663
664 Random r = new Random(19);
665 for (int i = 0; i < size; i++) {
666 arrayA.array(i, r.nextFloat());
667 arrayB.array(i, r.nextFloat());
668 }
669
670 accelerator.compute(cc -> computeGraph10(cc, arrayA, arrayB, size));
671
672 for (int i = 0; i < size; i ++) {
673 HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
674 }
675 }
676
677 @HatTest
678 public void TestVectorArrayView11() {
679 final int size = 1024;
680 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
681 var arrayA = F32ArrayPadded.create(accelerator, size);
682 var arrayB = F32ArrayPadded.create(accelerator, size);
683
684 Random r = new Random(19);
685 for (int i = 0; i < size; i++) {
686 arrayA.array(i, r.nextFloat());
687 arrayB.array(i, r.nextFloat());
688 }
689
690 accelerator.compute(cc -> computeGraph11(cc, arrayA, arrayB, size));
691
692 for (int i = 0; i < size; i ++) {
693 HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
694 }
695 }
696
697 @HatTest
698 public void TestVectorArrayView12() {
699 final int size = 1024;
700 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
701 var arrayA = F32ArrayPadded.create(accelerator, size);
702 var arrayB = F32ArrayPadded.create(accelerator, size);
703
704 Random r = new Random(19);
705 for (int i = 0; i < size; i++) {
706 arrayA.array(i, r.nextFloat());
707 arrayB.array(i, r.nextFloat());
708 }
709
710 accelerator.compute(cc -> computeGraph12(cc, arrayA, arrayB, size));
711
712 for (int i = 0; i < size; i ++) {
713 HATAsserts.assertEquals(arrayA.array(i), arrayB.array(i), 0.001f);
714 }
715 }
716 }