1 /*
2 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 package hat.test;
26
27 import hat.Accelerator;
28 import hat.ComputeContext;
29 import hat.NDRange;
30 import hat.KernelContext;
31 import hat.backend.Backend;
32 import hat.buffer.*;
33 import hat.device.DeviceSchema;
34 import hat.device.NonMappableIface;
35 import optkl.ifacemapper.BoundSchema;
36 import optkl.ifacemapper.Buffer;
37 import optkl.ifacemapper.Schema;
38 import jdk.incubator.code.Reflect;
39 import hat.test.annotation.HatTest;
40 import hat.test.exceptions.HATAsserts;
41 import optkl.ifacemapper.MappableIface.*;
42
43 import java.lang.foreign.ValueLayout;
44 import java.lang.invoke.MethodHandles;
45 import java.util.Random;
46
47 import static java.lang.foreign.ValueLayout.JAVA_BYTE;
48
49 public class TestArrayView {
50
/*
 * Simple square kernel example using S32Array's ArrayView.
 *
 * When kc.gix is below kc.gsx (presumably the work-item's global index and
 * the global size of the dispatch), the element at index kc.gix is squared
 * in place through the int[] returned by arrayView().
 */
@Reflect
public static void squareKernel(@RO KernelContext kc, @RW S32Array s32Array) {
    if (kc.gix < kc.gsx){
        int[] arr = s32Array.arrayView();
        // Compound assignment on the view: arr[i] becomes arr[i] * arr[i].
        arr[kc.gix] *= arr[kc.gix];
    }
}
61
62 @Reflect
63 public static void square(@RO ComputeContext cc, @RW S32Array s32Array) {
64 cc.dispatchKernel(NDRange.of1D(s32Array.length()),
65 kc -> squareKernel(kc, s32Array)
66 );
67 }
68
69 @HatTest
70 @Reflect
71 public static void testSquare() {
72
73 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
74 var arr = S32Array.create(accelerator, 32);
75 for (int i = 0; i < arr.length(); i++) {
76 arr.array(i, i);
77 }
78 accelerator.compute(
79 cc -> square(cc, arr)
80 );
81 for (int i = 0; i < arr.length(); i++) {
82 HATAsserts.assertEquals(i * i, arr.array(i));
83 }
84 }
85
/*
 * Making sure ArrayViews aren't reliant on varOps: the array returned by
 * arrayView() is indexed directly in the expression instead of first being
 * stored in a local variable.
 */
@Reflect
public static void squareKernelNoVarOp(@RO KernelContext kc, @RW S32Array s32Array) {
    if (kc.gix<kc.gsx){
        // Deliberately no local int[]: both operands call arrayView() inline.
        s32Array.arrayView()[kc.gix] *= s32Array.arrayView()[kc.gix];
    }
}
95
96 @Reflect
97 public static void squareNoVarOp(@RO ComputeContext cc, @RW S32Array s32Array) {
98 cc.dispatchKernel(NDRange.of1D(s32Array.length()),
99 kc -> squareKernelNoVarOp(kc, s32Array)
100 );
101 }
102
103 @HatTest
104 @Reflect
105 public static void testSquareNoVarOp() {
106 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
107 var arr = S32Array.create(accelerator, 32);
108 for (int i = 0; i < arr.length(); i++) {
109 arr.array(i, i);
110 }
111 accelerator.compute(
112 cc -> squareNoVarOp(cc, arr)
113 );
114 for (int i = 0; i < arr.length(); i++) {
115 HATAsserts.assertEquals(i * i, arr.array(i));
116 }
117 }
118
/*
 * Squares one element of an S32Array2D in place through its int[][]
 * ArrayView, addressing it as arr[kc.gix][kc.giy].
 *
 * NOTE(review): square2D dispatches this kernel over a 1D range of
 * width * height work-items and the guard only checks kc.gix. Presumably
 * kc.giy is 0 for a 1D range and the 2D view is flattened so that kc.gix
 * alone reaches every element -- confirm against the ArrayView lowering.
 */
@Reflect
public static void square2DKernel(@RO KernelContext kc, @RW S32Array2D s32Array2D) {
    if (kc.gix < kc.gsx){
        int[][] arr = s32Array2D.arrayView();
        arr[kc.gix][kc.giy] *= arr[kc.gix][kc.giy];
    }
}
126
127 @Reflect
128 public static void square2D(@RO ComputeContext cc, @RW S32Array2D s32Array2D) {
129 cc.dispatchKernel(NDRange.of1D(s32Array2D.width() * s32Array2D.height()),
130 kc -> square2DKernel(kc, s32Array2D)
131 );
132 }
133
134 @HatTest
135 @Reflect
136 public static void testSquare2D() {
137
138 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
139 var arr = S32Array2D.create(accelerator, 5, 5);
140 for (int i = 0; i < arr.height(); i++) {
141 for (int j = 0; j < arr.width(); j++) {
142 arr.set(i, j, i * 5 + j);
143 }
144 }
145 accelerator.compute(
146 cc -> square2D(cc, arr)
147 );
148 for (int i = 0; i < arr.height(); i++) {
149 for (int j = 0; j < arr.width(); j++) {
150 HATAsserts.assertEquals((i * 5 + j) * (i * 5 + j), arr.get(i, j));
151 }
152 }
153 }
154
155 /*
156 * simplified version of Game of Life using ArrayView
157 */
158 public final static byte ALIVE = (byte) 0xff;
159 public final static byte DEAD = 0x00;
160
/*
 * Interface-mapped buffer holding a Game of Life grid as bytes, preceded by
 * its width and height.
 */
public interface CellGrid extends Buffer {
    /*
     * struct CellGrid{
     *     int width;
     *     int height;
     *     byte[width*height*2] array;
     * }
     */
    int width();

    int height();

    // Flat element read at idx.
    byte array(long idx);

    // Flat element write at idx.
    void array(long idx, byte b);

    // Schema: array length derived from "width" and "height" with stride(2).
    // Presumably the stride doubles the storage, as in the struct sketch
    // above -- TODO confirm against Schema.arrayLen/stride.
    Schema<CellGrid> schema = Schema.of(CellGrid.class, lifeData -> lifeData
            .arrayLen("width", "height").stride(2).array("array")
    );

    // Binds the schema to the given dimensions and allocates the buffer.
    static CellGrid create(Accelerator accelerator, int width, int height) {
        return BoundSchema.of(accelerator ,schema, width, height).allocate();
    }

    // Element layout of the array: one byte per cell.
    ValueLayout valueLayout = JAVA_BYTE;

    // Java-side placeholder that returns null. Presumably kernel code
    // generation replaces arrayView() indexing with direct buffer access --
    // TODO confirm; do not call this from host code.
    default byte[][] arrayView() {
        return null;
    }
}
191
/*
 * Interface-mapped buffer with two int fields, "from" and "to".
 * NOTE(review): the only use visible in this file is commented out in
 * testLife.
 */
public interface Control extends Buffer {
    /*
     * struct Control{
     *     int from;
     *     int to;
     * }
     */
    int from();

    void from(int from);

    int to();

    void to(int to);

    // Only "from" and "to" are mapped.
    Schema<Control> schema = Schema.of(
            Control.class, control ->
                    control.fields("from", "to"));

    // Allocates a Control and initialises from = width * height of the given
    // grid and to = 0.
    static Control create(Accelerator accelerator, CellGrid cellGrid) {
        var instance = BoundSchema.of(accelerator ,schema).allocate();
        instance.from(cellGrid.width() * cellGrid.height());
        instance.to(0);
        return instance;
    }
}
218
219 public static byte[][] lifeCheck(CellGrid cellGrid) {
220 int w = cellGrid.width();
221 int h = cellGrid.height();
222
223 byte[][] res = new byte[h][w];
224
225 for (int y = 0; y < h; y++) {
226 for (int x = 0; x < w; x++) {
227 int idx = y * w + x;
228 byte cell = cellGrid.array(idx);
229 if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please
230 int count =
231 (cellGrid.array((y - 1) * w + (x - 1)) & 1)
232 + (cellGrid.array((y + 0) * w + (x - 1)) & 1)
233 + (cellGrid.array((y + 1) * w + (x - 1)) & 1)
234 + (cellGrid.array((y - 1) * w + (x + 0)) & 1)
235 + (cellGrid.array((y + 1) * w + (x + 0)) & 1)
236 + (cellGrid.array((y - 1) * w + (x + 1)) & 1)
237 + (cellGrid.array((y + 0) * w + (x + 1)) & 1)
238 + (cellGrid.array((y + 1) * w + (x + 1))& 1);
239 cell = ((count == 3) || ((count == 2) && (cell == ALIVE))) ? ALIVE : DEAD;// B3/S23.
240 }
241 res[x][y] = cell;
242 }
243 }
244 return res;
245 }
246
/*
 * Kernel-side Game of Life: per-cell update, kernel wrapper and compute
 * entry point.
 */
public static class Compute {
    /*
     * Computes the next state of the cell with flat index idx (x = idx % w,
     * y = idx / w) from cellGrid and stores it in cellGridRes, both accessed
     * through their byte[][] ArrayViews as [x][y]. Cells on the border are
     * copied unchanged; interior cells follow rule B3/S23.
     */
    @Reflect
    // TODO: switch cellGridRes to WO
    public static void lifePerIdx(int idx, @RO CellGrid cellGrid, @RW CellGrid cellGridRes) {
        int w = cellGrid.width();
        int h = cellGrid.height();
        int x = idx % w;
        int y = idx / w;

        // Disabled alternative (lookup-table variant on a flat byte[] view),
        // kept for reference only:
        // byte[] bytes = cellGrid.arrayView();
        // byte cell = bytes[idx + from];
        // byte[] lookup = new byte[]{};
        // if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please
        //     int lookupIdx =
        //             (bytes[(y - 1) * w + x - 1 + from]&1 <<0)
        //                     |(bytes[(y + 0) * w + x - 1 + from]&1 <<1)
        //                     |(bytes[(y + 1) * w + x - 1 + from]&1 <<2)
        //                     |(bytes[(y - 1) * w + x + 0 + from]&1 <<3)
        //                     |(bytes[(y - 0) * w + x + 0 + from]&1 <<4) // current cell added
        //                     |(bytes[(y + 1) * w + x + 0 + from]&1 <<5)
        //                     |(bytes[(y + 0) * w + x + 1 + from]&1 <<6)
        //                     |(bytes[(y - 1) * w + x + 1 + from]&1 <<7)
        //                     |(bytes[(y + 1) * w + x + 1 + from]&1 <<8) ;
        //     // conditional removed!
        //     bytes[idx + to] = lookup[lookupIdx];
        // }

        byte[][] bytes = cellGrid.arrayView();
        byte cell = bytes[x][y];
        if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please
            // Sum the lowest bit of the eight neighbours (1 for ALIVE, 0 for DEAD).
            int count =
                    (bytes[x - 1][y - 1] & 1)
                            + (bytes[x - 1][y + 0] & 1)
                            + (bytes[x - 1][y + 1] & 1)
                            + (bytes[x + 0][y - 1] & 1)
                            + (bytes[x + 0][y + 1] & 1)
                            + (bytes[x + 1][y - 1] & 1)
                            + (bytes[x + 1][y + 0] & 1)
                            + (bytes[x + 1][y + 1] & 1);
            cell = ((count == 3) || ((count == 2) && (cell == ALIVE))) ? ALIVE : DEAD;// B3/S23.
        }
        byte[][] res = cellGridRes.arrayView();
        res[x][y] = cell;
    }

    /*
     * Kernel wrapper: runs lifePerIdx for kc.gix when it is below kc.gsx.
     */
    @Reflect
    public static void life(@RO KernelContext kc, @RO CellGrid cellGrid, @RW CellGrid cellGridRes) {
        if (kc.gix < kc.gsx) {
            Compute.lifePerIdx(kc.gix, cellGrid, cellGridRes);
        }
    }

    /*
     * Compute entry point: dispatches life over a 1D range of
     * width * height work-items.
     */
    @Reflect
    static public void compute(final @RO ComputeContext cc, @RO CellGrid grid, @RW CellGrid gridRes) {
        int range = grid.width() * grid.height();
        cc.dispatchKernel(NDRange.of1D(range), kc -> Compute.life(kc, grid, gridRes));
    }
}
305
/*
 * Runs one Game of Life generation on a fixed 17x17 grid through the kernel
 * and compares every cell of cellGridRes with the sequential lifeCheck
 * result computed from the same input grid.
 *
 * NOTE(review): the seed pattern below is equal to its own transpose, so this
 * test cannot detect an x/y swap in either the kernel or the reference.
 */
@HatTest
@Reflect
public static void testLife() {
    Accelerator accelerator = new Accelerator(MethodHandles.lookup());

    // Disabled alternative: random 20x20 seed grid.
    // int w = 20;
    // int h = 20;
    // CellGrid cellGrid = CellGrid.create(accelerator, w, h);
    // CellGrid cellGridRes = CellGrid.create(accelerator, w, h);
    //
    // Random rand = new Random();
    // byte[][] actualGrid = new byte[w][h];
    // for (int y = 0; y < h; y++) {
    //     for (int x = 0; x < w; x++) {
    //         actualGrid[x][y] = rand.nextBoolean() ? ALIVE : DEAD;
    //     }
    // }

    // Input and output grids have the same 17x17 dimensions as actualGrid.
    CellGrid cellGrid = CellGrid.create(accelerator, 17, 17);
    CellGrid cellGridRes = CellGrid.create(accelerator, 17, 17);

    // Seed pattern; the outermost rows and columns are all DEAD, so the
    // border cells that the kernel copies unchanged stay DEAD.
    byte[][] actualGrid = new byte[][]{
            {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
            {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
            {DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD},
            {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
            {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
            {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
            {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
            {DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD},
            {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
            {DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD},
            {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
            {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
            {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
            {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
            {DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD},
            {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
            {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
    };

    // Copy the seed into the input buffer in row-major order
    // (flat index = row * width + column).
    for (int i = 0; i < cellGrid.height(); i++) {
        for (int j = 0; j < cellGrid.width(); j++) {
            cellGrid.array(((long) i * cellGrid.width()) + j, actualGrid[i][j]);
        }
    }

    // Control control = Control.create(accelerator, cellGrid);

    accelerator.compute(cc -> Compute.compute(cc, cellGrid, cellGridRes));

    // Reference result computed sequentially from the same input grid.
    byte[][] resultGrid = lifeCheck(cellGrid);

    for (int i = 0; i < cellGrid.height(); i++) {
        for (int j = 0; j < cellGrid.width(); j++) {
            HATAsserts.assertEquals(resultGrid[i][j], cellGridRes.array(((long) i * cellGrid.width()) + j));
        }
    }
}
369
370 /*
371 * simplified version of mandel using ArrayView
372 */
373 @Reflect
374 public static int mandelCheck(int i, int j, float width, float height, int[] pallette, float offsetx, float offsety, float scale) {
375 float x = (i * scale - (scale / 2f * width)) / width + offsetx;
376 float y = (j * scale - (scale / 2f * height)) / height + offsety;
377 float zx = x;
378 float zy = y;
379 float new_zx;
380 int colorIdx = 0;
381 while ((colorIdx < pallette.length) && (((zx * zx) + (zy * zy)) < 4f)) {
382 new_zx = ((zx * zx) - (zy * zy)) + x;
383 zy = (2f * zx * zy) + y;
384 zx = new_zx;
385 colorIdx++;
386 }
387 return colorIdx < pallette.length ? pallette[colorIdx] : 0;
388 }
389
/*
 * Mandelbrot kernel: work-item kc.gix handles the pixel
 * (kc.gix % width, kc.gix / width) of s32Array2D.
 *
 * The pixel is mapped to a point (x, y) using scale and the offsets, then
 * z = z^2 + c is iterated until |z|^2 is no longer below 4 or the count
 * reaches the palette length. The palette entry for that count (0 when the
 * limit is reached) is written to the pixel through the int[][] ArrayView.
 */
@Reflect
public static void mandel(@RO KernelContext kc, @RW S32Array2D s32Array2D, @RO S32Array pallette, float offsetx, float offsety, float scale) {
    if (kc.gix < kc.gsx) {
        int[] pal = pallette.arrayView();
        int[][] s32 = s32Array2D.arrayView();
        float width = s32Array2D.width();
        float height = s32Array2D.height();
        // Column = gix % width, row = gix / width (integer arithmetic on the
        // int width() accessor, not on the float copies above).
        float x = ((kc.gix % s32Array2D.width()) * scale - (scale / 2f * width)) / width + offsetx;
        float y = ((kc.gix / s32Array2D.width()) * scale - (scale / 2f * height)) / height + offsety;
        float zx = x;
        float zy = y;
        float new_zx;
        int colorIdx = 0;
        while ((colorIdx < pal.length) && (((zx * zx) + (zy * zy)) < 4f)) {
            new_zx = ((zx * zx) - (zy * zy)) + x;
            zy = (2f * zx * zy) + y;
            zx = new_zx;
            colorIdx++;
        }
        int color = colorIdx < pal.length ? pal[colorIdx] : 0;
        s32[kc.gix % s32Array2D.width()][kc.gix / s32Array2D.width()] = color;
    }
}
413
414
415 @Reflect
416 static public void compute(final ComputeContext computeContext, S32Array pallete, S32Array2D s32Array2D, float x, float y, float scale) {
417
418 computeContext.dispatchKernel(
419 NDRange.of1D(s32Array2D.width()*s32Array2D.height()), //0..S32Array2D.size()
420 kc -> mandel(kc, s32Array2D, pallete, x, y, scale));
421 }
422
423 @HatTest
424 @Reflect
425 public static void testMandel() {
426 final int width = 1024;
427 final int height = 1024;
428 final float defaultScale = 3f;
429 final float originX = -1f;
430 final float originY = 0;
431 final int maxIterations = 64;
432
433 Accelerator accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
434
435 S32Array2D s32Array2D = S32Array2D.create(accelerator, width, height);
436
437 int[] palletteArray = new int[maxIterations];
438
439 for (int i = 1; i < maxIterations; i++) {
440 palletteArray[i]=(i/8+1);
441 }
442 palletteArray[0]=0;
443 S32Array pallette = S32Array.createFrom(accelerator, palletteArray);
444
445 accelerator.compute(cc -> compute(cc, pallette, s32Array2D, originX, originY, defaultScale));
446
447 int subsample = 16;
448 char[] charPallette9 = new char []{' ', '.', ',',':', '-', '+','*', '#', '@', '%'};
449 for (int y = 0; y<height/subsample; y++) {
450 for (int x = 0; x<width/subsample; x++) {
451 int palletteValue = s32Array2D.get(x*subsample,y*subsample); // so 0->8
452 int paletteCheck = mandelCheck(x*subsample, y*subsample, width, height, palletteArray, originX, originY, defaultScale);
453 HATAsserts.assertEquals(paletteCheck, palletteValue);
454 }
455 }
456 }
457
/*
 * Simplified version of BlackScholes using ArrayView.
 *
 * For the option at index kc.gix, reads the matching elements of sArray,
 * xArray and tArray and writes the computed call and put values into the
 * call and put buffers. r and v are shared by all options. All five buffers
 * are accessed through float[] ArrayViews.
 */
@Reflect
public static void blackScholesKernel(@RO KernelContext kc,
                                      @RW F32Array call,
                                      @RW F32Array put,
                                      @RO F32Array sArray,
                                      @RO F32Array xArray,
                                      @RO F32Array tArray,
                                      float r,
                                      float v) {
    if (kc.gix<kc.gsx){
        float[] callArr = call.arrayView();
        float[] putArr = put.arrayView();
        float[] sArr = sArray.arrayView();
        float[] xArr = xArray.arrayView();
        float[] tArr = tArray.arrayView();

        // exp(-r * t)
        float expNegRt = (float) Math.exp(-r * tArr[kc.gix]);
        float d1 = (float) ((Math.log(sArr[kc.gix] / xArr[kc.gix]) + (r + v * v * .5f) * tArr[kc.gix]) / (v * Math.sqrt(tArr[kc.gix])));
        float d2 = (float) (d1 - v * Math.sqrt(tArr[kc.gix]));
        float cnd1 = CND(d1);
        float cnd2 = CND(d2);
        float value = sArr[kc.gix] * cnd1 - expNegRt * xArr[kc.gix] * cnd2;
        callArr[kc.gix] = value;
        putArr[kc.gix] = expNegRt * xArr[kc.gix] * (1 - cnd2) - sArr[kc.gix] * (1 - cnd1);
    }
}
487
488 @Reflect
489 public static float CND(float input) {
490 float x = input;
491 if (input < 0f) { // input = Math.abs(input)?
492 x = -input;
493 }
494
495 float term = 1f / (1f + (0.2316419f * x));
496 float term_pow2 = term * term;
497 float term_pow3 = term_pow2 * term;
498 float term_pow4 = term_pow2 * term_pow2;
499 float term_pow5 = term_pow2 * term_pow3;
500
501 float part1 = (1f / (float)Math.sqrt(2f * 3.1415926535f)) * (float)Math.exp((-x * x) * 0.5f);
502
503 float part2 = (0.31938153f * term) +
504 (-0.356563782f * term_pow2) +
505 (1.781477937f * term_pow3) +
506 (-1.821255978f * term_pow4) +
507 (1.330274429f * term_pow5);
508
509 if (input >= 0f) {
510 return 1f - part1 * part2;
511 }
512 return part1 * part2;
513
514 }
515
516 @Reflect
517 public static void blackScholes(@RO ComputeContext cc, @RW F32Array call, @RW F32Array put, @RO F32Array S, @RO F32Array X, @RO F32Array T, float r, float v) {
518 cc.dispatchKernel(NDRange.of1D(call.length()),
519 kc -> blackScholesKernel(kc, call, put, S, X, T, r, v)
520 );
521 }
522
523 static F32Array floatArray(Accelerator accelerator, int size, float low, float high, Random rand) {
524 F32Array array = F32Array.create(accelerator, size);
525 for (int i = 0; i <size; i++) {
526 array.array(i, rand.nextFloat() * (high - low) + low);
527 }
528 return array;
529 }
530
531 public static void blackScholesKernelSeq(F32Array call, F32Array put, F32Array sArray, F32Array xArray, F32Array tArray, float r, float v) {
532 for (int i = 0; i <call.length() ; i++) {
533 float S = sArray.array(i);
534 float X = xArray.array(i);
535 float T = tArray.array(i);
536 float expNegRt = (float) Math.exp(-r * T);
537 float d1 = (float) ((Math.log(S / X) + (r + v * v * .5f) * T) / (v * Math.sqrt(T)));
538 float d2 = (float) (d1 - v * Math.sqrt(T));
539 float cnd1 = CND(d1);
540 float cnd2 = CND(d2);
541 float value = S * cnd1 - expNegRt * X * cnd2;
542 call.array(i, value);
543 put.array(i, expNegRt * X * (1 - cnd2) - S * (1 - cnd1));
544 }
545 }
546
547 @HatTest
548 @Reflect
549 public static void testBlackScholes() {
550 int size = 1024;
551 Random rand = new Random();
552 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
553 var call = F32Array.create(accelerator, size);
554 var put = F32Array.create(accelerator, size);
555 for (int i = 0; i < size; i++) {
556 call.array(i, i);
557 put.array(i, i);
558 }
559
560 var S = floatArray(accelerator, size,1f, 100f, rand);
561 var X = floatArray(accelerator, size,1f, 100f, rand);
562 var T = floatArray(accelerator,size, 0.25f, 10f, rand);
563 float r = 0.02f;
564 float v = 0.30f;
565
566 accelerator.compute(cc -> blackScholes(cc, call, put, S, X, T, r, v));
567
568 var seqCall = F32Array.create(accelerator, size);
569 var seqPut = F32Array.create(accelerator, size);
570 for (int i = 0; i < seqCall.length(); i++) {
571 seqCall.array(i, i);
572 seqPut.array(i, i);
573 }
574
575 blackScholesKernelSeq(seqCall, seqPut, S, X, T, r, v);
576
577 for (int i = 0; i < call.length(); i++) {
578 HATAsserts.assertEquals(seqCall.array(i), call.array(i), 0.01f);
579 HATAsserts.assertEquals(seqPut.array(i), put.array(i), 0.01f);
580 }
581 }
582
/*
 * Basic test of local and private buffer ArrayViews.
 *
 * SharedMemory is a device-side type whose schema declares an array named
 * "array" with 1024 elements.
 */
private interface SharedMemory extends NonMappableIface {
    void array(long index, int value);
    int array(long index);
    DeviceSchema<SharedMemory> schema = DeviceSchema.of(SharedMemory.class,
            arr -> arr.withArray("array", 1024));

    // Java-side placeholder that returns null. Presumably kernel code
    // generation turns this call into a local-memory allocation -- TODO confirm.
    static SharedMemory createLocal() { return null; }

    // Java-side body: copies all 1024 elements into a fresh int[], so writes
    // to the returned array do not reach this object here. Presumably
    // generated kernel code accesses the underlying storage directly -- TODO
    // confirm.
    default int[] localArrayView() {
        int[] view = new int[1024];
        for (int i = 0; i < 1024; i++) {
            view[i] = this.array(i);
        }
        return view;
    }
}
602
/*
 * Device-side type whose schema declares an array named "array" with 16
 * elements, created in kernels through createPrivate().
 */
public interface PrivateArray extends NonMappableIface {
    void array(long index, int value);
    int array(long index);
    DeviceSchema<PrivateArray> schema = DeviceSchema.of(PrivateArray.class,
            arr -> arr.withArray("array", 16));

    // Java-side placeholder that returns null. Presumably kernel code
    // generation turns this call into a private allocation -- TODO confirm.
    static PrivateArray createPrivate() { return null; }

    // Java-side body: copies all 16 elements into a fresh int[], so writes to
    // the returned array do not reach this object here. Presumably generated
    // kernel code accesses the underlying storage directly -- TODO confirm.
    default int[] privateArrayView() {
        int[] view = new int[16];
        for (int i = 0; i < 16; i++) {
            view[i] = this.array(i);
        }
        return view;
    }
}
619
/*
 * Kernel mixing a global ArrayView with private and local ArrayViews.
 *
 * For index kc.gix the element is doubled, then 1 is added via element 0 of
 * a PrivateArray view, then 16 is added via element 0 of a SharedMemory
 * view, giving 2 * original + 17. Despite its name this kernel does not
 * square anything.
 */
@Reflect
public static void squareKernelWithPrivateAndLocal(@RO KernelContext kc, @RW S32Array s32Array) {
    SharedMemory shared = SharedMemory.createLocal();
    if (kc.gix < kc.gsx){
        int[] arr = s32Array.arrayView();
        arr[kc.gix] += arr[kc.gix];

        PrivateArray priv = PrivateArray.createPrivate();
        int[] privView = priv.privateArrayView();
        privView[0] = 1;
        arr[kc.gix] += privView[0];

        int[] sharedView = shared.localArrayView();
        // Every work-item that gets here stores the same value into slot 0.
        sharedView[0] = 16;
        // NOTE(review): the barrier sits inside the guard; this presumably
        // relies on every work-item satisfying kc.gix < kc.gsx -- confirm.
        kc.barrier();
        arr[kc.gix] += sharedView[0];
    }
}
638
639 @Reflect
640 public static void privateAndLocal(@RO ComputeContext cc, @RW S32Array s32Array) {
641 cc.dispatchKernel(NDRange.of1D(s32Array.length()),
642 kc -> squareKernelWithPrivateAndLocal(kc, s32Array)
643 );
644 }
645
646 @HatTest
647 @Reflect
648 public static void testPrivateAndLocal() {
649
650 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
651 var arr = S32Array.create(accelerator, 32);
652 for (int i = 0; i < arr.length(); i++) {
653 arr.array(i, i);
654 }
655 accelerator.compute(
656 cc -> privateAndLocal(cc, arr)
657 );
658 for (int i = 0; i < arr.length(); i++) {
659 HATAsserts.assertEquals(2 * i + 17, arr.array(i));
660 }
661 }
662
/*
 * Testing basic DeviceTypes.
 *
 * SharedNonMappableIface is a device-side type whose schema declares an
 * array named "array" with 1024 elements. All factory and view methods are
 * Java-side placeholders that return null; presumably kernel code generation
 * substitutes real local-memory storage -- TODO confirm.
 */

public interface SharedNonMappableIface extends NonMappableIface {
    void array(long index, int value);
    int array(long index);
    DeviceSchema<SharedNonMappableIface> schema = DeviceSchema.of(SharedNonMappableIface.class,
            arr -> arr.withArray("array", 1024));
    // Placeholder; the accelerator argument is ignored.
    static SharedNonMappableIface create(Accelerator accelerator) {
        return null;
    }
    // Placeholder used by kernels to obtain the local instance.
    static SharedNonMappableIface createLocal() {
        return null;
    }

    // Placeholder for the int[] view used inside kernels.
    default int[] localArrayView() {
        return null;
    }
}
683
/*
 * Device-side type whose schema declares an array named "array" with 32
 * elements. All factory and view methods are Java-side placeholders that
 * return null; presumably kernel code generation substitutes real private
 * storage -- TODO confirm.
 */
public interface PrivateNonMappableIface extends NonMappableIface {
    void array(long index, int value);
    int array(long index);
    DeviceSchema<PrivateNonMappableIface> schema = DeviceSchema.of(PrivateNonMappableIface.class,
            arr -> arr.withArray("array", 32));
    // Placeholder; the accelerator argument is ignored.
    static PrivateNonMappableIface create(Accelerator accelerator) {
        return null;
    }
    // Placeholder used by kernels to obtain the private instance.
    static PrivateNonMappableIface createPrivate() {
        return null;
    }

    // Placeholder for the int[] view used inside kernels.
    default int[] privateArrayView() {
        return null;
    }
}
700
/*
 * Copies arr[kc.gix] into both a private view and a local view at index
 * kc.gix, then stores their sum back, so each element ends up doubled.
 *
 * NOTE(review): both views are indexed with kc.gix. The private array holds
 * 32 elements, which matches the 32-element dispatch in testBasicDeviceType;
 * larger dispatches would presumably index past it -- confirm.
 */
@Reflect
public static void kernelBasicDeviceType(@RO KernelContext kc, @RW S32Array s32Array) {
    SharedNonMappableIface shared = SharedNonMappableIface.createLocal();
    if (kc.gix < kc.gsx){
        PrivateNonMappableIface priv = PrivateNonMappableIface.createPrivate();

        int[] arr = s32Array.arrayView();
        int[] privView = priv.privateArrayView();
        int[] sharedView = shared.localArrayView();

        privView[kc.gix] = arr[kc.gix];
        sharedView[kc.gix] = arr[kc.gix];
        kc.barrier();
        // Sum of the two copies: 2 * original value.
        arr[kc.gix] = privView[kc.gix] + sharedView[kc.gix];
    }
}
717
718 @Reflect
719 public static void basicDeviceType(@RO ComputeContext cc, @RW S32Array s32Array) {
720 cc.dispatchKernel(NDRange.of1D(s32Array.length()),
721 kc -> kernelBasicDeviceType(kc, s32Array)
722 );
723 }
724
725 @HatTest
726 @Reflect
727 public static void testBasicDeviceType() {
728 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
729 var arr = S32Array.create(accelerator, 32);
730 for (int i = 0; i < arr.length(); i++) {
731 arr.array(i, i);
732 }
733 accelerator.compute(cc -> basicDeviceType(cc, arr));
734 for (int i = 0; i < arr.length(); i++) {
735 HATAsserts.assertEquals(2 * i, arr.array(i));
736 }
737 }
738
/*
 * Device-type kernel using a data-dependent index into the local view.
 *
 * The element is copied into the private view, 16 times that value is
 * stored in the local view at the position given by the value itself, and
 * after the barrier the element is increased by privView[kc.gix] +
 * sharedView[kc.gix]. With the input arr[i] = i used by testDeviceType this
 * yields i + i + 16 * i = 18 * i. Despite its name this kernel does not
 * square anything.
 */
@Reflect
public static void squareKernelDeviceType(@RO KernelContext kc, @RW S32Array s32Array) {
    SharedNonMappableIface shared = SharedNonMappableIface.createLocal();
    if (kc.gix < kc.gsx){
        PrivateNonMappableIface priv = PrivateNonMappableIface.createPrivate();

        int[] arr = s32Array.arrayView();
        int[] privView = priv.privateArrayView();
        int[] sharedView = shared.localArrayView();

        privView[kc.gix] = arr[kc.gix];
        // Indexed by the element's value, not by kc.gix.
        sharedView[privView[kc.gix]] = 16 * privView[kc.gix];
        kc.barrier();
        arr[kc.gix] += privView[kc.gix] + sharedView[kc.gix];
    }
}
755
756 @Reflect
757 public static void deviceType(@RO ComputeContext cc, @RW S32Array s32Array) {
758 cc.dispatchKernel(NDRange.of1D(s32Array.length()),
759 kc -> squareKernelDeviceType(kc, s32Array)
760 );
761 }
762
763 @HatTest
764 @Reflect
765 public static void testDeviceType() {
766 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
767 var arr = S32Array.create(accelerator, 32);
768 for (int i = 0; i < arr.length(); i++) {
769 arr.array(i, i);
770 }
771 accelerator.compute(cc -> deviceType(cc, arr));
772 for (int i = 0; i < arr.length(); i++) {
773 HATAsserts.assertEquals(18 * i, arr.array(i));
774 }
775 }
776 }