1 /*
2 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 package hat.test;
26
27 import hat.Accelerator;
28 import hat.ComputeContext;
29 import hat.NDRange;
30 import hat.KernelContext;
31 import hat.backend.Backend;
32 import hat.buffer.*;
33 import hat.ifacemapper.MappableIface.*;
34 import hat.ifacemapper.Schema;
35 import jdk.incubator.code.Reflect;
36 import hat.test.annotation.HatTest;
37 import hat.test.engine.HATAsserts;
38
39 import java.lang.foreign.ValueLayout;
40 import java.lang.invoke.MethodHandles;
41 import java.util.Random;
42
43 import static java.lang.foreign.ValueLayout.JAVA_BYTE;
44
45 public class TestArrayView {
46
47 /*
48 * simple square kernel example using S32Array's ArrayView
49 */
50 @Reflect
51 public static void squareKernel(@RO KernelContext kc, @RW S32Array s32Array) {
52 if (kc.gix < kc.gsx){
53 int[] arr = s32Array.arrayView();
54 arr[kc.gix] *= arr[kc.gix];
55 }
56 }
57
58 @Reflect
59 public static void square(@RO ComputeContext cc, @RW S32Array s32Array) {
60 cc.dispatchKernel(NDRange.of1D(s32Array.length()),
61 kc -> squareKernel(kc, s32Array)
62 );
63 }
64
65 @HatTest
66 @Reflect
67 public static void testSquare() {
68
69 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
70 var arr = S32Array.create(accelerator, 32);
71 for (int i = 0; i < arr.length(); i++) {
72 arr.array(i, i);
73 }
74 accelerator.compute(
75 cc -> square(cc, arr) //QuotableComputeContextConsumer
76 ); // extends Quotable, Consumer<ComputeContext>
77 for (int i = 0; i < arr.length(); i++) {
78 HATAsserts.assertEquals(i * i, arr.array(i));
79 }
80 }
81
82 /*
83 * making sure arrayviews aren't reliant on varOps
84 */
85 @Reflect
86 public static void squareKernelNoVarOp(@RO KernelContext kc, @RW S32Array s32Array) {
87 if (kc.gix<kc.gsx){
88 s32Array.arrayView()[kc.gix] *= s32Array.arrayView()[kc.gix];
89 }
90 }
91
92 @Reflect
93 public static void squareNoVarOp(@RO ComputeContext cc, @RW S32Array s32Array) {
94 cc.dispatchKernel(NDRange.of1D(s32Array.length()),
95 kc -> squareKernelNoVarOp(kc, s32Array)
96 );
97 }
98
99 @HatTest
100 @Reflect
101 public static void testSquareNoVarOp() {
102 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
103 var arr = S32Array.create(accelerator, 32);
104 for (int i = 0; i < arr.length(); i++) {
105 arr.array(i, i);
106 }
107 accelerator.compute(
108 cc -> squareNoVarOp(cc, arr) //QuotableComputeContextConsumer
109 ); // extends Quotable, Consumer<ComputeContext>
110 for (int i = 0; i < arr.length(); i++) {
111 HATAsserts.assertEquals(i * i, arr.array(i));
112 }
113 }
114
115 @Reflect
116 public static void square2DKernel(@RO KernelContext kc, @RW S32Array2D s32Array2D) {
117 if (kc.gix < kc.gsx){
118 int[][] arr = s32Array2D.arrayView();
119 arr[kc.gix][kc.giy] *= arr[kc.gix][kc.giy];
120 }
121 }
122
123 @Reflect
124 public static void square2D(@RO ComputeContext cc, @RW S32Array2D s32Array2D) {
125 cc.dispatchKernel(NDRange.of1D(s32Array2D.width() * s32Array2D.height()),
126 kc -> square2DKernel(kc, s32Array2D)
127 );
128 }
129
130 @HatTest
131 @Reflect
132 public static void testSquare2D() {
133
134 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
135 var arr = S32Array2D.create(accelerator, 5, 5);
136 for (int i = 0; i < arr.height(); i++) {
137 for (int j = 0; j < arr.width(); j++) {
138 arr.set(i, j, i * 5 + j);
139 }
140 }
141 accelerator.compute(
142 cc -> square2D(cc, arr) //QuotableComputeContextConsumer
143 ); // extends Quotable, Consumer<ComputeContext>
144 for (int i = 0; i < arr.height(); i++) {
145 for (int j = 0; j < arr.width(); j++) {
146 HATAsserts.assertEquals((i * 5 + j) * (i * 5 + j), arr.get(i, j));
147 }
148 }
149 }
150
151 /*
152 * simplified version of Game of Life using ArrayView
153 */
154 public final static byte ALIVE = (byte) 0xff;
155 public final static byte DEAD = 0x00;
156
157 public interface CellGrid extends Buffer {
158 /*
159 * struct CellGrid{
160 * int width;
161 * int height;
162 * byte[width*height*2] cellArray;
163 * }
164 */
165 int width();
166
167 int height();
168
169 byte array(long idx);
170
171 void array(long idx, byte b);
172
173 Schema<CellGrid> schema = Schema.of(CellGrid.class, lifeData -> lifeData
174 .arrayLen("width", "height").stride(2).array("array")
175 );
176
177 static CellGrid create(Accelerator accelerator, int width, int height) {
178 return schema.allocate(accelerator, width, height);
179 }
180
181 ValueLayout valueLayout = JAVA_BYTE;
182
183 default byte[] arrayView() {
184 int size = this.width() * this.height();
185 byte[] arr = new byte[size];
186 for (int i = 0; i < size; i++) {
187 arr[i] = this.array(i);
188 }
189 return arr;
190 }
191 }
192
193 public interface Control extends Buffer {
194 /*
195 * struct Control{
196 * int from;
197 * int to;
198 * }
199 */
200 int from();
201
202 void from(int from);
203
204 int to();
205
206 void to(int to);
207
208 Schema<Control> schema = Schema.of(
209 Control.class, control ->
210 control.fields("from", "to"));//, "generation", "requiredFrameRate", "maxGenerations"));
211
212 static Control create(Accelerator accelerator, CellGrid cellGrid) {
213 var instance = schema.allocate(accelerator);
214 instance.from(cellGrid.width() * cellGrid.height());
215 instance.to(0);
216 return instance;
217 }
218 }
219
220 public static class Compute {
221 @Reflect
222 public static void lifePerIdx(int idx, @RO Control control, @RO CellGrid cellGrid, @WO CellGrid cellGridRes) {
223 int w = cellGrid.width();
224 int h = cellGrid.height();
225 int from = control.from();
226 int to = control.to();
227 int x = idx % w;
228 int y = idx / w;
229
230 // byte[] bytes = cellGrid.arrayView();
231 // byte cell = bytes[idx + from];
232 // byte[] lookup = new byte[]{};
233 // if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please
234 // int lookupIdx =
235 // (bytes[(y - 1) * w + x - 1 + from]&1 <<0)
236 // |(bytes[(y + 0) * w + x - 1 + from]&1 <<1)
237 // |(bytes[(y + 1) * w + x - 1 + from]&1 <<2)
238 // |(bytes[(y - 1) * w + x + 0 + from]&1 <<3)
239 // |(bytes[(y - 0) * w + x + 0 + from]&1 <<4) // current cell added
240 // |(bytes[(y + 1) * w + x + 0 + from]&1 <<5)
241 // |(bytes[(y + 0) * w + x + 1 + from]&1 <<6)
242 // |(bytes[(y - 1) * w + x + 1 + from]&1 <<7)
243 // |(bytes[(y + 1) * w + x + 1 + from]&1 <<8) ;
244 // // conditional removed!
245 // bytes[idx + to] = lookup[lookupIdx];
246 // }
247
248 byte[] bytes = cellGrid.arrayView();
249 byte cell = bytes[idx];
250 if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please
251 int count =
252 (bytes[(y - 1) * w + (x - 1)] & 1)
253 + (bytes[(y + 0) * w + (x - 1)] & 1)
254 + (bytes[(y + 1) * w + (x - 1)] & 1)
255 + (bytes[(y - 1) * w + (x + 0)] & 1)
256 + (bytes[(y + 1) * w + (x + 0)] & 1)
257 + (bytes[(y - 1) * w + (x + 1)] & 1)
258 + (bytes[(y + 0) * w + (x + 1)] & 1)
259 + (bytes[(y + 1) * w + (x + 1)] & 1);
260 cell = ((count == 3) || ((count == 2) && (cell == ALIVE))) ? ALIVE : DEAD;// B3/S23.
261 }
262 byte[] res = cellGridRes.arrayView();
263 res[idx] = cell;
264 }
265
266 @Reflect
267 public static void life(@RO KernelContext kc, @RO Control control, @RO CellGrid cellGrid, @WO CellGrid cellGridRes) {
268 if (kc.gix < kc.gsx) {
269 Compute.lifePerIdx(kc.gix, control, cellGrid, cellGridRes);
270 }
271 }
272
273 @Reflect
274 static public void compute(final @RO ComputeContext cc, @RO Control ctrl, @RO CellGrid grid, @WO CellGrid gridRes) {
275 int range = grid.width() * grid.height();
276 cc.dispatchKernel(NDRange.of1D(range), kc -> Compute.life(kc, ctrl, grid, gridRes));
277 }
278 }
279
280 @HatTest
281 @Reflect
282 public static void testLife() {
283 Accelerator accelerator = new Accelerator(MethodHandles.lookup());//,new OpenCLBackend("INFO,MINIMIZE_COPIES,SHOW_COMPUTE_MODEL"));
284
285 // We oversize the grid by adding 1 to n,e,w and s
286 CellGrid cellGrid = CellGrid.create(accelerator, 17, 17);
287 CellGrid cellGridRes = CellGrid.create(accelerator, 17, 17);
288
289 byte[][] actualGrid = new byte[][]{
290 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
291 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
292 {DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD},
293 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
294 {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
295 {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
296 {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
297 {DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD},
298 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
299 {DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD},
300 {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
301 {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
302 {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
303 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
304 {DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD},
305 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
306 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
307 };
308
309 // By shifting all cells +1,+1 so we only need to scan 1..width-1, 1..height-1
310 // we don't worry about possibly finding cells in 0,n width,n or n,0 height,n
311 for (int i = 0; i < cellGrid.height(); i++) {
312 for (int j = 0; j < cellGrid.width(); j++) {
313 cellGrid.array(((long) i * cellGrid.width()) + j, actualGrid[i][j]);
314 }
315 }
316
317 Control control = Control.create(accelerator, cellGrid);
318
319 accelerator.compute(cc -> Compute.compute(cc, control, cellGrid, cellGridRes));
320
321 byte[][] resultGrid = new byte[][]{
322 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
323 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
324 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
325 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
326 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
327 {DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, ALIVE, ALIVE, DEAD, ALIVE, ALIVE, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD},
328 {DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD},
329 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
330 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
331 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
332 {DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD},
333 {DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, ALIVE, ALIVE, DEAD, ALIVE, ALIVE, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD},
334 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
335 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
336 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
337 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
338 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
339 };
340
341 for (int i = 0; i < cellGrid.height(); i++) {
342 for (int j = 0; j < cellGrid.width(); j++) {
343 HATAsserts.assertEquals(resultGrid[i][j], cellGridRes.array(((long) i * cellGrid.width()) + j));
344 }
345 }
346 }
347
348 /*
349 * simplified version of mandel using ArrayView
350 */
351 @Reflect
352 public static int mandelCheck(int i, int j, float width, float height, int[] pallette, float offsetx, float offsety, float scale) {
353 float x = (i * scale - (scale / 2f * width)) / width + offsetx;
354 float y = (j * scale - (scale / 2f * height)) / height + offsety;
355 float zx = x;
356 float zy = y;
357 float new_zx;
358 int colorIdx = 0;
359 while ((colorIdx < pallette.length) && (((zx * zx) + (zy * zy)) < 4f)) {
360 new_zx = ((zx * zx) - (zy * zy)) + x;
361 zy = (2f * zx * zy) + y;
362 zx = new_zx;
363 colorIdx++;
364 }
365 return colorIdx < pallette.length ? pallette[colorIdx] : 0;
366 }
367
368 @Reflect
369 public static void mandel(@RO KernelContext kc, @RW S32Array2D s32Array2D, @RO S32Array pallette, float offsetx, float offsety, float scale) {
370 if (kc.gix < kc.gsx) {
371 int[] pal = pallette.arrayView();
372 int[][] s32 = s32Array2D.arrayView();
373 float width = s32Array2D.width();
374 float height = s32Array2D.height();
375 float x = ((kc.gix % s32Array2D.width()) * scale - (scale / 2f * width)) / width + offsetx;
376 float y = ((kc.gix / s32Array2D.width()) * scale - (scale / 2f * height)) / height + offsety;
377 float zx = x;
378 float zy = y;
379 float new_zx;
380 int colorIdx = 0;
381 while ((colorIdx < pal.length) && (((zx * zx) + (zy * zy)) < 4f)) {
382 new_zx = ((zx * zx) - (zy * zy)) + x;
383 zy = (2f * zx * zy) + y;
384 zx = new_zx;
385 colorIdx++;
386 }
387 int color = colorIdx < pal.length ? pal[colorIdx] : 0;
388 s32[kc.gix % s32Array2D.width()][kc.gix / s32Array2D.width()] = color;
389 }
390 }
391
392
393 @Reflect
394 static public void compute(final ComputeContext computeContext, S32Array pallete, S32Array2D s32Array2D, float x, float y, float scale) {
395
396 computeContext.dispatchKernel(
397 NDRange.of1D(s32Array2D.width()*s32Array2D.height()), //0..S32Array2D.size()
398 kc -> mandel(kc, s32Array2D, pallete, x, y, scale));
399 }
400
401 @HatTest
402 @Reflect
403 public static void testMandel() {
404 final int width = 1024;
405 final int height = 1024;
406 final float defaultScale = 3f;
407 final float originX = -1f;
408 final float originY = 0;
409 final int maxIterations = 64;
410
411 Accelerator accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
412
413 S32Array2D s32Array2D = S32Array2D.create(accelerator, width, height);
414
415 int[] palletteArray = new int[maxIterations];
416
417 for (int i = 1; i < maxIterations; i++) {
418 palletteArray[i]=(i/8+1);// 0-7?
419 }
420 palletteArray[0]=0;
421 S32Array pallette = S32Array.createFrom(accelerator, palletteArray);
422
423 accelerator.compute(cc -> compute(cc, pallette, s32Array2D, originX, originY, defaultScale));
424
425 // Well take 1 in 4 samples (so 1024 -> 128 grid) of the pallette.
426 int subsample = 16;
427 char[] charPallette9 = new char []{' ', '.', ',',':', '-', '+','*', '#', '@', '%'};
428 for (int y = 0; y<height/subsample; y++) {
429 for (int x = 0; x<width/subsample; x++) {
430 int palletteValue = s32Array2D.get(x*subsample,y*subsample); // so 0->8
431 int paletteCheck = mandelCheck(x*subsample, y*subsample, width, height, palletteArray, originX, originY, defaultScale);
432 // System.out.print(charPallette9[palletteValue]);
433 HATAsserts.assertEquals(paletteCheck, palletteValue);
434 }
435 // System.out.println();
436 }
437 }
438
439 /*
440 * simplified version of BlackScholes using ArrayView
441 */
442 @Reflect
443 public static void blackScholesKernel(@RO KernelContext kc,
444 @WO F32Array call,
445 @WO F32Array put,
446 @RO F32Array sArray,
447 @RO F32Array xArray,
448 @RO F32Array tArray,
449 float r,
450 float v) {
451 if (kc.gix<kc.gsx){
452 float[] callArr = call.arrayView();
453 float[] putArr = put.arrayView();
454 float[] sArr = sArray.arrayView();
455 float[] xArr = xArray.arrayView();
456 float[] tArr = tArray.arrayView();
457
458 float expNegRt = (float) Math.exp(-r * tArr[kc.gix]);
459 float d1 = (float) ((Math.log(sArr[kc.gix] / xArr[kc.gix]) + (r + v * v * .5f) * tArr[kc.gix]) / (v * Math.sqrt(tArr[kc.gix])));
460 float d2 = (float) (d1 - v * Math.sqrt(tArr[kc.gix]));
461 float cnd1 = CND(d1);
462 float cnd2 = CND(d2);
463 float value = sArr[kc.gix] * cnd1 - expNegRt * xArr[kc.gix] * cnd2;
464 callArr[kc.gix] = value;
465 putArr[kc.gix] = expNegRt * xArr[kc.gix] * (1 - cnd2) - sArr[kc.gix] * (1 - cnd1);
466 }
467 }
468
469 @Reflect
470 public static float CND(float input) {
471 float x = input;
472 if (input < 0f) { // input = Math.abs(input)?
473 x = -input;
474 }
475
476 float term = 1f / (1f + (0.2316419f * x));
477 float term_pow2 = term * term;
478 float term_pow3 = term_pow2 * term;
479 float term_pow4 = term_pow2 * term_pow2;
480 float term_pow5 = term_pow2 * term_pow3;
481
482 float part1 = (1f / (float)Math.sqrt(2f * 3.1415926535f)) * (float)Math.exp((-x * x) * 0.5f);
483
484 float part2 = (0.31938153f * term) +
485 (-0.356563782f * term_pow2) +
486 (1.781477937f * term_pow3) +
487 (-1.821255978f * term_pow4) +
488 (1.330274429f * term_pow5);
489
490 if (input >= 0f) {
491 return 1f - part1 * part2;
492 }
493 return part1 * part2;
494
495 }
496
497 @Reflect
498 public static void blackScholes(@RO ComputeContext cc, @WO F32Array call, @WO F32Array put, @RO F32Array S, @RO F32Array X, @RO F32Array T, float r, float v) {
499 cc.dispatchKernel(NDRange.of1D(call.length()),
500 kc -> blackScholesKernel(kc, call, put, S, X, T, r, v)
501 );
502 }
503
504 static F32Array floatArray(Accelerator accelerator, int size, float low, float high, Random rand) {
505 F32Array array = F32Array.create(accelerator, size);
506 for (int i = 0; i <size; i++) {
507 array.array(i, rand.nextFloat() * (high - low) + low);
508 }
509 return array;
510 }
511
512 public static void blackScholesKernelSeq(F32Array call, F32Array put, F32Array sArray, F32Array xArray, F32Array tArray, float r, float v) {
513 for (int i = 0; i <call.length() ; i++) {
514 float S = sArray.array(i);
515 float X = xArray.array(i);
516 float T = tArray.array(i);
517 float expNegRt = (float) Math.exp(-r * T);
518 float d1 = (float) ((Math.log(S / X) + (r + v * v * .5f) * T) / (v * Math.sqrt(T)));
519 float d2 = (float) (d1 - v * Math.sqrt(T));
520 float cnd1 = CND(d1);
521 float cnd2 = CND(d2);
522 float value = S * cnd1 - expNegRt * X * cnd2;
523 call.array(i, value);
524 put.array(i, expNegRt * X * (1 - cnd2) - S * (1 - cnd1));
525 }
526 }
527
528 @HatTest
529 @Reflect
530 public static void testBlackScholes() {
531 int size = 1024;
532 Random rand = new Random();
533 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
534 var call = F32Array.create(accelerator, size);
535 var put = F32Array.create(accelerator, size);
536 for (int i = 0; i < size; i++) {
537 call.array(i, i);
538 put.array(i, i);
539 }
540
541 var S = floatArray(accelerator, size,1f, 100f, rand);
542 var X = floatArray(accelerator, size,1f, 100f, rand);
543 var T = floatArray(accelerator,size, 0.25f, 10f, rand);
544 float r = 0.02f;
545 float v = 0.30f;
546
547 accelerator.compute(cc -> blackScholes(cc, call, put, S, X, T, r, v));
548
549 var seqCall = F32Array.create(accelerator, size);
550 var seqPut = F32Array.create(accelerator, size);
551 for (int i = 0; i < seqCall.length(); i++) {
552 seqCall.array(i, i);
553 seqPut.array(i, i);
554 }
555
556 blackScholesKernelSeq(seqCall, seqPut, S, X, T, r, v);
557
558 for (int i = 0; i < call.length(); i++) {
559 HATAsserts.assertEquals(seqCall.array(i), call.array(i), 0.01f);
560 HATAsserts.assertEquals(seqPut.array(i), put.array(i), 0.01f);
561 }
562 }
563
564 /*
565 * basic test of local and private buffer ArrayViews
566 */
567 private interface SharedMemory extends Buffer {
568 void array(long index, int value);
569 int array(long index);
570 Schema<SharedMemory> schema = Schema.of(SharedMemory.class,
571 arr -> arr.array("array", 1024));
572 static SharedMemory create(Accelerator accelerator) {
573 return schema.allocate(accelerator);
574 }
575 static SharedMemory createLocal() {
576 return schema.allocate(new Accelerator(MethodHandles.lookup(), Backend.FIRST));
577 }
578
579 default int[] localArrayView() {
580 int[] view = new int[1024];
581 for (int i = 0; i < 1024; i++) {
582 view[i] = this.array(i);
583 }
584 return view;
585 }
586 }
587
588 public interface PrivateArray extends Buffer {
589 void array(long index, int value);
590 int array(long index);
591 Schema<PrivateArray> schema = Schema.of(PrivateArray.class,
592 arr -> arr.array("array", 16));
593 static PrivateArray create(Accelerator accelerator) {
594 return schema.allocate(accelerator);
595 }
596 static PrivateArray createPrivate() {
597 return schema.allocate(new Accelerator(MethodHandles.lookup(), Backend.FIRST));
598 }
599
600 default int[] privateArrayView() {
601 int[] view = new int[16];
602 for (int i = 0; i < 16; i++) {
603 view[i] = this.array(i);
604 }
605 return view;
606 }
607 }
608
609 @Reflect
610 public static void squareKernelWithPrivateAndLocal(@RO KernelContext kc, @RW S32Array s32Array) {
611 SharedMemory shared = SharedMemory.createLocal();
612 if (kc.gix < kc.gsx){
613 int[] arr = s32Array.arrayView();
614 arr[kc.gix] += arr[kc.gix];
615 // int[] a = new int[4];
616 // a[1] = 4;
617
618 PrivateArray priv = PrivateArray.createPrivate();
619 int[] privView = priv.privateArrayView();
620 privView[0] = 1;
621 arr[kc.gix] += privView[0];
622
623 int[] sharedView = shared.localArrayView();
624 sharedView[0] = 16;
625 arr[kc.gix] += sharedView[0];
626 }
627 }
628
629 @Reflect
630 public static void privateAndLocal(@RO ComputeContext cc, @RW S32Array s32Array) {
631 cc.dispatchKernel(NDRange.of1D(s32Array.length()),
632 kc -> squareKernelWithPrivateAndLocal(kc, s32Array)
633 );
634 }
635
636 @HatTest
637 @Reflect
638 public static void testPrivateAndLocal() {
639
640 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
641 var arr = S32Array.create(accelerator, 32);
642 for (int i = 0; i < arr.length(); i++) {
643 arr.array(i, i);
644 }
645 accelerator.compute(
646 cc -> privateAndLocal(cc, arr) //QuotableComputeContextConsumer
647 ); // extends Quotable, Consumer<ComputeContext>
648 for (int i = 0; i < arr.length(); i++) {
649 HATAsserts.assertEquals(2 * i + 17, arr.array(i));
650 }
651 }
652 }