1 /*
2 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 package hat.test;
26
27 import hat.Accelerator;
28 import hat.ComputeContext;
29 import hat.NDRange;
30 import hat.KernelContext;
31 import hat.backend.Backend;
32 import hat.buffer.*;
33 import hat.ifacemapper.MappableIface.*;
34 import hat.ifacemapper.Schema;
35 import jdk.incubator.code.CodeReflection;
36 import hat.test.annotation.HatTest;
37 import hat.test.engine.HATAsserts;
38
39 import java.lang.foreign.ValueLayout;
40 import java.lang.invoke.MethodHandles;
41 import java.util.Random;
42
43 import static java.lang.foreign.ValueLayout.JAVA_BYTE;
44
45 public class TestArrayView {
46
47 /*
48 * simple square kernel example using S32Array's ArrayView
49 */
/**
 * Kernel body: replaces one element of {@code s32Array} with its square, reading and writing
 * through the {@code int[]} returned by {@code arrayView()} held in a local variable.
 *
 * @param kc       kernel context; {@code kc.gix} is the element index and {@code kc.gsx} its upper bound
 * @param s32Array buffer whose element at {@code kc.gix} is squared in place
 */
50 @CodeReflection
51 public static void squareKernel(@RO KernelContext kc, @RW S32Array s32Array) {
52 if (kc.gix < kc.gsx){
53 int[] arr = s32Array.arrayView();
54 arr[kc.gix] *= arr[kc.gix];
55 }
56 }
57
/**
 * Compute entry point: dispatches {@code squareKernel} over a range sized by
 * {@code s32Array.length()}.
 *
 * @param cc       compute context used to dispatch the kernel
 * @param s32Array buffer handed to every kernel invocation
 */
58 @CodeReflection
59 public static void square(@RO ComputeContext cc, @RW S32Array s32Array) {
60 cc.dispatchKernel(NDRange.of(s32Array.length()),
61 kc -> squareKernel(kc, s32Array)
62 );
63 }
64
65 @HatTest
66 public static void testSquare() {
67
68 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
69 var arr = S32Array.create(accelerator, 32);
70 for (int i = 0; i < arr.length(); i++) {
71 arr.array(i, i);
72 }
73 accelerator.compute(
74 cc -> square(cc, arr) //QuotableComputeContextConsumer
75 ); // extends Quotable, Consumer<ComputeContext>
76 for (int i = 0; i < arr.length(); i++) {
77 HATAsserts.assertEquals(i * i, arr.array(i));
78 }
79 }
80
81 /*
82 * making sure arrayviews aren't reliant on varOps
83 */
/**
 * Kernel body: squares one element of {@code s32Array} in place, indexing the result of
 * {@code arrayView()} directly on both sides of the assignment instead of first storing the
 * view in a local variable.
 *
 * @param kc       kernel context; {@code kc.gix} is the element index and {@code kc.gsx} its upper bound
 * @param s32Array buffer whose element at {@code kc.gix} is squared in place
 */
84 @CodeReflection
85 public static void squareKernelNoVarOp(@RO KernelContext kc, @RW S32Array s32Array) {
86 if (kc.gix<kc.gsx){
87 s32Array.arrayView()[kc.gix] *= s32Array.arrayView()[kc.gix];
88 }
89 }
90
/**
 * Compute entry point: builds an {@code NDRange} from {@code NDRange.Global1D.of(length)} and
 * dispatches {@code squareKernelNoVarOp} over it.
 *
 * @param cc       compute context used to dispatch the kernel
 * @param s32Array buffer handed to every kernel invocation
 */
91 @CodeReflection
92 public static void squareNoVarOp(@RO ComputeContext cc, @RW S32Array s32Array) {
93 NDRange ndRange = NDRange.of(NDRange.Global1D.of(s32Array.length()));
94 cc.dispatchKernel(ndRange,
95 kc -> squareKernelNoVarOp(kc, s32Array)
96 );
97 }
98
99 @HatTest
100 public static void testSquareNoVarOp() {
101 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
102 var arr = S32Array.create(accelerator, 32);
103 for (int i = 0; i < arr.length(); i++) {
104 arr.array(i, i);
105 }
106 accelerator.compute(
107 cc -> squareNoVarOp(cc, arr) //QuotableComputeContextConsumer
108 ); // extends Quotable, Consumer<ComputeContext>
109 for (int i = 0; i < arr.length(); i++) {
110 HATAsserts.assertEquals(i * i, arr.array(i));
111 }
112 }
113
/**
 * Kernel body: squares one element of {@code s32Array2D} in place through the {@code int[][]}
 * returned by {@code arrayView()}, indexed as {@code [kc.gix][kc.giy]}.
 *
 * NOTE(review): the only dispatcher for this kernel in this file launches
 * {@code NDRange.of(width * height)}, and the guard only bounds {@code kc.gix}. How
 * {@code kc.giy} is populated for such a launch, and how the two-level index maps onto the
 * buffer, is not visible here - confirm against the HAT backend.
 *
 * @param kc         kernel context supplying {@code gix}, {@code giy} and {@code gsx}
 * @param s32Array2D buffer whose addressed element is squared in place
 */
114 @CodeReflection
115 public static void square2DKernel(@RO KernelContext kc, @RW S32Array2D s32Array2D) {
116 if (kc.gix < kc.gsx){
117 int[][] arr = s32Array2D.arrayView();
118 arr[kc.gix][kc.giy] *= arr[kc.gix][kc.giy];
119 }
120 }
121
/**
 * Compute entry point: dispatches {@code square2DKernel} over a range sized by
 * {@code width() * height()} of the buffer.
 *
 * @param cc         compute context used to dispatch the kernel
 * @param s32Array2D buffer handed to every kernel invocation
 */
122 @CodeReflection
123 public static void square2D(@RO ComputeContext cc, @RW S32Array2D s32Array2D) {
124 cc.dispatchKernel(NDRange.of(s32Array2D.width() * s32Array2D.height()),
125 kc -> square2DKernel(kc, s32Array2D)
126 );
127 }
128
129 @HatTest
130 public static void testSquare2D() {
131
132 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
133 var arr = S32Array2D.create(accelerator, 5, 5);
134 for (int i = 0; i < arr.height(); i++) {
135 for (int j = 0; j < arr.width(); j++) {
136 arr.set(i, j, i * 5 + j);
137 }
138 }
139 accelerator.compute(
140 cc -> square2D(cc, arr) //QuotableComputeContextConsumer
141 ); // extends Quotable, Consumer<ComputeContext>
142 for (int i = 0; i < arr.height(); i++) {
143 for (int j = 0; j < arr.width(); j++) {
144 HATAsserts.assertEquals((i * 5 + j) * (i * 5 + j), arr.get(i, j));
145 }
146 }
147 }
148
149 /*
150 * simplified version of Game of Life using ArrayView
151 */
152 public final static byte ALIVE = (byte) 0xff;
153 public final static byte DEAD = 0x00;
154
/**
 * Buffer describing the Game of Life board: an int width, an int height and a byte array of
 * cells reached through {@code array(idx)}.
 *
 * The schema sizes the array from {@code width} and {@code height} with a stride of 2 (the
 * struct sketch below shows {@code width*height*2} bytes), while {@code arrayView()} only
 * copies the first {@code width * height} bytes.
 */
155 public interface CellGrid extends Buffer {
156 /*
157 * struct CellGrid{
158 * int width;
159 * int height;
160 * byte[width*height*2] cellArray;
161 * }
162 */
163 int width();
164
165 int height();
166
// Indexed getter / setter for a single cell byte.
167 byte array(long idx);
168
169 void array(long idx, byte b);
170
// Layout description used by create(): array length derived from "width" and "height", stride 2.
171 Schema<CellGrid> schema = Schema.of(CellGrid.class, lifeData -> lifeData
172 .arrayLen("width", "height").stride(2).array("array")
173 );
174
// Allocates a CellGrid of the given dimensions through the schema.
175 static CellGrid create(Accelerator accelerator, int width, int height) {
176 return schema.allocate(accelerator, width, height);
177 }
178
// Element layout of the cell array (one byte per cell).
// NOTE(review): nothing in this file reads this constant; presumably HAT looks it up - confirm.
179 ValueLayout valueLayout = JAVA_BYTE;
180
// Returns a freshly allocated byte[] holding array(0) .. array(width*height - 1).
// NOTE(review): as plain Java this is a copy, so writes to the returned array do not reach the
// buffer; the kernels in this file rely on HAT's handling of arrayView() - confirm.
181 default byte[] arrayView() {
182 int size = this.width() * this.height();
183 byte[] arr = new byte[size];
184 for (int i = 0; i < size; i++) {
185 arr[i] = this.array(i);
186 }
187 return arr;
188 }
189 }
190
/**
 * Small buffer carrying two int fields, {@code from} and {@code to}, each with a getter and a
 * setter.
 */
191 public interface Control extends Buffer {
192 /*
193 * struct Control{
194 * int from;
195 * int to;
196 * }
197 */
198 int from();
199
200 void from(int from);
201
202 int to();
203
204 void to(int to);
205
// Layout description: exactly the two fields "from" and "to".
206 Schema<Control> schema = Schema.of(
207 Control.class, control ->
208 control.fields("from", "to"));//, "generation", "requiredFrameRate", "maxGenerations"));
209
// Allocates a Control and initialises from = width * height of the given grid, to = 0.
210 static Control create(Accelerator accelerator, CellGrid cellGrid) {
211 var instance = schema.allocate(accelerator);
212 instance.from(cellGrid.width() * cellGrid.height());
213 instance.to(0);
214 return instance;
215 }
216 }
217
/**
 * Kernel and compute entry points for the simplified Game of Life test. One generation is
 * computed from {@code cellGrid} into {@code cellGridRes}.
 */
218 public static class Compute {
/**
 * Computes the next state of cell {@code idx} and stores it at the same index of the result grid.
 *
 * The index is split into {@code x = idx % w} and {@code y = idx / w}. For interior cells
 * (not on the outermost row or column) the low bit of each of the eight neighbours is summed
 * and the B3/S23 rule is applied; border cells keep the value read from {@code cellGrid}.
 *
 * @param idx         flat cell index
 * @param control     read for {@code from} / {@code to}; the live code does not use the values
 * @param cellGrid    input generation
 * @param cellGridRes output generation
 */
219 @CodeReflection
220 public static void lifePerIdx(int idx, @RO Control control, @RO CellGrid cellGrid, @WO CellGrid cellGridRes) {
221 int w = cellGrid.width();
222 int h = cellGrid.height();
223 int from = control.from(); // only referenced by the commented-out variant below
224 int to = control.to(); // only referenced by the commented-out variant below
225 int x = idx % w;
226 int y = idx / w;
227
228 // byte[] bytes = cellGrid.arrayView();
229 // byte cell = bytes[idx + from];
230 // byte[] lookup = new byte[]{};
231 // if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please
232 // int lookupIdx =
233 // (bytes[(y - 1) * w + x - 1 + from]&1 <<0)
234 // |(bytes[(y + 0) * w + x - 1 + from]&1 <<1)
235 // |(bytes[(y + 1) * w + x - 1 + from]&1 <<2)
236 // |(bytes[(y - 1) * w + x + 0 + from]&1 <<3)
237 // |(bytes[(y - 0) * w + x + 0 + from]&1 <<4) // current cell added
238 // |(bytes[(y + 1) * w + x + 0 + from]&1 <<5)
239 // |(bytes[(y + 0) * w + x + 1 + from]&1 <<6)
240 // |(bytes[(y - 1) * w + x + 1 + from]&1 <<7)
241 // |(bytes[(y + 1) * w + x + 1 + from]&1 <<8) ;
242 // // conditional removed!
243 // bytes[idx + to] = lookup[lookupIdx];
244 // }
245
246 byte[] bytes = cellGrid.arrayView();
247 byte cell = bytes[idx];
248 if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please
// Live-neighbour count: ALIVE contributes 1 through its low bit, DEAD contributes 0.
249 int count =
250 (bytes[(y - 1) * w + (x - 1)] & 1)
251 + (bytes[(y + 0) * w + (x - 1)] & 1)
252 + (bytes[(y + 1) * w + (x - 1)] & 1)
253 + (bytes[(y - 1) * w + (x + 0)] & 1)
254 + (bytes[(y + 1) * w + (x + 0)] & 1)
255 + (bytes[(y - 1) * w + (x + 1)] & 1)
256 + (bytes[(y + 0) * w + (x + 1)] & 1)
257 + (bytes[(y + 1) * w + (x + 1)] & 1);
258 cell = ((count == 3) || ((count == 2) && (cell == ALIVE))) ? ALIVE : DEAD;// B3/S23.
259 }
260 byte[] res = cellGridRes.arrayView();
261 res[idx] = cell;
262 }
263
/**
 * Kernel body: applies {@code lifePerIdx} to the cell at {@code kc.gix} when that index is
 * below {@code kc.gsx}.
 */
264 @CodeReflection
265 public static void life(@RO KernelContext kc, @RO Control control, @RO CellGrid cellGrid, @WO CellGrid cellGridRes) {
266 if (kc.gix < kc.gsx) {
267 Compute.lifePerIdx(kc.gix, control, cellGrid, cellGridRes);
268 }
269 }
270
/**
 * Compute entry point: dispatches {@code life} over {@code width * height} indices of
 * {@code grid}.
 */
271 @CodeReflection
272 static public void compute(final @RO ComputeContext cc, @RO Control ctrl, @RO CellGrid grid, @WO CellGrid gridRes) {
273 int range = grid.width() * grid.height();
274 cc.dispatchKernel(NDRange.of(range), kc -> Compute.life(kc, ctrl, grid, gridRes));
275 }
276 }
277
/**
 * Runs a single Game of Life generation on a 17x17 grid and compares the result grid, cell by
 * cell, against a hand-written expected generation.
 *
 * Unlike the other tests in this class, the Accelerator is created without an explicit Backend
 * argument.
 */
278 @HatTest
279 public static void testLife() {
280 Accelerator accelerator = new Accelerator(MethodHandles.lookup());//,new OpenCLBackend("INFO,MINIMIZE_COPIES,SHOW_COMPUTE_MODEL"));
281
282 // We oversize the grid by adding 1 to n,e,w and s
283 CellGrid cellGrid = CellGrid.create(accelerator, 17, 17);
284 CellGrid cellGridRes = CellGrid.create(accelerator, 17, 17);
285
// Input generation; row index is the first dimension, column index the second.
286 byte[][] actualGrid = new byte[][]{
287 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
288 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
289 {DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD},
290 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
291 {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
292 {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
293 {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
294 {DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD},
295 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
296 {DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD},
297 {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
298 {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
299 {DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD},
300 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
301 {DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD},
302 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
303 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
304 };
305
306 // By shifting all cells +1,+1 so we only need to scan 1..width-1, 1..height-1
307 // we don't worry about possibly finding cells in 0,n width,n or n,0 height,n
// Copy the input generation into the buffer in row-major order (index = row * width + column).
308 for (int i = 0; i < cellGrid.height(); i++) {
309 for (int j = 0; j < cellGrid.width(); j++) {
310 cellGrid.array(((long) i * cellGrid.width()) + j, actualGrid[i][j]);
311 }
312 }
313
314 Control control = Control.create(accelerator, cellGrid);
315
// One generation: cellGrid -> cellGridRes.
316 accelerator.compute(cc -> Compute.compute(cc, control, cellGrid, cellGridRes));
317
// Expected generation after one step.
318 byte[][] resultGrid = new byte[][]{
319 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
320 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
321 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
322 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
323 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
324 {DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, ALIVE, ALIVE, DEAD, ALIVE, ALIVE, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD},
325 {DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD},
326 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
327 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
328 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
329 {DEAD, DEAD, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, ALIVE, DEAD, DEAD, DEAD},
330 {DEAD, ALIVE, ALIVE, ALIVE, DEAD, DEAD, ALIVE, ALIVE, DEAD, ALIVE, ALIVE, DEAD, DEAD, ALIVE, ALIVE, ALIVE, DEAD},
331 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
332 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, ALIVE, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
333 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
334 {DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD, ALIVE, DEAD, DEAD, DEAD, DEAD, DEAD},
335 {DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD, DEAD},
336 };
337
// Compare every cell of the result buffer with the expected generation.
338 for (int i = 0; i < cellGrid.height(); i++) {
339 for (int j = 0; j < cellGrid.width(); j++) {
340 HATAsserts.assertEquals(resultGrid[i][j], cellGridRes.array(((long) i * cellGrid.width()) + j));
341 }
342 }
343 }
344
345 /*
346 * simplified version of mandel using ArrayView
347 */
348 @CodeReflection
349 public static int mandelCheck(int i, int j, float width, float height, int[] pallette, float offsetx, float offsety, float scale) {
350 float x = (i * scale - (scale / 2f * width)) / width + offsetx;
351 float y = (j * scale - (scale / 2f * height)) / height + offsety;
352 float zx = x;
353 float zy = y;
354 float new_zx;
355 int colorIdx = 0;
356 while ((colorIdx < pallette.length) && (((zx * zx) + (zy * zy)) < 4f)) {
357 new_zx = ((zx * zx) - (zy * zy)) + x;
358 zy = (2f * zx * zy) + y;
359 zx = new_zx;
360 colorIdx++;
361 }
362 return colorIdx < pallette.length ? pallette[colorIdx] : 0;
363 }
364
/**
 * Kernel body: computes one Mandelbrot sample and stores its palette value in the 2D buffer.
 *
 * The flat index {@code kc.gix} is split into a column ({@code gix % width}) and a row
 * ({@code gix / width}); the escape-time iteration is the same arithmetic as
 * {@code mandelCheck}, with the palette read through {@code arrayView()}.
 *
 * @param kc         kernel context supplying {@code gix} and {@code gsx}
 * @param s32Array2D output image, written through its {@code int[][]} array view
 * @param pallette   colour table; its length is also the iteration limit
 * @param offsetx    horizontal offset added to the mapped coordinate
 * @param offsety    vertical offset added to the mapped coordinate
 * @param scale      extent of the mapped region
 */
365 @CodeReflection
366 public static void mandel(@RO KernelContext kc, @RW S32Array2D s32Array2D, @RO S32Array pallette, float offsetx, float offsety, float scale) {
367 if (kc.gix < kc.gsx) {
368 int[] pal = pallette.arrayView();
369 int[][] s32 = s32Array2D.arrayView();
370 float width = s32Array2D.width();
371 float height = s32Array2D.height();
372 float x = ((kc.gix % s32Array2D.width()) * scale - (scale / 2f * width)) / width + offsetx;
373 float y = ((kc.gix / s32Array2D.width()) * scale - (scale / 2f * height)) / height + offsety;
374 float zx = x;
375 float zy = y;
376 float new_zx;
377 int colorIdx = 0;
// Iterate z = z*z + c until |z|^2 reaches 4 or the palette length is hit.
378 while ((colorIdx < pal.length) && (((zx * zx) + (zy * zy)) < 4f)) {
379 new_zx = ((zx * zx) - (zy * zy)) + x;
380 zy = (2f * zx * zy) + y;
381 zx = new_zx;
382 colorIdx++;
383 }
// Points that never escaped get colour 0.
384 int color = colorIdx < pal.length ? pal[colorIdx] : 0;
// First index is the column, second the row.
385 s32[kc.gix % s32Array2D.width()][kc.gix / s32Array2D.width()] = color;
386 }
387 }
388
389
/**
 * Compute entry point for the Mandelbrot test: dispatches {@code mandel} over
 * {@code width * height} indices of the image buffer.
 *
 * @param computeContext compute context used to dispatch the kernel
 * @param pallete        colour table forwarded to the kernel
 * @param s32Array2D     output image
 * @param x              forwarded to the kernel as {@code offsetx}
 * @param y              forwarded to the kernel as {@code offsety}
 * @param scale          forwarded to the kernel as {@code scale}
 */
390 @CodeReflection
391 static public void compute(final ComputeContext computeContext, S32Array pallete, S32Array2D s32Array2D, float x, float y, float scale) {
392
393 computeContext.dispatchKernel(
394 NDRange.of(s32Array2D.width()*s32Array2D.height()), //0..S32Array2D.size()
395 kc -> mandel(kc, s32Array2D, pallete, x, y, scale));
396 }
397
398 @HatTest
399 public static void testMandel() {
400 final int width = 1024;
401 final int height = 1024;
402 final float defaultScale = 3f;
403 final float originX = -1f;
404 final float originY = 0;
405 final int maxIterations = 64;
406
407 Accelerator accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
408
409 S32Array2D s32Array2D = S32Array2D.create(accelerator, width, height);
410
411 int[] palletteArray = new int[maxIterations];
412
413 for (int i = 1; i < maxIterations; i++) {
414 palletteArray[i]=(i/8+1);// 0-7?
415 }
416 palletteArray[0]=0;
417 S32Array pallette = S32Array.createFrom(accelerator, palletteArray);
418
419 accelerator.compute(cc -> compute(cc, pallette, s32Array2D, originX, originY, defaultScale));
420
421 // Well take 1 in 4 samples (so 1024 -> 128 grid) of the pallette.
422 int subsample = 16;
423 char[] charPallette9 = new char []{' ', '.', ',',':', '-', '+','*', '#', '@', '%'};
424 for (int y = 0; y<height/subsample; y++) {
425 for (int x = 0; x<width/subsample; x++) {
426 int palletteValue = s32Array2D.get(x*subsample,y*subsample); // so 0->8
427 int paletteCheck = mandelCheck(x*subsample, y*subsample, width, height, palletteArray, originX, originY, defaultScale);
428 // System.out.print(charPallette9[palletteValue]);
429 HATAsserts.assertEquals(paletteCheck, palletteValue);
430 }
431 // System.out.println();
432 }
433 }
434
435 /*
436 * simplified version of BlackScholes using ArrayView
437 */
/**
 * Kernel body: computes one call value and one put value for index {@code kc.gix} from the
 * three input arrays and the scalars {@code r} and {@code v}, reading and writing every buffer
 * through its {@code float[]} array view.
 *
 * NOTE(review): presumably S = spot price, X = strike, T = time to expiry, r = risk-free rate
 * and v = volatility (Black-Scholes naming) - confirm; the code only fixes the formula.
 *
 * @param kc     kernel context supplying {@code gix} and {@code gsx}
 * @param call   output array for the call value
 * @param put    output array for the put value
 * @param sArray input array read as S
 * @param xArray input array read as X
 * @param tArray input array read as T
 * @param r      scalar used in {@code exp(-r * T)} and in d1
 * @param v      scalar used in d1 and d2
 */
438 @CodeReflection
439 public static void blackScholesKernel(@RO KernelContext kc,
440 @WO F32Array call,
441 @WO F32Array put,
442 @RO F32Array sArray,
443 @RO F32Array xArray,
444 @RO F32Array tArray,
445 float r,
446 float v) {
447 if (kc.gix<kc.gsx){
448 float[] callArr = call.arrayView();
449 float[] putArr = put.arrayView();
450 float[] sArr = sArray.arrayView();
451 float[] xArr = xArray.arrayView();
452 float[] tArr = tArray.arrayView();
453
454 float expNegRt = (float) Math.exp(-r * tArr[kc.gix]);
455 float d1 = (float) ((Math.log(sArr[kc.gix] / xArr[kc.gix]) + (r + v * v * .5f) * tArr[kc.gix]) / (v * Math.sqrt(tArr[kc.gix])));
456 float d2 = (float) (d1 - v * Math.sqrt(tArr[kc.gix]));
457 float cnd1 = CND(d1);
458 float cnd2 = CND(d2);
459 float value = sArr[kc.gix] * cnd1 - expNegRt * xArr[kc.gix] * cnd2;
460 callArr[kc.gix] = value;
461 putArr[kc.gix] = expNegRt * xArr[kc.gix] * (1 - cnd2) - sArr[kc.gix] * (1 - cnd1);
462 }
463 }
464
/**
 * Evaluates a fixed polynomial approximation on {@code |input|} and mirrors the result for
 * negative inputs.
 *
 * With {@code x = |input|} and {@code term = 1 / (1 + 0.2316419 * x)}, the method computes
 * {@code p = exp(-x*x/2) / sqrt(2*pi) * poly(term)}, where {@code poly} is the fifth-degree
 * polynomial below, and returns {@code 1 - p} for {@code input >= 0} and {@code p} otherwise.
 *
 * NOTE(review): the name and constants suggest the standard cumulative normal distribution
 * approximation used in Black-Scholes implementations - confirm.
 *
 * @param input value at which to evaluate
 * @return the approximated value
 */
465 @CodeReflection
466 public static float CND(float input) {
467 float x = input;
468 if (input < 0f) { // input = Math.abs(input)?
469 x = -input;
470 }
471
472 float term = 1f / (1f + (0.2316419f * x));
473 float term_pow2 = term * term;
474 float term_pow3 = term_pow2 * term;
475 float term_pow4 = term_pow2 * term_pow2;
476 float term_pow5 = term_pow2 * term_pow3;
477
// exp(-x^2 / 2) / sqrt(2 * pi)
478 float part1 = (1f / (float)Math.sqrt(2f * 3.1415926535f)) * (float)Math.exp((-x * x) * 0.5f);
479
// Fifth-degree polynomial in term.
480 float part2 = (0.31938153f * term) +
481 (-0.356563782f * term_pow2) +
482 (1.781477937f * term_pow3) +
483 (-1.821255978f * term_pow4) +
484 (1.330274429f * term_pow5);
485
486 if (input >= 0f) {
487 return 1f - part1 * part2;
488 }
489 return part1 * part2;
490
491 }
492
/**
 * Compute entry point: dispatches {@code blackScholesKernel} over a range sized by
 * {@code call.length()}, forwarding all buffers and both scalars unchanged.
 */
493 @CodeReflection
494 public static void blackScholes(@RO ComputeContext cc, @WO F32Array call, @WO F32Array put, @RO F32Array S, @RO F32Array X, @RO F32Array T, float r, float v) {
495 cc.dispatchKernel(NDRange.of(call.length()),
496 kc -> blackScholesKernel(kc, call, put, S, X, T, r, v)
497 );
498 }
499
500 static F32Array floatArray(Accelerator accelerator, int size, float low, float high, Random rand) {
501 F32Array array = F32Array.create(accelerator, size);
502 for (int i = 0; i <size; i++) {
503 array.array(i, rand.nextFloat() * (high - low) + low);
504 }
505 return array;
506 }
507
508 public static void blackScholesKernelSeq(F32Array call, F32Array put, F32Array sArray, F32Array xArray, F32Array tArray, float r, float v) {
509 for (int i = 0; i <call.length() ; i++) {
510 float S = sArray.array(i);
511 float X = xArray.array(i);
512 float T = tArray.array(i);
513 float expNegRt = (float) Math.exp(-r * T);
514 float d1 = (float) ((Math.log(S / X) + (r + v * v * .5f) * T) / (v * Math.sqrt(T)));
515 float d2 = (float) (d1 - v * Math.sqrt(T));
516 float cnd1 = CND(d1);
517 float cnd2 = CND(d2);
518 float value = S * cnd1 - expNegRt * X * cnd2;
519 call.array(i, value);
520 put.array(i, expNegRt * X * (1 - cnd2) - S * (1 - cnd1));
521 }
522 }
523
524 @HatTest
525 public static void testBlackScholes() {
526 int size = 1024;
527 Random rand = new Random();
528 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
529 var call = F32Array.create(accelerator, size);
530 var put = F32Array.create(accelerator, size);
531 for (int i = 0; i < size; i++) {
532 call.array(i, i);
533 put.array(i, i);
534 }
535
536 var S = floatArray(accelerator, size,1f, 100f, rand);
537 var X = floatArray(accelerator, size,1f, 100f, rand);
538 var T = floatArray(accelerator,size, 0.25f, 10f, rand);
539 float r = 0.02f;
540 float v = 0.30f;
541
542 accelerator.compute(cc -> blackScholes(cc, call, put, S, X, T, r, v));
543
544 var seqCall = F32Array.create(accelerator, size);
545 var seqPut = F32Array.create(accelerator, size);
546 for (int i = 0; i < seqCall.length(); i++) {
547 seqCall.array(i, i);
548 seqPut.array(i, i);
549 }
550
551 blackScholesKernelSeq(seqCall, seqPut, S, X, T, r, v);
552
553 for (int i = 0; i < call.length(); i++) {
554 HATAsserts.assertEquals(seqCall.array(i), call.array(i), 0.01f);
555 HATAsserts.assertEquals(seqPut.array(i), put.array(i), 0.01f);
556 }
557 }
558
559 /*
560 * basic test of local and private buffer ArrayViews
561 */
/**
 * Buffer with a fixed array of 1024 ints, used by the private/local array-view test as its
 * "local" buffer.
 *
 * NOTE(review): how HAT treats {@code createLocal()} and {@code localArrayView()} when they
 * appear inside a kernel is not visible in this file - confirm against the backend.
 */
562 private interface SharedMemory extends Buffer {
563 void array(long index, int value);
564 int array(long index);
// Layout description: a single array field named "array" with 1024 elements.
565 Schema<SharedMemory> schema = Schema.of(SharedMemory.class,
566 arr -> arr.array("array", 1024));
567 static SharedMemory create(Accelerator accelerator) {
568 return schema.allocate(accelerator);
569 }
// Allocates through a newly constructed Accelerator instead of one supplied by the caller.
570 static SharedMemory createLocal() {
571 return schema.allocate(new Accelerator(MethodHandles.lookup(), Backend.FIRST));
572 }
573
// Returns a freshly allocated int[1024] holding array(0) .. array(1023).
574 default int[] localArrayView() {
575 int[] view = new int[1024];
576 for (int i = 0; i < 1024; i++) {
577 view[i] = this.array(i);
578 }
579 return view;
580 }
581 }
582
/**
 * Buffer with a fixed array of 16 ints, used by the private/local array-view test as its
 * "private" buffer.
 *
 * NOTE(review): how HAT treats {@code createPrivate()} and {@code privateArrayView()} when
 * they appear inside a kernel is not visible in this file - confirm against the backend.
 */
583 public interface PrivateArray extends Buffer {
584 void array(long index, int value);
585 int array(long index);
// Layout description: a single array field named "array" with 16 elements.
586 Schema<PrivateArray> schema = Schema.of(PrivateArray.class,
587 arr -> arr.array("array", 16));
588 static PrivateArray create(Accelerator accelerator) {
589 return schema.allocate(accelerator);
590 }
// Allocates through a newly constructed Accelerator instead of one supplied by the caller.
591 static PrivateArray createPrivate() {
592 return schema.allocate(new Accelerator(MethodHandles.lookup(), Backend.FIRST));
593 }
594
// Returns a freshly allocated int[16] holding array(0) .. array(15).
595 default int[] privateArrayView() {
596 int[] view = new int[16];
597 for (int i = 0; i < 16; i++) {
598 view[i] = this.array(i);
599 }
600 return view;
601 }
602 }
603
/**
 * Kernel body exercising array views over a global, a private and a local buffer in one
 * kernel.
 *
 * Despite the name, the element is doubled rather than squared: it is added to itself, then 1
 * (written to and read back from the private view) and 16 (written to and read back from the
 * shared view) are added, so an element that started as {@code i} ends as {@code 2 * i + 17}.
 *
 * @param kc       kernel context supplying {@code gix} and {@code gsx}
 * @param s32Array buffer whose element at {@code kc.gix} is updated in place
 */
604 @CodeReflection
605 public static void squareKernelWithPrivateAndLocal(@RO KernelContext kc, @RW S32Array s32Array) {
// Created before the bounds check, unlike the private array below.
606 SharedMemory shared = SharedMemory.createLocal();
607 if (kc.gix < kc.gsx){
608 int[] arr = s32Array.arrayView();
609 arr[kc.gix] += arr[kc.gix];
610 // int[] a = new int[4];
611 // a[1] = 4;
612
613 PrivateArray priv = PrivateArray.createPrivate();
614 int[] privView = priv.privateArrayView();
615 privView[0] = 1;
616 arr[kc.gix] += privView[0];
617
618 int[] sharedView = shared.localArrayView();
619 sharedView[0] = 16;
620 arr[kc.gix] += sharedView[0];
621 }
622 }
623
/**
 * Compute entry point: dispatches {@code squareKernelWithPrivateAndLocal} over a range sized
 * by {@code s32Array.length()}.
 *
 * @param cc       compute context used to dispatch the kernel
 * @param s32Array buffer handed to every kernel invocation
 */
624 @CodeReflection
625 public static void privateAndLocal(@RO ComputeContext cc, @RW S32Array s32Array) {
626 cc.dispatchKernel(NDRange.of(s32Array.length()),
627 kc -> squareKernelWithPrivateAndLocal(kc, s32Array)
628 );
629 }
630
631 @HatTest
632 public static void testPrivateAndLocal() {
633
634 var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
635 var arr = S32Array.create(accelerator, 32);
636 for (int i = 0; i < arr.length(); i++) {
637 arr.array(i, i);
638 }
639 accelerator.compute(
640 cc -> privateAndLocal(cc, arr) //QuotableComputeContextConsumer
641 ); // extends Quotable, Consumer<ComputeContext>
642 for (int i = 0; i < arr.length(); i++) {
643 HATAsserts.assertEquals(2 * i + 17, arr.array(i));
644 }
645 }
646 }