1 /*
  2  * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 package hat.test;
 26 
 27 import hat.Accelerator;
 28 import hat.ComputeContext;
 29 import hat.NDRange;
 30 import hat.KernelContext;
 31 import hat.backend.Backend;
 32 import hat.buffer.*;
 33 import hat.device.DeviceSchema;
 34 import hat.device.NonMappableIface;
 35 import optkl.ifacemapper.BoundSchema;
 36 import optkl.ifacemapper.Buffer;
 37 import optkl.ifacemapper.Schema;
 38 import jdk.incubator.code.Reflect;
 39 import hat.test.annotation.HatTest;
 40 import hat.test.exceptions.HATAsserts;
 41 import optkl.ifacemapper.MappableIface.*;
 42 
 43 import java.lang.foreign.ValueLayout;
 44 import java.lang.invoke.MethodHandles;
 45 import java.util.Random;
 46 
 47 import static java.lang.foreign.ValueLayout.JAVA_BYTE;
 48 
 49 public class TestArrayView {
 50 
 51     /*
 52      * simple square kernel example using S32Array's ArrayView
 53      */
    /**
     * Kernel that squares one element of {@code s32Array} in place.
     * <p>
     * The array view is first bound to a local {@code int[]} and then indexed
     * with {@code kc.gix}.
     *
     * @param kc       kernel context; {@code gix} and {@code gsx} are presumably the
     *                 global index and global size in x (confirm against KernelContext)
     * @param s32Array buffer whose element at {@code kc.gix} is squared
     */
    @Reflect
    public static void squareKernel(@RO  KernelContext kc, @RW S32Array s32Array) {
        // Only touch the buffer when gix is below gsx.
        if (kc.gix < kc.gsx){
            int[] arr = s32Array.arrayView();
            arr[kc.gix] *= arr[kc.gix];
        }
    }
 61 
    /**
     * Compute entry point that dispatches {@link #squareKernel} over a 1D range
     * whose size is {@code s32Array.length()}.
     *
     * @param cc       compute context used to dispatch the kernel
     * @param s32Array buffer passed through to the kernel
     */
    @Reflect
    public static void square(@RO ComputeContext cc, @RW S32Array s32Array) {
        cc.dispatchKernel(NDRange.of1D(s32Array.length()),
                kc -> squareKernel(kc, s32Array)
        );
    }
 68 
 69     @HatTest
 70     @Reflect
 71     public static void testSquare() {
 72 
 73         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
 74         var arr = S32Array.create(accelerator, 32);
 75         for (int i = 0; i < arr.length(); i++) {
 76             arr.array(i, i);
 77         }
 78         accelerator.compute(
 79                 cc -> square(cc, arr)
 80         );
 81         for (int i = 0; i < arr.length(); i++) {
 82             HATAsserts.assertEquals(i * i, arr.array(i));
 83         }
 84     }
 85 
 86     /*
 87      * making sure arrayviews aren't reliant on varOps
 88      */
    /**
     * Variant of {@link #squareKernel} that indexes the result of
     * {@code s32Array.arrayView()} directly on both sides of the assignment,
     * without first storing the view in a local variable.
     *
     * @param kc       kernel context supplying {@code gix} and {@code gsx}
     * @param s32Array buffer whose element at {@code kc.gix} is squared
     */
    @Reflect
    public static void squareKernelNoVarOp(@RO  KernelContext kc, @RW S32Array s32Array) {
        if (kc.gix<kc.gsx){
            // Deliberately no local for the view: this is the form under test.
            s32Array.arrayView()[kc.gix] *= s32Array.arrayView()[kc.gix];
        }
    }
 95 
    /**
     * Compute entry point that dispatches {@link #squareKernelNoVarOp} over a
     * 1D range whose size is {@code s32Array.length()}.
     *
     * @param cc       compute context used to dispatch the kernel
     * @param s32Array buffer passed through to the kernel
     */
    @Reflect
    public static void squareNoVarOp(@RO ComputeContext cc, @RW S32Array s32Array) {
        cc.dispatchKernel(NDRange.of1D(s32Array.length()),
                kc -> squareKernelNoVarOp(kc, s32Array)
        );
    }
102 
103     @HatTest
104     @Reflect
105     public static void testSquareNoVarOp() {
106         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
107         var arr = S32Array.create(accelerator, 32);
108         for (int i = 0; i < arr.length(); i++) {
109             arr.array(i, i);
110         }
111         accelerator.compute(
112                 cc -> squareNoVarOp(cc, arr)
113         );
114         for (int i = 0; i < arr.length(); i++) {
115             HATAsserts.assertEquals(i * i, arr.array(i));
116         }
117     }
118 
    /**
     * Kernel that squares one element of a 2D buffer in place, addressing it
     * through an {@code int[][]} array view as {@code arr[kc.gix][kc.giy]}.
     *
     * @param kc         kernel context supplying {@code gix}, {@code giy} and {@code gsx}
     * @param s32Array2D buffer whose addressed element is squared
     */
    @Reflect
    public static void square2DKernel(@RO  KernelContext kc, @RW S32Array2D s32Array2D) {
        if (kc.gix < kc.gsx){
            int[][] arr = s32Array2D.arrayView();
            // NOTE(review): the only dispatcher in this file uses a 1D range of
            // width*height, so giy is presumably 0 and gix spans the whole
            // buffer - confirm how the 2D view maps [gix][giy] to an offset.
            arr[kc.gix][kc.giy] *= arr[kc.gix][kc.giy];
        }
    }
126 
    /**
     * Compute entry point that dispatches {@link #square2DKernel} over a 1D
     * range of {@code width * height} work items.
     *
     * @param cc         compute context used to dispatch the kernel
     * @param s32Array2D buffer passed through to the kernel
     */
    @Reflect
    public static void square2D(@RO ComputeContext cc, @RW S32Array2D s32Array2D) {
        cc.dispatchKernel(NDRange.of1D(s32Array2D.width() * s32Array2D.height()),
                kc -> square2DKernel(kc, s32Array2D)
        );
    }
133 
134     @HatTest
135     @Reflect
136     public static void testSquare2D() {
137 
138         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
139         var arr = S32Array2D.create(accelerator, 5, 5);
140         for (int i = 0; i < arr.height(); i++) {
141             for (int j = 0; j < arr.width(); j++) {
142                 arr.set(i, j, i * 5 + j);
143             }
144         }
145         accelerator.compute(
146                 cc -> square2D(cc, arr)
147         );
148         for (int i = 0; i < arr.height(); i++) {
149             for (int j = 0; j < arr.width(); j++) {
150                 HATAsserts.assertEquals((i * 5 + j) * (i * 5 + j), arr.get(i, j));
151             }
152         }
153     }
154 
155     /*
156      * simplified version of Game of Life using ArrayView
157      */
    /** Cell state for a live cell; all bits set, so {@code ALIVE & 1} is 1 when counting neighbours. */
    public final static byte ALIVE = (byte) 0xff;
    /** Cell state for a dead cell; {@code DEAD & 1} is 0 when counting neighbours. */
    public final static byte DEAD = 0x00;
160 
    /**
     * Buffer interface for a Game of Life grid: a {@code width} and
     * {@code height} followed by a byte array of cells addressed by a flat
     * index.
     */
    public interface CellGrid extends Buffer {
        /*
         * struct CellGrid{
         *     int width;
         *     int height;
         *     byte[width*height*2] cellArray;
         *  }
         */
        /** @return the grid width */
        int width();

        /** @return the grid height */
        int height();

        /** @return the cell byte stored at flat index {@code idx} */
        byte array(long idx);

        /** Stores {@code b} at flat index {@code idx}. */
        void array(long idx, byte b);

        // Schema: array length derived from "width" and "height" with stride(2),
        // followed by the byte array "array". Per the struct sketch above the
        // array holds width*height*2 bytes.
        Schema<CellGrid> schema = Schema.of(CellGrid.class, lifeData -> lifeData
                .arrayLen("width", "height").stride(2).array("array")
        );

        /** Allocates a grid bound to {@code accelerator} with the given dimensions. */
        static CellGrid create(Accelerator accelerator, int width, int height) {
            return BoundSchema.of(accelerator ,schema, width, height).allocate();
        }

        ValueLayout valueLayout = JAVA_BYTE;

        /**
         * 2D array view of the cells for use in kernels.
         * The Java body returns {@code null}; presumably the call is replaced
         * when the kernel is lowered - TODO confirm.
         */
        default byte[][] arrayView() {
            return null;
        }
    }
191 
    /**
     * Buffer interface holding two int fields, {@code from} and {@code to}.
     * NOTE(review): nothing visible in this file uses it any more (the only
     * call site is commented out in testLife).
     */
    public interface Control extends Buffer {
        /*
         * struct Control{
         *     int from;
         *     int to;
         *  }
         */
        /** @return the {@code from} field */
        int from();

        /** Sets the {@code from} field. */
        void from(int from);

        /** @return the {@code to} field */
        int to();

        /** Sets the {@code to} field. */
        void to(int to);

        Schema<Control> schema = Schema.of(
                Control.class, control ->
                        control.fields("from", "to"));//, "generation", "requiredFrameRate", "maxGenerations"));

        /**
         * Allocates a Control with {@code from = width * height} of
         * {@code cellGrid} and {@code to = 0}.
         */
        static Control create(Accelerator accelerator, CellGrid cellGrid) {
            var instance = BoundSchema.of(accelerator ,schema).allocate();
            instance.from(cellGrid.width() * cellGrid.height());
            instance.to(0);
            return instance;
        }
    }
218 
219     public static byte[][] lifeCheck(CellGrid cellGrid) {
220         int w = cellGrid.width();
221         int h = cellGrid.height();
222 
223         byte[][] res = new byte[h][w];
224 
225         for (int y = 0; y < h; y++) {
226             for (int x = 0; x < w; x++) {
227                 int idx = y * w + x;
228                 byte cell = cellGrid.array(idx);
229                 if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please
230                     int count =
231                             (cellGrid.array((y - 1) * w + (x - 1)) & 1)
232                                     + (cellGrid.array((y + 0) * w + (x - 1)) & 1)
233                                     + (cellGrid.array((y + 1) * w + (x - 1)) & 1)
234                                     + (cellGrid.array((y - 1) * w + (x + 0)) & 1)
235                                     + (cellGrid.array((y + 1) * w + (x + 0)) & 1)
236                                     + (cellGrid.array((y - 1) * w + (x + 1)) & 1)
237                                     + (cellGrid.array((y + 0) * w + (x + 1)) & 1)
238                                     + (cellGrid.array((y + 1) * w + (x + 1))& 1);
239                     cell = ((count == 3) || ((count == 2) && (cell == ALIVE))) ? ALIVE : DEAD;// B3/S23.
240                 }
241                 res[x][y] = cell;
242             }
243         }
244         return res;
245     }
246 
    /**
     * Kernel, per-index helper and compute entry point for the Game of Life
     * test. Reads one generation from {@code cellGrid} and writes the next one
     * to a second grid through {@code byte[][]} array views.
     */
    public static class Compute {
        /**
         * Computes the next state of the cell at flat index {@code idx}.
         * {@code idx} is split into {@code x = idx % w} and {@code y = idx / w};
         * border cells are copied unchanged, interior cells follow B3/S23.
         *
         * @param idx         flat cell index
         * @param cellGrid    grid holding the current generation
         * @param cellGridRes grid receiving the next generation at {@code [x][y]}
         */
        @Reflect
        // TODO: switch cellGridRes to WO
        public static void lifePerIdx(int idx, @RO CellGrid cellGrid, @RW CellGrid cellGridRes) {
            int w = cellGrid.width();
            int h = cellGrid.height();
            int x = idx % w;
            int y = idx / w;

            // Earlier lookup-table variant on a flat byte[] view, kept for reference:
            // byte[] bytes = cellGrid.arrayView();
            // byte cell = bytes[idx + from];
            // byte[] lookup = new byte[]{};
            // if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // passports please
            //     int lookupIdx =
            //             (bytes[(y - 1) * w + x - 1 + from]&1 <<0)
            //                     |(bytes[(y + 0) * w + x - 1 + from]&1 <<1)
            //                     |(bytes[(y + 1) * w + x - 1 + from]&1 <<2)
            //                     |(bytes[(y - 1) * w + x + 0 + from]&1 <<3)
            //                     |(bytes[(y - 0) * w + x + 0 + from]&1 <<4) // current cell added
            //                     |(bytes[(y + 1) * w + x + 0 + from]&1 <<5)
            //                     |(bytes[(y + 0) * w + x + 1 + from]&1 <<6)
            //                     |(bytes[(y - 1) * w + x + 1 + from]&1 <<7)
            //                     |(bytes[(y + 1) * w + x + 1 + from]&1 <<8) ;
            //     // conditional removed!
            //     bytes[idx + to] = lookup[lookupIdx];
            // }

            byte[][] bytes = cellGrid.arrayView();
            byte cell = bytes[x][y];
            if (x > 0 && x < (w - 1) && y > 0 && y < (h - 1)) { // interior cells only
                // ALIVE is 0xff and DEAD is 0x00, so "& 1" yields 1 per live neighbour.
                int count =
                        (bytes[x - 1][y - 1] & 1)
                                + (bytes[x - 1][y + 0] & 1)
                                + (bytes[x - 1][y + 1] & 1)
                                + (bytes[x + 0][y - 1] & 1)
                                + (bytes[x + 0][y + 1] & 1)
                                + (bytes[x + 1][y - 1] & 1)
                                + (bytes[x + 1][y + 0] & 1)
                                + (bytes[x + 1][y + 1] & 1);
                cell = ((count == 3) || ((count == 2) && (cell == ALIVE))) ? ALIVE : DEAD;// B3/S23.
            }
            byte[][] res = cellGridRes.arrayView();
            res[x][y] = cell;
        }

        /**
         * Kernel: runs {@link #lifePerIdx} for index {@code kc.gix} when it is
         * below {@code kc.gsx}.
         */
        @Reflect
        public static void life(@RO KernelContext kc, @RO CellGrid cellGrid, @RW CellGrid cellGridRes) {
            if (kc.gix < kc.gsx) {
                Compute.lifePerIdx(kc.gix, cellGrid, cellGridRes);
            }
        }

        /**
         * Compute entry point: dispatches {@link #life} over a 1D range of
         * {@code width * height} work items.
         */
        @Reflect
        static public void compute(final @RO ComputeContext cc, @RO CellGrid grid, @RW CellGrid gridRes) {
            int range = grid.width() * grid.height();
            cc.dispatchKernel(NDRange.of1D(range), kc -> Compute.life(kc, grid, gridRes));
        }
    }
305 
306     @HatTest
307     @Reflect
308     public static void testLife() {
309         Accelerator accelerator = new Accelerator(MethodHandles.lookup());
310 
311         // int w = 20;
312         // int h = 20;
313         // // We oversize the grid by adding 1 to n,e,w and s
314         // CellGrid cellGrid = CellGrid.create(accelerator, w, h);
315         // CellGrid cellGridRes = CellGrid.create(accelerator, w, h);
316         //
317         // Random rand = new Random();
318         // byte[][] actualGrid = new byte[w][h];
319         // for (int y = 0; y < h; y++) {
320         //     for (int x = 0; x < w; x++) {
321         //         actualGrid[x][y] = rand.nextBoolean() ? ALIVE : DEAD;
322         //     }
323         // }
324 
325         // We oversize the grid by adding 1 to n,e,w and s
326         CellGrid cellGrid = CellGrid.create(accelerator, 17, 17);
327         CellGrid cellGridRes = CellGrid.create(accelerator, 17, 17);
328 
329         byte[][] actualGrid = new byte[][]{
330                 {DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD},
331                 {DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD},
332                 {DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, ALIVE, ALIVE, DEAD,  DEAD,  DEAD,  ALIVE, ALIVE, ALIVE, DEAD,  DEAD,  DEAD,  DEAD},
333                 {DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD},
334                 {DEAD,  DEAD,  ALIVE, DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, DEAD,  ALIVE, DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, DEAD,  DEAD},
335                 {DEAD,  DEAD,  ALIVE, DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, DEAD,  ALIVE, DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, DEAD,  DEAD},
336                 {DEAD,  DEAD,  ALIVE, DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, DEAD,  ALIVE, DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, DEAD,  DEAD},
337                 {DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, ALIVE, ALIVE, DEAD,  DEAD,  DEAD,  ALIVE, ALIVE, ALIVE, DEAD,  DEAD,  DEAD,  DEAD},
338                 {DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD},
339                 {DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, ALIVE, ALIVE, DEAD,  DEAD,  DEAD,  ALIVE, ALIVE, ALIVE, DEAD,  DEAD,  DEAD,  DEAD},
340                 {DEAD,  DEAD,  ALIVE, DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, DEAD,  ALIVE, DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, DEAD,  DEAD},
341                 {DEAD,  DEAD,  ALIVE, DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, DEAD,  ALIVE, DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, DEAD,  DEAD},
342                 {DEAD,  DEAD,  ALIVE, DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, DEAD,  ALIVE, DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, DEAD,  DEAD},
343                 {DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD},
344                 {DEAD,  DEAD,  DEAD,  DEAD,  ALIVE, ALIVE, ALIVE, DEAD,  DEAD,  DEAD,  ALIVE, ALIVE, ALIVE, DEAD,  DEAD,  DEAD,  DEAD},
345                 {DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD},
346                 {DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD,  DEAD},
347         };
348 
349         // By shifting all cells +1,+1 so we only need to scan 1..width-1, 1..height-1
350         // we don't worry about possibly finding cells in 0,n width,n or n,0 height,n
351         for (int i = 0; i < cellGrid.height(); i++) {
352             for (int j = 0; j < cellGrid.width(); j++) {
353                 cellGrid.array(((long) i * cellGrid.width()) + j, actualGrid[i][j]);
354             }
355         }
356 
357         // Control control = Control.create(accelerator, cellGrid);
358 
359         accelerator.compute(cc -> Compute.compute(cc, cellGrid, cellGridRes));
360 
361         byte[][] resultGrid = lifeCheck(cellGrid);
362 
363         for (int i = 0; i < cellGrid.height(); i++) {
364             for (int j = 0; j < cellGrid.width(); j++) {
365                 HATAsserts.assertEquals(resultGrid[i][j], cellGridRes.array(((long) i * cellGrid.width()) + j));
366             }
367         }
368     }
369 
370     /*
371      * simplified version of mandel using ArrayView
372      */
373     @Reflect
374     public static int mandelCheck(int i, int j, float width, float height, int[] pallette, float offsetx, float offsety, float scale) {
375         float x = (i * scale - (scale / 2f * width)) / width + offsetx;
376         float y = (j * scale - (scale / 2f * height)) / height + offsety;
377         float zx = x;
378         float zy = y;
379         float new_zx;
380         int colorIdx = 0;
381         while ((colorIdx < pallette.length) && (((zx * zx) + (zy * zy)) < 4f)) {
382             new_zx = ((zx * zx) - (zy * zy)) + x;
383             zy = (2f * zx * zy) + y;
384             zx = new_zx;
385             colorIdx++;
386         }
387         return colorIdx < pallette.length ? pallette[colorIdx] : 0;
388     }
389 
    /**
     * Mandelbrot kernel using array views for both the palette and the output.
     * <p>
     * The work-item index {@code kc.gix} is split into a column
     * ({@code gix % width}) and a row ({@code gix / width}); the iteration is
     * the same as in {@code mandelCheck} and the resulting palette value is
     * stored at {@code s32[column][row]}.
     *
     * @param kc         kernel context supplying {@code gix} and {@code gsx}
     * @param s32Array2D output buffer receiving one palette value per work item
     * @param pallette   colours by iteration count; its length is the iteration cap
     * @param offsetx    offset added to the real component
     * @param offsety    offset added to the imaginary component
     * @param scale      extent of the mapped region
     */
    @Reflect
    public static void mandel(@RO KernelContext kc, @RW S32Array2D s32Array2D, @RO S32Array pallette, float offsetx, float offsety, float scale) {
        if (kc.gix < kc.gsx) {
            int[] pal = pallette.arrayView();
            int[][] s32 = s32Array2D.arrayView();
            float width = s32Array2D.width();
            float height = s32Array2D.height();
            float x = ((kc.gix % s32Array2D.width()) * scale - (scale / 2f * width)) / width + offsetx;
            float y = ((kc.gix / s32Array2D.width()) * scale - (scale / 2f * height)) / height + offsety;
            float zx = x;
            float zy = y;
            float new_zx;
            int colorIdx = 0;
            // Iterate z = z^2 + c while |z|^2 < 4 and the palette is not exhausted.
            while ((colorIdx < pal.length) && (((zx * zx) + (zy * zy)) < 4f)) {
                new_zx = ((zx * zx) - (zy * zy)) + x;
                zy = (2f * zx * zy) + y;
                zx = new_zx;
                colorIdx++;
            }
            // Reaching the cap maps to 0.
            int color = colorIdx < pal.length ? pal[colorIdx] : 0;
            s32[kc.gix % s32Array2D.width()][kc.gix / s32Array2D.width()] = color;
        }
    }
413 
414 
    /**
     * Compute entry point for the Mandelbrot test: dispatches {@link #mandel}
     * over a 1D range of {@code width * height} work items.
     * NOTE(review): unlike the other dispatchers in this file, the parameters
     * here carry no {@code @RO}/{@code @RW} annotations.
     *
     * @param computeContext compute context used to dispatch the kernel
     * @param pallete        palette buffer passed to the kernel
     * @param s32Array2D     output buffer passed to the kernel
     * @param x              forwarded as the kernel's {@code offsetx}
     * @param y              forwarded as the kernel's {@code offsety}
     * @param scale          forwarded as the kernel's {@code scale}
     */
    @Reflect
    static public void compute(final ComputeContext computeContext, S32Array pallete, S32Array2D s32Array2D, float x, float y, float scale) {

        computeContext.dispatchKernel(
                NDRange.of1D(s32Array2D.width()*s32Array2D.height()), //0..S32Array2D.size()
                kc -> mandel(kc, s32Array2D, pallete, x, y, scale));
    }
422 
423     @HatTest
424     @Reflect
425     public static void testMandel() {
426         final int width = 1024;
427         final int height = 1024;
428         final float defaultScale = 3f;
429         final float originX = -1f;
430         final float originY = 0;
431         final int maxIterations = 64;
432 
433         Accelerator accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
434 
435         S32Array2D s32Array2D = S32Array2D.create(accelerator, width, height);
436 
437         int[] palletteArray = new int[maxIterations];
438 
439         for (int i = 1; i < maxIterations; i++) {
440             palletteArray[i]=(i/8+1);
441         }
442         palletteArray[0]=0;
443         S32Array pallette = S32Array.createFrom(accelerator, palletteArray);
444 
445         accelerator.compute(cc -> compute(cc, pallette, s32Array2D, originX, originY, defaultScale));
446 
447         int subsample = 16;
448         char[] charPallette9 = new char []{' ', '.', ',',':', '-', '+','*', '#', '@', '%'};
449         for (int y = 0; y<height/subsample; y++) {
450             for (int x = 0; x<width/subsample; x++) {
451                 int palletteValue = s32Array2D.get(x*subsample,y*subsample); // so 0->8
452                 int paletteCheck = mandelCheck(x*subsample, y*subsample, width, height, palletteArray, originX, originY, defaultScale);
453                 HATAsserts.assertEquals(paletteCheck, palletteValue);
454             }
455         }
456     }
457 
458     /*
459      * simplified version of BlackScholes using ArrayView
460      */
    /**
     * Black-Scholes kernel: for the option at index {@code kc.gix}, reads
     * {@code sArray}, {@code xArray} and {@code tArray} and writes the call and
     * put values, using float array views on all five buffers.
     *
     * @param kc     kernel context supplying {@code gix} and {@code gsx}
     * @param call   output buffer for call values
     * @param put    output buffer for put values
     * @param sArray input read as S (presumably the spot price)
     * @param xArray input read as X (presumably the strike price)
     * @param tArray input read as T (presumably the time to expiry)
     * @param r      rate used in {@code exp(-r * T)} and in d1
     * @param v      volatility term used in d1 and d2
     */
    @Reflect
    public static void blackScholesKernel(@RO KernelContext kc,
                                          @RW F32Array call,
                                          @RW F32Array put,
                                          @RO F32Array sArray,
                                          @RO F32Array xArray,
                                          @RO F32Array tArray,
                                          float r,
                                          float v) {
        if (kc.gix<kc.gsx){
            float[] callArr = call.arrayView();
            float[] putArr = put.arrayView();
            float[] sArr = sArray.arrayView();
            float[] xArr = xArray.arrayView();
            float[] tArr = tArray.arrayView();

            // Same formulas as the sequential blackScholesKernelSeq reference.
            float expNegRt = (float) Math.exp(-r * tArr[kc.gix]);
            float d1 = (float) ((Math.log(sArr[kc.gix] / xArr[kc.gix]) + (r + v * v * .5f) * tArr[kc.gix]) / (v * Math.sqrt(tArr[kc.gix])));
            float d2 = (float) (d1 - v * Math.sqrt(tArr[kc.gix]));
            float cnd1 = CND(d1);
            float cnd2 = CND(d2);
            float value = sArr[kc.gix] * cnd1 - expNegRt * xArr[kc.gix] * cnd2;
            callArr[kc.gix] = value;
            putArr[kc.gix] = expNegRt * xArr[kc.gix] * (1 - cnd2) - sArr[kc.gix] * (1 - cnd1);
        }
    }
487 
488     @Reflect
489     public static float CND(float input) {
490         float x = input;
491         if (input < 0f) { // input = Math.abs(input)?
492             x = -input;
493         }
494 
495         float term = 1f / (1f + (0.2316419f * x));
496         float term_pow2 = term * term;
497         float term_pow3 = term_pow2 * term;
498         float term_pow4 = term_pow2 * term_pow2;
499         float term_pow5 = term_pow2 * term_pow3;
500 
501         float part1 = (1f / (float)Math.sqrt(2f * 3.1415926535f)) * (float)Math.exp((-x * x) * 0.5f);
502 
503         float part2 = (0.31938153f * term) +
504                 (-0.356563782f * term_pow2) +
505                 (1.781477937f * term_pow3) +
506                 (-1.821255978f * term_pow4) +
507                 (1.330274429f * term_pow5);
508 
509         if (input >= 0f) {
510             return 1f - part1 * part2;
511         }
512         return part1 * part2;
513 
514     }
515 
    /**
     * Compute entry point that dispatches {@link #blackScholesKernel} over a 1D
     * range whose size is {@code call.length()}, forwarding all buffers and
     * the scalars {@code r} and {@code v} unchanged.
     */
    @Reflect
    public static void blackScholes(@RO ComputeContext cc, @RW F32Array call, @RW F32Array put, @RO F32Array S, @RO F32Array X, @RO F32Array T, float r, float v) {
        cc.dispatchKernel(NDRange.of1D(call.length()),
                kc -> blackScholesKernel(kc, call, put, S, X, T, r, v)
        );
    }
522 
523     static F32Array floatArray(Accelerator accelerator, int size, float low, float high, Random rand) {
524         F32Array array = F32Array.create(accelerator, size);
525         for (int i = 0; i <size; i++) {
526             array.array(i, rand.nextFloat() * (high - low) + low);
527         }
528         return array;
529     }
530 
531     public static void blackScholesKernelSeq(F32Array call, F32Array put, F32Array sArray, F32Array xArray, F32Array tArray, float r, float v) {
532         for (int i = 0; i <call.length() ; i++) {
533             float S = sArray.array(i);
534             float X = xArray.array(i);
535             float T = tArray.array(i);
536             float expNegRt = (float) Math.exp(-r * T);
537             float d1 = (float) ((Math.log(S / X) + (r + v * v * .5f) * T) / (v * Math.sqrt(T)));
538             float d2 = (float) (d1 - v * Math.sqrt(T));
539             float cnd1 = CND(d1);
540             float cnd2 = CND(d2);
541             float value = S * cnd1 - expNegRt * X * cnd2;
542             call.array(i, value);
543             put.array(i, expNegRt * X * (1 - cnd2) - S * (1 - cnd1));
544         }
545     }
546 
547     @HatTest
548     @Reflect
549     public static void testBlackScholes() {
550         int size = 1024;
551         Random rand = new Random();
552         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);
553         var call = F32Array.create(accelerator, size);
554         var put = F32Array.create(accelerator, size);
555         for (int i = 0; i < size; i++) {
556             call.array(i, i);
557             put.array(i, i);
558         }
559 
560         var S = floatArray(accelerator, size,1f, 100f, rand);
561         var X = floatArray(accelerator, size,1f, 100f, rand);
562         var T = floatArray(accelerator,size, 0.25f, 10f, rand);
563         float r = 0.02f;
564         float v = 0.30f;
565 
566         accelerator.compute(cc -> blackScholes(cc, call, put, S, X, T, r, v));
567 
568         var seqCall = F32Array.create(accelerator, size);
569         var seqPut = F32Array.create(accelerator, size);
570         for (int i = 0; i < seqCall.length(); i++) {
571             seqCall.array(i, i);
572             seqPut.array(i, i);
573         }
574 
575         blackScholesKernelSeq(seqCall, seqPut, S, X, T, r, v);
576 
577         for (int i = 0; i < call.length(); i++) {
578             HATAsserts.assertEquals(seqCall.array(i), call.array(i), 0.01f);
579             HATAsserts.assertEquals(seqPut.array(i), put.array(i), 0.01f);
580         }
581     }
582 
583     /*
584      * basic test of local and private buffer ArrayViews
585      */
    /**
     * Device-side int array of 1024 elements (per the schema), obtained in
     * kernels through {@link #createLocal()}.
     */
    private interface SharedMemory extends NonMappableIface {
        /** Stores {@code value} at {@code index}. */
        void array(long index, int value);
        /** @return the value at {@code index} */
        int array(long index);
        DeviceSchema<SharedMemory> schema = DeviceSchema.of(SharedMemory.class,
                arr -> arr.withArray("array", 1024));

        /**
         * Placeholder factory; the Java body returns {@code null}.
         * Presumably replaced when the kernel is lowered - TODO confirm.
         */
        static SharedMemory createLocal() { return null; }

        /**
         * Java-side body of the local array view: copies all 1024 elements
         * into a new {@code int[]} via {@link #array(long)}.
         */
        default int[] localArrayView() {
            int[] view = new int[1024];
            for (int i = 0; i < 1024; i++) {
                view[i] = this.array(i);
            }
            return view;
        }
    }
602 
    /**
     * Device-side int array of 16 elements (per the schema), obtained in
     * kernels through {@link #createPrivate()}.
     */
    public interface PrivateArray extends NonMappableIface {
        /** Stores {@code value} at {@code index}. */
        void array(long index, int value);
        /** @return the value at {@code index} */
        int array(long index);
        DeviceSchema<PrivateArray> schema = DeviceSchema.of(PrivateArray.class,
                arr -> arr.withArray("array", 16));

        /**
         * Placeholder factory; the Java body returns {@code null}.
         * Presumably replaced when the kernel is lowered - TODO confirm.
         */
        static PrivateArray createPrivate() { return null; }

        /**
         * Java-side body of the private array view: copies all 16 elements
         * into a new {@code int[]} via {@link #array(long)}.
         */
        default int[] privateArrayView() {
            int[] view = new int[16];
            for (int i = 0; i < 16; i++) {
                view[i] = this.array(i);
            }
            return view;
        }
    }
619 
    /**
     * Kernel exercising array views on a global buffer, a private array and a
     * local (shared) array together.
     * <p>
     * Despite the name it does not square: each element is doubled, then 1 is
     * added via the private view and 16 via the shared view, giving
     * {@code 2 * original + 17}.
     *
     * @param kc       kernel context supplying {@code gix}, {@code gsx} and {@code barrier()}
     * @param s32Array buffer updated in place
     */
    @Reflect
    public static void squareKernelWithPrivateAndLocal(@RO  KernelContext kc, @RW S32Array s32Array) {
        SharedMemory shared = SharedMemory.createLocal();
        if (kc.gix < kc.gsx){
            int[] arr = s32Array.arrayView();
            arr[kc.gix] += arr[kc.gix];

            PrivateArray priv = PrivateArray.createPrivate();
            int[] privView = priv.privateArrayView();
            privView[0] = 1;
            arr[kc.gix] += privView[0];

            int[] sharedView = shared.localArrayView();
            sharedView[0] = 16;
            // Barrier between the shared write and the read of sharedView[0].
            kc.barrier();
            arr[kc.gix] += sharedView[0];
        }
    }
638 
    /**
     * Compute entry point that dispatches
     * {@link #squareKernelWithPrivateAndLocal} over a 1D range whose size is
     * {@code s32Array.length()}.
     */
    @Reflect
    public static void privateAndLocal(@RO ComputeContext cc, @RW S32Array s32Array) {
        cc.dispatchKernel(NDRange.of1D(s32Array.length()),
                kc -> squareKernelWithPrivateAndLocal(kc, s32Array)
        );
    }
645 
646     @HatTest
647     @Reflect
648     public static void testPrivateAndLocal() {
649 
650         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
651         var arr = S32Array.create(accelerator, 32);
652         for (int i = 0; i < arr.length(); i++) {
653             arr.array(i, i);
654         }
655         accelerator.compute(
656                 cc -> privateAndLocal(cc, arr)
657         );
658         for (int i = 0; i < arr.length(); i++) {
659             HATAsserts.assertEquals(2 * i + 17, arr.array(i));
660         }
661     }
662 
663     /*
664      * testing basic DeviceTypes
665      */
666 
    /**
     * Device-side int array of 1024 elements (per the schema) used as the
     * local array in the device-type kernels. All Java bodies here return
     * {@code null}; presumably the calls are replaced when the kernel is
     * lowered - TODO confirm.
     */
    public interface SharedNonMappableIface extends NonMappableIface {
        /** Stores {@code value} at {@code index}. */
        void array(long index, int value);
        /** @return the value at {@code index} */
        int array(long index);
        DeviceSchema<SharedNonMappableIface> schema = DeviceSchema.of(SharedNonMappableIface.class,
                arr -> arr.withArray("array", 1024));
        /** Placeholder factory taking an accelerator; returns {@code null}. */
        static SharedNonMappableIface create(Accelerator accelerator) {
            return null;
        }
        /** Placeholder factory used by the kernels; returns {@code null}. */
        static SharedNonMappableIface createLocal() {
            return null;
        }

        /** Placeholder for the {@code int[]} view; returns {@code null}. */
        default int[] localArrayView() {
            return null;
        }
    }
683 
    /**
     * Device-side int array of 32 elements (per the schema) used as the
     * private array in the device-type kernels. All Java bodies here return
     * {@code null}; presumably the calls are replaced when the kernel is
     * lowered - TODO confirm.
     */
    public interface PrivateNonMappableIface extends NonMappableIface {
        /** Stores {@code value} at {@code index}. */
        void array(long index, int value);
        /** @return the value at {@code index} */
        int array(long index);
        DeviceSchema<PrivateNonMappableIface> schema = DeviceSchema.of(PrivateNonMappableIface.class,
                arr -> arr.withArray("array", 32));
        /** Placeholder factory taking an accelerator; returns {@code null}. */
        static PrivateNonMappableIface create(Accelerator accelerator) {
            return null;
        }
        /** Placeholder factory used by the kernels; returns {@code null}. */
        static PrivateNonMappableIface createPrivate() {
            return null;
        }

        /** Placeholder for the {@code int[]} view; returns {@code null}. */
        default int[] privateArrayView() {
            return null;
        }
    }
700 
    /**
     * Kernel that copies {@code arr[gix]} into both a private and a shared
     * array view at index {@code gix}, then writes their sum back, so each
     * element ends up doubled.
     *
     * @param kc       kernel context supplying {@code gix}, {@code gsx} and {@code barrier()}
     * @param s32Array buffer updated in place
     */
    @Reflect
    public static void kernelBasicDeviceType(@RO  KernelContext kc, @RW S32Array s32Array) {
        SharedNonMappableIface shared = SharedNonMappableIface.createLocal();
        if (kc.gix < kc.gsx){
            PrivateNonMappableIface priv = PrivateNonMappableIface.createPrivate();

            int[] arr = s32Array.arrayView();
            int[] privView = priv.privateArrayView();
            int[] sharedView = shared.localArrayView();

            // The private schema declares 32 elements, so gix must stay below 32.
            privView[kc.gix] = arr[kc.gix];
            sharedView[kc.gix] = arr[kc.gix];
            kc.barrier();
            arr[kc.gix] = privView[kc.gix] + sharedView[kc.gix];
        }
    }
717 
    /**
     * Compute entry point that dispatches {@link #kernelBasicDeviceType} over
     * a 1D range whose size is {@code s32Array.length()}.
     */
    @Reflect
    public static void basicDeviceType(@RO ComputeContext cc, @RW S32Array s32Array) {
        cc.dispatchKernel(NDRange.of1D(s32Array.length()),
                kc -> kernelBasicDeviceType(kc, s32Array)
        );
    }
724 
725     @HatTest
726     @Reflect
727     public static void testBasicDeviceType() {
728         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
729         var arr = S32Array.create(accelerator, 32);
730         for (int i = 0; i < arr.length(); i++) {
731             arr.array(i, i);
732         }
733         accelerator.compute(cc -> basicDeviceType(cc, arr));
734         for (int i = 0; i < arr.length(); i++) {
735             HATAsserts.assertEquals(2 * i, arr.array(i));
736         }
737     }
738 
    /**
     * Kernel that uses a value read from the private view as an index into
     * the shared view.
     * <p>
     * Despite the name it does not square: with {@code p = arr[gix]} it stores
     * {@code 16 * p} at {@code sharedView[p]} and then adds
     * {@code p + sharedView[gix]} to {@code arr[gix]}.
     *
     * @param kc       kernel context supplying {@code gix}, {@code gsx} and {@code barrier()}
     * @param s32Array buffer updated in place
     */
    @Reflect
    public static void squareKernelDeviceType(@RO  KernelContext kc, @RW S32Array s32Array) {
        SharedNonMappableIface shared = SharedNonMappableIface.createLocal();
        if (kc.gix < kc.gsx){
            PrivateNonMappableIface priv = PrivateNonMappableIface.createPrivate();

            int[] arr = s32Array.arrayView();
            int[] privView = priv.privateArrayView();
            int[] sharedView = shared.localArrayView();

            privView[kc.gix] = arr[kc.gix];
            // Indexed by the stored value, not by gix; the two coincide only
            // when arr[gix] == gix, as in the test in this file.
            sharedView[privView[kc.gix]] = 16 * privView[kc.gix];
            kc.barrier();
            arr[kc.gix] += privView[kc.gix] + sharedView[kc.gix];
        }
    }
755 
    /**
     * Compute entry point that dispatches {@link #squareKernelDeviceType} over
     * a 1D range whose size is {@code s32Array.length()}.
     */
    @Reflect
    public static void deviceType(@RO ComputeContext cc, @RW S32Array s32Array) {
        cc.dispatchKernel(NDRange.of1D(s32Array.length()),
                kc -> squareKernelDeviceType(kc, s32Array)
        );
    }
762 
763     @HatTest
764     @Reflect
765     public static void testDeviceType() {
766         var accelerator = new Accelerator(MethodHandles.lookup(), Backend.FIRST);//new JavaMultiThreadedBackend());
767         var arr = S32Array.create(accelerator, 32);
768         for (int i = 0; i < arr.length(); i++) {
769             arr.array(i, i);
770         }
771         accelerator.compute(cc -> deviceType(cc, arr));
772         for (int i = 0; i < arr.length(); i++) {
773             HATAsserts.assertEquals(18 * i, arr.array(i));
774         }
775     }
776 }