1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 
 26 #pragma once
 27 
#include <sys/time.h>
#include <unistd.h>

#include <bitset>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <map>
#include <stack>
#include <vector>
 39 
 40 #include "strutil.h"
 41 #include "config.h"
 42 
 43 #ifdef __APPLE__
 44 #define SNPRINTF snprintf
 45 #else
 46 #include <malloc.h>
 47 #if defined (_WIN32)
 48 #include "windows.h"
 49 #define SNPRINTF _snprintf
 50 #else
 51 #define SNPRINTF  snprintf
 52 #endif
 53 #endif
 54 
 55 typedef char s8_t;
 56 typedef char byte;
 57 typedef char boolean;
 58 typedef char z1_t;
 59 typedef unsigned char u8_t;
 60 typedef short s16_t;
 61 typedef unsigned short u16_t;
 62 typedef unsigned int u32_t;
 63 typedef int s32_t;
 64 typedef float f32_t;
 65 typedef double f64_t;
 66 typedef long s64_t;
 67 typedef unsigned long u64_t;
 68 
 69 extern void hexdump(void *ptr, int buflen);
 70 
 71 class Text {
 72 public:
 73     size_t len;
 74     char *text;
 75     bool isCopy;
 76 
 77     Text(size_t len, char *text, bool isCopy);
 78 
 79     Text(char *text, bool isCopy);
 80 
 81     explicit Text(size_t len);
 82 
 83     void write(const std::string &filename) const;
 84 
 85     void read(const std::string &filename);
 86 
 87     virtual ~Text();
 88 };
 89 
 90 class Log : public Text {
 91 public:
 92     explicit Log(size_t len);
 93 
 94     explicit Log(char *text);
 95 
 96     ~Log() override = default;
 97 };
 98 
 99 
// Access-mode flags carried in Buffer_s::access.
// RW_BYTE is simply both the read and the write bit set.
#define UNKNOWN_BYTE 0
#define RO_BYTE (1 << 1)
#define WO_BYTE (1 << 2)
#define RW_BYTE (RO_BYTE | WO_BYTE)
104 
105 struct Buffer_s {
106     void *memorySegment; // Address of a Buffer/MemorySegment
107     long sizeInBytes; // The size of the memory segment in bytes
108     u8_t access; // see hat/buffer/ArgArray.java  UNKNOWN_BYTE=0, RO_BYTE =1<<1,WO_BYTE =1<<2,RW_BYTE =RO_BYTE|WO_BYTE;
109 };
110 
111 union Value_u {
112     boolean z1; // 'Z'
113     u8_t s8; // 'B'
114     u16_t u16; // 'C'
115     s16_t s16; // 'S'
116     u16_t x16; // 'C' or 'S"
117     s32_t s32; // 'I'
118     s32_t x32; // 'I' or 'F'
119     f32_t f32; // 'F'
120     f64_t f64; // 'D'
121     s64_t s64; // 'J'
122     s64_t x64; // 'D' or 'J'
123     Buffer_s buffer; // '&'
124 };
125 
126 struct KernelArg {
127     u32_t idx; // 0..argc
128     u8_t variant; // which variant 'I','Z','S','J','F', '&' implies Buffer/MemorySegment
129     u8_t pad8[8];
130     Value_u value;
131     u8_t pad6[6];
132 
133     size_t size() const {
134         size_t sz;
135         switch (variant) {
136             case 'I':
137             case 'F':
138                 sz = sizeof(u32_t);
139                 break;
140             case 'S':
141             case 'C':
142                 sz = sizeof(u16_t);
143                 break;
144             case 'D':
145             case 'J':
146                 return sizeof(u64_t);
147                 break;
148             case 'B':
149                 return sizeof(u8_t);
150                 break;
151             default:
152                 std::cerr << "Bad variant " << variant << "arg::size" << std::endl;
153                 exit(1);
154         }
155 
156         return sz;
157     }
158 };
159 
160 struct BufferState {
161     static constexpr long MAGIC = 0x4a71facebffab175;
162     static constexpr int NO_STATE = 0;
163     static constexpr int NEW_STATE = 1;
164     static constexpr int HOST_OWNED = 2;
165     static constexpr int DEVICE_OWNED = 3;
166     static constexpr int DEVICE_VALID_HOST_HAS_COPY = 4;
167     const static char *stateNames[]; // See below for out of line definition
168 
169     long magic1;
170     void *ptr;
171     long length;
172     int bits;
173     int state;
174     void *vendorPtr;
175     long magic2;
176 
177     bool ok() const {
178         return ((magic1 == MAGIC) && (magic2 == MAGIC));
179     }
180 
181     void setState(int newState) {
182         state = newState;
183     }
184 
185     int getState() const {
186         return state;
187     }
188 
189     void dump(const char *msg) const {
190         if (ok()) {
191             printf("{%s,ptr:%016lx,length: %016lx,  state:%08x, vendorPtr:%016lx}\n", msg, (long) ptr, length, state,
192                    (long) vendorPtr);
193         } else {
194             printf("%s bad magic \n", msg);
195             printf("(magic1:%016lx,", magic1);
196             printf("{%s, ptr:%016lx, length: %016lx,  state:%08x, vendorPtr:%016lx}", msg, (long) ptr, length, state,
197                    (long) vendorPtr);
198             printf("magic2:%016lx)\n", magic2);
199         }
200     }
201 
202     static BufferState *of(void *ptr, size_t sizeInBytes) {
203         return reinterpret_cast<BufferState *>(static_cast<char *>(ptr) + sizeInBytes - sizeof(BufferState));
204     }
205 
206     static BufferState *of(const KernelArg *arg) {
207         // access?
208         BufferState *bufferState = BufferState::of(
209             arg->value.buffer.memorySegment,
210             arg->value.buffer.sizeInBytes
211         );
212 
213 
214         //Sanity check the buffers
215         // These sanity check finds errors passing memory segments which are not Buffers
216 
217         if (bufferState->ptr != arg->value.buffer.memorySegment) {
218             std::cerr << "bufferState->ptr !=  arg->value.buffer.memorySegment" << std::endl;
219             std::exit(1);
220         }
221 
222         if ((bufferState->vendorPtr == nullptr) && (bufferState->state != BufferState::NEW_STATE)) {
223             std::cerr << "Warning:  Unexpected initial state for buffer "
224                     //<<" of kernel '"<<(dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<<"'"
225                     << " state=" << bufferState->state << " '"
226                     << BufferState::stateNames[bufferState->state] << "'"
227                     << " vendorPtr" << bufferState->vendorPtr << std::endl;
228         }
229         // End of sanity checks
230         return bufferState;
231     }
232 };
233 
#ifdef shared_cpp
// Out-of-line definition of BufferState::stateNames, emitted only where
// shared_cpp is defined. Entry i is the name of the state constant whose
// value is i (NO_STATE = 0 ... DEVICE_VALID_HOST_HAS_COPY = 4).
const char *BufferState::stateNames[] = {
    "NO_STATE", "NEW_STATE", "HOST_OWNED", "DEVICE_OWNED", "DEVICE_VALID_HOST_HAS_COPY"
};
#endif
243 
244 struct ArgArray_s {
245     u32_t argc;
246     u8_t pad12[12];
247     KernelArg argv[0/*argc*/];
248 };
249 
250 class ArgSled {
251 private:
252     ArgArray_s *argArray;
253 
254 public:
255     int argc() const {
256         return argArray->argc;
257     }
258 
259     KernelArg *arg(int n) const {
260         KernelArg *a = (argArray->argv + n);
261         return a;
262     }
263 
264     void hexdumpArg(int n) const {
265         hexdump(arg(n), sizeof(KernelArg));
266     }
267 
268     void dumpArg(int n) const {
269         KernelArg *a = arg(n);
270         int idx = (int) a->idx;
271         std::cout << "arg[" << idx << "]";
272         char variant = (char) a->variant;
273         switch (variant) {
274             case 'F':
275                 std::cout << " f32 " << a->value.f32 << std::endl;
276                 break;
277             case 'I':
278                 std::cout << " s32 " << a->value.s32 << std::endl;
279                 break;
280             case 'D':
281                 std::cout << " f64 " << a->value.f64 << std::endl;
282                 break;
283             case 'J':
284                 std::cout << " s64 " << a->value.s64 << std::endl;
285                 break;
286             case 'C':
287                 std::cout << " u16 " << a->value.u16 << std::endl;
288                 break;
289             case 'S':
290                 std::cout << " s16 " << a->value.s32 << std::endl;
291                 break;
292             case 'Z':
293                 std::cout << " z1 " << a->value.z1 << std::endl;
294                 break;
295             case '&':
296                 std::cout << " buffer {"
297                         << " void *address = 0x" << std::hex << (long) a->value.buffer.memorySegment << std::dec
298                         << ", long bytesSize= 0x" << std::hex << (long) a->value.buffer.sizeInBytes << std::dec
299                         << ", char access= 0x" << std::hex << (unsigned char) a->value.buffer.access << std::dec
300                         << "}" << std::endl;
301                 break;
302             default:
303                 std::cout << (char) variant << std::endl;
304                 break;
305         }
306     }
307 
308     void *afterArgsPtrPtr() const {
309         KernelArg *a = arg(argc());
310         return (void *) a;
311     }
312 
313     int *schemaLenPtr() const {
314         int *schemaLenP = (int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *) */);
315         return schemaLenP;
316     }
317 
318     int schemaLen() const {
319         return *schemaLenPtr();
320     }
321 
322     char *schema() const {
323         int *schemaLenP = ((int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *)*/) + 1);
324         return (char *) schemaLenP;
325     }
326 
327     explicit ArgSled(ArgArray_s *argArray)
328         : argArray(argArray) {
329     }
330 };
331 
332 
// Minimal stopwatch reporting elapsed microseconds.
//
// Uses std::chrono::steady_clock, which never goes backwards, so a wall-clock
// adjustment between start() and end() cannot produce a bogus interval.
class Timer {
    using Clock = std::chrono::steady_clock;

    Clock::time_point startTP{};
    Clock::time_point endTP{};

public:
    // Result of the most recent end() call, in microseconds (0 before the first call).
    unsigned long elapsed_us{};

    Timer() = default;

    // Records the starting instant.
    void start() {
        startTP = Clock::now();
    }

    // Records the ending instant, stores the microseconds elapsed since
    // start() in elapsed_us and returns that value.
    unsigned long end() {
        endTP = Clock::now();
        const auto delta = std::chrono::duration_cast<std::chrono::microseconds>(endTP - startTP);
        elapsed_us = static_cast<unsigned long>(delta.count());
        return elapsed_us;
    }
};
353 
354 
355 //extern void hexdump(void *ptr, int buflen);
356 
// Static-only helper; the single entry point is defined out of line.
class Sled {
public:
    // Writes a rendering of the argument array at `argArray` to `out`.
    // NOTE(review): argArray is presumably an ArgArray_s* - confirm in the definition.
    static void show(std::ostream &out, void *argArray);
};
361 
362 
// Plain data record of seven ints passed to Queue::dispatch().
// NOTE(review): x/y/z look like per-axis positions and maxX/maxY/maxZ their
// extents, with `dimensions` the number of axes in use - confirm with callers.
// Member order is kept as-is because it determines the layout.
class KernelContext {
public:
    // X axis.
    int x;
    int maxX;
    // Y axis.
    int y;
    int maxY;
    // Z axis.
    int z;
    int maxZ;
    // Axis count.
    int dimensions;
};
373 
374 
375 class Backend {
376 public:
377     class Config final : public BasicConfig {
378     public:
379         explicit Config(int mode);
380 
381         ~Config() override;
382     };
383 
384     class Buffer {
385     public:
386         Backend *backend;
387         BufferState *bufferState;
388 
389         Buffer(Backend *backend, BufferState *bufferState)
390             : backend(backend), bufferState(bufferState) {
391         }
392 
393         virtual ~Buffer() = default;
394     };
395 
396     class CompilationUnit {
397     public:
398         class Kernel {
399         public:
400             char *name;
401 
402             CompilationUnit *compilationUnit;
403 
404             virtual bool setArg(KernelArg *arg, Buffer *openCLBuffer) = 0;
405 
406             virtual bool setArg(KernelArg *arg) = 0;
407 
408             virtual long ndrange(void *argArray) final;
409 
410             Kernel(CompilationUnit *compilationUnit, char *name)
411                 : name(strutil::clone(name)), compilationUnit(compilationUnit) {
412             }
413 
414             virtual ~Kernel() {
415                 delete[] name;
416             }
417         };
418 
419     public:
420         Backend *backend;
421         char *src;
422         char *log;
423         bool ok;
424 
425         virtual Kernel *getKernel(int nameLen, char *name) = 0;
426 
427         virtual bool compilationUnitOK() final {
428             return ok;
429         }
430 
431         CompilationUnit(Backend *backend, char *src, char *log, bool ok)
432             : backend(backend), src(src), log(log), ok(ok) {
433         }
434 
435         virtual ~CompilationUnit() {
436             delete[] src;
437             delete[] log;
438         };
439     };
440 
441     class Queue {
442     public:
443         Backend *backend;
444 
445         explicit Queue(Backend *backend);
446 
447         virtual void wait() = 0;
448 
449         virtual void release() = 0;
450 
451         virtual void computeStart() = 0;
452 
453         virtual void computeEnd() = 0;
454 
455         virtual void copyToDevice(Buffer *buffer) =0;
456 
457         virtual void copyFromDevice(Buffer *buffer) =0;
458 
459         virtual void dispatch(KernelContext *kernelContext, CompilationUnit::Kernel *kernel) = 0;
460 
461         virtual ~Queue();
462     };
463 
464     class ProfilableQueue : public Queue {
465     public:
466         static constexpr int START_BIT_IDX = 20;
467         static constexpr int CopyToDeviceBits = 1 << START_BIT_IDX;
468         static constexpr int CopyFromDeviceBits = 1 << 21;
469         static constexpr int NDRangeBits = 1 << 22;
470         static constexpr int StartComputeBits = 1 << 23;
471         static constexpr int EndComputeBits = 1 << 24;
472         static constexpr int EnterKernelDispatchBits = 1 << 25;
473         static constexpr int LeaveKernelDispatchBits = 1 << 26;
474         static constexpr int HasConstCharPtrArgBits = 1 << 27;
475         static constexpr int hasIntArgBits = 1 << 28;
476         static constexpr int END_BIT_IDX = 27;
477 
478         size_t eventMax;
479         size_t eventc;
480         int *eventInfoBits;
481         const char **eventInfoConstCharPtrArgs;
482 
483         virtual void showEvents(int width) = 0;
484 
485         virtual void inc(int bits) = 0;
486 
487         virtual void inc(int bits, const char *arg) = 0;
488 
489         virtual void marker(int bits) = 0;
490 
491         virtual void marker(int bits, const char *arg) = 0;
492 
493 
494         virtual void markAsStartComputeAndInc() = 0;
495 
496         virtual void markAsEndComputeAndInc() = 0;
497 
498         virtual void markAsEnterKernelDispatchAndInc() = 0;
499 
500         virtual void markAsLeaveKernelDispatchAndInc() = 0;
501 
502         ProfilableQueue(Backend *backend, int eventMax)
503             : Queue(backend),
504               eventMax(eventMax),
505               eventInfoBits(new int[eventMax]),
506               eventInfoConstCharPtrArgs(new const char *[eventMax]),
507               eventc(0) {
508         }
509 
510         ~ProfilableQueue() override {
511             delete[]eventInfoBits;
512             delete[]eventInfoConstCharPtrArgs;
513         }
514     };
515 
516     Config *config;
517     Queue *queue;
518 
519     Backend(Config *config, Queue *queue)
520         : config(config), queue(queue) {
521     }
522 
523     virtual Buffer *getOrCreateBuffer(BufferState *bufferState) = 0;
524 
525     virtual void info() = 0;
526 
527     virtual void computeStart() = 0;
528 
529     virtual void computeEnd() = 0;
530 
531     virtual CompilationUnit *compile(int len, char *source) = 0;
532 
533     virtual bool getBufferFromDeviceIfDirty(void *memorySegment, long memorySegmentLength) = 0;
534 
535     virtual ~Backend() = default;
536 };
537 
538 
539 
540 template<typename T>
541 T *bufferOf(const char *name) {
542     size_t lenIncludingBufferState = sizeof(T);
543     size_t lenExcludingBufferState = lenIncludingBufferState - sizeof(BufferState);
544     T *buffer = reinterpret_cast<T *>(new unsigned char[lenIncludingBufferState]);
545     auto *bufferState = reinterpret_cast<BufferState *>(reinterpret_cast<char *>(buffer) + lenExcludingBufferState);
546     bufferState->magic1 = bufferState->magic2 = BufferState::MAGIC;
547     bufferState->ptr = buffer;
548     bufferState->length = sizeof(T) - sizeof(BufferState);
549     bufferState->state = BufferState::NEW_STATE;
550     bufferState->vendorPtr = nullptr;
551     bufferState->dump(name);
552     return buffer;
553 }