1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 
 26 #pragma once
 27 
#include <sys/time.h>
#include <unistd.h>

#include <bitset>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <map>
#include <stack>
#include <vector>
 39 
 40 #include "strutil.h"
 41 
// Select the platform's snprintf spelling and pull in platform headers:
//  - Apple:   SNPRINTF is snprintf, and <malloc.h> is not included.
//  - Windows: <malloc.h> and "windows.h" are included, SNPRINTF is _snprintf.
//  - others:  <malloc.h> is included, SNPRINTF is snprintf.
#ifdef __APPLE__
#define SNPRINTF snprintf
#else
#include <malloc.h>
#if defined (_WIN32)
#include "windows.h"
#define SNPRINTF _snprintf
#else
#define SNPRINTF  snprintf
#endif
#endif
 53 
 54 typedef char s8_t;
 55 typedef char byte;
 56 typedef char boolean;
 57 typedef char z1_t;
 58 typedef unsigned char u8_t;
 59 typedef short s16_t;
 60 typedef unsigned short u16_t;
 61 typedef unsigned int u32_t;
 62 typedef int s32_t;
 63 typedef float f32_t;
 64 typedef double f64_t;
 65 typedef long s64_t;
 66 typedef unsigned long u64_t;
 67 
// Declared here, defined in another translation unit.
// NOTE(review): judging by the name and parameters this presumably prints a hex
// dump of `buflen` bytes starting at `ptr` -- confirm against the implementation.
extern void hexdump(void *ptr, int buflen);
 69 
// A character buffer paired with a length and an `isCopy` flag.
// Only declarations are visible in this header; the constructors, write(),
// read() and the destructor are defined out of line.
class Text {
public:
    size_t len;    // presumably the number of chars addressed by `text` -- confirm against the implementation
    char *text;    // the character data
    bool isCopy;   // NOTE(review): looks like an ownership flag (is `text` a private copy?) -- confirm

    // Builds a Text from an explicit length, a pointer and the isCopy flag.
    Text(size_t len, char *text, bool isCopy);

    // Builds a Text from a pointer and the isCopy flag (how the length is derived is not visible here).
    Text(char *text, bool isCopy);

    // Builds a Text from a length only (presumably allocating storage -- confirm).
    Text(size_t len);

    // Takes a file name; presumably writes the text to that file -- confirm against the implementation.
    void write(std::string &filename) const;

    // Takes a file name; presumably loads the text from that file -- confirm against the implementation.
    void read(std::string &filename);

    // Virtual so that derived classes (e.g. Log) can be destroyed through a Text pointer.
    virtual ~Text();
};
 88 
// A Text specialisation with its own constructors; it adds no data members.
// The constructors are defined out of line.
class Log : public Text {
public:
    // Builds a Log from a length (how this maps onto Text's constructors is not visible here).
    Log(size_t len);

    // Builds a Log from an existing character pointer.
    Log(char *text);

    // Nothing extra to release; Text's virtual destructor does the work.
    ~Log() = default;
};
 97 
 98 
// Access flags stored in Buffer_s::access (per the comment on that field these
// mirror the constants in hat/buffer/ArgArray.java).
#define UNKNOWN_BYTE 0
// Bit 1: read access.
#define RO_BYTE (1<<1)
// Bit 2: write access.
#define WO_BYTE (1<<2)
// Both read and write bits set.
#define RW_BYTE (RO_BYTE|WO_BYTE)
103 
// Describes a buffer argument: where the memory segment lives, how big it is,
// and which access flags apply to it.
struct Buffer_s {
    void *memorySegment;   // Address of a Buffer/MemorySegment
    long sizeInBytes;     // The size of the memory segment in bytes
    u8_t access;          // see hat/buffer/ArgArray.java  UNKNOWN_BYTE=0, RO_BYTE =1<<1,WO_BYTE =1<<2,RW_BYTE =RO_BYTE|WO_BYTE;
};
109 
// Storage for one kernel argument value. All members overlay the same bytes;
// the quoted character on each line is the variant tag (see KernelArg::variant)
// for which that member is the meaningful one. The x16/x32/x64 members give
// width-only access when the caller does not care which of two tags is in use.
union Value_u {
    boolean z1;  // 'Z'
    u8_t s8;  // 'B'  NOTE(review): named s8 but declared with the unsigned u8_t type -- confirm intent
    u16_t u16;  // 'C'
    s16_t s16;  // 'S'
    u16_t x16;  // 'C' or 'S'
    s32_t s32;  // 'I'
    s32_t x32;  // 'I' or 'F'
    f32_t f32; // 'F'
    f64_t f64; // 'D'
    s64_t s64; // 'J'
    s64_t x64; // 'D' or 'J'
    Buffer_s buffer; // '&'
};
124 
125 struct KernelArg {
126     u32_t idx;          // 0..argc
127     u8_t variant;      // which variant 'I','Z','S','J','F', '&' implies Buffer/MemorySegment
128     u8_t pad8[8];
129     Value_u value;
130     u8_t pad6[6];
131 
132     size_t size() {
133         size_t sz;
134         switch (variant) {
135             case 'I':
136             case 'F':
137                 sz = sizeof(u32_t);
138                 break;
139             case 'S':
140             case 'C':
141                 sz = sizeof(u16_t);
142                 break;
143             case 'D':
144             case 'J':
145                 return sizeof(u64_t);
146                 break;
147             case 'B':
148                 return sizeof(u8_t);
149                 break;
150             default:
151                 std::cerr << "Bad variant " << variant << "arg::size" << std::endl;
152                 exit(1);
153 
154         }
155 
156         return sz;
157     }
158 };
159 
160 struct BufferState {
161     static const long MAGIC = 0x4a71facebffab175;
162     static const int NO_STATE = 0;
163     static const int NEW_STATE = 1;
164     static const int HOST_OWNED = 2;
165     static const int DEVICE_OWNED = 3;
166     static const int DEVICE_VALID_HOST_HAS_COPY = 4;
167     const static char *stateNames[]; // See below for out of line definition
168 
169     long magic1;
170     void *ptr;
171     long length;
172     int bits;
173     int state;
174     void *vendorPtr;
175     long magic2;
176 
177     bool ok() {
178         return ((magic1 == MAGIC) && (magic2 == MAGIC));
179     }
180 
181     void setState(int newState) {
182         state = newState;
183     }
184 
185     int getState() {
186         return state;
187     }
188 
189     void dump(const char *msg) {
190         if (ok()) {
191             printf("{%s,ptr:%016lx,length: %016lx,  state:%08x, vendorPtr:%016lx}\n", msg, (long) ptr, length, state, (long) vendorPtr);
192         } else {
193             printf("%s bad magic \n", msg);
194             printf("(magic1:%016lx,", magic1);
195             printf("{%s, ptr:%016lx, length: %016lx,  state:%08x, vendorPtr:%016lx}", msg, (long) ptr, length, state, (long) vendorPtr);
196             printf("magic2:%016lx)\n", magic2);
197         }
198     }
199 
200     static BufferState *of(void *ptr, size_t sizeInBytes) {
201         return (BufferState *) (((char *) ptr) + sizeInBytes - sizeof(BufferState));
202     }
203 
204     static BufferState *of(KernelArg *arg) { // access?
205         BufferState *bufferState = BufferState::of(
206                 arg->value.buffer.memorySegment,
207                 arg->value.buffer.sizeInBytes
208         );
209 
210 
211         //Sanity check the buffers
212         // These sanity check finds errors passing memory segments which are not Buffers
213 
214         if (bufferState->ptr != arg->value.buffer.memorySegment) {
215             std::cerr << "bufferState->ptr !=  arg->value.buffer.memorySegment" << std::endl;
216             std::exit(1);
217         }
218 
219         if ((bufferState->vendorPtr == 0L) && (bufferState->state != BufferState::NEW_STATE)) {
220             std::cerr << "Warning:  Unexpected initial state for buffer "
221                       //<<" of kernel '"<<(dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<<"'"
222                       << " state=" << bufferState->state << " '"
223                       << BufferState::stateNames[bufferState->state] << "'"
224                       << " vendorPtr" << bufferState->vendorPtr << std::endl;
225         }
226         // End of sanity checks
227         return bufferState;
228     }
229 
230 };
231 
// Out of line definition of BufferState::stateNames. It is compiled only when
// the including translation unit defines shared_cpp, so the array is defined
// exactly once. Entries are listed in the same order as the numeric state
// constants in BufferState (NO_STATE=0 ... DEVICE_VALID_HOST_HAS_COPY=4).
#ifdef shared_cpp
const  char *BufferState::stateNames[] = {
              "NO_STATE",
              "NEW_STATE",
              "HOST_OWNED",
              "DEVICE_OWNED",
              "DEVICE_VALID_HOST_HAS_COPY"
        };
#endif
241 
// Header of the argument array: a count followed directly by `argc` KernelArg
// records. argv is declared with zero length, so the records live in whatever
// memory follows this header; this struct does not own or allocate them.
struct ArgArray_s {
    u32_t argc;        // number of KernelArg records in argv
    u8_t pad12[12];    // explicit padding between the count and the first record
    KernelArg argv[0/*argc*/];
};
247 
248 class ArgSled {
249 private:
250     ArgArray_s *argArray;
251 public:
252     int argc() {
253         return argArray->argc;
254     }
255 
256     KernelArg *arg(int n) {
257         KernelArg *a = (argArray->argv + n);
258         return a;
259     }
260 
261     void hexdumpArg(int n) {
262         hexdump(arg(n), sizeof(KernelArg));
263     }
264 
265     void dumpArg(int n) {
266         KernelArg *a = arg(n);
267         int idx = (int) a->idx;
268         std::cout << "arg[" << idx << "]";
269         char variant = (char) a->variant;
270         switch (variant) {
271             case 'F':
272                 std::cout << " f32 " << a->value.f32 << std::endl;
273                 break;
274             case 'I':
275                 std::cout << " s32 " << a->value.s32 << std::endl;
276                 break;
277             case 'D':
278                 std::cout << " f64 " << a->value.f64 << std::endl;
279                 break;
280             case 'J':
281                 std::cout << " s64 " << a->value.s64 << std::endl;
282                 break;
283             case 'C':
284                 std::cout << " u16 " << a->value.u16 << std::endl;
285                 break;
286             case 'S':
287                 std::cout << " s16 " << a->value.s32 << std::endl;
288                 break;
289             case 'Z':
290                 std::cout << " z1 " << a->value.z1 << std::endl;
291                 break;
292             case '&':
293                 std::cout << " buffer {"
294                           << " void *address = 0x" << std::hex << (long) a->value.buffer.memorySegment << std::dec
295                           << ", long bytesSize= 0x" << std::hex << (long) a->value.buffer.sizeInBytes << std::dec
296                           << ", char access= 0x" << std::hex << (unsigned char) a->value.buffer.access << std::dec
297                           << "}" << std::endl;
298                 break;
299             default:
300                 std::cout << (char) variant << std::endl;
301                 break;
302         }
303     }
304 
305     void *afterArgsPtrPtr() {
306         KernelArg *a = arg(argc());
307         return (void *) a;
308     }
309 
310     int *schemaLenPtr() {
311         int *schemaLenP = (int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *) */);
312         return schemaLenP;
313     }
314 
315     int schemaLen() {
316         return *schemaLenPtr();
317     }
318 
319     char *schema() {
320         int *schemaLenP = ((int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *)*/) + 1);
321         return (char *) schemaLenP;
322     }
323 
324     ArgSled(ArgArray_s *argArray)
325             : argArray(argArray) {}
326 };
327 
328 
// Minimal stopwatch built on gettimeofday(): call start(), then end() to
// obtain the elapsed time in microseconds.
class Timer {
    struct timeval begunAt, finishedAt;
public:
    unsigned long elapsed_us;   // value computed by the most recent end() call

    // Records the current time as the beginning of the interval.
    void start() {
        gettimeofday(&begunAt, nullptr);
    }

    // Records the current time as the end of the interval, stores the
    // difference from start() in elapsed_us and returns it.
    unsigned long end() {
        gettimeofday(&finishedAt, nullptr);
        const auto wholeSeconds = finishedAt.tv_sec - begunAt.tv_sec;
        const auto microsDelta = finishedAt.tv_usec - begunAt.tv_usec;
        elapsed_us = wholeSeconds * 1000000;   // sec to us
        elapsed_us += microsDelta;             // may be negative; the sum is still correct
        return elapsed_us;
    }
};
345 
346 
// Repeats the hexdump declaration that already appears near the top of this
// header; redeclaring a function is harmless.
extern void hexdump(void *ptr, int buflen);
348 
// Namespace-like class holding a single static helper, defined out of line.
class Sled {
public:
    // Takes an output stream and an untyped pointer to an argument array.
    // NOTE(review): presumably writes a readable listing of the arguments to
    // `out` -- confirm against the implementation.
    static void show(std::ostream &out, void *argArray);
};
353 
354 
// Plain pair of ints handed to Backend::Queue::dispatch alongside the kernel.
class KernelContext {
public:
    int x;      // NOTE(review): presumably the current index in the dispatch range -- confirm
    int maxX;   // NOTE(review): presumably the extent of the dispatch range -- confirm
};
360 
361 class Backend {
362 public:
363     class Config {
364     public:
365         // These must sync with hat/backend/ffi/Mode.java
366         // Bits 0-3 select platform id 0..5
367         // Bits 4-7 select device id 0..15
368         const static int START_BIT_IDX = 16;
369         const static int MINIMIZE_COPIES_BIT = 1 << START_BIT_IDX;
370         const static int TRACE_BIT = 1 << 17;
371         const static int PROFILE_BIT = 1 << 18;
372         const static int SHOW_CODE_BIT = 1 << 19;
373         const static int SHOW_KERNEL_MODEL_BIT = 1 << 20;
374         const static int SHOW_COMPUTE_MODEL_BIT = 1 << 21;
375         const static int INFO_BIT = 1 << 22;
376         const static int TRACE_COPIES_BIT = 1 << 23;
377         const static int TRACE_SKIPPED_COPIES_BIT = 1 << 24;
378         const static int TRACE_ENQUEUES_BIT = 1 << 25;
379         const static int TRACE_CALLS_BIT = 1 << 26;
380         const static int SHOW_WHY_BIT = 1 << 27;
381         const static int SHOW_STATE_BIT = 1 << 28;
382         const static int PTX_BIT = 1 << 29;
383         const static int END_BIT_IDX = 30;
384 
385         const static char *bitNames[]; // See below for out of line definition
386         int configBits;
387         bool minimizeCopies;
388         bool alwaysCopy;
389         bool trace;
390         bool profile;
391         bool showCode;
392         bool info;
393         bool traceCopies;
394         bool traceSkippedCopies;
395         bool traceEnqueues;
396         bool traceCalls;
397         bool showWhy;
398         bool showState;
399         bool ptx;
400         int platform; //0..15
401         int device; //0..15
402         Config(int mode);
403 
404         virtual ~Config();
405     };
406 
407     class Buffer {
408     public:
409         Backend *backend;
410         BufferState *bufferState;
411         Buffer(Backend *backend, BufferState *bufferState)
412                 : backend(backend), bufferState(bufferState) {
413         }
414         virtual ~Buffer() = default;
415     };
416 
417     class CompilationUnit {
418     public:
419         class Kernel {
420         public:
421             char *name;// strduped!
422 
423             CompilationUnit *compilationUnit;
424 
425             virtual bool setArg(KernelArg *arg, Buffer *openCLBuffer) = 0;
426 
427             virtual bool setArg(KernelArg *arg) = 0;
428 
429             virtual long ndrange(void *argArray) final;
430 
431             Kernel(CompilationUnit *compilationUnit, char *name)
432                     : compilationUnit(compilationUnit), name(strutil::clone(name)) {
433             }
434 
435             virtual ~Kernel() {
436                 if (name) {
437                     delete[] name;
438                 }
439             }
440         };
441 
442     public:
443         Backend *backend;
444         char *src;
445         char *log;
446         bool ok;
447 
448         virtual Kernel *getKernel(int nameLen, char *name) = 0;
449 
450         virtual bool compilationUnitOK() final {
451             return ok;
452         }
453 
454         CompilationUnit(Backend *backend, char *src, char *log, bool ok)
455                 : backend(backend), src(src), log(log), ok(ok) {
456         }
457 
458         virtual ~CompilationUnit() {
459             if (src != nullptr) {
460                 delete[] src;
461             }
462             if (log != nullptr) {
463                 delete[] log;
464             }
465         };
466     };
467 
468     class Queue {
469     public:
470 
471         Backend *backend;
472 
473         Queue(Backend *backend);
474 
475         virtual void wait() = 0;
476 
477         virtual void release() = 0;
478 
479         virtual void computeStart() = 0;
480 
481         virtual void computeEnd() = 0;
482 
483         virtual void copyToDevice(Buffer *buffer)=0;
484 
485         virtual void copyFromDevice(Buffer *buffer)=0;
486 
487         virtual void dispatch(KernelContext *kernelContext, CompilationUnit::Kernel *kernel) = 0;
488 
489         virtual ~Queue();
490     };
491 
492     class ProfilableQueue : public Queue {
493     public:
494         const static int START_BIT_IDX = 20;
495         static const int CopyToDeviceBits = 1 << START_BIT_IDX;
496         static const int CopyFromDeviceBits = 1 << 21;
497         static const int NDRangeBits = 1 << 22;
498         static const int StartComputeBits = 1 << 23;
499         static const int EndComputeBits = 1 << 24;
500         static const int EnterKernelDispatchBits = 1 << 25;
501         static const int LeaveKernelDispatchBits = 1 << 26;
502         static const int HasConstCharPtrArgBits = 1 << 27;
503         static const int hasIntArgBits = 1 << 28;
504         const static int END_BIT_IDX = 27;
505 
506         size_t eventMax;
507         size_t eventc;
508         int *eventInfoBits;
509         const char **eventInfoConstCharPtrArgs;
510 
511         virtual void showEvents(int width) = 0;
512 
513         virtual void inc(int bits) = 0;
514 
515         virtual void inc(int bits, const char *arg) = 0;
516 
517         virtual void marker(int bits) = 0;
518 
519         virtual void marker(int bits, const char *arg) = 0;
520 
521 
522         virtual void markAsStartComputeAndInc() = 0;
523 
524         virtual void markAsEndComputeAndInc() = 0;
525 
526         virtual void markAsEnterKernelDispatchAndInc() = 0;
527 
528         virtual void markAsLeaveKernelDispatchAndInc() = 0;
529 
530         ProfilableQueue(Backend *backend, int eventMax)
531                 : Queue(backend),
532                   eventMax(eventMax),
533                   eventInfoBits(new int[eventMax]),
534                   eventInfoConstCharPtrArgs(new const char *[eventMax]),
535                   eventc(0) {}
536 
537         virtual ~ProfilableQueue() override {
538             delete[]eventInfoBits;
539             delete[]eventInfoConstCharPtrArgs;
540         }
541     };
542 
543     Config *config;
544     Queue *queue;
545 
546     Backend(Config *config, Queue *queue)
547             : config(config), queue(queue) {}
548 
549     virtual Buffer *getOrCreateBuffer(BufferState *bufferState) = 0;
550 
551     virtual void info() = 0;
552 
553     virtual void computeStart() = 0;
554 
555     virtual void computeEnd() = 0;
556 
557     virtual CompilationUnit *compile(int len, char *source) = 0;
558 
559     virtual bool getBufferFromDeviceIfDirty(void *memorySegment, long memorySegmentLength) = 0;
560 
561     virtual ~Backend() {};
562 };
563 
// Out of line definition of Backend::Config::bitNames, compiled only when the
// including translation unit defines shared_cpp. There is one entry per mode
// bit from START_BIT_IDX (16) up to but excluding END_BIT_IDX (30), in the same
// order as the *_BIT constants in Backend::Config.
// NOTE(review): presumably indexed by (bit index - START_BIT_IDX) -- confirm
// against the code that reads it.
// The earlier table was missing the comma after "TRACE_CALLS" (which fused two
// names into one and shifted the rest), listed a "USE_STATE_BIT" name for which
// Config declares no bit, and had no entry for the PTX bit.
#ifdef shared_cpp
const  char *Backend::Config::bitNames[] = {
              "MINIMIZE_COPIES",        // bit 16
              "TRACE",                  // bit 17
              "PROFILE",                // bit 18
              "SHOW_CODE",              // bit 19
              "SHOW_KERNEL_MODEL",      // bit 20
              "SHOW_COMPUTE_MODEL",     // bit 21
              "INFO",                   // bit 22
              "TRACE_COPIES",           // bit 23
              "TRACE_SKIPPED_COPIES",   // bit 24
              "TRACE_ENQUEUES",         // bit 25
              "TRACE_CALLS",            // bit 26
              "SHOW_WHY_BIT",           // bit 27
              "SHOW_STATE_BIT",         // bit 28
              "PTX_BIT"                 // bit 29
        };
#endif
582 
583 template<typename T>
584 T *bufferOf(const char *name) {
585     size_t lenIncludingBufferState = sizeof(T);
586     size_t lenExcludingBufferState = lenIncludingBufferState - sizeof(BufferState);
587     T *buffer = (T *) new unsigned char[lenIncludingBufferState];
588     auto *bufferState = (BufferState *) ((char *) buffer + lenExcludingBufferState);
589     bufferState->magic1 = bufferState->magic2 = BufferState::MAGIC;
590     bufferState->ptr = buffer;
591     bufferState->length = sizeof(T) - sizeof(BufferState);
592     bufferState->state = BufferState::NEW_STATE;
593     bufferState->vendorPtr = nullptr;
594     bufferState->dump(name);
595     return buffer;
596 }