1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 
 26 #pragma once
 27 
#include <sys/time.h>
#include <unistd.h>

#include <bitset>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <map>
#include <stack>
#include <string>
#include <vector>

#include "strutil.h"
#include "config.h"
 42 
// SNPRINTF names the bounded-printf function for the current platform:
// snprintf on Apple and on other non-Windows builds, _snprintf when _WIN32 is defined.
#ifdef __APPLE__
#define SNPRINTF snprintf
#else
// <malloc.h> is included on every non-Apple platform.
#include <malloc.h>
#if defined (_WIN32)
#include "windows.h"
// NOTE(review): Microsoft documents that _snprintf does not null-terminate on truncation,
// unlike C99 snprintf - confirm that callers of SNPRINTF cope with that.
#define SNPRINTF _snprintf
#else
#define SNPRINTF  snprintf
#endif
#endif
 54 
 55 typedef char s8_t;
 56 typedef char byte;
 57 typedef char boolean;
 58 typedef char z1_t;
 59 typedef unsigned char u8_t;
 60 typedef short s16_t;
 61 typedef unsigned short u16_t;
 62 typedef unsigned int u32_t;
 63 typedef int s32_t;
 64 typedef float f32_t;
 65 typedef double f64_t;
 66 typedef long s64_t;
 67 typedef unsigned long u64_t;
 68 
// Declared here and defined in another translation unit; takes a pointer and a buffer length.
// NOTE(review): presumably prints buflen bytes starting at ptr in hex - confirm against the definition.
extern void hexdump(void *ptr, int buflen);
 70 
// A character buffer paired with a length, plus write()/read() members that take a filename.
// Only declarations are visible in this header; all member functions are defined elsewhere.
// NOTE(review): the class holds a raw char* and has a user-declared destructor but no declared
// copy operations - confirm that instances are never copied, or define/delete them.
class Text {
public:
    size_t len; // NOTE(review): presumably the number of bytes addressed by `text` - confirm in the definitions
    char *text;
    bool isCopy; // NOTE(review): presumably records whether this object owns `text` - confirm against ~Text()

    Text(size_t len, char *text, bool isCopy);

    Text(char *text, bool isCopy);

    explicit Text(size_t len);

    void write(const std::string &filename) const;

    void read(const std::string &filename);

    virtual ~Text();
};
 89 
// A Text subclass that adds no members of its own; it only declares two constructors
// (defined elsewhere) and keeps the inherited destructor behaviour.
class Log : public Text {
public:
    explicit Log(size_t len);

    explicit Log(char *text);

    ~Log() override = default;
};
 98 
 99 
// Access flags matching the values listed on Buffer_s::access.
// RO_BYTE is bit 1, WO_BYTE is bit 2, RW_BYTE is both bits, UNKNOWN_BYTE is no bits.
// NOTE(review): the names suggest read-only / write-only / read-write - confirm against hat/buffer/ArgArray.java.
#define UNKNOWN_BYTE 0
#define RO_BYTE (1<<1)
#define WO_BYTE (1<<2)
#define RW_BYTE (RO_BYTE|WO_BYTE)
104 
// The '&' payload of Value_u: describes one memory segment passed as a kernel argument.
// BufferState::of(const KernelArg *) reads memorySegment and sizeInBytes to locate the
// BufferState stored at the end of the segment.
// NOTE(review): field order and types presumably mirror a layout written by the Java side - do not reorder.
struct Buffer_s {
    void *memorySegment; // Address of a Buffer/MemorySegment
    long sizeInBytes; // The size of the memory segment in bytes
    u8_t access; // see hat/buffer/ArgArray.java  UNKNOWN_BYTE=0, RO_BYTE =1<<1,WO_BYTE =1<<2,RW_BYTE =RO_BYTE|WO_BYTE;
};
110 
// Storage for a single kernel argument value. Which member is meaningful is decided by
// KernelArg::variant; the character in each trailing comment is the matching variant tag.
// The x16/x32/x64 members alias the same bytes as the typed members of equal width.
union Value_u {
    boolean z1; // 'Z'
    u8_t s8; // 'B' (named s8 but declared with the unsigned type u8_t)
    u16_t u16; // 'C'
    s16_t s16; // 'S'
    u16_t x16; // 'C' or 'S'
    s32_t s32; // 'I'
    s32_t x32; // 'I' or 'F'
    f32_t f32; // 'F'
    f64_t f64; // 'D'
    s64_t s64; // 'J'
    s64_t x64; // 'D' or 'J'
    Buffer_s buffer; // '&'
};
125 
126 struct KernelArg {
127     u32_t idx; // 0..argc
128     u8_t variant; // which variant 'I','Z','S','J','F', '&' implies Buffer/MemorySegment
129     u8_t pad8[8];
130     Value_u value;
131     u8_t pad6[6];
132 
133     size_t size() const {
134         size_t sz;
135         switch (variant) {
136             case 'I':
137             case 'F':
138                 sz = sizeof(u32_t);
139                 break;
140             case 'S':
141             case 'C':
142                 sz = sizeof(u16_t);
143                 break;
144             case 'D':
145             case 'J':
146                 return sizeof(u64_t);
147                 break;
148             case 'B':
149                 return sizeof(u8_t);
150                 break;
151             default:
152                 std::cerr << "Bad variant " << variant << "arg::size" << std::endl;
153                 exit(1);
154         }
155 
156         return sz;
157     }
158 };
159 
160 struct BufferState {
161     static constexpr long MAGIC = 0x4a71facebffab175;
162     static constexpr int NO_STATE = 0;
163     static constexpr int NEW_STATE = 1;
164     static constexpr int HOST_OWNED = 2;
165     static constexpr int DEVICE_OWNED = 3;
166     static constexpr int DEVICE_VALID_HOST_HAS_COPY = 4;
167     const static char *stateNames[]; // See below for out of line definition
168 
169     long magic1;
170     void *ptr;
171     long length;
172     int bits;
173     int state;
174     void *vendorPtr;
175     long magic2;
176 
177     bool ok() const {
178         return ((magic1 == MAGIC) && (magic2 == MAGIC));
179     }
180 
181     void setState(int newState) {
182         state = newState;
183     }
184 
185     int getState() const {
186         return state;
187     }
188 
189     void dump(const char *msg) const {
190         if (ok()) {
191             printf("{%s,ptr:%016lx,length: %016lx,  state:%08x, vendorPtr:%016lx}\n", msg, (long) ptr, length, state,
192                    (long) vendorPtr);
193         } else {
194             printf("%s bad magic \n", msg);
195             printf("(magic1:%016lx,", magic1);
196             printf("{%s, ptr:%016lx, length: %016lx,  state:%08x, vendorPtr:%016lx}", msg, (long) ptr, length, state,
197                    (long) vendorPtr);
198             printf("magic2:%016lx)\n", magic2);
199         }
200     }
201 
202     static BufferState *of(void *ptr, size_t sizeInBytes) {
203         return reinterpret_cast<BufferState *>(static_cast<char *>(ptr) + sizeInBytes - sizeof(BufferState));
204     }
205 
206     static BufferState *of(const KernelArg *arg) {
207         // access?
208         BufferState *bufferState = BufferState::of(
209             arg->value.buffer.memorySegment,
210             arg->value.buffer.sizeInBytes
211         );
212 
213 
214         //Sanity check the buffers
215         // These sanity check finds errors passing memory segments which are not Buffers
216 
217         if (bufferState->ptr != arg->value.buffer.memorySegment) {
218             std::cerr << "bufferState->ptr !=  arg->value.buffer.memorySegment" << std::endl;
219             std::exit(1);
220         }
221 
222         if ((bufferState->vendorPtr == nullptr) && (bufferState->state != BufferState::NEW_STATE)) {
223             std::cerr << "Warning:  Unexpected initial state for buffer "
224                     //<<" of kernel '"<<(dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<<"'"
225                     << " state=" << bufferState->state << " '"
226                     << BufferState::stateNames[bufferState->state] << "'"
227                     << " vendorPtr" << bufferState->vendorPtr << std::endl;
228         }
229         // End of sanity checks
230         return bufferState;
231     }
232 };
233 
// Out-of-line definition of BufferState::stateNames, compiled only where shared_cpp is defined.
// NOTE(review): presumably exactly one translation unit defines shared_cpp - confirm in the build.
// Entries are listed in the order of the BufferState state constants (NO_STATE=0 .. DEVICE_VALID_HOST_HAS_COPY=4),
// which is how BufferState::of indexes the table.
#ifdef shared_cpp
const char *BufferState::stateNames[] = {
    "NO_STATE",
    "NEW_STATE",
    "HOST_OWNED",
    "DEVICE_OWNED",
    "DEVICE_VALID_HOST_HAS_COPY"
};
#endif
243 
// Header of an argument block: a count, 12 bytes of padding, then the KernelArg entries.
// argv is declared with length 0 and is indexed beyond that bound by ArgSled::arg(); the
// storage for the entries must therefore be provided by whoever allocates the block.
// NOTE(review): zero-length arrays are a compiler extension rather than standard C++.
struct ArgArray_s {
    u32_t argc;
    u8_t pad12[12];
    KernelArg argv[0/*argc*/];
};
249 
250 class ArgSled {
251 private:
252     ArgArray_s *argArray;
253 
254 public:
255     int argc() const {
256         return argArray->argc;
257     }
258 
259     KernelArg *arg(int n) const {
260         KernelArg *a = (argArray->argv + n);
261         return a;
262     }
263 
264     void hexdumpArg(int n) const {
265         hexdump(arg(n), sizeof(KernelArg));
266     }
267 
268     void dumpArg(int n) const {
269         KernelArg *a = arg(n);
270         int idx = (int) a->idx;
271         std::cout << "arg[" << idx << "]";
272         char variant = (char) a->variant;
273         switch (variant) {
274             case 'F':
275                 std::cout << " f32 " << a->value.f32 << std::endl;
276                 break;
277             case 'I':
278                 std::cout << " s32 " << a->value.s32 << std::endl;
279                 break;
280             case 'D':
281                 std::cout << " f64 " << a->value.f64 << std::endl;
282                 break;
283             case 'J':
284                 std::cout << " s64 " << a->value.s64 << std::endl;
285                 break;
286             case 'C':
287                 std::cout << " u16 " << a->value.u16 << std::endl;
288                 break;
289             case 'S':
290                 std::cout << " s16 " << a->value.s32 << std::endl;
291                 break;
292             case 'Z':
293                 std::cout << " z1 " << a->value.z1 << std::endl;
294                 break;
295             case '&':
296                 std::cout << " buffer {"
297                         << " void *address = 0x" << std::hex << (long) a->value.buffer.memorySegment << std::dec
298                         << ", long bytesSize= 0x" << std::hex << (long) a->value.buffer.sizeInBytes << std::dec
299                         << ", char access= 0x" << std::hex << (unsigned char) a->value.buffer.access << std::dec
300                         << "}" << std::endl;
301                 break;
302             default:
303                 std::cout << (char) variant << std::endl;
304                 break;
305         }
306     }
307 
308     void *afterArgsPtrPtr() const {
309         KernelArg *a = arg(argc());
310         return (void *) a;
311     }
312 
313     int *schemaLenPtr() const {
314         int *schemaLenP = (int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *) */);
315         return schemaLenP;
316     }
317 
318     int schemaLen() const {
319         return *schemaLenPtr();
320     }
321 
322     char *schema() const {
323         int *schemaLenP = ((int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *)*/) + 1);
324         return (char *) schemaLenP;
325     }
326 
327     explicit ArgSled(ArgArray_s *argArray)
328         : argArray(argArray) {
329     }
330 };
331 
332 
// Simple stopwatch reporting elapsed microseconds.
//
// Uses std::chrono::steady_clock, so the measured interval cannot go backwards or jump
// when the system wall-clock time is adjusted between start() and end().
class Timer {
    using Clock = std::chrono::steady_clock;
    Clock::time_point startTime, endTime;

public:
    // Result of the most recent end() call, in microseconds; 0 until end() has been called.
    unsigned long elapsed_us{};

    Timer(): startTime(), endTime() {
    }

    // Records the starting instant.
    void start() {
        startTime = Clock::now();
    }

    // Records the ending instant, stores the microseconds elapsed since start() in
    // elapsed_us and returns that value.
    unsigned long end() {
        endTime = Clock::now();
        const auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(endTime - startTime);
        elapsed_us = static_cast<unsigned long>(elapsed.count());
        return elapsed_us;
    }
};
353 
354 
355 //extern void hexdump(void *ptr, int buflen);
356 
// Declares a single static helper; its definition is not part of this header.
class Sled {
public:
    // Takes an output stream and an untyped argArray pointer.
    // NOTE(review): presumably writes a description of the ArgArray_s block to `out` - confirm against the definition.
    static void show(std::ostream &out, void *argArray);
};
361 
// Plain data record handed to Backend::Queue::dispatch(). Every member is an int and the
// declaration order (dimensions, then five x/y/z triples) is the memory order.
class KernelContext {
public:
    int dimensions; // Dimensions of the kernel (1D, 2D or 3D)

    int gix, giy, giz; // global index

    int gsx, gsy, gsz; // global sizes

    int lix, liy, liz; // local index

    int lsx, lsy, lsz; // local size

    int bix, biy, biz; // Group index
};
393 
394 class Backend {
395 public:
396     class Config final : public BasicConfig {
397     public:
398         explicit Config(int mode);
399 
400         ~Config() override;
401     };
402 
403     class Buffer {
404     public:
405         Backend *backend;
406         BufferState *bufferState;
407 
408         Buffer(Backend *backend, BufferState *bufferState)
409             : backend(backend), bufferState(bufferState) {
410         }
411 
412         virtual ~Buffer() = default;
413     };
414 
415     class CompilationUnit {
416     public:
417         class Kernel {
418         public:
419             char *name;
420 
421             CompilationUnit *compilationUnit;
422 
423             virtual bool setArg(KernelArg *arg, Buffer *openCLBuffer) = 0;
424 
425             virtual bool setArg(KernelArg *arg) = 0;
426 
427             virtual long ndrange(void *argArray) final;
428 
429             Kernel(CompilationUnit *compilationUnit, char *name)
430                 : name(strutil::clone(name)), compilationUnit(compilationUnit) {
431             }
432 
433             virtual ~Kernel() {
434                 delete[] name;
435             }
436         };
437 
438     public:
439         Backend *backend;
440         char *src;
441         char *log;
442         bool ok;
443 
444         virtual Kernel *getKernel(int nameLen, char *name) = 0;
445 
446         virtual bool compilationUnitOK() final {
447             return ok;
448         }
449 
450         CompilationUnit(Backend *backend, char *src, char *log, bool ok)
451             : backend(backend), src(src), log(log), ok(ok) {
452         }
453 
454         virtual ~CompilationUnit() {
455             delete[] src;
456             delete[] log;
457         };
458     };
459 
460     class Queue {
461     public:
462         Backend *backend;
463 
464         explicit Queue(Backend *backend);
465 
466         virtual void wait() = 0;
467 
468         virtual void release() = 0;
469 
470         virtual void computeStart() = 0;
471 
472         virtual void computeEnd() = 0;
473 
474         virtual void copyToDevice(Buffer *buffer) =0;
475 
476         virtual void copyFromDevice(Buffer *buffer) =0;
477 
478         virtual void dispatch(KernelContext *kernelContext, CompilationUnit::Kernel *kernel) = 0;
479 
480         virtual ~Queue();
481     };
482 
483     class ProfilableQueue : public Queue {
484     public:
485         static constexpr int START_BIT_IDX = 20;
486         static constexpr int CopyToDeviceBits = 1 << START_BIT_IDX;
487         static constexpr int CopyFromDeviceBits = 1 << 21;
488         static constexpr int NDRangeBits = 1 << 22;
489         static constexpr int StartComputeBits = 1 << 23;
490         static constexpr int EndComputeBits = 1 << 24;
491         static constexpr int EnterKernelDispatchBits = 1 << 25;
492         static constexpr int LeaveKernelDispatchBits = 1 << 26;
493         static constexpr int HasConstCharPtrArgBits = 1 << 27;
494         static constexpr int hasIntArgBits = 1 << 28;
495         static constexpr int END_BIT_IDX = 27;
496 
497         size_t eventMax;
498         size_t eventc;
499         int *eventInfoBits;
500         const char **eventInfoConstCharPtrArgs;
501 
502         virtual void showEvents(int width) = 0;
503 
504         virtual void inc(int bits) = 0;
505 
506         virtual void inc(int bits, const char *arg) = 0;
507 
508         virtual void marker(int bits) = 0;
509 
510         virtual void marker(int bits, const char *arg) = 0;
511 
512 
513         virtual void markAsStartComputeAndInc() = 0;
514 
515         virtual void markAsEndComputeAndInc() = 0;
516 
517         virtual void markAsEnterKernelDispatchAndInc() = 0;
518 
519         virtual void markAsLeaveKernelDispatchAndInc() = 0;
520 
521         ProfilableQueue(Backend *backend, int eventMax)
522             : Queue(backend),
523               eventMax(eventMax),
524               eventInfoBits(new int[eventMax]),
525               eventInfoConstCharPtrArgs(new const char *[eventMax]),
526               eventc(0) {
527         }
528 
529         ~ProfilableQueue() override {
530             delete[]eventInfoBits;
531             delete[]eventInfoConstCharPtrArgs;
532         }
533     };
534 
535     Config *config;
536     Queue *queue;
537 
538     Backend(Config *config, Queue *queue)
539         : config(config), queue(queue) {
540     }
541 
542     virtual Buffer *getOrCreateBuffer(BufferState *bufferState) = 0;
543 
544     virtual void showDeviceInfo() = 0;
545 
546     virtual void computeStart() = 0;
547 
548     virtual void computeEnd() = 0;
549 
550     virtual CompilationUnit *compile(int len, char *source) = 0;
551 
552     virtual bool getBufferFromDeviceIfDirty(void *memorySegment, long memorySegmentLength) = 0;
553 
554     virtual ~Backend() = default;
555 };
556 
557 
558 
559 template<typename T>
560 T *bufferOf(const char *name) {
561     size_t lenIncludingBufferState = sizeof(T);
562     size_t lenExcludingBufferState = lenIncludingBufferState - sizeof(BufferState);
563     T *buffer = reinterpret_cast<T *>(new unsigned char[lenIncludingBufferState]);
564     auto *bufferState = reinterpret_cast<BufferState *>(reinterpret_cast<char *>(buffer) + lenExcludingBufferState);
565     bufferState->magic1 = bufferState->magic2 = BufferState::MAGIC;
566     bufferState->ptr = buffer;
567     bufferState->length = sizeof(T) - sizeof(BufferState);
568     bufferState->state = BufferState::NEW_STATE;
569     bufferState->vendorPtr = nullptr;
570     bufferState->dump(name);
571     return buffer;
572 }