1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 
 26 #pragma once
 27 
#include <sys/time.h>
#include <unistd.h>

#include <bitset>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <map>
#include <stack>
#include <vector>
 39 
 40 #include "strutil.h"
 41 #include "config.h"
 42 
 43 #ifdef __APPLE__
 44 #define SNPRINTF snprintf
 45 #else
 46 #include <malloc.h>
 47 #if defined (_WIN32)
 48 #include "windows.h"
 49 #define SNPRINTF _snprintf
 50 #else
 51 #define SNPRINTF  snprintf
 52 #endif
 53 #endif
 54 
 55 typedef char s8_t;
 56 typedef char byte;
 57 typedef char boolean;
 58 typedef char z1_t;
 59 typedef unsigned char u8_t;
 60 typedef short s16_t;
 61 typedef unsigned short u16_t;
 62 typedef unsigned int u32_t;
 63 typedef int s32_t;
 64 typedef float f32_t;
 65 typedef double f64_t;
 66 typedef long s64_t;
 67 typedef unsigned long u64_t;
 68 
// Declared here and defined outside this header. Used by ArgSled::hexdumpArg, which
// passes a pointer to a KernelArg and sizeof(KernelArg) as `buflen`.
// Presumably prints `buflen` bytes starting at `ptr` in hexadecimal - confirm against the definition.
extern void hexdump(void *ptr, int buflen);
 70 
// A length + character-pointer pair with file read/write helpers.
// All member functions are only declared here; their definitions live outside this header.
class Text {
public:
    size_t len;   // Number of characters associated with `text` (exact meaning set by the out-of-line constructors).
    char *text;   // Pointer to the character data.
    bool isCopy;  // Presumably records whether `text` is a private copy that this object must free - confirm in the implementation.

    // Wraps `text` with an explicit length.
    Text(size_t len, char *text, bool isCopy);

    // Wraps `text`; the length is not supplied by the caller (presumably derived from the string - confirm).
    Text(char *text, bool isCopy);

    // Constructs from a length only (presumably allocating `len` characters - confirm).
    explicit Text(size_t len);

    // Writes to the file named `filename`; does not modify this object (const).
    void write(const std::string &filename) const;

    // Reads from the file named `filename` into this object.
    void read(const std::string &filename);

    // Virtual so derived classes (e.g. Log) can be destroyed through a Text pointer.
    virtual ~Text();
};
 89 
// A Text specialisation used for log output. Adds no data members of its own;
// the constructors are defined outside this header.
class Log : public Text {
public:
    // Constructs a log from a length only.
    explicit Log(size_t len);

    // Constructs a log from existing character data.
    explicit Log(char *text);

    // Nothing extra to release; Text's destructor does the work.
    ~Log() override = default;
};
 98 
 99 
// Access-mode flag values stored in Buffer_s::access.
// Bit 1 marks read-only, bit 2 marks write-only, and RW_BYTE is both bits set.
#define UNKNOWN_BYTE 0
#define RO_BYTE (1<<1)
#define WO_BYTE (1<<2)
#define RW_BYTE (RO_BYTE|WO_BYTE)
104 
// Describes a buffer-typed kernel argument (variant '&' in KernelArg).
// BufferState::of() expects a BufferState to occupy the last sizeof(BufferState)
// bytes of the [memorySegment, memorySegment + sizeInBytes) range.
struct Buffer_s {
    void *memorySegment; // Address of a Buffer/MemorySegment
    long sizeInBytes; // The size of the memory segment in bytes
    u8_t access; // see hat/buffer/ArgArray.java  UNKNOWN_BYTE=0, RO_BYTE =1<<1,WO_BYTE =1<<2,RW_BYTE =RO_BYTE|WO_BYTE;
};
110 
// Payload of a KernelArg. Which member is meaningful is selected by KernelArg::variant;
// the character in each trailing comment is the variant code for that member.
// The x16/x32/x64 members give a width-only view shared by two variant codes.
union Value_u {
    boolean z1; // 'Z'
    u8_t s8; // 'B'  NOTE(review): named s8 but declared with the unsigned u8_t alias
    u16_t u16; // 'C'
    s16_t s16; // 'S'
    u16_t x16; // 'C' or 'S'
    s32_t s32; // 'I'
    s32_t x32; // 'I' or 'F'
    f32_t f32; // 'F'
    f64_t f64; // 'D'
    s64_t s64; // 'J'
    s64_t x64; // 'D' or 'J'
    Buffer_s buffer; // '&'
};
125 
126 struct KernelArg {
127     u32_t idx; // 0..argc
128     u8_t variant; // which variant 'I','Z','S','J','F', '&' implies Buffer/MemorySegment
129     u8_t pad8[8];
130     Value_u value;
131     u8_t pad6[6];
132 
133     size_t size() const {
134         size_t sz;
135         switch (variant) {
136             case 'I':
137             case 'F':
138                 sz = sizeof(u32_t);
139                 break;
140             case 'S':
141             case 'C':
142                 sz = sizeof(u16_t);
143                 break;
144             case 'D':
145             case 'J':
146                 return sizeof(u64_t);
147                 break;
148             case 'B':
149                 return sizeof(u8_t);
150                 break;
151             default:
152                 std::cerr << "Bad variant " << variant << "arg::size" << std::endl;
153                 exit(1);
154         }
155 
156         return sz;
157     }
158 };
159 
160 struct BufferState {
161     static constexpr long MAGIC = 0x4a71facebffab175;
162     static constexpr int NO_STATE = 0;
163     static constexpr int NEW_STATE = 1;
164     static constexpr int HOST_OWNED = 2;
165     static constexpr int DEVICE_OWNED = 3;
166     static constexpr int DEVICE_VALID_HOST_HAS_COPY = 4;
167     const static char *stateNames[]; // See below for out of line definition
168 
169     long magic1;
170     void *ptr;
171     long length;
172     int bits;
173     int state;
174     void *vendorPtr;
175     long magic2;
176 
177     bool ok() const {
178         return ((magic1 == MAGIC) && (magic2 == MAGIC));
179     }
180 
181     void setState(int newState) {
182         state = newState;
183     }
184 
185     int getState() const {
186         return state;
187     }
188 
189     void dump(const char *msg) const {
190         if (ok()) {
191             printf("{%s,ptr:%016lx,length: %016lx,  state:%08x, vendorPtr:%016lx}\n", msg, (long) ptr, length, state,
192                    (long) vendorPtr);
193         } else {
194             printf("%s bad magic \n", msg);
195             printf("(magic1:%016lx,", magic1);
196             printf("{%s, ptr:%016lx, length: %016lx,  state:%08x, vendorPtr:%016lx}", msg, (long) ptr, length, state,
197                    (long) vendorPtr);
198             printf("magic2:%016lx)\n", magic2);
199         }
200     }
201 
202     static BufferState *of(void *ptr, size_t sizeInBytes) {
203         return reinterpret_cast<BufferState *>(static_cast<char *>(ptr) + sizeInBytes - sizeof(BufferState));
204     }
205 
206     static BufferState *of(const KernelArg *arg) {
207         // access?
208         BufferState *bufferState = BufferState::of(
209             arg->value.buffer.memorySegment,
210             arg->value.buffer.sizeInBytes
211         );
212 
213 
214         //Sanity check the buffers
215         // These sanity check finds errors passing memory segments which are not Buffers
216 
217         if (bufferState->ptr != arg->value.buffer.memorySegment) {
218             std::cerr << "bufferState->ptr !=  arg->value.buffer.memorySegment" << std::endl;
219             std::exit(1);
220         }
221 
222         if ((bufferState->vendorPtr == nullptr) && (bufferState->state != BufferState::NEW_STATE)) {
223             std::cerr << "Warning:  Unexpected initial state for buffer "
224                     //<<" of kernel '"<<(dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<<"'"
225                     << " state=" << bufferState->state << " '"
226                     << BufferState::stateNames[bufferState->state] << "'"
227                     << " vendorPtr" << bufferState->vendorPtr << std::endl;
228         }
229         // End of sanity checks
230         return bufferState;
231     }
232 };
233 
// Out-of-line definition of BufferState::stateNames. It is only emitted when
// shared_cpp is defined (presumably by the single .cpp file that should own the
// definition - confirm). Entry i is the printable name of the state constant
// whose value is i, so the order must track NO_STATE..DEVICE_VALID_HOST_HAS_COPY.
#ifdef shared_cpp
const char *BufferState::stateNames[] = {
    "NO_STATE",
    "NEW_STATE",
    "HOST_OWNED",
    "DEVICE_OWNED",
    "DEVICE_VALID_HOST_HAS_COPY"
};
#endif
243 
// Header of a variable-length argument array: a count followed by `argc` KernelArg records.
// ArgSled reads further data (a schema length and schema text) located directly after argv[argc - 1].
struct ArgArray_s {
    u32_t argc;       // Number of KernelArg entries that follow
    u8_t pad12[12];   // Explicit padding between argc and the first KernelArg
    KernelArg argv[0/*argc*/]; // Zero-length trailing array (compiler extension); real element count is argc
};
249 
250 class ArgSled {
251 private:
252     ArgArray_s *argArray;
253 
254 public:
255     int argc() const {
256         return argArray->argc;
257     }
258 
259     KernelArg *arg(int n) const {
260         KernelArg *a = (argArray->argv + n);
261         return a;
262     }
263 
264     void hexdumpArg(int n) const {
265         hexdump(arg(n), sizeof(KernelArg));
266     }
267 
268     void dumpArg(int n) const {
269         KernelArg *a = arg(n);
270         int idx = (int) a->idx;
271         std::cout << "arg[" << idx << "]";
272         char variant = (char) a->variant;
273         switch (variant) {
274             case 'F':
275                 std::cout << " f32 " << a->value.f32 << std::endl;
276                 break;
277             case 'I':
278                 std::cout << " s32 " << a->value.s32 << std::endl;
279                 break;
280             case 'D':
281                 std::cout << " f64 " << a->value.f64 << std::endl;
282                 break;
283             case 'J':
284                 std::cout << " s64 " << a->value.s64 << std::endl;
285                 break;
286             case 'C':
287                 std::cout << " u16 " << a->value.u16 << std::endl;
288                 break;
289             case 'S':
290                 std::cout << " s16 " << a->value.s32 << std::endl;
291                 break;
292             case 'Z':
293                 std::cout << " z1 " << a->value.z1 << std::endl;
294                 break;
295             case '&':
296                 std::cout << " buffer {"
297                         << " void *address = 0x" << std::hex << (long) a->value.buffer.memorySegment << std::dec
298                         << ", long bytesSize= 0x" << std::hex << (long) a->value.buffer.sizeInBytes << std::dec
299                         << ", char access= 0x" << std::hex << (unsigned char) a->value.buffer.access << std::dec
300                         << "}" << std::endl;
301                 break;
302             default:
303                 std::cout << (char) variant << std::endl;
304                 break;
305         }
306     }
307 
308     void *afterArgsPtrPtr() const {
309         KernelArg *a = arg(argc());
310         return (void *) a;
311     }
312 
313     int *schemaLenPtr() const {
314         int *schemaLenP = (int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *) */);
315         return schemaLenP;
316     }
317 
318     int schemaLen() const {
319         return *schemaLenPtr();
320     }
321 
322     char *schema() const {
323         int *schemaLenP = ((int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *)*/) + 1);
324         return (char *) schemaLenP;
325     }
326 
327     explicit ArgSled(ArgArray_s *argArray)
328         : argArray(argArray) {
329     }
330 };
331 
332 
// Minimal stopwatch: call start(), then end(); the elapsed time in microseconds
// is returned by end() and also kept in elapsed_us.
//
// Uses std::chrono::steady_clock, which is monotonic, so adjustments to the
// system wall clock between start() and end() cannot produce a negative
// interval (which would wrap to a huge value in the unsigned result).
class Timer {
    std::chrono::steady_clock::time_point startTP, endTP;

public:
    // Microseconds measured by the most recent end(); 0 until end() is first called.
    unsigned long elapsed_us{};

    Timer(): startTP(), endTP() {
    }

    // Records the starting instant.
    void start() {
        startTP = std::chrono::steady_clock::now();
    }

    // Records the ending instant, stores and returns the microseconds since start().
    unsigned long end() {
        endTP = std::chrono::steady_clock::now();
        const auto delta = std::chrono::duration_cast<std::chrono::microseconds>(endTP - startTP);
        elapsed_us = static_cast<unsigned long>(delta.count());
        return elapsed_us;
    }
};
353 
354 
355 //extern void hexdump(void *ptr, int buflen);
356 
// Holder for a single static helper; the definition lives outside this header.
class Sled {
public:
    // Writes a description of the argument array at `argArray` to `out`.
    // NOTE(review): `argArray` is untyped here; presumably it points at an ArgArray_s - confirm in the definition.
    static void show(std::ostream &out, void *argArray);
};
361 
362 
// Plain data passed to Backend::Queue::dispatch alongside the kernel.
class KernelContext {
public:
    int x;    // Presumably the current index within the 1-D range - confirm against the dispatch code.
    int maxX; // Presumably the extent of the 1-D range - confirm against the dispatch code.
};
368 
369 
370 class Backend {
371 public:
372     class Config final : public BasicConfig {
373     public:
374         explicit Config(int mode);
375 
376         ~Config() override;
377     };
378 
379     class Buffer {
380     public:
381         Backend *backend;
382         BufferState *bufferState;
383 
384         Buffer(Backend *backend, BufferState *bufferState)
385             : backend(backend), bufferState(bufferState) {
386         }
387 
388         virtual ~Buffer() = default;
389     };
390 
391     class CompilationUnit {
392     public:
393         class Kernel {
394         public:
395             char *name;
396 
397             CompilationUnit *compilationUnit;
398 
399             virtual bool setArg(KernelArg *arg, Buffer *openCLBuffer) = 0;
400 
401             virtual bool setArg(KernelArg *arg) = 0;
402 
403             virtual long ndrange(void *argArray) final;
404 
405             Kernel(CompilationUnit *compilationUnit, char *name)
406                 : name(strutil::clone(name)), compilationUnit(compilationUnit) {
407             }
408 
409             virtual ~Kernel() {
410                 delete[] name;
411             }
412         };
413 
414     public:
415         Backend *backend;
416         char *src;
417         char *log;
418         bool ok;
419 
420         virtual Kernel *getKernel(int nameLen, char *name) = 0;
421 
422         virtual bool compilationUnitOK() final {
423             return ok;
424         }
425 
426         CompilationUnit(Backend *backend, char *src, char *log, bool ok)
427             : backend(backend), src(src), log(log), ok(ok) {
428         }
429 
430         virtual ~CompilationUnit() {
431             delete[] src;
432             delete[] log;
433         };
434     };
435 
436     class Queue {
437     public:
438         Backend *backend;
439 
440         explicit Queue(Backend *backend);
441 
442         virtual void wait() = 0;
443 
444         virtual void release() = 0;
445 
446         virtual void computeStart() = 0;
447 
448         virtual void computeEnd() = 0;
449 
450         virtual void copyToDevice(Buffer *buffer) =0;
451 
452         virtual void copyFromDevice(Buffer *buffer) =0;
453 
454         virtual void dispatch(KernelContext *kernelContext, CompilationUnit::Kernel *kernel) = 0;
455 
456         virtual ~Queue();
457     };
458 
459     class ProfilableQueue : public Queue {
460     public:
461         static constexpr int START_BIT_IDX = 20;
462         static constexpr int CopyToDeviceBits = 1 << START_BIT_IDX;
463         static constexpr int CopyFromDeviceBits = 1 << 21;
464         static constexpr int NDRangeBits = 1 << 22;
465         static constexpr int StartComputeBits = 1 << 23;
466         static constexpr int EndComputeBits = 1 << 24;
467         static constexpr int EnterKernelDispatchBits = 1 << 25;
468         static constexpr int LeaveKernelDispatchBits = 1 << 26;
469         static constexpr int HasConstCharPtrArgBits = 1 << 27;
470         static constexpr int hasIntArgBits = 1 << 28;
471         static constexpr int END_BIT_IDX = 27;
472 
473         size_t eventMax;
474         size_t eventc;
475         int *eventInfoBits;
476         const char **eventInfoConstCharPtrArgs;
477 
478         virtual void showEvents(int width) = 0;
479 
480         virtual void inc(int bits) = 0;
481 
482         virtual void inc(int bits, const char *arg) = 0;
483 
484         virtual void marker(int bits) = 0;
485 
486         virtual void marker(int bits, const char *arg) = 0;
487 
488 
489         virtual void markAsStartComputeAndInc() = 0;
490 
491         virtual void markAsEndComputeAndInc() = 0;
492 
493         virtual void markAsEnterKernelDispatchAndInc() = 0;
494 
495         virtual void markAsLeaveKernelDispatchAndInc() = 0;
496 
497         ProfilableQueue(Backend *backend, int eventMax)
498             : Queue(backend),
499               eventMax(eventMax),
500               eventInfoBits(new int[eventMax]),
501               eventInfoConstCharPtrArgs(new const char *[eventMax]),
502               eventc(0) {
503         }
504 
505         ~ProfilableQueue() override {
506             delete[]eventInfoBits;
507             delete[]eventInfoConstCharPtrArgs;
508         }
509     };
510 
511     Config *config;
512     Queue *queue;
513 
514     Backend(Config *config, Queue *queue)
515         : config(config), queue(queue) {
516     }
517 
518     virtual Buffer *getOrCreateBuffer(BufferState *bufferState) = 0;
519 
520     virtual void info() = 0;
521 
522     virtual void computeStart() = 0;
523 
524     virtual void computeEnd() = 0;
525 
526     virtual CompilationUnit *compile(int len, char *source) = 0;
527 
528     virtual bool getBufferFromDeviceIfDirty(void *memorySegment, long memorySegmentLength) = 0;
529 
530     virtual ~Backend() = default;
531 };
532 
533 
534 
535 template<typename T>
536 T *bufferOf(const char *name) {
537     size_t lenIncludingBufferState = sizeof(T);
538     size_t lenExcludingBufferState = lenIncludingBufferState - sizeof(BufferState);
539     T *buffer = reinterpret_cast<T *>(new unsigned char[lenIncludingBufferState]);
540     auto *bufferState = reinterpret_cast<BufferState *>(reinterpret_cast<char *>(buffer) + lenExcludingBufferState);
541     bufferState->magic1 = bufferState->magic2 = BufferState::MAGIC;
542     bufferState->ptr = buffer;
543     bufferState->length = sizeof(T) - sizeof(BufferState);
544     bufferState->state = BufferState::NEW_STATE;
545     bufferState->vendorPtr = nullptr;
546     bufferState->dump(name);
547     return buffer;
548 }