1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 #pragma once 27 28 #include <iostream> 29 #include <map> 30 #include <vector> 31 #include <cstdio> 32 #include <cstring> 33 #include <unistd.h> 34 #include <sys/time.h> 35 #include <iostream> 36 #include <iomanip> 37 #include <bitset> 38 #include <stack> 39 40 #include "strutil.h" 41 #include "config.h" 42 43 #ifdef __APPLE__ 44 #define SNPRINTF snprintf 45 #else 46 #include <malloc.h> 47 #if defined (_WIN32) 48 #include "windows.h" 49 #define SNPRINTF _snprintf 50 #else 51 #define SNPRINTF snprintf 52 #endif 53 #endif 54 55 typedef char s8_t; 56 typedef char byte; 57 typedef char boolean; 58 typedef char z1_t; 59 typedef unsigned char u8_t; 60 typedef short s16_t; 61 typedef unsigned short u16_t; 62 typedef unsigned int u32_t; 63 typedef int s32_t; 64 typedef float f32_t; 65 typedef double f64_t; 66 typedef long s64_t; 67 typedef unsigned long u64_t; 68 69 extern void hexdump(void *ptr, int buflen); 70 71 class Text { 72 public: 73 size_t len; 74 char *text; 75 bool isCopy; 76 77 Text(size_t len, char *text, bool isCopy); 78 79 Text(char *text, bool isCopy); 80 81 explicit Text(size_t len); 82 83 void write(const std::string &filename) const; 84 85 void read(const std::string &filename); 86 87 virtual ~Text(); 88 }; 89 90 class Log : public Text { 91 public: 92 explicit Log(size_t len); 93 94 explicit Log(char *text); 95 96 ~Log() override = default; 97 }; 98 99 100 #define UNKNOWN_BYTE 0 101 #define RO_BYTE (1<<1) 102 #define WO_BYTE (1<<2) 103 #define RW_BYTE (RO_BYTE|WO_BYTE) 104 105 struct Buffer_s { 106 void *memorySegment; // Address of a Buffer/MemorySegment 107 long sizeInBytes; // The size of the memory segment in bytes 108 u8_t access; // see hat/buffer/ArgArray.java UNKNOWN_BYTE=0, RO_BYTE =1<<1,WO_BYTE =1<<2,RW_BYTE =RO_BYTE|WO_BYTE; 109 }; 110 111 union Value_u { 112 boolean z1; // 'Z' 113 u8_t s8; // 'B' 114 u16_t u16; // 'C' 115 s16_t s16; // 'S' 116 u16_t x16; // 'C' or 'S" 117 s32_t s32; // 'I' 118 s32_t x32; // 'I' or 'F' 119 f32_t f32; // 'F' 120 f64_t f64; // 'D' 121 s64_t s64; // 'J' 122 s64_t x64; // 'D' or 'J' 123 Buffer_s buffer; // '&' 124 }; 125 126 struct KernelArg { 127 u32_t idx; // 0..argc 128 u8_t variant; // which variant 'I','Z','S','J','F', '&' implies Buffer/MemorySegment 129 u8_t pad8[8]; 130 Value_u value; 131 u8_t pad6[6]; 132 133 size_t size() const { 134 size_t sz; 135 switch (variant) { 136 case 'I': 137 case 'F': 138 sz = sizeof(u32_t); 139 break; 140 case 'S': 141 case 'C': 142 sz = sizeof(u16_t); 143 break; 144 case 'D': 145 case 'J': 146 return sizeof(u64_t); 147 break; 148 case 'B': 149 return sizeof(u8_t); 150 break; 151 default: 152 std::cerr << "Bad variant " << variant << "arg::size" << std::endl; 153 exit(1); 154 } 155 156 return sz; 157 } 158 }; 159 160 struct BufferState { 161 static constexpr long MAGIC = 0x4a71facebffab175; 162 static constexpr int NO_STATE = 0; 163 static constexpr int NEW_STATE = 1; 164 static constexpr int HOST_OWNED = 2; 165 static constexpr int DEVICE_OWNED = 3; 166 static constexpr int DEVICE_VALID_HOST_HAS_COPY = 4; 167 const static char *stateNames[]; // See below for out of line definition 168 169 long magic1; 170 void *ptr; 171 long length; 172 int bits; 173 int state; 174 void *vendorPtr; 175 long magic2; 176 177 bool ok() const { 178 return ((magic1 == MAGIC) && (magic2 == MAGIC)); 179 } 180 181 void setState(int newState) { 182 state = newState; 183 } 184 185 int getState() const { 186 return state; 187 } 188 189 void dump(const char *msg) const { 190 if (ok()) { 191 printf("{%s,ptr:%016lx,length: %016lx, state:%08x, vendorPtr:%016lx}\n", msg, (long) ptr, length, state, 192 (long) vendorPtr); 193 } else { 194 printf("%s bad magic \n", msg); 195 printf("(magic1:%016lx,", magic1); 196 printf("{%s, ptr:%016lx, length: %016lx, state:%08x, vendorPtr:%016lx}", msg, (long) ptr, length, state, 197 (long) vendorPtr); 198 printf("magic2:%016lx)\n", magic2); 199 } 200 } 201 202 static BufferState *of(void *ptr, size_t sizeInBytes) { 203 return reinterpret_cast<BufferState *>(static_cast<char *>(ptr) + sizeInBytes - sizeof(BufferState)); 204 } 205 206 static BufferState *of(const KernelArg *arg) { 207 // access? 208 BufferState *bufferState = BufferState::of( 209 arg->value.buffer.memorySegment, 210 arg->value.buffer.sizeInBytes 211 ); 212 213 214 //Sanity check the buffers 215 // These sanity check finds errors passing memory segments which are not Buffers 216 217 if (bufferState->ptr != arg->value.buffer.memorySegment) { 218 std::cerr << "bufferState->ptr != arg->value.buffer.memorySegment" << std::endl; 219 std::exit(1); 220 } 221 222 if ((bufferState->vendorPtr == nullptr) && (bufferState->state != BufferState::NEW_STATE)) { 223 std::cerr << "Warning: Unexpected initial state for buffer " 224 //<<" of kernel '"<<(dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<<"'" 225 << " state=" << bufferState->state << " '" 226 << BufferState::stateNames[bufferState->state] << "'" 227 << " vendorPtr" << bufferState->vendorPtr << std::endl; 228 } 229 // End of sanity checks 230 return bufferState; 231 } 232 }; 233 234 #ifdef shared_cpp 235 const char *BufferState::stateNames[] = { 236 "NO_STATE", 237 "NEW_STATE", 238 "HOST_OWNED", 239 "DEVICE_OWNED", 240 "DEVICE_VALID_HOST_HAS_COPY" 241 }; 242 #endif 243 244 struct ArgArray_s { 245 u32_t argc; 246 u8_t pad12[12]; 247 KernelArg argv[0/*argc*/]; 248 }; 249 250 class ArgSled { 251 private: 252 ArgArray_s *argArray; 253 254 public: 255 int argc() const { 256 return argArray->argc; 257 } 258 259 KernelArg *arg(int n) const { 260 KernelArg *a = (argArray->argv + n); 261 return a; 262 } 263 264 void hexdumpArg(int n) const { 265 hexdump(arg(n), sizeof(KernelArg)); 266 } 267 268 void dumpArg(int n) const { 269 KernelArg *a = arg(n); 270 int idx = (int) a->idx; 271 std::cout << "arg[" << idx << "]"; 272 char variant = (char) a->variant; 273 switch (variant) { 274 case 'F': 275 std::cout << " f32 " << a->value.f32 << std::endl; 276 break; 277 case 'I': 278 std::cout << " s32 " << a->value.s32 << std::endl; 279 break; 280 case 'D': 281 std::cout << " f64 " << a->value.f64 << std::endl; 282 break; 283 case 'J': 284 std::cout << " s64 " << a->value.s64 << std::endl; 285 break; 286 case 'C': 287 std::cout << " u16 " << a->value.u16 << std::endl; 288 break; 289 case 'S': 290 std::cout << " s16 " << a->value.s32 << std::endl; 291 break; 292 case 'Z': 293 std::cout << " z1 " << a->value.z1 << std::endl; 294 break; 295 case '&': 296 std::cout << " buffer {" 297 << " void *address = 0x" << std::hex << (long) a->value.buffer.memorySegment << std::dec 298 << ", long bytesSize= 0x" << std::hex << (long) a->value.buffer.sizeInBytes << std::dec 299 << ", char access= 0x" << std::hex << (unsigned char) a->value.buffer.access << std::dec 300 << "}" << std::endl; 301 break; 302 default: 303 std::cout << (char) variant << std::endl; 304 break; 305 } 306 } 307 308 void *afterArgsPtrPtr() const { 309 KernelArg *a = arg(argc()); 310 return (void *) a; 311 } 312 313 int *schemaLenPtr() const { 314 int *schemaLenP = (int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *) */); 315 return schemaLenP; 316 } 317 318 int schemaLen() const { 319 return *schemaLenPtr(); 320 } 321 322 char *schema() const { 323 int *schemaLenP = ((int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *)*/) + 1); 324 return (char *) schemaLenP; 325 } 326 327 explicit ArgSled(ArgArray_s *argArray) 328 : argArray(argArray) { 329 } 330 }; 331 332 333 class Timer { 334 struct timeval startTV, endTV; 335 336 public: 337 unsigned long elapsed_us{}; 338 339 Timer(): startTV(), endTV() { 340 } 341 342 void start() { 343 gettimeofday(&startTV, nullptr); 344 } 345 346 unsigned long end() { 347 gettimeofday(&endTV, nullptr); 348 elapsed_us = (endTV.tv_sec - startTV.tv_sec) * 1000000; // sec to us 349 elapsed_us += (endTV.tv_usec - startTV.tv_usec); 350 return elapsed_us; 351 } 352 }; 353 354 355 //extern void hexdump(void *ptr, int buflen); 356 357 class Sled { 358 public: 359 static void show(std::ostream &out, void *argArray); 360 }; 361 362 363 class KernelContext { 364 public: 365 int x; 366 int maxX; 367 }; 368 369 370 class Backend { 371 public: 372 class Config final : public BasicConfig { 373 public: 374 explicit Config(int mode); 375 376 ~Config() override; 377 }; 378 379 class Buffer { 380 public: 381 Backend *backend; 382 BufferState *bufferState; 383 384 Buffer(Backend *backend, BufferState *bufferState) 385 : backend(backend), bufferState(bufferState) { 386 } 387 388 virtual ~Buffer() = default; 389 }; 390 391 class CompilationUnit { 392 public: 393 class Kernel { 394 public: 395 char *name; 396 397 CompilationUnit *compilationUnit; 398 399 virtual bool setArg(KernelArg *arg, Buffer *openCLBuffer) = 0; 400 401 virtual bool setArg(KernelArg *arg) = 0; 402 403 virtual long ndrange(void *argArray) final; 404 405 Kernel(CompilationUnit *compilationUnit, char *name) 406 : name(strutil::clone(name)), compilationUnit(compilationUnit) { 407 } 408 409 virtual ~Kernel() { 410 delete[] name; 411 } 412 }; 413 414 public: 415 Backend *backend; 416 char *src; 417 char *log; 418 bool ok; 419 420 virtual Kernel *getKernel(int nameLen, char *name) = 0; 421 422 virtual bool compilationUnitOK() final { 423 return ok; 424 } 425 426 CompilationUnit(Backend *backend, char *src, char *log, bool ok) 427 : backend(backend), src(src), log(log), ok(ok) { 428 } 429 430 virtual ~CompilationUnit() { 431 delete[] src; 432 delete[] log; 433 }; 434 }; 435 436 class Queue { 437 public: 438 Backend *backend; 439 440 explicit Queue(Backend *backend); 441 442 virtual void wait() = 0; 443 444 virtual void release() = 0; 445 446 virtual void computeStart() = 0; 447 448 virtual void computeEnd() = 0; 449 450 virtual void copyToDevice(Buffer *buffer) =0; 451 452 virtual void copyFromDevice(Buffer *buffer) =0; 453 454 virtual void dispatch(KernelContext *kernelContext, CompilationUnit::Kernel *kernel) = 0; 455 456 virtual ~Queue(); 457 }; 458 459 class ProfilableQueue : public Queue { 460 public: 461 static constexpr int START_BIT_IDX = 20; 462 static constexpr int CopyToDeviceBits = 1 << START_BIT_IDX; 463 static constexpr int CopyFromDeviceBits = 1 << 21; 464 static constexpr int NDRangeBits = 1 << 22; 465 static constexpr int StartComputeBits = 1 << 23; 466 static constexpr int EndComputeBits = 1 << 24; 467 static constexpr int EnterKernelDispatchBits = 1 << 25; 468 static constexpr int LeaveKernelDispatchBits = 1 << 26; 469 static constexpr int HasConstCharPtrArgBits = 1 << 27; 470 static constexpr int hasIntArgBits = 1 << 28; 471 static constexpr int END_BIT_IDX = 27; 472 473 size_t eventMax; 474 size_t eventc; 475 int *eventInfoBits; 476 const char **eventInfoConstCharPtrArgs; 477 478 virtual void showEvents(int width) = 0; 479 480 virtual void inc(int bits) = 0; 481 482 virtual void inc(int bits, const char *arg) = 0; 483 484 virtual void marker(int bits) = 0; 485 486 virtual void marker(int bits, const char *arg) = 0; 487 488 489 virtual void markAsStartComputeAndInc() = 0; 490 491 virtual void markAsEndComputeAndInc() = 0; 492 493 virtual void markAsEnterKernelDispatchAndInc() = 0; 494 495 virtual void markAsLeaveKernelDispatchAndInc() = 0; 496 497 ProfilableQueue(Backend *backend, int eventMax) 498 : Queue(backend), 499 eventMax(eventMax), 500 eventInfoBits(new int[eventMax]), 501 eventInfoConstCharPtrArgs(new const char *[eventMax]), 502 eventc(0) { 503 } 504 505 ~ProfilableQueue() override { 506 delete[]eventInfoBits; 507 delete[]eventInfoConstCharPtrArgs; 508 } 509 }; 510 511 Config *config; 512 Queue *queue; 513 514 Backend(Config *config, Queue *queue) 515 : config(config), queue(queue) { 516 } 517 518 virtual Buffer *getOrCreateBuffer(BufferState *bufferState) = 0; 519 520 virtual void info() = 0; 521 522 virtual void computeStart() = 0; 523 524 virtual void computeEnd() = 0; 525 526 virtual CompilationUnit *compile(int len, char *source) = 0; 527 528 virtual bool getBufferFromDeviceIfDirty(void *memorySegment, long memorySegmentLength) = 0; 529 530 virtual ~Backend() = default; 531 }; 532 533 534 535 template<typename T> 536 T *bufferOf(const char *name) { 537 size_t lenIncludingBufferState = sizeof(T); 538 size_t lenExcludingBufferState = lenIncludingBufferState - sizeof(BufferState); 539 T *buffer = reinterpret_cast<T *>(new unsigned char[lenIncludingBufferState]); 540 auto *bufferState = reinterpret_cast<BufferState *>(reinterpret_cast<char *>(buffer) + lenExcludingBufferState); 541 bufferState->magic1 = bufferState->magic2 = BufferState::MAGIC; 542 bufferState->ptr = buffer; 543 bufferState->length = sizeof(T) - sizeof(BufferState); 544 bufferState->state = BufferState::NEW_STATE; 545 bufferState->vendorPtr = nullptr; 546 bufferState->dump(name); 547 return buffer; 548 }