1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 #pragma once 27 28 #include <iostream> 29 #include <map> 30 #include <vector> 31 #include <cstdio> 32 #include <cstring> 33 #include <unistd.h> 34 #include <sys/time.h> 35 #include <iostream> 36 #include <iomanip> 37 #include <bitset> 38 #include <stack> 39 40 #include "strutil.h" 41 42 #ifdef __APPLE__ 43 #define SNPRINTF snprintf 44 #else 45 #include <malloc.h> 46 #if defined (_WIN32) 47 #include "windows.h" 48 #define SNPRINTF _snprintf 49 #else 50 #define SNPRINTF snprintf 51 #endif 52 #endif 53 54 typedef char s8_t; 55 typedef char byte; 56 typedef char boolean; 57 typedef char z1_t; 58 typedef unsigned char u8_t; 59 typedef short s16_t; 60 typedef unsigned short u16_t; 61 typedef unsigned int u32_t; 62 typedef int s32_t; 63 typedef float f32_t; 64 typedef double f64_t; 65 typedef long s64_t; 66 typedef unsigned long u64_t; 67 68 extern void hexdump(void *ptr, int buflen); 69 70 class Text { 71 public: 72 size_t len; 73 char *text; 74 bool isCopy; 75 76 Text(size_t len, char *text, bool isCopy); 77 78 Text(char *text, bool isCopy); 79 80 Text(size_t len); 81 82 void write(std::string &filename) const; 83 84 void read(std::string &filename); 85 86 virtual ~Text(); 87 }; 88 89 class Log : public Text { 90 public: 91 Log(size_t len); 92 93 Log(char *text); 94 95 ~Log() = default; 96 }; 97 98 99 #define UNKNOWN_BYTE 0 100 #define RO_BYTE (1<<1) 101 #define WO_BYTE (1<<2) 102 #define RW_BYTE (RO_BYTE|WO_BYTE) 103 104 struct Buffer_s { 105 void *memorySegment; // Address of a Buffer/MemorySegment 106 long sizeInBytes; // The size of the memory segment in bytes 107 u8_t access; // see hat/buffer/ArgArray.java UNKNOWN_BYTE=0, RO_BYTE =1<<1,WO_BYTE =1<<2,RW_BYTE =RO_BYTE|WO_BYTE; 108 }; 109 110 union Value_u { 111 boolean z1; // 'Z' 112 u8_t s8; // 'B' 113 u16_t u16; // 'C' 114 s16_t s16; // 'S' 115 u16_t x16; // 'C' or 'S" 116 s32_t s32; // 'I' 117 s32_t x32; // 'I' or 'F' 118 f32_t f32; // 'F' 119 f64_t f64; // 'D' 120 s64_t s64; // 'J' 121 s64_t x64; // 'D' or 'J' 122 Buffer_s buffer; // '&' 123 }; 124 125 struct KernelArg { 126 u32_t idx; // 0..argc 127 u8_t variant; // which variant 'I','Z','S','J','F', '&' implies Buffer/MemorySegment 128 u8_t pad8[8]; 129 Value_u value; 130 u8_t pad6[6]; 131 132 size_t size() { 133 size_t sz; 134 switch (variant) { 135 case 'I': 136 case 'F': 137 sz = sizeof(u32_t); 138 break; 139 case 'S': 140 case 'C': 141 sz = sizeof(u16_t); 142 break; 143 case 'D': 144 case 'J': 145 return sizeof(u64_t); 146 break; 147 case 'B': 148 return sizeof(u8_t); 149 break; 150 default: 151 std::cerr << "Bad variant " << variant << "arg::size" << std::endl; 152 exit(1); 153 154 } 155 156 return sz; 157 } 158 }; 159 160 struct BufferState { 161 static const long MAGIC = 0x4a71facebffab175; 162 static const int NO_STATE = 0; 163 static const int NEW_STATE = 1; 164 static const int HOST_OWNED = 2; 165 static const int DEVICE_OWNED = 3; 166 static const int DEVICE_VALID_HOST_HAS_COPY = 4; 167 const static char *stateNames[]; // See below for out of line definition 168 169 long magic1; 170 void *ptr; 171 long length; 172 int bits; 173 int state; 174 void *vendorPtr; 175 long magic2; 176 177 bool ok() { 178 return ((magic1 == MAGIC) && (magic2 == MAGIC)); 179 } 180 181 void setState(int newState) { 182 state = newState; 183 } 184 185 int getState() { 186 return state; 187 } 188 189 void dump(const char *msg) { 190 if (ok()) { 191 printf("{%s,ptr:%016lx,length: %016lx, state:%08x, vendorPtr:%016lx}\n", msg, (long) ptr, length, state, (long) vendorPtr); 192 } else { 193 printf("%s bad magic \n", msg); 194 printf("(magic1:%016lx,", magic1); 195 printf("{%s, ptr:%016lx, length: %016lx, state:%08x, vendorPtr:%016lx}", msg, (long) ptr, length, state, (long) vendorPtr); 196 printf("magic2:%016lx)\n", magic2); 197 } 198 } 199 200 static BufferState *of(void *ptr, size_t sizeInBytes) { 201 return (BufferState *) (((char *) ptr) + sizeInBytes - sizeof(BufferState)); 202 } 203 204 static BufferState *of(KernelArg *arg) { // access? 205 BufferState *bufferState = BufferState::of( 206 arg->value.buffer.memorySegment, 207 arg->value.buffer.sizeInBytes 208 ); 209 210 211 //Sanity check the buffers 212 // These sanity check finds errors passing memory segments which are not Buffers 213 214 if (bufferState->ptr != arg->value.buffer.memorySegment) { 215 std::cerr << "bufferState->ptr != arg->value.buffer.memorySegment" << std::endl; 216 std::exit(1); 217 } 218 219 if ((bufferState->vendorPtr == 0L) && (bufferState->state != BufferState::NEW_STATE)) { 220 std::cerr << "Warning: Unexpected initial state for buffer " 221 //<<" of kernel '"<<(dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<<"'" 222 << " state=" << bufferState->state << " '" 223 << BufferState::stateNames[bufferState->state] << "'" 224 << " vendorPtr" << bufferState->vendorPtr << std::endl; 225 } 226 // End of sanity checks 227 return bufferState; 228 } 229 230 }; 231 232 #ifdef shared_cpp 233 const char *BufferState::stateNames[] = { 234 "NO_STATE", 235 "NEW_STATE", 236 "HOST_OWNED", 237 "DEVICE_OWNED", 238 "DEVICE_VALID_HOST_HAS_COPY" 239 }; 240 #endif 241 242 struct ArgArray_s { 243 u32_t argc; 244 u8_t pad12[12]; 245 KernelArg argv[0/*argc*/]; 246 }; 247 248 class ArgSled { 249 private: 250 ArgArray_s *argArray; 251 public: 252 int argc() { 253 return argArray->argc; 254 } 255 256 KernelArg *arg(int n) { 257 KernelArg *a = (argArray->argv + n); 258 return a; 259 } 260 261 void hexdumpArg(int n) { 262 hexdump(arg(n), sizeof(KernelArg)); 263 } 264 265 void dumpArg(int n) { 266 KernelArg *a = arg(n); 267 int idx = (int) a->idx; 268 std::cout << "arg[" << idx << "]"; 269 char variant = (char) a->variant; 270 switch (variant) { 271 case 'F': 272 std::cout << " f32 " << a->value.f32 << std::endl; 273 break; 274 case 'I': 275 std::cout << " s32 " << a->value.s32 << std::endl; 276 break; 277 case 'D': 278 std::cout << " f64 " << a->value.f64 << std::endl; 279 break; 280 case 'J': 281 std::cout << " s64 " << a->value.s64 << std::endl; 282 break; 283 case 'C': 284 std::cout << " u16 " << a->value.u16 << std::endl; 285 break; 286 case 'S': 287 std::cout << " s16 " << a->value.s32 << std::endl; 288 break; 289 case 'Z': 290 std::cout << " z1 " << a->value.z1 << std::endl; 291 break; 292 case '&': 293 std::cout << " buffer {" 294 << " void *address = 0x" << std::hex << (long) a->value.buffer.memorySegment << std::dec 295 << ", long bytesSize= 0x" << std::hex << (long) a->value.buffer.sizeInBytes << std::dec 296 << ", char access= 0x" << std::hex << (unsigned char) a->value.buffer.access << std::dec 297 << "}" << std::endl; 298 break; 299 default: 300 std::cout << (char) variant << std::endl; 301 break; 302 } 303 } 304 305 void *afterArgsPtrPtr() { 306 KernelArg *a = arg(argc()); 307 return (void *) a; 308 } 309 310 int *schemaLenPtr() { 311 int *schemaLenP = (int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *) */); 312 return schemaLenP; 313 } 314 315 int schemaLen() { 316 return *schemaLenPtr(); 317 } 318 319 char *schema() { 320 int *schemaLenP = ((int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *)*/) + 1); 321 return (char *) schemaLenP; 322 } 323 324 ArgSled(ArgArray_s *argArray) 325 : argArray(argArray) {} 326 }; 327 328 329 class Timer { 330 struct timeval startTV, endTV; 331 public: 332 unsigned long elapsed_us; 333 334 void start() { 335 gettimeofday(&startTV, NULL); 336 } 337 338 unsigned long end() { 339 gettimeofday(&endTV, NULL); 340 elapsed_us = (endTV.tv_sec - startTV.tv_sec) * 1000000; // sec to us 341 elapsed_us += (endTV.tv_usec - startTV.tv_usec); 342 return elapsed_us; 343 } 344 }; 345 346 347 extern void hexdump(void *ptr, int buflen); 348 349 class Sled { 350 public: 351 static void show(std::ostream &out, void *argArray); 352 }; 353 354 355 class KernelContext { 356 public: 357 int x; 358 int maxX; 359 }; 360 361 class Backend { 362 public: 363 class Config { 364 public: 365 // These must sync with hat/backend/ffi/Mode.java 366 // Bits 0-3 select platform id 0..5 367 // Bits 4-7 select device id 0..15 368 const static int START_BIT_IDX = 16; 369 const static int MINIMIZE_COPIES_BIT = 1 << START_BIT_IDX; 370 const static int TRACE_BIT = 1 << 17; 371 const static int PROFILE_BIT = 1 << 18; 372 const static int SHOW_CODE_BIT = 1 << 19; 373 const static int SHOW_KERNEL_MODEL_BIT = 1 << 20; 374 const static int SHOW_COMPUTE_MODEL_BIT = 1 << 21; 375 const static int INFO_BIT = 1 << 22; 376 const static int TRACE_COPIES_BIT = 1 << 23; 377 const static int TRACE_SKIPPED_COPIES_BIT = 1 << 24; 378 const static int TRACE_ENQUEUES_BIT = 1 << 25; 379 const static int TRACE_CALLS_BIT = 1 << 26; 380 const static int SHOW_WHY_BIT = 1 << 27; 381 const static int SHOW_STATE_BIT = 1 << 28; 382 const static int PTX_BIT = 1 << 29; 383 const static int END_BIT_IDX = 30; 384 385 const static char *bitNames[]; // See below for out of line definition 386 int configBits; 387 bool minimizeCopies; 388 bool alwaysCopy; 389 bool trace; 390 bool profile; 391 bool showCode; 392 bool info; 393 bool traceCopies; 394 bool traceSkippedCopies; 395 bool traceEnqueues; 396 bool traceCalls; 397 bool showWhy; 398 bool showState; 399 bool ptx; 400 int platform; //0..15 401 int device; //0..15 402 Config(int mode); 403 404 virtual ~Config(); 405 }; 406 407 class Buffer { 408 public: 409 Backend *backend; 410 BufferState *bufferState; 411 Buffer(Backend *backend, BufferState *bufferState) 412 : backend(backend), bufferState(bufferState) { 413 } 414 virtual ~Buffer() = default; 415 }; 416 417 class CompilationUnit { 418 public: 419 class Kernel { 420 public: 421 char *name;// strduped! 422 423 CompilationUnit *compilationUnit; 424 425 virtual bool setArg(KernelArg *arg, Buffer *openCLBuffer) = 0; 426 427 virtual bool setArg(KernelArg *arg) = 0; 428 429 virtual long ndrange(void *argArray) final; 430 431 Kernel(CompilationUnit *compilationUnit, char *name) 432 : compilationUnit(compilationUnit), name(strutil::clone(name)) { 433 } 434 435 virtual ~Kernel() { 436 if (name) { 437 delete[] name; 438 } 439 } 440 }; 441 442 public: 443 Backend *backend; 444 char *src; 445 char *log; 446 bool ok; 447 448 virtual Kernel *getKernel(int nameLen, char *name) = 0; 449 450 virtual bool compilationUnitOK() final { 451 return ok; 452 } 453 454 CompilationUnit(Backend *backend, char *src, char *log, bool ok) 455 : backend(backend), src(src), log(log), ok(ok) { 456 } 457 458 virtual ~CompilationUnit() { 459 if (src != nullptr) { 460 delete[] src; 461 } 462 if (log != nullptr) { 463 delete[] log; 464 } 465 }; 466 }; 467 468 class Queue { 469 public: 470 471 Backend *backend; 472 473 Queue(Backend *backend); 474 475 virtual void wait() = 0; 476 477 virtual void release() = 0; 478 479 virtual void computeStart() = 0; 480 481 virtual void computeEnd() = 0; 482 483 virtual void copyToDevice(Buffer *buffer)=0; 484 485 virtual void copyFromDevice(Buffer *buffer)=0; 486 487 virtual void dispatch(KernelContext *kernelContext, CompilationUnit::Kernel *kernel) = 0; 488 489 virtual ~Queue(); 490 }; 491 492 class ProfilableQueue : public Queue { 493 public: 494 const static int START_BIT_IDX = 20; 495 static const int CopyToDeviceBits = 1 << START_BIT_IDX; 496 static const int CopyFromDeviceBits = 1 << 21; 497 static const int NDRangeBits = 1 << 22; 498 static const int StartComputeBits = 1 << 23; 499 static const int EndComputeBits = 1 << 24; 500 static const int EnterKernelDispatchBits = 1 << 25; 501 static const int LeaveKernelDispatchBits = 1 << 26; 502 static const int HasConstCharPtrArgBits = 1 << 27; 503 static const int hasIntArgBits = 1 << 28; 504 const static int END_BIT_IDX = 27; 505 506 size_t eventMax; 507 size_t eventc; 508 int *eventInfoBits; 509 const char **eventInfoConstCharPtrArgs; 510 511 virtual void showEvents(int width) = 0; 512 513 virtual void inc(int bits) = 0; 514 515 virtual void inc(int bits, const char *arg) = 0; 516 517 virtual void marker(int bits) = 0; 518 519 virtual void marker(int bits, const char *arg) = 0; 520 521 522 virtual void markAsStartComputeAndInc() = 0; 523 524 virtual void markAsEndComputeAndInc() = 0; 525 526 virtual void markAsEnterKernelDispatchAndInc() = 0; 527 528 virtual void markAsLeaveKernelDispatchAndInc() = 0; 529 530 ProfilableQueue(Backend *backend, int eventMax) 531 : Queue(backend), 532 eventMax(eventMax), 533 eventInfoBits(new int[eventMax]), 534 eventInfoConstCharPtrArgs(new const char *[eventMax]), 535 eventc(0) {} 536 537 virtual ~ProfilableQueue() override { 538 delete[]eventInfoBits; 539 delete[]eventInfoConstCharPtrArgs; 540 } 541 }; 542 543 Config *config; 544 Queue *queue; 545 546 Backend(Config *config, Queue *queue) 547 : config(config), queue(queue) {} 548 549 virtual Buffer *getOrCreateBuffer(BufferState *bufferState) = 0; 550 551 virtual void info() = 0; 552 553 virtual void computeStart() = 0; 554 555 virtual void computeEnd() = 0; 556 557 virtual CompilationUnit *compile(int len, char *source) = 0; 558 559 virtual bool getBufferFromDeviceIfDirty(void *memorySegment, long memorySegmentLength) = 0; 560 561 virtual ~Backend() {}; 562 }; 563 564 #ifdef shared_cpp 565 const char *Backend::Config::bitNames[] = { 566 "MINIMIZE_COPIES", 567 "TRACE", 568 "PROFILE", 569 "SHOW_CODE", 570 "SHOW_KERNEL_MODEL", 571 "SHOW_COMPUTE_MODEL", 572 "INFO", 573 "TRACE_COPIES", 574 "TRACE_SKIPPED_COPIES", 575 "TRACE_ENQUEUES", 576 "TRACE_CALLS" 577 "SHOW_WHY_BIT", 578 "USE_STATE_BIT", 579 "SHOW_STATE_BIT" 580 }; 581 #endif 582 583 template<typename T> 584 T *bufferOf(const char *name) { 585 size_t lenIncludingBufferState = sizeof(T); 586 size_t lenExcludingBufferState = lenIncludingBufferState - sizeof(BufferState); 587 T *buffer = (T *) new unsigned char[lenIncludingBufferState]; 588 auto *bufferState = (BufferState *) ((char *) buffer + lenExcludingBufferState); 589 bufferState->magic1 = bufferState->magic2 = BufferState::MAGIC; 590 bufferState->ptr = buffer; 591 bufferState->length = sizeof(T) - sizeof(BufferState); 592 bufferState->state = BufferState::NEW_STATE; 593 bufferState->vendorPtr = nullptr; 594 bufferState->dump(name); 595 return buffer; 596 }