1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 #pragma once 27 28 #include <iostream> 29 #include <map> 30 #include <vector> 31 #include <cstdio> 32 #include <cstring> 33 #include <unistd.h> 34 #include <sys/time.h> 35 #include <iostream> 36 #include <iomanip> 37 #include <bitset> 38 #include <stack> 39 40 #include "strutil.h" 41 #include "config.h" 42 43 #ifdef __APPLE__ 44 #define SNPRINTF snprintf 45 #else 46 #include <malloc.h> 47 #if defined (_WIN32) 48 #include "windows.h" 49 #define SNPRINTF _snprintf 50 #else 51 #define SNPRINTF snprintf 52 #endif 53 #endif 54 55 typedef char s8_t; 56 typedef char byte; 57 typedef char boolean; 58 typedef char z1_t; 59 typedef unsigned char u8_t; 60 typedef short s16_t; 61 typedef unsigned short u16_t; 62 typedef unsigned int u32_t; 63 typedef int s32_t; 64 typedef float f32_t; 65 typedef double f64_t; 66 typedef long s64_t; 67 typedef unsigned long u64_t; 68 69 extern void hexdump(void *ptr, int buflen); 70 71 class Text { 72 public: 73 size_t len; 74 char *text; 75 bool isCopy; 76 77 Text(size_t len, char *text, bool isCopy); 78 79 Text(char *text, bool isCopy); 80 81 explicit Text(size_t len); 82 83 void write(const std::string &filename) const; 84 85 void read(const std::string &filename); 86 87 virtual ~Text(); 88 }; 89 90 class Log : public Text { 91 public: 92 explicit Log(size_t len); 93 94 explicit Log(char *text); 95 96 ~Log() override = default; 97 }; 98 99 100 #define UNKNOWN_BYTE 0 101 #define RO_BYTE (1<<1) 102 #define WO_BYTE (1<<2) 103 #define RW_BYTE (RO_BYTE|WO_BYTE) 104 105 struct Buffer_s { 106 void *memorySegment; // Address of a Buffer/MemorySegment 107 long sizeInBytes; // The size of the memory segment in bytes 108 u8_t access; // see hat/buffer/ArgArray.java UNKNOWN_BYTE=0, RO_BYTE =1<<1,WO_BYTE =1<<2,RW_BYTE =RO_BYTE|WO_BYTE; 109 }; 110 111 union Value_u { 112 boolean z1; // 'Z' 113 u8_t s8; // 'B' 114 u16_t u16; // 'C' 115 s16_t s16; // 'S' 116 u16_t x16; // 'C' or 'S" 117 s32_t s32; // 'I' 118 s32_t x32; // 'I' or 'F' 119 f32_t f32; // 'F' 120 f64_t f64; // 'D' 121 s64_t s64; // 'J' 122 s64_t x64; // 'D' or 'J' 123 Buffer_s buffer; // '&' 124 }; 125 126 struct KernelArg { 127 u32_t idx; // 0..argc 128 u8_t variant; // which variant 'I','Z','S','J','F', '&' implies Buffer/MemorySegment 129 u8_t pad8[8]; 130 Value_u value; 131 u8_t pad6[6]; 132 133 size_t size() const { 134 size_t sz; 135 switch (variant) { 136 case 'I': 137 case 'F': 138 sz = sizeof(u32_t); 139 break; 140 case 'S': 141 case 'C': 142 sz = sizeof(u16_t); 143 break; 144 case 'D': 145 case 'J': 146 return sizeof(u64_t); 147 break; 148 case 'B': 149 return sizeof(u8_t); 150 break; 151 default: 152 std::cerr << "Bad variant " << variant << "arg::size" << std::endl; 153 exit(1); 154 } 155 156 return sz; 157 } 158 }; 159 160 struct BufferState { 161 static constexpr long MAGIC = 0x4a71facebffab175; 162 static constexpr int NO_STATE = 0; 163 static constexpr int NEW_STATE = 1; 164 static constexpr int HOST_OWNED = 2; 165 static constexpr int DEVICE_OWNED = 3; 166 static constexpr int DEVICE_VALID_HOST_HAS_COPY = 4; 167 const static char *stateNames[]; // See below for out of line definition 168 169 long magic1; 170 void *ptr; 171 long length; 172 int bits; 173 int state; 174 void *vendorPtr; 175 long magic2; 176 177 bool ok() const { 178 return ((magic1 == MAGIC) && (magic2 == MAGIC)); 179 } 180 181 void setState(int newState) { 182 state = newState; 183 } 184 185 int getState() const { 186 return state; 187 } 188 189 void dump(const char *msg) const { 190 if (ok()) { 191 printf("{%s,ptr:%016lx,length: %016lx, state:%08x, vendorPtr:%016lx}\n", msg, (long) ptr, length, state, 192 (long) vendorPtr); 193 } else { 194 printf("%s bad magic \n", msg); 195 printf("(magic1:%016lx,", magic1); 196 printf("{%s, ptr:%016lx, length: %016lx, state:%08x, vendorPtr:%016lx}", msg, (long) ptr, length, state, 197 (long) vendorPtr); 198 printf("magic2:%016lx)\n", magic2); 199 } 200 } 201 202 static BufferState *of(void *ptr, size_t sizeInBytes) { 203 return reinterpret_cast<BufferState *>(static_cast<char *>(ptr) + sizeInBytes - sizeof(BufferState)); 204 } 205 206 static BufferState *of(const KernelArg *arg) { 207 // access? 208 BufferState *bufferState = BufferState::of( 209 arg->value.buffer.memorySegment, 210 arg->value.buffer.sizeInBytes 211 ); 212 213 214 //Sanity check the buffers 215 // These sanity check finds errors passing memory segments which are not Buffers 216 217 if (bufferState->ptr != arg->value.buffer.memorySegment) { 218 std::cerr << "bufferState->ptr != arg->value.buffer.memorySegment" << std::endl; 219 std::exit(1); 220 } 221 222 if ((bufferState->vendorPtr == nullptr) && (bufferState->state != BufferState::NEW_STATE)) { 223 std::cerr << "Warning: Unexpected initial state for buffer " 224 //<<" of kernel '"<<(dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<<"'" 225 << " state=" << bufferState->state << " '" 226 << BufferState::stateNames[bufferState->state] << "'" 227 << " vendorPtr" << bufferState->vendorPtr << std::endl; 228 } 229 // End of sanity checks 230 return bufferState; 231 } 232 }; 233 234 #ifdef shared_cpp 235 const char *BufferState::stateNames[] = { 236 "NO_STATE", 237 "NEW_STATE", 238 "HOST_OWNED", 239 "DEVICE_OWNED", 240 "DEVICE_VALID_HOST_HAS_COPY" 241 }; 242 #endif 243 244 struct ArgArray_s { 245 u32_t argc; 246 u8_t pad12[12]; 247 KernelArg argv[0/*argc*/]; 248 }; 249 250 class ArgSled { 251 private: 252 ArgArray_s *argArray; 253 254 public: 255 int argc() const { 256 return argArray->argc; 257 } 258 259 KernelArg *arg(int n) const { 260 KernelArg *a = (argArray->argv + n); 261 return a; 262 } 263 264 void hexdumpArg(int n) const { 265 hexdump(arg(n), sizeof(KernelArg)); 266 } 267 268 void dumpArg(int n) const { 269 KernelArg *a = arg(n); 270 int idx = (int) a->idx; 271 std::cout << "arg[" << idx << "]"; 272 char variant = (char) a->variant; 273 switch (variant) { 274 case 'F': 275 std::cout << " f32 " << a->value.f32 << std::endl; 276 break; 277 case 'I': 278 std::cout << " s32 " << a->value.s32 << std::endl; 279 break; 280 case 'D': 281 std::cout << " f64 " << a->value.f64 << std::endl; 282 break; 283 case 'J': 284 std::cout << " s64 " << a->value.s64 << std::endl; 285 break; 286 case 'C': 287 std::cout << " u16 " << a->value.u16 << std::endl; 288 break; 289 case 'S': 290 std::cout << " s16 " << a->value.s32 << std::endl; 291 break; 292 case 'Z': 293 std::cout << " z1 " << a->value.z1 << std::endl; 294 break; 295 case '&': 296 std::cout << " buffer {" 297 << " void *address = 0x" << std::hex << (long) a->value.buffer.memorySegment << std::dec 298 << ", long bytesSize= 0x" << std::hex << (long) a->value.buffer.sizeInBytes << std::dec 299 << ", char access= 0x" << std::hex << (unsigned char) a->value.buffer.access << std::dec 300 << "}" << std::endl; 301 break; 302 default: 303 std::cout << (char) variant << std::endl; 304 break; 305 } 306 } 307 308 void *afterArgsPtrPtr() const { 309 KernelArg *a = arg(argc()); 310 return (void *) a; 311 } 312 313 int *schemaLenPtr() const { 314 int *schemaLenP = (int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *) */); 315 return schemaLenP; 316 } 317 318 int schemaLen() const { 319 return *schemaLenPtr(); 320 } 321 322 char *schema() const { 323 int *schemaLenP = ((int *) ((char *) afterArgsPtrPtr() /*+ sizeof(void *)*/) + 1); 324 return (char *) schemaLenP; 325 } 326 327 explicit ArgSled(ArgArray_s *argArray) 328 : argArray(argArray) { 329 } 330 }; 331 332 333 class Timer { 334 struct timeval startTV, endTV; 335 336 public: 337 unsigned long elapsed_us{}; 338 339 Timer(): startTV(), endTV() { 340 } 341 342 void start() { 343 gettimeofday(&startTV, nullptr); 344 } 345 346 unsigned long end() { 347 gettimeofday(&endTV, nullptr); 348 elapsed_us = (endTV.tv_sec - startTV.tv_sec) * 1000000; // sec to us 349 elapsed_us += (endTV.tv_usec - startTV.tv_usec); 350 return elapsed_us; 351 } 352 }; 353 354 355 //extern void hexdump(void *ptr, int buflen); 356 357 class Sled { 358 public: 359 static void show(std::ostream &out, void *argArray); 360 }; 361 362 class KernelContext { 363 public: 364 int x; 365 int maxX; 366 int y; 367 int maxY; 368 int z; 369 int maxZ; 370 int dimensions; 371 372 // global index 373 int gix; 374 int giy; 375 int giz; 376 377 // global sizes 378 int gsx; 379 int gsy; 380 int gsz; 381 382 // local index 383 int lix; 384 int liy; 385 int liz; 386 387 // local size 388 int lsx; 389 int lsy; 390 int lsz; 391 392 // Group index 393 int bix; 394 int biy; 395 int biz; 396 }; 397 398 class Backend { 399 public: 400 class Config final : public BasicConfig { 401 public: 402 explicit Config(int mode); 403 404 ~Config() override; 405 }; 406 407 class Buffer { 408 public: 409 Backend *backend; 410 BufferState *bufferState; 411 412 Buffer(Backend *backend, BufferState *bufferState) 413 : backend(backend), bufferState(bufferState) { 414 } 415 416 virtual ~Buffer() = default; 417 }; 418 419 class CompilationUnit { 420 public: 421 class Kernel { 422 public: 423 char *name; 424 425 CompilationUnit *compilationUnit; 426 427 virtual bool setArg(KernelArg *arg, Buffer *openCLBuffer) = 0; 428 429 virtual bool setArg(KernelArg *arg) = 0; 430 431 virtual long ndrange(void *argArray) final; 432 433 Kernel(CompilationUnit *compilationUnit, char *name) 434 : name(strutil::clone(name)), compilationUnit(compilationUnit) { 435 } 436 437 virtual ~Kernel() { 438 delete[] name; 439 } 440 }; 441 442 public: 443 Backend *backend; 444 char *src; 445 char *log; 446 bool ok; 447 448 virtual Kernel *getKernel(int nameLen, char *name) = 0; 449 450 virtual bool compilationUnitOK() final { 451 return ok; 452 } 453 454 CompilationUnit(Backend *backend, char *src, char *log, bool ok) 455 : backend(backend), src(src), log(log), ok(ok) { 456 } 457 458 virtual ~CompilationUnit() { 459 delete[] src; 460 delete[] log; 461 }; 462 }; 463 464 class Queue { 465 public: 466 Backend *backend; 467 468 explicit Queue(Backend *backend); 469 470 virtual void wait() = 0; 471 472 virtual void release() = 0; 473 474 virtual void computeStart() = 0; 475 476 virtual void computeEnd() = 0; 477 478 virtual void copyToDevice(Buffer *buffer) =0; 479 480 virtual void copyFromDevice(Buffer *buffer) =0; 481 482 virtual void dispatch(KernelContext *kernelContext, CompilationUnit::Kernel *kernel) = 0; 483 484 virtual ~Queue(); 485 }; 486 487 class ProfilableQueue : public Queue { 488 public: 489 static constexpr int START_BIT_IDX = 20; 490 static constexpr int CopyToDeviceBits = 1 << START_BIT_IDX; 491 static constexpr int CopyFromDeviceBits = 1 << 21; 492 static constexpr int NDRangeBits = 1 << 22; 493 static constexpr int StartComputeBits = 1 << 23; 494 static constexpr int EndComputeBits = 1 << 24; 495 static constexpr int EnterKernelDispatchBits = 1 << 25; 496 static constexpr int LeaveKernelDispatchBits = 1 << 26; 497 static constexpr int HasConstCharPtrArgBits = 1 << 27; 498 static constexpr int hasIntArgBits = 1 << 28; 499 static constexpr int END_BIT_IDX = 27; 500 501 size_t eventMax; 502 size_t eventc; 503 int *eventInfoBits; 504 const char **eventInfoConstCharPtrArgs; 505 506 virtual void showEvents(int width) = 0; 507 508 virtual void inc(int bits) = 0; 509 510 virtual void inc(int bits, const char *arg) = 0; 511 512 virtual void marker(int bits) = 0; 513 514 virtual void marker(int bits, const char *arg) = 0; 515 516 517 virtual void markAsStartComputeAndInc() = 0; 518 519 virtual void markAsEndComputeAndInc() = 0; 520 521 virtual void markAsEnterKernelDispatchAndInc() = 0; 522 523 virtual void markAsLeaveKernelDispatchAndInc() = 0; 524 525 ProfilableQueue(Backend *backend, int eventMax) 526 : Queue(backend), 527 eventMax(eventMax), 528 eventInfoBits(new int[eventMax]), 529 eventInfoConstCharPtrArgs(new const char *[eventMax]), 530 eventc(0) { 531 } 532 533 ~ProfilableQueue() override { 534 delete[]eventInfoBits; 535 delete[]eventInfoConstCharPtrArgs; 536 } 537 }; 538 539 Config *config; 540 Queue *queue; 541 542 Backend(Config *config, Queue *queue) 543 : config(config), queue(queue) { 544 } 545 546 virtual Buffer *getOrCreateBuffer(BufferState *bufferState) = 0; 547 548 virtual void info() = 0; 549 550 virtual void computeStart() = 0; 551 552 virtual void computeEnd() = 0; 553 554 virtual CompilationUnit *compile(int len, char *source) = 0; 555 556 virtual bool getBufferFromDeviceIfDirty(void *memorySegment, long memorySegmentLength) = 0; 557 558 virtual ~Backend() = default; 559 }; 560 561 562 563 template<typename T> 564 T *bufferOf(const char *name) { 565 size_t lenIncludingBufferState = sizeof(T); 566 size_t lenExcludingBufferState = lenIncludingBufferState - sizeof(BufferState); 567 T *buffer = reinterpret_cast<T *>(new unsigned char[lenIncludingBufferState]); 568 auto *bufferState = reinterpret_cast<BufferState *>(reinterpret_cast<char *>(buffer) + lenExcludingBufferState); 569 bufferState->magic1 = bufferState->magic2 = BufferState::MAGIC; 570 bufferState->ptr = buffer; 571 bufferState->length = sizeof(T) - sizeof(BufferState); 572 bufferState->state = BufferState::NEW_STATE; 573 bufferState->vendorPtr = nullptr; 574 bufferState->dump(name); 575 return buffer; 576 }