1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 #define shared_cpp 26 27 #include "shared.h" 28 29 #define INFO 0 30 31 32 void hexdump(void *ptr, int buflen) { 33 unsigned char *buf = (unsigned char *) ptr; 34 int i, j; 35 for (i = 0; i < buflen; i += 16) { 36 printf("%06x: ", i); 37 for (j = 0; j < 16; j++) 38 if (i + j < buflen) 39 printf("%02x ", buf[i + j]); 40 else 41 printf(" "); 42 printf(" "); 43 for (j = 0; j < 16; j++) 44 if (i + j < buflen) 45 printf("%c", isprint(buf[i + j]) ? buf[i + j] : '.'); 46 printf("\n"); 47 } 48 } 49 50 void Sled::show(std::ostream &out, void *argArray) { 51 ArgSled argSled(static_cast<ArgArray_s *>(argArray)); 52 for (int i = 0; i < argSled.argc(); i++) { 53 KernelArg *arg = argSled.arg(i); 54 switch (arg->variant) { 55 case '&': { 56 out << "Buf: of " << arg->value.buffer.sizeInBytes << " bytes " << std::endl; 57 break; 58 } 59 case 'B': { 60 out << "S8:" << arg->value.s8 << std::endl; 61 break; 62 } 63 case 'Z': { 64 out << "Z:" << arg->value.z1 << std::endl; 65 break; 66 } 67 case 'C': { 68 out << "U16:" << arg->value.u16 << std::endl; 69 break; 70 } 71 case 'S': { 72 out << "S16:" << arg->value.s16 << std::endl; 73 break; 74 } 75 case 'I': { 76 out << "S32:" << arg->value.s32 << std::endl; 77 break; 78 } 79 case 'F': { 80 out << "F32:" << arg->value.f32 << std::endl; 81 break; 82 } 83 case 'J': { 84 out << "S64:" << arg->value.s64 << std::endl; 85 break; 86 } 87 case 'D': { 88 out << "F64:" << arg->value.f64 << std::endl; 89 break; 90 } 91 default: { 92 std::cerr << "unexpected variant (shared.cpp) '" << (char) arg->variant << "'" << std::endl; 93 exit(1); 94 } 95 } 96 } 97 out << "schema len = " << argSled.schemaLen() << std::endl; 98 99 out << "schema = " << argSled.schema() << std::endl; 100 } 101 102 103 extern "C" void info(long backendHandle) { 104 if (INFO) { 105 std::cout << "trampolining through backendHandle to backend.info()" << std::endl; 106 } 107 auto *backend = reinterpret_cast<Backend *>(backendHandle); 108 backend->info(); 109 } 110 extern "C" void computeStart(long backendHandle) { 111 if (INFO) { 112 std::cout << "trampolining through backendHandle to backend.computeStart()" << std::endl; 113 } 114 auto *backend = reinterpret_cast<Backend *>(backendHandle); 115 backend->computeStart(); 116 } 117 extern "C" void computeEnd(long backendHandle) { 118 if (INFO) { 119 std::cout << "trampolining through backendHandle to backend.computeEnd()" << std::endl; 120 } 121 auto *backend = reinterpret_cast<Backend *>(backendHandle); 122 backend->computeEnd(); 123 } 124 extern "C" void releaseBackend(long backendHandle) { 125 auto *backend = reinterpret_cast<Backend *>(backendHandle); 126 delete backend; 127 } 128 extern "C" long compile(long backendHandle, int len, char *source) { 129 if (INFO) { 130 std::cout << "trampolining through backendHandle to backend.compile() " 131 << std::hex << backendHandle << std::dec << std::endl; 132 } 133 auto *backend = reinterpret_cast<Backend *>(backendHandle); 134 long compilationUnitHandle = reinterpret_cast<long>(backend->compile(len, source)); 135 if (INFO) { 136 std::cout << "compilationUnitHandle = " << std::hex << compilationUnitHandle << std::dec << std::endl; 137 } 138 return compilationUnitHandle; 139 } 140 extern "C" long getKernel(long compilationUnitHandle, int nameLen, char *name) { 141 if (INFO) { 142 std::cout << "trampolining through programHandle to compilationUnit.getKernel()" 143 << std::hex << compilationUnitHandle << std::dec << std::endl; 144 } 145 auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle); 146 return reinterpret_cast<long>(compilationUnit->getKernel(nameLen, name)); 147 } 148 149 extern "C" long ndrange(long kernelHandle, void *argArray) { 150 if (INFO) { 151 std::cout << "trampolining through kernelHandle to kernel.ndrange(...) " << std::endl; 152 } 153 auto kernel = reinterpret_cast<Backend::CompilationUnit::Kernel *>(kernelHandle); 154 kernel->ndrange(argArray); 155 return (long) 0; 156 } 157 extern "C" void releaseKernel(long kernelHandle) { 158 if (INFO) { 159 std::cout << "trampolining through to releaseKernel " << std::endl; 160 } 161 auto kernel = reinterpret_cast<Backend::CompilationUnit::Kernel *>(kernelHandle); 162 delete kernel; 163 } 164 165 extern "C" void releaseCompilationUnit(long compilationUnitHandle) { 166 if (INFO) { 167 std::cout << "trampolining through to releaseCompilationUnit " << std::endl; 168 } 169 auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle); 170 delete compilationUnit; 171 } 172 extern "C" bool compilationUnitOK(long compilationUnitHandle) { 173 if (INFO) { 174 std::cout << "trampolining through to compilationUnitHandleOK " << std::endl; 175 } 176 auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle); 177 return compilationUnit->compilationUnitOK(); 178 } 179 180 extern "C" bool getBufferFromDeviceIfDirty(long backendHandle, long memorySegmentHandle, long memorySegmentLength) { 181 if (INFO) { 182 std::cout << "trampolining through to getBuffer " << std::endl; 183 } 184 auto backend = reinterpret_cast<Backend *>(backendHandle); 185 auto memorySegment = reinterpret_cast<void *>(memorySegmentHandle); 186 return backend->getBufferFromDeviceIfDirty(memorySegment, memorySegmentLength); 187 } 188 189 190 Backend::Config::Config(int configBits) 191 : 192 configBits(configBits), 193 minimizeCopies((configBits & MINIMIZE_COPIES_BIT) == MINIMIZE_COPIES_BIT), 194 alwaysCopy(!minimizeCopies), 195 trace((configBits & TRACE_BIT) == TRACE_BIT), 196 traceCopies((configBits & TRACE_COPIES_BIT) == TRACE_COPIES_BIT), 197 traceEnqueues((configBits & TRACE_ENQUEUES_BIT) == TRACE_ENQUEUES_BIT), 198 traceCalls((configBits & TRACE_CALLS_BIT) == TRACE_CALLS_BIT), 199 traceSkippedCopies((configBits & TRACE_SKIPPED_COPIES_BIT) == TRACE_SKIPPED_COPIES_BIT), 200 info((configBits & INFO_BIT) == INFO_BIT), 201 showCode((configBits & SHOW_CODE_BIT) == SHOW_CODE_BIT), 202 profile((configBits & PROFILE_BIT) == PROFILE_BIT), 203 showWhy((configBits & SHOW_WHY_BIT) == SHOW_WHY_BIT), 204 showState((configBits & SHOW_STATE_BIT) == SHOW_STATE_BIT), 205 ptx((configBits & PTX_BIT) == PTX_BIT), 206 207 platform((configBits & 0xf)), 208 device((configBits & 0xf0) >> 4) { 209 if (info) { 210 std::cout << "native showCode " << showCode << std::endl; 211 std::cout << "native info " << info << std::endl; 212 std::cout << "native minimizeCopies " << minimizeCopies << std::endl; 213 std::cout << "native alwaysCopy " << alwaysCopy << std::endl; 214 std::cout << "native trace " << trace << std::endl; 215 std::cout << "native traceSkippedCopies " << traceSkippedCopies << std::endl; 216 std::cout << "native traceCalls " << traceCalls << std::endl; 217 std::cout << "native traceCopies " << traceCopies << std::endl; 218 std::cout << "native traceEnqueues " << traceEnqueues << std::endl; 219 std::cout << "native profile " << profile << std::endl; 220 std::cout << "native showWhy " << showWhy << std::endl; 221 std::cout << "native showState " << showState << std::endl; 222 std::cout << "native ptx " << ptx << std::endl; 223 std::cout << "native platform " << platform << std::endl; 224 std::cout << "native device " << device << std::endl; 225 } 226 } 227 228 Backend::Config::~Config() { 229 } 230 231 Backend::Queue::Queue(Backend *backend) 232 : backend(backend) { 233 } 234 235 Backend::Queue::~Queue() { 236 237 } 238 239 Text::Text(size_t len, char *text, bool isCopy) 240 : len(len), text(text), isCopy(isCopy) { 241 // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl; 242 } 243 244 Text::Text(char *text, bool isCopy) 245 : len(std::strlen(text)), text(text), isCopy(isCopy) { 246 // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl; 247 } 248 249 Text::Text(size_t len) 250 : len(len), text(len > 0 ? new char[len] : nullptr), isCopy(true) { 251 // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl; 252 } 253 254 void Text::write(std::string &filename) const { 255 std::ofstream out; 256 out.open(filename, std::ofstream::trunc); 257 out.write(text, len); 258 out.close(); 259 } 260 261 void Text::read(std::string &filename) { 262 if (isCopy && text) { 263 delete[] text; 264 } 265 text = nullptr; 266 isCopy = false; 267 // std::cout << "reading from " << filename << std::endl; 268 269 std::ifstream ptxStream; 270 ptxStream.open(filename); 271 272 273 ptxStream.seekg(0, std::ios::end); 274 len = ptxStream.tellg(); 275 ptxStream.seekg(0, std::ios::beg); 276 277 if (len > 0) { 278 text = new char[len]; 279 isCopy = true; 280 //std::cerr << "about to read " << len << std::endl; 281 ptxStream.read(text, len); 282 ptxStream.close(); 283 //std::cerr << "read " << len << std::endl; 284 text[len - 1] = '\0'; 285 //std::cerr << "read text " << text << std::endl; 286 } 287 } 288 289 Text::~Text() { 290 if (isCopy && text) { 291 delete[] text; 292 } 293 text = nullptr; 294 isCopy = false; 295 len = 0; 296 } 297 298 Log::Log(size_t len) 299 : Text(len) { 300 } 301 302 Log::Log(char *text) 303 : Text(text, false) { 304 } 305 306 long Backend::CompilationUnit::Kernel::ndrange(void *argArray) { 307 if (compilationUnit->backend->config->traceCalls) { 308 std::cout << "kernelContext(\"" << name << "\"){" << std::endl; 309 } 310 ArgSled argSled(static_cast<ArgArray_s *>(argArray)); 311 auto *profilableQueue = dynamic_cast<ProfilableQueue *>(compilationUnit->backend->queue); 312 if (profilableQueue != nullptr) { 313 profilableQueue->marker(Backend::ProfilableQueue::EnterKernelDispatchBits, name); 314 } 315 if (compilationUnit->backend->config->trace) { 316 Sled::show(std::cout, argArray); 317 } 318 KernelContext *kernelContext = nullptr; 319 for (int i = 0; i < argSled.argc(); i++) { 320 KernelArg *arg = argSled.arg(i); 321 switch (arg->variant) { 322 case '&': { 323 if (arg->idx == 0) { 324 kernelContext = static_cast<KernelContext *>(arg->value.buffer.memorySegment); 325 } 326 if (compilationUnit->backend->config->trace) { 327 std::cout << "arg[" << i << "] = " << std::hex << (int) (arg->value.buffer.access); 328 switch (arg->value.buffer.access) { 329 case RO_BYTE: 330 std::cout << " RO"; 331 break; 332 case WO_BYTE: 333 std::cout << " WO"; 334 break; 335 case RW_BYTE: 336 std::cout << " RW"; 337 break; 338 } 339 std::cout << std::endl; 340 } 341 342 BufferState *bufferState = BufferState::of(arg); 343 344 Buffer *buffer = compilationUnit->backend->getOrCreateBuffer(bufferState); 345 346 bool kernelReadsFromThisArg = (arg->value.buffer.access == RW_BYTE) || (arg->value.buffer.access == RO_BYTE); 347 bool copyToDevice = 348 compilationUnit->backend->config->alwaysCopy 349 || (bufferState->state == BufferState::NEW_STATE) 350 || ((bufferState->state == BufferState::HOST_OWNED) 351 ); 352 353 if (compilationUnit->backend->config->showWhy) { 354 std::cout << 355 "config.alwaysCopy=" << compilationUnit->backend->config->alwaysCopy 356 << " | arg.RW=" << (arg->value.buffer.access == RW_BYTE) 357 << " | arg.RO=" << (arg->value.buffer.access == RO_BYTE) 358 << " | kernel.needsToRead=" << kernelReadsFromThisArg 359 << " | Buffer state = " << BufferState::stateNames[bufferState->state] 360 << " so "; 361 } 362 if (copyToDevice) { 363 compilationUnit->backend->queue->copyToDevice(buffer); 364 // buffer->copyToDevice(); 365 if (compilationUnit->backend->config->traceCopies) { 366 std::cout << "copying arg " << arg->idx << " to device " << std::endl; 367 } 368 } else { 369 if (compilationUnit->backend->config->traceSkippedCopies) { 370 std::cout << "NOT copying arg " << arg->idx << " to device " << std::endl; 371 } 372 } 373 setArg(arg, buffer); 374 if (compilationUnit->backend->config->trace) { 375 std::cout << "set buffer arg " << arg->idx << std::endl; 376 } 377 break; 378 } 379 case 'B': 380 case 'S': 381 case 'C': 382 case 'I': 383 case 'F': 384 case 'J': 385 case 'D': { 386 setArg(arg); 387 if (compilationUnit->backend->config->trace) { 388 std::cerr << "set " << arg->variant << " " << arg->idx << std::endl; 389 } 390 break; 391 } 392 default: { 393 std::cerr << "unexpected variant setting args in OpenCLkernel::kernelContext " << (char) arg->variant << std::endl; 394 exit(1); 395 } 396 } 397 } 398 399 if (kernelContext == nullptr){ 400 std::cerr << "Looks like we recieved a kernel dispatch with xero args kernel='"<<name<<"'" << std::endl; 401 exit(1); 402 } 403 404 if (compilationUnit->backend->config->trace) { 405 std::cout << "kernelContext = " << kernelContext->maxX << std::endl; 406 } 407 408 // We 'double dispatch' back to the kernel to actually do the dispatch 409 410 compilationUnit->backend->queue->dispatch(kernelContext, this); 411 412 413 for (int i = 0; i < argSled.argc(); i++) { // note i = 1... we never need to copy back the KernelContext 414 KernelArg *arg = argSled.arg(i); 415 if (arg->variant == '&') { 416 BufferState *bufferState = BufferState::of(arg); 417 418 bool kernelWroteToThisArg = (arg->value.buffer.access == WO_BYTE) | (arg->value.buffer.access == RW_BYTE); 419 if (compilationUnit->backend->config->showWhy) { 420 std::cout << 421 "config.alwaysCopy=" << compilationUnit->backend->config->alwaysCopy 422 << " | arg.WO=" << (arg->value.buffer.access == WO_BYTE) 423 << " | arg.RW=" << (arg->value.buffer.access == RW_BYTE) 424 << " | kernel.wroteToThisArg=" << kernelWroteToThisArg 425 << "Buffer state = " << BufferState::stateNames[bufferState->state] 426 << " so "; 427 } 428 429 auto *buffer = static_cast<Buffer *>(bufferState->vendorPtr); 430 if (compilationUnit->backend->config->alwaysCopy) { 431 compilationUnit->backend->queue->copyFromDevice(buffer); 432 // buffer->copyFromDevice(); 433 if (compilationUnit->backend->config->traceCopies || compilationUnit->backend->config->traceEnqueues) { 434 std::cout << "copying arg " << arg->idx << " from device " << std::endl; 435 } 436 } else { 437 if (compilationUnit->backend->config->traceSkippedCopies) { 438 std::cout << "NOT copying arg " << arg->idx << " from device " << std::endl; 439 } 440 if (kernelWroteToThisArg) { 441 bufferState->state = BufferState::DEVICE_OWNED; 442 } 443 } 444 } 445 } 446 if (profilableQueue != nullptr) { 447 profilableQueue->marker(Backend::ProfilableQueue::LeaveKernelDispatchBits, name); 448 } 449 compilationUnit->backend->queue->wait(); 450 compilationUnit->backend->queue->release(); 451 if (compilationUnit->backend->config->traceCalls) { 452 std::cout << "\"" << name << "\"}" << std::endl; 453 } 454 return 0; 455 } 456 457