1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 #include <fstream> 26 #define shared_cpp 27 28 #include "shared.h" 29 30 #define INFO 0 31 32 33 void hexdump(void *ptr, int buflen) { 34 auto *buf = static_cast<unsigned char *>(ptr); 35 int i, j; 36 for (i = 0; i < buflen; i += 16) { 37 printf("%06x: ", i); 38 for (j = 0; j < 16; j++) 39 if (i + j < buflen) 40 printf("%02x ", buf[i + j]); 41 else 42 printf(" "); 43 printf(" "); 44 for (j = 0; j < 16; j++) 45 if (i + j < buflen) 46 printf("%c", isprint(buf[i + j]) ? buf[i + j] : '.'); 47 printf("\n"); 48 } 49 } 50 51 void Sled::show(std::ostream &out, void *argArray) { 52 ArgSled argSled(static_cast<ArgArray_s *>(argArray)); 53 for (int i = 0; i < argSled.argc(); i++) { 54 KernelArg *arg = argSled.arg(i); 55 switch (arg->variant) { 56 case '&': { 57 out << "Buf: of " << arg->value.buffer.sizeInBytes << " bytes " << std::endl; 58 break; 59 } 60 case 'B': { 61 out << "S8:" << arg->value.s8 << std::endl; 62 break; 63 } 64 case 'Z': { 65 out << "Z:" << arg->value.z1 << std::endl; 66 break; 67 } 68 case 'C': { 69 out << "U16:" << arg->value.u16 << std::endl; 70 break; 71 } 72 case 'S': { 73 out << "S16:" << arg->value.s16 << std::endl; 74 break; 75 } 76 case 'I': { 77 out << "S32:" << arg->value.s32 << std::endl; 78 break; 79 } 80 case 'F': { 81 out << "F32:" << arg->value.f32 << std::endl; 82 break; 83 } 84 case 'J': { 85 out << "S64:" << arg->value.s64 << std::endl; 86 break; 87 } 88 case 'D': { 89 out << "F64:" << arg->value.f64 << std::endl; 90 break; 91 } 92 default: { 93 std::cerr << "unexpected variant (shared.cpp) '" << static_cast<char>(arg->variant) << "'" << std::endl; 94 exit(1); 95 } 96 } 97 } 98 out << "schema len = " << argSled.schemaLen() << std::endl; 99 100 out << "schema = " << argSled.schema() << std::endl; 101 } 102 103 104 extern "C" void info(long backendHandle) { 105 if (INFO) { 106 std::cout << "trampolining through backendHandle to backend.info()" << std::endl; 107 } 108 auto *backend = reinterpret_cast<Backend *>(backendHandle); 109 backend->info(); 110 } 111 112 extern "C" void computeStart(long backendHandle) { 113 if (INFO) { 114 std::cout << "trampolining through backendHandle to backend.computeStart()" << std::endl; 115 } 116 auto *backend = reinterpret_cast<Backend *>(backendHandle); 117 backend->computeStart(); 118 } 119 120 extern "C" void computeEnd(long backendHandle) { 121 if (INFO) { 122 std::cout << "trampolining through backendHandle to backend.computeEnd()" << std::endl; 123 } 124 auto *backend = reinterpret_cast<Backend *>(backendHandle); 125 backend->computeEnd(); 126 } 127 128 extern "C" void releaseBackend(long backendHandle) { 129 auto *backend = reinterpret_cast<Backend *>(backendHandle); 130 delete backend; 131 } 132 133 extern "C" long compile(long backendHandle, int len, char *source) { 134 if (INFO) { 135 std::cout << "trampolining through backendHandle to backend.compile() " 136 << std::hex << backendHandle << std::dec << std::endl; 137 } 138 auto *backend = reinterpret_cast<Backend *>(backendHandle); 139 long compilationUnitHandle = reinterpret_cast<long>(backend->compile(len, source)); 140 if (INFO) { 141 std::cout << "compilationUnitHandle = " << std::hex << compilationUnitHandle << std::dec << std::endl; 142 } 143 return compilationUnitHandle; 144 } 145 146 extern "C" long getKernel(long compilationUnitHandle, int nameLen, char *name) { 147 if (INFO) { 148 std::cout << "trampolining through programHandle to compilationUnit.getKernel()" 149 << std::hex << compilationUnitHandle << std::dec << std::endl; 150 } 151 auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle); 152 return reinterpret_cast<long>(compilationUnit->getKernel(nameLen, name)); 153 } 154 155 extern "C" long ndrange(long kernelHandle, void *argArray) { 156 if (INFO) { 157 std::cout << "trampolining through kernelHandle to kernel.ndrange(...) " << std::endl; 158 } 159 auto kernel = reinterpret_cast<Backend::CompilationUnit::Kernel *>(kernelHandle); 160 kernel->ndrange(argArray); 161 return (long) 0; 162 } 163 164 extern "C" void releaseKernel(long kernelHandle) { 165 if (INFO) { 166 std::cout << "trampolining through to releaseKernel " << std::endl; 167 } 168 auto kernel = reinterpret_cast<Backend::CompilationUnit::Kernel *>(kernelHandle); 169 delete kernel; 170 } 171 172 extern "C" void releaseCompilationUnit(long compilationUnitHandle) { 173 if (INFO) { 174 std::cout << "trampolining through to releaseCompilationUnit " << std::endl; 175 } 176 auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle); 177 delete compilationUnit; 178 } 179 180 extern "C" bool compilationUnitOK(long compilationUnitHandle) { 181 if (INFO) { 182 std::cout << "trampolining through to compilationUnitHandleOK " << std::endl; 183 } 184 auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle); 185 return compilationUnit->compilationUnitOK(); 186 } 187 188 extern "C" bool getBufferFromDeviceIfDirty(long backendHandle, long memorySegmentHandle, long memorySegmentLength) { 189 if (INFO) { 190 std::cout << "trampolining through to getBuffer " << std::endl; 191 } 192 auto backend = reinterpret_cast<Backend *>(backendHandle); 193 auto memorySegment = reinterpret_cast<void *>(memorySegmentHandle); 194 return backend->getBufferFromDeviceIfDirty(memorySegment, memorySegmentLength); 195 } 196 197 198 Backend::Config::Config(int configBits):BasicConfig(configBits) { 199 200 } 201 202 Backend::Config::~Config() = default; 203 204 Backend::Queue::Queue(Backend *backend) 205 : backend(backend) { 206 } 207 208 Backend::Queue::~Queue() = default; 209 210 Text::Text(size_t len, char *text, bool isCopy) 211 : len(len), text(text), isCopy(isCopy) { 212 // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl; 213 } 214 215 Text::Text(char *text, bool isCopy) 216 : len(std::strlen(text)), text(text), isCopy(isCopy) { 217 // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl; 218 } 219 220 Text::Text(size_t len) 221 : len(len), text(len > 0 ? new char[len] : nullptr), isCopy(true) { 222 // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl; 223 } 224 225 void Text::write(const std::string &filename) const { 226 std::ofstream out; 227 out.open(filename, std::ofstream::trunc); 228 out.write(text, len); 229 out.close(); 230 } 231 232 void Text::read(const std::string &filename) { 233 if (isCopy && text) { 234 delete[] text; 235 } 236 text = nullptr; 237 isCopy = false; 238 // std::cout << "reading from " << filename << std::endl; 239 240 std::ifstream ptxStream; 241 ptxStream.open(filename); 242 243 244 ptxStream.seekg(0, std::ios::end); 245 len = ptxStream.tellg(); 246 ptxStream.seekg(0, std::ios::beg); 247 248 if (len > 0) { 249 text = new char[len]; 250 isCopy = true; 251 //std::cerr << "about to read " << len << std::endl; 252 ptxStream.read(text, len); 253 ptxStream.close(); 254 //std::cerr << "read " << len << std::endl; 255 text[len - 1] = '\0'; 256 //std::cerr << "read text " << text << std::endl; 257 } 258 } 259 260 Text::~Text() { 261 if (isCopy && text) { 262 delete[] text; 263 } 264 text = nullptr; 265 isCopy = false; 266 len = 0; 267 } 268 269 Log::Log(const size_t len) 270 : Text(len) { 271 } 272 273 Log::Log(char *text) 274 : Text(text, false) { 275 } 276 277 long Backend::CompilationUnit::Kernel::ndrange(void *argArray) { 278 if (compilationUnit->backend->config->traceCalls) { 279 std::cout << "kernelContext(\"" << name << "\"){" << std::endl; 280 } 281 ArgSled argSled(static_cast<ArgArray_s *>(argArray)); 282 auto *profilableQueue = dynamic_cast<ProfilableQueue *>(compilationUnit->backend->queue); 283 if (profilableQueue != nullptr) { 284 profilableQueue->marker(Backend::ProfilableQueue::EnterKernelDispatchBits, name); 285 } 286 if (compilationUnit->backend->config->trace) { 287 Sled::show(std::cout, argArray); 288 } 289 KernelContext *kernelContext = nullptr; 290 for (int i = 0; i < argSled.argc(); i++) { 291 KernelArg *arg = argSled.arg(i); 292 switch (arg->variant) { 293 case '&': { 294 if (arg->idx == 0) { 295 kernelContext = static_cast<KernelContext *>(arg->value.buffer.memorySegment); 296 } 297 if (compilationUnit->backend->config->trace) { 298 std::cout << "arg[" << i << "] = " << std::hex << (int) (arg->value.buffer.access); 299 switch (arg->value.buffer.access) { 300 case RO_BYTE: 301 std::cout << " RO"; 302 break; 303 case WO_BYTE: 304 std::cout << " WO"; 305 break; 306 case RW_BYTE: 307 std::cout << " RW"; 308 break; 309 } 310 std::cout << std::endl; 311 } 312 313 BufferState *bufferState = BufferState::of(arg); 314 315 Buffer *buffer = compilationUnit->backend->getOrCreateBuffer(bufferState); 316 317 bool kernelReadsFromThisArg = (arg->value.buffer.access == RW_BYTE) || ( 318 arg->value.buffer.access == RO_BYTE); 319 bool copyToDevice = 320 compilationUnit->backend->config->alwaysCopy 321 || (bufferState->state == BufferState::NEW_STATE) 322 || ((bufferState->state == BufferState::HOST_OWNED) 323 ); 324 325 if (compilationUnit->backend->config->showWhy) { 326 std::cout << 327 "config.alwaysCopy=" << compilationUnit->backend->config->alwaysCopy 328 << " | arg.RW=" << (arg->value.buffer.access == RW_BYTE) 329 << " | arg.RO=" << (arg->value.buffer.access == RO_BYTE) 330 << " | kernel.needsToRead=" << kernelReadsFromThisArg 331 << " | Buffer state = " << BufferState::stateNames[bufferState->state] 332 << " so "; 333 } 334 if (copyToDevice) { 335 compilationUnit->backend->queue->copyToDevice(buffer); 336 // buffer->copyToDevice(); 337 if (compilationUnit->backend->config->traceCopies) { 338 std::cout << "copying arg " << arg->idx << " to device " << std::endl; 339 } 340 } else { 341 if (compilationUnit->backend->config->traceSkippedCopies) { 342 std::cout << "NOT copying arg " << arg->idx << " to device " << std::endl; 343 } 344 } 345 setArg(arg, buffer); 346 if (compilationUnit->backend->config->trace) { 347 std::cout << "set buffer arg " << arg->idx << std::endl; 348 } 349 break; 350 } 351 case 'B': 352 case 'S': 353 case 'C': 354 case 'I': 355 case 'F': 356 case 'J': 357 case 'D': { 358 setArg(arg); 359 if (compilationUnit->backend->config->trace) { 360 std::cerr << "set " << arg->variant << " " << arg->idx << std::endl; 361 } 362 break; 363 } 364 default: { 365 std::cerr << "unexpected variant setting args in OpenCLkernel::kernelContext " << (char) arg->variant << 366 std::endl; 367 exit(1); 368 } 369 } 370 } 371 372 if (kernelContext == nullptr) { 373 std::cerr << "Looks like we recieved a kernel dispatch with xero args kernel='" << name << "'" << std::endl; 374 exit(1); 375 } 376 377 if (compilationUnit->backend->config->trace) { 378 std::cout << "kernelContext = " << kernelContext->maxX << std::endl; 379 } 380 381 // We 'double dispatch' back to the kernel to actually do the dispatch 382 383 compilationUnit->backend->queue->dispatch(kernelContext, this); 384 385 386 for (int i = 0; i < argSled.argc(); i++) { 387 // note i = 1... we never need to copy back the KernelContext 388 KernelArg *arg = argSled.arg(i); 389 if (arg->variant == '&') { 390 BufferState *bufferState = BufferState::of(arg); 391 392 bool kernelWroteToThisArg = (arg->value.buffer.access == WO_BYTE) | (arg->value.buffer.access == RW_BYTE); 393 if (compilationUnit->backend->config->showWhy) { 394 std::cout << 395 "config.alwaysCopy=" << compilationUnit->backend->config->alwaysCopy 396 << " | arg.WO=" << (arg->value.buffer.access == WO_BYTE) 397 << " | arg.RW=" << (arg->value.buffer.access == RW_BYTE) 398 << " | kernel.wroteToThisArg=" << kernelWroteToThisArg 399 << "Buffer state = " << BufferState::stateNames[bufferState->state] 400 << " so "; 401 } 402 403 auto *buffer = static_cast<Buffer *>(bufferState->vendorPtr); 404 if (compilationUnit->backend->config->alwaysCopy) { 405 compilationUnit->backend->queue->copyFromDevice(buffer); 406 // buffer->copyFromDevice(); 407 if (compilationUnit->backend->config->traceCopies || compilationUnit->backend->config->traceEnqueues) { 408 std::cout << "copying arg " << arg->idx << " from device " << std::endl; 409 } 410 } else { 411 if (compilationUnit->backend->config->traceSkippedCopies) { 412 std::cout << "NOT copying arg " << arg->idx << " from device " << std::endl; 413 } 414 if (kernelWroteToThisArg) { 415 bufferState->state = BufferState::DEVICE_OWNED; 416 } 417 } 418 } 419 } 420 if (profilableQueue != nullptr) { 421 profilableQueue->marker(Backend::ProfilableQueue::LeaveKernelDispatchBits, name); 422 } 423 compilationUnit->backend->queue->wait(); 424 compilationUnit->backend->queue->release(); 425 if (compilationUnit->backend->config->traceCalls) { 426 std::cout << "\"" << name << "\"}" << std::endl; 427 } 428 return 0; 429 }