1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 #include "opencl_backend.h" 26 27 OpenCLBackend::OpenCLConfig::OpenCLConfig(int mode): 28 mode(mode), 29 gpu((mode&GPU_BIT)==GPU_BIT), 30 cpu((mode&CPU_BIT)==CPU_BIT), 31 minimizeCopies((mode&MINIMIZE_COPIES_BIT)==MINIMIZE_COPIES_BIT), 32 trace((mode&TRACE_BIT)==TRACE_BIT), 33 traceCopies((mode&TRACE_COPIES_BIT)==TRACE_COPIES_BIT), 34 info((mode&INFO_BIT)==INFO_BIT), 35 showCode((mode&SHOW_CODE_BIT)==SHOW_CODE_BIT), 36 profile((mode&PROFILE_BIT)==PROFILE_BIT){ 37 if (info){ 38 std::cout << "native show_code " << showCode <<std::endl; 39 std::cout << "native info " << info<<std::endl; 40 std::cout << "native gpu " << gpu<<std::endl; 41 std::cout << "native cpu " << cpu<<std::endl; 42 std::cout << "native minimizeCopies " << minimizeCopies<<std::endl; 43 std::cout << "native trace " << trace<<std::endl; 44 std::cout << "native traceCopies " << traceCopies<<std::endl; 45 std::cout << "native profile " << profile<<std::endl; 46 } 47 } 48 OpenCLBackend::OpenCLConfig::~OpenCLConfig(){ 49 } 50 51 52 /* 53 OpenCLBuffer 54 */ 55 56 OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::OpenCLBuffer(Backend::Program::Kernel *kernel, Arg_s *arg) 57 : Backend::Program::Kernel::Buffer(kernel, arg) { 58 cl_int status; 59 OpenCLBackend * openclBackend = dynamic_cast<OpenCLBackend *>(kernel->program->backend); 60 clMem = clCreateBuffer( 61 openclBackend->context, 62 CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, 63 arg->value.buffer.sizeInBytes, 64 arg->value.buffer.memorySegment, 65 &status); 66 67 if (status != CL_SUCCESS) { 68 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 69 exit(1); 70 } 71 72 BufferState_s * bufferState = BufferState_s::of(arg); 73 bufferState->vendorPtr = static_cast<void *>(this); 74 if (openclBackend->openclConfig.traceCopies){ 75 std::cout << "created buffer for arg idx "<< arg->idx << std::endl; 76 } 77 78 } 79 80 81 void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyToDevice() { 82 OpenCLKernel *openclKernel = dynamic_cast<OpenCLKernel *>(kernel); 83 OpenCLBackend *openclBackend = dynamic_cast<OpenCLBackend *>(openclKernel->program->backend); 84 cl_int status = clEnqueueWriteBuffer( 85 openclBackend->openclQueue.command_queue, 86 clMem, 87 CL_FALSE, 88 0, 89 arg->value.buffer.sizeInBytes, 90 arg->value.buffer.memorySegment, 91 openclBackend->openclQueue.eventc, 92 openclBackend->openclQueue.eventListPtr(), 93 openclBackend->openclQueue.nextEventPtr() 94 ); 95 openclBackend->openclQueue.markAsCopyToDeviceAndInc(arg->idx); 96 97 if (status != CL_SUCCESS) { 98 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 99 exit(1); 100 } 101 if(openclBackend->openclConfig.traceCopies){ 102 std::cout << "enqueued buffer for arg idx " << arg->idx << " in OpenCLBuffer::copyToDevice()" << std::endl; 103 } 104 } 105 106 void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyFromDevice() { 107 OpenCLKernel * openclKernel = dynamic_cast<OpenCLKernel *>(kernel); 108 OpenCLBackend * openclBackend = dynamic_cast<OpenCLBackend *>(openclKernel->program->backend); 109 110 cl_int status = clEnqueueReadBuffer( 111 openclBackend->openclQueue.command_queue, 112 clMem, 113 CL_FALSE, 114 0, 115 arg->value.buffer.sizeInBytes, 116 arg->value.buffer.memorySegment, 117 openclBackend->openclQueue.eventc, 118 openclBackend->openclQueue.eventListPtr(), 119 openclBackend->openclQueue.nextEventPtr() 120 ); 121 openclBackend->openclQueue.markAsCopyFromDeviceAndInc(arg->idx); 122 if (status != CL_SUCCESS) { 123 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 124 exit(1); 125 } 126 if(openclBackend->openclConfig.traceCopies){ 127 std::cout << "enqueued buffer for arg idx " << arg->idx << " in OpenCLBuffer::copyFromDevice()" << std::endl; 128 } 129 } 130 131 OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::~OpenCLBuffer() { 132 clReleaseMemObject(clMem); 133 } 134 135 /* 136 OpenCLKernel 137 */ 138 139 OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLKernel(Backend::Program *program, char* name, cl_kernel kernel) 140 : Backend::Program::Kernel(program, name), kernel(kernel){ 141 } 142 143 OpenCLBackend::OpenCLProgram::OpenCLKernel::~OpenCLKernel() { 144 clReleaseKernel(kernel); 145 } 146 147 long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) { 148 149 // std::cout << "ndrange(" << range << ") " << std::endl; 150 ArgSled argSled(static_cast<ArgArray_s *>(argArray)); 151 OpenCLBackend *openclBackend = dynamic_cast<OpenCLBackend*>(program->backend); 152 // std::cout << "Kernel name '"<< (dynamic_cast<Backend::Program::Kernel*>(this))->name<<"'"<<std::endl; 153 openclBackend->openclQueue.marker(openclBackend->openclQueue.EnterKernelDispatchBits, 154 (dynamic_cast<Backend::Program::Kernel*>(this))->name); 155 if (openclBackend->openclConfig.trace){ 156 Sled::show(std::cout, argArray); 157 } 158 NDRange *ndrange = nullptr; 159 for (int i = 0; i < argSled.argc(); i++) { 160 Arg_s *arg = argSled.arg(i); 161 switch (arg->variant) { 162 case '&': { 163 if (openclBackend->openclConfig.trace){ 164 std::cout << "arg["<<i<<"] = "<< std::hex << (int)(arg->value.buffer.access); 165 switch (arg->value.buffer.access){ 166 case RO_BYTE: std::cout << " RO";break; 167 case WO_BYTE: std::cout << " WO";break; 168 case RW_BYTE: std::cout << " RW";break; 169 default: std::cout << "JUNK!!!!"; break; 170 } 171 std::cout << std::endl; 172 } 173 if ((arg->value.buffer.access == RO_BYTE ) || (arg->value.buffer.access == RW_BYTE ) ||(arg->value.buffer.access == WO_BYTE )){ 174 // OK 175 }else{ 176 std::cerr << "arg["<<i<<"] = "<< std::hex << (int)(arg->value.buffer.access) << std::endl; 177 std::exit(1); 178 } 179 180 BufferState_s * bufferState = BufferState_s::of(arg); 181 OpenCLBuffer * openclBuffer =nullptr; 182 if (bufferState->isHostNew()){ 183 openclBuffer = new OpenCLBuffer(this, arg); 184 if (openclBackend->openclConfig.trace){ 185 std::cout << "We allocated arg "<<i<<" buffer "<<std::endl; 186 } 187 bufferState->clearHostNew(); 188 }else{ 189 if (openclBackend->openclConfig.trace){ 190 std::cout << "Were reusing arg "<<i<<" buffer "<<std::endl; 191 } 192 openclBuffer= static_cast<OpenCLBuffer*>(bufferState->vendorPtr); 193 } 194 if (arg->idx == 0){ 195 ndrange = static_cast<NDRange *>(arg->value.buffer.memorySegment); 196 } 197 if (openclBackend->openclConfig.minimizeCopies){ 198 // is the buffer GPU dirty. If so we should not need to copy 199 200 if (bufferState->isDeviceDirty() && bufferState->isHostDirty()){ 201 std::cerr <<" WHY is buffer host and device dirty for arg " << arg->idx << " This should not happen!"<< std::endl; 202 exit(1); 203 } 204 205 206 if (bufferState->isHostDirty()){ 207 if (openclBackend->openclConfig.traceCopies){ 208 std::cout << "HOST is dirty (java side changed code) so copying arg " << arg->idx <<" to device "<< std::endl; 209 } 210 bufferState->clearHostDirty(); 211 openclBuffer->copyToDevice(); 212 213 }else{ 214 if (openclBackend->openclConfig.traceCopies){ 215 std::cout << "HOST is not dirty (java side has not changed code) so not copying arg " << arg->idx <<" to device "<< std::endl; 216 } 217 } 218 219 }else{ 220 if (openclBackend->openclConfig.traceCopies){ 221 std::cout << "copying arg " << arg->idx <<" to device "<< std::endl; 222 } 223 openclBuffer->copyToDevice(); 224 } 225 cl_int status = clSetKernelArg(kernel, arg->idx, sizeof(cl_mem), &openclBuffer->clMem); 226 if (status != CL_SUCCESS) { 227 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 228 exit(1); 229 } 230 if (openclBackend->openclConfig.trace){ 231 std::cout << "set buffer arg " << arg->idx << std::endl; 232 } 233 break; 234 } 235 case 'B': 236 case 'S': 237 case 'C': 238 case 'I': 239 case 'F': 240 case 'J': 241 case 'D': 242 { 243 cl_int status = clSetKernelArg(kernel, arg->idx, arg->size(), (void *) &arg->value); 244 if (status != CL_SUCCESS) { 245 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 246 exit(1); 247 } 248 if (openclBackend->openclConfig.trace){ 249 std::cerr << "set " <<arg->variant << " " << arg->idx << std::endl; 250 } 251 break; 252 } 253 default: { 254 std::cerr << "unexpected variant setting args in OpenCLkernel::ndrange " << (char) arg->variant << std::endl; 255 exit(1); 256 } 257 } 258 } 259 260 size_t globalSize = ndrange->maxX; 261 if (openclBackend->openclConfig.trace){ 262 std::cout << "ndrange = " << ndrange->maxX << std::endl; 263 } 264 size_t dims = 1; 265 cl_int status = clEnqueueNDRangeKernel( 266 openclBackend->openclQueue.command_queue, 267 kernel, 268 dims, 269 nullptr, 270 &globalSize, 271 nullptr, 272 openclBackend->openclQueue.eventc, 273 openclBackend->openclQueue.eventListPtr(), 274 openclBackend->openclQueue.nextEventPtr()); 275 openclBackend->openclQueue.markAsNDRangeAndInc(); 276 if (status != CL_SUCCESS) { 277 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 278 exit(1); 279 } 280 if (openclBackend->openclConfig.trace){ 281 std::cout << "enqueued kernel dispatch globalSize=" << globalSize << std::endl; 282 } 283 if (openclBackend->openclConfig.minimizeCopies){ 284 openclBackend->openclQueue.wait(); 285 }else{ 286 for (int i = 1; i < argSled.argc(); i++) { // note i = 1... we don't need to copy back the KernelContext 287 Arg_s *arg = argSled.arg(i); 288 if (arg->variant == '&') { 289 BufferState_s * bufferState = BufferState_s::of(arg ); 290 static_cast<OpenCLBuffer *>(bufferState->vendorPtr)->copyFromDevice(); 291 if (openclBackend->openclConfig.traceCopies){ 292 std::cout << "copying arg " << arg->idx <<" from device "<< std::endl; 293 bufferState->dump("After copy from device"); 294 } 295 bufferState->setDeviceDirty(); 296 } 297 } 298 openclBackend->openclQueue.wait(); 299 } 300 openclBackend->openclQueue.marker(openclBackend->openclQueue.LeaveKernelDispatchBits, 301 (dynamic_cast<Backend::Program::Kernel*>(this))->name 302 ); 303 return 0; 304 } 305 306 /* 307 OpenCLProgram 308 */ 309 OpenCLBackend::OpenCLProgram::OpenCLProgram(Backend *backend, BuildInfo *buildInfo, cl_program program) 310 : Backend::Program(backend, buildInfo), program(program) { 311 } 312 313 OpenCLBackend::OpenCLProgram::~OpenCLProgram() { 314 clReleaseProgram(program); 315 } 316 317 long OpenCLBackend::OpenCLProgram::getKernel(int nameLen, char *name) { 318 cl_int status; 319 cl_kernel kernel = clCreateKernel(program, name, &status); 320 if (status != CL_SUCCESS){ 321 std::cerr << "Failed to get kernel "<<name<<" "<<errorMsg(status)<<std::endl; 322 } 323 return (long) new OpenCLKernel(this,name, kernel); 324 } 325 326 bool OpenCLBackend::OpenCLProgram::programOK() { 327 return true; 328 } 329 /* 330 OpenCLBackend 331 */ 332 bool OpenCLBackend::getBufferFromDeviceIfDirty(void *memorySegment, long memorySegmentLength) { 333 334 // if (openclConfig->trace){ 335 if (openclConfig.minimizeCopies){ 336 std::cout << "attempting to get buffer from device (if dirty) from OpenCLBackend "<<std::endl; 337 //}else{ 338 // std::cout << "skipping attempt to get buffer from device (if dirty) from OpenCLBackend (we are not minimizing copies) "<<std::endl; 339 } 340 // } 341 342 return true; 343 } 344 345 OpenCLBackend::OpenCLBackend(int mode, int platform, int device ) 346 : Backend(mode), openclConfig(mode), openclQueue(this) { 347 if (openclConfig.trace){ 348 std::cout << "openclConfig->gpu" << (openclConfig.gpu ? "true" : "false") << std::endl; 349 std::cout << "openclConfig->minimizeCopies" << (openclConfig.minimizeCopies ? "true" : "false") << std::endl; 350 } 351 cl_device_type requestedType =openclConfig.gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU; 352 353 cl_int status; 354 cl_uint platformc = 0; 355 if ((status = clGetPlatformIDs(0, NULL, &platformc)) != CL_SUCCESS) { 356 if (status != CL_SUCCESS){ 357 std::cerr << "clGetPlatformIDs (to get count) failed " << errorMsg(status)<<std::endl; 358 } 359 return; 360 } 361 cl_platform_id *platforms = new cl_platform_id[platformc]; 362 if ((status = clGetPlatformIDs(platformc, platforms, NULL)) != CL_SUCCESS) { 363 if (status != CL_SUCCESS){ 364 std::cerr << "clGetPlatformIDs failed " << errorMsg(status)<<std::endl; 365 } 366 return; 367 } 368 369 cl_uint devicec = 0; 370 for (unsigned int i = 0; devicec == 0 && i < platformc; ++i) { 371 platform_id = platforms[i]; 372 if ((status = clGetDeviceIDs(platform_id, requestedType, 0, NULL, &devicec)) != CL_SUCCESS) { 373 if (status != CL_SUCCESS){ 374 std::cerr << "clGetDeviceIDs (to get count) failed " << errorMsg(status)<<std::endl; 375 } 376 delete[] platforms; 377 return; 378 } 379 } 380 if (devicec == 0) { 381 status = CL_DEVICE_NOT_AVAILABLE; 382 std::cerr << "No device available " << errorMsg(status)<<std::endl; 383 return; 384 } 385 cl_device_id *device_ids = new cl_device_id[devicec]; // compute device id 386 if ((status = clGetDeviceIDs(platform_id, requestedType, devicec, device_ids, NULL)) != CL_SUCCESS) { 387 388 std::cerr << "clGetDeviceIDs failed " << errorMsg(status)<<std::endl; 389 delete[] platforms; 390 delete[] device_ids; 391 return; 392 } 393 if ((context = clCreateContext(0, 1, device_ids, NULL, NULL, &status)) == NULL || status != CL_SUCCESS) { 394 std::cerr << "clCreateContext failed " << errorMsg(status)<<std::endl; 395 delete[] platforms; 396 delete[] device_ids; 397 return; 398 } 399 400 cl_command_queue_properties queue_props = CL_QUEUE_PROFILING_ENABLE; 401 402 if ((openclQueue.command_queue = clCreateCommandQueue(context, device_ids[0], queue_props, &status)) == NULL || 403 status != CL_SUCCESS) { 404 std::cerr << "clCreateCommandQueue failed " << errorMsg(status)<<std::endl; 405 clReleaseContext(context); 406 delete[] platforms; 407 delete[] device_ids; 408 return; 409 } 410 411 device_id = device_ids[0]; 412 delete[] device_ids; 413 delete[] platforms; 414 } 415 416 OpenCLBackend::~OpenCLBackend() { 417 clReleaseContext(context); 418 419 } 420 /* 421 static char *strInfo(cl_device_id device_id, cl_device_info device_info){ 422 size_t sz; 423 cl_int status = clGetDeviceInfo(device_id, device_info, 0, nullptr, &sz); 424 char *ptr = new char[sz+1]; 425 status = clGetDeviceInfo(device_id, device_info, sz, ptr,nullptr); 426 return ptr; 427 } 428 429 static cl_int cl_int_info(cl_device_id device_id, cl_device_info device_info){ 430 cl_uint v; 431 cl_int status = clGetDeviceInfo(device_id, device_info, sizeof(v), &v, nullptr); 432 return v; 433 } 434 static cl_ulong cl_ulong_info(cl_device_id device_id, cl_device_info device_info){ 435 cl_ulong v; 436 cl_int status = clGetDeviceInfo(device_id, device_info, sizeof(v), &v, nullptr); 437 return v; 438 } 439 static size_t size_t_info(cl_device_id device_id, cl_device_info device_info){ 440 size_t v; 441 cl_int status = clGetDeviceInfo(device_id, device_info, sizeof(v), &v, nullptr); 442 return v; 443 } 444 445 static char *strInfo(cl_platform_id platform_id,cl_platform_info platform_info){ 446 size_t sz; 447 cl_int status = clGetPlatformInfo(platform_id, platform_info, 0, nullptr, &sz); 448 char *ptr = new char[sz+1]; 449 status = clGetPlatformInfo(platform_id, platform_info, sz, ptr,nullptr); 450 return ptr; 451 } 452 */ 453 char *OpenCLBackend::strInfo( cl_device_info device_info){ 454 size_t sz; 455 cl_int status = clGetDeviceInfo(device_id, device_info, 0, nullptr, &sz); 456 char *ptr = new char[sz+1]; 457 status = clGetDeviceInfo(device_id, device_info, sz, ptr,nullptr); 458 return ptr; 459 } 460 461 cl_int OpenCLBackend::cl_int_info( cl_device_info device_info){ 462 cl_uint v; 463 cl_int status = clGetDeviceInfo(device_id, device_info, sizeof(v), &v, nullptr); 464 return v; 465 } 466 cl_ulong OpenCLBackend::cl_ulong_info( cl_device_info device_info){ 467 cl_ulong v; 468 cl_int status = clGetDeviceInfo(device_id, device_info, sizeof(v), &v, nullptr); 469 return v; 470 } 471 size_t OpenCLBackend::size_t_info( cl_device_info device_info){ 472 size_t v; 473 cl_int status = clGetDeviceInfo(device_id, device_info, sizeof(v), &v, nullptr); 474 return v; 475 } 476 477 char *OpenCLBackend::strPlatformInfo(cl_platform_info platform_info){ 478 size_t sz; 479 cl_int status = clGetPlatformInfo(platform_id, platform_info, 0, nullptr, &sz); 480 char *ptr = new char[sz+1]; 481 status = clGetPlatformInfo(platform_id, platform_info, sz, ptr,nullptr); 482 return ptr; 483 } 484 485 void OpenCLBackend::computeStart() { 486 if (openclConfig.trace){ 487 std::cout <<"compute start" <<std::endl; 488 } 489 openclQueue.computeStart(); 490 } 491 void OpenCLBackend::computeEnd() { 492 openclQueue.computeEnd(); 493 openclQueue.wait(); 494 495 if (openclConfig.profile){ 496 openclQueue.showEvents(100); 497 } 498 openclQueue.release(); 499 if (openclConfig.trace){ 500 std::cout <<"compute end" <<std::endl; 501 } 502 } 503 504 struct PlatformInfo{ 505 OpenCLBackend *openclBackend; 506 char *versionName; 507 char *vendorName; 508 char *name; 509 510 struct DeviceInfo{ 511 OpenCLBackend *openclBackend; 512 cl_int maxComputeUnits; 513 cl_int maxWorkItemDimensions; 514 cl_device_type deviceType; 515 size_t maxWorkGroupSize; 516 cl_ulong globalMemSize; 517 cl_ulong localMemSize; 518 cl_ulong maxMemAllocSize; 519 char *profile; 520 char *deviceVersion; 521 size_t *maxWorkItemSizes ; 522 char *driverVersion; 523 char *cVersion; 524 char *name; 525 char *extensions; 526 char *builtInKernels; 527 char *deviceTypeStr; 528 529 DeviceInfo(OpenCLBackend *openclBackend): 530 openclBackend(openclBackend), 531 maxComputeUnits(openclBackend->cl_int_info( CL_DEVICE_MAX_COMPUTE_UNITS)), 532 maxWorkItemDimensions(openclBackend->cl_int_info( CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)), 533 maxWorkGroupSize(openclBackend->size_t_info( CL_DEVICE_MAX_WORK_GROUP_SIZE)), 534 maxWorkItemSizes( new size_t[maxWorkItemDimensions]), 535 maxMemAllocSize(openclBackend->cl_ulong_info(CL_DEVICE_MAX_MEM_ALLOC_SIZE)), 536 globalMemSize(openclBackend->cl_ulong_info( CL_DEVICE_GLOBAL_MEM_SIZE)), 537 localMemSize(openclBackend->cl_ulong_info( CL_DEVICE_LOCAL_MEM_SIZE)), 538 profile(openclBackend->strInfo( CL_DEVICE_PROFILE)), 539 deviceVersion(openclBackend->strInfo( CL_DEVICE_VERSION)), 540 driverVersion(openclBackend->strInfo( CL_DRIVER_VERSION)), 541 cVersion(openclBackend->strInfo( CL_DEVICE_OPENCL_C_VERSION)), 542 name(openclBackend->strInfo( CL_DEVICE_NAME)), 543 extensions(openclBackend->strInfo( CL_DEVICE_EXTENSIONS)), 544 builtInKernels(openclBackend->strInfo( CL_DEVICE_BUILT_IN_KERNELS)){ 545 546 clGetDeviceInfo(openclBackend->device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxWorkItemDimensions, maxWorkItemSizes, NULL); 547 clGetDeviceInfo(openclBackend->device_id, CL_DEVICE_TYPE, sizeof(deviceType), &deviceType, NULL); 548 char buf[512]; 549 buf[0]='\0'; 550 if (CL_DEVICE_TYPE_CPU == (deviceType & CL_DEVICE_TYPE_CPU)) { 551 std::strcat(buf, "CPU "); 552 } 553 if (CL_DEVICE_TYPE_GPU == (deviceType & CL_DEVICE_TYPE_GPU)) { 554 std::strcat(buf, "GPU "); 555 } 556 if (CL_DEVICE_TYPE_ACCELERATOR == (deviceType & CL_DEVICE_TYPE_ACCELERATOR)) { 557 std::strcat(buf, "ACC "); 558 } 559 deviceTypeStr = new char[std::strlen(buf)]; 560 std::strcpy(deviceTypeStr, buf); 561 } 562 ~DeviceInfo(){ 563 delete [] deviceTypeStr; 564 delete [] profile; 565 delete [] deviceVersion; 566 delete [] driverVersion; 567 delete [] cVersion; 568 delete [] name; 569 delete [] extensions; 570 delete [] builtInKernels; 571 delete [] maxWorkItemSizes; 572 } 573 }; 574 DeviceInfo deviceInfo; 575 PlatformInfo(OpenCLBackend *openclBackend): 576 openclBackend(openclBackend), 577 versionName(openclBackend->strPlatformInfo(CL_PLATFORM_VERSION)), 578 vendorName(openclBackend->strPlatformInfo(CL_PLATFORM_VENDOR)), 579 name(openclBackend->strPlatformInfo(CL_PLATFORM_NAME)), 580 deviceInfo(openclBackend){ 581 } 582 ~PlatformInfo(){ 583 delete [] versionName; 584 delete [] vendorName; 585 delete [] name; 586 } 587 }; 588 589 void OpenCLBackend::info() { 590 PlatformInfo platformInfo(this); 591 cl_int status; 592 std::cerr << "platform{" <<std::endl; 593 std::cerr << " CL_PLATFORM_VENDOR..\"" << platformInfo.vendorName <<"\""<<std::endl; 594 std::cerr << " CL_PLATFORM_VERSION.\"" << platformInfo.versionName <<"\""<<std::endl; 595 std::cerr << " CL_PLATFORM_NAME....\"" << platformInfo.name <<"\""<<std::endl; 596 std::cerr << " CL_DEVICE_TYPE..................... " << platformInfo.deviceInfo.deviceTypeStr << " "<< platformInfo.deviceInfo.deviceType<<std::endl; 597 std::cerr << " CL_DEVICE_MAX_COMPUTE_UNITS........ " << platformInfo.deviceInfo.maxComputeUnits<<std::endl; 598 std::cerr << " CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS. " << platformInfo.deviceInfo.maxWorkItemDimensions << " {"; 599 for (unsigned dimIdx = 0; dimIdx < platformInfo.deviceInfo.maxWorkItemDimensions; dimIdx++) { 600 std::cerr<< platformInfo.deviceInfo.maxWorkItemSizes[dimIdx] << " "; 601 } 602 std::cerr<< "}"<<std::endl; 603 std::cerr << " CL_DEVICE_MAX_WORK_GROUP_SIZE...... "<< platformInfo.deviceInfo.maxWorkGroupSize<<std::endl; 604 std::cerr << " CL_DEVICE_MAX_MEM_ALLOC_SIZE....... "<< platformInfo.deviceInfo.maxMemAllocSize<<std::endl; 605 std::cerr << " CL_DEVICE_GLOBAL_MEM_SIZE.......... "<< platformInfo.deviceInfo.globalMemSize<<std::endl; 606 std::cerr << " CL_DEVICE_LOCAL_MEM_SIZE........... "<< platformInfo.deviceInfo.localMemSize<<std::endl; 607 std::cerr << " CL_DEVICE_PROFILE.................. "<< platformInfo.deviceInfo.profile<<std::endl; 608 std::cerr << " CL_DEVICE_VERSION.................. "<< platformInfo.deviceInfo.deviceVersion<<std::endl; 609 std::cerr << " CL_DRIVER_VERSION.................. "<< platformInfo.deviceInfo.driverVersion<<std::endl; 610 std::cerr << " CL_DEVICE_OPENCL_C_VERSION......... "<< platformInfo.deviceInfo.cVersion<<std::endl; 611 std::cerr << " CL_DEVICE_NAME..................... "<< platformInfo.deviceInfo.name<<std::endl; 612 std::cerr << " CL_DEVICE_EXTENSIONS............... "<< platformInfo.deviceInfo.extensions<<std::endl; 613 std::cerr << " CL_DEVICE_BUILT_IN_KERNELS......... "<< platformInfo.deviceInfo.builtInKernels<<std::endl; 614 std::cerr << "}"<<std::endl; 615 } 616 617 int OpenCLBackend::getMaxComputeUnits() { 618 PlatformInfo platformInfo(this); 619 return platformInfo.deviceInfo.maxComputeUnits; 620 } 621 long OpenCLBackend::compileProgram(int len, char *source) { 622 size_t srcLen = ::strlen(source); 623 char *src = new char[srcLen + 1]; 624 ::strncpy(src, source, srcLen); 625 src[srcLen] = '\0'; 626 if(openclConfig.trace){ 627 std::cout << "native compiling " << src << std::endl; 628 } 629 cl_int status; 630 cl_program program; 631 if ((program = clCreateProgramWithSource(context, 1, (const char **) &src, nullptr, &status)) == nullptr || 632 status != CL_SUCCESS) { 633 std::cerr << "clCreateProgramWithSource failed" << std::endl; 634 delete[] src; 635 return 0; 636 } 637 638 if ((status = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr)) != CL_SUCCESS) { 639 std::cerr << "clBuildProgram failed" << std::endl; 640 // dont return we may still be able to get log! 641 } 642 size_t logLen = 0; 643 644 BuildInfo *buildInfo = nullptr; 645 if ((status = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, nullptr, &logLen)) != CL_SUCCESS) { 646 std::cerr << "clGetBuildInfo (getting log size) failed" << std::endl; 647 buildInfo = new BuildInfo(src, nullptr, true); 648 } else { 649 cl_build_status buildStatus; 650 clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, nullptr); 651 if (logLen > 0) { 652 char *log = new char[logLen + 1]; 653 if ((status = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, logLen + 1, (void *) log, 654 nullptr)) != CL_SUCCESS) { 655 std::cerr << "clGetBuildInfo (getting log) failed" << std::endl; 656 delete[] log; 657 log = nullptr; 658 } else { 659 log[logLen] = '\0'; 660 if (logLen > 1) { 661 std::cerr << "logLen = " << logLen << " log = " << log << std::endl; 662 } 663 } 664 buildInfo = new BuildInfo(src, log, true); 665 } else { 666 buildInfo = new BuildInfo(src, nullptr, true); 667 } 668 } 669 670 return reinterpret_cast<long>(new OpenCLProgram(this, buildInfo, program)); 671 } 672 673 const char *OpenCLBackend::errorMsg(cl_int status) { 674 static struct { 675 cl_int code; 676 const char *msg; 677 } error_table[] = { 678 {CL_SUCCESS, "success"}, 679 {CL_DEVICE_NOT_FOUND, "device not found",}, 680 {CL_DEVICE_NOT_AVAILABLE, "device not available",}, 681 {CL_COMPILER_NOT_AVAILABLE, "compiler not available",}, 682 {CL_MEM_OBJECT_ALLOCATION_FAILURE, "mem object allocation failure",}, 683 {CL_OUT_OF_RESOURCES, "out of resources",}, 684 {CL_OUT_OF_HOST_MEMORY, "out of host memory",}, 685 {CL_PROFILING_INFO_NOT_AVAILABLE, "profiling not available",}, 686 {CL_MEM_COPY_OVERLAP, "memcopy overlaps",}, 687 {CL_IMAGE_FORMAT_MISMATCH, "image format mismatch",}, 688 {CL_IMAGE_FORMAT_NOT_SUPPORTED, "image format not supported",}, 689 {CL_BUILD_PROGRAM_FAILURE, "build program failed",}, 690 {CL_MAP_FAILURE, "map failed",}, 691 {CL_INVALID_VALUE, "invalid value",}, 692 {CL_INVALID_DEVICE_TYPE, "invalid device type",}, 693 {CL_INVALID_PLATFORM, "invlaid platform",}, 694 {CL_INVALID_DEVICE, "invalid device",}, 695 {CL_INVALID_CONTEXT, "invalid context",}, 696 {CL_INVALID_QUEUE_PROPERTIES, "invalid queue properties",}, 697 {CL_INVALID_COMMAND_QUEUE, "invalid command queue",}, 698 {CL_INVALID_HOST_PTR, "invalid host ptr",}, 699 {CL_INVALID_MEM_OBJECT, "invalid mem object",}, 700 {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "invalid image format descriptor ",}, 701 {CL_INVALID_IMAGE_SIZE, "invalid image size",}, 702 {CL_INVALID_SAMPLER, "invalid sampler",}, 703 {CL_INVALID_BINARY, "invalid binary",}, 704 {CL_INVALID_BUILD_OPTIONS, "invalid build options",}, 705 {CL_INVALID_PROGRAM, "invalid program ",}, 706 {CL_INVALID_PROGRAM_EXECUTABLE, "invalid program executable",}, 707 {CL_INVALID_KERNEL_NAME, "invalid kernel name",}, 708 {CL_INVALID_KERNEL_DEFINITION, "invalid definition",}, 709 {CL_INVALID_KERNEL, "invalid kernel",}, 710 {CL_INVALID_ARG_INDEX, "invalid arg index",}, 711 {CL_INVALID_ARG_VALUE, "invalid arg value",}, 712 {CL_INVALID_ARG_SIZE, "invalid arg size",}, 713 {CL_INVALID_KERNEL_ARGS, "invalid kernel args",}, 714 {CL_INVALID_WORK_DIMENSION, "invalid work dimension",}, 715 {CL_INVALID_WORK_GROUP_SIZE, "invalid work group size",}, 716 {CL_INVALID_WORK_ITEM_SIZE, "invalid work item size",}, 717 {CL_INVALID_GLOBAL_OFFSET, "invalid global offset",}, 718 {CL_INVALID_EVENT_WAIT_LIST, "invalid event wait list",}, 719 {CL_INVALID_EVENT, "invalid event",}, 720 {CL_INVALID_OPERATION, "invalid operation",}, 721 {CL_INVALID_GL_OBJECT, "invalid gl object",}, 722 {CL_INVALID_BUFFER_SIZE, "invalid buffer size",}, 723 {CL_INVALID_MIP_LEVEL, "invalid mip level",}, 724 {CL_INVALID_GLOBAL_WORK_SIZE, "invalid global work size",}, 725 {-9999, "enqueueNdRangeKernel Illegal read or write to a buffer",}, 726 {0, NULL}, 727 }; 728 for (int i = 0; error_table[i].msg != NULL; i++) { 729 if (error_table[i].code == status) { 730 //std::cerr << " clerror '" << error_table[i].msg << "'" << std::endl; 731 return error_table[i].msg; 732 } 733 } 734 static char unknown[256]; 735 #if defined (_WIN32) 736 _snprintf 737 #else 738 snprintf 739 #endif 740 (unknown, sizeof(unknown), "unmapped string for error %d", status); 741 return unknown; 742 } 743 744 745 long getOpenCLBackend(int mode, int platform, int device, int unused) { 746 // std::cerr << "Opencl Driver mode=" << mode << " platform=" << platform << " device=" << device << std::endl; 747 748 return reinterpret_cast<long>(new OpenCLBackend(mode, platform, device)); 749 } 750 751 752 void __checkOpenclErrors(cl_int status, const char *file, const int line) { 753 if (CL_SUCCESS != status) { 754 std::cerr << "Opencl Driver API error = " << status << " from file " << file << " line " << line << std::endl; 755 exit(-1); 756 } 757 } 758