1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 #include "opencl_backend.h"
 26 
 27 OpenCLBackend::OpenCLConfig::OpenCLConfig(int mode):
 28        mode(mode),
 29        gpu((mode&GPU_BIT)==GPU_BIT),
 30        cpu((mode&CPU_BIT)==CPU_BIT),
 31        minimizeCopies((mode&MINIMIZE_COPIES_BIT)==MINIMIZE_COPIES_BIT),
 32        trace((mode&TRACE_BIT)==TRACE_BIT),
 33        traceCopies((mode&TRACE_COPIES_BIT)==TRACE_COPIES_BIT),
 34        info((mode&INFO_BIT)==INFO_BIT),
 35        showCode((mode&SHOW_CODE_BIT)==SHOW_CODE_BIT),
 36        profile((mode&PROFILE_BIT)==PROFILE_BIT){
 37        if (info){
 38           std::cout << "native show_code " << showCode <<std::endl;
 39           std::cout <<  "native info " << info<<std::endl;
 40           std::cout << "native gpu " << gpu<<std::endl;
 41           std::cout << "native cpu " << cpu<<std::endl;
 42           std::cout << "native minimizeCopies " << minimizeCopies<<std::endl;
 43           std::cout << "native trace " << trace<<std::endl;
 44           std::cout << "native traceCopies " << traceCopies<<std::endl;
 45           std::cout << "native profile " << profile<<std::endl;
 46        }
 47  }
 48  OpenCLBackend::OpenCLConfig::~OpenCLConfig(){
 49  }
 50 
 51 
 52 /*
 53   OpenCLBuffer
 54   */
 55 
 56 OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::OpenCLBuffer(Backend::Program::Kernel *kernel, Arg_s *arg)
 57         : Backend::Program::Kernel::Buffer(kernel, arg) {
 58     cl_int status;
 59     OpenCLBackend * openclBackend = dynamic_cast<OpenCLBackend *>(kernel->program->backend);
 60     clMem = clCreateBuffer(
 61         openclBackend->context,
 62         CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
 63         arg->value.buffer.sizeInBytes,
 64         arg->value.buffer.memorySegment,
 65         &status);
 66 
 67     if (status != CL_SUCCESS) {
 68         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
 69         exit(1);
 70     }
 71 
 72     BufferState_s * bufferState = BufferState_s::of(arg);
 73      bufferState->vendorPtr =  static_cast<void *>(this);
 74     if (openclBackend->openclConfig.traceCopies){
 75         std::cout << "created buffer for arg idx "<< arg->idx << std::endl;
 76     }
 77 
 78 }
 79 
 80 
 81 void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyToDevice() {
 82     OpenCLKernel *openclKernel = dynamic_cast<OpenCLKernel *>(kernel);
 83     OpenCLBackend *openclBackend = dynamic_cast<OpenCLBackend *>(openclKernel->program->backend);
 84     cl_int status = clEnqueueWriteBuffer(
 85        openclBackend->openclQueue.command_queue,
 86        clMem,
 87        CL_FALSE,
 88        0,
 89        arg->value.buffer.sizeInBytes,
 90        arg->value.buffer.memorySegment,
 91        openclBackend->openclQueue.eventc,
 92        openclBackend->openclQueue.eventListPtr(),
 93        openclBackend->openclQueue.nextEventPtr()
 94     );
 95     openclBackend->openclQueue.markAsCopyToDeviceAndInc(arg->idx);
 96 
 97     if (status != CL_SUCCESS) {
 98         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
 99         exit(1);
100     }
101     if(openclBackend->openclConfig.traceCopies){
102         std::cout << "enqueued buffer for arg idx " << arg->idx << " in OpenCLBuffer::copyToDevice()" << std::endl;
103     }
104 }
105 
106 void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyFromDevice() {
107     OpenCLKernel * openclKernel = dynamic_cast<OpenCLKernel *>(kernel);
108     OpenCLBackend * openclBackend = dynamic_cast<OpenCLBackend *>(openclKernel->program->backend);
109 
110     cl_int status = clEnqueueReadBuffer(
111        openclBackend->openclQueue.command_queue,
112        clMem,
113        CL_FALSE,
114        0,
115        arg->value.buffer.sizeInBytes,
116        arg->value.buffer.memorySegment,
117        openclBackend->openclQueue.eventc,
118        openclBackend->openclQueue.eventListPtr(),
119        openclBackend->openclQueue.nextEventPtr()
120     );
121     openclBackend->openclQueue.markAsCopyFromDeviceAndInc(arg->idx);
122     if (status != CL_SUCCESS) {
123         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
124         exit(1);
125     }
126     if(openclBackend->openclConfig.traceCopies){
127        std::cout << "enqueued buffer for arg idx " << arg->idx << " in OpenCLBuffer::copyFromDevice()" << std::endl;
128     }
129 }
130 
131 OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::~OpenCLBuffer() {
132     clReleaseMemObject(clMem);
133 }
134 
135 /*
136   OpenCLKernel
137   */
138 
139 OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLKernel(Backend::Program *program, char* name, cl_kernel kernel)
140     : Backend::Program::Kernel(program, name), kernel(kernel){
141 }
142 
143 OpenCLBackend::OpenCLProgram::OpenCLKernel::~OpenCLKernel() {
144     clReleaseKernel(kernel);
145 }
146 
147 long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
148 
149    // std::cout << "ndrange(" << range << ") " << std::endl;
150     ArgSled argSled(static_cast<ArgArray_s *>(argArray));
151     OpenCLBackend *openclBackend = dynamic_cast<OpenCLBackend*>(program->backend);
152   //  std::cout << "Kernel name '"<< (dynamic_cast<Backend::Program::Kernel*>(this))->name<<"'"<<std::endl;
153     openclBackend->openclQueue.marker(openclBackend->openclQueue.EnterKernelDispatchBits,
154      (dynamic_cast<Backend::Program::Kernel*>(this))->name);
155     if (openclBackend->openclConfig.trace){
156        Sled::show(std::cout, argArray);
157     }
158     NDRange *ndrange = nullptr;
159     for (int i = 0; i < argSled.argc(); i++) {
160         Arg_s *arg = argSled.arg(i);
161         switch (arg->variant) {
162             case '&': {
163                if (openclBackend->openclConfig.trace){
164                   std::cout << "arg["<<i<<"] = "<< std::hex << (int)(arg->value.buffer.access);
165                   switch (arg->value.buffer.access){
166                       case RO_BYTE: std::cout << " RO";break;
167                       case WO_BYTE: std::cout << " WO";break;
168                       case RW_BYTE: std::cout << " RW";break;
169                       default: std::cout << "JUNK!!!!"; break;
170                   }
171                   std::cout << std::endl;
172                }
173                if ((arg->value.buffer.access == RO_BYTE ) || (arg->value.buffer.access == RW_BYTE ) ||(arg->value.buffer.access == WO_BYTE )){
174                  // OK
175                }else{
176                   std::cerr << "arg["<<i<<"] = "<< std::hex << (int)(arg->value.buffer.access) << std::endl;
177                   std::exit(1);
178                }
179 
180                BufferState_s * bufferState = BufferState_s::of(arg);
181                OpenCLBuffer * openclBuffer =nullptr;
182                if (bufferState->isHostNew()){
183                   openclBuffer = new OpenCLBuffer(this, arg);
184                   if (openclBackend->openclConfig.trace){
185                      std::cout << "We allocated arg "<<i<<" buffer "<<std::endl;
186                   }
187                   bufferState->clearHostNew();
188                }else{
189                   if (openclBackend->openclConfig.trace){
190                       std::cout << "Were reusing  arg "<<i<<" buffer "<<std::endl;
191                   }
192                   openclBuffer=  static_cast<OpenCLBuffer*>(bufferState->vendorPtr);
193                 }
194                 if (arg->idx == 0){
195                     ndrange = static_cast<NDRange *>(arg->value.buffer.memorySegment);
196                 }
197                 if (openclBackend->openclConfig.minimizeCopies){
198                    // is the buffer GPU dirty. If so we should not need to copy
199 
200                      if (bufferState->isDeviceDirty() && bufferState->isHostDirty()){
201                            std::cerr <<" WHY is buffer host and device dirty for arg " << arg->idx << "  This should not happen!"<< std::endl;
202                            exit(1);
203                         }
204 
205 
206                     if (bufferState->isHostDirty()){
207                        if (openclBackend->openclConfig.traceCopies){
208                           std::cout << "HOST is dirty (java side changed code) so copying arg " << arg->idx <<" to device "<< std::endl;
209                        }
210                        bufferState->clearHostDirty();
211                        openclBuffer->copyToDevice();
212 
213                     }else{
214                        if (openclBackend->openclConfig.traceCopies){
215                            std::cout << "HOST is not dirty (java side has not changed code) so not copying arg " << arg->idx <<" to device "<< std::endl;
216                        }
217                     }
218 
219                 }else{
220                     if (openclBackend->openclConfig.traceCopies){
221                         std::cout << "copying arg " << arg->idx <<" to device "<< std::endl;
222                     }
223                     openclBuffer->copyToDevice();
224                 }
225                 cl_int status = clSetKernelArg(kernel, arg->idx, sizeof(cl_mem), &openclBuffer->clMem);
226                 if (status != CL_SUCCESS) {
227                     std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
228                     exit(1);
229                 }
230                 if (openclBackend->openclConfig.trace){
231                    std::cout << "set buffer arg " << arg->idx << std::endl;
232                 }
233                 break;
234             }
235              case 'B':
236              case 'S':
237              case 'C':
238              case 'I':
239              case 'F':
240              case 'J':
241              case 'D':
242              {
243                 cl_int status = clSetKernelArg(kernel, arg->idx, arg->size(), (void *) &arg->value);
244                 if (status != CL_SUCCESS) {
245                     std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
246                     exit(1);
247                 }
248                 if (openclBackend->openclConfig.trace){
249                    std::cerr << "set " <<arg->variant << " " << arg->idx << std::endl;
250                 }
251                 break;
252             }
253             default: {
254                 std::cerr << "unexpected variant setting args in OpenCLkernel::ndrange " << (char) arg->variant << std::endl;
255                 exit(1);
256             }
257         }
258     }
259 
260     size_t globalSize = ndrange->maxX;
261     if (openclBackend->openclConfig.trace){
262        std::cout << "ndrange = " << ndrange->maxX << std::endl;
263     }
264     size_t dims = 1;
265     cl_int status = clEnqueueNDRangeKernel(
266             openclBackend->openclQueue.command_queue,
267             kernel,
268             dims,
269             nullptr,
270             &globalSize,
271             nullptr,
272             openclBackend->openclQueue.eventc,
273             openclBackend->openclQueue.eventListPtr(),
274             openclBackend->openclQueue.nextEventPtr());
275     openclBackend->openclQueue.markAsNDRangeAndInc();
276     if (status != CL_SUCCESS) {
277         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
278         exit(1);
279     }
280     if (openclBackend->openclConfig.trace){
281        std::cout << "enqueued kernel dispatch globalSize=" << globalSize << std::endl;
282     }
283    if (openclBackend->openclConfig.minimizeCopies){
284      openclBackend->openclQueue.wait();
285    }else{
286        for (int i = 1; i < argSled.argc(); i++) { // note i = 1... we don't need to copy back the KernelContext
287           Arg_s *arg = argSled.arg(i);
288           if (arg->variant == '&') {
289              BufferState_s * bufferState = BufferState_s::of(arg );
290              static_cast<OpenCLBuffer *>(bufferState->vendorPtr)->copyFromDevice();
291              if (openclBackend->openclConfig.traceCopies){
292                 std::cout << "copying arg " << arg->idx <<" from device "<< std::endl;
293                 bufferState->dump("After copy from device");
294              }
295              bufferState->setDeviceDirty();
296           }
297        }
298          openclBackend->openclQueue.wait();
299     }
300       openclBackend->openclQueue.marker(openclBackend->openclQueue.LeaveKernelDispatchBits,
301            (dynamic_cast<Backend::Program::Kernel*>(this))->name
302       );
303     return 0;
304 }
305 
306 /*
307   OpenCLProgram
308   */
309 OpenCLBackend::OpenCLProgram::OpenCLProgram(Backend *backend, BuildInfo *buildInfo, cl_program program)
310     : Backend::Program(backend, buildInfo), program(program) {
311 }
312 
313 OpenCLBackend::OpenCLProgram::~OpenCLProgram() {
314     clReleaseProgram(program);
315 }
316 
317 long OpenCLBackend::OpenCLProgram::getKernel(int nameLen, char *name) {
318     cl_int status;
319     cl_kernel kernel = clCreateKernel(program, name, &status);
320     if (status != CL_SUCCESS){
321        std::cerr << "Failed to get kernel "<<name<<" "<<errorMsg(status)<<std::endl;
322     }
323     return (long) new OpenCLKernel(this,name, kernel);
324 }
325 
326 bool OpenCLBackend::OpenCLProgram::programOK() {
327     return true;
328 }
329 /*
330   OpenCLBackend
331   */
332 bool OpenCLBackend::getBufferFromDeviceIfDirty(void *memorySegment, long memorySegmentLength) {
333 
334    // if (openclConfig->trace){
335        if (openclConfig.minimizeCopies){
336          std::cout << "attempting  to get buffer from device (if dirty) from OpenCLBackend "<<std::endl;
337        //}else{
338        //  std::cout << "skipping attempt  to get buffer from device (if dirty) from OpenCLBackend (we are not minimizing copies) "<<std::endl;
339        }
340    // }
341 
342     return true;
343 }
344 
345 OpenCLBackend::OpenCLBackend(int mode, int platform, int device )
346         : Backend(mode), openclConfig(mode), openclQueue(this) {
347     if (openclConfig.trace){
348         std::cout << "openclConfig->gpu" << (openclConfig.gpu ? "true" : "false") << std::endl;
349         std::cout << "openclConfig->minimizeCopies" << (openclConfig.minimizeCopies ? "true" : "false") << std::endl;
350     }
351     cl_device_type requestedType =openclConfig.gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
352 
353     cl_int status;
354     cl_uint platformc = 0;
355     if ((status = clGetPlatformIDs(0, NULL, &platformc)) != CL_SUCCESS) {
356         if (status != CL_SUCCESS){
357            std::cerr << "clGetPlatformIDs (to get count) failed " << errorMsg(status)<<std::endl;
358         }
359         return;
360     }
361     cl_platform_id *platforms = new cl_platform_id[platformc];
362     if ((status = clGetPlatformIDs(platformc, platforms, NULL)) != CL_SUCCESS) {
363          if (status != CL_SUCCESS){
364             std::cerr << "clGetPlatformIDs failed " << errorMsg(status)<<std::endl;
365          }
366         return;
367     }
368 
369     cl_uint devicec = 0;
370     for (unsigned int i = 0; devicec == 0 && i < platformc; ++i) {
371         platform_id = platforms[i];
372         if ((status = clGetDeviceIDs(platform_id, requestedType, 0, NULL, &devicec)) != CL_SUCCESS) {
373             if (status != CL_SUCCESS){
374                std::cerr << "clGetDeviceIDs (to get count) failed " << errorMsg(status)<<std::endl;
375             }
376             delete[] platforms;
377             return;
378         }
379     }
380     if (devicec == 0) {
381         status = CL_DEVICE_NOT_AVAILABLE;
382         std::cerr << "No device available " << errorMsg(status)<<std::endl;
383         return;
384     }
385     cl_device_id *device_ids = new cl_device_id[devicec];             // compute device id
386     if ((status = clGetDeviceIDs(platform_id, requestedType, devicec, device_ids, NULL)) != CL_SUCCESS) {
387 
388         std::cerr << "clGetDeviceIDs failed " << errorMsg(status)<<std::endl;
389         delete[] platforms;
390         delete[] device_ids;
391         return;
392     }
393     if ((context = clCreateContext(0, 1, device_ids, NULL, NULL, &status)) == NULL || status != CL_SUCCESS) {
394         std::cerr << "clCreateContext failed " << errorMsg(status)<<std::endl;
395         delete[] platforms;
396         delete[] device_ids;
397         return;
398     }
399 
400     cl_command_queue_properties queue_props = CL_QUEUE_PROFILING_ENABLE;
401 
402     if ((openclQueue.command_queue = clCreateCommandQueue(context, device_ids[0], queue_props, &status)) == NULL ||
403         status != CL_SUCCESS) {
404         std::cerr << "clCreateCommandQueue failed " << errorMsg(status)<<std::endl;
405         clReleaseContext(context);
406         delete[] platforms;
407         delete[] device_ids;
408         return;
409     }
410 
411     device_id = device_ids[0];
412     delete[] device_ids;
413     delete[] platforms;
414 }
415 
416 OpenCLBackend::~OpenCLBackend() {
417     clReleaseContext(context);
418 
419 }
420 /*
421   static char *strInfo(cl_device_id device_id, cl_device_info device_info){
422      size_t sz;
423      cl_int  status = clGetDeviceInfo(device_id, device_info, 0, nullptr,  &sz);
424      char *ptr = new char[sz+1];
425      status = clGetDeviceInfo(device_id, device_info, sz, ptr,nullptr);
426      return ptr;
427   }
428 
429   static cl_int cl_int_info(cl_device_id device_id, cl_device_info device_info){
430      cl_uint v;
431      cl_int status = clGetDeviceInfo(device_id, device_info, sizeof(v), &v, nullptr);
432      return v;
433   }
434   static cl_ulong cl_ulong_info(cl_device_id device_id, cl_device_info device_info){
435      cl_ulong v;
436      cl_int status = clGetDeviceInfo(device_id, device_info, sizeof(v), &v, nullptr);
437      return v;
438   }
439   static size_t size_t_info(cl_device_id device_id, cl_device_info device_info){
440      size_t v;
441      cl_int status = clGetDeviceInfo(device_id, device_info, sizeof(v), &v, nullptr);
442      return v;
443   }
444 
445   static char *strInfo(cl_platform_id platform_id,cl_platform_info platform_info){
446        size_t sz;
447        cl_int  status = clGetPlatformInfo(platform_id, platform_info, 0, nullptr,  &sz);
448        char *ptr = new char[sz+1];
449        status = clGetPlatformInfo(platform_id, platform_info, sz, ptr,nullptr);
450        return ptr;
451   }
452   */
453    char *OpenCLBackend::strInfo( cl_device_info device_info){
454      size_t sz;
455      cl_int  status = clGetDeviceInfo(device_id, device_info, 0, nullptr,  &sz);
456      char *ptr = new char[sz+1];
457      status = clGetDeviceInfo(device_id, device_info, sz, ptr,nullptr);
458      return ptr;
459   }
460 
461    cl_int OpenCLBackend::cl_int_info( cl_device_info device_info){
462      cl_uint v;
463      cl_int status = clGetDeviceInfo(device_id, device_info, sizeof(v), &v, nullptr);
464      return v;
465   }
466    cl_ulong OpenCLBackend::cl_ulong_info( cl_device_info device_info){
467      cl_ulong v;
468      cl_int status = clGetDeviceInfo(device_id, device_info, sizeof(v), &v, nullptr);
469      return v;
470   }
471    size_t OpenCLBackend::size_t_info( cl_device_info device_info){
472      size_t v;
473      cl_int status = clGetDeviceInfo(device_id, device_info, sizeof(v), &v, nullptr);
474      return v;
475   }
476 
477   char *OpenCLBackend::strPlatformInfo(cl_platform_info platform_info){
478        size_t sz;
479        cl_int  status = clGetPlatformInfo(platform_id, platform_info, 0, nullptr,  &sz);
480        char *ptr = new char[sz+1];
481        status = clGetPlatformInfo(platform_id, platform_info, sz, ptr,nullptr);
482        return ptr;
483   }
484 
485 void OpenCLBackend::computeStart() {
486   if (openclConfig.trace){
487      std::cout <<"compute start" <<std::endl;
488   }
489   openclQueue.computeStart();
490 }
491 void OpenCLBackend::computeEnd() {
492   openclQueue.computeEnd();
493  openclQueue.wait();
494 
495  if (openclConfig.profile){
496      openclQueue.showEvents(100);
497  }
498  openclQueue.release();
499  if (openclConfig.trace){
500      std::cout <<"compute end" <<std::endl;
501  }
502 }
503 
504 struct PlatformInfo{
505   OpenCLBackend *openclBackend;
506   char *versionName;
507   char *vendorName;
508   char *name;
509 
510 struct DeviceInfo{
511   OpenCLBackend *openclBackend;
512   cl_int maxComputeUnits;
513   cl_int maxWorkItemDimensions;
514   cl_device_type deviceType;
515   size_t maxWorkGroupSize;
516   cl_ulong globalMemSize;
517   cl_ulong localMemSize;
518   cl_ulong maxMemAllocSize;
519   char *profile;
520   char *deviceVersion;
521   size_t *maxWorkItemSizes ;
522   char *driverVersion;
523   char *cVersion;
524   char *name;
525   char *extensions;
526   char *builtInKernels;
527   char *deviceTypeStr;
528 
529   DeviceInfo(OpenCLBackend *openclBackend):
530        openclBackend(openclBackend),
531        maxComputeUnits(openclBackend->cl_int_info( CL_DEVICE_MAX_COMPUTE_UNITS)),
532        maxWorkItemDimensions(openclBackend->cl_int_info( CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)),
533        maxWorkGroupSize(openclBackend->size_t_info( CL_DEVICE_MAX_WORK_GROUP_SIZE)),
534        maxWorkItemSizes( new size_t[maxWorkItemDimensions]),
535        maxMemAllocSize(openclBackend->cl_ulong_info(CL_DEVICE_MAX_MEM_ALLOC_SIZE)),
536        globalMemSize(openclBackend->cl_ulong_info( CL_DEVICE_GLOBAL_MEM_SIZE)),
537        localMemSize(openclBackend->cl_ulong_info( CL_DEVICE_LOCAL_MEM_SIZE)),
538        profile(openclBackend->strInfo( CL_DEVICE_PROFILE)),
539        deviceVersion(openclBackend->strInfo(  CL_DEVICE_VERSION)),
540        driverVersion(openclBackend->strInfo(  CL_DRIVER_VERSION)),
541        cVersion(openclBackend->strInfo(  CL_DEVICE_OPENCL_C_VERSION)),
542        name(openclBackend->strInfo(  CL_DEVICE_NAME)),
543        extensions(openclBackend->strInfo(  CL_DEVICE_EXTENSIONS)),
544        builtInKernels(openclBackend->strInfo( CL_DEVICE_BUILT_IN_KERNELS)){
545 
546        clGetDeviceInfo(openclBackend->device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxWorkItemDimensions, maxWorkItemSizes, NULL);
547        clGetDeviceInfo(openclBackend->device_id, CL_DEVICE_TYPE, sizeof(deviceType), &deviceType, NULL);
548        char buf[512];
549        buf[0]='\0';
550        if (CL_DEVICE_TYPE_CPU == (deviceType & CL_DEVICE_TYPE_CPU)) {
551           std::strcat(buf, "CPU ");
552        }
553        if (CL_DEVICE_TYPE_GPU == (deviceType & CL_DEVICE_TYPE_GPU)) {
554           std::strcat(buf, "GPU ");
555        }
556        if (CL_DEVICE_TYPE_ACCELERATOR == (deviceType & CL_DEVICE_TYPE_ACCELERATOR)) {
557           std::strcat(buf, "ACC ");
558        }
559        deviceTypeStr = new char[std::strlen(buf)];
560        std::strcpy(deviceTypeStr, buf);
561   }
562   ~DeviceInfo(){
563      delete [] deviceTypeStr;
564      delete [] profile;
565      delete [] deviceVersion;
566      delete [] driverVersion;
567      delete [] cVersion;
568      delete [] name;
569      delete [] extensions;
570      delete [] builtInKernels;
571      delete [] maxWorkItemSizes;
572   }
573 };
574   DeviceInfo deviceInfo;
575   PlatformInfo(OpenCLBackend *openclBackend):
576      openclBackend(openclBackend),
577      versionName(openclBackend->strPlatformInfo(CL_PLATFORM_VERSION)),
578      vendorName(openclBackend->strPlatformInfo(CL_PLATFORM_VENDOR)),
579      name(openclBackend->strPlatformInfo(CL_PLATFORM_NAME)),
580      deviceInfo(openclBackend){
581   }
582   ~PlatformInfo(){
583      delete [] versionName;
584      delete [] vendorName;
585      delete [] name;
586   }
587 };
588 
589 void OpenCLBackend::info() {
590     PlatformInfo platformInfo(this);
591     cl_int status;
592     std::cerr << "platform{" <<std::endl;
593     std::cerr << "   CL_PLATFORM_VENDOR..\"" << platformInfo.vendorName <<"\""<<std::endl;
594     std::cerr << "   CL_PLATFORM_VERSION.\"" << platformInfo.versionName <<"\""<<std::endl;
595     std::cerr << "   CL_PLATFORM_NAME....\"" << platformInfo.name <<"\""<<std::endl;
596     std::cerr << "         CL_DEVICE_TYPE..................... " <<  platformInfo.deviceInfo.deviceTypeStr << " "<<  platformInfo.deviceInfo.deviceType<<std::endl;
597     std::cerr << "         CL_DEVICE_MAX_COMPUTE_UNITS........ " <<  platformInfo.deviceInfo.maxComputeUnits<<std::endl;
598     std::cerr << "         CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS. " <<  platformInfo.deviceInfo.maxWorkItemDimensions << " {";
599     for (unsigned dimIdx = 0; dimIdx <  platformInfo.deviceInfo.maxWorkItemDimensions; dimIdx++) {
600         std::cerr<<  platformInfo.deviceInfo.maxWorkItemSizes[dimIdx] << " ";
601     }
602     std::cerr<< "}"<<std::endl;
603      std::cerr <<  "         CL_DEVICE_MAX_WORK_GROUP_SIZE...... "<<  platformInfo.deviceInfo.maxWorkGroupSize<<std::endl;
604      std::cerr <<  "         CL_DEVICE_MAX_MEM_ALLOC_SIZE....... "<<  platformInfo.deviceInfo.maxMemAllocSize<<std::endl;
605      std::cerr <<  "         CL_DEVICE_GLOBAL_MEM_SIZE.......... "<<  platformInfo.deviceInfo.globalMemSize<<std::endl;
606      std::cerr <<  "         CL_DEVICE_LOCAL_MEM_SIZE........... "<<  platformInfo.deviceInfo.localMemSize<<std::endl;
607      std::cerr <<  "         CL_DEVICE_PROFILE.................. "<<  platformInfo.deviceInfo.profile<<std::endl;
608      std::cerr <<  "         CL_DEVICE_VERSION.................. "<<  platformInfo.deviceInfo.deviceVersion<<std::endl;
609      std::cerr <<  "         CL_DRIVER_VERSION.................. "<<  platformInfo.deviceInfo.driverVersion<<std::endl;
610      std::cerr <<  "         CL_DEVICE_OPENCL_C_VERSION......... "<<  platformInfo.deviceInfo.cVersion<<std::endl;
611      std::cerr <<  "         CL_DEVICE_NAME..................... "<<  platformInfo.deviceInfo.name<<std::endl;
612      std::cerr <<  "         CL_DEVICE_EXTENSIONS............... "<<  platformInfo.deviceInfo.extensions<<std::endl;
613      std::cerr <<  "         CL_DEVICE_BUILT_IN_KERNELS......... "<<  platformInfo.deviceInfo.builtInKernels<<std::endl;
614      std::cerr <<  "}"<<std::endl;
615 }
616 
617 int OpenCLBackend::getMaxComputeUnits() {
618     PlatformInfo platformInfo(this);
619     return platformInfo.deviceInfo.maxComputeUnits;
620 }
621 long OpenCLBackend::compileProgram(int len, char *source) {
622     size_t srcLen = ::strlen(source);
623     char *src = new char[srcLen + 1];
624     ::strncpy(src, source, srcLen);
625     src[srcLen] = '\0';
626     if(openclConfig.trace){
627         std::cout << "native compiling " << src << std::endl;
628     }
629     cl_int status;
630     cl_program program;
631     if ((program = clCreateProgramWithSource(context, 1, (const char **) &src, nullptr, &status)) == nullptr ||
632         status != CL_SUCCESS) {
633         std::cerr << "clCreateProgramWithSource failed" << std::endl;
634         delete[] src;
635         return 0;
636     }
637 
638     if ((status = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr)) != CL_SUCCESS) {
639         std::cerr << "clBuildProgram failed" << std::endl;
640         // dont return we may still be able to get log!
641     }
642     size_t logLen = 0;
643 
644     BuildInfo *buildInfo = nullptr;
645     if ((status = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, nullptr, &logLen)) != CL_SUCCESS) {
646         std::cerr << "clGetBuildInfo (getting log size) failed" << std::endl;
647         buildInfo = new BuildInfo(src, nullptr, true);
648     } else {
649         cl_build_status buildStatus;
650         clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, nullptr);
651         if (logLen > 0) {
652             char *log = new char[logLen + 1];
653             if ((status = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, logLen + 1, (void *) log,
654                                                 nullptr)) != CL_SUCCESS) {
655                 std::cerr << "clGetBuildInfo (getting log) failed" << std::endl;
656                 delete[] log;
657                 log = nullptr;
658             } else {
659                 log[logLen] = '\0';
660                 if (logLen > 1) {
661                     std::cerr << "logLen = " << logLen << " log  = " << log << std::endl;
662                 }
663             }
664             buildInfo = new BuildInfo(src, log, true);
665         } else {
666             buildInfo = new BuildInfo(src, nullptr, true);
667         }
668     }
669 
670     return reinterpret_cast<long>(new OpenCLProgram(this, buildInfo, program));
671 }
672 
673 const char *OpenCLBackend::errorMsg(cl_int status) {
674     static struct {
675         cl_int code;
676         const char *msg;
677     } error_table[] = {
678             {CL_SUCCESS,                         "success"},
679             {CL_DEVICE_NOT_FOUND,                "device not found",},
680             {CL_DEVICE_NOT_AVAILABLE,            "device not available",},
681             {CL_COMPILER_NOT_AVAILABLE,          "compiler not available",},
682             {CL_MEM_OBJECT_ALLOCATION_FAILURE,   "mem object allocation failure",},
683             {CL_OUT_OF_RESOURCES,                "out of resources",},
684             {CL_OUT_OF_HOST_MEMORY,              "out of host memory",},
685             {CL_PROFILING_INFO_NOT_AVAILABLE,    "profiling not available",},
686             {CL_MEM_COPY_OVERLAP,                "memcopy overlaps",},
687             {CL_IMAGE_FORMAT_MISMATCH,           "image format mismatch",},
688             {CL_IMAGE_FORMAT_NOT_SUPPORTED,      "image format not supported",},
689             {CL_BUILD_PROGRAM_FAILURE,           "build program failed",},
690             {CL_MAP_FAILURE,                     "map failed",},
691             {CL_INVALID_VALUE,                   "invalid value",},
692             {CL_INVALID_DEVICE_TYPE,             "invalid device type",},
693             {CL_INVALID_PLATFORM,                "invlaid platform",},
694             {CL_INVALID_DEVICE,                  "invalid device",},
695             {CL_INVALID_CONTEXT,                 "invalid context",},
696             {CL_INVALID_QUEUE_PROPERTIES,        "invalid queue properties",},
697             {CL_INVALID_COMMAND_QUEUE,           "invalid command queue",},
698             {CL_INVALID_HOST_PTR,                "invalid host ptr",},
699             {CL_INVALID_MEM_OBJECT,              "invalid mem object",},
700             {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "invalid image format descriptor ",},
701             {CL_INVALID_IMAGE_SIZE,              "invalid image size",},
702             {CL_INVALID_SAMPLER,                 "invalid sampler",},
703             {CL_INVALID_BINARY,                  "invalid binary",},
704             {CL_INVALID_BUILD_OPTIONS,           "invalid build options",},
705             {CL_INVALID_PROGRAM,                 "invalid program ",},
706             {CL_INVALID_PROGRAM_EXECUTABLE,      "invalid program executable",},
707             {CL_INVALID_KERNEL_NAME,             "invalid kernel name",},
708             {CL_INVALID_KERNEL_DEFINITION,       "invalid definition",},
709             {CL_INVALID_KERNEL,                  "invalid kernel",},
710             {CL_INVALID_ARG_INDEX,               "invalid arg index",},
711             {CL_INVALID_ARG_VALUE,               "invalid arg value",},
712             {CL_INVALID_ARG_SIZE,                "invalid arg size",},
713             {CL_INVALID_KERNEL_ARGS,             "invalid kernel args",},
714             {CL_INVALID_WORK_DIMENSION,          "invalid work dimension",},
715             {CL_INVALID_WORK_GROUP_SIZE,         "invalid work group size",},
716             {CL_INVALID_WORK_ITEM_SIZE,          "invalid work item size",},
717             {CL_INVALID_GLOBAL_OFFSET,           "invalid global offset",},
718             {CL_INVALID_EVENT_WAIT_LIST,         "invalid event wait list",},
719             {CL_INVALID_EVENT,                   "invalid event",},
720             {CL_INVALID_OPERATION,               "invalid operation",},
721             {CL_INVALID_GL_OBJECT,               "invalid gl object",},
722             {CL_INVALID_BUFFER_SIZE,             "invalid buffer size",},
723             {CL_INVALID_MIP_LEVEL,               "invalid mip level",},
724             {CL_INVALID_GLOBAL_WORK_SIZE,        "invalid global work size",},
725             {-9999,                              "enqueueNdRangeKernel Illegal read or write to a buffer",},
726             {0,                                  NULL},
727     };
728     for (int i = 0; error_table[i].msg != NULL; i++) {
729         if (error_table[i].code == status) {
730             //std::cerr << " clerror '" << error_table[i].msg << "'" << std::endl;
731             return error_table[i].msg;
732         }
733     }
734     static char unknown[256];
735      #if defined (_WIN32)
736         _snprintf
737      #else
738         snprintf
739      #endif
740      (unknown, sizeof(unknown), "unmapped string for  error %d", status);
741     return unknown;
742 }
743 
744 
745 long getOpenCLBackend(int mode, int platform, int device, int unused) {
746  // std::cerr << "Opencl Driver mode=" << mode << " platform=" << platform << " device=" << device << std::endl;
747 
748     return reinterpret_cast<long>(new OpenCLBackend(mode, platform, device));
749 }
750 
751 
752 void __checkOpenclErrors(cl_int status, const char *file, const int line) {
753     if (CL_SUCCESS != status) {
754         std::cerr << "Opencl Driver API error = " << status << " from file " << file << " line " << line << std::endl;
755         exit(-1);
756     }
757 }
758