1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 #include "opencl_backend.h"
 26 
 27 #define INFO 0
 28 
 29 OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::OpenCLBuffer(Backend::Program::Kernel *kernel, Arg_s *arg)
 30         : Backend::Program::Kernel::Buffer(kernel, arg) {
 31     /*
 32      *   (void *) arg->value.buffer.memorySegment,
 33      *   (size_t) arg->value.buffer.sizeInBytes);
 34      */
 35     cl_int status;
 36     auto openclBackend = dynamic_cast<OpenCLBackend *>(kernel->program->backend);
 37     clMem = clCreateBuffer(openclBackend->context,
 38                            CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
 39                            arg->value.buffer.sizeInBytes,
 40                            arg->value.buffer.memorySegment,
 41                            &status);
 42     if (status != CL_SUCCESS) {
 43         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
 44         exit(1);
 45     }
 46     arg->value.buffer.vendorPtr = static_cast<void *>(this);
 47     if (INFO){
 48          std::cout << "created buffer " << std::endl;
 49     }
 50 }
 51 
 52 void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyToDevice() {
 53 
 54     /*
 55      *   (void *) arg->value.buffer.memorySegment,
 56      *   (size_t) arg->value.buffer.sizeInBytes);
 57      */
 58     auto openclKernel = dynamic_cast<OpenCLKernel *>(kernel);
 59     auto openclBackend = dynamic_cast<OpenCLBackend *>(openclKernel->program->backend);
 60     cl_int status = clEnqueueWriteBuffer(openclBackend->command_queue,
 61                                          clMem,
 62                                          CL_FALSE,
 63                                          0,
 64                                          arg->value.buffer.sizeInBytes,
 65                                          arg->value.buffer.memorySegment,
 66                                          openclKernel->eventc,
 67                                          ((openclKernel->eventc == 0) ? NULL : openclKernel->events),
 68                                          &(openclKernel->events[openclKernel->eventc]));
 69 
 70 
 71     if (status != CL_SUCCESS) {
 72         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
 73         exit(1);
 74     }
 75     openclKernel->eventc++;
 76     if (INFO){
 77         std::cout << "enqueued buffer copyToDevice " << std::endl;
 78     }
 79 }
 80 
 81 void OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::copyFromDevice() {
 82     auto openclKernel = dynamic_cast<OpenCLKernel *>(kernel);
 83     auto openclBackend = dynamic_cast<OpenCLBackend *>(openclKernel->program->backend);
 84     cl_int status = clEnqueueReadBuffer(openclBackend->command_queue,
 85                                         clMem,
 86                                         CL_FALSE,
 87                                         0,
 88                                         arg->value.buffer.sizeInBytes,
 89                                         arg->value.buffer.memorySegment,
 90                                         openclKernel->eventc,
 91                                         ((openclKernel->eventc == 0) ? NULL : openclKernel->events),
 92                                         &(openclKernel->events[openclKernel->eventc]));
 93 
 94     if (status != CL_SUCCESS) {
 95         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
 96         exit(1);
 97     }
 98     openclKernel->eventc++;
 99     if (INFO){
100        std::cout << "enqueued buffer copyFromDevice " << std::endl;
101     }
102 }
103 
104 OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLBuffer::~OpenCLBuffer() {
105     clReleaseMemObject(clMem);
106 }
107 
108 OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLKernel(Backend::Program *program, char* name, cl_kernel kernel)
109         : Backend::Program::Kernel(program, name), kernel(kernel), eventMax(0), events(nullptr),
110           eventc(0) {
111 }
112 
113 OpenCLBackend::OpenCLProgram::OpenCLKernel::~OpenCLKernel() {
114     clReleaseKernel(kernel);
115 }
116 
117 long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
118    // std::cout << "ndrange(" << range << ") " << std::endl;
119     ArgSled argSled(static_cast<ArgArray_s *>(argArray));
120     if (INFO){
121        Sled::show(std::cout, argArray);
122     }
123     if (events != nullptr || eventc != 0) {
124         std::cerr << "opencl state issue, we might have leaked events!" << std::endl;
125     }
126     eventMax = argSled.argc() * 4 + 1;
127     eventc = 0;
128     events = new cl_event[eventMax];
129     NDRange *ndrange = nullptr;
130     for (int i = 0; i < argSled.argc(); i++) {
131         Arg_s *arg = argSled.arg(i);
132         switch (arg->variant) {
133             case '&': {
134                auto openclBuffer = new OpenCLBuffer(this, arg);
135                 if (arg->idx == 0){
136                     ndrange = static_cast<NDRange *>(arg->value.buffer.memorySegment);
137                 }else{
138                      if (INFO){
139                         if (arg->value.buffer.state == 1) { //Java described this as dirty
140                             std::cout << "JAVA_DIRTY !"<<std::endl;
141                          }else{
142                             std::cout << "NOT JAVA_DIRTY"<<std::endl;
143                           }
144                      }
145                 }
146                 openclBuffer->copyToDevice();
147                 cl_int status = clSetKernelArg(kernel, arg->idx, sizeof(cl_mem), &openclBuffer->clMem);
148                 if (status != CL_SUCCESS) {
149                     std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
150                     exit(1);
151                 }
152                 if (INFO){
153                    std::cout << "set buffer arg " << arg->idx << std::endl;
154                 }
155                 break;
156             }
157             case 'I':
158             case 'F': {
159                 cl_int status = clSetKernelArg(kernel, arg->idx, sizeof(arg->value.x32), (void *) &arg->value);
160                 if (status != CL_SUCCESS) {
161                     std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
162                     exit(1);
163                 }
164                 if (INFO){
165                    std::cout << "set I or F arg " << arg->idx << std::endl;
166                 }
167                 break;
168             }
169             case 'S':
170             case 'C': {
171                 cl_int status = clSetKernelArg(kernel, arg->idx, sizeof(arg->value.x16), (void *) &arg->value);
172                 if (status != CL_SUCCESS) {
173                     std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
174                     exit(1);
175                 }
176                 if (INFO){
177                    std::cout << "set S or C arg " << arg->idx << std::endl;
178                 }
179                 break;
180             }
181             case 'J':
182             case 'D': {
183                 cl_int status = clSetKernelArg(kernel, arg->idx, sizeof(arg->value.x64), (void *) &arg->value);
184                 if (status != CL_SUCCESS) {
185                     std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
186                     exit(1);
187                 }
188                 if (INFO){
189                    std::cout << "set J or D arg " << arg->idx << std::endl;
190                 }
191                 break;
192             }
193             default: {
194                 std::cerr << "unexpected variant " << (char) arg->variant << std::endl;
195                 exit(1);
196             }
197         }
198     }
199 
200     size_t globalSize = ndrange->maxX;
201     if (INFO){
202        std::cout << "ndrange = " << ndrange->maxX << std::endl;
203     }
204     size_t dims = 1;
205     cl_int status = clEnqueueNDRangeKernel(
206             dynamic_cast<OpenCLBackend *>(program->backend)->command_queue,
207             kernel,
208             dims,
209             nullptr,
210             &globalSize,
211             nullptr,
212             eventc,
213             ((eventc == 0) ? nullptr : events),
214             &(events[eventc]));
215     if (status != CL_SUCCESS) {
216         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
217         exit(1);
218     }
219     if (INFO){
220        std::cout << "enqueued dispatch  " << std::endl;
221        std::cout <<  " globalSize=" << globalSize << " " << std::endl;
222     }
223 
224     eventc++;
225     for (int i = 0; i < argSled.argc(); i++) {
226         Arg_s *arg = argSled.arg(i);
227         if (arg->variant == '&') {
228             static_cast<OpenCLBuffer *>(arg->value.buffer.vendorPtr)->copyFromDevice();
229         }
230     }
231     status = clWaitForEvents(eventc, events);
232     if (status != CL_SUCCESS) {
233         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
234         exit(1);
235     }
236     for (int i = 0; i < eventc; i++) {
237         status = clReleaseEvent(events[i]);
238         if (status != CL_SUCCESS) {
239             std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
240             exit(1);
241         }
242     }
243     delete[] events;
244     eventMax = 0;
245     eventc = 0;
246     events = nullptr;
247     for (int i = 0; i < argSled.argc(); i++) {
248         Arg_s *arg = argSled.arg(i);
249         if (arg->variant == '&') {
250             delete static_cast<OpenCLBuffer *>(arg->value.buffer.vendorPtr);
251             arg->value.buffer.vendorPtr = nullptr;
252         }
253     }
254     return 0;
255 }
256 
257 
258 OpenCLBackend::OpenCLProgram::OpenCLProgram(Backend *backend, BuildInfo *buildInfo, cl_program program)
259         : Backend::Program(backend, buildInfo), program(program) {
260 }
261 
262 OpenCLBackend::OpenCLProgram::~OpenCLProgram() {
263     clReleaseProgram(program);
264 }
265 
266 long OpenCLBackend::OpenCLProgram::getKernel(int nameLen, char *name) {
267     cl_int status;
268     cl_kernel kernel = clCreateKernel(program, name, &status);
269     return (long) new OpenCLKernel(this,name, kernel);
270 }
271 
272 bool OpenCLBackend::OpenCLProgram::programOK() {
273     return true;
274 }
275 
276 OpenCLBackend::OpenCLBackend(OpenCLBackend::OpenCLConfig *openclConfig, int configSchemaLen, char *configSchema)
277         : Backend((Backend::Config *) openclConfig, configSchemaLen, configSchema) {
278 
279     if (INFO){
280        if (openclConfig == nullptr) {
281            std::cout << "openclConfig == null" << std::endl;
282        } else {
283            std::cout << "openclConfig->gpu" << (openclConfig->gpu ? "true" : "false") << std::endl;
284            std::cout << "openclConfig->schema" << configSchema << std::endl;
285        }
286     }
287     cl_device_type requestedType =
288             openclConfig == nullptr ? CL_DEVICE_TYPE_GPU : openclConfig->gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
289 
290     cl_int status;
291     cl_uint platformc = 0;
292     if ((status = clGetPlatformIDs(0, NULL, &platformc)) != CL_SUCCESS) {
293         return;
294     }
295     cl_platform_id *platforms = new cl_platform_id[platformc];
296     if ((status = clGetPlatformIDs(platformc, platforms, NULL)) != CL_SUCCESS) {
297         return;
298     }
299 
300     cl_uint devicec = 0;
301     for (unsigned int i = 0; devicec == 0 && i < platformc; ++i) {
302         platform_id = platforms[i];
303         if ((status = clGetDeviceIDs(platform_id, requestedType, 0, NULL, &devicec)) != CL_SUCCESS) {
304             delete[] platforms;
305             return;
306         }
307     }
308     if (devicec == 0) {
309         status = CL_DEVICE_NOT_AVAILABLE;
310         return;
311     }
312     cl_device_id *device_ids = new cl_device_id[devicec];             // compute device id
313     if ((status = clGetDeviceIDs(platform_id, requestedType, devicec, device_ids, NULL)) != CL_SUCCESS) {
314         delete[] platforms;
315         delete[] device_ids;
316         return;
317     }
318     if ((context = clCreateContext(0, 1, device_ids, NULL, NULL, &status)) == NULL || status != CL_SUCCESS) {
319         delete[] platforms;
320         delete[] device_ids;
321         return;
322     }
323 
324     cl_command_queue_properties queue_props = CL_QUEUE_PROFILING_ENABLE;
325 
326     if ((command_queue = clCreateCommandQueue(context, device_ids[0], queue_props, &status)) == NULL ||
327         status != CL_SUCCESS) {
328         clReleaseContext(context);
329         delete[] platforms;
330         delete[] device_ids;
331         return;
332     }
333     device_id = device_ids[0];
334     delete[] device_ids;
335     delete[] platforms;
336 }
337 
338 OpenCLBackend::OpenCLBackend()
339         : OpenCLBackend(nullptr, 0, nullptr) {
340 
341 }
342 
343 OpenCLBackend::~OpenCLBackend() {
344     clReleaseContext(context);
345     clReleaseCommandQueue(command_queue);
346 }
347 
348 void OpenCLBackend::OpenCLProgram::OpenCLKernel::showEvents(int width) {
349     cl_ulong *samples = new cl_ulong[4 * eventc]; // queued, submit, start, end
350     int sample = 0;
351     cl_ulong min;
352     cl_ulong max;
353     for (int event = 0; event < eventc; event++) {
354         for (int type = 0; type < 4; type++) {
355             cl_profiling_info info;
356             switch (type) {
357                 case 0:
358                     info = CL_PROFILING_COMMAND_QUEUED;
359                     break;
360                 case 1:
361                     info = CL_PROFILING_COMMAND_SUBMIT;
362                     break;
363                 case 2:
364                     info = CL_PROFILING_COMMAND_START;
365                     break;
366                 case 3:
367                     info = CL_PROFILING_COMMAND_END;
368                     break;
369             }
370 
371             if ((clGetEventProfilingInfo(events[event], info, sizeof(samples[sample]), &samples[sample], NULL)) !=
372                 CL_SUCCESS) {
373                 std::cerr << "failed to get profile info " << info << std::endl;
374             }
375             if (sample == 0) {
376                 min = max = samples[sample];
377             } else {
378                 if (samples[sample] < min) {
379                     min = samples[sample];
380                 }
381                 if (samples[sample] > max) {
382                     max = samples[sample];
383                 }
384             }
385             sample++;
386         }
387     }
388     sample = 0;
389     int range = (max - min);
390     int scale = range / width;  // range per char
391     std::cout << "Range: " << range << "(ns)" << std::endl;
392     std::cout << "Scale: " << scale << " range (ns) per char" << std::endl;
393 
394     for (int event = 0; event < eventc; event++) {
395         cl_ulong queue = (samples[sample++] - min) / scale;
396         cl_ulong submit = (samples[sample++] - min) / scale;
397         cl_ulong start = (samples[sample++] - min) / scale;
398         cl_ulong end = (samples[sample++] - min) / scale;
399         for (int c = 0; c < 80; c++) {
400             if (c > queue) {
401                 if (c > submit) {
402                     if (c > start) {
403                         if (c > end) {
404                             std::cout << " ";
405                         } else {
406                             std::cout << "=";
407                         }
408                     } else {
409                         std::cout << "#";
410                     }
411                 } else {
412                     std::cout << "+";
413                 }
414             } else {
415                 std::cout << " ";
416             }
417         }
418         std::cout << std::endl;
419 
420     }
421     delete[] samples;
422 }
423 
424 int OpenCLBackend::getMaxComputeUnits() {
425     if (INFO){
426        std::cout << "getMaxComputeUnits()" << std::endl;
427     }
428     cl_uint value;
429     cl_int status = clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(value), &value, nullptr);
430     if (status != CL_SUCCESS) {
431         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
432         exit(1);
433     }
434     return value;
435 
436 }
437 
438 void OpenCLBackend::info() {
439     cl_int status;
440     fprintf(stderr, "platform{\n");
441     char platformVersionName[512];
442     status = clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, sizeof(platformVersionName), platformVersionName,
443                                NULL);
444     char platformVendorName[512];
445     char platformName[512];
446     status = clGetPlatformInfo(platform_id, CL_PLATFORM_VENDOR, sizeof(platformVendorName), platformVendorName, NULL);
447     status = clGetPlatformInfo(platform_id, CL_PLATFORM_NAME, sizeof(platformName), platformName, NULL);
448     fprintf(stderr, "   CL_PLATFORM_VENDOR..\"%s\"\n", platformVendorName);
449     fprintf(stderr, "   CL_PLATFORM_VERSION.\"%s\"\n", platformVersionName);
450     fprintf(stderr, "   CL_PLATFORM_NAME....\"%s\"\n", platformName);
451 
452 
453     cl_device_type deviceType;
454     status = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(deviceType), &deviceType, NULL);
455     fprintf(stderr, "         CL_DEVICE_TYPE..................... ");
456     if (deviceType & CL_DEVICE_TYPE_DEFAULT) {
457         deviceType &= ~CL_DEVICE_TYPE_DEFAULT;
458         fprintf(stderr, "Default ");
459     }
460     if (deviceType & CL_DEVICE_TYPE_CPU) {
461         deviceType &= ~CL_DEVICE_TYPE_CPU;
462         fprintf(stderr, "CPU ");
463     }
464     if (deviceType & CL_DEVICE_TYPE_GPU) {
465         deviceType &= ~CL_DEVICE_TYPE_GPU;
466         fprintf(stderr, "GPU ");
467     }
468     if (deviceType & CL_DEVICE_TYPE_ACCELERATOR) {
469         deviceType &= ~CL_DEVICE_TYPE_ACCELERATOR;
470         fprintf(stderr, "Accelerator ");
471     }
472     fprintf(stderr, LongHexNewline, deviceType);
473 
474     cl_uint maxComputeUnits;
475     status = clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(maxComputeUnits), &maxComputeUnits, NULL);
476     fprintf(stderr, "         CL_DEVICE_MAX_COMPUTE_UNITS........ %u\n", maxComputeUnits);
477 
478     cl_uint maxWorkItemDimensions;
479     status = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(maxWorkItemDimensions),
480                              &maxWorkItemDimensions, NULL);
481     fprintf(stderr, "         CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS. %u\n", maxWorkItemDimensions);
482 
483     size_t *maxWorkItemSizes = new size_t[maxWorkItemDimensions];
484     status = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxWorkItemDimensions,
485                              maxWorkItemSizes, NULL);
486     for (unsigned dimIdx = 0; dimIdx < maxWorkItemDimensions; dimIdx++) {
487         fprintf(stderr, "             dim[%d] = %ld\n", dimIdx, maxWorkItemSizes[dimIdx]);
488     }
489 
490     size_t maxWorkGroupSize;
491     status = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(maxWorkGroupSize), &maxWorkGroupSize,
492                              NULL);
493 
494     fprintf(stderr, "         CL_DEVICE_MAX_WORK_GROUP_SIZE...... "
495     Size_tNewline, maxWorkGroupSize);
496 
497 
498     cl_ulong maxMemAllocSize;
499     status = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(maxMemAllocSize), &maxMemAllocSize, NULL);
500     fprintf(stderr, "         CL_DEVICE_MAX_MEM_ALLOC_SIZE....... "
501     LongUnsignedNewline, maxMemAllocSize);
502 
503     cl_ulong globalMemSize;
504     status = clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(globalMemSize), &globalMemSize, NULL);
505     fprintf(stderr, "         CL_DEVICE_GLOBAL_MEM_SIZE.......... "
506     LongUnsignedNewline, globalMemSize);
507 
508     cl_ulong localMemSize;
509     status = clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(localMemSize), &localMemSize, NULL);
510     fprintf(stderr, "         CL_DEVICE_LOCAL_MEM_SIZE........... "
511     LongUnsignedNewline, localMemSize);
512 
513     char profile[2048];
514     status = clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(profile), &profile, NULL);
515     fprintf(stderr, "         CL_DEVICE_PROFILE.................. %s\n", profile);
516 
517     char deviceVersion[2048];
518     status = clGetDeviceInfo(device_id, CL_DEVICE_VERSION, sizeof(deviceVersion), &deviceVersion, NULL);
519     fprintf(stderr, "         CL_DEVICE_VERSION.................. %s\n", deviceVersion);
520 
521     char driverVersion[2048];
522     status = clGetDeviceInfo(device_id, CL_DRIVER_VERSION, sizeof(driverVersion), &driverVersion, NULL);
523     fprintf(stderr, "         CL_DRIVER_VERSION.................. %s\n", driverVersion);
524 
525     char cVersion[2048];
526     status = clGetDeviceInfo(device_id, CL_DEVICE_OPENCL_C_VERSION, sizeof(cVersion), &cVersion, NULL);
527     fprintf(stderr, "         CL_DEVICE_OPENCL_C_VERSION......... %s\n", cVersion);
528 
529     char name[2048];
530     status = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(name), &name, NULL);
531     fprintf(stderr, "         CL_DEVICE_NAME..................... %s\n", name);
532     char extensions[2048];
533     status = clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(extensions), &extensions, NULL);
534     fprintf(stderr, "         CL_DEVICE_EXTENSIONS............... %s\n", extensions);
535     char builtInKernels[2048];
536     status = clGetDeviceInfo(device_id, CL_DEVICE_BUILT_IN_KERNELS, sizeof(builtInKernels), &builtInKernels, NULL);
537     fprintf(stderr, "         CL_DEVICE_BUILT_IN_KERNELS......... %s\n", builtInKernels);
538 
539     fprintf(stderr, "      }\n");
540 }
541 
542 long OpenCLBackend::compileProgram(int len, char *source) {
543     size_t srcLen = ::strlen(source);
544     char *src = new char[srcLen + 1];
545     ::strncpy(src, source, srcLen);
546     src[srcLen] = '\0';
547     if(INFO){
548         std::cout << "native compiling " << src << std::endl;
549     }
550     cl_int status;
551     cl_program program;
552     if ((program = clCreateProgramWithSource(context, 1, (const char **) &src, nullptr, &status)) == nullptr ||
553         status != CL_SUCCESS) {
554         std::cerr << "clCreateProgramWithSource failed" << std::endl;
555         delete[] src;
556         return 0;
557     }
558 
559     if ((status = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr)) != CL_SUCCESS) {
560         std::cerr << "clBuildProgram failed" << std::endl;
561         // dont return we may still be able to get log!
562     }
563     size_t logLen = 0;
564 
565     BuildInfo *buildInfo = nullptr;
566     if ((status = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, nullptr, &logLen)) != CL_SUCCESS) {
567         std::cerr << "clGetBuildInfo (getting log size) failed" << std::endl;
568         buildInfo = new BuildInfo(src, nullptr, true);
569     } else {
570         cl_build_status buildStatus;
571         clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, nullptr);
572         if (logLen > 0) {
573             char *log = new char[logLen + 1];
574             if ((status = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, logLen + 1, (void *) log,
575                                                 nullptr)) != CL_SUCCESS) {
576                 std::cerr << "clGetBuildInfo (getting log) failed" << std::endl;
577                 delete[] log;
578                 log = nullptr;
579             } else {
580                 log[logLen] = '\0';
581                 if (logLen > 1) {
582                     std::cerr << "logLen = " << logLen << " log  = " << log << std::endl;
583                 }
584             }
585             buildInfo = new BuildInfo(src, log, true);
586         } else {
587             buildInfo = new BuildInfo(src, nullptr, true);
588         }
589     }
590 
591     return reinterpret_cast<long>(new OpenCLProgram(this, buildInfo, program));
592 }
593 
594 const char *OpenCLBackend::errorMsg(cl_int status) {
595     static struct {
596         cl_int code;
597         const char *msg;
598     } error_table[] = {
599             {CL_SUCCESS,                         "success"},
600             {CL_DEVICE_NOT_FOUND,                "device not found",},
601             {CL_DEVICE_NOT_AVAILABLE,            "device not available",},
602             {CL_COMPILER_NOT_AVAILABLE,          "compiler not available",},
603             {CL_MEM_OBJECT_ALLOCATION_FAILURE,   "mem object allocation failure",},
604             {CL_OUT_OF_RESOURCES,                "out of resources",},
605             {CL_OUT_OF_HOST_MEMORY,              "out of host memory",},
606             {CL_PROFILING_INFO_NOT_AVAILABLE,    "profiling not available",},
607             {CL_MEM_COPY_OVERLAP,                "memcopy overlaps",},
608             {CL_IMAGE_FORMAT_MISMATCH,           "image format mismatch",},
609             {CL_IMAGE_FORMAT_NOT_SUPPORTED,      "image format not supported",},
610             {CL_BUILD_PROGRAM_FAILURE,           "build program failed",},
611             {CL_MAP_FAILURE,                     "map failed",},
612             {CL_INVALID_VALUE,                   "invalid value",},
613             {CL_INVALID_DEVICE_TYPE,             "invalid device type",},
614             {CL_INVALID_PLATFORM,                "invlaid platform",},
615             {CL_INVALID_DEVICE,                  "invalid device",},
616             {CL_INVALID_CONTEXT,                 "invalid context",},
617             {CL_INVALID_QUEUE_PROPERTIES,        "invalid queue properties",},
618             {CL_INVALID_COMMAND_QUEUE,           "invalid command queue",},
619             {CL_INVALID_HOST_PTR,                "invalid host ptr",},
620             {CL_INVALID_MEM_OBJECT,              "invalid mem object",},
621             {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "invalid image format descriptor ",},
622             {CL_INVALID_IMAGE_SIZE,              "invalid image size",},
623             {CL_INVALID_SAMPLER,                 "invalid sampler",},
624             {CL_INVALID_BINARY,                  "invalid binary",},
625             {CL_INVALID_BUILD_OPTIONS,           "invalid build options",},
626             {CL_INVALID_PROGRAM,                 "invalid program ",},
627             {CL_INVALID_PROGRAM_EXECUTABLE,      "invalid program executable",},
628             {CL_INVALID_KERNEL_NAME,             "invalid kernel name",},
629             {CL_INVALID_KERNEL_DEFINITION,       "invalid definition",},
630             {CL_INVALID_KERNEL,                  "invalid kernel",},
631             {CL_INVALID_ARG_INDEX,               "invalid arg index",},
632             {CL_INVALID_ARG_VALUE,               "invalid arg value",},
633             {CL_INVALID_ARG_SIZE,                "invalid arg size",},
634             {CL_INVALID_KERNEL_ARGS,             "invalid kernel args",},
635             {CL_INVALID_WORK_DIMENSION,          "invalid work dimension",},
636             {CL_INVALID_WORK_GROUP_SIZE,         "invalid work group size",},
637             {CL_INVALID_WORK_ITEM_SIZE,          "invalid work item size",},
638             {CL_INVALID_GLOBAL_OFFSET,           "invalid global offset",},
639             {CL_INVALID_EVENT_WAIT_LIST,         "invalid event wait list",},
640             {CL_INVALID_EVENT,                   "invalid event",},
641             {CL_INVALID_OPERATION,               "invalid operation",},
642             {CL_INVALID_GL_OBJECT,               "invalid gl object",},
643             {CL_INVALID_BUFFER_SIZE,             "invalid buffer size",},
644             {CL_INVALID_MIP_LEVEL,               "invalid mip level",},
645             {CL_INVALID_GLOBAL_WORK_SIZE,        "invalid global work size",},
646             {-9999,                              "enqueueNdRangeKernel Illegal read or write to a buffer",},
647             {0,                                  NULL},
648     };
649     static char unknown[256];
650     int ii;
651 
652     for (ii = 0; error_table[ii].msg != NULL; ii++) {
653         if (error_table[ii].code == status) {
654             //std::cerr << " clerror '" << error_table[ii].msg << "'" << std::endl;
655             return error_table[ii].msg;
656         }
657     }
658     SNPRINTF(unknown, sizeof(unknown), "unmapped string for  error %d", status);
659     return unknown;
660 }
661 
662 
663 long getBackend(void *config, int configSchemaLen, char *configSchema) {
664     return reinterpret_cast<long>(new OpenCLBackend(static_cast<OpenCLBackend::OpenCLConfig *>(config), configSchemaLen,
665                                                     configSchema));
666 }
667 
668 void __checkOpenclErrors(cl_int status, const char *file, const int line) {
669     if (CL_SUCCESS != status) {
670         std::cerr << "Opencl Driver API error = " << status << " from file " << file << " line " << line << std::endl;
671         exit(-1);
672     }
673 }
674