1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 #include "opencl_backend.h"
 26 
 27 
 28 /*
 29   OpenCLKernel
 30   */
 31 
 32 OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLKernel(Backend::CompilationUnit *compilationUnit, char* name, cl_kernel kernel)
 33     : Backend::CompilationUnit::Kernel(compilationUnit, name), kernel(kernel){
 34 }
 35 
 36 OpenCLBackend::OpenCLProgram::OpenCLKernel::~OpenCLKernel() {
 37     clReleaseKernel(kernel);
 38 }
 39 
 40 
 41 /*
 42 void dispatchKernel(Kernel kernel, KernelContext kc, Arg ... args) {
 43     for (int argn = 0; argn<args.length; argn++){
 44       Arg arg = args[argn];
 45       if (alwaysCopyBuffers || (((arg.flags &JavaDirty)==JavaDirty) && kernel.readsFrom(arg))) {
 46          enqueueCopyToDevice(arg);
 47       }
 48     }
 49     enqueueKernel(kernel);
 50     waitForKernel();
 51 
 52     for (int argn = 0; argn<args.length; argn++){
 53       Arg arg = args[argn];
 54       if (alwaysCopyBuffers){
 55          enqueueCopyFromDevice(arg);
 56          arg.flags = 0;
 57       }else{
 58           if (kernel.writesTo(arg)) {
 59              arg.flags = DeviceDirty;
 60           }else{
 61              arg.flags = 0;
 62           }
 63       }
 64     }
 65 
 66 }
 67 */
 68 
 69 
 70 long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) {
 71 
 72    // std::cout << "ndrange(" << range << ") " << std::endl;
 73     ArgSled argSled(static_cast<ArgArray_s *>(argArray));
 74     OpenCLBackend *openclBackend = dynamic_cast<OpenCLBackend*>(compilationUnit->backend);
 75   //
 76     openclBackend->openclQueue.marker(openclBackend->openclQueue.EnterKernelDispatchBits,
 77      (dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name);
 78     if (openclBackend->openclConfig.traceCalls){
 79        std::cout << "ndrange(\"" <<  (dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<< "\"){"<<std::endl;
 80         std::cout << "Kernel name '"<< (dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<<"'"<<std::endl;
 81     }
 82     if (openclBackend->openclConfig.trace){
 83        Sled::show(std::cout, argArray);
 84     }
 85     NDRange *ndrange = nullptr;
 86     for (int i = 0; i < argSled.argc(); i++) {
 87         Arg_s *arg = argSled.arg(i);
 88         switch (arg->variant) {
 89             case '&': {
 90                if (arg->idx == 0){
 91                    ndrange = static_cast<NDRange *>(arg->value.buffer.memorySegment);
 92                }
 93                if (openclBackend->openclConfig.trace){
 94                   std::cout << "arg["<<i<<"] = "<< std::hex << (int)(arg->value.buffer.access);
 95                   switch (arg->value.buffer.access){
 96                       case RO_BYTE: std::cout << " RO";break;
 97                       case WO_BYTE: std::cout << " WO";break;
 98                       case RW_BYTE: std::cout << " RW"; break;
 99                   }
100                   std::cout << std::endl;
101                }
102 
103                BufferState_s * bufferState = BufferState_s::of(arg);
104                if (bufferState->ptr != arg->value.buffer.memorySegment){
105                    std::cerr <<"bufferState->ptr !=  arg->value.buffer.memorySegment"<<std::endl;
106                    std::exit(1);
107                }
108 
109                if ((bufferState->vendorPtr == 0L) && (bufferState->state != BufferState_s::NEW_STATE)){
110                    std::cerr << "Warning:  Unexpected initial state for arg "<< i
111                       <<" of kernel '"<<(dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<<"'"
112                       << " state=" << bufferState->state<< " '"
113                       << BufferState_s::stateNames[bufferState->state]<< "'"
114                       << " vendorPtr" << bufferState->vendorPtr<<std::endl;
115                }
116                OpenCLBuffer * openclBuffer =nullptr;
117 
118                if (bufferState->vendorPtr == 0L || bufferState->state == BufferState_s::NEW_STATE){
119                   openclBuffer = new OpenCLBuffer(openclBackend, arg, bufferState);
120                  if (openclBackend->openclConfig.trace){
121                      std::cout << "We allocated arg "<<i<<" buffer "<<std::endl;
122                   }
123                }else{
124                   if (openclBackend->openclConfig.trace){
125                       std::cout << "Were reusing  arg "<<i<<" buffer "<<std::endl;
126                   }
127                   openclBuffer=  static_cast<OpenCLBuffer*>(bufferState->vendorPtr);
128                 }
129                 if (openclBuffer->shouldCopyToDevice(arg)){
130                    openclBuffer->copyToDevice();
131                 }else if (openclBackend->openclConfig.traceSkippedCopies){
132                     std::cout << "NOT copying arg " << arg->idx <<" to device "<< std::endl;
133                 }
134 
135                 cl_int status = clSetKernelArg(kernel, arg->idx, sizeof(cl_mem), &openclBuffer->clMem);
136                 if (status != CL_SUCCESS) {
137                     std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
138                     exit(1);
139                 }
140                 if (openclBackend->openclConfig.trace){
141                    std::cout << "set buffer arg " << arg->idx << std::endl;
142                 }
143                 break;
144             }
145              case 'B':
146              case 'S':
147              case 'C':
148              case 'I':
149              case 'F':
150              case 'J':
151              case 'D':
152              {
153                 cl_int status = clSetKernelArg(kernel, arg->idx, arg->size(), (void *) &arg->value);
154                 if (status != CL_SUCCESS) {
155                     std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
156                     exit(1);
157                 }
158                 if (openclBackend->openclConfig.trace){
159                    std::cerr << "set " <<arg->variant << " " << arg->idx << std::endl;
160                 }
161                 break;
162             }
163             default: {
164                 std::cerr << "unexpected variant setting args in OpenCLkernel::ndrange " << (char) arg->variant << std::endl;
165                 exit(1);
166             }
167         }
168     }
169 
170     size_t globalSize = ndrange->maxX;
171     if (openclBackend->openclConfig.trace){
172        std::cout << "ndrange = " << ndrange->maxX << std::endl;
173     }
174     size_t dims = 1;
175     cl_int status = clEnqueueNDRangeKernel(
176             openclBackend->openclQueue.command_queue,
177             kernel,
178             dims,
179             nullptr,
180             &globalSize,
181             nullptr,
182             openclBackend->openclQueue.eventc,
183             openclBackend->openclQueue.eventListPtr(),
184             openclBackend->openclQueue.nextEventPtr());
185     openclBackend->openclQueue.markAsNDRangeAndInc();
186     if (status != CL_SUCCESS) {
187         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
188         exit(1);
189     }
190     if (openclBackend->openclConfig.trace | openclBackend->openclConfig.traceEnqueues){
191        std::cout << "enqueued kernel dispatch \"" << (dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name <<
192        "\" globalSize=" << globalSize << std::endl;
193     }
194 
195 
196        for (int i = 0; i < argSled.argc(); i++) { // note i = 1... we don't need to copy back the KernelContext
197           Arg_s *arg = argSled.arg(i);
198           if (arg->variant == '&') {
199              BufferState_s * bufferState = BufferState_s::of(arg );
200              OpenCLBuffer *openclBuffer = static_cast<OpenCLBuffer *>(bufferState->vendorPtr);
201              if (openclBuffer->shouldCopyFromDevice(arg)){
202                 openclBuffer->copyFromDevice();
203                 if (openclBackend->openclConfig.traceCopies||openclBackend->openclConfig.traceEnqueues){
204                    std::cout << "copying arg " << arg->idx <<" from device "<< std::endl;
205                 }
206                   bufferState->state = BufferState_s::DEVICE_OWNED;
207              //   bufferState->state = BufferState_s::HOST_OWNED;
208              }else{
209                  if (openclBackend->openclConfig.traceSkippedCopies){
210                       std::cout << "NOT copying arg " << arg->idx <<" from device "<< std::endl;
211                  }
212                  bufferState->state = BufferState_s::DEVICE_OWNED;
213              }
214           }
215        }
216 
217 
218 
219       openclBackend->openclQueue.marker(openclBackend->openclQueue.LeaveKernelDispatchBits,
220            (dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name
221       );
222       openclBackend->openclQueue.wait();
223       openclBackend->openclQueue.release();
224        if (openclBackend->openclConfig.traceCalls){
225                   std::cout << "\"" <<  (dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<< "\"}"<<std::endl;
226        }
227     return 0;
228 }