1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 #include "opencl_backend.h"
 26 /*
 27 While based on OpenCL's event list, I think we need to use a MOD eventMax queue.
 28 
 29 So
 30 */
 31  OpenCLBackend::OpenCLQueue::OpenCLQueue(Backend *backend)
 32     : ProfilableQueue(backend, 10000),
 33       command_queue(),
 34       events(new cl_event[eventMax]){
 35  }
 36 
 37  cl_event *OpenCLBackend::OpenCLQueue::eventListPtr() const {
 38     return (eventc == 0) ? nullptr : events;
 39  }
 40  cl_event *OpenCLBackend::OpenCLQueue::nextEventPtr() const {
 41     return &events[eventc];
 42  }
 43 
 44 void OpenCLBackend::OpenCLQueue::showEvents(const int width) {
 45     constexpr int  SAMPLE_TYPES=4;
 46     auto *samples = new cl_ulong[SAMPLE_TYPES * eventc]; // queued, submit, start, end, complete
 47     int sample = 0;
 48     cl_ulong min=CL_LONG_MAX;
 49     cl_ulong max=CL_LONG_MIN;
 50 
 51     for (int event = 0; event < eventc; event++) {
 52         for (int type = 0; type < SAMPLE_TYPES; type++) {
 53             cl_profiling_info profiling_info_arr[]={CL_PROFILING_COMMAND_QUEUED,CL_PROFILING_COMMAND_SUBMIT,CL_PROFILING_COMMAND_START,CL_PROFILING_COMMAND_END};
 54             if ((clGetEventProfilingInfo(events[event], profiling_info_arr[type], sizeof(samples[sample]), &samples[sample], NULL)) !=
 55                 CL_SUCCESS) {
 56                 const char* profiling_info_name_arr[]={"CL_PROFILING_COMMAND_QUEUED","CL_PROFILING_COMMAND_SUBMIT","CL_PROFILING_COMMAND_START","CL_PROFILING_COMMAND_END" };
 57                 std::cerr << "failed to get profile info " << profiling_info_name_arr[type] << std::endl;
 58             }
 59             if (sample == 0) {
 60                 if (type == 0){
 61                    min = max = samples[sample];
 62                 }
 63             } else {
 64                 if (samples[sample] < min) {
 65                     min = samples[sample];
 66                 }
 67                 if (samples[sample] > max) {
 68                     max = samples[sample];
 69                 }
 70             }
 71             sample++;
 72         }
 73     }
 74     sample = 0;
 75     const cl_ulong range = (max - min);
 76     const cl_ulong scale = range / width;  // range per char
 77     std::cout << "Range: " <<min<< "-" <<max<< "("<< range << "ns)"
 78         <<  "  (" << scale << "ns) per char"
 79         << " +:submitted, .:started, =:end  "<< std::endl;
 80 
 81     for (int event = 0; event < eventc; event++) {
 82       /*  cl_command_type command_type;
 83         clGetEventInfo(events[event],CL_EVENT_COMMAND_TYPE,sizeof(command_type), &command_type, nullptr);
 84         switch (command_type){
 85           case CL_COMMAND_MARKER:         std::cout <<   "marker "; break;
 86           case CL_COMMAND_USER:           std::cout <<   "  user "; break;
 87           case CL_COMMAND_NDRANGE_KERNEL: std::cout <<   "kernel "; break;
 88           case CL_COMMAND_READ_BUFFER:    std::cout <<   "  read "; break;
 89           case CL_COMMAND_WRITE_BUFFER:   std::cout <<   " write "; break;
 90           default: std::cout <<                          " other "; break;
 91         } */
 92         const int bits = eventInfoBits[event];
 93         if ((bits&CopyToDeviceBits)==CopyToDeviceBits){
 94            std::cout <<   "  write "<<(bits&0xffff)<<" " ;
 95         }
 96         if ((bits&CopyFromDeviceBits)==CopyFromDeviceBits){
 97            std::cout <<   "   read "<<(bits&0xffff)<<" ";
 98         }
 99         if ((bits&StartComputeBits)==StartComputeBits){
100            std::cout <<   "  start    ";
101         }
102         if ((bits&EndComputeBits)==EndComputeBits){
103            std::cout <<   "    end    ";
104         }
105         if ((bits&NDRangeBits)==NDRangeBits){
106            std::cout <<   " kernel    ";
107         }
108         if ((bits&EnterKernelDispatchBits)==EnterKernelDispatchBits){
109            if ((bits&HasConstCharPtrArgBits)==HasConstCharPtrArgBits){
110                std::cout<< eventInfoConstCharPtrArgs[event]<<std::endl;
111            }
112            std::cout <<   "  enter{   ";
113 
114         }
115         if ((bits&LeaveKernelDispatchBits)==LeaveKernelDispatchBits){
116           // std::cout <<   "  leave    ";
117             if ((bits&HasConstCharPtrArgBits)==HasConstCharPtrArgBits){
118                           std::cout<< eventInfoConstCharPtrArgs[event] <<std::endl;
119                       }
120                           std::cout <<   " }leave    ";
121 
122         }
123 
124 
125         const cl_ulong queue = (samples[sample++] - min) / scale;
126         const cl_ulong submit = (samples[sample++] - min) / scale;
127         const cl_ulong start = (samples[sample++] - min) / scale;
128         const cl_ulong end = (samples[sample++] - min) / scale;
129 
130         std::cout << std::setw(20)<< (queue-end) << "(ns) ";
131         for (int c = 0; c < width; c++) {
132             char ch = ' ';
133             if (c >= queue && c<=submit) {
134                 ch = '+';
135             }else if (c>submit && c<start){
136                 ch = '.';
137             }else if (c>=start && c<end){
138                 ch = '=';
139             }
140             std::cout << ch;
141         }
142         std::cout << std::endl;
143     }
144     delete[] samples;
145 }
146 void OpenCLBackend::OpenCLQueue::wait(){
147     if (eventc > 0){
148        cl_int status = clWaitForEvents(eventc, events);
149        if (status != CL_SUCCESS) {
150           std::cerr << "failed clWaitForEvents" << OpenCLBackend::errorMsg(status) << std::endl;
151           exit(1);
152        }
153     }
154  }
155 // void clCallback(void *){
156   //    std::cerr<<"start of compute"<<std::endl;
157 // }
158 
159   void OpenCLBackend::OpenCLQueue::marker(int bits){
160       cl_int status = clEnqueueMarkerWithWaitList(
161           command_queue,
162           this->eventc,
163           this->eventListPtr(),
164           this->nextEventPtr()
165           );
166         if (status != CL_SUCCESS){
167              std::cerr << "failed to clEnqueueMarkerWithWaitList "<<errorMsg(status)<< std::endl;
168              std::exit(1);
169          }
170       inc(bits);
171   }
172     void OpenCLBackend::OpenCLQueue::marker(int bits, const char* arg){
173     cl_int status = clEnqueueMarkerWithWaitList(
174         command_queue,
175         this->eventc, this->eventListPtr(),this->nextEventPtr()
176         );
177           if (status != CL_SUCCESS){
178                std::cerr << "failed to clEnqueueMarkerWithWaitList "<<errorMsg(status)<< std::endl;
179                std::exit(1);
180            }
181         inc(bits, arg);
182     }
183 
184 
185  void OpenCLBackend::OpenCLQueue::computeStart(){
186    wait(); // should be no-op
187    release(); // also ;
188    marker(StartComputeBits);
189  }
190 
191  void OpenCLBackend::OpenCLQueue::computeEnd(){
192    marker(EndComputeBits);
193  }
194 
195  void OpenCLBackend::OpenCLQueue::inc(const int bits){
196     if (eventc+1 >= eventMax){
197        std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl;
198     }else{
199         eventInfoBits[eventc]=bits;
200     }
201     eventc++;
202  }
203  void OpenCLBackend::OpenCLQueue::inc(const int bits, const char *arg){
204      if (eventc+1 >= eventMax){
205         std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl;
206      }else{
207          eventInfoBits[eventc]=bits|HasConstCharPtrArgBits;
208          eventInfoConstCharPtrArgs[eventc]=arg;
209      }
210      eventc++;
211   }
212 
213 
214  void OpenCLBackend::OpenCLQueue::markAsEndComputeAndInc(){
215      inc(EndComputeBits);
216  }
217  void OpenCLBackend::OpenCLQueue::markAsStartComputeAndInc(){
218      inc(StartComputeBits);
219  }
220 
221  void OpenCLBackend::OpenCLQueue::markAsEnterKernelDispatchAndInc(){
222      inc(EnterKernelDispatchBits);
223  }
224  void OpenCLBackend::OpenCLQueue::markAsLeaveKernelDispatchAndInc(){
225      inc(LeaveKernelDispatchBits);
226  }
227 
228  void OpenCLBackend::OpenCLQueue::release(){
229      cl_int status = CL_SUCCESS;
230      for (int i = 0; i < eventc; i++) {
231          status = clReleaseEvent(events[i]);
232          if (status != CL_SUCCESS) {
233              std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
234              exit(1);
235          }
236      }
237      eventc = 0;
238  }
239 
240  OpenCLBackend::OpenCLQueue::~OpenCLQueue(){
241      clReleaseCommandQueue(command_queue);
242      delete []events;
243  }
244 
245 void OpenCLBackend::OpenCLQueue::dispatch(KernelContext *kernelContext, Backend::CompilationUnit::Kernel *kernel){
246     size_t dims = 1;
247     size_t global_work_size[]{
248        static_cast<size_t>(kernelContext->maxX),
249        static_cast<size_t>(0),// Todo: kernelContext->maxY
250        static_cast<size_t>(0),// Todo: kernelContext->maxZ
251     };
252     cl_int status = clEnqueueNDRangeKernel(
253             command_queue,
254             dynamic_cast<OpenCLProgram::OpenCLKernel*>(kernel)->kernel,
255             dims,
256             nullptr,
257             global_work_size,
258             nullptr,
259             eventc,
260             eventListPtr(),
261             nextEventPtr());
262     inc(NDRangeBits);
263    // markAsNDRangeAndInc();
264     if (status != CL_SUCCESS) {
265         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
266         exit(1);
267     }
268     if (backend->config->trace | backend->config->traceEnqueues){
269         std::cout << "enqueued kernel dispatch \""<< kernel->name <<"\" globalSize=" << kernelContext->maxX << std::endl;
270     }
271 
272 }
273 
274 
275 void OpenCLBackend::OpenCLQueue::copyToDevice(Backend::Buffer *buffer) {
276 
277     auto openclBuffer = dynamic_cast<OpenCLBuffer *>(buffer);
278     cl_int status = clEnqueueWriteBuffer(
279             command_queue,
280             openclBuffer->clMem,
281             CL_FALSE,
282             0,
283             buffer->bufferState->length,
284             buffer->bufferState->ptr,
285             eventc,
286             eventListPtr(),
287             nextEventPtr()
288     );
289 
290     if (status != CL_SUCCESS) {
291         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
292         exit(1);
293     }
294     inc(CopyToDeviceBits);
295   //  markAsCopyToDeviceAndInc();
296 }
297 
298 void  OpenCLBackend::OpenCLQueue::copyFromDevice(Backend::Buffer *buffer) {
299     auto openclBuffer = dynamic_cast<OpenCLBuffer *>(buffer);
300     cl_int status = clEnqueueReadBuffer(
301             command_queue,
302             openclBuffer->clMem,
303             CL_FALSE,
304             0,
305             buffer->bufferState->length,
306             buffer->bufferState->ptr,
307             eventc,
308             eventListPtr(),
309             nextEventPtr()
310     );
311     if (status != CL_SUCCESS) {
312         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
313         exit(1);
314     }
315     inc(CopyFromDeviceBits);
316     //markAsCopyFromDeviceAndInc();
317 }