1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 #include "opencl_backend.h"
 26 /*
 27 While based on OpenCL's event list, I think we need to use a MOD eventMax queue.
 28 
 29 So
 30 */
 31  OpenCLBackend::OpenCLQueue::OpenCLQueue(Backend *backend)
 32     : Backend::ProfilableQueue(backend, 10000),
 33       command_queue(),
 34       events(new cl_event[eventMax]){
 35  }
 36 
 37  cl_event *OpenCLBackend::OpenCLQueue::eventListPtr(){
 38     return (eventc == 0) ? nullptr : events;
 39  }
 40  cl_event *OpenCLBackend::OpenCLQueue::nextEventPtr(){
 41     return &events[eventc];
 42  }
 43 
 44 void OpenCLBackend::OpenCLQueue::showEvents(int width) {
 45     const int  SAMPLE_TYPES=4;
 46     cl_ulong *samples = new cl_ulong[SAMPLE_TYPES * eventc]; // queued, submit, start, end, complete
 47     int sample = 0;
 48     cl_ulong min;
 49     cl_ulong max;
 50     cl_profiling_info profiling_info_arr[]={CL_PROFILING_COMMAND_QUEUED,CL_PROFILING_COMMAND_SUBMIT,CL_PROFILING_COMMAND_START,CL_PROFILING_COMMAND_END} ;
 51     const char* profiling_info_name_arr[]={"CL_PROFILING_COMMAND_QUEUED","CL_PROFILING_COMMAND_SUBMIT","CL_PROFILING_COMMAND_START","CL_PROFILING_COMMAND_END" } ;
 52 
 53     for (int event = 0; event < eventc; event++) {
 54         for (int type = 0; type < SAMPLE_TYPES; type++) {
 55             if ((clGetEventProfilingInfo(events[event], profiling_info_arr[type], sizeof(samples[sample]), &samples[sample], NULL)) !=
 56                 CL_SUCCESS) {
 57                 std::cerr << "failed to get profile info " << profiling_info_name_arr[type] << std::endl;
 58             }
 59             if (sample == 0) {
 60                 if (type == 0){
 61                    min = max = samples[sample];
 62                 }
 63             } else {
 64                 if (samples[sample] < min) {
 65                     min = samples[sample];
 66                 }
 67                 if (samples[sample] > max) {
 68                     max = samples[sample];
 69                 }
 70             }
 71             sample++;
 72         }
 73     }
 74     sample = 0;
 75     int range = (max - min);
 76     int scale = range / width;  // range per char
 77     std::cout << "Range: " <<min<< "-" <<max<< "("<< range << "ns)"
 78         <<  "  (" << scale << "ns) per char"
 79         << " +:submitted, .:started, =:end  "<< std::endl;
 80 
 81     for (int event = 0; event < eventc; event++) {
 82       /*  cl_command_type command_type;
 83         clGetEventInfo(events[event],CL_EVENT_COMMAND_TYPE,sizeof(command_type), &command_type, nullptr);
 84         switch (command_type){
 85           case CL_COMMAND_MARKER:         std::cout <<   "marker "; break;
 86           case CL_COMMAND_USER:           std::cout <<   "  user "; break;
 87           case CL_COMMAND_NDRANGE_KERNEL: std::cout <<   "kernel "; break;
 88           case CL_COMMAND_READ_BUFFER:    std::cout <<   "  read "; break;
 89           case CL_COMMAND_WRITE_BUFFER:   std::cout <<   " write "; break;
 90           default: std::cout <<                          " other "; break;
 91         } */
 92         int bits = eventInfoBits[event];
 93         if ((bits&CopyToDeviceBits)==CopyToDeviceBits){
 94            std::cout <<   "  write "<<(bits&0xffff)<<" " ;
 95         }
 96         if ((bits&CopyFromDeviceBits)==CopyFromDeviceBits){
 97            std::cout <<   "   read "<<(bits&0xffff)<<" ";
 98         }
 99         if ((bits&StartComputeBits)==StartComputeBits){
100            std::cout <<   "  start    ";
101         }
102         if ((bits&EndComputeBits)==EndComputeBits){
103            std::cout <<   "    end    ";
104         }
105         if ((bits&NDRangeBits)==NDRangeBits){
106            std::cout <<   " kernel    ";
107         }
108         if ((bits&EnterKernelDispatchBits)==EnterKernelDispatchBits){
109 
110            if ((bits&HasConstCharPtrArgBits)==HasConstCharPtrArgBits){
111                std::cout<< eventInfoConstCharPtrArgs[event]<<std::endl;
112            }
113            std::cout <<   "  enter{   ";
114 
115         }
116         if ((bits&LeaveKernelDispatchBits)==LeaveKernelDispatchBits){
117           // std::cout <<   "  leave    ";
118             if ((bits&HasConstCharPtrArgBits)==HasConstCharPtrArgBits){
119                           std::cout<< eventInfoConstCharPtrArgs[event] <<std::endl;
120                       }
121                           std::cout <<   " }leave    ";
122 
123         }
124 
125 
126         cl_ulong queue = (samples[sample++] - min) / scale;
127         cl_ulong submit = (samples[sample++] - min) / scale;
128         cl_ulong start = (samples[sample++] - min) / scale;
129         cl_ulong end = (samples[sample++] - min) / scale;
130 
131         std::cout << std::setw(20)<< (queue-end) << "(ns) ";
132         for (int c = 0; c < width; c++) {
133             char ch = ' ';
134             if (c >= queue && c<=submit) {
135                 ch = '+';
136             }else if (c>submit && c<start){
137                 ch = '.';
138             }else if (c>=start && c<end){
139                 ch = '=';
140             }
141             std::cout << ch;
142         }
143         std::cout << std::endl;
144     }
145     delete[] samples;
146 }
147 void OpenCLBackend::OpenCLQueue::wait(){
148     if (eventc > 0){
149        cl_int status = clWaitForEvents(eventc, events);
150        if (status != CL_SUCCESS) {
151           std::cerr << "failed clWaitForEvents" << OpenCLBackend::errorMsg(status) << std::endl;
152           exit(1);
153        }
154     }
155  }
156 // void clCallback(void *){
157   //    std::cerr<<"start of compute"<<std::endl;
158 // }
159 
160   void OpenCLBackend::OpenCLQueue::marker(int bits){
161       cl_int status = clEnqueueMarkerWithWaitList(
162           command_queue,
163           this->eventc,
164           this->eventListPtr(),
165           this->nextEventPtr()
166           );
167         if (status != CL_SUCCESS){
168              std::cerr << "failed to clEnqueueMarkerWithWaitList "<<errorMsg(status)<< std::endl;
169              std::exit(1);
170          }
171       inc(bits);
172   }
173     void OpenCLBackend::OpenCLQueue::marker(int bits, const char* arg){
174     cl_int status = clEnqueueMarkerWithWaitList(
175         command_queue,
176         this->eventc, this->eventListPtr(),this->nextEventPtr()
177         );
178           if (status != CL_SUCCESS){
179                std::cerr << "failed to clEnqueueMarkerWithWaitList "<<errorMsg(status)<< std::endl;
180                std::exit(1);
181            }
182         inc(bits, arg);
183     }
184 
185 
186  void OpenCLBackend::OpenCLQueue::computeStart(){
187    wait(); // should be no-op
188    release(); // also ;
189    marker(StartComputeBits);
190  }
191 
192  void OpenCLBackend::OpenCLQueue::computeEnd(){
193    marker(EndComputeBits);
194  }
195 
196  void OpenCLBackend::OpenCLQueue::inc(int bits){
197     if (eventc+1 >= eventMax){
198        std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl;
199     }else{
200         eventInfoBits[eventc]=bits;
201     }
202     eventc++;
203  }
204  void OpenCLBackend::OpenCLQueue::inc(int bits, const char *arg){
205      if (eventc+1 >= eventMax){
206         std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl;
207      }else{
208          eventInfoBits[eventc]=bits|HasConstCharPtrArgBits;
209          eventInfoConstCharPtrArgs[eventc]=arg;
210      }
211      eventc++;
212   }
213 
214 
215  void OpenCLBackend::OpenCLQueue::markAsEndComputeAndInc(){
216      inc(EndComputeBits);
217  }
218  void OpenCLBackend::OpenCLQueue::markAsStartComputeAndInc(){
219      inc(StartComputeBits);
220  }
221 
222  void OpenCLBackend::OpenCLQueue::markAsEnterKernelDispatchAndInc(){
223      inc(EnterKernelDispatchBits);
224  }
225  void OpenCLBackend::OpenCLQueue::markAsLeaveKernelDispatchAndInc(){
226      inc(LeaveKernelDispatchBits);
227  }
228 
229  void OpenCLBackend::OpenCLQueue::release(){
230      cl_int status = CL_SUCCESS;
231      for (int i = 0; i < eventc; i++) {
232          status = clReleaseEvent(events[i]);
233          if (status != CL_SUCCESS) {
234              std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
235              exit(1);
236          }
237      }
238      eventc = 0;
239  }
240 
241  OpenCLBackend::OpenCLQueue::~OpenCLQueue(){
242      clReleaseCommandQueue(command_queue);
243      delete []events;
244  }
245 
246 void OpenCLBackend::OpenCLQueue::dispatch(KernelContext *kernelContext, Backend::CompilationUnit::Kernel *kernel){
247     size_t dims = 1;
248     cl_int status = clEnqueueNDRangeKernel(
249             command_queue,
250             dynamic_cast<OpenCLProgram::OpenCLKernel*>(kernel)->kernel,
251             dims,
252             nullptr,
253             reinterpret_cast<const size_t *>(&kernelContext->maxX),
254             nullptr,
255             eventc,
256             eventListPtr(),
257             nextEventPtr());
258     inc(NDRangeBits);
259    // markAsNDRangeAndInc();
260     if (status != CL_SUCCESS) {
261         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
262         exit(1);
263     }
264     if (backend->config->trace | backend->config->traceEnqueues){
265         std::cout << "enqueued kernel dispatch \""<< kernel->name <<"\" globalSize=" << kernelContext->maxX << std::endl;
266     }
267 
268 }
269 
270 
271 void OpenCLBackend::OpenCLQueue::copyToDevice(Backend::Buffer *buffer) {
272 
273     auto openclBuffer = dynamic_cast<OpenCLBuffer *>(buffer);
274     cl_int status = clEnqueueWriteBuffer(
275             command_queue,
276             openclBuffer->clMem,
277             CL_FALSE,
278             0,
279             buffer->bufferState->length,
280             buffer->bufferState->ptr,
281             eventc,
282             eventListPtr(),
283             nextEventPtr()
284     );
285 
286     if (status != CL_SUCCESS) {
287         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
288         exit(1);
289     }
290     inc(CopyToDeviceBits);
291   //  markAsCopyToDeviceAndInc();
292 }
293 
294 void  OpenCLBackend::OpenCLQueue::copyFromDevice(Backend::Buffer *buffer) {
295     auto openclBuffer = dynamic_cast<OpenCLBuffer *>(buffer);
296     cl_int status = clEnqueueReadBuffer(
297             command_queue,
298             openclBuffer->clMem,
299             CL_FALSE,
300             0,
301             buffer->bufferState->length,
302             buffer->bufferState->ptr,
303             eventc,
304             eventListPtr(),
305             nextEventPtr()
306     );
307     if (status != CL_SUCCESS) {
308         std::cerr << OpenCLBackend::errorMsg(status) << std::endl;
309         exit(1);
310     }
311     inc(CopyFromDeviceBits);
312     //markAsCopyFromDeviceAndInc();
313 }