1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 #include "opencl_backend.h" 26 /* 27 While based on OpenCL's event list, I think we need to use a MOD eventMax queue. 28 29 So 30 */ 31 OpenCLBackend::OpenCLQueue::OpenCLQueue(Backend *backend) 32 : ProfilableQueue(backend, 10000), 33 command_queue(), 34 events(new cl_event[eventMax]){ 35 } 36 37 cl_event *OpenCLBackend::OpenCLQueue::eventListPtr() const { 38 return (eventc == 0) ? nullptr : events; 39 } 40 cl_event *OpenCLBackend::OpenCLQueue::nextEventPtr() const { 41 return &events[eventc]; 42 } 43 44 void OpenCLBackend::OpenCLQueue::showEvents(const int width) { 45 constexpr int SAMPLE_TYPES=4; 46 auto *samples = new cl_ulong[SAMPLE_TYPES * eventc]; // queued, submit, start, end, complete 47 int sample = 0; 48 cl_ulong min=CL_LONG_MAX; 49 cl_ulong max=CL_LONG_MIN; 50 51 for (int event = 0; event < eventc; event++) { 52 for (int type = 0; type < SAMPLE_TYPES; type++) { 53 cl_profiling_info profiling_info_arr[]={CL_PROFILING_COMMAND_QUEUED,CL_PROFILING_COMMAND_SUBMIT,CL_PROFILING_COMMAND_START,CL_PROFILING_COMMAND_END}; 54 if ((clGetEventProfilingInfo(events[event], profiling_info_arr[type], sizeof(samples[sample]), &samples[sample], NULL)) != 55 CL_SUCCESS) { 56 const char* profiling_info_name_arr[]={"CL_PROFILING_COMMAND_QUEUED","CL_PROFILING_COMMAND_SUBMIT","CL_PROFILING_COMMAND_START","CL_PROFILING_COMMAND_END" }; 57 std::cerr << "failed to get profile info " << profiling_info_name_arr[type] << std::endl; 58 } 59 if (sample == 0) { 60 if (type == 0){ 61 min = max = samples[sample]; 62 } 63 } else { 64 if (samples[sample] < min) { 65 min = samples[sample]; 66 } 67 if (samples[sample] > max) { 68 max = samples[sample]; 69 } 70 } 71 sample++; 72 } 73 } 74 sample = 0; 75 const cl_ulong range = (max - min); 76 const cl_ulong scale = range / width; // range per char 77 std::cout << "Range: " <<min<< "-" <<max<< "("<< range << "ns)" 78 << " (" << scale << "ns) per char" 79 << " +:submitted, .:started, =:end "<< std::endl; 80 81 for (int event = 0; event < eventc; event++) { 82 /* cl_command_type command_type; 83 clGetEventInfo(events[event],CL_EVENT_COMMAND_TYPE,sizeof(command_type), &command_type, nullptr); 84 switch (command_type){ 85 case CL_COMMAND_MARKER: std::cout << "marker "; break; 86 case CL_COMMAND_USER: std::cout << " user "; break; 87 case CL_COMMAND_NDRANGE_KERNEL: std::cout << "kernel "; break; 88 case CL_COMMAND_READ_BUFFER: std::cout << " read "; break; 89 case CL_COMMAND_WRITE_BUFFER: std::cout << " write "; break; 90 default: std::cout << " other "; break; 91 } */ 92 const int bits = eventInfoBits[event]; 93 if ((bits&CopyToDeviceBits)==CopyToDeviceBits){ 94 std::cout << " write "<<(bits&0xffff)<<" " ; 95 } 96 if ((bits&CopyFromDeviceBits)==CopyFromDeviceBits){ 97 std::cout << " read "<<(bits&0xffff)<<" "; 98 } 99 if ((bits&StartComputeBits)==StartComputeBits){ 100 std::cout << " start "; 101 } 102 if ((bits&EndComputeBits)==EndComputeBits){ 103 std::cout << " end "; 104 } 105 if ((bits&NDRangeBits)==NDRangeBits){ 106 std::cout << " kernel "; 107 } 108 if ((bits&EnterKernelDispatchBits)==EnterKernelDispatchBits){ 109 if ((bits&HasConstCharPtrArgBits)==HasConstCharPtrArgBits){ 110 std::cout<< eventInfoConstCharPtrArgs[event]<<std::endl; 111 } 112 std::cout << " enter{ "; 113 114 } 115 if ((bits&LeaveKernelDispatchBits)==LeaveKernelDispatchBits){ 116 // std::cout << " leave "; 117 if ((bits&HasConstCharPtrArgBits)==HasConstCharPtrArgBits){ 118 std::cout<< eventInfoConstCharPtrArgs[event] <<std::endl; 119 } 120 std::cout << " }leave "; 121 122 } 123 124 125 const cl_ulong queue = (samples[sample++] - min) / scale; 126 const cl_ulong submit = (samples[sample++] - min) / scale; 127 const cl_ulong start = (samples[sample++] - min) / scale; 128 const cl_ulong end = (samples[sample++] - min) / scale; 129 130 std::cout << std::setw(20)<< (queue-end) << "(ns) "; 131 for (int c = 0; c < width; c++) { 132 char ch = ' '; 133 if (c >= queue && c<=submit) { 134 ch = '+'; 135 }else if (c>submit && c<start){ 136 ch = '.'; 137 }else if (c>=start && c<end){ 138 ch = '='; 139 } 140 std::cout << ch; 141 } 142 std::cout << std::endl; 143 } 144 delete[] samples; 145 } 146 void OpenCLBackend::OpenCLQueue::wait(){ 147 if (eventc > 0){ 148 cl_int status = clWaitForEvents(eventc, events); 149 if (status != CL_SUCCESS) { 150 std::cerr << "failed clWaitForEvents" << OpenCLBackend::errorMsg(status) << std::endl; 151 exit(1); 152 } 153 } 154 } 155 // void clCallback(void *){ 156 // std::cerr<<"start of compute"<<std::endl; 157 // } 158 159 void OpenCLBackend::OpenCLQueue::marker(int bits){ 160 cl_int status = clEnqueueMarkerWithWaitList( 161 command_queue, 162 this->eventc, 163 this->eventListPtr(), 164 this->nextEventPtr() 165 ); 166 if (status != CL_SUCCESS){ 167 std::cerr << "failed to clEnqueueMarkerWithWaitList "<<errorMsg(status)<< std::endl; 168 std::exit(1); 169 } 170 inc(bits); 171 } 172 void OpenCLBackend::OpenCLQueue::marker(int bits, const char* arg){ 173 cl_int status = clEnqueueMarkerWithWaitList( 174 command_queue, 175 this->eventc, this->eventListPtr(),this->nextEventPtr() 176 ); 177 if (status != CL_SUCCESS){ 178 std::cerr << "failed to clEnqueueMarkerWithWaitList "<<errorMsg(status)<< std::endl; 179 std::exit(1); 180 } 181 inc(bits, arg); 182 } 183 184 185 void OpenCLBackend::OpenCLQueue::computeStart(){ 186 wait(); // should be no-op 187 release(); // also ; 188 marker(StartComputeBits); 189 } 190 191 void OpenCLBackend::OpenCLQueue::computeEnd(){ 192 marker(EndComputeBits); 193 } 194 195 void OpenCLBackend::OpenCLQueue::inc(const int bits){ 196 if (eventc+1 >= eventMax){ 197 std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl; 198 }else{ 199 eventInfoBits[eventc]=bits; 200 } 201 eventc++; 202 } 203 void OpenCLBackend::OpenCLQueue::inc(const int bits, const char *arg){ 204 if (eventc+1 >= eventMax){ 205 std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl; 206 }else{ 207 eventInfoBits[eventc]=bits|HasConstCharPtrArgBits; 208 eventInfoConstCharPtrArgs[eventc]=arg; 209 } 210 eventc++; 211 } 212 213 214 void OpenCLBackend::OpenCLQueue::markAsEndComputeAndInc(){ 215 inc(EndComputeBits); 216 } 217 void OpenCLBackend::OpenCLQueue::markAsStartComputeAndInc(){ 218 inc(StartComputeBits); 219 } 220 221 void OpenCLBackend::OpenCLQueue::markAsEnterKernelDispatchAndInc(){ 222 inc(EnterKernelDispatchBits); 223 } 224 void OpenCLBackend::OpenCLQueue::markAsLeaveKernelDispatchAndInc(){ 225 inc(LeaveKernelDispatchBits); 226 } 227 228 void OpenCLBackend::OpenCLQueue::release(){ 229 cl_int status = CL_SUCCESS; 230 for (int i = 0; i < eventc; i++) { 231 status = clReleaseEvent(events[i]); 232 if (status != CL_SUCCESS) { 233 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 234 exit(1); 235 } 236 } 237 eventc = 0; 238 } 239 240 OpenCLBackend::OpenCLQueue::~OpenCLQueue(){ 241 clReleaseCommandQueue(command_queue); 242 delete []events; 243 } 244 245 void OpenCLBackend::OpenCLQueue::dispatch(KernelContext *kernelContext, Backend::CompilationUnit::Kernel *kernel){ 246 size_t dims = 1; 247 size_t global_work_size[]{ 248 static_cast<size_t>(kernelContext->maxX), 249 static_cast<size_t>(0),// Todo: kernelContext->maxY 250 static_cast<size_t>(0),// Todo: kernelContext->maxZ 251 }; 252 cl_int status = clEnqueueNDRangeKernel( 253 command_queue, 254 dynamic_cast<OpenCLProgram::OpenCLKernel*>(kernel)->kernel, 255 dims, 256 nullptr, 257 global_work_size, 258 nullptr, 259 eventc, 260 eventListPtr(), 261 nextEventPtr()); 262 inc(NDRangeBits); 263 // markAsNDRangeAndInc(); 264 if (status != CL_SUCCESS) { 265 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 266 exit(1); 267 } 268 if (backend->config->trace | backend->config->traceEnqueues){ 269 std::cout << "enqueued kernel dispatch \""<< kernel->name <<"\" globalSize=" << kernelContext->maxX << std::endl; 270 } 271 272 } 273 274 275 void OpenCLBackend::OpenCLQueue::copyToDevice(Backend::Buffer *buffer) { 276 277 auto openclBuffer = dynamic_cast<OpenCLBuffer *>(buffer); 278 cl_int status = clEnqueueWriteBuffer( 279 command_queue, 280 openclBuffer->clMem, 281 CL_FALSE, 282 0, 283 buffer->bufferState->length, 284 buffer->bufferState->ptr, 285 eventc, 286 eventListPtr(), 287 nextEventPtr() 288 ); 289 290 if (status != CL_SUCCESS) { 291 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 292 exit(1); 293 } 294 inc(CopyToDeviceBits); 295 // markAsCopyToDeviceAndInc(); 296 } 297 298 void OpenCLBackend::OpenCLQueue::copyFromDevice(Backend::Buffer *buffer) { 299 auto openclBuffer = dynamic_cast<OpenCLBuffer *>(buffer); 300 cl_int status = clEnqueueReadBuffer( 301 command_queue, 302 openclBuffer->clMem, 303 CL_FALSE, 304 0, 305 buffer->bufferState->length, 306 buffer->bufferState->ptr, 307 eventc, 308 eventListPtr(), 309 nextEventPtr() 310 ); 311 if (status != CL_SUCCESS) { 312 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 313 exit(1); 314 } 315 inc(CopyFromDeviceBits); 316 //markAsCopyFromDeviceAndInc(); 317 }