1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 #include "opencl_backend.h" 26 /* 27 While based on OpenCL's event list, I think we need to use a MOD eventMax queue. 28 29 So 30 */ 31 OpenCLBackend::OpenCLQueue::OpenCLQueue(Backend *backend) 32 : Backend::ProfilableQueue(backend, 10000), 33 command_queue(), 34 events(new cl_event[eventMax]){ 35 } 36 37 cl_event *OpenCLBackend::OpenCLQueue::eventListPtr(){ 38 return (eventc == 0) ? nullptr : events; 39 } 40 cl_event *OpenCLBackend::OpenCLQueue::nextEventPtr(){ 41 return &events[eventc]; 42 } 43 44 void OpenCLBackend::OpenCLQueue::showEvents(int width) { 45 const int SAMPLE_TYPES=4; 46 cl_ulong *samples = new cl_ulong[SAMPLE_TYPES * eventc]; // queued, submit, start, end, complete 47 int sample = 0; 48 cl_ulong min; 49 cl_ulong max; 50 cl_profiling_info profiling_info_arr[]={CL_PROFILING_COMMAND_QUEUED,CL_PROFILING_COMMAND_SUBMIT,CL_PROFILING_COMMAND_START,CL_PROFILING_COMMAND_END} ; 51 const char* profiling_info_name_arr[]={"CL_PROFILING_COMMAND_QUEUED","CL_PROFILING_COMMAND_SUBMIT","CL_PROFILING_COMMAND_START","CL_PROFILING_COMMAND_END" } ; 52 53 for (int event = 0; event < eventc; event++) { 54 for (int type = 0; type < SAMPLE_TYPES; type++) { 55 if ((clGetEventProfilingInfo(events[event], profiling_info_arr[type], sizeof(samples[sample]), &samples[sample], NULL)) != 56 CL_SUCCESS) { 57 std::cerr << "failed to get profile info " << profiling_info_name_arr[type] << std::endl; 58 } 59 if (sample == 0) { 60 if (type == 0){ 61 min = max = samples[sample]; 62 } 63 } else { 64 if (samples[sample] < min) { 65 min = samples[sample]; 66 } 67 if (samples[sample] > max) { 68 max = samples[sample]; 69 } 70 } 71 sample++; 72 } 73 } 74 sample = 0; 75 int range = (max - min); 76 int scale = range / width; // range per char 77 std::cout << "Range: " <<min<< "-" <<max<< "("<< range << "ns)" 78 << " (" << scale << "ns) per char" 79 << " +:submitted, .:started, =:end "<< std::endl; 80 81 for (int event = 0; event < eventc; event++) { 82 /* cl_command_type command_type; 83 clGetEventInfo(events[event],CL_EVENT_COMMAND_TYPE,sizeof(command_type), &command_type, nullptr); 84 switch (command_type){ 85 case CL_COMMAND_MARKER: std::cout << "marker "; break; 86 case CL_COMMAND_USER: std::cout << " user "; break; 87 case CL_COMMAND_NDRANGE_KERNEL: std::cout << "kernel "; break; 88 case CL_COMMAND_READ_BUFFER: std::cout << " read "; break; 89 case CL_COMMAND_WRITE_BUFFER: std::cout << " write "; break; 90 default: std::cout << " other "; break; 91 } */ 92 int bits = eventInfoBits[event]; 93 if ((bits&CopyToDeviceBits)==CopyToDeviceBits){ 94 std::cout << " write "<<(bits&0xffff)<<" " ; 95 } 96 if ((bits&CopyFromDeviceBits)==CopyFromDeviceBits){ 97 std::cout << " read "<<(bits&0xffff)<<" "; 98 } 99 if ((bits&StartComputeBits)==StartComputeBits){ 100 std::cout << " start "; 101 } 102 if ((bits&EndComputeBits)==EndComputeBits){ 103 std::cout << " end "; 104 } 105 if ((bits&NDRangeBits)==NDRangeBits){ 106 std::cout << " kernel "; 107 } 108 if ((bits&EnterKernelDispatchBits)==EnterKernelDispatchBits){ 109 110 if ((bits&HasConstCharPtrArgBits)==HasConstCharPtrArgBits){ 111 std::cout<< eventInfoConstCharPtrArgs[event]<<std::endl; 112 } 113 std::cout << " enter{ "; 114 115 } 116 if ((bits&LeaveKernelDispatchBits)==LeaveKernelDispatchBits){ 117 // std::cout << " leave "; 118 if ((bits&HasConstCharPtrArgBits)==HasConstCharPtrArgBits){ 119 std::cout<< eventInfoConstCharPtrArgs[event] <<std::endl; 120 } 121 std::cout << " }leave "; 122 123 } 124 125 126 cl_ulong queue = (samples[sample++] - min) / scale; 127 cl_ulong submit = (samples[sample++] - min) / scale; 128 cl_ulong start = (samples[sample++] - min) / scale; 129 cl_ulong end = (samples[sample++] - min) / scale; 130 131 std::cout << std::setw(20)<< (queue-end) << "(ns) "; 132 for (int c = 0; c < width; c++) { 133 char ch = ' '; 134 if (c >= queue && c<=submit) { 135 ch = '+'; 136 }else if (c>submit && c<start){ 137 ch = '.'; 138 }else if (c>=start && c<end){ 139 ch = '='; 140 } 141 std::cout << ch; 142 } 143 std::cout << std::endl; 144 } 145 delete[] samples; 146 } 147 void OpenCLBackend::OpenCLQueue::wait(){ 148 if (eventc > 0){ 149 cl_int status = clWaitForEvents(eventc, events); 150 if (status != CL_SUCCESS) { 151 std::cerr << "failed clWaitForEvents" << OpenCLBackend::errorMsg(status) << std::endl; 152 exit(1); 153 } 154 } 155 } 156 // void clCallback(void *){ 157 // std::cerr<<"start of compute"<<std::endl; 158 // } 159 160 void OpenCLBackend::OpenCLQueue::marker(int bits){ 161 cl_int status = clEnqueueMarkerWithWaitList( 162 command_queue, 163 this->eventc, 164 this->eventListPtr(), 165 this->nextEventPtr() 166 ); 167 if (status != CL_SUCCESS){ 168 std::cerr << "failed to clEnqueueMarkerWithWaitList "<<errorMsg(status)<< std::endl; 169 std::exit(1); 170 } 171 inc(bits); 172 } 173 void OpenCLBackend::OpenCLQueue::marker(int bits, const char* arg){ 174 cl_int status = clEnqueueMarkerWithWaitList( 175 command_queue, 176 this->eventc, this->eventListPtr(),this->nextEventPtr() 177 ); 178 if (status != CL_SUCCESS){ 179 std::cerr << "failed to clEnqueueMarkerWithWaitList "<<errorMsg(status)<< std::endl; 180 std::exit(1); 181 } 182 inc(bits, arg); 183 } 184 185 186 void OpenCLBackend::OpenCLQueue::computeStart(){ 187 wait(); // should be no-op 188 release(); // also ; 189 marker(StartComputeBits); 190 } 191 192 void OpenCLBackend::OpenCLQueue::computeEnd(){ 193 marker(EndComputeBits); 194 } 195 196 void OpenCLBackend::OpenCLQueue::inc(int bits){ 197 if (eventc+1 >= eventMax){ 198 std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl; 199 }else{ 200 eventInfoBits[eventc]=bits; 201 } 202 eventc++; 203 } 204 void OpenCLBackend::OpenCLQueue::inc(int bits, const char *arg){ 205 if (eventc+1 >= eventMax){ 206 std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl; 207 }else{ 208 eventInfoBits[eventc]=bits|HasConstCharPtrArgBits; 209 eventInfoConstCharPtrArgs[eventc]=arg; 210 } 211 eventc++; 212 } 213 214 215 void OpenCLBackend::OpenCLQueue::markAsEndComputeAndInc(){ 216 inc(EndComputeBits); 217 } 218 void OpenCLBackend::OpenCLQueue::markAsStartComputeAndInc(){ 219 inc(StartComputeBits); 220 } 221 222 void OpenCLBackend::OpenCLQueue::markAsEnterKernelDispatchAndInc(){ 223 inc(EnterKernelDispatchBits); 224 } 225 void OpenCLBackend::OpenCLQueue::markAsLeaveKernelDispatchAndInc(){ 226 inc(LeaveKernelDispatchBits); 227 } 228 229 void OpenCLBackend::OpenCLQueue::release(){ 230 cl_int status = CL_SUCCESS; 231 for (int i = 0; i < eventc; i++) { 232 status = clReleaseEvent(events[i]); 233 if (status != CL_SUCCESS) { 234 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 235 exit(1); 236 } 237 } 238 eventc = 0; 239 } 240 241 OpenCLBackend::OpenCLQueue::~OpenCLQueue(){ 242 clReleaseCommandQueue(command_queue); 243 delete []events; 244 } 245 246 void OpenCLBackend::OpenCLQueue::dispatch(KernelContext *kernelContext, Backend::CompilationUnit::Kernel *kernel){ 247 size_t dims = 1; 248 cl_int status = clEnqueueNDRangeKernel( 249 command_queue, 250 dynamic_cast<OpenCLProgram::OpenCLKernel*>(kernel)->kernel, 251 dims, 252 nullptr, 253 reinterpret_cast<const size_t *>(&kernelContext->maxX), 254 nullptr, 255 eventc, 256 eventListPtr(), 257 nextEventPtr()); 258 inc(NDRangeBits); 259 // markAsNDRangeAndInc(); 260 if (status != CL_SUCCESS) { 261 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 262 exit(1); 263 } 264 if (backend->config->trace | backend->config->traceEnqueues){ 265 std::cout << "enqueued kernel dispatch \""<< kernel->name <<"\" globalSize=" << kernelContext->maxX << std::endl; 266 } 267 268 } 269 270 271 void OpenCLBackend::OpenCLQueue::copyToDevice(Backend::Buffer *buffer) { 272 273 auto openclBuffer = dynamic_cast<OpenCLBuffer *>(buffer); 274 cl_int status = clEnqueueWriteBuffer( 275 command_queue, 276 openclBuffer->clMem, 277 CL_FALSE, 278 0, 279 buffer->bufferState->length, 280 buffer->bufferState->ptr, 281 eventc, 282 eventListPtr(), 283 nextEventPtr() 284 ); 285 286 if (status != CL_SUCCESS) { 287 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 288 exit(1); 289 } 290 inc(CopyToDeviceBits); 291 // markAsCopyToDeviceAndInc(); 292 } 293 294 void OpenCLBackend::OpenCLQueue::copyFromDevice(Backend::Buffer *buffer) { 295 auto openclBuffer = dynamic_cast<OpenCLBuffer *>(buffer); 296 cl_int status = clEnqueueReadBuffer( 297 command_queue, 298 openclBuffer->clMem, 299 CL_FALSE, 300 0, 301 buffer->bufferState->length, 302 buffer->bufferState->ptr, 303 eventc, 304 eventListPtr(), 305 nextEventPtr() 306 ); 307 if (status != CL_SUCCESS) { 308 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 309 exit(1); 310 } 311 inc(CopyFromDeviceBits); 312 //markAsCopyFromDeviceAndInc(); 313 }