1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 #include "opencl_backend.h" 26 /* 27 While based on OpenCL's event list, I think we need to use a MOD eventMax queue. 28 29 So 30 */ 31 OpenCLBackend::OpenCLQueue::OpenCLQueue(OpenCLBackend *openclBackend) 32 :openclBackend(openclBackend), 33 eventMax(10000), 34 events(new cl_event[eventMax]), 35 eventInfoBits(new int[eventMax]), 36 eventInfoConstCharPtrArgs(new const char *[eventMax]), 37 eventc(0){ 38 } 39 40 cl_event *OpenCLBackend::OpenCLQueue::eventListPtr(){ 41 return (eventc == 0) ? nullptr : events; 42 } 43 cl_event *OpenCLBackend::OpenCLQueue::nextEventPtr(){ 44 return &events[eventc]; 45 } 46 47 void OpenCLBackend::OpenCLQueue::showEvents(int width) { 48 const int SAMPLE_TYPES=4; 49 cl_ulong *samples = new cl_ulong[SAMPLE_TYPES * eventc]; // queued, submit, start, end, complete 50 int sample = 0; 51 cl_ulong min; 52 cl_ulong max; 53 cl_profiling_info profiling_info_arr[]={CL_PROFILING_COMMAND_QUEUED,CL_PROFILING_COMMAND_SUBMIT,CL_PROFILING_COMMAND_START,CL_PROFILING_COMMAND_END} ; 54 const char* profiling_info_name_arr[]={"CL_PROFILING_COMMAND_QUEUED","CL_PROFILING_COMMAND_SUBMIT","CL_PROFILING_COMMAND_START","CL_PROFILING_COMMAND_END" } ; 55 56 for (int event = 0; event < eventc; event++) { 57 for (int type = 0; type < SAMPLE_TYPES; type++) { 58 if ((clGetEventProfilingInfo(events[event], profiling_info_arr[type], sizeof(samples[sample]), &samples[sample], NULL)) != 59 CL_SUCCESS) { 60 std::cerr << "failed to get profile info " << profiling_info_name_arr[type] << std::endl; 61 } 62 if (sample == 0) { 63 if (type == 0){ 64 min = max = samples[sample]; 65 } 66 } else { 67 if (samples[sample] < min) { 68 min = samples[sample]; 69 } 70 if (samples[sample] > max) { 71 max = samples[sample]; 72 } 73 } 74 sample++; 75 } 76 } 77 sample = 0; 78 int range = (max - min); 79 int scale = range / width; // range per char 80 std::cout << "Range: " <<min<< "-" <<max<< "("<< range << "ns)" 81 << " (" << scale << "ns) per char" 82 << " +:submitted, .:started, =:end "<< std::endl; 83 84 for (int event = 0; event < eventc; event++) { 85 /* cl_command_type command_type; 86 clGetEventInfo(events[event],CL_EVENT_COMMAND_TYPE,sizeof(command_type), &command_type, nullptr); 87 switch (command_type){ 88 case CL_COMMAND_MARKER: std::cout << "marker "; break; 89 case CL_COMMAND_USER: std::cout << " user "; break; 90 case CL_COMMAND_NDRANGE_KERNEL: std::cout << "kernel "; break; 91 case CL_COMMAND_READ_BUFFER: std::cout << " read "; break; 92 case CL_COMMAND_WRITE_BUFFER: std::cout << " write "; break; 93 default: std::cout << " other "; break; 94 } */ 95 int bits = eventInfoBits[event]; 96 if ((bits&CopyToDeviceBits)==CopyToDeviceBits){ 97 std::cout << " write "<<(bits&0xffff)<<" " ; 98 } 99 if ((bits&CopyFromDeviceBits)==CopyFromDeviceBits){ 100 std::cout << " read "<<(bits&0xffff)<<" "; 101 } 102 if ((bits&StartComputeBits)==StartComputeBits){ 103 std::cout << " start "; 104 } 105 if ((bits&EndComputeBits)==EndComputeBits){ 106 std::cout << " end "; 107 } 108 if ((bits&NDRangeBits)==NDRangeBits){ 109 std::cout << " kernel "; 110 } 111 if ((bits&EnterKernelDispatchBits)==EnterKernelDispatchBits){ 112 113 if ((bits&HasConstCharPtrArgBits)==HasConstCharPtrArgBits){ 114 std::cout<< eventInfoConstCharPtrArgs[event]<<std::endl; 115 } 116 std::cout << " enter{ "; 117 118 } 119 if ((bits&LeaveKernelDispatchBits)==LeaveKernelDispatchBits){ 120 // std::cout << " leave "; 121 if ((bits&HasConstCharPtrArgBits)==HasConstCharPtrArgBits){ 122 std::cout<< eventInfoConstCharPtrArgs[event] <<std::endl; 123 } 124 std::cout << " }leave "; 125 126 } 127 128 129 cl_ulong queue = (samples[sample++] - min) / scale; 130 cl_ulong submit = (samples[sample++] - min) / scale; 131 cl_ulong start = (samples[sample++] - min) / scale; 132 cl_ulong end = (samples[sample++] - min) / scale; 133 134 std::cout << std::setw(20)<< (queue-end) << "(ns) "; 135 for (int c = 0; c < width; c++) { 136 char ch = ' '; 137 if (c >= queue && c<=submit) { 138 ch = '+'; 139 }else if (c>submit && c<start){ 140 ch = '.'; 141 }else if (c>=start && c<end){ 142 ch = '='; 143 } 144 std::cout << ch; 145 } 146 std::cout << std::endl; 147 } 148 delete[] samples; 149 } 150 void OpenCLBackend::OpenCLQueue::wait(){ 151 if (eventc > 0){ 152 cl_int status = clWaitForEvents(eventc, events); 153 if (status != CL_SUCCESS) { 154 std::cerr << "failed clWaitForEvents" << OpenCLBackend::errorMsg(status) << std::endl; 155 exit(1); 156 } 157 } 158 } 159 void clCallback(void *){ 160 std::cerr<<"start of compute"<<std::endl; 161 } 162 163 void OpenCLBackend::OpenCLQueue::marker(int bits){ 164 cl_int status = clEnqueueMarkerWithWaitList( 165 command_queue, 166 this->eventc, this->eventListPtr(),this->nextEventPtr() 167 ); 168 if (status != CL_SUCCESS){ 169 std::cerr << "failed to clEnqueueMarkerWithWaitList "<<errorMsg(status)<< std::endl; 170 std::exit(1); 171 } 172 inc(bits); 173 } 174 void OpenCLBackend::OpenCLQueue::marker(int bits, const char* arg){ 175 cl_int status = clEnqueueMarkerWithWaitList( 176 command_queue, 177 this->eventc, this->eventListPtr(),this->nextEventPtr() 178 ); 179 if (status != CL_SUCCESS){ 180 std::cerr << "failed to clEnqueueMarkerWithWaitList "<<errorMsg(status)<< std::endl; 181 std::exit(1); 182 } 183 inc(bits, arg); 184 } 185 void OpenCLBackend::OpenCLQueue::marker(int bits, int arg){ 186 cl_int status = clEnqueueMarkerWithWaitList( 187 command_queue, 188 this->eventc, this->eventListPtr(),this->nextEventPtr() 189 ); 190 if (status != CL_SUCCESS){ 191 std::cerr << "failed to clEnqueueMarkerWithWaitList "<<errorMsg(status)<< std::endl; 192 std::exit(1); 193 } 194 inc(bits, arg); 195 } 196 197 void OpenCLBackend::OpenCLQueue::computeStart(){ 198 wait(); // should be no-op 199 release(); // also ; 200 marker(StartComputeBits); 201 } 202 203 204 205 void OpenCLBackend::OpenCLQueue::computeEnd(){ 206 marker(EndComputeBits); 207 } 208 209 void OpenCLBackend::OpenCLQueue::inc(int bits){ 210 if (eventc+1 >= eventMax){ 211 std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl; 212 }else{ 213 eventInfoBits[eventc]=bits; 214 } 215 eventc++; 216 } 217 void OpenCLBackend::OpenCLQueue::inc(int bits, const char *arg){ 218 if (eventc+1 >= eventMax){ 219 std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl; 220 }else{ 221 eventInfoBits[eventc]=bits|HasConstCharPtrArgBits; 222 eventInfoConstCharPtrArgs[eventc]=arg; 223 } 224 eventc++; 225 } 226 void OpenCLBackend::OpenCLQueue::inc(int bits, int arg){ 227 if (eventc+1 >= eventMax){ 228 std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl; 229 }else{ 230 eventInfoBits[eventc]=bits|arg|hasIntArgBits; 231 } 232 eventc++; 233 } 234 235 void OpenCLBackend::OpenCLQueue::markAsEndComputeAndInc(){ 236 inc(EndComputeBits); 237 } 238 void OpenCLBackend::OpenCLQueue::markAsStartComputeAndInc(){ 239 inc(StartComputeBits); 240 } 241 void OpenCLBackend::OpenCLQueue::markAsNDRangeAndInc(){ 242 inc(NDRangeBits); 243 } 244 void OpenCLBackend::OpenCLQueue::markAsCopyToDeviceAndInc(int argn){ 245 inc(CopyToDeviceBits, argn); 246 } 247 void OpenCLBackend::OpenCLQueue::markAsCopyFromDeviceAndInc(int argn){ 248 inc(CopyFromDeviceBits, argn); 249 } 250 void OpenCLBackend::OpenCLQueue::markAsEnterKernelDispatchAndInc(){ 251 inc(EnterKernelDispatchBits); 252 } 253 void OpenCLBackend::OpenCLQueue::markAsLeaveKernelDispatchAndInc(){ 254 inc(LeaveKernelDispatchBits); 255 } 256 257 void OpenCLBackend::OpenCLQueue::release(){ 258 cl_int status = CL_SUCCESS; 259 for (int i = 0; i < eventc; i++) { 260 status = clReleaseEvent(events[i]); 261 if (status != CL_SUCCESS) { 262 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 263 exit(1); 264 } 265 } 266 eventc = 0; 267 } 268 269 OpenCLBackend::OpenCLQueue::~OpenCLQueue(){ 270 clReleaseCommandQueue(command_queue); 271 delete []events; 272 delete []eventInfoBits; 273 delete []eventInfoConstCharPtrArgs; 274 }