1 /*
2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 #include "opencl_backend.h"
26 /*
27 While based on OpenCL's event list, I think we need to use a MOD eventMax queue.
28
29 So
30 */
31 OpenCLBackend::OpenCLQueue::OpenCLQueue(Backend *backend)
32 : ProfilableQueue(backend, 10000),
33 command_queue(),
34 events(new cl_event[eventMax]) {
35 }
36
37 cl_event *OpenCLBackend::OpenCLQueue::eventListPtr() const {
38 return (eventc == 0) ? nullptr : events;
39 }
40
41 cl_event *OpenCLBackend::OpenCLQueue::nextEventPtr() const {
42 return &events[eventc];
43 }
44
45 void OpenCLBackend::OpenCLQueue::showEvents(const int width) {
46 constexpr int SAMPLE_TYPES = 4;
47 auto *samples = new cl_ulong[SAMPLE_TYPES * eventc]; // queued, submit, start, end, complete
48 int sample = 0;
49 cl_ulong min = CL_LONG_MAX;
50 cl_ulong max = CL_LONG_MIN;
51
52 for (int event = 0; event < eventc; event++) {
53 for (int type = 0; type < SAMPLE_TYPES; type++) {
54 cl_profiling_info profiling_info_arr[] = {
55 CL_PROFILING_COMMAND_QUEUED,CL_PROFILING_COMMAND_SUBMIT,CL_PROFILING_COMMAND_START,
56 CL_PROFILING_COMMAND_END
57 };
58 if ((clGetEventProfilingInfo(events[event], profiling_info_arr[type], sizeof(samples[sample]),
59 &samples[sample], NULL)) !=
60 CL_SUCCESS) {
61 const char *profiling_info_name_arr[] = {
62 "CL_PROFILING_COMMAND_QUEUED", "CL_PROFILING_COMMAND_SUBMIT", "CL_PROFILING_COMMAND_START",
63 "CL_PROFILING_COMMAND_END"
64 };
65 std::cerr << "failed to get profile info " << profiling_info_name_arr[type] << std::endl;
66 }
67 if (sample == 0) {
68 if (type == 0) {
69 min = max = samples[sample];
70 }
71 } else {
72 if (samples[sample] < min) {
73 min = samples[sample];
74 }
75 if (samples[sample] > max) {
76 max = samples[sample];
77 }
78 }
79 sample++;
80 }
81 }
82 sample = 0;
83 const cl_ulong range = (max - min);
84 const cl_ulong scale = range / width; // range per char
85 std::cout << "Range: " << min << "-" << max << "(" << range << "ns)"
86 << " (" << scale << "ns) per char"
87 << " +:submitted, .:started, =:end " << std::endl;
88
89 for (int event = 0; event < eventc; event++) {
90 /* cl_command_type command_type;
91 clGetEventInfo(events[event],CL_EVENT_COMMAND_TYPE,sizeof(command_type), &command_type, nullptr);
92 switch (command_type){
93 case CL_COMMAND_MARKER: std::cout << "marker "; break;
94 case CL_COMMAND_USER: std::cout << " user "; break;
95 case CL_COMMAND_NDRANGE_KERNEL: std::cout << "kernel "; break;
96 case CL_COMMAND_READ_BUFFER: std::cout << " read "; break;
97 case CL_COMMAND_WRITE_BUFFER: std::cout << " write "; break;
98 default: std::cout << " other "; break;
99 } */
100 const int bits = eventInfoBits[event];
101 if ((bits & CopyToDeviceBits) == CopyToDeviceBits) {
102 std::cout << " write " << (bits & 0xffff) << " ";
103 }
104 if ((bits & CopyFromDeviceBits) == CopyFromDeviceBits) {
105 std::cout << " read " << (bits & 0xffff) << " ";
106 }
107 if ((bits & StartComputeBits) == StartComputeBits) {
108 std::cout << " start ";
109 }
110 if ((bits & EndComputeBits) == EndComputeBits) {
111 std::cout << " end ";
112 }
113 if ((bits & NDRangeBits) == NDRangeBits) {
114 std::cout << " kernel ";
115 }
116 if ((bits & EnterKernelDispatchBits) == EnterKernelDispatchBits) {
117 if ((bits & HasConstCharPtrArgBits) == HasConstCharPtrArgBits) {
118 std::cout << eventInfoConstCharPtrArgs[event] << std::endl;
119 }
120 std::cout << " enter{ ";
121 }
122 if ((bits & LeaveKernelDispatchBits) == LeaveKernelDispatchBits) {
123 // std::cout << " leave ";
124 if ((bits & HasConstCharPtrArgBits) == HasConstCharPtrArgBits) {
125 std::cout << eventInfoConstCharPtrArgs[event] << std::endl;
126 }
127 std::cout << " }leave ";
128 }
129
130
131 const cl_ulong queue = (samples[sample++] - min) / scale;
132 const cl_ulong submit = (samples[sample++] - min) / scale;
133 const cl_ulong start = (samples[sample++] - min) / scale;
134 const cl_ulong end = (samples[sample++] - min) / scale;
135
136 std::cout << std::setw(20) << (queue - end) << "(ns) ";
137 for (int c = 0; c < width; c++) {
138 char ch = ' ';
139 if (c >= queue && c <= submit) {
140 ch = '+';
141 } else if (c > submit && c < start) {
142 ch = '.';
143 } else if (c >= start && c < end) {
144 ch = '=';
145 }
146 std::cout << ch;
147 }
148 std::cout << std::endl;
149 }
150 delete[] samples;
151 }
152
153 void OpenCLBackend::OpenCLQueue::wait() {
154 if (eventc > 0) {
155 OPENCL_CHECK(clWaitForEvents(eventc, events), "clWaitForEvents");
156 }
157 }
158
159 void OpenCLBackend::OpenCLQueue::marker(int bits) {
160 cl_int status = clEnqueueMarkerWithWaitList(
161 command_queue,
162 this->eventc,
163 this->eventListPtr(),
164 this->nextEventPtr()
165 );
166 if (status != CL_SUCCESS) {
167 std::cerr << "failed to clEnqueueMarkerWithWaitList " << errorMsg(status) << std::endl;
168 std::exit(1);
169 }
170 inc(bits);
171 }
172
173 void OpenCLBackend::OpenCLQueue::marker(int bits, const char *arg) {
174 OPENCL_CHECK(clEnqueueMarkerWithWaitList(
175 command_queue,
176 this->eventc,
177 this->eventListPtr(),
178 this->nextEventPtr()),
179 "clEnqueueMarkerWithWaitList");
180
181 inc(bits, arg);
182 }
183
184 void OpenCLBackend::OpenCLQueue::computeStart() {
185 wait(); // should be no-op
186 release(); // also ;
187 marker(StartComputeBits);
188 }
189
190 void OpenCLBackend::OpenCLQueue::computeEnd() {
191 marker(EndComputeBits);
192 }
193
194 void OpenCLBackend::OpenCLQueue::inc(const int bits) {
195 if (eventc + 1 >= eventMax) {
196 std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl;
197 } else {
198 eventInfoBits[eventc] = bits;
199 }
200 eventc++;
201 }
202
203 void OpenCLBackend::OpenCLQueue::inc(const int bits, const char *arg) {
204 if (eventc + 1 >= eventMax) {
205 std::cerr << "OpenCLBackend::OpenCLQueue event list overflowed!!" << std::endl;
206 } else {
207 eventInfoBits[eventc] = bits | HasConstCharPtrArgBits;
208 eventInfoConstCharPtrArgs[eventc] = arg;
209 }
210 eventc++;
211 }
212
213 void OpenCLBackend::OpenCLQueue::markAsEndComputeAndInc() {
214 inc(EndComputeBits);
215 }
216
217 void OpenCLBackend::OpenCLQueue::markAsStartComputeAndInc() {
218 inc(StartComputeBits);
219 }
220
221 void OpenCLBackend::OpenCLQueue::markAsEnterKernelDispatchAndInc() {
222 inc(EnterKernelDispatchBits);
223 }
224
225 void OpenCLBackend::OpenCLQueue::markAsLeaveKernelDispatchAndInc() {
226 inc(LeaveKernelDispatchBits);
227 }
228
229 void OpenCLBackend::OpenCLQueue::release() {
230 // TODO: possible check ALL events before return from the macro
231 for (int i = 0; i < eventc; i++) {
232 OPENCL_CHECK(clReleaseEvent(events[i]), "clReleaseEvent");
233 }
234 eventc = 0;
235 }
236
237 OpenCLBackend::OpenCLQueue::~OpenCLQueue() {
238 OPENCL_CHECK(clReleaseCommandQueue(command_queue), "clReleaseCommandQueue");
239 delete []events;
240 }
241
242 void OpenCLBackend::OpenCLQueue::dispatch(KernelContext *kernelContext, Backend::CompilationUnit::Kernel *kernel) {
243 size_t numDimensions = kernelContext->dimensions;
244
245 size_t global_work_size[] {
246 static_cast<size_t>(kernelContext->maxX), // to be replaced with gsx
247 static_cast<size_t>(kernelContext->maxY), // to be replaced with gsy
248 static_cast<size_t>(kernelContext->maxZ) // to be replaced with gsz
249 };
250
251 size_t local_work_size[] = {
252 static_cast<size_t>(kernelContext->lsx),
253 static_cast<size_t>(kernelContext->lsy),
254 static_cast<size_t>(kernelContext->lsz),
255 };
256
257 if (backend->config->info) {
258 std::cout << "[INFO] OpenCLBackend::OpenCLQueue::dispatch" << std::endl;
259 std::cout << "[INFO] numDimensions: " << numDimensions << std::endl;
260 std::cout << "[INFO] GLOBAL [" << global_work_size[0] << "," << global_work_size[1] << "," << global_work_size[2] << "]" << std::endl;
261 if (kernelContext->lsx > 0) {
262 std::cout << "[INFO] LOCAL [" << local_work_size[0] << "," << local_work_size[1] << "," << local_work_size[2] << "]" << std::endl;
263 } else {
264 std::cout << "[INFO] LOCAL [ nullptr ] // The driver will setup a default value" << std::endl;
265 }
266 }
267
268 const cl_int status = clEnqueueNDRangeKernel(
269 command_queue,
270 dynamic_cast<OpenCLProgram::OpenCLKernel *>(kernel)->kernel,
271 numDimensions,
272 nullptr,
273 global_work_size,
274 kernelContext->lsx > 0 ? local_work_size : nullptr,
275 eventc,
276 eventListPtr(),
277 nextEventPtr());
278
279 inc(NDRangeBits);
280 // markAsNDRangeAndInc();
281
282 OPENCL_CHECK(status, "clEnqueueNDRangeKernel");
283 if (backend->config->trace | backend->config->traceEnqueues) {
284 std::cout << "enqueued kernel dispatch \"" << kernel->name << "\" globalSize=" << kernelContext->maxX <<
285 std::endl;
286 }
287 }
288
289 void OpenCLBackend::OpenCLQueue::copyToDevice(Buffer *buffer) {
290 auto openclBuffer = dynamic_cast<OpenCLBuffer *>(buffer);
291 cl_int status = clEnqueueWriteBuffer(
292 command_queue,
293 openclBuffer->clMem,
294 CL_FALSE,
295 0,
296 buffer->bufferState->length,
297 buffer->bufferState->ptr,
298 eventc,
299 eventListPtr(),
300 nextEventPtr()
301 );
302
303 OPENCL_CHECK(status, "clEnqueueWriteBuffer");
304
305 inc(CopyToDeviceBits);
306 // markAsCopyToDeviceAndInc();
307 }
308
309 void OpenCLBackend::OpenCLQueue::copyFromDevice(Buffer *buffer) {
310 auto openclBuffer = dynamic_cast<OpenCLBuffer *>(buffer);
311 cl_int status = clEnqueueReadBuffer(
312 command_queue,
313 openclBuffer->clMem,
314 CL_FALSE,
315 0,
316 buffer->bufferState->length,
317 buffer->bufferState->ptr,
318 eventc,
319 eventListPtr(),
320 nextEventPtr()
321 );
322 OPENCL_CHECK(status, "clEnqueueReadBuffer");
323 inc(CopyFromDeviceBits);
324 //markAsCopyFromDeviceAndInc();
325 }