1 /*
2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 #include <fstream>
26 #define shared_cpp
27
28 #include "shared.h"
29
30 #define INFO 0
31
32
/**
 * Writes a hex dump of a memory region to stdout.
 *
 * Every output line covers up to 16 bytes and consists of a six-digit
 * hexadecimal offset, the bytes as two-digit hex values, and finally the same
 * bytes as characters (anything isprint() rejects is shown as '.').
 *
 * @param ptr    start of the region to dump; it is read as unsigned bytes
 * @param buflen number of bytes to dump; nothing is printed when it is <= 0
 */
void hexdump(void *ptr, int buflen) {
    const auto *buf = static_cast<const unsigned char *>(ptr);
    constexpr int bytesPerLine = 16;
    for (int offset = 0; offset < buflen; offset += bytesPerLine) {
        // %x expects an unsigned value; offset is never negative here.
        printf("%06x: ", static_cast<unsigned>(offset));
        for (int col = 0; col < bytesPerLine; col++) {
            if (offset + col < buflen) {
                printf("%02x ", buf[offset + col]);
            } else {
                // A missing byte is padded with the same width as "%02x " so the
                // character column of a short final line stays aligned.
                printf("   ");
            }
        }
        printf(" ");
        for (int col = 0; col < bytesPerLine; col++) {
            if (offset + col < buflen) {
                const unsigned char c = buf[offset + col];
                printf("%c", isprint(c) ? c : '.');
            }
        }
        printf("\n");
    }
}
50
51 void Sled::show(std::ostream &out, void *argArray) {
52 ArgSled argSled(static_cast<ArgArray_s *>(argArray));
53 for (int i = 0; i < argSled.argc(); i++) {
54 KernelArg *arg = argSled.arg(i);
55 switch (arg->variant) {
56 case '&': {
57 out << "Buf: of " << arg->value.buffer.sizeInBytes << " bytes " << std::endl;
58 break;
59 }
60 case 'B': {
61 out << "S8:" << arg->value.s8 << std::endl;
62 break;
63 }
64 case 'Z': {
65 out << "Z:" << arg->value.z1 << std::endl;
66 break;
67 }
68 case 'C': {
69 out << "U16:" << arg->value.u16 << std::endl;
70 break;
71 }
72 case 'S': {
73 out << "S16:" << arg->value.s16 << std::endl;
74 break;
75 }
76 case 'I': {
77 out << "S32:" << arg->value.s32 << std::endl;
78 break;
79 }
80 case 'F': {
81 out << "F32:" << arg->value.f32 << std::endl;
82 break;
83 }
84 case 'J': {
85 out << "S64:" << arg->value.s64 << std::endl;
86 break;
87 }
88 case 'D': {
89 out << "F64:" << arg->value.f64 << std::endl;
90 break;
91 }
92 default: {
93 std::cerr << "unexpected variant (shared.cpp) '" << static_cast<char>(arg->variant) << "'" << std::endl;
94 exit(1);
95 }
96 }
97 }
98 out << "schema len = " << argSled.schemaLen() << std::endl;
99
100 out << "schema = " << argSled.schema() << std::endl;
101 }
102
103
104 extern "C" void info(long backendHandle) {
105 if (INFO) {
106 std::cout << "trampolining through backendHandle to backend.info()" << std::endl;
107 }
108 auto *backend = reinterpret_cast<Backend *>(backendHandle);
109 backend->info();
110 }
111
112 extern "C" void computeStart(long backendHandle) {
113 if (INFO) {
114 std::cout << "trampolining through backendHandle to backend.computeStart()" << std::endl;
115 }
116 auto *backend = reinterpret_cast<Backend *>(backendHandle);
117 backend->computeStart();
118 }
119
120 extern "C" void computeEnd(long backendHandle) {
121 if (INFO) {
122 std::cout << "trampolining through backendHandle to backend.computeEnd()" << std::endl;
123 }
124 auto *backend = reinterpret_cast<Backend *>(backendHandle);
125 backend->computeEnd();
126 }
127
128 extern "C" void releaseBackend(long backendHandle) {
129 auto *backend = reinterpret_cast<Backend *>(backendHandle);
130 delete backend;
131 }
132
133 extern "C" long compile(long backendHandle, int len, char *source) {
134 if (INFO) {
135 std::cout << "trampolining through backendHandle to backend.compile() "
136 << std::hex << backendHandle << std::dec << std::endl;
137 }
138 auto *backend = reinterpret_cast<Backend *>(backendHandle);
139 long compilationUnitHandle = reinterpret_cast<long>(backend->compile(len, source));
140 if (INFO) {
141 std::cout << "compilationUnitHandle = " << std::hex << compilationUnitHandle << std::dec << std::endl;
142 }
143 return compilationUnitHandle;
144 }
145
146 extern "C" long getKernel(long compilationUnitHandle, int nameLen, char *name) {
147 if (INFO) {
148 std::cout << "trampolining through programHandle to compilationUnit.getKernel()"
149 << std::hex << compilationUnitHandle << std::dec << std::endl;
150 }
151 auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle);
152 return reinterpret_cast<long>(compilationUnit->getKernel(nameLen, name));
153 }
154
155 extern "C" long ndrange(long kernelHandle, void *argArray) {
156 if (INFO) {
157 std::cout << "trampolining through kernelHandle to kernel.ndrange(...) " << std::endl;
158 }
159 auto kernel = reinterpret_cast<Backend::CompilationUnit::Kernel *>(kernelHandle);
160 kernel->ndrange(argArray);
161 return (long) 0;
162 }
163
164 extern "C" void releaseKernel(long kernelHandle) {
165 if (INFO) {
166 std::cout << "trampolining through to releaseKernel " << std::endl;
167 }
168 auto kernel = reinterpret_cast<Backend::CompilationUnit::Kernel *>(kernelHandle);
169 delete kernel;
170 }
171
172 extern "C" void releaseCompilationUnit(long compilationUnitHandle) {
173 if (INFO) {
174 std::cout << "trampolining through to releaseCompilationUnit " << std::endl;
175 }
176 auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle);
177 delete compilationUnit;
178 }
179
180 extern "C" bool compilationUnitOK(long compilationUnitHandle) {
181 if (INFO) {
182 std::cout << "trampolining through to compilationUnitHandleOK " << std::endl;
183 }
184 auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle);
185 return compilationUnit->compilationUnitOK();
186 }
187
188 extern "C" bool getBufferFromDeviceIfDirty(long backendHandle, long memorySegmentHandle, long memorySegmentLength) {
189 if (INFO) {
190 std::cout << "trampolining through to getBuffer " << std::endl;
191 }
192 auto backend = reinterpret_cast<Backend *>(backendHandle);
193 auto memorySegment = reinterpret_cast<void *>(memorySegmentHandle);
194 return backend->getBufferFromDeviceIfDirty(memorySegment, memorySegmentLength);
195 }
196
197
198 Backend::Config::Config(int configBits):BasicConfig(configBits) {
199
200 }
201
// Defaulted destructor, defined out of line in this translation unit.
Backend::Config::~Config() = default;
203
204 Backend::Queue::Queue(Backend *backend)
205 : backend(backend) {
206 }
207
// Defaulted destructor, defined out of line in this translation unit.
Backend::Queue::~Queue() = default;
209
210 Text::Text(size_t len, char *text, bool isCopy)
211 : len(len), text(text), isCopy(isCopy) {
212 // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl;
213 }
214
215 Text::Text(char *text, bool isCopy)
216 : len(std::strlen(text)), text(text), isCopy(isCopy) {
217 // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl;
218 }
219
220 Text::Text(size_t len)
221 : len(len), text(len > 0 ? new char[len] : nullptr), isCopy(true) {
222 // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl;
223 }
224
225 void Text::write(const std::string &filename) const {
226 std::ofstream out;
227 out.open(filename, std::ofstream::trunc);
228 out.write(text, len);
229 out.close();
230 }
231
232 void Text::read(const std::string &filename) {
233 if (isCopy && text) {
234 delete[] text;
235 }
236 text = nullptr;
237 isCopy = false;
238 // std::cout << "reading from " << filename << std::endl;
239
240 std::ifstream ptxStream;
241 ptxStream.open(filename);
242
243
244 ptxStream.seekg(0, std::ios::end);
245 len = ptxStream.tellg();
246 ptxStream.seekg(0, std::ios::beg);
247
248 if (len > 0) {
249 text = new char[len];
250 isCopy = true;
251 //std::cerr << "about to read " << len << std::endl;
252 ptxStream.read(text, len);
253 ptxStream.close();
254 //std::cerr << "read " << len << std::endl;
255 text[len - 1] = '\0';
256 //std::cerr << "read text " << text << std::endl;
257 }
258 }
259
260 Text::~Text() {
261 if (isCopy && text) {
262 delete[] text;
263 }
264 text = nullptr;
265 isCopy = false;
266 len = 0;
267 }
268
269 Log::Log(const size_t len)
270 : Text(len) {
271 }
272
273 Log::Log(char *text)
274 : Text(text, false) {
275 }
276
277 long Backend::CompilationUnit::Kernel::ndrange(void *argArray) {
278 if (compilationUnit->backend->config->traceCalls) {
279 std::cout << "kernelContext(\"" << name << "\"){" << std::endl;
280 }
281 ArgSled argSled(static_cast<ArgArray_s *>(argArray));
282 auto *profilableQueue = dynamic_cast<ProfilableQueue *>(compilationUnit->backend->queue);
283 if (profilableQueue != nullptr) {
284 profilableQueue->marker(ProfilableQueue::EnterKernelDispatchBits, name);
285 }
286 if (compilationUnit->backend->config->trace) {
287 Sled::show(std::cout, argArray);
288 }
289 KernelContext *kernelContext = nullptr;
290 for (int i = 0; i < argSled.argc(); i++) {
291 KernelArg *arg = argSled.arg(i);
292 switch (arg->variant) {
293 case '&': {
294 if (arg->idx == 0) {
295 kernelContext = static_cast<KernelContext *>(arg->value.buffer.memorySegment);
296 }
297 if (compilationUnit->backend->config->trace) {
298 std::cout << "arg[" << i << "] = " << std::hex << (int) (arg->value.buffer.access);
299 switch (arg->value.buffer.access) {
300 case RO_BYTE:
301 std::cout << " RO";
302 break;
303 case WO_BYTE:
304 std::cout << " WO";
305 break;
306 case RW_BYTE:
307 std::cout << " RW";
308 break;
309 }
310 std::cout << std::endl;
311 }
312
313 BufferState *bufferState = BufferState::of(arg);
314
315 Buffer *buffer = compilationUnit->backend->getOrCreateBuffer(bufferState);
316
317 bool kernelReadsFromThisArg = (arg->value.buffer.access == RW_BYTE) || (
318 arg->value.buffer.access == RO_BYTE);
319 bool copyToDevice =
320 compilationUnit->backend->config->alwaysCopy
321 || (bufferState->state == BufferState::NEW_STATE)
322 || ((bufferState->state == BufferState::HOST_OWNED)
323 );
324
325 if (compilationUnit->backend->config->showWhy) {
326 std::cout <<
327 "config.alwaysCopy=" << compilationUnit->backend->config->alwaysCopy
328 << " | arg.RW=" << (arg->value.buffer.access == RW_BYTE)
329 << " | arg.RO=" << (arg->value.buffer.access == RO_BYTE)
330 << " | kernel.needsToRead=" << kernelReadsFromThisArg
331 << " | Buffer state = " << BufferState::stateNames[bufferState->state]
332 << " so ";
333 }
334 if (copyToDevice) {
335 compilationUnit->backend->queue->copyToDevice(buffer);
336 // buffer->copyToDevice();
337 if (compilationUnit->backend->config->traceCopies) {
338 std::cout << "copying arg " << arg->idx << " to device " << std::endl;
339 }
340 } else {
341 if (compilationUnit->backend->config->traceSkippedCopies) {
342 std::cout << "NOT copying arg " << arg->idx << " to device " << std::endl;
343 }
344 }
345 setArg(arg, buffer);
346 if (compilationUnit->backend->config->trace) {
347 std::cout << "set buffer arg " << arg->idx << std::endl;
348 }
349 break;
350 }
351 case 'B':
352 case 'S':
353 case 'C':
354 case 'I':
355 case 'F':
356 case 'J':
357 case 'D': {
358 setArg(arg);
359 if (compilationUnit->backend->config->trace) {
360 std::cerr << "set " << arg->variant << " " << arg->idx << std::endl;
361 }
362 break;
363 }
364 default: {
365 std::cerr << "unexpected variant setting args in OpenCLkernel::kernelContext " << (char) arg->variant <<
366 std::endl;
367 exit(1);
368 }
369 }
370 }
371
372 if (kernelContext == nullptr) {
373 std::cerr << "Looks like we recieved a kernel dispatch with xero args kernel='" << name << "'" << std::endl;
374 exit(1);
375 }
376
377 if (compilationUnit->backend->config->trace) {
378 std::cout << "kernelContext = " << kernelContext->maxX << std::endl;
379 }
380
381 // We 'double dispatch' back to the kernel to actually do the dispatch
382
383 compilationUnit->backend->queue->dispatch(kernelContext, this);
384
385
386 for (int i = 0; i < argSled.argc(); i++) {
387 // note i = 1... we never need to copy back the KernelContext
388 KernelArg *arg = argSled.arg(i);
389 if (arg->variant == '&') {
390 BufferState *bufferState = BufferState::of(arg);
391
392 bool kernelWroteToThisArg = (arg->value.buffer.access == WO_BYTE) | (arg->value.buffer.access == RW_BYTE);
393 if (compilationUnit->backend->config->showWhy) {
394 std::cout <<
395 "config.alwaysCopy=" << compilationUnit->backend->config->alwaysCopy
396 << " | arg.WO=" << (arg->value.buffer.access == WO_BYTE)
397 << " | arg.RW=" << (arg->value.buffer.access == RW_BYTE)
398 << " | kernel.wroteToThisArg=" << kernelWroteToThisArg
399 << "Buffer state = " << BufferState::stateNames[bufferState->state]
400 << " so ";
401 }
402
403 auto *buffer = static_cast<Buffer *>(bufferState->vendorPtr);
404 if (compilationUnit->backend->config->alwaysCopy) {
405 compilationUnit->backend->queue->copyFromDevice(buffer);
406 // buffer->copyFromDevice();
407 if (compilationUnit->backend->config->traceCopies || compilationUnit->backend->config->traceEnqueues) {
408 std::cout << "copying arg " << arg->idx << " from device " << std::endl;
409 }
410 } else {
411 if (compilationUnit->backend->config->traceSkippedCopies) {
412 std::cout << "NOT copying arg " << arg->idx << " from device " << std::endl;
413 }
414 if (kernelWroteToThisArg) {
415 bufferState->state = BufferState::DEVICE_OWNED;
416 }
417 }
418 }
419 }
420 if (profilableQueue != nullptr) {
421 profilableQueue->marker(Backend::ProfilableQueue::LeaveKernelDispatchBits, name);
422 }
423 compilationUnit->backend->queue->wait();
424 compilationUnit->backend->queue->release();
425 if (compilationUnit->backend->config->traceCalls) {
426 std::cout << "\"" << name << "\"}" << std::endl;
427 }
428 return 0;
429 }