1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 #include <fstream>
 26 #define shared_cpp
 27 
 28 #include "shared.h"
 29 
 30 #define INFO 0
 31 
 32 
 33 void hexdump(void *ptr, int buflen) {
 34     auto *buf = static_cast<unsigned char *>(ptr);
 35     int i, j;
 36     for (i = 0; i < buflen; i += 16) {
 37         printf("%06x: ", i);
 38         for (j = 0; j < 16; j++)
 39             if (i + j < buflen)
 40                 printf("%02x ", buf[i + j]);
 41             else
 42                 printf("   ");
 43         printf(" ");
 44         for (j = 0; j < 16; j++)
 45             if (i + j < buflen)
 46                 printf("%c", isprint(buf[i + j]) ? buf[i + j] : '.');
 47         printf("\n");
 48     }
 49 }
 50 
 51 void Sled::show(std::ostream &out, void *argArray) {
 52     ArgSled argSled(static_cast<ArgArray_s *>(argArray));
 53     for (int i = 0; i < argSled.argc(); i++) {
 54         KernelArg *arg = argSled.arg(i);
 55         switch (arg->variant) {
 56             case '&': {
 57                 out << "Buf: of " << arg->value.buffer.sizeInBytes << " bytes " << std::endl;
 58                 break;
 59             }
 60             case 'B': {
 61                 out << "S8:" << arg->value.s8 << std::endl;
 62                 break;
 63             }
 64             case 'Z': {
 65                 out << "Z:" << arg->value.z1 << std::endl;
 66                 break;
 67             }
 68             case 'C': {
 69                 out << "U16:" << arg->value.u16 << std::endl;
 70                 break;
 71             }
 72             case 'S': {
 73                 out << "S16:" << arg->value.s16 << std::endl;
 74                 break;
 75             }
 76             case 'I': {
 77                 out << "S32:" << arg->value.s32 << std::endl;
 78                 break;
 79             }
 80             case 'F': {
 81                 out << "F32:" << arg->value.f32 << std::endl;
 82                 break;
 83             }
 84             case 'J': {
 85                 out << "S64:" << arg->value.s64 << std::endl;
 86                 break;
 87             }
 88             case 'D': {
 89                 out << "F64:" << arg->value.f64 << std::endl;
 90                 break;
 91             }
 92             default: {
 93                 std::cerr << "unexpected variant (shared.cpp) '" << static_cast<char>(arg->variant) << "'" << std::endl;
 94                 exit(1);
 95             }
 96         }
 97     }
 98     out << "schema len = " << argSled.schemaLen() << std::endl;
 99 
100     out << "schema = " << argSled.schema() << std::endl;
101 }
102 
103 
104 extern "C" void showDeviceInfo(long backendHandle) {
105         std::cout << "DEBUGGGGGGG through backendHandle to backend.showDeviceInfo()" << std::endl;
106     if (INFO) {
107         std::cout << "trampolining through backendHandle to backend.showDeviceInfo()" << std::endl;
108     }
109     auto *backend = reinterpret_cast<Backend *>(backendHandle);
110     backend->showDeviceInfo();
111 }
112 
113 extern "C" void computeStart(long backendHandle) {
114     if (INFO) {
115         std::cout << "trampolining through backendHandle to backend.computeStart()" << std::endl;
116     }
117     auto *backend = reinterpret_cast<Backend *>(backendHandle);
118     backend->computeStart();
119 }
120 
121 extern "C" void computeEnd(long backendHandle) {
122     if (INFO) {
123         std::cout << "trampolining through backendHandle to backend.computeEnd()" << std::endl;
124     }
125     auto *backend = reinterpret_cast<Backend *>(backendHandle);
126     backend->computeEnd();
127 }
128 
129 extern "C" void releaseBackend(long backendHandle) {
130     auto *backend = reinterpret_cast<Backend *>(backendHandle);
131     delete backend;
132 }
133 
134 extern "C" long compile(long backendHandle, int len, char *source) {
135     if (INFO) {
136         std::cout << "trampolining through backendHandle to backend.compile() "
137                 << std::hex << backendHandle << std::dec << std::endl;
138     }
139     auto *backend = reinterpret_cast<Backend *>(backendHandle);
140     long compilationUnitHandle = reinterpret_cast<long>(backend->compile(len, source));
141     if (INFO) {
142         std::cout << "compilationUnitHandle = " << std::hex << compilationUnitHandle << std::dec << std::endl;
143     }
144     return compilationUnitHandle;
145 }
146 
147 extern "C" long getKernel(long compilationUnitHandle, int nameLen, char *name) {
148     if (INFO) {
149         std::cout << "trampolining through programHandle to compilationUnit.getKernel()"
150                 << std::hex << compilationUnitHandle << std::dec << std::endl;
151     }
152     auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle);
153     return reinterpret_cast<long>(compilationUnit->getKernel(nameLen, name));
154 }
155 
156 extern "C" long ndrange(long kernelHandle, void *argArray) {
157     if (INFO) {
158         std::cout << "trampolining through kernelHandle to kernel.ndrange(...) " << std::endl;
159     }
160     auto kernel = reinterpret_cast<Backend::CompilationUnit::Kernel *>(kernelHandle);
161     kernel->ndrange(argArray);
162     return (long) 0;
163 }
164 
165 extern "C" void releaseKernel(long kernelHandle) {
166     if (INFO) {
167         std::cout << "trampolining through to releaseKernel " << std::endl;
168     }
169     auto kernel = reinterpret_cast<Backend::CompilationUnit::Kernel *>(kernelHandle);
170     delete kernel;
171 }
172 
173 extern "C" void releaseCompilationUnit(long compilationUnitHandle) {
174     if (INFO) {
175         std::cout << "trampolining through to releaseCompilationUnit " << std::endl;
176     }
177     auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle);
178     delete compilationUnit;
179 }
180 
181 extern "C" bool compilationUnitOK(long compilationUnitHandle) {
182     if (INFO) {
183         std::cout << "trampolining through to compilationUnitHandleOK " << std::endl;
184     }
185     auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle);
186     return compilationUnit->compilationUnitOK();
187 }
188 
189 extern "C" bool getBufferFromDeviceIfDirty(long backendHandle, long memorySegmentHandle, long memorySegmentLength) {
190     if (INFO) {
191         std::cout << "trampolining through to getBuffer " << std::endl;
192     }
193     auto backend = reinterpret_cast<Backend *>(backendHandle);
194     auto memorySegment = reinterpret_cast<void *>(memorySegmentHandle);
195     return backend->getBufferFromDeviceIfDirty(memorySegment, memorySegmentLength);
196 }
197 
198 
199 Backend::Config::Config(int configBits):BasicConfig(configBits) {
200 
201 }
202 
203 Backend::Config::~Config() = default;
204 
205 Backend::Queue::Queue(Backend *backend)
206     : backend(backend) {
207 }
208 
209 Backend::Queue::~Queue() = default;
210 
211 Text::Text(size_t len, char *text, bool isCopy)
212     : len(len), text(text), isCopy(isCopy) {
213     // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl;
214 }
215 
216 Text::Text(char *text, bool isCopy)
217     : len(std::strlen(text)), text(text), isCopy(isCopy) {
218     // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl;
219 }
220 
221 Text::Text(size_t len)
222     : len(len), text(len > 0 ? new char[len] : nullptr), isCopy(true) {
223     //  std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl;
224 }
225 
226 void Text::write(const std::string &filename) const {
227     std::ofstream out;
228     out.open(filename, std::ofstream::trunc);
229     out.write(text, len);
230     out.close();
231 }
232 
233 void Text::read(const std::string &filename) {
234     if (isCopy && text) {
235         delete[] text;
236     }
237     text = nullptr;
238     isCopy = false;
239     // std::cout << "reading from " << filename << std::endl;
240 
241     std::ifstream ptxStream;
242     ptxStream.open(filename);
243 
244 
245     ptxStream.seekg(0, std::ios::end);
246     len = ptxStream.tellg();
247     ptxStream.seekg(0, std::ios::beg);
248 
249     if (len > 0) {
250         text = new char[len];
251         isCopy = true;
252         //std::cerr << "about to read  " << len << std::endl;
253         ptxStream.read(text, len);
254         ptxStream.close();
255         //std::cerr << "read  " << len << std::endl;
256         text[len - 1] = '\0';
257         //std::cerr << "read text " << text << std::endl;
258     }
259 }
260 
261 Text::~Text() {
262     if (isCopy && text) {
263         delete[] text;
264     }
265     text = nullptr;
266     isCopy = false;
267     len = 0;
268 }
269 
270 Log::Log(const size_t len)
271     : Text(len) {
272 }
273 
274 Log::Log(char *text)
275     : Text(text, false) {
276 }
277 
278 long Backend::CompilationUnit::Kernel::ndrange(void *argArray) {
279     if (compilationUnit->backend->config->traceCalls) {
280         std::cout << "kernelContext(\"" << name << "\"){" << std::endl;
281     }
282     ArgSled argSled(static_cast<ArgArray_s *>(argArray));
283     auto *profilableQueue = dynamic_cast<ProfilableQueue *>(compilationUnit->backend->queue);
284     if (profilableQueue != nullptr) {
285         profilableQueue->marker(ProfilableQueue::EnterKernelDispatchBits, name);
286     }
287     if (compilationUnit->backend->config->trace) {
288         Sled::show(std::cout, argArray);
289     }
290     KernelContext *kernelContext = nullptr;
291     for (int i = 0; i < argSled.argc(); i++) {
292         KernelArg *arg = argSled.arg(i);
293         switch (arg->variant) {
294             case '&': {
295                 if (arg->idx == 0) {
296                     kernelContext = static_cast<KernelContext *>(arg->value.buffer.memorySegment);
297                 }
298                 if (compilationUnit->backend->config->trace) {
299                     std::cout << "arg[" << i << "] = " << std::hex << (int) (arg->value.buffer.access);
300                     switch (arg->value.buffer.access) {
301                         case RO_BYTE:
302                             std::cout << " RO";
303                             break;
304                         case WO_BYTE:
305                             std::cout << " WO";
306                             break;
307                         case RW_BYTE:
308                             std::cout << " RW";
309                             break;
310                     }
311                     std::cout << std::endl;
312                 }
313 
314                 BufferState *bufferState = BufferState::of(arg);
315 
316                 Buffer *buffer = compilationUnit->backend->getOrCreateBuffer(bufferState);
317 
318                 bool kernelReadsFromThisArg = (arg->value.buffer.access == RW_BYTE) || (
319                                                   arg->value.buffer.access == RO_BYTE);
320                 bool copyToDevice =
321                         compilationUnit->backend->config->alwaysCopy
322                         || (bufferState->state == BufferState::NEW_STATE)
323                         || ((bufferState->state == BufferState::HOST_OWNED)
324                         );
325 
326                 if (compilationUnit->backend->config->showWhy) {
327                     std::cout <<
328                             "config.alwaysCopy=" << compilationUnit->backend->config->alwaysCopy
329                             << " | arg.RW=" << (arg->value.buffer.access == RW_BYTE)
330                             << " | arg.RO=" << (arg->value.buffer.access == RO_BYTE)
331                             << " | kernel.needsToRead=" << kernelReadsFromThisArg
332                             << " | Buffer state = " << BufferState::stateNames[bufferState->state]
333                             << " so ";
334                 }
335                 if (copyToDevice) {
336                     compilationUnit->backend->queue->copyToDevice(buffer);
337                     // buffer->copyToDevice();
338                     if (compilationUnit->backend->config->traceCopies) {
339                         std::cout << "copying arg " << arg->idx << " to device " << std::endl;
340                     }
341                 } else {
342                     if (compilationUnit->backend->config->traceSkippedCopies) {
343                         std::cout << "NOT copying arg " << arg->idx << " to device " << std::endl;
344                     }
345                 }
346                 setArg(arg, buffer);
347                 if (compilationUnit->backend->config->trace) {
348                     std::cout << "set buffer arg " << arg->idx << std::endl;
349                 }
350                 break;
351             }
352             case 'B':
353             case 'S':
354             case 'C':
355             case 'I':
356             case 'F':
357             case 'J':
358             case 'D': {
359                 setArg(arg);
360                 if (compilationUnit->backend->config->trace) {
361                     std::cerr << "set " << arg->variant << " " << arg->idx << std::endl;
362                 }
363                 break;
364             }
365             default: {
366                 std::cerr << "unexpected variant setting args in OpenCLkernel::kernelContext " << (char) arg->variant <<
367                         std::endl;
368                 exit(1);
369             }
370         }
371     }
372 
373     if (kernelContext == nullptr) {
374         std::cerr << "Looks like we recieved a kernel dispatch with xero args kernel='" << name << "'" << std::endl;
375         exit(1);
376     }
377 
378     if (compilationUnit->backend->config->trace) {
379         std::cout << "kernelContext = <" << kernelContext->gsx << "," << kernelContext->gsy << "," << kernelContext->gsz << ">" << std::endl;
380     }
381 
382     // We 'double dispatch' back to the kernel to actually do the dispatch
383 
384     compilationUnit->backend->queue->dispatch(kernelContext, this);
385 
386 
387     for (int i = 0; i < argSled.argc(); i++) {
388         // note i = 1... we never need to copy back the KernelContext
389         KernelArg *arg = argSled.arg(i);
390         if (arg->variant == '&') {
391             BufferState *bufferState = BufferState::of(arg);
392 
393             bool kernelWroteToThisArg = (arg->value.buffer.access == WO_BYTE) | (arg->value.buffer.access == RW_BYTE);
394             if (compilationUnit->backend->config->showWhy) {
395                 std::cout <<
396                         "config.alwaysCopy=" << compilationUnit->backend->config->alwaysCopy
397                         << " | arg.WO=" << (arg->value.buffer.access == WO_BYTE)
398                         << " | arg.RW=" << (arg->value.buffer.access == RW_BYTE)
399                         << " | kernel.wroteToThisArg=" << kernelWroteToThisArg
400                         << "Buffer state = " << BufferState::stateNames[bufferState->state]
401                         << " so ";
402             }
403 
404             auto *buffer = static_cast<Buffer *>(bufferState->vendorPtr);
405             if (compilationUnit->backend->config->alwaysCopy) {
406                 compilationUnit->backend->queue->copyFromDevice(buffer);
407                 // buffer->copyFromDevice();
408                 if (compilationUnit->backend->config->traceCopies || compilationUnit->backend->config->traceEnqueues) {
409                     std::cout << "copying arg " << arg->idx << " from device " << std::endl;
410                 }
411             } else {
412                 if (compilationUnit->backend->config->traceSkippedCopies) {
413                     std::cout << "NOT copying arg " << arg->idx << " from device " << std::endl;
414                 }
415                 if (kernelWroteToThisArg) {
416                     bufferState->state = BufferState::DEVICE_OWNED;
417                 }
418             }
419         }
420     }
421     if (profilableQueue != nullptr) {
422         profilableQueue->marker(Backend::ProfilableQueue::LeaveKernelDispatchBits, name);
423     }
424     compilationUnit->backend->queue->wait();
425     compilationUnit->backend->queue->release();
426     if (compilationUnit->backend->config->traceCalls) {
427         std::cout << "\"" << name << "\"}" << std::endl;
428     }
429     return 0;
430 }