1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 #include <fstream>
 26 #define shared_cpp
 27 
 28 #include "shared.h"
 29 
// Switch for the diagnostic output in the extern "C" trampolines below: when non-zero,
// the trampolines guarded by `if (INFO)` announce themselves on std::cout before forwarding.
#define INFO 0
 31 
 32 
 33 void hexdump(void *ptr, int buflen) {
 34     auto *buf = static_cast<unsigned char *>(ptr);
 35     int i, j;
 36     for (i = 0; i < buflen; i += 16) {
 37         printf("%06x: ", i);
 38         for (j = 0; j < 16; j++)
 39             if (i + j < buflen)
 40                 printf("%02x ", buf[i + j]);
 41             else
 42                 printf("   ");
 43         printf(" ");
 44         for (j = 0; j < 16; j++)
 45             if (i + j < buflen)
 46                 printf("%c", isprint(buf[i + j]) ? buf[i + j] : '.');
 47         printf("\n");
 48     }
 49 }
 50 
 51 void Sled::show(std::ostream &out, void *argArray) {
 52     ArgSled argSled(static_cast<ArgArray_s *>(argArray));
 53     for (int i = 0; i < argSled.argc(); i++) {
 54         KernelArg *arg = argSled.arg(i);
 55         switch (arg->variant) {
 56             case '&': {
 57                 out << "Buf: of " << arg->value.buffer.sizeInBytes << " bytes " << std::endl;
 58                 break;
 59             }
 60             case 'B': {
 61                 out << "S8:" << arg->value.s8 << std::endl;
 62                 break;
 63             }
 64             case 'Z': {
 65                 out << "Z:" << arg->value.z1 << std::endl;
 66                 break;
 67             }
 68             case 'C': {
 69                 out << "U16:" << arg->value.u16 << std::endl;
 70                 break;
 71             }
 72             case 'S': {
 73                 out << "S16:" << arg->value.s16 << std::endl;
 74                 break;
 75             }
 76             case 'I': {
 77                 out << "S32:" << arg->value.s32 << std::endl;
 78                 break;
 79             }
 80             case 'F': {
 81                 out << "F32:" << arg->value.f32 << std::endl;
 82                 break;
 83             }
 84             case 'J': {
 85                 out << "S64:" << arg->value.s64 << std::endl;
 86                 break;
 87             }
 88             case 'D': {
 89                 out << "F64:" << arg->value.f64 << std::endl;
 90                 break;
 91             }
 92             default: {
 93                 std::cerr << "unexpected variant (shared.cpp) '" << static_cast<char>(arg->variant) << "'" << std::endl;
 94                 exit(1);
 95             }
 96         }
 97     }
 98     out << "schema len = " << argSled.schemaLen() << std::endl;
 99 
100     out << "schema = " << argSled.schema() << std::endl;
101 }
102 
103 
104 extern "C" void info(long backendHandle) {
105     if (INFO) {
106         std::cout << "trampolining through backendHandle to backend.info()" << std::endl;
107     }
108     auto *backend = reinterpret_cast<Backend *>(backendHandle);
109     backend->info();
110 }
111 
112 extern "C" void computeStart(long backendHandle) {
113     if (INFO) {
114         std::cout << "trampolining through backendHandle to backend.computeStart()" << std::endl;
115     }
116     auto *backend = reinterpret_cast<Backend *>(backendHandle);
117     backend->computeStart();
118 }
119 
120 extern "C" void computeEnd(long backendHandle) {
121     if (INFO) {
122         std::cout << "trampolining through backendHandle to backend.computeEnd()" << std::endl;
123     }
124     auto *backend = reinterpret_cast<Backend *>(backendHandle);
125     backend->computeEnd();
126 }
127 
128 extern "C" void releaseBackend(long backendHandle) {
129     auto *backend = reinterpret_cast<Backend *>(backendHandle);
130     delete backend;
131 }
132 
133 extern "C" long compile(long backendHandle, int len, char *source) {
134     if (INFO) {
135         std::cout << "trampolining through backendHandle to backend.compile() "
136                 << std::hex << backendHandle << std::dec << std::endl;
137     }
138     auto *backend = reinterpret_cast<Backend *>(backendHandle);
139     long compilationUnitHandle = reinterpret_cast<long>(backend->compile(len, source));
140     if (INFO) {
141         std::cout << "compilationUnitHandle = " << std::hex << compilationUnitHandle << std::dec << std::endl;
142     }
143     return compilationUnitHandle;
144 }
145 
146 extern "C" long getKernel(long compilationUnitHandle, int nameLen, char *name) {
147     if (INFO) {
148         std::cout << "trampolining through programHandle to compilationUnit.getKernel()"
149                 << std::hex << compilationUnitHandle << std::dec << std::endl;
150     }
151     auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle);
152     return reinterpret_cast<long>(compilationUnit->getKernel(nameLen, name));
153 }
154 
155 extern "C" long ndrange(long kernelHandle, void *argArray) {
156     if (INFO) {
157         std::cout << "trampolining through kernelHandle to kernel.ndrange(...) " << std::endl;
158     }
159     auto kernel = reinterpret_cast<Backend::CompilationUnit::Kernel *>(kernelHandle);
160     kernel->ndrange(argArray);
161     return (long) 0;
162 }
163 
164 extern "C" void releaseKernel(long kernelHandle) {
165     if (INFO) {
166         std::cout << "trampolining through to releaseKernel " << std::endl;
167     }
168     auto kernel = reinterpret_cast<Backend::CompilationUnit::Kernel *>(kernelHandle);
169     delete kernel;
170 }
171 
172 extern "C" void releaseCompilationUnit(long compilationUnitHandle) {
173     if (INFO) {
174         std::cout << "trampolining through to releaseCompilationUnit " << std::endl;
175     }
176     auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle);
177     delete compilationUnit;
178 }
179 
180 extern "C" bool compilationUnitOK(long compilationUnitHandle) {
181     if (INFO) {
182         std::cout << "trampolining through to compilationUnitHandleOK " << std::endl;
183     }
184     auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle);
185     return compilationUnit->compilationUnitOK();
186 }
187 
188 extern "C" bool getBufferFromDeviceIfDirty(long backendHandle, long memorySegmentHandle, long memorySegmentLength) {
189     if (INFO) {
190         std::cout << "trampolining through to getBuffer " << std::endl;
191     }
192     auto backend = reinterpret_cast<Backend *>(backendHandle);
193     auto memorySegment = reinterpret_cast<void *>(memorySegmentHandle);
194     return backend->getBufferFromDeviceIfDirty(memorySegment, memorySegmentLength);
195 }
196 
197 
198 Backend::Config::Config(int configBits):BasicConfig(configBits) {
199 
200 }
201 
202 Backend::Config::~Config() = default;
203 
204 Backend::Queue::Queue(Backend *backend)
205     : backend(backend) {
206 }
207 
208 Backend::Queue::~Queue() = default;
209 
210 Text::Text(size_t len, char *text, bool isCopy)
211     : len(len), text(text), isCopy(isCopy) {
212     // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl;
213 }
214 
215 Text::Text(char *text, bool isCopy)
216     : len(std::strlen(text)), text(text), isCopy(isCopy) {
217     // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl;
218 }
219 
220 Text::Text(size_t len)
221     : len(len), text(len > 0 ? new char[len] : nullptr), isCopy(true) {
222     //  std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl;
223 }
224 
225 void Text::write(const std::string &filename) const {
226     std::ofstream out;
227     out.open(filename, std::ofstream::trunc);
228     out.write(text, len);
229     out.close();
230 }
231 
232 void Text::read(const std::string &filename) {
233     if (isCopy && text) {
234         delete[] text;
235     }
236     text = nullptr;
237     isCopy = false;
238     // std::cout << "reading from " << filename << std::endl;
239 
240     std::ifstream ptxStream;
241     ptxStream.open(filename);
242 
243 
244     ptxStream.seekg(0, std::ios::end);
245     len = ptxStream.tellg();
246     ptxStream.seekg(0, std::ios::beg);
247 
248     if (len > 0) {
249         text = new char[len];
250         isCopy = true;
251         //std::cerr << "about to read  " << len << std::endl;
252         ptxStream.read(text, len);
253         ptxStream.close();
254         //std::cerr << "read  " << len << std::endl;
255         text[len - 1] = '\0';
256         //std::cerr << "read text " << text << std::endl;
257     }
258 }
259 
260 Text::~Text() {
261     if (isCopy && text) {
262         delete[] text;
263     }
264     text = nullptr;
265     isCopy = false;
266     len = 0;
267 }
268 
269 Log::Log(const size_t len)
270     : Text(len) {
271 }
272 
273 Log::Log(char *text)
274     : Text(text, false) {
275 }
276 
277 long Backend::CompilationUnit::Kernel::ndrange(void *argArray) {
278     if (compilationUnit->backend->config->traceCalls) {
279         std::cout << "kernelContext(\"" << name << "\"){" << std::endl;
280     }
281     ArgSled argSled(static_cast<ArgArray_s *>(argArray));
282     auto *profilableQueue = dynamic_cast<ProfilableQueue *>(compilationUnit->backend->queue);
283     if (profilableQueue != nullptr) {
284         profilableQueue->marker(Backend::ProfilableQueue::EnterKernelDispatchBits, name);
285     }
286     if (compilationUnit->backend->config->trace) {
287         Sled::show(std::cout, argArray);
288     }
289     KernelContext *kernelContext = nullptr;
290     for (int i = 0; i < argSled.argc(); i++) {
291         KernelArg *arg = argSled.arg(i);
292         switch (arg->variant) {
293             case '&': {
294                 if (arg->idx == 0) {
295                     kernelContext = static_cast<KernelContext *>(arg->value.buffer.memorySegment);
296                 }
297                 if (compilationUnit->backend->config->trace) {
298                     std::cout << "arg[" << i << "] = " << std::hex << (int) (arg->value.buffer.access);
299                     switch (arg->value.buffer.access) {
300                         case RO_BYTE:
301                             std::cout << " RO";
302                             break;
303                         case WO_BYTE:
304                             std::cout << " WO";
305                             break;
306                         case RW_BYTE:
307                             std::cout << " RW";
308                             break;
309                     }
310                     std::cout << std::endl;
311                 }
312 
313                 BufferState *bufferState = BufferState::of(arg);
314 
315                 Buffer *buffer = compilationUnit->backend->getOrCreateBuffer(bufferState);
316 
317                 bool kernelReadsFromThisArg = (arg->value.buffer.access == RW_BYTE) || (
318                                                   arg->value.buffer.access == RO_BYTE);
319                 bool copyToDevice =
320                         compilationUnit->backend->config->alwaysCopy
321                         || (bufferState->state == BufferState::NEW_STATE)
322                         || ((bufferState->state == BufferState::HOST_OWNED)
323                         );
324 
325                 if (compilationUnit->backend->config->showWhy) {
326                     std::cout <<
327                             "config.alwaysCopy=" << compilationUnit->backend->config->alwaysCopy
328                             << " | arg.RW=" << (arg->value.buffer.access == RW_BYTE)
329                             << " | arg.RO=" << (arg->value.buffer.access == RO_BYTE)
330                             << " | kernel.needsToRead=" << kernelReadsFromThisArg
331                             << " | Buffer state = " << BufferState::stateNames[bufferState->state]
332                             << " so ";
333                 }
334                 if (copyToDevice) {
335                     compilationUnit->backend->queue->copyToDevice(buffer);
336                     // buffer->copyToDevice();
337                     if (compilationUnit->backend->config->traceCopies) {
338                         std::cout << "copying arg " << arg->idx << " to device " << std::endl;
339                     }
340                 } else {
341                     if (compilationUnit->backend->config->traceSkippedCopies) {
342                         std::cout << "NOT copying arg " << arg->idx << " to device " << std::endl;
343                     }
344                 }
345                 setArg(arg, buffer);
346                 if (compilationUnit->backend->config->trace) {
347                     std::cout << "set buffer arg " << arg->idx << std::endl;
348                 }
349                 break;
350             }
351             case 'B':
352             case 'S':
353             case 'C':
354             case 'I':
355             case 'F':
356             case 'J':
357             case 'D': {
358                 setArg(arg);
359                 if (compilationUnit->backend->config->trace) {
360                     std::cerr << "set " << arg->variant << " " << arg->idx << std::endl;
361                 }
362                 break;
363             }
364             default: {
365                 std::cerr << "unexpected variant setting args in OpenCLkernel::kernelContext " << (char) arg->variant <<
366                         std::endl;
367                 exit(1);
368             }
369         }
370     }
371 
372     if (kernelContext == nullptr) {
373         std::cerr << "Looks like we recieved a kernel dispatch with xero args kernel='" << name << "'" << std::endl;
374         exit(1);
375     }
376 
377     if (compilationUnit->backend->config->trace) {
378         std::cout << "kernelContext = " << kernelContext->maxX << std::endl;
379     }
380 
381     // We 'double dispatch' back to the kernel to actually do the dispatch
382 
383     compilationUnit->backend->queue->dispatch(kernelContext, this);
384 
385 
386     for (int i = 0; i < argSled.argc(); i++) {
387         // note i = 1... we never need to copy back the KernelContext
388         KernelArg *arg = argSled.arg(i);
389         if (arg->variant == '&') {
390             BufferState *bufferState = BufferState::of(arg);
391 
392             bool kernelWroteToThisArg = (arg->value.buffer.access == WO_BYTE) | (arg->value.buffer.access == RW_BYTE);
393             if (compilationUnit->backend->config->showWhy) {
394                 std::cout <<
395                         "config.alwaysCopy=" << compilationUnit->backend->config->alwaysCopy
396                         << " | arg.WO=" << (arg->value.buffer.access == WO_BYTE)
397                         << " | arg.RW=" << (arg->value.buffer.access == RW_BYTE)
398                         << " | kernel.wroteToThisArg=" << kernelWroteToThisArg
399                         << "Buffer state = " << BufferState::stateNames[bufferState->state]
400                         << " so ";
401             }
402 
403             auto *buffer = static_cast<Buffer *>(bufferState->vendorPtr);
404             if (compilationUnit->backend->config->alwaysCopy) {
405                 compilationUnit->backend->queue->copyFromDevice(buffer);
406                 // buffer->copyFromDevice();
407                 if (compilationUnit->backend->config->traceCopies || compilationUnit->backend->config->traceEnqueues) {
408                     std::cout << "copying arg " << arg->idx << " from device " << std::endl;
409                 }
410             } else {
411                 if (compilationUnit->backend->config->traceSkippedCopies) {
412                     std::cout << "NOT copying arg " << arg->idx << " from device " << std::endl;
413                 }
414                 if (kernelWroteToThisArg) {
415                     bufferState->state = BufferState::DEVICE_OWNED;
416                 }
417             }
418         }
419     }
420     if (profilableQueue != nullptr) {
421         profilableQueue->marker(Backend::ProfilableQueue::LeaveKernelDispatchBits, name);
422     }
423     compilationUnit->backend->queue->wait();
424     compilationUnit->backend->queue->release();
425     if (compilationUnit->backend->config->traceCalls) {
426         std::cout << "\"" << name << "\"}" << std::endl;
427     }
428     return 0;
429 }