1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 #define shared_cpp
 26 
 27 #include "shared.h"
 28 
 29 #define INFO 0
 30 
 31 
 32 void hexdump(void *ptr, int buflen) {
 33     unsigned char *buf = (unsigned char *) ptr;
 34     int i, j;
 35     for (i = 0; i < buflen; i += 16) {
 36         printf("%06x: ", i);
 37         for (j = 0; j < 16; j++)
 38             if (i + j < buflen)
 39                 printf("%02x ", buf[i + j]);
 40             else
 41                 printf("   ");
 42         printf(" ");
 43         for (j = 0; j < 16; j++)
 44             if (i + j < buflen)
 45                 printf("%c", isprint(buf[i + j]) ? buf[i + j] : '.');
 46         printf("\n");
 47     }
 48 }
 49 
 50 void Sled::show(std::ostream &out, void *argArray) {
 51     ArgSled argSled(static_cast<ArgArray_s *>(argArray));
 52     for (int i = 0; i < argSled.argc(); i++) {
 53         KernelArg *arg = argSled.arg(i);
 54         switch (arg->variant) {
 55             case '&': {
 56                 out << "Buf: of " << arg->value.buffer.sizeInBytes << " bytes " << std::endl;
 57                 break;
 58             }
 59             case 'B': {
 60                 out << "S8:" << arg->value.s8 << std::endl;
 61                 break;
 62             }
 63             case 'Z': {
 64                 out << "Z:" << arg->value.z1 << std::endl;
 65                 break;
 66             }
 67             case 'C': {
 68                 out << "U16:" << arg->value.u16 << std::endl;
 69                 break;
 70             }
 71             case 'S': {
 72                 out << "S16:" << arg->value.s16 << std::endl;
 73                 break;
 74             }
 75             case 'I': {
 76                 out << "S32:" << arg->value.s32 << std::endl;
 77                 break;
 78             }
 79             case 'F': {
 80                 out << "F32:" << arg->value.f32 << std::endl;
 81                 break;
 82             }
 83             case 'J': {
 84                 out << "S64:" << arg->value.s64 << std::endl;
 85                 break;
 86             }
 87             case 'D': {
 88                 out << "F64:" << arg->value.f64 << std::endl;
 89                 break;
 90             }
 91             default: {
 92                 std::cerr << "unexpected variant (shared.cpp) '" << (char) arg->variant << "'" << std::endl;
 93                 exit(1);
 94             }
 95         }
 96     }
 97     out << "schema len = " << argSled.schemaLen() << std::endl;
 98 
 99     out << "schema = " << argSled.schema() << std::endl;
100 }
101 
102 
103 extern "C" void info(long backendHandle) {
104     if (INFO) {
105         std::cout << "trampolining through backendHandle to backend.info()" << std::endl;
106     }
107     auto *backend = reinterpret_cast<Backend *>(backendHandle);
108     backend->info();
109 }
110 extern "C" void computeStart(long backendHandle) {
111     if (INFO) {
112         std::cout << "trampolining through backendHandle to backend.computeStart()" << std::endl;
113     }
114     auto *backend = reinterpret_cast<Backend *>(backendHandle);
115     backend->computeStart();
116 }
117 extern "C" void computeEnd(long backendHandle) {
118     if (INFO) {
119         std::cout << "trampolining through backendHandle to backend.computeEnd()" << std::endl;
120     }
121     auto *backend = reinterpret_cast<Backend *>(backendHandle);
122     backend->computeEnd();
123 }
124 extern "C" void releaseBackend(long backendHandle) {
125     auto *backend = reinterpret_cast<Backend *>(backendHandle);
126     delete backend;
127 }
128 extern "C" long compile(long backendHandle, int len, char *source) {
129     if (INFO) {
130         std::cout << "trampolining through backendHandle to backend.compile() "
131                   << std::hex << backendHandle << std::dec << std::endl;
132     }
133     auto *backend = reinterpret_cast<Backend *>(backendHandle);
134     long compilationUnitHandle = reinterpret_cast<long>(backend->compile(len, source));
135     if (INFO) {
136         std::cout << "compilationUnitHandle = " << std::hex << compilationUnitHandle << std::dec << std::endl;
137     }
138     return compilationUnitHandle;
139 }
140 extern "C" long getKernel(long compilationUnitHandle, int nameLen, char *name) {
141     if (INFO) {
142         std::cout << "trampolining through programHandle to compilationUnit.getKernel()"
143                   << std::hex << compilationUnitHandle << std::dec << std::endl;
144     }
145     auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle);
146     return reinterpret_cast<long>(compilationUnit->getKernel(nameLen, name));
147 }
148 
149 extern "C" long ndrange(long kernelHandle, void *argArray) {
150     if (INFO) {
151         std::cout << "trampolining through kernelHandle to kernel.ndrange(...) " << std::endl;
152     }
153     auto kernel = reinterpret_cast<Backend::CompilationUnit::Kernel *>(kernelHandle);
154     kernel->ndrange(argArray);
155     return (long) 0;
156 }
157 extern "C" void releaseKernel(long kernelHandle) {
158     if (INFO) {
159         std::cout << "trampolining through to releaseKernel " << std::endl;
160     }
161     auto kernel = reinterpret_cast<Backend::CompilationUnit::Kernel *>(kernelHandle);
162     delete kernel;
163 }
164 
165 extern "C" void releaseCompilationUnit(long compilationUnitHandle) {
166     if (INFO) {
167         std::cout << "trampolining through to releaseCompilationUnit " << std::endl;
168     }
169     auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle);
170     delete compilationUnit;
171 }
172 extern "C" bool compilationUnitOK(long compilationUnitHandle) {
173     if (INFO) {
174         std::cout << "trampolining through to compilationUnitHandleOK " << std::endl;
175     }
176     auto compilationUnit = reinterpret_cast<Backend::CompilationUnit *>(compilationUnitHandle);
177     return compilationUnit->compilationUnitOK();
178 }
179 
180 extern "C" bool getBufferFromDeviceIfDirty(long backendHandle, long memorySegmentHandle, long memorySegmentLength) {
181     if (INFO) {
182         std::cout << "trampolining through to getBuffer " << std::endl;
183     }
184     auto backend = reinterpret_cast<Backend *>(backendHandle);
185     auto memorySegment = reinterpret_cast<void *>(memorySegmentHandle);
186     return backend->getBufferFromDeviceIfDirty(memorySegment, memorySegmentLength);
187 }
188 
189 
190 Backend::Config::Config(int configBits)
191         :
192         configBits(configBits),
193         minimizeCopies((configBits & MINIMIZE_COPIES_BIT) == MINIMIZE_COPIES_BIT),
194         alwaysCopy(!minimizeCopies),
195         trace((configBits & TRACE_BIT) == TRACE_BIT),
196         traceCopies((configBits & TRACE_COPIES_BIT) == TRACE_COPIES_BIT),
197         traceEnqueues((configBits & TRACE_ENQUEUES_BIT) == TRACE_ENQUEUES_BIT),
198         traceCalls((configBits & TRACE_CALLS_BIT) == TRACE_CALLS_BIT),
199         traceSkippedCopies((configBits & TRACE_SKIPPED_COPIES_BIT) == TRACE_SKIPPED_COPIES_BIT),
200         info((configBits & INFO_BIT) == INFO_BIT),
201         showCode((configBits & SHOW_CODE_BIT) == SHOW_CODE_BIT),
202         profile((configBits & PROFILE_BIT) == PROFILE_BIT),
203         showWhy((configBits & SHOW_WHY_BIT) == SHOW_WHY_BIT),
204         showState((configBits & SHOW_STATE_BIT) == SHOW_STATE_BIT),
205         ptx((configBits & PTX_BIT) == PTX_BIT),
206 
207         platform((configBits & 0xf)),
208         device((configBits & 0xf0) >> 4) {
209     if (info) {
210         std::cout << "native showCode " << showCode << std::endl;
211         std::cout << "native info " << info << std::endl;
212         std::cout << "native minimizeCopies " << minimizeCopies << std::endl;
213         std::cout << "native alwaysCopy " << alwaysCopy << std::endl;
214         std::cout << "native trace " << trace << std::endl;
215         std::cout << "native traceSkippedCopies " << traceSkippedCopies << std::endl;
216         std::cout << "native traceCalls " << traceCalls << std::endl;
217         std::cout << "native traceCopies " << traceCopies << std::endl;
218         std::cout << "native traceEnqueues " << traceEnqueues << std::endl;
219         std::cout << "native profile " << profile << std::endl;
220         std::cout << "native showWhy " << showWhy << std::endl;
221         std::cout << "native showState " << showState << std::endl;
222         std::cout << "native ptx " << ptx << std::endl;
223         std::cout << "native platform " << platform << std::endl;
224         std::cout << "native device " << device << std::endl;
225     }
226 }
227 
228 Backend::Config::~Config() {
229 }
230 
231 Backend::Queue::Queue(Backend *backend)
232         : backend(backend) {
233 }
234 
235 Backend::Queue::~Queue() {
236 
237 }
238 
239 Text::Text(size_t len, char *text, bool isCopy)
240         : len(len), text(text), isCopy(isCopy) {
241     // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl;
242 }
243 
244 Text::Text(char *text, bool isCopy)
245         : len(std::strlen(text)), text(text), isCopy(isCopy) {
246     // std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl;
247 }
248 
249 Text::Text(size_t len)
250         : len(len), text(len > 0 ? new char[len] : nullptr), isCopy(true) {
251     //  std::cout << "in Text len="<<len<<" isCopy="<<isCopy << std::endl;
252 }
253 
254 void Text::write(std::string &filename) const {
255     std::ofstream out;
256     out.open(filename, std::ofstream::trunc);
257     out.write(text, len);
258     out.close();
259 }
260 
261 void Text::read(std::string &filename) {
262     if (isCopy && text) {
263         delete[] text;
264     }
265     text = nullptr;
266     isCopy = false;
267     // std::cout << "reading from " << filename << std::endl;
268 
269     std::ifstream ptxStream;
270     ptxStream.open(filename);
271 
272 
273     ptxStream.seekg(0, std::ios::end);
274     len = ptxStream.tellg();
275     ptxStream.seekg(0, std::ios::beg);
276 
277     if (len > 0) {
278         text = new char[len];
279         isCopy = true;
280         //std::cerr << "about to read  " << len << std::endl;
281         ptxStream.read(text, len);
282         ptxStream.close();
283         //std::cerr << "read  " << len << std::endl;
284         text[len - 1] = '\0';
285         //std::cerr << "read text " << text << std::endl;
286     }
287 }
288 
289 Text::~Text() {
290     if (isCopy && text) {
291         delete[] text;
292     }
293     text = nullptr;
294     isCopy = false;
295     len = 0;
296 }
297 
298 Log::Log(size_t len)
299         : Text(len) {
300 }
301 
302 Log::Log(char *text)
303         : Text(text, false) {
304 }
305 
306 long Backend::CompilationUnit::Kernel::ndrange(void *argArray) {
307     if (compilationUnit->backend->config->traceCalls) {
308         std::cout << "kernelContext(\"" << name << "\"){" << std::endl;
309     }
310     ArgSled argSled(static_cast<ArgArray_s *>(argArray));
311     auto *profilableQueue = dynamic_cast<ProfilableQueue *>(compilationUnit->backend->queue);
312     if (profilableQueue != nullptr) {
313         profilableQueue->marker(Backend::ProfilableQueue::EnterKernelDispatchBits, name);
314     }
315     if (compilationUnit->backend->config->trace) {
316         Sled::show(std::cout, argArray);
317     }
318     KernelContext *kernelContext = nullptr;
319     for (int i = 0; i < argSled.argc(); i++) {
320         KernelArg *arg = argSled.arg(i);
321         switch (arg->variant) {
322             case '&': {
323                 if (arg->idx == 0) {
324                     kernelContext = static_cast<KernelContext *>(arg->value.buffer.memorySegment);
325                 }
326                 if (compilationUnit->backend->config->trace) {
327                     std::cout << "arg[" << i << "] = " << std::hex << (int) (arg->value.buffer.access);
328                     switch (arg->value.buffer.access) {
329                         case RO_BYTE:
330                             std::cout << " RO";
331                             break;
332                         case WO_BYTE:
333                             std::cout << " WO";
334                             break;
335                         case RW_BYTE:
336                             std::cout << " RW";
337                             break;
338                     }
339                     std::cout << std::endl;
340                 }
341 
342                 BufferState *bufferState = BufferState::of(arg);
343 
344                 Buffer *buffer = compilationUnit->backend->getOrCreateBuffer(bufferState);
345 
346                 bool kernelReadsFromThisArg = (arg->value.buffer.access == RW_BYTE) || (arg->value.buffer.access == RO_BYTE);
347                 bool copyToDevice =
348                         compilationUnit->backend->config->alwaysCopy
349                         || (bufferState->state == BufferState::NEW_STATE)
350                         || ((bufferState->state == BufferState::HOST_OWNED)
351                         );
352 
353                 if (compilationUnit->backend->config->showWhy) {
354                     std::cout <<
355                               "config.alwaysCopy=" << compilationUnit->backend->config->alwaysCopy
356                               << " | arg.RW=" << (arg->value.buffer.access == RW_BYTE)
357                               << " | arg.RO=" << (arg->value.buffer.access == RO_BYTE)
358                               << " | kernel.needsToRead=" << kernelReadsFromThisArg
359                               << " | Buffer state = " << BufferState::stateNames[bufferState->state]
360                               << " so ";
361                 }
362                 if (copyToDevice) {
363                     compilationUnit->backend->queue->copyToDevice(buffer);
364                    // buffer->copyToDevice();
365                     if (compilationUnit->backend->config->traceCopies) {
366                         std::cout << "copying arg " << arg->idx << " to device " << std::endl;
367                     }
368                 } else {
369                     if (compilationUnit->backend->config->traceSkippedCopies) {
370                         std::cout << "NOT copying arg " << arg->idx << " to device " << std::endl;
371                     }
372                 }
373                 setArg(arg, buffer);
374                 if (compilationUnit->backend->config->trace) {
375                     std::cout << "set buffer arg " << arg->idx << std::endl;
376                 }
377                 break;
378             }
379             case 'B':
380             case 'S':
381             case 'C':
382             case 'I':
383             case 'F':
384             case 'J':
385             case 'D': {
386                 setArg(arg);
387                 if (compilationUnit->backend->config->trace) {
388                     std::cerr << "set " << arg->variant << " " << arg->idx << std::endl;
389                 }
390                 break;
391             }
392             default: {
393                 std::cerr << "unexpected variant setting args in OpenCLkernel::kernelContext " << (char) arg->variant << std::endl;
394                 exit(1);
395             }
396         }
397     }
398 
399     if (kernelContext == nullptr){
400         std::cerr << "Looks like we recieved a kernel dispatch with xero args kernel='"<<name<<"'" << std::endl;
401         exit(1);
402     }
403 
404     if (compilationUnit->backend->config->trace) {
405         std::cout << "kernelContext = " << kernelContext->maxX << std::endl;
406     }
407 
408     // We 'double dispatch' back to the kernel to actually do the dispatch
409 
410     compilationUnit->backend->queue->dispatch(kernelContext, this);
411 
412 
413     for (int i = 0; i < argSled.argc(); i++) { // note i = 1... we never need to copy back the KernelContext
414         KernelArg *arg = argSled.arg(i);
415         if (arg->variant == '&') {
416             BufferState *bufferState = BufferState::of(arg);
417 
418             bool kernelWroteToThisArg = (arg->value.buffer.access == WO_BYTE) | (arg->value.buffer.access == RW_BYTE);
419             if (compilationUnit->backend->config->showWhy) {
420                 std::cout <<
421                           "config.alwaysCopy=" << compilationUnit->backend->config->alwaysCopy
422                           << " | arg.WO=" << (arg->value.buffer.access == WO_BYTE)
423                           << " | arg.RW=" << (arg->value.buffer.access == RW_BYTE)
424                           << " | kernel.wroteToThisArg=" << kernelWroteToThisArg
425                           << "Buffer state = " << BufferState::stateNames[bufferState->state]
426                           << " so ";
427             }
428 
429             auto *buffer = static_cast<Buffer *>(bufferState->vendorPtr);
430             if (compilationUnit->backend->config->alwaysCopy) {
431                 compilationUnit->backend->queue->copyFromDevice(buffer);
432                // buffer->copyFromDevice();
433                 if (compilationUnit->backend->config->traceCopies || compilationUnit->backend->config->traceEnqueues) {
434                     std::cout << "copying arg " << arg->idx << " from device " << std::endl;
435                 }
436             } else {
437                 if (compilationUnit->backend->config->traceSkippedCopies) {
438                     std::cout << "NOT copying arg " << arg->idx << " from device " << std::endl;
439                 }
440                 if (kernelWroteToThisArg) {
441                     bufferState->state = BufferState::DEVICE_OWNED;
442                 }
443             }
444         }
445     }
446     if (profilableQueue != nullptr) {
447         profilableQueue->marker(Backend::ProfilableQueue::LeaveKernelDispatchBits, name);
448     }
449     compilationUnit->backend->queue->wait();
450     compilationUnit->backend->queue->release();
451     if (compilationUnit->backend->config->traceCalls) {
452         std::cout << "\"" << name << "\"}" << std::endl;
453     }
454     return 0;
455 }
456 
457