1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 #include "opencl_backend.h" 26 27 28 /* 29 OpenCLKernel 30 */ 31 32 OpenCLBackend::OpenCLProgram::OpenCLKernel::OpenCLKernel(Backend::CompilationUnit *compilationUnit, char* name, cl_kernel kernel) 33 : Backend::CompilationUnit::Kernel(compilationUnit, name), kernel(kernel){ 34 } 35 36 OpenCLBackend::OpenCLProgram::OpenCLKernel::~OpenCLKernel() { 37 clReleaseKernel(kernel); 38 } 39 40 41 /* 42 void dispatchKernel(Kernel kernel, KernelContext kc, Arg ... args) { 43 for (int argn = 0; argn<args.length; argn++){ 44 Arg arg = args[argn]; 45 if (alwaysCopyBuffers || (((arg.flags &JavaDirty)==JavaDirty) && kernel.readsFrom(arg))) { 46 enqueueCopyToDevice(arg); 47 } 48 } 49 enqueueKernel(kernel); 50 waitForKernel(); 51 52 for (int argn = 0; argn<args.length; argn++){ 53 Arg arg = args[argn]; 54 if (alwaysCopyBuffers){ 55 enqueueCopyFromDevice(arg); 56 arg.flags = 0; 57 }else{ 58 if (kernel.writesTo(arg)) { 59 arg.flags = DeviceDirty; 60 }else{ 61 arg.flags = 0; 62 } 63 } 64 } 65 66 } 67 */ 68 69 70 long OpenCLBackend::OpenCLProgram::OpenCLKernel::ndrange(void *argArray) { 71 72 // std::cout << "ndrange(" << range << ") " << std::endl; 73 ArgSled argSled(static_cast<ArgArray_s *>(argArray)); 74 OpenCLBackend *openclBackend = dynamic_cast<OpenCLBackend*>(compilationUnit->backend); 75 // 76 openclBackend->openclQueue.marker(openclBackend->openclQueue.EnterKernelDispatchBits, 77 (dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name); 78 if (openclBackend->openclConfig.traceCalls){ 79 std::cout << "ndrange(\"" << (dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<< "\"){"<<std::endl; 80 std::cout << "Kernel name '"<< (dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<<"'"<<std::endl; 81 } 82 if (openclBackend->openclConfig.trace){ 83 Sled::show(std::cout, argArray); 84 } 85 NDRange *ndrange = nullptr; 86 for (int i = 0; i < argSled.argc(); i++) { 87 Arg_s *arg = argSled.arg(i); 88 switch (arg->variant) { 89 case '&': { 90 if (arg->idx == 0){ 91 ndrange = static_cast<NDRange *>(arg->value.buffer.memorySegment); 92 } 93 if (openclBackend->openclConfig.trace){ 94 std::cout << "arg["<<i<<"] = "<< std::hex << (int)(arg->value.buffer.access); 95 switch (arg->value.buffer.access){ 96 case RO_BYTE: std::cout << " RO";break; 97 case WO_BYTE: std::cout << " WO";break; 98 case RW_BYTE: std::cout << " RW"; break; 99 } 100 std::cout << std::endl; 101 } 102 103 BufferState_s * bufferState = BufferState_s::of(arg); 104 if (bufferState->ptr != arg->value.buffer.memorySegment){ 105 std::cerr <<"bufferState->ptr != arg->value.buffer.memorySegment"<<std::endl; 106 std::exit(1); 107 } 108 109 if ((bufferState->vendorPtr == 0L) && (bufferState->state != BufferState_s::NEW_STATE)){ 110 std::cerr << "Warning: Unexpected initial state for arg "<< i 111 <<" of kernel '"<<(dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<<"'" 112 << " state=" << bufferState->state<< " '" 113 << BufferState_s::stateNames[bufferState->state]<< "'" 114 << " vendorPtr" << bufferState->vendorPtr<<std::endl; 115 } 116 OpenCLBuffer * openclBuffer =nullptr; 117 118 if (bufferState->vendorPtr == 0L || bufferState->state == BufferState_s::NEW_STATE){ 119 openclBuffer = new OpenCLBuffer(openclBackend, arg, bufferState); 120 if (openclBackend->openclConfig.trace){ 121 std::cout << "We allocated arg "<<i<<" buffer "<<std::endl; 122 } 123 }else{ 124 if (openclBackend->openclConfig.trace){ 125 std::cout << "Were reusing arg "<<i<<" buffer "<<std::endl; 126 } 127 openclBuffer= static_cast<OpenCLBuffer*>(bufferState->vendorPtr); 128 } 129 if (openclBuffer->shouldCopyToDevice(arg)){ 130 openclBuffer->copyToDevice(); 131 }else if (openclBackend->openclConfig.traceSkippedCopies){ 132 std::cout << "NOT copying arg " << arg->idx <<" to device "<< std::endl; 133 } 134 135 cl_int status = clSetKernelArg(kernel, arg->idx, sizeof(cl_mem), &openclBuffer->clMem); 136 if (status != CL_SUCCESS) { 137 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 138 exit(1); 139 } 140 if (openclBackend->openclConfig.trace){ 141 std::cout << "set buffer arg " << arg->idx << std::endl; 142 } 143 break; 144 } 145 case 'B': 146 case 'S': 147 case 'C': 148 case 'I': 149 case 'F': 150 case 'J': 151 case 'D': 152 { 153 cl_int status = clSetKernelArg(kernel, arg->idx, arg->size(), (void *) &arg->value); 154 if (status != CL_SUCCESS) { 155 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 156 exit(1); 157 } 158 if (openclBackend->openclConfig.trace){ 159 std::cerr << "set " <<arg->variant << " " << arg->idx << std::endl; 160 } 161 break; 162 } 163 default: { 164 std::cerr << "unexpected variant setting args in OpenCLkernel::ndrange " << (char) arg->variant << std::endl; 165 exit(1); 166 } 167 } 168 } 169 170 size_t globalSize = ndrange->maxX; 171 if (openclBackend->openclConfig.trace){ 172 std::cout << "ndrange = " << ndrange->maxX << std::endl; 173 } 174 size_t dims = 1; 175 cl_int status = clEnqueueNDRangeKernel( 176 openclBackend->openclQueue.command_queue, 177 kernel, 178 dims, 179 nullptr, 180 &globalSize, 181 nullptr, 182 openclBackend->openclQueue.eventc, 183 openclBackend->openclQueue.eventListPtr(), 184 openclBackend->openclQueue.nextEventPtr()); 185 openclBackend->openclQueue.markAsNDRangeAndInc(); 186 if (status != CL_SUCCESS) { 187 std::cerr << OpenCLBackend::errorMsg(status) << std::endl; 188 exit(1); 189 } 190 if (openclBackend->openclConfig.trace | openclBackend->openclConfig.traceEnqueues){ 191 std::cout << "enqueued kernel dispatch \"" << (dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name << 192 "\" globalSize=" << globalSize << std::endl; 193 } 194 195 196 for (int i = 0; i < argSled.argc(); i++) { // note i = 1... we don't need to copy back the KernelContext 197 Arg_s *arg = argSled.arg(i); 198 if (arg->variant == '&') { 199 BufferState_s * bufferState = BufferState_s::of(arg ); 200 OpenCLBuffer *openclBuffer = static_cast<OpenCLBuffer *>(bufferState->vendorPtr); 201 if (openclBuffer->shouldCopyFromDevice(arg)){ 202 openclBuffer->copyFromDevice(); 203 if (openclBackend->openclConfig.traceCopies||openclBackend->openclConfig.traceEnqueues){ 204 std::cout << "copying arg " << arg->idx <<" from device "<< std::endl; 205 } 206 bufferState->state = BufferState_s::DEVICE_OWNED; 207 // bufferState->state = BufferState_s::HOST_OWNED; 208 }else{ 209 if (openclBackend->openclConfig.traceSkippedCopies){ 210 std::cout << "NOT copying arg " << arg->idx <<" from device "<< std::endl; 211 } 212 bufferState->state = BufferState_s::DEVICE_OWNED; 213 } 214 } 215 } 216 217 218 219 openclBackend->openclQueue.marker(openclBackend->openclQueue.LeaveKernelDispatchBits, 220 (dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name 221 ); 222 openclBackend->openclQueue.wait(); 223 openclBackend->openclQueue.release(); 224 if (openclBackend->openclConfig.traceCalls){ 225 std::cout << "\"" << (dynamic_cast<Backend::CompilationUnit::Kernel*>(this))->name<< "\"}"<<std::endl; 226 } 227 return 0; 228 }