1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 #include "cuda_backend.h" 28 29 30 CudaBackend::CudaModule::CudaKernel::CudaKernel(Backend::CompilationUnit *program,char * name, CUfunction function) 31 : Backend::CompilationUnit::Kernel(program, name), function(function) { 32 } 33 34 CudaBackend::CudaModule::CudaKernel::~CudaKernel() = default; 35 /* 36 long CudaBackend::CudaModule::CudaKernel::ndrange(void *argArray) { 37 38 auto cudaBackend = CudaBackend::of(compilationUnit->backend); 39 if (cudaBackend->cudaConfig.traceCalls) { 40 std::cout << "ndrange(" << ") " << name << std::endl; 41 } 42 ArgSled argSled(static_cast<ArgArray_s *>(argArray)); 43 void *argslist[argSled.argc()]; 44 45 NDRange *ndrange = nullptr; 46 for (int i = 0; i < argSled.argc(); i++) { 47 Arg_s *arg = argSled.arg(i); 48 switch (arg->variant) { 49 case '&': { 50 if (arg->idx == 0){ 51 ndrange = static_cast<NDRange *>(arg->value.buffer.memorySegment); 52 } 53 auto cudaBuffer = new CudaBackend::CudaBuffer(cudaBackend, arg, BufferState_s::of(arg)); 54 cudaBuffer->copyToDevice(); 55 argslist[arg->idx] = static_cast<void *>(&cudaBuffer->devicePtr); 56 break; 57 } 58 case 'I': 59 case 'F': 60 case 'J': 61 case 'D': 62 case 'C': 63 case 'S': { 64 argslist[arg->idx] = static_cast<void *>(&arg->value); 65 break; 66 } 67 default: { 68 std::cerr << " unhandled variant " << (char) arg->variant << std::endl; 69 break; 70 } 71 } 72 } 73 int range = ndrange->maxX; 74 int rangediv1024 = range / 1024; 75 int rangemod1024 = range % 1024; 76 if (rangemod1024 > 0) { 77 rangediv1024++; 78 } 79 // std::cout << "Running the kernel..." << std::endl; 80 // std::cout << " Requested range = " << range << std::endl; 81 // std::cout << " Range mod 1024 = " << rangemod1024 << std::endl; 82 // std::cout << " Actual range 1024 = " << (rangediv1024 * 1024) << std::endl; 83 // auto status= static_cast<CUresult>(cudaStreamSynchronize(cudaBackend->cudaQueue.cuStream)); 84 85 // cudaBackend->cudaQueue.wait(); 86 auto status= cuLaunchKernel(function, 87 rangediv1024, 1, 1, 88 1024, 1, 1, 89 0, cudaBackend->cudaQueue.cuStream , 90 argslist, nullptr); 91 92 WHERE{.f=__FILE__, .l=__LINE__, .e=status, .t="cuLaunchKernel"}.report(); 93 // cudaBackend->cudaQueue.wait(); 94 95 96 for (int i = 0; i < argSled.argc(); i++) { 97 Arg_s *arg = argSled.arg(i); 98 if (arg->variant == '&') { 99 auto bufferState = BufferState_s::of(arg)->vendorPtr; 100 auto cudaBuffer = static_cast<CudaBuffer *>(bufferState); 101 cudaBuffer->copyFromDevice(); 102 } 103 } 104 cudaBackend->cudaQueue.wait(); 105 106 for (int i = 0; i < argSled.argc(); i++) { 107 Arg_s *arg = argSled.arg(i); 108 if (arg->variant == '&') { 109 auto bufferState = BufferState_s::of(arg)->vendorPtr; 110 auto cudaBuffer = static_cast<CudaBuffer *>(bufferState); 111 delete cudaBuffer; 112 113 } 114 } 115 116 return (long) 0; 117 } */ 118 119 CudaBackend::CudaModule::CudaKernel * CudaBackend::CudaModule::CudaKernel::of(long kernelHandle){ 120 return reinterpret_cast<CudaBackend::CudaModule::CudaKernel *>(kernelHandle); 121 } 122 CudaBackend::CudaModule::CudaKernel * CudaBackend::CudaModule::CudaKernel::of(Backend::CompilationUnit::Kernel *kernel){ 123 return dynamic_cast<CudaBackend::CudaModule::CudaKernel *>(kernel); 124 } 125 126 bool CudaBackend::CudaModule::CudaKernel::setArg(KernelArg *arg){ 127 argslist[arg->idx] = static_cast<void *>(&arg->value); 128 return true; 129 } 130 bool CudaBackend::CudaModule::CudaKernel::setArg(KernelArg *arg, Buffer *buffer) { 131 argslist[arg->idx] = static_cast<void *>(&dynamic_cast<CudaBuffer *>(buffer)->devicePtr); 132 return true; 133 }