1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 #include <sys/wait.h> 27 #include <chrono> 28 #include <thread> 29 #include "cuda_backend.h" 30 31 CudaBackend::CudaQueue::CudaQueue(Backend *backend) 32 : Backend::Queue(backend),cuStream(),streamCreationThread() { 33 } 34 void CudaBackend::CudaQueue::init(){ 35 streamCreationThread = std::this_thread::get_id(); 36 if (backend->config->traceCalls){ 37 std::cout << "init() 0x" 38 << " thread=" <<streamCreationThread 39 << std::endl; 40 } 41 42 WHERE{.f=__FILE__ , .l=__LINE__, 43 .e=cuStreamCreate(&cuStream,CU_STREAM_DEFAULT), 44 .t= "cuStreamCreate" 45 }.report(); 46 47 if (backend->config->traceCalls){ 48 std::cout << "exiting init() 0x" 49 << " custream=" <<std::hex<<streamCreationThread <<std::dec 50 << std::endl; 51 } 52 } 53 54 void CudaBackend::CudaQueue::wait(){ 55 WHERE{.f=__FILE__, .l=__LINE__, 56 .e=cuStreamSynchronize(cuStream), 57 .t= "cuStreamSynchronize" 58 }.report(); 59 } 60 61 62 void CudaBackend::CudaQueue::computeStart(){ 63 wait(); // should be no-op 64 release(); // also ; 65 } 66 67 68 69 void CudaBackend::CudaQueue::computeEnd(){ 70 71 } 72 73 74 75 76 void CudaBackend::CudaQueue::release(){ 77 78 } 79 80 CudaBackend::CudaQueue::~CudaQueue(){ 81 WHERE{.f=__FILE__, .l=__LINE__, 82 .e=cuStreamDestroy(cuStream), 83 .t= "cuStreamDestroy" 84 }.report(); 85 } 86 87 void CudaBackend::CudaQueue::copyToDevice(Buffer *buffer) { 88 const auto *cudaBuffer = dynamic_cast<CudaBuffer *>(buffer); 89 const std::thread::id thread_id = std::this_thread::get_id(); 90 if (thread_id != streamCreationThread){ 91 std::cout << "copyToDevice() thread=" <<thread_id<< " != "<< streamCreationThread<< std::endl; 92 } 93 if (backend->config->traceCalls) { 94 95 std::cout << "copyToDevice() 0x" 96 << std::hex<<cudaBuffer->bufferState->length<<std::dec << "/" 97 << cudaBuffer->bufferState->length << " " 98 << "devptr=" << std::hex<< static_cast<long>(cudaBuffer->devicePtr) <<std::dec 99 << " thread=" <<thread_id 100 << std::endl; 101 } 102 WHERE{.f=__FILE__, .l=__LINE__, 103 .e=cuMemcpyHtoDAsync( 104 cudaBuffer->devicePtr, 105 cudaBuffer->bufferState->ptr, 106 cudaBuffer->bufferState->length, 107 dynamic_cast<CudaQueue*>(backend->queue)->cuStream), 108 .t="cuMemcpyHtoDAsync" 109 }.report(); 110 111 } 112 113 void CudaBackend::CudaQueue::copyFromDevice(Buffer *buffer) { 114 const auto *cudaBuffer = dynamic_cast<CudaBuffer *>(buffer); 115 const std::thread::id thread_id = std::this_thread::get_id(); 116 if (thread_id != streamCreationThread){ 117 std::cout << "copyFromDevice() thread=" <<thread_id<< " != "<< streamCreationThread<< std::endl; 118 } 119 if (backend->config->traceCalls) { 120 121 std::cout << "copyFromDevice() 0x" 122 << std::hex<<cudaBuffer->bufferState->length<<std::dec << "/" 123 << cudaBuffer->bufferState->length << " " 124 << "devptr=" << std::hex<< static_cast<long>(cudaBuffer->devicePtr) <<std::dec 125 << " thread=" <<thread_id 126 << std::endl; 127 } 128 129 130 WHERE{.f=__FILE__, .l=__LINE__, 131 .e=cuMemcpyDtoHAsync( 132 cudaBuffer->bufferState->ptr, 133 cudaBuffer->devicePtr, 134 cudaBuffer->bufferState->length, 135 dynamic_cast<CudaQueue*>(backend->queue)->cuStream), 136 .t="cuMemcpyDtoHAsync" 137 }.report(); 138 139 } 140 141 void CudaBackend::CudaQueue::dispatch(KernelContext *kernelContext, CompilationUnit::Kernel *kernel) { 142 const auto cudaKernel = dynamic_cast<CudaModule::CudaKernel *>(kernel); 143 144 const int range = kernelContext->maxX; 145 int rangediv1024 = range / 1024; 146 int rangemod1024 = range % 1024; 147 if (rangemod1024 > 0) { 148 rangediv1024++; 149 } 150 // std::cout << "Running the kernel..." << std::endl; 151 // std::cout << " Requested range = " << range << std::endl; 152 // std::cout << " Range mod 1024 = " << rangemod1024 << std::endl; 153 // std::cout << " Actual range 1024 = " << (rangediv1024 * 1024) << std::endl; 154 // auto status= static_cast<CUresult>(cudaStreamSynchronize(cudaBackend->cudaQueue.cuStream)); 155 156 // cudaBackend->cudaQueue.wait(); 157 const std::thread::id thread_id = std::this_thread::get_id(); 158 if (thread_id != streamCreationThread){ 159 std::cout << "dispatch() thread=" <<thread_id<< " != "<< streamCreationThread<< std::endl; 160 } 161 162 const auto status = cuLaunchKernel(cudaKernel->function, 163 rangediv1024, 1, 1, 164 1024, 1, 1, 165 0, cuStream, 166 cudaKernel->argslist, nullptr); 167 168 WHERE{.f=__FILE__, .l=__LINE__, .e=status, .t="cuLaunchKernel"}.report(); 169 }