1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 #pragma once 26 #define CUDA_TYPES 27 #ifdef __APPLE__ 28 29 #define LongUnsignedNewline "%llu\n" 30 #define Size_tNewline "%lu\n" 31 #define LongHexNewline "(0x%llx)\n" 32 #define alignedMalloc(size, alignment) memalign(alignment, size) 33 #define SNPRINTF snprintf 34 #else 35 36 #include <malloc.h> 37 38 #define LongHexNewline "(0x%lx)\n" 39 #define LongUnsignedNewline "%lu\n" 40 #define Size_tNewline "%lu\n" 41 #if defined (_WIN32) 42 #include "windows.h" 43 #define alignedMalloc(size, alignment) _aligned_malloc(size, alignment) 44 #define SNPRINTF _snprintf 45 #else 46 #define alignedMalloc(size, alignment) memalign(alignment, size) 47 #define SNPRINTF snprintf 48 #endif 49 #endif 50 51 #include <iostream> 52 #include <cuda.h> 53 #include <builtin_types.h> 54 55 #include "shared.h" 56 57 #include <fstream> 58 #include <thread> 59 60 struct WHERE{ 61 const char* f; 62 int l; 63 cudaError_enum e; 64 const char* t; 65 void report() const { 66 if (e != CUDA_SUCCESS){ 67 const char *buf; 68 cuGetErrorName(e, &buf); 69 std::cerr << t << " CUDA error = " << e << " " << buf <<std::endl<< " " << f << " line " << l << std::endl; 70 exit(-1); 71 } 72 } 73 }; 74 75 #define CUDA_CHECK(err, functionName) { \ 76 WHERE{.f =__FILE__, \ 77 .l=__LINE__, \ 78 .e = err, \ 79 .t = functionName \ 80 }.report(); \ 81 } 82 83 class PtxSource final : public Text { 84 public: 85 PtxSource(); 86 explicit PtxSource(size_t len); 87 PtxSource(size_t len, char *text); 88 PtxSource(size_t len, char *text, bool isCopy); 89 explicit PtxSource(char *text); 90 ~PtxSource() override = default; 91 }; 92 93 class CudaSource final :public Text { 94 public: 95 CudaSource(size_t len, char *text, bool isCopy); 96 explicit CudaSource(size_t len); 97 explicit CudaSource(char* text); 98 CudaSource(); 99 ~CudaSource() override = default; 100 }; 101 102 class CudaBackend final : public Backend { 103 public: 104 class CudaQueue final : public Backend::Queue { 105 public: 106 std::thread::id streamCreationThread; 107 CUstream cuStream; 108 explicit CudaQueue(Backend *backend); 109 void init(); 110 void wait() override; 111 112 void release() override; 113 114 void computeStart() override; 115 116 void computeEnd() override; 117 118 void copyToDevice(Buffer *buffer) override; 119 120 void copyFromDevice(Buffer *buffer) override; 121 122 int estimateThreadsPerBlock(int dimensions); 123 124 void dispatch(KernelContext *kernelContext, CompilationUnit::Kernel *kernel) override; 125 126 ~CudaQueue() override; 127 }; 128 129 class CudaBuffer final : public Buffer { 130 public: 131 CUdeviceptr devicePtr; 132 CudaBuffer(Backend *backend, BufferState *bufferState); 133 ~CudaBuffer() override; 134 }; 135 136 class CudaModule final : public CompilationUnit { 137 CUmodule module; 138 CudaSource cudaSource; 139 PtxSource ptxSource; 140 Log log; 141 142 public: 143 class CudaKernel final : public Kernel { 144 145 public: 146 bool setArg(KernelArg *arg) override; 147 bool setArg(KernelArg *arg, Buffer *buffer) override; 148 CudaKernel(Backend::CompilationUnit *program, char* name, CUfunction function); 149 ~CudaKernel() override; 150 static CudaKernel * of(long kernelHandle); 151 static CudaKernel * of(Backend::CompilationUnit::Kernel *kernel); 152 153 CUfunction function; 154 void *argslist[100]{}; 155 }; 156 CudaModule(Backend *backend, char *cudaSrc, char *log, bool ok, CUmodule module); 157 ~CudaModule() override; 158 static CudaModule * of(long moduleHandle); 159 //static CudaModule * of(CompilationUnit *compilationUnit); 160 Kernel *getKernel(int nameLen, char *name) override; 161 CudaKernel *getCudaKernel(char *name); 162 CudaKernel *getCudaKernel(int nameLen, char *name); 163 bool programOK(); 164 }; 165 166 private: 167 CUresult initStatus; 168 CUdevice device; 169 CUcontext context; 170 public: 171 void info() override; 172 CudaModule * compile(const CudaSource *cudaSource); 173 CudaModule * compile(const CudaSource &cudaSource); 174 CudaModule * compile(const PtxSource *ptxSource); 175 CudaModule * compile(const PtxSource &ptxSource); 176 static PtxSource *nvcc(const CudaSource *cudaSource); 177 CompilationUnit * compile(int len, char *source) override; 178 void computeStart() override; 179 void computeEnd() override; 180 CudaBuffer * getOrCreateBuffer(BufferState *bufferState) override; 181 bool getBufferFromDeviceIfDirty(void *memorySegment, long memorySegmentLength) override; 182 183 explicit CudaBackend(int mode); 184 185 ~CudaBackend() override; 186 static CudaBackend * of(long backendHandle); 187 static CudaBackend * of(Backend *backend); 188 }; 189 190