1 #include <sys/wait.h>
  2 #include <chrono>
  3 #include <hip/hip_runtime.h>
  4 #include <hip/hiprtc.h>
  5 #include "hip_backend.h"
  6 
  7 #define CHECK_RET_CODE(call, ret_code)                                                             \
  8   {                                                                                                \
  9     if ((call) != ret_code) {                                                                      \
 10       std::cout << "Failed in call: " << #call << std::endl;                                       \
 11       std::abort();                                                                                \
 12     }                                                                                              \
 13   }
 14 #define HIP_CHECK(call) CHECK_RET_CODE(call, hipSuccess)
 15 #define HIPRTC_CHECK(call) CHECK_RET_CODE(call, HIPRTC_SUCCESS)
 16 
 17 uint64_t timeSinceEpochMillisec() {
 18     using namespace std::chrono;
 19     return duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
 20 }
 21 
 22 HipBackend::HipBuffer::HipBuffer(Backend::CompilationUnit::Kernel *kernel, Arg_s *arg)
 23         : Buffer(kernel, arg), devicePtr() {
 24     /*
 25      *   (void *) arg->value.buffer.memorySegment,
 26      *   (size_t) arg->value.buffer.sizeInBytes);
 27      */
 28 #ifdef VERBOSE
 29     std::cout << "hipMalloc()" << std::endl;
 30 #endif
 31     HIP_CHECK(hipMalloc(&devicePtr, (size_t) arg->value.buffer.sizeInBytes));
 32 #ifdef VERBOSE
 33     std::cout << "devptr " << std::hex<<  (long)devicePtr <<std::dec <<std::endl;
 34 #endif
 35     arg->value.buffer.vendorPtr = static_cast<void *>(this);
 36 }
 37 
 38 HipBackend::HipBuffer::~HipBuffer() {
 39 
 40 #ifdef VERBOSE
 41     std::cout << "hipFree()"
 42               << "devptr " << std::hex<<  (long)devicePtr <<std::dec
 43               << std::endl;
 44 #endif
 45     HIP_CHECK(hipFree(devicePtr));
 46     arg->value.buffer.vendorPtr = nullptr;
 47 }
 48 
 49 void HipBackend::HipBuffer::copyToDevice() {
 50     auto hipKernel = dynamic_cast<HipKernel*>(kernel);
 51 #ifdef VERBOSE
 52     std::cout << "copyToDevice() 0x"   << std::hex<<arg->value.buffer.sizeInBytes<<std::dec << " "<< arg->value.buffer.sizeInBytes << " "
 53               << "devptr " << std::hex<<  (long)devicePtr <<std::dec
 54               << std::endl;
 55 #endif
 56     char *ptr = (char*)arg->value.buffer.memorySegment;
 57 
 58     unsigned long ifacefacade1 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-16);
 59     unsigned long ifacefacade2 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-8);
 60 
 61     if (ifacefacade1 != 0x1face00000facadeL && ifacefacade1 != ifacefacade2) {
 62         std::cerr<<"End of buf marker before HtoD"<< std::hex << ifacefacade1 << ifacefacade2<< " buffer corrupt !" <<std::endl
 63                 <<" " << __FILE__ << " line " << __LINE__ << std::endl;
 64         exit(-1);
 65     }
 66 
 67     HIP_CHECK(hipMemcpyHtoDAsync(devicePtr, arg->value.buffer.memorySegment, arg->value.buffer.sizeInBytes, hipKernel->hipStream));
 68 }
 69 
 70 void HipBackend::HipBuffer::copyFromDevice() {
 71     auto hipKernel = dynamic_cast<HipKernel*>(kernel);
 72 #ifdef VERBOSE
 73     std::cout << "copyFromDevice() 0x" << std::hex<<arg->value.buffer.sizeInBytes<<std::dec << " "<< arg->value.buffer.sizeInBytes << " "
 74               << "devptr " << std::hex<<  (long)devicePtr <<std::dec
 75               << std::endl;
 76 #endif
 77     char *ptr = (char*)arg->value.buffer.memorySegment;
 78 
 79     unsigned long ifacefacade1 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-16);
 80     unsigned long ifacefacade2 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-8);
 81 
 82     if (ifacefacade1 != 0x1face00000facadeL || ifacefacade1 != ifacefacade2) {
 83         std::cerr<<"end of buf marker before  DtoH"<< std::hex << ifacefacade1 << ifacefacade2<< std::dec<< " buffer corrupt !"<<std::endl
 84                   <<" " << __FILE__ << " line " << __LINE__ << std::endl;
 85         exit(-1);
 86     }
 87     HIP_CHECK(hipMemcpyDtoHAsync(arg->value.buffer.memorySegment, devicePtr, arg->value.buffer.sizeInBytes, hipKernel->hipStream));
 88 
 89     ifacefacade1 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-16);
 90     ifacefacade2 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-8);
 91 
 92     if (ifacefacade1 != 0x1face00000facadeL || ifacefacade1 != ifacefacade2) {
 93         std::cerr<<"end of buf marker after  DtoH"<< std::hex << ifacefacade1 << ifacefacade2<< std::dec<< " buffer corrupt !"<<std::endl
 94                   <<" " << __FILE__ << " line " << __LINE__ << std::endl;
 95         exit(-1);
 96     }
 97 }
 98 
 99 HipBackend::HipProgram::HipKernel::HipKernel(Backend::CompilationUnit *program, char * name, hipFunction_t kernel)
100         : Backend::CompilationUnit::Kernel(program, name), kernel(kernel),hipStream() {
101 }
102 
103 HipBackend::HipProgram::HipKernel::~HipKernel() = default;
104 
105 long HipBackend::HipProgram::HipKernel::ndrange(void *argArray) {
106 #ifdef VERBOSE
107     std::cout << "ndrange(" << range << ") " << name << std::endl;
108 #endif
109 
110     hipStreamCreate(&hipStream);
111     ArgSled argSled(static_cast<ArgArray_s *>(argArray));
112     void *argslist[argSled.argc()];
113     NDRange *ndrange = nullptr;
114 #ifdef VERBOSE
115     std::cerr << "there are " << argSled.argc() << "args " << std::endl;
116 #endif
117     for (int i = 0; i < argSled.argc(); i++) {
118         Arg_s *arg = argSled.arg(i);
119         switch (arg->variant) {
120             case '&': {
121                 if (arg->idx == 0){
122                     ndrange = static_cast<NDRange *>(arg->value.buffer.memorySegment);
123                 }
124                 auto hipBuffer = new HipBuffer(this, arg);
125                 hipBuffer->copyToDevice();
126                 argslist[arg->idx] = static_cast<void *>(&hipBuffer->devicePtr);
127                 break;
128             }
129             case 'I':
130             case 'F':
131             case 'J':
132             case 'D':
133             case 'C':
134             case 'S': {
135                 argslist[arg->idx] = static_cast<void *>(&arg->value);
136                 break;
137             }
138             default: {
139                 std::cerr << " unhandled variant " << (char) arg->variant << std::endl;
140                 break;
141             }
142         }
143     }
144 
145     int range = ndrange->maxX;
146     int rangediv1024 = range / 1024;
147     int rangemod1024 = range % 1024;
148     if (rangemod1024 > 0) {
149         rangediv1024++;
150     }
151 
152 #ifdef VERBOSE
153     std::cout << "Running the kernel..." << std::endl;
154     std::cout << "   Requested range   = " << range << std::endl;
155     std::cout << "   Range mod 1024    = " << rangemod1024 << std::endl;
156     std::cout << "   Actual range 1024 = " << (rangediv1024 * 1024) << std::endl;
157 #endif
158 
159     HIP_CHECK(hipModuleLaunchKernel(kernel, rangediv1024, 1, 1, 1024, 1, 1, 0, hipStream, argslist, 0));
160 
161 #ifdef VERBOSE
162     std::cout << "Kernel complete..."<<hipGetErrorString(t)<<std::endl;
163 #endif
164 
165     for (int i = 0; i < argSled.argc(); i++) {
166         Arg_s *arg = argSled.arg(i);
167         if (arg->variant == '&') {
168             static_cast<HipBuffer *>(arg->value.buffer.vendorPtr)->copyFromDevice();
169 
170         }
171     }
172 
173     for (int i = 0; i < argSled.argc(); i++) {
174         Arg_s *arg = argSled.arg(i);
175         if (arg->variant == '&') {
176             delete static_cast<HipBuffer *>(arg->value.buffer.vendorPtr);
177             arg->value.buffer.vendorPtr = nullptr;
178         }
179     }
180     HIP_CHECK(hipStreamSynchronize(hipStream));
181     HIP_CHECK(hipStreamDestroy(hipStream));
182 
183     return (long) 0;
184 }
185 
186 
187 HipBackend::HipProgram::HipProgram(Backend *backend, BuildInfo *buildInfo, hipModule_t module)
188         : Backend::CompilationUnit(backend, buildInfo), module(module) {
189 }
190 
191 HipBackend::HipProgram::~HipProgram() = default;
192 
193 long HipBackend::HipProgram::getKernel(int nameLen, char *name) {
194 
195     hipFunction_t kernel;
196     HIP_CHECK(hipModuleGetFunction(&kernel, module, name));
197     long kernelHandle =  reinterpret_cast<long>(new HipKernel(this, name, kernel));
198 
199     return kernelHandle;
200 }
201 
202 bool HipBackend::HipProgram::programOK() {
203     return true;
204 }
205 
206 HipBackend::HipBackend(HipBackend::HIPConfig *hipConfig, int
207 configSchemaLen, char *configSchema)
208         : Backend((Backend::Config*) hipConfig, configSchemaLen, configSchema), device(),context()  {
209 #ifdef VERBOSE
210     std::cout << "HipBackend constructor " << ((hipConfig == nullptr) ? "hipConfig== null" : "got hipConfig")
211               << std::endl;
212 #endif
213     int deviceCount = 0;
214     hipError_t err = hipInit(0);
215     if (err == HIP_SUCCESS) {
216         hipGetDeviceCount(&deviceCount);
217         std::cout << "HipBackend device count" << std::endl;
218         hipDeviceGet(&device, 0);
219         std::cout << "HipBackend device ok" << std::endl;
220         hipCtxCreate(&context, 0, device);
221         std::cout << "HipBackend context created ok" << std::endl;
222     } else {
223         std::cout << "HipBackend failed, we seem to have the runtime library but no device, no context, nada "
224                   << std::endl;
225         exit(1);
226     }
227 }
228 
229 HipBackend::HipBackend() : HipBackend(nullptr, 0, nullptr) {
230 
231 }
232 
233 HipBackend::~HipBackend() {
234 #ifdef VERBOSE
235     std::cout << "freeing context" << std::endl;
236 #endif
237     auto status = hipCtxDestroy(context);
238     if (HIP_SUCCESS != status) {
239         std::cerr << "hipCtxDestroy(() HIP error = " << status
240                   <<" " << hipGetErrorString(static_cast<hipError_t>(status))
241                   <<" " << __FILE__ << " line " << __LINE__ << std::endl;
242         exit(-1);
243     }
244 }
245 
246 int HipBackend::getMaxComputeUnits() {
247     std::cout << "getMaxComputeUnits()" << std::endl;
248     int value = 1;
249     return value;
250 }
251 
252 void HipBackend::shortDeviceInfo() override {
253     showDeviceInfo();
254 }
255 
256 void HipBackend::showDeviceInfo() {
257     char name[100];
258     hipDeviceGetName(name, sizeof(name), device);
259     std::cout << "> Using device 0: " << name << std::endl;
260 
261     // get compute capabilities and the devicename
262     int major = 0, minor = 0;
263     hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, device);
264     hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, device);
265     std::cout << "> HIP Device has major=" << major << " minor=" << minor << " compute capability" << std::endl;
266 
267     int warpSize;
268     hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize, device);
269     std::cout << "> HIP Device has wave front size " << warpSize << std::endl;
270 
271     int threadsPerBlock;
272     hipDeviceGetAttribute(&threadsPerBlock, hipDeviceAttributeMaxThreadsPerBlock, device);
273     std::cout << "> HIP Device has threadsPerBlock " << threadsPerBlock << std::endl;
274 
275     int cores;
276     hipDeviceGetAttribute(&cores, hipDeviceAttributeMultiprocessorCount, device);
277     std::cout << "> HIP Cores " << cores << std::endl;
278 
279     size_t totalGlobalMem;
280     hipDeviceTotalMem(&totalGlobalMem, device);
281     std::cout << "  Total amount of global memory:   " << (unsigned long long) totalGlobalMem << std::endl;
282     std::cout << "  64-bit Memory Address:           " <<
283               ((totalGlobalMem > (unsigned long long) 4 * 1024 * 1024 * 1024L) ? "YES" : "NO") << std::endl;
284 
285 }
286 
287 long HipBackend::compileProgram(int len, char *source) {
288 
289 #ifdef VERBOSE
290     std::cout << "inside compileProgram" << std::endl;
291     std::cout << "hip " << source << std::endl;
292 #endif
293     hiprtcProgram prog;
294     auto status = hiprtcCreateProgram(&prog,
295                     source,
296                     "hip_kernel.hip",
297                     0,
298                     nullptr,
299                     nullptr);
300     if (status != HIPRTC_SUCCESS){
301         size_t logSize;
302         hiprtcGetProgramLogSize(prog, &logSize);
303 
304         std::cerr << "hiprtcCreateProgram(() HIP error = " << std::endl;
305         if (logSize) {
306             std::string log(logSize, '\0');
307             hiprtcGetProgramLog(prog, &log[0]);
308             std::cerr <<" " << log
309                       <<" " << __FILE__ << " line " << __LINE__ << std::endl;
310         }
311         exit(-1);
312     }
313 
314     status = hiprtcCompileProgram(prog, 0, nullptr);
315     if (status != HIPRTC_SUCCESS){
316         size_t logSize;
317         hiprtcGetProgramLogSize(prog, &logSize);
318 
319         std::cerr << "hiprtcCompileProgram(() HIP error = " << std::endl;
320         if (logSize) {
321             std::string log(logSize, '\0');
322             hiprtcGetProgramLog(prog, &log[0]);
323             std::cerr <<" " << log
324                       <<" " << __FILE__ << " line " << __LINE__ << std::endl;
325         }
326         exit(-1);
327     }
328 
329     size_t codeSize;
330     hiprtcGetCodeSize(prog, &codeSize);
331 #ifdef VERBOSE
332     std::cerr << "HIP compiled code size " << codeSize << std::endl;
333 #endif
334 
335     std::vector<char> kernel_binary(codeSize);
336     hiprtcGetCode(prog, kernel_binary.data());
337 
338     hipModule_t module;
339     hipModuleLoadData(&module, kernel_binary.data());
340     hiprtcDestroyProgram(&prog);
341 
342     return reinterpret_cast<long>(new HipProgram(this, nullptr, module));
343 }
344 
345 long getBackend(void *config, int configSchemaLen, char *configSchema) {
346     long backendHandle = reinterpret_cast<long>(
347             new HipBackend(static_cast<HipBackend::HIPConfig *>(config), configSchemaLen,
348                             configSchema));
349 #ifdef VERBOSE
350     std::cout << "getBackend() -> backendHandle=" << std::hex << backendHandle << std::dec << std::endl;
351 #endif
352     return backendHandle;
353 }
354 
355 
356