1 #include <sys/wait.h>
2 #include <chrono>
3 #include <hip/hip_runtime.h>
4 #include <hip/hiprtc.h>
5 #include "hip_backend.h"
6
7 #define CHECK_RET_CODE(call, ret_code) \
8 { \
9 if ((call) != ret_code) { \
10 std::cout << "Failed in call: " << #call << std::endl; \
11 std::abort(); \
12 } \
13 }
14 #define HIP_CHECK(call) CHECK_RET_CODE(call, hipSuccess)
15 #define HIPRTC_CHECK(call) CHECK_RET_CODE(call, HIPRTC_SUCCESS)
16
17 uint64_t timeSinceEpochMillisec() {
18 using namespace std::chrono;
19 return duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
20 }
21
22 HipBackend::HipBuffer::HipBuffer(Backend::CompilationUnit::Kernel *kernel, Arg_s *arg)
23 : Buffer(kernel, arg), devicePtr() {
24 /*
25 * (void *) arg->value.buffer.memorySegment,
26 * (size_t) arg->value.buffer.sizeInBytes);
27 */
28 #ifdef VERBOSE
29 std::cout << "hipMalloc()" << std::endl;
30 #endif
31 HIP_CHECK(hipMalloc(&devicePtr, (size_t) arg->value.buffer.sizeInBytes));
32 #ifdef VERBOSE
33 std::cout << "devptr " << std::hex<< (long)devicePtr <<std::dec <<std::endl;
34 #endif
35 arg->value.buffer.vendorPtr = static_cast<void *>(this);
36 }
37
38 HipBackend::HipBuffer::~HipBuffer() {
39
40 #ifdef VERBOSE
41 std::cout << "hipFree()"
42 << "devptr " << std::hex<< (long)devicePtr <<std::dec
43 << std::endl;
44 #endif
45 HIP_CHECK(hipFree(devicePtr));
46 arg->value.buffer.vendorPtr = nullptr;
47 }
48
49 void HipBackend::HipBuffer::copyToDevice() {
50 auto hipKernel = dynamic_cast<HipKernel*>(kernel);
51 #ifdef VERBOSE
52 std::cout << "copyToDevice() 0x" << std::hex<<arg->value.buffer.sizeInBytes<<std::dec << " "<< arg->value.buffer.sizeInBytes << " "
53 << "devptr " << std::hex<< (long)devicePtr <<std::dec
54 << std::endl;
55 #endif
56 char *ptr = (char*)arg->value.buffer.memorySegment;
57
58 unsigned long ifacefacade1 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-16);
59 unsigned long ifacefacade2 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-8);
60
61 if (ifacefacade1 != 0x1face00000facadeL && ifacefacade1 != ifacefacade2) {
62 std::cerr<<"End of buf marker before HtoD"<< std::hex << ifacefacade1 << ifacefacade2<< " buffer corrupt !" <<std::endl
63 <<" " << __FILE__ << " line " << __LINE__ << std::endl;
64 exit(-1);
65 }
66
67 HIP_CHECK(hipMemcpyHtoDAsync(devicePtr, arg->value.buffer.memorySegment, arg->value.buffer.sizeInBytes, hipKernel->hipStream));
68 }
69
70 void HipBackend::HipBuffer::copyFromDevice() {
71 auto hipKernel = dynamic_cast<HipKernel*>(kernel);
72 #ifdef VERBOSE
73 std::cout << "copyFromDevice() 0x" << std::hex<<arg->value.buffer.sizeInBytes<<std::dec << " "<< arg->value.buffer.sizeInBytes << " "
74 << "devptr " << std::hex<< (long)devicePtr <<std::dec
75 << std::endl;
76 #endif
77 char *ptr = (char*)arg->value.buffer.memorySegment;
78
79 unsigned long ifacefacade1 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-16);
80 unsigned long ifacefacade2 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-8);
81
82 if (ifacefacade1 != 0x1face00000facadeL || ifacefacade1 != ifacefacade2) {
83 std::cerr<<"end of buf marker before DtoH"<< std::hex << ifacefacade1 << ifacefacade2<< std::dec<< " buffer corrupt !"<<std::endl
84 <<" " << __FILE__ << " line " << __LINE__ << std::endl;
85 exit(-1);
86 }
87 HIP_CHECK(hipMemcpyDtoHAsync(arg->value.buffer.memorySegment, devicePtr, arg->value.buffer.sizeInBytes, hipKernel->hipStream));
88
89 ifacefacade1 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-16);
90 ifacefacade2 = *reinterpret_cast<unsigned long*>(ptr+arg->value.buffer.sizeInBytes-8);
91
92 if (ifacefacade1 != 0x1face00000facadeL || ifacefacade1 != ifacefacade2) {
93 std::cerr<<"end of buf marker after DtoH"<< std::hex << ifacefacade1 << ifacefacade2<< std::dec<< " buffer corrupt !"<<std::endl
94 <<" " << __FILE__ << " line " << __LINE__ << std::endl;
95 exit(-1);
96 }
97 }
98
99 HipBackend::HipProgram::HipKernel::HipKernel(Backend::CompilationUnit *program, char * name, hipFunction_t kernel)
100 : Backend::CompilationUnit::Kernel(program, name), kernel(kernel),hipStream() {
101 }
102
103 HipBackend::HipProgram::HipKernel::~HipKernel() = default;
104
105 long HipBackend::HipProgram::HipKernel::ndrange(void *argArray) {
106 #ifdef VERBOSE
107 std::cout << "ndrange(" << range << ") " << name << std::endl;
108 #endif
109
110 hipStreamCreate(&hipStream);
111 ArgSled argSled(static_cast<ArgArray_s *>(argArray));
112 void *argslist[argSled.argc()];
113 NDRange *ndrange = nullptr;
114 #ifdef VERBOSE
115 std::cerr << "there are " << argSled.argc() << "args " << std::endl;
116 #endif
117 for (int i = 0; i < argSled.argc(); i++) {
118 Arg_s *arg = argSled.arg(i);
119 switch (arg->variant) {
120 case '&': {
121 if (arg->idx == 0){
122 ndrange = static_cast<NDRange *>(arg->value.buffer.memorySegment);
123 }
124 auto hipBuffer = new HipBuffer(this, arg);
125 hipBuffer->copyToDevice();
126 argslist[arg->idx] = static_cast<void *>(&hipBuffer->devicePtr);
127 break;
128 }
129 case 'I':
130 case 'F':
131 case 'J':
132 case 'D':
133 case 'C':
134 case 'S': {
135 argslist[arg->idx] = static_cast<void *>(&arg->value);
136 break;
137 }
138 default: {
139 std::cerr << " unhandled variant " << (char) arg->variant << std::endl;
140 break;
141 }
142 }
143 }
144
145 int range = ndrange->maxX;
146 int rangediv1024 = range / 1024;
147 int rangemod1024 = range % 1024;
148 if (rangemod1024 > 0) {
149 rangediv1024++;
150 }
151
152 #ifdef VERBOSE
153 std::cout << "Running the kernel..." << std::endl;
154 std::cout << " Requested range = " << range << std::endl;
155 std::cout << " Range mod 1024 = " << rangemod1024 << std::endl;
156 std::cout << " Actual range 1024 = " << (rangediv1024 * 1024) << std::endl;
157 #endif
158
159 HIP_CHECK(hipModuleLaunchKernel(kernel, rangediv1024, 1, 1, 1024, 1, 1, 0, hipStream, argslist, 0));
160
161 #ifdef VERBOSE
162 std::cout << "Kernel complete..."<<hipGetErrorString(t)<<std::endl;
163 #endif
164
165 for (int i = 0; i < argSled.argc(); i++) {
166 Arg_s *arg = argSled.arg(i);
167 if (arg->variant == '&') {
168 static_cast<HipBuffer *>(arg->value.buffer.vendorPtr)->copyFromDevice();
169
170 }
171 }
172
173 for (int i = 0; i < argSled.argc(); i++) {
174 Arg_s *arg = argSled.arg(i);
175 if (arg->variant == '&') {
176 delete static_cast<HipBuffer *>(arg->value.buffer.vendorPtr);
177 arg->value.buffer.vendorPtr = nullptr;
178 }
179 }
180 HIP_CHECK(hipStreamSynchronize(hipStream));
181 HIP_CHECK(hipStreamDestroy(hipStream));
182
183 return (long) 0;
184 }
185
186
187 HipBackend::HipProgram::HipProgram(Backend *backend, BuildInfo *buildInfo, hipModule_t module)
188 : Backend::CompilationUnit(backend, buildInfo), module(module) {
189 }
190
191 HipBackend::HipProgram::~HipProgram() = default;
192
193 long HipBackend::HipProgram::getKernel(int nameLen, char *name) {
194
195 hipFunction_t kernel;
196 HIP_CHECK(hipModuleGetFunction(&kernel, module, name));
197 long kernelHandle = reinterpret_cast<long>(new HipKernel(this, name, kernel));
198
199 return kernelHandle;
200 }
201
202 bool HipBackend::HipProgram::programOK() {
203 return true;
204 }
205
206 HipBackend::HipBackend(HipBackend::HIPConfig *hipConfig, int
207 configSchemaLen, char *configSchema)
208 : Backend((Backend::Config*) hipConfig, configSchemaLen, configSchema), device(),context() {
209 #ifdef VERBOSE
210 std::cout << "HipBackend constructor " << ((hipConfig == nullptr) ? "hipConfig== null" : "got hipConfig")
211 << std::endl;
212 #endif
213 int deviceCount = 0;
214 hipError_t err = hipInit(0);
215 if (err == HIP_SUCCESS) {
216 hipGetDeviceCount(&deviceCount);
217 std::cout << "HipBackend device count" << std::endl;
218 hipDeviceGet(&device, 0);
219 std::cout << "HipBackend device ok" << std::endl;
220 hipCtxCreate(&context, 0, device);
221 std::cout << "HipBackend context created ok" << std::endl;
222 } else {
223 std::cout << "HipBackend failed, we seem to have the runtime library but no device, no context, nada "
224 << std::endl;
225 exit(1);
226 }
227 }
228
229 HipBackend::HipBackend() : HipBackend(nullptr, 0, nullptr) {
230
231 }
232
233 HipBackend::~HipBackend() {
234 #ifdef VERBOSE
235 std::cout << "freeing context" << std::endl;
236 #endif
237 auto status = hipCtxDestroy(context);
238 if (HIP_SUCCESS != status) {
239 std::cerr << "hipCtxDestroy(() HIP error = " << status
240 <<" " << hipGetErrorString(static_cast<hipError_t>(status))
241 <<" " << __FILE__ << " line " << __LINE__ << std::endl;
242 exit(-1);
243 }
244 }
245
246 int HipBackend::getMaxComputeUnits() {
247 std::cout << "getMaxComputeUnits()" << std::endl;
248 int value = 1;
249 return value;
250 }
251
252 void HipBackend::info() {
253 char name[100];
254 hipDeviceGetName(name, sizeof(name), device);
255 std::cout << "> Using device 0: " << name << std::endl;
256
257 // get compute capabilities and the devicename
258 int major = 0, minor = 0;
259 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, device);
260 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, device);
261 std::cout << "> HIP Device has major=" << major << " minor=" << minor << " compute capability" << std::endl;
262
263 int warpSize;
264 hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize, device);
265 std::cout << "> HIP Device has wave front size " << warpSize << std::endl;
266
267 int threadsPerBlock;
268 hipDeviceGetAttribute(&threadsPerBlock, hipDeviceAttributeMaxThreadsPerBlock, device);
269 std::cout << "> HIP Device has threadsPerBlock " << threadsPerBlock << std::endl;
270
271 int cores;
272 hipDeviceGetAttribute(&cores, hipDeviceAttributeMultiprocessorCount, device);
273 std::cout << "> HIP Cores " << cores << std::endl;
274
275 size_t totalGlobalMem;
276 hipDeviceTotalMem(&totalGlobalMem, device);
277 std::cout << " Total amount of global memory: " << (unsigned long long) totalGlobalMem << std::endl;
278 std::cout << " 64-bit Memory Address: " <<
279 ((totalGlobalMem > (unsigned long long) 4 * 1024 * 1024 * 1024L) ? "YES" : "NO") << std::endl;
280
281 }
282
283 long HipBackend::compileProgram(int len, char *source) {
284
285 #ifdef VERBOSE
286 std::cout << "inside compileProgram" << std::endl;
287 std::cout << "hip " << source << std::endl;
288 #endif
289 hiprtcProgram prog;
290 auto status = hiprtcCreateProgram(&prog,
291 source,
292 "hip_kernel.hip",
293 0,
294 nullptr,
295 nullptr);
296 if (status != HIPRTC_SUCCESS){
297 size_t logSize;
298 hiprtcGetProgramLogSize(prog, &logSize);
299
300 std::cerr << "hiprtcCreateProgram(() HIP error = " << std::endl;
301 if (logSize) {
302 std::string log(logSize, '\0');
303 hiprtcGetProgramLog(prog, &log[0]);
304 std::cerr <<" " << log
305 <<" " << __FILE__ << " line " << __LINE__ << std::endl;
306 }
307 exit(-1);
308 }
309
310 status = hiprtcCompileProgram(prog, 0, nullptr);
311 if (status != HIPRTC_SUCCESS){
312 size_t logSize;
313 hiprtcGetProgramLogSize(prog, &logSize);
314
315 std::cerr << "hiprtcCompileProgram(() HIP error = " << std::endl;
316 if (logSize) {
317 std::string log(logSize, '\0');
318 hiprtcGetProgramLog(prog, &log[0]);
319 std::cerr <<" " << log
320 <<" " << __FILE__ << " line " << __LINE__ << std::endl;
321 }
322 exit(-1);
323 }
324
325 size_t codeSize;
326 hiprtcGetCodeSize(prog, &codeSize);
327 #ifdef VERBOSE
328 std::cerr << "HIP compiled code size " << codeSize << std::endl;
329 #endif
330
331 std::vector<char> kernel_binary(codeSize);
332 hiprtcGetCode(prog, kernel_binary.data());
333
334 hipModule_t module;
335 hipModuleLoadData(&module, kernel_binary.data());
336 hiprtcDestroyProgram(&prog);
337
338 return reinterpret_cast<long>(new HipProgram(this, nullptr, module));
339 }
340
341 long getBackend(void *config, int configSchemaLen, char *configSchema) {
342 long backendHandle = reinterpret_cast<long>(
343 new HipBackend(static_cast<HipBackend::HIPConfig *>(config), configSchemaLen,
344 configSchema));
345 #ifdef VERBOSE
346 std::cout << "getBackend() -> backendHandle=" << std::hex << backendHandle << std::dec << std::endl;
347 #endif
348 return backendHandle;
349 }
350
351
352