/*
 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "code/codeBlob.hpp"
#include "code/vmreg.inline.hpp"
#include "compiler/disassembler.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "prims/foreign_globals.inline.hpp"
#include "prims/universalUpcallHandler.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/formatBuffer.hpp"
#include "utilities/globalDefinitions.hpp"

#define __ _masm->

static bool is_valid_XMM(XMMRegister reg) {
  return reg->is_valid() && (UseAVX >= 3 || (reg->encoding() < 16)); // why is this not covered by is_valid()?
}

// for callee saved regs, according to the caller's ABI
static int compute_reg_save_area_size(const ABIDescriptor& abi) {
  int size = 0;
  for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
    if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
    if (!abi.is_volatile_reg(reg)) {
      size += 8; // bytes
    }
  }

  for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
    if (!abi.is_volatile_reg(reg)) {
      if (UseAVX >= 3) {
        size += 64; // bytes
      } else if (UseAVX >= 1) {
        size += 32;
      } else {
        size += 16;
      }
    }
  }

#ifndef _WIN64
  // for mxcsr
  size += 8;
#endif

  return size;
}

constexpr int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions
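
// Note: the save and restore sequences below must be kept in sync with
// compute_reg_save_area_size above, which sizes the save area using the same
// register iteration and the same per-register widths.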

static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
  // 1. iterate all registers in the architecture
  //     - check if they are volatile or not for the given abi
  //     - if NOT, we need to save it here
  // 2. save mxcsr on non-windows platforms

  int offset = reg_save_area_offset;

  __ block_comment("{ preserve_callee_saved_regs ");
  for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
    if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
    if (!abi.is_volatile_reg(reg)) {
      __ movptr(Address(rsp, offset), reg);
      offset += 8;
    }
  }

  for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
    if (!abi.is_volatile_reg(reg)) {
      if (UseAVX >= 3) {
        __ evmovdqul(Address(rsp, offset), reg, Assembler::AVX_512bit);
        offset += 64;
      } else if (UseAVX >= 1) {
        __ vmovdqu(Address(rsp, offset), reg);
        offset += 32;
      } else {
        __ movdqu(Address(rsp, offset), reg);
        offset += 16;
      }
    }
  }

#ifndef _WIN64
  {
    const Address mxcsr_save(rsp, offset);
    Label skip_ldmx;
    __ stmxcsr(mxcsr_save);
    __ movl(rax, mxcsr_save);
    __ andl(rax, MXCSR_MASK); // Only check control and mask bits
    ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
    __ cmp32(rax, mxcsr_std);
    __ jcc(Assembler::equal, skip_ldmx);
    __ ldmxcsr(mxcsr_std);
    __ bind(skip_ldmx);
  }
#endif

  __ block_comment("} preserve_callee_saved_regs ");
}

static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
  // 1. iterate all registers in the architecture
  //     - check if they are volatile or not for the given abi
  //     - if NOT, we need to restore it here
  // 2. restore mxcsr on non-windows platforms

  int offset = reg_save_area_offset;

  __ block_comment("{ restore_callee_saved_regs ");
  for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
    if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
    if (!abi.is_volatile_reg(reg)) {
      __ movptr(reg, Address(rsp, offset));
      offset += 8;
    }
  }

  for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
    if (!abi.is_volatile_reg(reg)) {
      if (UseAVX >= 3) {
        __ evmovdqul(reg, Address(rsp, offset), Assembler::AVX_512bit);
        offset += 64;
      } else if (UseAVX >= 1) {
        __ vmovdqu(reg, Address(rsp, offset));
        offset += 32;
      } else {
        __ movdqu(reg, Address(rsp, offset));
        offset += 16;
      }
    }
  }

#ifndef _WIN64
  const Address mxcsr_save(rsp, offset);
  __ ldmxcsr(mxcsr_save);
#endif

  __ block_comment("} restore_callee_saved_regs ");
}
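
// The stub generated below:
//   1. calls on_entry to get (and possibly attach) the current JavaThread,
//   2. shuffles the native arguments into the Java calling convention,
//   3. resolves the receiver and calls the target Java method,
//   4. moves the result back (via registers or the return buffer),
//   5. calls on_exit and returns to the native caller.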

// Register is a class, but it would be assigned a numerical value.
// "0" is assigned for rax and for xmm0. Thus we need to ignore -Wnonnull.
PRAGMA_DIAG_PUSH
PRAGMA_NONNULL_IGNORED
address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiver, Method* entry,
                                                                  BasicType* in_sig_bt, int total_in_args,
                                                                  BasicType* out_sig_bt, int total_out_args,
                                                                  BasicType ret_type,
                                                                  jobject jabi, jobject jconv,
                                                                  bool needs_return_buffer, int ret_buf_size) {
  const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
  const CallRegs call_regs = ForeignGlobals::parse_call_regs(jconv);
  CodeBuffer buffer("upcall_stub_linkToNative", /* code_size = */ 2048, /* locs_size = */ 1024);

  Register shuffle_reg = rbx;
  JavaCallConv out_conv;
  NativeCallConv in_conv(call_regs._arg_regs, call_regs._args_length);
  ArgumentShuffle arg_shuffle(in_sig_bt, total_in_args, out_sig_bt, total_out_args, &in_conv, &out_conv, shuffle_reg->as_VMReg());
  int stack_slots = SharedRuntime::out_preserve_stack_slots() + arg_shuffle.out_arg_stack_slots();
  int out_arg_area = align_up(stack_slots * VMRegImpl::stack_slot_size, StackAlignmentInBytes);

#ifdef ASSERT
  LogTarget(Trace, panama) lt;
  if (lt.is_enabled()) {
    ResourceMark rm;
    LogStream ls(lt);
    arg_shuffle.print_on(&ls);
  }
#endif

  // out_arg_area (for stack arguments) doubles as shadow space for native calls.
  // make sure it is big enough.
  if (out_arg_area < frame::arg_reg_save_area_bytes) {
    out_arg_area = frame::arg_reg_save_area_bytes;
  }

  int reg_save_area_size = compute_reg_save_area_size(abi);
  RegSpiller arg_spiller(call_regs._arg_regs, call_regs._args_length);
  RegSpiller result_spiller(call_regs._ret_regs, call_regs._rets_length);

  int shuffle_area_offset = 0;
  int res_save_area_offset = shuffle_area_offset + out_arg_area;
  int arg_save_area_offset = res_save_area_offset + result_spiller.spill_size_bytes();
  int reg_save_area_offset = arg_save_area_offset + arg_spiller.spill_size_bytes();
  int frame_data_offset = reg_save_area_offset + reg_save_area_size;
  int frame_bottom_offset = frame_data_offset + sizeof(OptimizedEntryBlob::FrameData);

  int ret_buf_offset = -1;
  if (needs_return_buffer) {
    ret_buf_offset = frame_bottom_offset;
    frame_bottom_offset += ret_buf_size;
  }

  int frame_size = frame_bottom_offset;
  frame_size = align_up(frame_size, StackAlignmentInBytes);

  // Ok, the space we have allocated will look like:
  //
  //
  // FP-> |                     |
  //      |---------------------| = frame_bottom_offset = frame_size
  //      | (optional)          |
  //      | ret_buf             |
  //      |---------------------| = ret_buf_offset
  //      |                     |
  //      | FrameData           |
  //      |---------------------| = frame_data_offset
  //      |                     |
  //      | reg_save_area       |
  //      |---------------------| = reg_save_area_offset
  //      |                     |
  //      | arg_save_area       |
  //      |---------------------| = arg_save_area_offset
  //      |                     |
  //      | res_save_area       |
  //      |---------------------| = res_save_area_offset
  //      |                     |
  // SP-> | out_arg_area        |   needs to be at end for shadow space
  //
  //

  //////////////////////////////////////////////////////////////////////////////

  MacroAssembler* _masm = new MacroAssembler(&buffer);
  address start = __ pc();
  __ enter(); // set up frame
  if ((abi._stack_alignment_bytes % 16) != 0) {
    // stack alignment of caller is not a multiple of 16
    __ andptr(rsp, -StackAlignmentInBytes); // align stack
  }
  // allocate frame (frame_size is also aligned, so stack is still aligned)
  __ subptr(rsp, frame_size);
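
  // From here on, the *_offset values computed above are used as offsets from
  // rsp (the frame bottom), e.g. Address(rsp, frame_data_offset).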

  // we have to always spill args since we need to do a call to get the thread
  // (and maybe attach it).
  arg_spiller.generate_spill(_masm, arg_save_area_offset);

  preserve_callee_saved_registers(_masm, abi, reg_save_area_offset);

  __ block_comment("{ on_entry");
  __ vzeroupper();
  __ lea(c_rarg0, Address(rsp, frame_data_offset));
  // stack already aligned
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_entry)));
  __ movptr(r15_thread, rax);
  __ reinit_heapbase();
  __ block_comment("} on_entry");

  __ block_comment("{ argument shuffle");
  arg_spiller.generate_fill(_masm, arg_save_area_offset);
  if (needs_return_buffer) {
    assert(ret_buf_offset != -1, "no return buffer allocated");
    __ lea(abi._ret_buf_addr_reg, Address(rsp, ret_buf_offset));
  }
  arg_shuffle.generate(_masm, shuffle_reg->as_VMReg(), abi._shadow_space_bytes, 0);
  __ block_comment("} argument shuffle");

  __ block_comment("{ receiver ");
  __ movptr(rscratch1, (intptr_t)receiver);
  __ resolve_jobject(rscratch1, r15_thread, rscratch2);
  __ movptr(j_rarg0, rscratch1);
  __ block_comment("} receiver ");

  __ mov_metadata(rbx, entry);
  __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized

  __ call(Address(rbx, Method::from_compiled_offset()));

  // return value shuffle
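  // When no return buffer is used, the Java and native conventions return the
  // value in the same register (asserted below). With a return buffer, the
  // Java callee has stored the results into the buffer at ret_buf_offset and
  // they are loaded back into the native return registers here.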
  if (!needs_return_buffer) {
#ifdef ASSERT
    if (call_regs._rets_length == 1) { // 0 or 1
      VMReg j_expected_result_reg;
      switch (ret_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_SHORT:
        case T_CHAR:
        case T_INT:
        case T_LONG:
          j_expected_result_reg = rax->as_VMReg();
          break;
        case T_FLOAT:
        case T_DOUBLE:
          j_expected_result_reg = xmm0->as_VMReg();
          break;
        default:
          fatal("unexpected return type: %s", type2name(ret_type));
      }
      // No need to move for now, since CallArranger can pick a return type
      // that goes in the same reg for both CCs. But, at least assert they are the same.
      assert(call_regs._ret_regs[0] == j_expected_result_reg,
             "unexpected result register: %s != %s", call_regs._ret_regs[0]->name(), j_expected_result_reg->name());
    }
#endif
  } else {
    assert(ret_buf_offset != -1, "no return buffer allocated");
    __ lea(rscratch1, Address(rsp, ret_buf_offset));
    int offset = 0;
    for (int i = 0; i < call_regs._rets_length; i++) {
      VMReg reg = call_regs._ret_regs[i];
      if (reg->is_Register()) {
        __ movptr(reg->as_Register(), Address(rscratch1, offset));
        offset += 8;
      } else if (reg->is_XMMRegister()) {
        __ movdqu(reg->as_XMMRegister(), Address(rscratch1, offset));
        offset += 16;
      } else {
        ShouldNotReachHere();
      }
    }
  }

  result_spiller.generate_spill(_masm, res_save_area_offset);

  __ block_comment("{ on_exit");
  __ vzeroupper();
  __ lea(c_rarg0, Address(rsp, frame_data_offset));
  // stack already aligned
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_exit)));
  __ reinit_heapbase();
  __ block_comment("} on_exit");

  restore_callee_saved_registers(_masm, abi, reg_save_area_offset);

  result_spiller.generate_fill(_masm, res_save_area_offset);

  __ leave();
  __ ret(0);

  //////////////////////////////////////////////////////////////////////////////

  __ block_comment("{ exception handler");

  intptr_t exception_handler_offset = __ pc() - start;

  // TODO: this is always the same, can we bypass and call handle_uncaught_exception directly?

  // The native caller has no idea how to handle exceptions,
  // so we just crash here. It is up to the callee to catch exceptions.
  __ verify_oop(rax);
  __ vzeroupper();
  __ mov(c_rarg0, rax);
  __ andptr(rsp, -StackAlignmentInBytes); // align stack as required by ABI
  __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows (not really needed)
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::handle_uncaught_exception)));
  __ should_not_reach_here();

  __ block_comment("} exception handler");

  _masm->flush();

#ifndef PRODUCT
  stringStream ss;
  ss.print("optimized_upcall_stub_%s", entry->signature()->as_C_string());
  const char* name = _masm->code_string(ss.as_string());
#else // PRODUCT
  const char* name = "optimized_upcall_stub";
#endif // PRODUCT

  OptimizedEntryBlob* blob
    = OptimizedEntryBlob::create(name,
                                 &buffer,
                                 exception_handler_offset,
                                 receiver,
                                 in_ByteSize(frame_data_offset));

  if (TraceOptimizedUpcallStubs) {
    blob->print_on(tty);
  }

  return blob->code_begin();
}
PRAGMA_DIAG_POP