1 /* 2 * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #ifndef _WINDOWS 27 #include "alloca.h" 28 #endif 29 #include "asm/macroAssembler.hpp" 30 #include "asm/macroAssembler.inline.hpp" 31 #include "classfile/symbolTable.hpp" 32 #include "code/compiledIC.hpp" 33 #include "code/debugInfoRec.hpp" 34 #include "code/icBuffer.hpp" 35 #include "code/nativeInst.hpp" 36 #include "code/vtableStubs.hpp" 37 #include "compiler/oopMap.hpp" 38 #include "gc/shared/collectedHeap.hpp" 39 #include "gc/shared/gcLocker.hpp" 40 #include "gc/shared/barrierSet.hpp" 41 #include "gc/shared/barrierSetAssembler.hpp" 42 #include "interpreter/interpreter.hpp" 43 #include "logging/log.hpp" 44 #include "memory/resourceArea.hpp" 45 #include "memory/universe.hpp" 46 #include "oops/compiledICHolder.hpp" 47 #include "oops/klass.inline.hpp" 48 #include "oops/method.inline.hpp" 49 #include "prims/methodHandles.hpp" 50 #include "runtime/continuation.hpp" 51 #include "runtime/continuationEntry.inline.hpp" 52 #include "runtime/globals.hpp" 53 #include "runtime/jniHandles.hpp" 54 #include "runtime/safepointMechanism.hpp" 55 #include "runtime/sharedRuntime.hpp" 56 #include "runtime/signature.hpp" 57 #include "runtime/stubRoutines.hpp" 58 #include "runtime/vframeArray.hpp" 59 #include "runtime/vm_version.hpp" 60 #include "utilities/align.hpp" 61 #include "utilities/checkedCast.hpp" 62 #include "utilities/formatBuffer.hpp" 63 #include "vmreg_x86.inline.hpp" 64 #ifdef COMPILER1 65 #include "c1/c1_Runtime1.hpp" 66 #endif 67 #ifdef COMPILER2 68 #include "opto/runtime.hpp" 69 #endif 70 #if INCLUDE_JVMCI 71 #include "jvmci/jvmciJavaClasses.hpp" 72 #endif 73 74 #define __ masm-> 75 76 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; 77 78 class SimpleRuntimeFrame { 79 80 public: 81 82 // Most of the runtime stubs have this simple frame layout. 83 // This class exists to make the layout shared in one place. 84 // Offsets are for compiler stack slots, which are jints. 85 enum layout { 86 // The frame sender code expects that rbp will be in the "natural" place and 87 // will override any oopMap setting for it. We must therefore force the layout 88 // so that it agrees with the frame sender code. 89 rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt, 90 rbp_off2, 91 return_off, return_off2, 92 framesize 93 }; 94 }; 95 96 class RegisterSaver { 97 // Capture info about frame layout. Layout offsets are in jint 98 // units because compiler frame slots are jints. 99 #define XSAVE_AREA_BEGIN 160 100 #define XSAVE_AREA_YMM_BEGIN 576 101 #define XSAVE_AREA_OPMASK_BEGIN 1088 102 #define XSAVE_AREA_ZMM_BEGIN 1152 103 #define XSAVE_AREA_UPPERBANK 1664 104 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off 105 #define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off 106 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum)*32/BytesPerInt, zmm ## regnum ## H_off 107 #define DEF_OPMASK_OFFS(regnum) opmask ## regnum ## _off = opmask_off + (regnum)*8/BytesPerInt, opmask ## regnum ## H_off 108 #define DEF_ZMM_UPPER_OFFS(regnum) zmm ## regnum ## _off = zmm_upper_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off 109 enum layout { 110 fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area 111 xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area 112 DEF_XMM_OFFS(0), 113 DEF_XMM_OFFS(1), 114 // 2..15 are implied in range usage 115 ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, 116 DEF_YMM_OFFS(0), 117 DEF_YMM_OFFS(1), 118 // 2..15 are implied in range usage 119 opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, 120 DEF_OPMASK_OFFS(0), 121 DEF_OPMASK_OFFS(1), 122 // 2..7 are implied in range usage 123 zmm_off = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, 124 DEF_ZMM_OFFS(0), 125 DEF_ZMM_OFFS(1), 126 zmm_upper_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt, 127 DEF_ZMM_UPPER_OFFS(16), 128 DEF_ZMM_UPPER_OFFS(17), 129 // 18..31 are implied in range usage 130 fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt), 131 fpu_stateH_end, 132 r15_off, r15H_off, 133 r14_off, r14H_off, 134 r13_off, r13H_off, 135 r12_off, r12H_off, 136 r11_off, r11H_off, 137 r10_off, r10H_off, 138 r9_off, r9H_off, 139 r8_off, r8H_off, 140 rdi_off, rdiH_off, 141 rsi_off, rsiH_off, 142 ignore_off, ignoreH_off, // extra copy of rbp 143 rsp_off, rspH_off, 144 rbx_off, rbxH_off, 145 rdx_off, rdxH_off, 146 rcx_off, rcxH_off, 147 rax_off, raxH_off, 148 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state 149 align_off, alignH_off, 150 flags_off, flagsH_off, 151 // The frame sender code expects that rbp will be in the "natural" place and 152 // will override any oopMap setting for it. We must therefore force the layout 153 // so that it agrees with the frame sender code. 154 rbp_off, rbpH_off, // copy of rbp we will restore 155 return_off, returnH_off, // slot for return address 156 reg_save_size // size in compiler stack slots 157 }; 158 159 public: 160 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors); 161 static void restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors = false); 162 163 // Offsets into the register save area 164 // Used by deoptimization when it is managing result register 165 // values on its own 166 167 static int rax_offset_in_bytes(void) { return BytesPerInt * rax_off; } 168 static int rdx_offset_in_bytes(void) { return BytesPerInt * rdx_off; } 169 static int rbx_offset_in_bytes(void) { return BytesPerInt * rbx_off; } 170 static int xmm0_offset_in_bytes(void) { return BytesPerInt * xmm0_off; } 171 static int return_offset_in_bytes(void) { return BytesPerInt * return_off; } 172 173 // During deoptimization only the result registers need to be restored, 174 // all the other values have already been extracted. 175 static void restore_result_registers(MacroAssembler* masm); 176 }; 177 178 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) { 179 int off = 0; 180 int num_xmm_regs = XMMRegister::available_xmm_registers(); 181 #if COMPILER2_OR_JVMCI 182 if (save_wide_vectors && UseAVX == 0) { 183 save_wide_vectors = false; // vectors larger than 16 byte long are supported only with AVX 184 } 185 assert(!save_wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); 186 #else 187 save_wide_vectors = false; // vectors are generated only by C2 and JVMCI 188 #endif 189 190 // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated 191 int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs); 192 // OopMap frame size is in compiler stack slots (jint's) not bytes or words 193 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; 194 // CodeBlob frame size is in words. 195 int frame_size_in_words = frame_size_in_bytes / wordSize; 196 *total_frame_words = frame_size_in_words; 197 198 // Save registers, fpu state, and flags. 199 // We assume caller has already pushed the return address onto the 200 // stack, so rsp is 8-byte aligned here. 201 // We push rpb twice in this sequence because we want the real rbp 202 // to be under the return like a normal enter. 203 204 __ enter(); // rsp becomes 16-byte aligned here 205 __ push_CPU_state(); // Push a multiple of 16 bytes 206 207 // push cpu state handles this on EVEX enabled targets 208 if (save_wide_vectors) { 209 // Save upper half of YMM registers(0..15) 210 int base_addr = XSAVE_AREA_YMM_BEGIN; 211 for (int n = 0; n < 16; n++) { 212 __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n)); 213 } 214 if (VM_Version::supports_evex()) { 215 // Save upper half of ZMM registers(0..15) 216 base_addr = XSAVE_AREA_ZMM_BEGIN; 217 for (int n = 0; n < 16; n++) { 218 __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n)); 219 } 220 // Save full ZMM registers(16..num_xmm_regs) 221 base_addr = XSAVE_AREA_UPPERBANK; 222 off = 0; 223 int vector_len = Assembler::AVX_512bit; 224 for (int n = 16; n < num_xmm_regs; n++) { 225 __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len); 226 } 227 #if COMPILER2_OR_JVMCI 228 base_addr = XSAVE_AREA_OPMASK_BEGIN; 229 off = 0; 230 for(int n = 0; n < KRegister::number_of_registers; n++) { 231 __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n)); 232 } 233 #endif 234 } 235 } else { 236 if (VM_Version::supports_evex()) { 237 // Save upper bank of XMM registers(16..31) for scalar or 16-byte vector usage 238 int base_addr = XSAVE_AREA_UPPERBANK; 239 off = 0; 240 int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit; 241 for (int n = 16; n < num_xmm_regs; n++) { 242 __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len); 243 } 244 #if COMPILER2_OR_JVMCI 245 base_addr = XSAVE_AREA_OPMASK_BEGIN; 246 off = 0; 247 for(int n = 0; n < KRegister::number_of_registers; n++) { 248 __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n)); 249 } 250 #endif 251 } 252 } 253 __ vzeroupper(); 254 if (frame::arg_reg_save_area_bytes != 0) { 255 // Allocate argument register save area 256 __ subptr(rsp, frame::arg_reg_save_area_bytes); 257 } 258 259 // Set an oopmap for the call site. This oopmap will map all 260 // oop-registers and debug-info registers as callee-saved. This 261 // will allow deoptimization at this safepoint to find all possible 262 // debug-info recordings, as well as let GC find all oops. 263 264 OopMapSet *oop_maps = new OopMapSet(); 265 OopMap* map = new OopMap(frame_size_in_slots, 0); 266 267 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x)) 268 269 map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg()); 270 map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg()); 271 map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg()); 272 map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg()); 273 // rbp location is known implicitly by the frame sender code, needs no oopmap 274 // and the location where rbp was saved by is ignored 275 map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg()); 276 map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg()); 277 map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg()); 278 map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg()); 279 map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg()); 280 map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg()); 281 map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg()); 282 map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg()); 283 map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg()); 284 map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg()); 285 // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15, 286 // on EVEX enabled targets, we get it included in the xsave area 287 off = xmm0_off; 288 int delta = xmm1_off - off; 289 for (int n = 0; n < 16; n++) { 290 XMMRegister xmm_name = as_XMMRegister(n); 291 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()); 292 off += delta; 293 } 294 if (UseAVX > 2) { 295 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets 296 off = zmm16_off; 297 delta = zmm17_off - off; 298 for (int n = 16; n < num_xmm_regs; n++) { 299 XMMRegister zmm_name = as_XMMRegister(n); 300 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()); 301 off += delta; 302 } 303 } 304 305 #if COMPILER2_OR_JVMCI 306 if (save_wide_vectors) { 307 // Save upper half of YMM registers(0..15) 308 off = ymm0_off; 309 delta = ymm1_off - ymm0_off; 310 for (int n = 0; n < 16; n++) { 311 XMMRegister ymm_name = as_XMMRegister(n); 312 map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4)); 313 off += delta; 314 } 315 if (VM_Version::supports_evex()) { 316 // Save upper half of ZMM registers(0..15) 317 off = zmm0_off; 318 delta = zmm1_off - zmm0_off; 319 for (int n = 0; n < 16; n++) { 320 XMMRegister zmm_name = as_XMMRegister(n); 321 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next(8)); 322 off += delta; 323 } 324 } 325 } 326 #endif // COMPILER2_OR_JVMCI 327 328 // %%% These should all be a waste but we'll keep things as they were for now 329 if (true) { 330 map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next()); 331 map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next()); 332 map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next()); 333 map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next()); 334 // rbp location is known implicitly by the frame sender code, needs no oopmap 335 map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next()); 336 map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next()); 337 map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next()); 338 map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next()); 339 map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next()); 340 map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next()); 341 map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next()); 342 map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next()); 343 map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next()); 344 map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next()); 345 // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15, 346 // on EVEX enabled targets, we get it included in the xsave area 347 off = xmm0H_off; 348 delta = xmm1H_off - off; 349 for (int n = 0; n < 16; n++) { 350 XMMRegister xmm_name = as_XMMRegister(n); 351 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next()); 352 off += delta; 353 } 354 if (UseAVX > 2) { 355 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets 356 off = zmm16H_off; 357 delta = zmm17H_off - off; 358 for (int n = 16; n < num_xmm_regs; n++) { 359 XMMRegister zmm_name = as_XMMRegister(n); 360 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next()); 361 off += delta; 362 } 363 } 364 } 365 366 return map; 367 } 368 369 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors) { 370 int num_xmm_regs = XMMRegister::available_xmm_registers(); 371 if (frame::arg_reg_save_area_bytes != 0) { 372 // Pop arg register save area 373 __ addptr(rsp, frame::arg_reg_save_area_bytes); 374 } 375 376 #if COMPILER2_OR_JVMCI 377 if (restore_wide_vectors) { 378 assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); 379 assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); 380 } 381 #else 382 assert(!restore_wide_vectors, "vectors are generated only by C2"); 383 #endif 384 385 __ vzeroupper(); 386 387 // On EVEX enabled targets everything is handled in pop fpu state 388 if (restore_wide_vectors) { 389 // Restore upper half of YMM registers (0..15) 390 int base_addr = XSAVE_AREA_YMM_BEGIN; 391 for (int n = 0; n < 16; n++) { 392 __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16)); 393 } 394 if (VM_Version::supports_evex()) { 395 // Restore upper half of ZMM registers (0..15) 396 base_addr = XSAVE_AREA_ZMM_BEGIN; 397 for (int n = 0; n < 16; n++) { 398 __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32)); 399 } 400 // Restore full ZMM registers(16..num_xmm_regs) 401 base_addr = XSAVE_AREA_UPPERBANK; 402 int vector_len = Assembler::AVX_512bit; 403 int off = 0; 404 for (int n = 16; n < num_xmm_regs; n++) { 405 __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len); 406 } 407 #if COMPILER2_OR_JVMCI 408 base_addr = XSAVE_AREA_OPMASK_BEGIN; 409 off = 0; 410 for (int n = 0; n < KRegister::number_of_registers; n++) { 411 __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8))); 412 } 413 #endif 414 } 415 } else { 416 if (VM_Version::supports_evex()) { 417 // Restore upper bank of XMM registers(16..31) for scalar or 16-byte vector usage 418 int base_addr = XSAVE_AREA_UPPERBANK; 419 int off = 0; 420 int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit; 421 for (int n = 16; n < num_xmm_regs; n++) { 422 __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len); 423 } 424 #if COMPILER2_OR_JVMCI 425 base_addr = XSAVE_AREA_OPMASK_BEGIN; 426 off = 0; 427 for (int n = 0; n < KRegister::number_of_registers; n++) { 428 __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8))); 429 } 430 #endif 431 } 432 } 433 434 // Recover CPU state 435 __ pop_CPU_state(); 436 // Get the rbp described implicitly by the calling convention (no oopMap) 437 __ pop(rbp); 438 } 439 440 void RegisterSaver::restore_result_registers(MacroAssembler* masm) { 441 442 // Just restore result register. Only used by deoptimization. By 443 // now any callee save register that needs to be restored to a c2 444 // caller of the deoptee has been extracted into the vframeArray 445 // and will be stuffed into the c2i adapter we create for later 446 // restoration so only result registers need to be restored here. 447 448 // Restore fp result register 449 __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes())); 450 // Restore integer result register 451 __ movptr(rax, Address(rsp, rax_offset_in_bytes())); 452 __ movptr(rdx, Address(rsp, rdx_offset_in_bytes())); 453 454 // Pop all of the register save are off the stack except the return address 455 __ addptr(rsp, return_offset_in_bytes()); 456 } 457 458 // Is vector's size (in bytes) bigger than a size saved by default? 459 // 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. 460 bool SharedRuntime::is_wide_vector(int size) { 461 return size > 16; 462 } 463 464 // --------------------------------------------------------------------------- 465 // Read the array of BasicTypes from a signature, and compute where the 466 // arguments should go. Values in the VMRegPair regs array refer to 4-byte 467 // quantities. Values less than VMRegImpl::stack0 are registers, those above 468 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer 469 // as framesizes are fixed. 470 // VMRegImpl::stack0 refers to the first slot 0(sp). 471 // and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. 472 // Register up to Register::number_of_registers are the 64-bit 473 // integer registers. 474 475 // Note: the INPUTS in sig_bt are in units of Java argument words, which are 476 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit 477 // units regardless of build. Of course for i486 there is no 64 bit build 478 479 // The Java calling convention is a "shifted" version of the C ABI. 480 // By skipping the first C ABI register we can call non-static jni methods 481 // with small numbers of arguments without having to shuffle the arguments 482 // at all. Since we control the java ABI we ought to at least get some 483 // advantage out of it. 484 485 int SharedRuntime::java_calling_convention(const BasicType *sig_bt, 486 VMRegPair *regs, 487 int total_args_passed) { 488 489 // Create the mapping between argument positions and 490 // registers. 491 static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { 492 j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5 493 }; 494 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { 495 j_farg0, j_farg1, j_farg2, j_farg3, 496 j_farg4, j_farg5, j_farg6, j_farg7 497 }; 498 499 500 uint int_args = 0; 501 uint fp_args = 0; 502 uint stk_args = 0; // inc by 2 each time 503 504 for (int i = 0; i < total_args_passed; i++) { 505 switch (sig_bt[i]) { 506 case T_BOOLEAN: 507 case T_CHAR: 508 case T_BYTE: 509 case T_SHORT: 510 case T_INT: 511 if (int_args < Argument::n_int_register_parameters_j) { 512 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); 513 } else { 514 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 515 stk_args += 2; 516 } 517 break; 518 case T_VOID: 519 // halves of T_LONG or T_DOUBLE 520 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 521 regs[i].set_bad(); 522 break; 523 case T_LONG: 524 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 525 // fall through 526 case T_OBJECT: 527 case T_ARRAY: 528 case T_ADDRESS: 529 if (int_args < Argument::n_int_register_parameters_j) { 530 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); 531 } else { 532 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 533 stk_args += 2; 534 } 535 break; 536 case T_FLOAT: 537 if (fp_args < Argument::n_float_register_parameters_j) { 538 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); 539 } else { 540 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 541 stk_args += 2; 542 } 543 break; 544 case T_DOUBLE: 545 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 546 if (fp_args < Argument::n_float_register_parameters_j) { 547 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); 548 } else { 549 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 550 stk_args += 2; 551 } 552 break; 553 default: 554 ShouldNotReachHere(); 555 break; 556 } 557 } 558 559 return align_up(stk_args, 2); 560 } 561 562 // Same as java_calling_convention() but for multiple return 563 // values. There's no way to store them on the stack so if we don't 564 // have enough registers, multiple values can't be returned. 565 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1; 566 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; 567 int SharedRuntime::java_return_convention(const BasicType *sig_bt, 568 VMRegPair *regs, 569 int total_args_passed) { 570 // Create the mapping between argument positions and 571 // registers. 572 static const Register INT_ArgReg[java_return_convention_max_int] = { 573 rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0 574 }; 575 static const XMMRegister FP_ArgReg[java_return_convention_max_float] = { 576 j_farg0, j_farg1, j_farg2, j_farg3, 577 j_farg4, j_farg5, j_farg6, j_farg7 578 }; 579 580 581 uint int_args = 0; 582 uint fp_args = 0; 583 584 for (int i = 0; i < total_args_passed; i++) { 585 switch (sig_bt[i]) { 586 case T_BOOLEAN: 587 case T_CHAR: 588 case T_BYTE: 589 case T_SHORT: 590 case T_INT: 591 if (int_args < Argument::n_int_register_parameters_j+1) { 592 regs[i].set1(INT_ArgReg[int_args]->as_VMReg()); 593 int_args++; 594 } else { 595 return -1; 596 } 597 break; 598 case T_VOID: 599 // halves of T_LONG or T_DOUBLE 600 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 601 regs[i].set_bad(); 602 break; 603 case T_LONG: 604 assert(sig_bt[i + 1] == T_VOID, "expecting half"); 605 // fall through 606 case T_OBJECT: 607 case T_ARRAY: 608 case T_ADDRESS: 609 case T_METADATA: 610 if (int_args < Argument::n_int_register_parameters_j+1) { 611 regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); 612 int_args++; 613 } else { 614 return -1; 615 } 616 break; 617 case T_FLOAT: 618 if (fp_args < Argument::n_float_register_parameters_j) { 619 regs[i].set1(FP_ArgReg[fp_args]->as_VMReg()); 620 fp_args++; 621 } else { 622 return -1; 623 } 624 break; 625 case T_DOUBLE: 626 assert(sig_bt[i + 1] == T_VOID, "expecting half"); 627 if (fp_args < Argument::n_float_register_parameters_j) { 628 regs[i].set2(FP_ArgReg[fp_args]->as_VMReg()); 629 fp_args++; 630 } else { 631 return -1; 632 } 633 break; 634 default: 635 ShouldNotReachHere(); 636 break; 637 } 638 } 639 640 return int_args + fp_args; 641 } 642 643 // Patch the callers callsite with entry to compiled code if it exists. 644 static void patch_callers_callsite(MacroAssembler *masm) { 645 Label L; 646 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD); 647 __ jcc(Assembler::equal, L); 648 649 // Save the current stack pointer 650 __ mov(r13, rsp); 651 // Schedule the branch target address early. 652 // Call into the VM to patch the caller, then jump to compiled callee 653 // rax isn't live so capture return address while we easily can 654 __ movptr(rax, Address(rsp, 0)); 655 656 // align stack so push_CPU_state doesn't fault 657 __ andptr(rsp, -(StackAlignmentInBytes)); 658 __ push_CPU_state(); 659 __ vzeroupper(); 660 // VM needs caller's callsite 661 // VM needs target method 662 // This needs to be a long call since we will relocate this adapter to 663 // the codeBuffer and it may not reach 664 665 // Allocate argument register save area 666 if (frame::arg_reg_save_area_bytes != 0) { 667 __ subptr(rsp, frame::arg_reg_save_area_bytes); 668 } 669 __ mov(c_rarg0, rbx); 670 __ mov(c_rarg1, rax); 671 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); 672 673 // De-allocate argument register save area 674 if (frame::arg_reg_save_area_bytes != 0) { 675 __ addptr(rsp, frame::arg_reg_save_area_bytes); 676 } 677 678 __ vzeroupper(); 679 __ pop_CPU_state(); 680 // restore sp 681 __ mov(rsp, r13); 682 __ bind(L); 683 } 684 685 // For each inline type argument, sig includes the list of fields of 686 // the inline type. This utility function computes the number of 687 // arguments for the call if inline types are passed by reference (the 688 // calling convention the interpreter expects). 689 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) { 690 int total_args_passed = 0; 691 if (InlineTypePassFieldsAsArgs) { 692 for (int i = 0; i < sig_extended->length(); i++) { 693 BasicType bt = sig_extended->at(i)._bt; 694 if (bt == T_METADATA) { 695 // In sig_extended, an inline type argument starts with: 696 // T_METADATA, followed by the types of the fields of the 697 // inline type and T_VOID to mark the end of the value 698 // type. Inline types are flattened so, for instance, in the 699 // case of an inline type with an int field and an inline type 700 // field that itself has 2 fields, an int and a long: 701 // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second 702 // slot for the T_LONG) T_VOID (inner inline type) T_VOID 703 // (outer inline type) 704 total_args_passed++; 705 int vt = 1; 706 do { 707 i++; 708 BasicType bt = sig_extended->at(i)._bt; 709 BasicType prev_bt = sig_extended->at(i-1)._bt; 710 if (bt == T_METADATA) { 711 vt++; 712 } else if (bt == T_VOID && 713 prev_bt != T_LONG && 714 prev_bt != T_DOUBLE) { 715 vt--; 716 } 717 } while (vt != 0); 718 } else { 719 total_args_passed++; 720 } 721 } 722 } else { 723 total_args_passed = sig_extended->length(); 724 } 725 return total_args_passed; 726 } 727 728 729 static void gen_c2i_adapter_helper(MacroAssembler* masm, 730 BasicType bt, 731 BasicType prev_bt, 732 size_t size_in_bytes, 733 const VMRegPair& reg_pair, 734 const Address& to, 735 int extraspace, 736 bool is_oop) { 737 if (bt == T_VOID) { 738 assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half"); 739 return; 740 } 741 742 // Say 4 args: 743 // i st_off 744 // 0 32 T_LONG 745 // 1 24 T_VOID 746 // 2 16 T_OBJECT 747 // 3 8 T_BOOL 748 // - 0 return address 749 // 750 // However to make thing extra confusing. Because we can fit a long/double in 751 // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter 752 // leaves one slot empty and only stores to a single slot. In this case the 753 // slot that is occupied is the T_VOID slot. See I said it was confusing. 754 755 bool wide = (size_in_bytes == wordSize); 756 VMReg r_1 = reg_pair.first(); 757 VMReg r_2 = reg_pair.second(); 758 assert(r_2->is_valid() == wide, "invalid size"); 759 if (!r_1->is_valid()) { 760 assert(!r_2->is_valid(), "must be invalid"); 761 return; 762 } 763 764 if (!r_1->is_XMMRegister()) { 765 Register val = rax; 766 if (r_1->is_stack()) { 767 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; 768 __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false); 769 } else { 770 val = r_1->as_Register(); 771 } 772 assert_different_registers(to.base(), val, rscratch1); 773 if (is_oop) { 774 __ push(r13); 775 __ push(rbx); 776 __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); 777 __ pop(rbx); 778 __ pop(r13); 779 } else { 780 __ store_sized_value(to, val, size_in_bytes); 781 } 782 } else { 783 if (wide) { 784 __ movdbl(to, r_1->as_XMMRegister()); 785 } else { 786 __ movflt(to, r_1->as_XMMRegister()); 787 } 788 } 789 } 790 791 static void gen_c2i_adapter(MacroAssembler *masm, 792 const GrowableArray<SigEntry>* sig_extended, 793 const VMRegPair *regs, 794 bool requires_clinit_barrier, 795 address& c2i_no_clinit_check_entry, 796 Label& skip_fixup, 797 address start, 798 OopMapSet* oop_maps, 799 int& frame_complete, 800 int& frame_size_in_words, 801 bool alloc_inline_receiver) { 802 if (requires_clinit_barrier && VM_Version::supports_fast_class_init_checks()) { 803 Label L_skip_barrier; 804 Register method = rbx; 805 806 { // Bypass the barrier for non-static methods 807 Register flags = rscratch1; 808 __ movl(flags, Address(method, Method::access_flags_offset())); 809 __ testl(flags, JVM_ACC_STATIC); 810 __ jcc(Assembler::zero, L_skip_barrier); // non-static 811 } 812 813 Register klass = rscratch1; 814 __ load_method_holder(klass, method); 815 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/); 816 817 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path 818 819 __ bind(L_skip_barrier); 820 c2i_no_clinit_check_entry = __ pc(); 821 } 822 823 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 824 bs->c2i_entry_barrier(masm); 825 826 // Before we get into the guts of the C2I adapter, see if we should be here 827 // at all. We've come from compiled code and are attempting to jump to the 828 // interpreter, which means the caller made a static call to get here 829 // (vcalls always get a compiled target if there is one). Check for a 830 // compiled target. If there is one, we need to patch the caller's call. 831 patch_callers_callsite(masm); 832 833 __ bind(skip_fixup); 834 835 if (InlineTypePassFieldsAsArgs) { 836 // Is there an inline type argument? 837 bool has_inline_argument = false; 838 for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) { 839 has_inline_argument = (sig_extended->at(i)._bt == T_METADATA); 840 } 841 if (has_inline_argument) { 842 // There is at least an inline type argument: we're coming from 843 // compiled code so we have no buffers to back the inline types. 844 // Allocate the buffers here with a runtime call. 845 OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false); 846 847 frame_complete = __ offset(); 848 849 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 850 851 __ mov(c_rarg0, r15_thread); 852 __ mov(c_rarg1, rbx); 853 __ mov64(c_rarg2, (int64_t)alloc_inline_receiver); 854 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types))); 855 856 oop_maps->add_gc_map((int)(__ pc() - start), map); 857 __ reset_last_Java_frame(false); 858 859 RegisterSaver::restore_live_registers(masm); 860 861 Label no_exception; 862 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 863 __ jcc(Assembler::equal, no_exception); 864 865 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD); 866 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 867 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 868 869 __ bind(no_exception); 870 871 // We get an array of objects from the runtime call 872 __ get_vm_result(rscratch2, r15_thread); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr() 873 __ get_vm_result_2(rbx, r15_thread); // TODO: required to keep the callee Method live? 874 } 875 } 876 877 // Since all args are passed on the stack, total_args_passed * 878 // Interpreter::stackElementSize is the space we need. 879 int total_args_passed = compute_total_args_passed_int(sig_extended); 880 assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed); 881 882 int extraspace = (total_args_passed * Interpreter::stackElementSize); 883 884 // stack is aligned, keep it that way 885 // This is not currently needed or enforced by the interpreter, but 886 // we might as well conform to the ABI. 887 extraspace = align_up(extraspace, 2*wordSize); 888 889 // set senderSP value 890 __ lea(r13, Address(rsp, wordSize)); 891 892 #ifdef ASSERT 893 __ check_stack_alignment(r13, "sender stack not aligned"); 894 #endif 895 if (extraspace > 0) { 896 // Pop the return address 897 __ pop(rax); 898 899 __ subptr(rsp, extraspace); 900 901 // Push the return address 902 __ push(rax); 903 904 // Account for the return address location since we store it first rather 905 // than hold it in a register across all the shuffling 906 extraspace += wordSize; 907 } 908 909 #ifdef ASSERT 910 __ check_stack_alignment(rsp, "callee stack not aligned", wordSize, rax); 911 #endif 912 913 // Now write the args into the outgoing interpreter space 914 915 // next_arg_comp is the next argument from the compiler point of 916 // view (inline type fields are passed in registers/on the stack). In 917 // sig_extended, an inline type argument starts with: T_METADATA, 918 // followed by the types of the fields of the inline type and T_VOID 919 // to mark the end of the inline type. ignored counts the number of 920 // T_METADATA/T_VOID. next_vt_arg is the next inline type argument: 921 // used to get the buffer for that argument from the pool of buffers 922 // we allocated above and want to pass to the 923 // interpreter. next_arg_int is the next argument from the 924 // interpreter point of view (inline types are passed by reference). 925 for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0; 926 next_arg_comp < sig_extended->length(); next_arg_comp++) { 927 assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments"); 928 assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?"); 929 BasicType bt = sig_extended->at(next_arg_comp)._bt; 930 int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize; 931 if (!InlineTypePassFieldsAsArgs || bt != T_METADATA) { 932 int next_off = st_off - Interpreter::stackElementSize; 933 const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; 934 const VMRegPair reg_pair = regs[next_arg_comp-ignored]; 935 size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4; 936 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, 937 size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false); 938 next_arg_int++; 939 #ifdef ASSERT 940 if (bt == T_LONG || bt == T_DOUBLE) { 941 // Overwrite the unused slot with known junk 942 __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); 943 __ movptr(Address(rsp, st_off), rax); 944 } 945 #endif /* ASSERT */ 946 } else { 947 ignored++; 948 // get the buffer from the just allocated pool of buffers 949 int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_OBJECT); 950 __ load_heap_oop(r14, Address(rscratch2, index)); 951 next_vt_arg++; next_arg_int++; 952 int vt = 1; 953 // write fields we get from compiled code in registers/stack 954 // slots to the buffer: we know we are done with that inline type 955 // argument when we hit the T_VOID that acts as an end of inline 956 // type delimiter for this inline type. Inline types are flattened 957 // so we might encounter embedded inline types. Each entry in 958 // sig_extended contains a field offset in the buffer. 959 Label L_null; 960 do { 961 next_arg_comp++; 962 BasicType bt = sig_extended->at(next_arg_comp)._bt; 963 BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt; 964 if (bt == T_METADATA) { 965 vt++; 966 ignored++; 967 } else if (bt == T_VOID && 968 prev_bt != T_LONG && 969 prev_bt != T_DOUBLE) { 970 vt--; 971 ignored++; 972 } else { 973 int off = sig_extended->at(next_arg_comp)._offset; 974 if (off == -1) { 975 // Nullable inline type argument, emit null check 976 VMReg reg = regs[next_arg_comp-ignored].first(); 977 Label L_notNull; 978 if (reg->is_stack()) { 979 int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace; 980 __ testb(Address(rsp, ld_off), 1); 981 } else { 982 __ testb(reg->as_Register(), 1); 983 } 984 __ jcc(Assembler::notZero, L_notNull); 985 __ movptr(Address(rsp, st_off), 0); 986 __ jmp(L_null); 987 __ bind(L_notNull); 988 continue; 989 } 990 assert(off > 0, "offset in object should be positive"); 991 size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize; 992 bool is_oop = is_reference_type(bt); 993 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, 994 size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop); 995 } 996 } while (vt != 0); 997 // pass the buffer to the interpreter 998 __ movptr(Address(rsp, st_off), r14); 999 __ bind(L_null); 1000 } 1001 } 1002 1003 // Schedule the branch target address early. 1004 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset()))); 1005 __ jmp(rcx); 1006 } 1007 1008 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, 1009 address code_start, address code_end, 1010 Label& L_ok) { 1011 Label L_fail; 1012 __ lea(temp_reg, ExternalAddress(code_start)); 1013 __ cmpptr(pc_reg, temp_reg); 1014 __ jcc(Assembler::belowEqual, L_fail); 1015 __ lea(temp_reg, ExternalAddress(code_end)); 1016 __ cmpptr(pc_reg, temp_reg); 1017 __ jcc(Assembler::below, L_ok); 1018 __ bind(L_fail); 1019 } 1020 1021 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, 1022 int comp_args_on_stack, 1023 const GrowableArray<SigEntry>* sig, 1024 const VMRegPair *regs) { 1025 1026 // Note: r13 contains the senderSP on entry. We must preserve it since 1027 // we may do a i2c -> c2i transition if we lose a race where compiled 1028 // code goes non-entrant while we get args ready. 1029 // In addition we use r13 to locate all the interpreter args as 1030 // we must align the stack to 16 bytes on an i2c entry else we 1031 // lose alignment we expect in all compiled code and register 1032 // save code can segv when fxsave instructions find improperly 1033 // aligned stack pointer. 1034 1035 // Adapters can be frameless because they do not require the caller 1036 // to perform additional cleanup work, such as correcting the stack pointer. 1037 // An i2c adapter is frameless because the *caller* frame, which is interpreted, 1038 // routinely repairs its own stack pointer (from interpreter_frame_last_sp), 1039 // even if a callee has modified the stack pointer. 1040 // A c2i adapter is frameless because the *callee* frame, which is interpreted, 1041 // routinely repairs its caller's stack pointer (from sender_sp, which is set 1042 // up via the senderSP register). 1043 // In other words, if *either* the caller or callee is interpreted, we can 1044 // get the stack pointer repaired after a call. 1045 // This is why c2i and i2c adapters cannot be indefinitely composed. 1046 // In particular, if a c2i adapter were to somehow call an i2c adapter, 1047 // both caller and callee would be compiled methods, and neither would 1048 // clean up the stack pointer changes performed by the two adapters. 1049 // If this happens, control eventually transfers back to the compiled 1050 // caller, but with an uncorrected stack, causing delayed havoc. 1051 1052 if (VerifyAdapterCalls && 1053 (Interpreter::code() != nullptr || StubRoutines::final_stubs_code() != nullptr)) { 1054 // So, let's test for cascading c2i/i2c adapters right now. 1055 // assert(Interpreter::contains($return_addr) || 1056 // StubRoutines::contains($return_addr), 1057 // "i2c adapter must return to an interpreter frame"); 1058 __ block_comment("verify_i2c { "); 1059 // Pick up the return address 1060 __ movptr(rax, Address(rsp, 0)); 1061 Label L_ok; 1062 if (Interpreter::code() != nullptr) { 1063 range_check(masm, rax, r11, 1064 Interpreter::code()->code_start(), 1065 Interpreter::code()->code_end(), 1066 L_ok); 1067 } 1068 if (StubRoutines::initial_stubs_code() != nullptr) { 1069 range_check(masm, rax, r11, 1070 StubRoutines::initial_stubs_code()->code_begin(), 1071 StubRoutines::initial_stubs_code()->code_end(), 1072 L_ok); 1073 } 1074 if (StubRoutines::final_stubs_code() != nullptr) { 1075 range_check(masm, rax, r11, 1076 StubRoutines::final_stubs_code()->code_begin(), 1077 StubRoutines::final_stubs_code()->code_end(), 1078 L_ok); 1079 } 1080 const char* msg = "i2c adapter must return to an interpreter frame"; 1081 __ block_comment(msg); 1082 __ stop(msg); 1083 __ bind(L_ok); 1084 __ block_comment("} verify_i2ce "); 1085 } 1086 1087 // Must preserve original SP for loading incoming arguments because 1088 // we need to align the outgoing SP for compiled code. 1089 __ movptr(r11, rsp); 1090 1091 // Pick up the return address 1092 __ pop(rax); 1093 1094 // Convert 4-byte c2 stack slots to words. 1095 int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; 1096 1097 if (comp_args_on_stack) { 1098 __ subptr(rsp, comp_words_on_stack * wordSize); 1099 } 1100 1101 // Ensure compiled code always sees stack at proper alignment 1102 __ andptr(rsp, -16); 1103 1104 // push the return address and misalign the stack that youngest frame always sees 1105 // as far as the placement of the call instruction 1106 __ push(rax); 1107 1108 // Put saved SP in another register 1109 const Register saved_sp = rax; 1110 __ movptr(saved_sp, r11); 1111 1112 // Will jump to the compiled code just as if compiled code was doing it. 1113 // Pre-load the register-jump target early, to schedule it better. 1114 __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset()))); 1115 1116 #if INCLUDE_JVMCI 1117 if (EnableJVMCI) { 1118 // check if this call should be routed towards a specific entry point 1119 __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0); 1120 Label no_alternative_target; 1121 __ jcc(Assembler::equal, no_alternative_target); 1122 __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); 1123 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0); 1124 __ bind(no_alternative_target); 1125 } 1126 #endif // INCLUDE_JVMCI 1127 1128 int total_args_passed = sig->length(); 1129 1130 // Now generate the shuffle code. Pick up all register args and move the 1131 // rest through the floating point stack top. 1132 for (int i = 0; i < total_args_passed; i++) { 1133 BasicType bt = sig->at(i)._bt; 1134 if (bt == T_VOID) { 1135 // Longs and doubles are passed in native word order, but misaligned 1136 // in the 32-bit build. 1137 BasicType prev_bt = (i > 0) ? sig->at(i-1)._bt : T_ILLEGAL; 1138 assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half"); 1139 continue; 1140 } 1141 1142 // Pick up 0, 1 or 2 words from SP+offset. 1143 1144 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), 1145 "scrambled load targets?"); 1146 // Load in argument order going down. 1147 int ld_off = (total_args_passed - i)*Interpreter::stackElementSize; 1148 // Point to interpreter value (vs. tag) 1149 int next_off = ld_off - Interpreter::stackElementSize; 1150 // 1151 // 1152 // 1153 VMReg r_1 = regs[i].first(); 1154 VMReg r_2 = regs[i].second(); 1155 if (!r_1->is_valid()) { 1156 assert(!r_2->is_valid(), ""); 1157 continue; 1158 } 1159 if (r_1->is_stack()) { 1160 // Convert stack slot to an SP offset (+ wordSize to account for return address ) 1161 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize; 1162 1163 // We can use r13 as a temp here because compiled code doesn't need r13 as an input 1164 // and if we end up going thru a c2i because of a miss a reasonable value of r13 1165 // will be generated. 1166 if (!r_2->is_valid()) { 1167 // sign extend??? 1168 __ movl(r13, Address(saved_sp, ld_off)); 1169 __ movptr(Address(rsp, st_off), r13); 1170 } else { 1171 // 1172 // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE 1173 // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case 1174 // So we must adjust where to pick up the data to match the interpreter. 1175 // 1176 // Interpreter local[n] == MSW, local[n+1] == LSW however locals 1177 // are accessed as negative so LSW is at LOW address 1178 1179 // ld_off is MSW so get LSW 1180 const int offset = (bt==T_LONG||bt==T_DOUBLE)? 1181 next_off : ld_off; 1182 __ movq(r13, Address(saved_sp, offset)); 1183 // st_off is LSW (i.e. reg.first()) 1184 __ movq(Address(rsp, st_off), r13); 1185 } 1186 } else if (r_1->is_Register()) { // Register argument 1187 Register r = r_1->as_Register(); 1188 assert(r != rax, "must be different"); 1189 if (r_2->is_valid()) { 1190 // 1191 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE 1192 // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case 1193 // So we must adjust where to pick up the data to match the interpreter. 1194 1195 const int offset = (bt==T_LONG||bt==T_DOUBLE)? 1196 next_off : ld_off; 1197 1198 // this can be a misaligned move 1199 __ movq(r, Address(saved_sp, offset)); 1200 } else { 1201 // sign extend and use a full word? 1202 __ movl(r, Address(saved_sp, ld_off)); 1203 } 1204 } else { 1205 if (!r_2->is_valid()) { 1206 __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off)); 1207 } else { 1208 __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off)); 1209 } 1210 } 1211 } 1212 1213 __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about 1214 1215 // 6243940 We might end up in handle_wrong_method if 1216 // the callee is deoptimized as we race thru here. If that 1217 // happens we don't want to take a safepoint because the 1218 // caller frame will look interpreted and arguments are now 1219 // "compiled" so it is much better to make this transition 1220 // invisible to the stack walking code. Unfortunately if 1221 // we try and find the callee by normal means a safepoint 1222 // is possible. So we stash the desired callee in the thread 1223 // and the vm will find there should this case occur. 1224 1225 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); 1226 1227 // put Method* where a c2i would expect should we end up there 1228 // only needed because of c2 resolve stubs return Method* as a result in 1229 // rax 1230 __ mov(rax, rbx); 1231 __ jmp(r11); 1232 } 1233 1234 static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) { 1235 Label ok; 1236 1237 Register holder = rax; 1238 Register receiver = j_rarg0; 1239 Register temp = rbx; 1240 1241 __ load_klass(temp, receiver, rscratch1); 1242 __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset())); 1243 __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset())); 1244 __ jcc(Assembler::equal, ok); 1245 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1246 1247 __ bind(ok); 1248 // Method might have been compiled since the call site was patched to 1249 // interpreted if that is the case treat it as a miss so we can get 1250 // the call site corrected. 1251 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD); 1252 __ jcc(Assembler::equal, skip_fixup); 1253 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1254 } 1255 1256 // --------------------------------------------------------------- 1257 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm, 1258 int comp_args_on_stack, 1259 const GrowableArray<SigEntry>* sig, 1260 const VMRegPair* regs, 1261 const GrowableArray<SigEntry>* sig_cc, 1262 const VMRegPair* regs_cc, 1263 const GrowableArray<SigEntry>* sig_cc_ro, 1264 const VMRegPair* regs_cc_ro, 1265 AdapterFingerPrint* fingerprint, 1266 AdapterBlob*& new_adapter, 1267 bool allocate_code_blob) { 1268 address i2c_entry = __ pc(); 1269 gen_i2c_adapter(masm, comp_args_on_stack, sig, regs); 1270 1271 // ------------------------------------------------------------------------- 1272 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls 1273 // to the interpreter. The args start out packed in the compiled layout. They 1274 // need to be unpacked into the interpreter layout. This will almost always 1275 // require some stack space. We grow the current (compiled) stack, then repack 1276 // the args. We finally end in a jump to the generic interpreter entry point. 1277 // On exit from the interpreter, the interpreter will restore our SP (lest the 1278 // compiled code, which relies solely on SP and not RBP, get sick). 1279 1280 address c2i_unverified_entry = __ pc(); 1281 address c2i_unverified_inline_entry = __ pc(); 1282 Label skip_fixup; 1283 1284 gen_inline_cache_check(masm, skip_fixup); 1285 1286 OopMapSet* oop_maps = new OopMapSet(); 1287 int frame_complete = CodeOffsets::frame_never_safe; 1288 int frame_size_in_words = 0; 1289 1290 // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver) 1291 address c2i_no_clinit_check_entry = nullptr; 1292 address c2i_inline_ro_entry = __ pc(); 1293 if (regs_cc != regs_cc_ro) { 1294 // No class init barrier needed because method is guaranteed to be non-static 1295 gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, c2i_no_clinit_check_entry, 1296 skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); 1297 skip_fixup.reset(); 1298 } 1299 1300 // Scalarized c2i adapter 1301 address c2i_entry = __ pc(); 1302 address c2i_inline_entry = __ pc(); 1303 gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry, 1304 skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true); 1305 1306 // Non-scalarized c2i adapter 1307 if (regs != regs_cc) { 1308 c2i_unverified_inline_entry = __ pc(); 1309 Label inline_entry_skip_fixup; 1310 gen_inline_cache_check(masm, inline_entry_skip_fixup); 1311 1312 c2i_inline_entry = __ pc(); 1313 gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry, 1314 inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); 1315 } 1316 1317 1318 // The c2i adapters might safepoint and trigger a GC. The caller must make sure that 1319 // the GC knows about the location of oop argument locations passed to the c2i adapter. 1320 if (allocate_code_blob) { 1321 bool caller_must_gc_arguments = (regs != regs_cc); 1322 new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments); 1323 } 1324 1325 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, c2i_unverified_inline_entry, c2i_no_clinit_check_entry); 1326 } 1327 1328 int SharedRuntime::c_calling_convention(const BasicType *sig_bt, 1329 VMRegPair *regs, 1330 int total_args_passed) { 1331 1332 // We return the amount of VMRegImpl stack slots we need to reserve for all 1333 // the arguments NOT counting out_preserve_stack_slots. 1334 1335 // NOTE: These arrays will have to change when c1 is ported 1336 #ifdef _WIN64 1337 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { 1338 c_rarg0, c_rarg1, c_rarg2, c_rarg3 1339 }; 1340 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { 1341 c_farg0, c_farg1, c_farg2, c_farg3 1342 }; 1343 #else 1344 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { 1345 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5 1346 }; 1347 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { 1348 c_farg0, c_farg1, c_farg2, c_farg3, 1349 c_farg4, c_farg5, c_farg6, c_farg7 1350 }; 1351 #endif // _WIN64 1352 1353 1354 uint int_args = 0; 1355 uint fp_args = 0; 1356 uint stk_args = 0; // inc by 2 each time 1357 1358 for (int i = 0; i < total_args_passed; i++) { 1359 switch (sig_bt[i]) { 1360 case T_BOOLEAN: 1361 case T_CHAR: 1362 case T_BYTE: 1363 case T_SHORT: 1364 case T_INT: 1365 if (int_args < Argument::n_int_register_parameters_c) { 1366 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); 1367 #ifdef _WIN64 1368 fp_args++; 1369 // Allocate slots for callee to stuff register args the stack. 1370 stk_args += 2; 1371 #endif 1372 } else { 1373 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1374 stk_args += 2; 1375 } 1376 break; 1377 case T_LONG: 1378 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 1379 // fall through 1380 case T_OBJECT: 1381 case T_ARRAY: 1382 case T_ADDRESS: 1383 case T_METADATA: 1384 if (int_args < Argument::n_int_register_parameters_c) { 1385 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); 1386 #ifdef _WIN64 1387 fp_args++; 1388 stk_args += 2; 1389 #endif 1390 } else { 1391 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1392 stk_args += 2; 1393 } 1394 break; 1395 case T_FLOAT: 1396 if (fp_args < Argument::n_float_register_parameters_c) { 1397 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); 1398 #ifdef _WIN64 1399 int_args++; 1400 // Allocate slots for callee to stuff register args the stack. 1401 stk_args += 2; 1402 #endif 1403 } else { 1404 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1405 stk_args += 2; 1406 } 1407 break; 1408 case T_DOUBLE: 1409 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 1410 if (fp_args < Argument::n_float_register_parameters_c) { 1411 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); 1412 #ifdef _WIN64 1413 int_args++; 1414 // Allocate slots for callee to stuff register args the stack. 1415 stk_args += 2; 1416 #endif 1417 } else { 1418 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1419 stk_args += 2; 1420 } 1421 break; 1422 case T_VOID: // Halves of longs and doubles 1423 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 1424 regs[i].set_bad(); 1425 break; 1426 default: 1427 ShouldNotReachHere(); 1428 break; 1429 } 1430 } 1431 #ifdef _WIN64 1432 // windows abi requires that we always allocate enough stack space 1433 // for 4 64bit registers to be stored down. 1434 if (stk_args < 8) { 1435 stk_args = 8; 1436 } 1437 #endif // _WIN64 1438 1439 return stk_args; 1440 } 1441 1442 int SharedRuntime::vector_calling_convention(VMRegPair *regs, 1443 uint num_bits, 1444 uint total_args_passed) { 1445 assert(num_bits == 64 || num_bits == 128 || num_bits == 256 || num_bits == 512, 1446 "only certain vector sizes are supported for now"); 1447 1448 static const XMMRegister VEC_ArgReg[32] = { 1449 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, 1450 xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, 1451 xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23, 1452 xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31 1453 }; 1454 1455 uint stk_args = 0; 1456 uint fp_args = 0; 1457 1458 for (uint i = 0; i < total_args_passed; i++) { 1459 VMReg vmreg = VEC_ArgReg[fp_args++]->as_VMReg(); 1460 int next_val = num_bits == 64 ? 1 : (num_bits == 128 ? 3 : (num_bits == 256 ? 7 : 15)); 1461 regs[i].set_pair(vmreg->next(next_val), vmreg); 1462 } 1463 1464 return stk_args; 1465 } 1466 1467 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { 1468 // We always ignore the frame_slots arg and just use the space just below frame pointer 1469 // which by this time is free to use 1470 switch (ret_type) { 1471 case T_FLOAT: 1472 __ movflt(Address(rbp, -wordSize), xmm0); 1473 break; 1474 case T_DOUBLE: 1475 __ movdbl(Address(rbp, -wordSize), xmm0); 1476 break; 1477 case T_VOID: break; 1478 default: { 1479 __ movptr(Address(rbp, -wordSize), rax); 1480 } 1481 } 1482 } 1483 1484 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { 1485 // We always ignore the frame_slots arg and just use the space just below frame pointer 1486 // which by this time is free to use 1487 switch (ret_type) { 1488 case T_FLOAT: 1489 __ movflt(xmm0, Address(rbp, -wordSize)); 1490 break; 1491 case T_DOUBLE: 1492 __ movdbl(xmm0, Address(rbp, -wordSize)); 1493 break; 1494 case T_VOID: break; 1495 default: { 1496 __ movptr(rax, Address(rbp, -wordSize)); 1497 } 1498 } 1499 } 1500 1501 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { 1502 for ( int i = first_arg ; i < arg_count ; i++ ) { 1503 if (args[i].first()->is_Register()) { 1504 __ push(args[i].first()->as_Register()); 1505 } else if (args[i].first()->is_XMMRegister()) { 1506 __ subptr(rsp, 2*wordSize); 1507 __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister()); 1508 } 1509 } 1510 } 1511 1512 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { 1513 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { 1514 if (args[i].first()->is_Register()) { 1515 __ pop(args[i].first()->as_Register()); 1516 } else if (args[i].first()->is_XMMRegister()) { 1517 __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0)); 1518 __ addptr(rsp, 2*wordSize); 1519 } 1520 } 1521 } 1522 1523 static void verify_oop_args(MacroAssembler* masm, 1524 const methodHandle& method, 1525 const BasicType* sig_bt, 1526 const VMRegPair* regs) { 1527 Register temp_reg = rbx; // not part of any compiled calling seq 1528 if (VerifyOops) { 1529 for (int i = 0; i < method->size_of_parameters(); i++) { 1530 if (is_reference_type(sig_bt[i])) { 1531 VMReg r = regs[i].first(); 1532 assert(r->is_valid(), "bad oop arg"); 1533 if (r->is_stack()) { 1534 __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1535 __ verify_oop(temp_reg); 1536 } else { 1537 __ verify_oop(r->as_Register()); 1538 } 1539 } 1540 } 1541 } 1542 } 1543 1544 static void check_continuation_enter_argument(VMReg actual_vmreg, 1545 Register expected_reg, 1546 const char* name) { 1547 assert(!actual_vmreg->is_stack(), "%s cannot be on stack", name); 1548 assert(actual_vmreg->as_Register() == expected_reg, 1549 "%s is in unexpected register: %s instead of %s", 1550 name, actual_vmreg->as_Register()->name(), expected_reg->name()); 1551 } 1552 1553 1554 //---------------------------- continuation_enter_setup --------------------------- 1555 // 1556 // Arguments: 1557 // None. 1558 // 1559 // Results: 1560 // rsp: pointer to blank ContinuationEntry 1561 // 1562 // Kills: 1563 // rax 1564 // 1565 static OopMap* continuation_enter_setup(MacroAssembler* masm, int& stack_slots) { 1566 assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, ""); 1567 assert(in_bytes(ContinuationEntry::cont_offset()) % VMRegImpl::stack_slot_size == 0, ""); 1568 assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, ""); 1569 1570 stack_slots += checked_cast<int>(ContinuationEntry::size()) / wordSize; 1571 __ subptr(rsp, checked_cast<int32_t>(ContinuationEntry::size())); 1572 1573 int frame_size = (checked_cast<int>(ContinuationEntry::size()) + wordSize) / VMRegImpl::stack_slot_size; 1574 OopMap* map = new OopMap(frame_size, 0); 1575 1576 __ movptr(rax, Address(r15_thread, JavaThread::cont_entry_offset())); 1577 __ movptr(Address(rsp, ContinuationEntry::parent_offset()), rax); 1578 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rsp); 1579 1580 return map; 1581 } 1582 1583 //---------------------------- fill_continuation_entry --------------------------- 1584 // 1585 // Arguments: 1586 // rsp: pointer to blank Continuation entry 1587 // reg_cont_obj: pointer to the continuation 1588 // reg_flags: flags 1589 // 1590 // Results: 1591 // rsp: pointer to filled out ContinuationEntry 1592 // 1593 // Kills: 1594 // rax 1595 // 1596 static void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Register reg_flags) { 1597 assert_different_registers(rax, reg_cont_obj, reg_flags); 1598 #ifdef ASSERT 1599 __ movl(Address(rsp, ContinuationEntry::cookie_offset()), ContinuationEntry::cookie_value()); 1600 #endif 1601 __ movptr(Address(rsp, ContinuationEntry::cont_offset()), reg_cont_obj); 1602 __ movl (Address(rsp, ContinuationEntry::flags_offset()), reg_flags); 1603 __ movptr(Address(rsp, ContinuationEntry::chunk_offset()), 0); 1604 __ movl(Address(rsp, ContinuationEntry::argsize_offset()), 0); 1605 __ movl(Address(rsp, ContinuationEntry::pin_count_offset()), 0); 1606 1607 __ movptr(rax, Address(r15_thread, JavaThread::cont_fastpath_offset())); 1608 __ movptr(Address(rsp, ContinuationEntry::parent_cont_fastpath_offset()), rax); 1609 __ movq(rax, Address(r15_thread, JavaThread::held_monitor_count_offset())); 1610 __ movq(Address(rsp, ContinuationEntry::parent_held_monitor_count_offset()), rax); 1611 1612 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), 0); 1613 __ movq(Address(r15_thread, JavaThread::held_monitor_count_offset()), 0); 1614 } 1615 1616 //---------------------------- continuation_enter_cleanup --------------------------- 1617 // 1618 // Arguments: 1619 // rsp: pointer to the ContinuationEntry 1620 // 1621 // Results: 1622 // rsp: pointer to the spilled rbp in the entry frame 1623 // 1624 // Kills: 1625 // rbx 1626 // 1627 void static continuation_enter_cleanup(MacroAssembler* masm) { 1628 #ifdef ASSERT 1629 Label L_good_sp; 1630 __ cmpptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset())); 1631 __ jcc(Assembler::equal, L_good_sp); 1632 __ stop("Incorrect rsp at continuation_enter_cleanup"); 1633 __ bind(L_good_sp); 1634 #endif 1635 1636 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_cont_fastpath_offset())); 1637 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), rbx); 1638 __ movq(rbx, Address(rsp, ContinuationEntry::parent_held_monitor_count_offset())); 1639 __ movq(Address(r15_thread, JavaThread::held_monitor_count_offset()), rbx); 1640 1641 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_offset())); 1642 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rbx); 1643 __ addptr(rsp, checked_cast<int32_t>(ContinuationEntry::size())); 1644 } 1645 1646 static void gen_continuation_enter(MacroAssembler* masm, 1647 const VMRegPair* regs, 1648 int& exception_offset, 1649 OopMapSet* oop_maps, 1650 int& frame_complete, 1651 int& stack_slots, 1652 int& interpreted_entry_offset, 1653 int& compiled_entry_offset) { 1654 1655 // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread) 1656 int pos_cont_obj = 0; 1657 int pos_is_cont = 1; 1658 int pos_is_virtual = 2; 1659 1660 // The platform-specific calling convention may present the arguments in various registers. 1661 // To simplify the rest of the code, we expect the arguments to reside at these known 1662 // registers, and we additionally check the placement here in case calling convention ever 1663 // changes. 1664 Register reg_cont_obj = c_rarg1; 1665 Register reg_is_cont = c_rarg2; 1666 Register reg_is_virtual = c_rarg3; 1667 1668 check_continuation_enter_argument(regs[pos_cont_obj].first(), reg_cont_obj, "Continuation object"); 1669 check_continuation_enter_argument(regs[pos_is_cont].first(), reg_is_cont, "isContinue"); 1670 check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread"); 1671 1672 // Utility methods kill rax, make sure there are no collisions 1673 assert_different_registers(rax, reg_cont_obj, reg_is_cont, reg_is_virtual); 1674 1675 AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), 1676 relocInfo::static_call_type); 1677 1678 address start = __ pc(); 1679 1680 Label L_thaw, L_exit; 1681 1682 // i2i entry used at interp_only_mode only 1683 interpreted_entry_offset = __ pc() - start; 1684 { 1685 #ifdef ASSERT 1686 Label is_interp_only; 1687 __ cmpb(Address(r15_thread, JavaThread::interp_only_mode_offset()), 0); 1688 __ jcc(Assembler::notEqual, is_interp_only); 1689 __ stop("enterSpecial interpreter entry called when not in interp_only_mode"); 1690 __ bind(is_interp_only); 1691 #endif 1692 1693 __ pop(rax); // return address 1694 // Read interpreter arguments into registers (this is an ad-hoc i2c adapter) 1695 __ movptr(c_rarg1, Address(rsp, Interpreter::stackElementSize*2)); 1696 __ movl(c_rarg2, Address(rsp, Interpreter::stackElementSize*1)); 1697 __ movl(c_rarg3, Address(rsp, Interpreter::stackElementSize*0)); 1698 __ andptr(rsp, -16); // Ensure compiled code always sees stack at proper alignment 1699 __ push(rax); // return address 1700 __ push_cont_fastpath(); 1701 1702 __ enter(); 1703 1704 stack_slots = 2; // will be adjusted in setup 1705 OopMap* map = continuation_enter_setup(masm, stack_slots); 1706 // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe, 1707 // but that's okay because at the very worst we'll miss an async sample, but we're in interp_only_mode anyway. 1708 1709 __ verify_oop(reg_cont_obj); 1710 1711 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual); 1712 1713 // If continuation, call to thaw. Otherwise, resolve the call and exit. 1714 __ testptr(reg_is_cont, reg_is_cont); 1715 __ jcc(Assembler::notZero, L_thaw); 1716 1717 // --- Resolve path 1718 1719 // Make sure the call is patchable 1720 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset); 1721 // Emit stub for static call 1722 CodeBuffer* cbuf = masm->code_section()->outer(); 1723 address stub = CompiledStaticCall::emit_to_interp_stub(*cbuf, __ pc()); 1724 if (stub == nullptr) { 1725 fatal("CodeCache is full at gen_continuation_enter"); 1726 } 1727 __ call(resolve); 1728 oop_maps->add_gc_map(__ pc() - start, map); 1729 __ post_call_nop(); 1730 1731 __ jmp(L_exit); 1732 } 1733 1734 // compiled entry 1735 __ align(CodeEntryAlignment); 1736 compiled_entry_offset = __ pc() - start; 1737 __ enter(); 1738 1739 stack_slots = 2; // will be adjusted in setup 1740 OopMap* map = continuation_enter_setup(masm, stack_slots); 1741 1742 // Frame is now completed as far as size and linkage. 1743 frame_complete = __ pc() - start; 1744 1745 __ verify_oop(reg_cont_obj); 1746 1747 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual); 1748 1749 // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue) 1750 __ testptr(reg_is_cont, reg_is_cont); 1751 __ jccb(Assembler::notZero, L_thaw); 1752 1753 // --- call Continuation.enter(Continuation c, boolean isContinue) 1754 1755 // Make sure the call is patchable 1756 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset); 1757 1758 // Emit stub for static call 1759 CodeBuffer* cbuf = masm->code_section()->outer(); 1760 address stub = CompiledStaticCall::emit_to_interp_stub(*cbuf, __ pc()); 1761 if (stub == nullptr) { 1762 fatal("CodeCache is full at gen_continuation_enter"); 1763 } 1764 1765 // The call needs to be resolved. There's a special case for this in 1766 // SharedRuntime::find_callee_info_helper() which calls 1767 // LinkResolver::resolve_continuation_enter() which resolves the call to 1768 // Continuation.enter(Continuation c, boolean isContinue). 1769 __ call(resolve); 1770 1771 oop_maps->add_gc_map(__ pc() - start, map); 1772 __ post_call_nop(); 1773 1774 __ jmpb(L_exit); 1775 1776 // --- Thawing path 1777 1778 __ bind(L_thaw); 1779 1780 __ call(RuntimeAddress(StubRoutines::cont_thaw())); 1781 1782 ContinuationEntry::_return_pc_offset = __ pc() - start; 1783 oop_maps->add_gc_map(__ pc() - start, map->deep_copy()); 1784 __ post_call_nop(); 1785 1786 // --- Normal exit (resolve/thawing) 1787 1788 __ bind(L_exit); 1789 1790 continuation_enter_cleanup(masm); 1791 __ pop(rbp); 1792 __ ret(0); 1793 1794 // --- Exception handling path 1795 1796 exception_offset = __ pc() - start; 1797 1798 continuation_enter_cleanup(masm); 1799 __ pop(rbp); 1800 1801 __ movptr(c_rarg0, r15_thread); 1802 __ movptr(c_rarg1, Address(rsp, 0)); // return address 1803 1804 // rax still holds the original exception oop, save it before the call 1805 __ push(rax); 1806 1807 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), 2); 1808 __ movptr(rbx, rax); 1809 1810 // Continue at exception handler: 1811 // rax: exception oop 1812 // rbx: exception handler 1813 // rdx: exception pc 1814 __ pop(rax); 1815 __ verify_oop(rax); 1816 __ pop(rdx); 1817 __ jmp(rbx); 1818 } 1819 1820 static void gen_continuation_yield(MacroAssembler* masm, 1821 const VMRegPair* regs, 1822 OopMapSet* oop_maps, 1823 int& frame_complete, 1824 int& stack_slots, 1825 int& compiled_entry_offset) { 1826 enum layout { 1827 rbp_off, 1828 rbpH_off, 1829 return_off, 1830 return_off2, 1831 framesize // inclusive of return address 1832 }; 1833 stack_slots = framesize / VMRegImpl::slots_per_word; 1834 assert(stack_slots == 2, "recheck layout"); 1835 1836 address start = __ pc(); 1837 compiled_entry_offset = __ pc() - start; 1838 __ enter(); 1839 address the_pc = __ pc(); 1840 1841 frame_complete = the_pc - start; 1842 1843 // This nop must be exactly at the PC we push into the frame info. 1844 // We use this nop for fast CodeBlob lookup, associate the OopMap 1845 // with it right away. 1846 __ post_call_nop(); 1847 OopMap* map = new OopMap(framesize, 1); 1848 oop_maps->add_gc_map(frame_complete, map); 1849 1850 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1); 1851 __ movptr(c_rarg0, r15_thread); 1852 __ movptr(c_rarg1, rsp); 1853 __ call_VM_leaf(Continuation::freeze_entry(), 2); 1854 __ reset_last_Java_frame(true); 1855 1856 Label L_pinned; 1857 1858 __ testptr(rax, rax); 1859 __ jcc(Assembler::notZero, L_pinned); 1860 1861 __ movptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset())); 1862 continuation_enter_cleanup(masm); 1863 __ pop(rbp); 1864 __ ret(0); 1865 1866 __ bind(L_pinned); 1867 1868 // Pinned, return to caller 1869 1870 // handle pending exception thrown by freeze 1871 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 1872 Label ok; 1873 __ jcc(Assembler::equal, ok); 1874 __ leave(); 1875 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 1876 __ bind(ok); 1877 1878 __ leave(); 1879 __ ret(0); 1880 } 1881 1882 static void gen_special_dispatch(MacroAssembler* masm, 1883 const methodHandle& method, 1884 const BasicType* sig_bt, 1885 const VMRegPair* regs) { 1886 verify_oop_args(masm, method, sig_bt, regs); 1887 vmIntrinsics::ID iid = method->intrinsic_id(); 1888 1889 // Now write the args into the outgoing interpreter space 1890 bool has_receiver = false; 1891 Register receiver_reg = noreg; 1892 int member_arg_pos = -1; 1893 Register member_reg = noreg; 1894 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1895 if (ref_kind != 0) { 1896 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 1897 member_reg = rbx; // known to be free at this point 1898 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 1899 } else if (iid == vmIntrinsics::_invokeBasic) { 1900 has_receiver = true; 1901 } else if (iid == vmIntrinsics::_linkToNative) { 1902 member_arg_pos = method->size_of_parameters() - 1; // trailing NativeEntryPoint argument 1903 member_reg = rbx; // known to be free at this point 1904 } else { 1905 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); 1906 } 1907 1908 if (member_reg != noreg) { 1909 // Load the member_arg into register, if necessary. 1910 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 1911 VMReg r = regs[member_arg_pos].first(); 1912 if (r->is_stack()) { 1913 __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1914 } else { 1915 // no data motion is needed 1916 member_reg = r->as_Register(); 1917 } 1918 } 1919 1920 if (has_receiver) { 1921 // Make sure the receiver is loaded into a register. 1922 assert(method->size_of_parameters() > 0, "oob"); 1923 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 1924 VMReg r = regs[0].first(); 1925 assert(r->is_valid(), "bad receiver arg"); 1926 if (r->is_stack()) { 1927 // Porting note: This assumes that compiled calling conventions always 1928 // pass the receiver oop in a register. If this is not true on some 1929 // platform, pick a temp and load the receiver from stack. 1930 fatal("receiver always in a register"); 1931 receiver_reg = j_rarg0; // known to be free at this point 1932 __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1933 } else { 1934 // no data motion is needed 1935 receiver_reg = r->as_Register(); 1936 } 1937 } 1938 1939 // Figure out which address we are really jumping to: 1940 MethodHandles::generate_method_handle_dispatch(masm, iid, 1941 receiver_reg, member_reg, /*for_compiler_entry:*/ true); 1942 } 1943 1944 // --------------------------------------------------------------------------- 1945 // Generate a native wrapper for a given method. The method takes arguments 1946 // in the Java compiled code convention, marshals them to the native 1947 // convention (handlizes oops, etc), transitions to native, makes the call, 1948 // returns to java state (possibly blocking), unhandlizes any result and 1949 // returns. 1950 // 1951 // Critical native functions are a shorthand for the use of 1952 // GetPrimtiveArrayCritical and disallow the use of any other JNI 1953 // functions. The wrapper is expected to unpack the arguments before 1954 // passing them to the callee. Critical native functions leave the state _in_Java, 1955 // since they cannot stop for GC. 1956 // Some other parts of JNI setup are skipped like the tear down of the JNI handle 1957 // block and the check for pending exceptions it's impossible for them 1958 // to be thrown. 1959 // 1960 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, 1961 const methodHandle& method, 1962 int compile_id, 1963 BasicType* in_sig_bt, 1964 VMRegPair* in_regs, 1965 BasicType ret_type) { 1966 if (method->is_continuation_native_intrinsic()) { 1967 int exception_offset = -1; 1968 OopMapSet* oop_maps = new OopMapSet(); 1969 int frame_complete = -1; 1970 int stack_slots = -1; 1971 int interpreted_entry_offset = -1; 1972 int vep_offset = -1; 1973 if (method->is_continuation_enter_intrinsic()) { 1974 gen_continuation_enter(masm, 1975 in_regs, 1976 exception_offset, 1977 oop_maps, 1978 frame_complete, 1979 stack_slots, 1980 interpreted_entry_offset, 1981 vep_offset); 1982 } else if (method->is_continuation_yield_intrinsic()) { 1983 gen_continuation_yield(masm, 1984 in_regs, 1985 oop_maps, 1986 frame_complete, 1987 stack_slots, 1988 vep_offset); 1989 } else { 1990 guarantee(false, "Unknown Continuation native intrinsic"); 1991 } 1992 1993 #ifdef ASSERT 1994 if (method->is_continuation_enter_intrinsic()) { 1995 assert(interpreted_entry_offset != -1, "Must be set"); 1996 assert(exception_offset != -1, "Must be set"); 1997 } else { 1998 assert(interpreted_entry_offset == -1, "Must be unset"); 1999 assert(exception_offset == -1, "Must be unset"); 2000 } 2001 assert(frame_complete != -1, "Must be set"); 2002 assert(stack_slots != -1, "Must be set"); 2003 assert(vep_offset != -1, "Must be set"); 2004 #endif 2005 2006 __ flush(); 2007 nmethod* nm = nmethod::new_native_nmethod(method, 2008 compile_id, 2009 masm->code(), 2010 vep_offset, 2011 frame_complete, 2012 stack_slots, 2013 in_ByteSize(-1), 2014 in_ByteSize(-1), 2015 oop_maps, 2016 exception_offset); 2017 if (method->is_continuation_enter_intrinsic()) { 2018 ContinuationEntry::set_enter_code(nm, interpreted_entry_offset); 2019 } else if (method->is_continuation_yield_intrinsic()) { 2020 _cont_doYield_stub = nm; 2021 } 2022 return nm; 2023 } 2024 2025 if (method->is_method_handle_intrinsic()) { 2026 vmIntrinsics::ID iid = method->intrinsic_id(); 2027 intptr_t start = (intptr_t)__ pc(); 2028 int vep_offset = ((intptr_t)__ pc()) - start; 2029 gen_special_dispatch(masm, 2030 method, 2031 in_sig_bt, 2032 in_regs); 2033 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period 2034 __ flush(); 2035 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually 2036 return nmethod::new_native_nmethod(method, 2037 compile_id, 2038 masm->code(), 2039 vep_offset, 2040 frame_complete, 2041 stack_slots / VMRegImpl::slots_per_word, 2042 in_ByteSize(-1), 2043 in_ByteSize(-1), 2044 nullptr); 2045 } 2046 address native_func = method->native_function(); 2047 assert(native_func != nullptr, "must have function"); 2048 2049 // An OopMap for lock (and class if static) 2050 OopMapSet *oop_maps = new OopMapSet(); 2051 intptr_t start = (intptr_t)__ pc(); 2052 2053 // We have received a description of where all the java arg are located 2054 // on entry to the wrapper. We need to convert these args to where 2055 // the jni function will expect them. To figure out where they go 2056 // we convert the java signature to a C signature by inserting 2057 // the hidden arguments as arg[0] and possibly arg[1] (static method) 2058 2059 const int total_in_args = method->size_of_parameters(); 2060 int total_c_args = total_in_args + (method->is_static() ? 2 : 1); 2061 2062 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 2063 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 2064 BasicType* in_elem_bt = nullptr; 2065 2066 int argc = 0; 2067 out_sig_bt[argc++] = T_ADDRESS; 2068 if (method->is_static()) { 2069 out_sig_bt[argc++] = T_OBJECT; 2070 } 2071 2072 for (int i = 0; i < total_in_args ; i++ ) { 2073 out_sig_bt[argc++] = in_sig_bt[i]; 2074 } 2075 2076 // Now figure out where the args must be stored and how much stack space 2077 // they require. 2078 int out_arg_slots; 2079 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); 2080 2081 // Compute framesize for the wrapper. We need to handlize all oops in 2082 // incoming registers 2083 2084 // Calculate the total number of stack slots we will need. 2085 2086 // First count the abi requirement plus all of the outgoing args 2087 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 2088 2089 // Now the space for the inbound oop handle area 2090 int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers 2091 2092 int oop_handle_offset = stack_slots; 2093 stack_slots += total_save_slots; 2094 2095 // Now any space we need for handlizing a klass if static method 2096 2097 int klass_slot_offset = 0; 2098 int klass_offset = -1; 2099 int lock_slot_offset = 0; 2100 bool is_static = false; 2101 2102 if (method->is_static()) { 2103 klass_slot_offset = stack_slots; 2104 stack_slots += VMRegImpl::slots_per_word; 2105 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 2106 is_static = true; 2107 } 2108 2109 // Plus a lock if needed 2110 2111 if (method->is_synchronized()) { 2112 lock_slot_offset = stack_slots; 2113 stack_slots += VMRegImpl::slots_per_word; 2114 } 2115 2116 // Now a place (+2) to save return values or temp during shuffling 2117 // + 4 for return address (which we own) and saved rbp 2118 stack_slots += 6; 2119 2120 // Ok The space we have allocated will look like: 2121 // 2122 // 2123 // FP-> | | 2124 // |---------------------| 2125 // | 2 slots for moves | 2126 // |---------------------| 2127 // | lock box (if sync) | 2128 // |---------------------| <- lock_slot_offset 2129 // | klass (if static) | 2130 // |---------------------| <- klass_slot_offset 2131 // | oopHandle area | 2132 // |---------------------| <- oop_handle_offset (6 java arg registers) 2133 // | outbound memory | 2134 // | based arguments | 2135 // | | 2136 // |---------------------| 2137 // | | 2138 // SP-> | out_preserved_slots | 2139 // 2140 // 2141 2142 2143 // Now compute actual number of stack words we need rounding to make 2144 // stack properly aligned. 2145 stack_slots = align_up(stack_slots, StackAlignmentInSlots); 2146 2147 int stack_size = stack_slots * VMRegImpl::stack_slot_size; 2148 2149 // First thing make an ic check to see if we should even be here 2150 2151 // We are free to use all registers as temps without saving them and 2152 // restoring them except rbp. rbp is the only callee save register 2153 // as far as the interpreter and the compiler(s) are concerned. 2154 2155 2156 const Register ic_reg = rax; 2157 const Register receiver = j_rarg0; 2158 2159 Label hit; 2160 Label exception_pending; 2161 2162 assert_different_registers(ic_reg, receiver, rscratch1, rscratch2); 2163 __ verify_oop(receiver); 2164 __ load_klass(rscratch1, receiver, rscratch2); 2165 __ cmpq(ic_reg, rscratch1); 2166 __ jcc(Assembler::equal, hit); 2167 2168 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 2169 2170 // Verified entry point must be aligned 2171 __ align(8); 2172 2173 __ bind(hit); 2174 2175 int vep_offset = ((intptr_t)__ pc()) - start; 2176 2177 if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { 2178 Label L_skip_barrier; 2179 Register klass = r10; 2180 __ mov_metadata(klass, method->method_holder()); // InstanceKlass* 2181 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/); 2182 2183 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path 2184 2185 __ bind(L_skip_barrier); 2186 } 2187 2188 #ifdef COMPILER1 2189 // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available. 2190 if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { 2191 inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/); 2192 } 2193 #endif // COMPILER1 2194 2195 // The instruction at the verified entry point must be 5 bytes or longer 2196 // because it can be patched on the fly by make_non_entrant. The stack bang 2197 // instruction fits that requirement. 2198 2199 // Generate stack overflow check 2200 __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); 2201 2202 // Generate a new frame for the wrapper. 2203 __ enter(); 2204 // -2 because return address is already present and so is saved rbp 2205 __ subptr(rsp, stack_size - 2*wordSize); 2206 2207 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2208 // native wrapper is not hot enough to micro optimize the nmethod entry barrier with an out-of-line stub 2209 bs->nmethod_entry_barrier(masm, nullptr /* slow_path */, nullptr /* continuation */); 2210 2211 // Frame is now completed as far as size and linkage. 2212 int frame_complete = ((intptr_t)__ pc()) - start; 2213 2214 if (UseRTMLocking) { 2215 // Abort RTM transaction before calling JNI 2216 // because critical section will be large and will be 2217 // aborted anyway. Also nmethod could be deoptimized. 2218 __ xabort(0); 2219 } 2220 2221 #ifdef ASSERT 2222 __ check_stack_alignment(rsp, "improperly aligned stack"); 2223 #endif /* ASSERT */ 2224 2225 2226 // We use r14 as the oop handle for the receiver/klass 2227 // It is callee save so it survives the call to native 2228 2229 const Register oop_handle_reg = r14; 2230 2231 // 2232 // We immediately shuffle the arguments so that any vm call we have to 2233 // make from here on out (sync slow path, jvmti, etc.) we will have 2234 // captured the oops from our caller and have a valid oopMap for 2235 // them. 2236 2237 // ----------------- 2238 // The Grand Shuffle 2239 2240 // The Java calling convention is either equal (linux) or denser (win64) than the 2241 // c calling convention. However the because of the jni_env argument the c calling 2242 // convention always has at least one more (and two for static) arguments than Java. 2243 // Therefore if we move the args from java -> c backwards then we will never have 2244 // a register->register conflict and we don't have to build a dependency graph 2245 // and figure out how to break any cycles. 2246 // 2247 2248 // Record esp-based slot for receiver on stack for non-static methods 2249 int receiver_offset = -1; 2250 2251 // This is a trick. We double the stack slots so we can claim 2252 // the oops in the caller's frame. Since we are sure to have 2253 // more args than the caller doubling is enough to make 2254 // sure we can capture all the incoming oop args from the 2255 // caller. 2256 // 2257 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 2258 2259 // Mark location of rbp (someday) 2260 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp)); 2261 2262 // Use eax, ebx as temporaries during any memory-memory moves we have to do 2263 // All inbound args are referenced based on rbp and all outbound args via rsp. 2264 2265 2266 #ifdef ASSERT 2267 bool reg_destroyed[Register::number_of_registers]; 2268 bool freg_destroyed[XMMRegister::number_of_registers]; 2269 for ( int r = 0 ; r < Register::number_of_registers ; r++ ) { 2270 reg_destroyed[r] = false; 2271 } 2272 for ( int f = 0 ; f < XMMRegister::number_of_registers ; f++ ) { 2273 freg_destroyed[f] = false; 2274 } 2275 2276 #endif /* ASSERT */ 2277 2278 // For JNI natives the incoming and outgoing registers are offset upwards. 2279 GrowableArray<int> arg_order(2 * total_in_args); 2280 2281 VMRegPair tmp_vmreg; 2282 tmp_vmreg.set2(rbx->as_VMReg()); 2283 2284 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { 2285 arg_order.push(i); 2286 arg_order.push(c_arg); 2287 } 2288 2289 int temploc = -1; 2290 for (int ai = 0; ai < arg_order.length(); ai += 2) { 2291 int i = arg_order.at(ai); 2292 int c_arg = arg_order.at(ai + 1); 2293 __ block_comment(err_msg("move %d -> %d", i, c_arg)); 2294 #ifdef ASSERT 2295 if (in_regs[i].first()->is_Register()) { 2296 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); 2297 } else if (in_regs[i].first()->is_XMMRegister()) { 2298 assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!"); 2299 } 2300 if (out_regs[c_arg].first()->is_Register()) { 2301 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 2302 } else if (out_regs[c_arg].first()->is_XMMRegister()) { 2303 freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; 2304 } 2305 #endif /* ASSERT */ 2306 switch (in_sig_bt[i]) { 2307 case T_ARRAY: 2308 case T_OBJECT: 2309 __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 2310 ((i == 0) && (!is_static)), 2311 &receiver_offset); 2312 break; 2313 case T_VOID: 2314 break; 2315 2316 case T_FLOAT: 2317 __ float_move(in_regs[i], out_regs[c_arg]); 2318 break; 2319 2320 case T_DOUBLE: 2321 assert( i + 1 < total_in_args && 2322 in_sig_bt[i + 1] == T_VOID && 2323 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 2324 __ double_move(in_regs[i], out_regs[c_arg]); 2325 break; 2326 2327 case T_LONG : 2328 __ long_move(in_regs[i], out_regs[c_arg]); 2329 break; 2330 2331 case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 2332 2333 default: 2334 __ move32_64(in_regs[i], out_regs[c_arg]); 2335 } 2336 } 2337 2338 int c_arg; 2339 2340 // Pre-load a static method's oop into r14. Used both by locking code and 2341 // the normal JNI call code. 2342 // point c_arg at the first arg that is already loaded in case we 2343 // need to spill before we call out 2344 c_arg = total_c_args - total_in_args; 2345 2346 if (method->is_static()) { 2347 2348 // load oop into a register 2349 __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); 2350 2351 // Now handlize the static class mirror it's known not-null. 2352 __ movptr(Address(rsp, klass_offset), oop_handle_reg); 2353 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 2354 2355 // Now get the handle 2356 __ lea(oop_handle_reg, Address(rsp, klass_offset)); 2357 // store the klass handle as second argument 2358 __ movptr(c_rarg1, oop_handle_reg); 2359 // and protect the arg if we must spill 2360 c_arg--; 2361 } 2362 2363 // Change state to native (we save the return address in the thread, since it might not 2364 // be pushed on the stack when we do a stack traversal). It is enough that the pc() 2365 // points into the right code segment. It does not have to be the correct return pc. 2366 // We use the same pc/oopMap repeatedly when we call out 2367 2368 intptr_t the_pc = (intptr_t) __ pc(); 2369 oop_maps->add_gc_map(the_pc - start, map); 2370 2371 __ set_last_Java_frame(rsp, noreg, (address)the_pc, rscratch1); 2372 2373 2374 // We have all of the arguments setup at this point. We must not touch any register 2375 // argument registers at this point (what if we save/restore them there are no oop? 2376 2377 { 2378 SkipIfEqual skip(masm, &DTraceMethodProbes, false, rscratch1); 2379 // protect the args we've loaded 2380 save_args(masm, total_c_args, c_arg, out_regs); 2381 __ mov_metadata(c_rarg1, method()); 2382 __ call_VM_leaf( 2383 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 2384 r15_thread, c_rarg1); 2385 restore_args(masm, total_c_args, c_arg, out_regs); 2386 } 2387 2388 // RedefineClasses() tracing support for obsolete method entry 2389 if (log_is_enabled(Trace, redefine, class, obsolete)) { 2390 // protect the args we've loaded 2391 save_args(masm, total_c_args, c_arg, out_regs); 2392 __ mov_metadata(c_rarg1, method()); 2393 __ call_VM_leaf( 2394 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), 2395 r15_thread, c_rarg1); 2396 restore_args(masm, total_c_args, c_arg, out_regs); 2397 } 2398 2399 // Lock a synchronized method 2400 2401 // Register definitions used by locking and unlocking 2402 2403 const Register swap_reg = rax; // Must use rax for cmpxchg instruction 2404 const Register obj_reg = rbx; // Will contain the oop 2405 const Register lock_reg = r13; // Address of compiler lock object (BasicLock) 2406 const Register old_hdr = r13; // value of old header at unlock time 2407 2408 Label slow_path_lock; 2409 Label lock_done; 2410 2411 if (method->is_synchronized()) { 2412 Label count_mon; 2413 2414 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); 2415 2416 // Get the handle (the 2nd argument) 2417 __ mov(oop_handle_reg, c_rarg1); 2418 2419 // Get address of the box 2420 2421 __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2422 2423 // Load the oop from the handle 2424 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 2425 2426 if (LockingMode == LM_MONITOR) { 2427 __ jmp(slow_path_lock); 2428 } else if (LockingMode == LM_LEGACY) { 2429 // Load immediate 1 into swap_reg %rax 2430 __ movl(swap_reg, 1); 2431 2432 // Load (object->mark() | 1) into swap_reg %rax 2433 __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2434 if (EnableValhalla) { 2435 // Mask inline_type bit such that we go to the slow path if object is an inline type 2436 __ andptr(swap_reg, ~((int) markWord::inline_type_bit_in_place)); 2437 } 2438 2439 // Save (object->mark() | 1) into BasicLock's displaced header 2440 __ movptr(Address(lock_reg, mark_word_offset), swap_reg); 2441 2442 // src -> dest iff dest == rax else rax <- dest 2443 __ lock(); 2444 __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2445 __ jcc(Assembler::equal, count_mon); 2446 2447 // Hmm should this move to the slow path code area??? 2448 2449 // Test if the oopMark is an obvious stack pointer, i.e., 2450 // 1) (mark & 3) == 0, and 2451 // 2) rsp <= mark < mark + os::pagesize() 2452 // These 3 tests can be done by evaluating the following 2453 // expression: ((mark - rsp) & (3 - os::vm_page_size())), 2454 // assuming both stack pointer and pagesize have their 2455 // least significant 2 bits clear. 2456 // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg 2457 2458 __ subptr(swap_reg, rsp); 2459 __ andptr(swap_reg, 3 - (int)os::vm_page_size()); 2460 2461 // Save the test result, for recursive case, the result is zero 2462 __ movptr(Address(lock_reg, mark_word_offset), swap_reg); 2463 __ jcc(Assembler::notEqual, slow_path_lock); 2464 } else { 2465 assert(LockingMode == LM_LIGHTWEIGHT, "must be"); 2466 // Load object header 2467 __ movptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2468 __ lightweight_lock(obj_reg, swap_reg, r15_thread, rscratch1, slow_path_lock); 2469 } 2470 __ bind(count_mon); 2471 __ inc_held_monitor_count(); 2472 2473 // Slow path will re-enter here 2474 __ bind(lock_done); 2475 } 2476 2477 // Finally just about ready to make the JNI call 2478 2479 // get JNIEnv* which is first argument to native 2480 __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset()))); 2481 2482 // Now set thread in native 2483 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native); 2484 2485 __ call(RuntimeAddress(native_func)); 2486 2487 // Verify or restore cpu control state after JNI call 2488 __ restore_cpu_control_state_after_jni(rscratch1); 2489 2490 // Unpack native results. 2491 switch (ret_type) { 2492 case T_BOOLEAN: __ c2bool(rax); break; 2493 case T_CHAR : __ movzwl(rax, rax); break; 2494 case T_BYTE : __ sign_extend_byte (rax); break; 2495 case T_SHORT : __ sign_extend_short(rax); break; 2496 case T_INT : /* nothing to do */ break; 2497 case T_DOUBLE : 2498 case T_FLOAT : 2499 // Result is in xmm0 we'll save as needed 2500 break; 2501 case T_ARRAY: // Really a handle 2502 case T_OBJECT: // Really a handle 2503 break; // can't de-handlize until after safepoint check 2504 case T_VOID: break; 2505 case T_LONG: break; 2506 default : ShouldNotReachHere(); 2507 } 2508 2509 Label after_transition; 2510 2511 // Switch thread to "native transition" state before reading the synchronization state. 2512 // This additional state is necessary because reading and testing the synchronization 2513 // state is not atomic w.r.t. GC, as this scenario demonstrates: 2514 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. 2515 // VM thread changes sync state to synchronizing and suspends threads for GC. 2516 // Thread A is resumed to finish this native method, but doesn't block here since it 2517 // didn't see any synchronization is progress, and escapes. 2518 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans); 2519 2520 // Force this write out before the read below 2521 if (!UseSystemMemoryBarrier) { 2522 __ membar(Assembler::Membar_mask_bits( 2523 Assembler::LoadLoad | Assembler::LoadStore | 2524 Assembler::StoreLoad | Assembler::StoreStore)); 2525 } 2526 2527 // check for safepoint operation in progress and/or pending suspend requests 2528 { 2529 Label Continue; 2530 Label slow_path; 2531 2532 __ safepoint_poll(slow_path, r15_thread, true /* at_return */, false /* in_nmethod */); 2533 2534 __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); 2535 __ jcc(Assembler::equal, Continue); 2536 __ bind(slow_path); 2537 2538 // Don't use call_VM as it will see a possible pending exception and forward it 2539 // and never return here preventing us from clearing _last_native_pc down below. 2540 // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are 2541 // preserved and correspond to the bcp/locals pointers. So we do a runtime call 2542 // by hand. 2543 // 2544 __ vzeroupper(); 2545 save_native_result(masm, ret_type, stack_slots); 2546 __ mov(c_rarg0, r15_thread); 2547 __ mov(r12, rsp); // remember sp 2548 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2549 __ andptr(rsp, -16); // align stack as required by ABI 2550 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); 2551 __ mov(rsp, r12); // restore sp 2552 __ reinit_heapbase(); 2553 // Restore any method result value 2554 restore_native_result(masm, ret_type, stack_slots); 2555 __ bind(Continue); 2556 } 2557 2558 // change thread state 2559 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); 2560 __ bind(after_transition); 2561 2562 Label reguard; 2563 Label reguard_done; 2564 __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); 2565 __ jcc(Assembler::equal, reguard); 2566 __ bind(reguard_done); 2567 2568 // native result if any is live 2569 2570 // Unlock 2571 Label slow_path_unlock; 2572 Label unlock_done; 2573 if (method->is_synchronized()) { 2574 2575 Label fast_done; 2576 2577 // Get locked oop from the handle we passed to jni 2578 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 2579 2580 if (LockingMode == LM_LEGACY) { 2581 Label not_recur; 2582 // Simple recursive lock? 2583 __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), NULL_WORD); 2584 __ jcc(Assembler::notEqual, not_recur); 2585 __ dec_held_monitor_count(); 2586 __ jmpb(fast_done); 2587 __ bind(not_recur); 2588 } 2589 2590 // Must save rax if it is live now because cmpxchg must use it 2591 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2592 save_native_result(masm, ret_type, stack_slots); 2593 } 2594 2595 if (LockingMode == LM_MONITOR) { 2596 __ jmp(slow_path_unlock); 2597 } else if (LockingMode == LM_LEGACY) { 2598 // get address of the stack lock 2599 __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2600 // get old displaced header 2601 __ movptr(old_hdr, Address(rax, 0)); 2602 2603 // Atomic swap old header if oop still contains the stack lock 2604 __ lock(); 2605 __ cmpxchgptr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2606 __ jcc(Assembler::notEqual, slow_path_unlock); 2607 __ dec_held_monitor_count(); 2608 } else { 2609 assert(LockingMode == LM_LIGHTWEIGHT, "must be"); 2610 __ movptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2611 __ andptr(swap_reg, ~(int32_t)markWord::lock_mask_in_place); 2612 __ lightweight_unlock(obj_reg, swap_reg, lock_reg, slow_path_unlock); 2613 __ dec_held_monitor_count(); 2614 } 2615 2616 // slow path re-enters here 2617 __ bind(unlock_done); 2618 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2619 restore_native_result(masm, ret_type, stack_slots); 2620 } 2621 2622 __ bind(fast_done); 2623 } 2624 { 2625 SkipIfEqual skip(masm, &DTraceMethodProbes, false, rscratch1); 2626 save_native_result(masm, ret_type, stack_slots); 2627 __ mov_metadata(c_rarg1, method()); 2628 __ call_VM_leaf( 2629 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 2630 r15_thread, c_rarg1); 2631 restore_native_result(masm, ret_type, stack_slots); 2632 } 2633 2634 __ reset_last_Java_frame(false); 2635 2636 // Unbox oop result, e.g. JNIHandles::resolve value. 2637 if (is_reference_type(ret_type)) { 2638 __ resolve_jobject(rax /* value */, 2639 r15_thread /* thread */, 2640 rcx /* tmp */); 2641 } 2642 2643 if (CheckJNICalls) { 2644 // clear_pending_jni_exception_check 2645 __ movptr(Address(r15_thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD); 2646 } 2647 2648 // reset handle block 2649 __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset())); 2650 __ movl(Address(rcx, JNIHandleBlock::top_offset()), NULL_WORD); 2651 2652 // pop our frame 2653 2654 __ leave(); 2655 2656 // Any exception pending? 2657 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2658 __ jcc(Assembler::notEqual, exception_pending); 2659 2660 // Return 2661 2662 __ ret(0); 2663 2664 // Unexpected paths are out of line and go here 2665 2666 // forward the exception 2667 __ bind(exception_pending); 2668 2669 // and forward the exception 2670 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 2671 2672 // Slow path locking & unlocking 2673 if (method->is_synchronized()) { 2674 2675 // BEGIN Slow path lock 2676 __ bind(slow_path_lock); 2677 2678 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM 2679 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2680 2681 // protect the args we've loaded 2682 save_args(masm, total_c_args, c_arg, out_regs); 2683 2684 __ mov(c_rarg0, obj_reg); 2685 __ mov(c_rarg1, lock_reg); 2686 __ mov(c_rarg2, r15_thread); 2687 2688 // Not a leaf but we have last_Java_frame setup as we want 2689 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); 2690 restore_args(masm, total_c_args, c_arg, out_regs); 2691 2692 #ifdef ASSERT 2693 { Label L; 2694 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2695 __ jcc(Assembler::equal, L); 2696 __ stop("no pending exception allowed on exit from monitorenter"); 2697 __ bind(L); 2698 } 2699 #endif 2700 __ jmp(lock_done); 2701 2702 // END Slow path lock 2703 2704 // BEGIN Slow path unlock 2705 __ bind(slow_path_unlock); 2706 2707 // If we haven't already saved the native result we must save it now as xmm registers 2708 // are still exposed. 2709 __ vzeroupper(); 2710 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2711 save_native_result(masm, ret_type, stack_slots); 2712 } 2713 2714 __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2715 2716 __ mov(c_rarg0, obj_reg); 2717 __ mov(c_rarg2, r15_thread); 2718 __ mov(r12, rsp); // remember sp 2719 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2720 __ andptr(rsp, -16); // align stack as required by ABI 2721 2722 // Save pending exception around call to VM (which contains an EXCEPTION_MARK) 2723 // NOTE that obj_reg == rbx currently 2724 __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset()))); 2725 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2726 2727 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2728 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); 2729 __ mov(rsp, r12); // restore sp 2730 __ reinit_heapbase(); 2731 #ifdef ASSERT 2732 { 2733 Label L; 2734 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2735 __ jcc(Assembler::equal, L); 2736 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); 2737 __ bind(L); 2738 } 2739 #endif /* ASSERT */ 2740 2741 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx); 2742 2743 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2744 restore_native_result(masm, ret_type, stack_slots); 2745 } 2746 __ jmp(unlock_done); 2747 2748 // END Slow path unlock 2749 2750 } // synchronized 2751 2752 // SLOW PATH Reguard the stack if needed 2753 2754 __ bind(reguard); 2755 __ vzeroupper(); 2756 save_native_result(masm, ret_type, stack_slots); 2757 __ mov(r12, rsp); // remember sp 2758 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2759 __ andptr(rsp, -16); // align stack as required by ABI 2760 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); 2761 __ mov(rsp, r12); // restore sp 2762 __ reinit_heapbase(); 2763 restore_native_result(masm, ret_type, stack_slots); 2764 // and continue 2765 __ jmp(reguard_done); 2766 2767 2768 2769 __ flush(); 2770 2771 nmethod *nm = nmethod::new_native_nmethod(method, 2772 compile_id, 2773 masm->code(), 2774 vep_offset, 2775 frame_complete, 2776 stack_slots / VMRegImpl::slots_per_word, 2777 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), 2778 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), 2779 oop_maps); 2780 2781 return nm; 2782 } 2783 2784 // this function returns the adjust size (in number of words) to a c2i adapter 2785 // activation for use during deoptimization 2786 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) { 2787 return (callee_locals - callee_parameters) * Interpreter::stackElementWords; 2788 } 2789 2790 2791 uint SharedRuntime::out_preserve_stack_slots() { 2792 return 0; 2793 } 2794 2795 2796 // Number of stack slots between incoming argument block and the start of 2797 // a new frame. The PROLOG must add this many slots to the stack. The 2798 // EPILOG must remove this many slots. amd64 needs two slots for 2799 // return address. 2800 uint SharedRuntime::in_preserve_stack_slots() { 2801 return 4 + 2 * VerifyStackAtCalls; 2802 } 2803 2804 //------------------------------generate_deopt_blob---------------------------- 2805 void SharedRuntime::generate_deopt_blob() { 2806 // Allocate space for the code 2807 ResourceMark rm; 2808 // Setup code generation tools 2809 int pad = 0; 2810 if (UseAVX > 2) { 2811 pad += 1024; 2812 } 2813 #if INCLUDE_JVMCI 2814 if (EnableJVMCI) { 2815 pad += 512; // Increase the buffer size when compiling for JVMCI 2816 } 2817 #endif 2818 CodeBuffer buffer("deopt_blob", 2560+pad, 1024); 2819 MacroAssembler* masm = new MacroAssembler(&buffer); 2820 int frame_size_in_words; 2821 OopMap* map = nullptr; 2822 OopMapSet *oop_maps = new OopMapSet(); 2823 2824 // ------------- 2825 // This code enters when returning to a de-optimized nmethod. A return 2826 // address has been pushed on the stack, and return values are in 2827 // registers. 2828 // If we are doing a normal deopt then we were called from the patched 2829 // nmethod from the point we returned to the nmethod. So the return 2830 // address on the stack is wrong by NativeCall::instruction_size 2831 // We will adjust the value so it looks like we have the original return 2832 // address on the stack (like when we eagerly deoptimized). 2833 // In the case of an exception pending when deoptimizing, we enter 2834 // with a return address on the stack that points after the call we patched 2835 // into the exception handler. We have the following register state from, 2836 // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). 2837 // rax: exception oop 2838 // rbx: exception handler 2839 // rdx: throwing pc 2840 // So in this case we simply jam rdx into the useless return address and 2841 // the stack looks just like we want. 2842 // 2843 // At this point we need to de-opt. We save the argument return 2844 // registers. We call the first C routine, fetch_unroll_info(). This 2845 // routine captures the return values and returns a structure which 2846 // describes the current frame size and the sizes of all replacement frames. 2847 // The current frame is compiled code and may contain many inlined 2848 // functions, each with their own JVM state. We pop the current frame, then 2849 // push all the new frames. Then we call the C routine unpack_frames() to 2850 // populate these frames. Finally unpack_frames() returns us the new target 2851 // address. Notice that callee-save registers are BLOWN here; they have 2852 // already been captured in the vframeArray at the time the return PC was 2853 // patched. 2854 address start = __ pc(); 2855 Label cont; 2856 2857 // Prolog for non exception case! 2858 2859 // Save everything in sight. 2860 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2861 2862 // Normal deoptimization. Save exec mode for unpack_frames. 2863 __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved 2864 __ jmp(cont); 2865 2866 int reexecute_offset = __ pc() - start; 2867 #if INCLUDE_JVMCI && !defined(COMPILER1) 2868 if (EnableJVMCI && UseJVMCICompiler) { 2869 // JVMCI does not use this kind of deoptimization 2870 __ should_not_reach_here(); 2871 } 2872 #endif 2873 2874 // Reexecute case 2875 // return address is the pc describes what bci to do re-execute at 2876 2877 // No need to update map as each call to save_live_registers will produce identical oopmap 2878 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2879 2880 __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved 2881 __ jmp(cont); 2882 2883 #if INCLUDE_JVMCI 2884 Label after_fetch_unroll_info_call; 2885 int implicit_exception_uncommon_trap_offset = 0; 2886 int uncommon_trap_offset = 0; 2887 2888 if (EnableJVMCI) { 2889 implicit_exception_uncommon_trap_offset = __ pc() - start; 2890 2891 __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); 2892 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), NULL_WORD); 2893 2894 uncommon_trap_offset = __ pc() - start; 2895 2896 // Save everything in sight. 2897 RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2898 // fetch_unroll_info needs to call last_java_frame() 2899 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 2900 2901 __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset()))); 2902 __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1); 2903 2904 __ movl(r14, Deoptimization::Unpack_reexecute); 2905 __ mov(c_rarg0, r15_thread); 2906 __ movl(c_rarg2, r14); // exec mode 2907 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); 2908 oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); 2909 2910 __ reset_last_Java_frame(false); 2911 2912 __ jmp(after_fetch_unroll_info_call); 2913 } // EnableJVMCI 2914 #endif // INCLUDE_JVMCI 2915 2916 int exception_offset = __ pc() - start; 2917 2918 // Prolog for exception case 2919 2920 // all registers are dead at this entry point, except for rax, and 2921 // rdx which contain the exception oop and exception pc 2922 // respectively. Set them in TLS and fall thru to the 2923 // unpack_with_exception_in_tls entry point. 2924 2925 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx); 2926 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax); 2927 2928 int exception_in_tls_offset = __ pc() - start; 2929 2930 // new implementation because exception oop is now passed in JavaThread 2931 2932 // Prolog for exception case 2933 // All registers must be preserved because they might be used by LinearScan 2934 // Exceptiop oop and throwing PC are passed in JavaThread 2935 // tos: stack at point of call to method that threw the exception (i.e. only 2936 // args are on the stack, no return address) 2937 2938 // make room on stack for the return address 2939 // It will be patched later with the throwing pc. The correct value is not 2940 // available now because loading it from memory would destroy registers. 2941 __ push(0); 2942 2943 // Save everything in sight. 2944 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2945 2946 // Now it is safe to overwrite any register 2947 2948 // Deopt during an exception. Save exec mode for unpack_frames. 2949 __ movl(r14, Deoptimization::Unpack_exception); // callee-saved 2950 2951 // load throwing pc from JavaThread and patch it as the return address 2952 // of the current frame. Then clear the field in JavaThread 2953 2954 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 2955 __ movptr(Address(rbp, wordSize), rdx); 2956 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD); 2957 2958 #ifdef ASSERT 2959 // verify that there is really an exception oop in JavaThread 2960 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 2961 __ verify_oop(rax); 2962 2963 // verify that there is no pending exception 2964 Label no_pending_exception; 2965 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 2966 __ testptr(rax, rax); 2967 __ jcc(Assembler::zero, no_pending_exception); 2968 __ stop("must not have pending exception here"); 2969 __ bind(no_pending_exception); 2970 #endif 2971 2972 __ bind(cont); 2973 2974 // Call C code. Need thread and this frame, but NOT official VM entry 2975 // crud. We cannot block on this call, no GC can happen. 2976 // 2977 // UnrollBlock* fetch_unroll_info(JavaThread* thread) 2978 2979 // fetch_unroll_info needs to call last_java_frame(). 2980 2981 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 2982 #ifdef ASSERT 2983 { Label L; 2984 __ cmpptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 2985 __ jcc(Assembler::equal, L); 2986 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); 2987 __ bind(L); 2988 } 2989 #endif // ASSERT 2990 __ mov(c_rarg0, r15_thread); 2991 __ movl(c_rarg1, r14); // exec_mode 2992 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); 2993 2994 // Need to have an oopmap that tells fetch_unroll_info where to 2995 // find any register it might need. 2996 oop_maps->add_gc_map(__ pc() - start, map); 2997 2998 __ reset_last_Java_frame(false); 2999 3000 #if INCLUDE_JVMCI 3001 if (EnableJVMCI) { 3002 __ bind(after_fetch_unroll_info_call); 3003 } 3004 #endif 3005 3006 // Load UnrollBlock* into rdi 3007 __ mov(rdi, rax); 3008 3009 __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset())); 3010 Label noException; 3011 __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending? 3012 __ jcc(Assembler::notEqual, noException); 3013 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 3014 // QQQ this is useless it was null above 3015 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 3016 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD); 3017 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD); 3018 3019 __ verify_oop(rax); 3020 3021 // Overwrite the result registers with the exception results. 3022 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3023 // I think this is useless 3024 __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx); 3025 3026 __ bind(noException); 3027 3028 // Only register save data is on the stack. 3029 // Now restore the result registers. Everything else is either dead 3030 // or captured in the vframeArray. 3031 RegisterSaver::restore_result_registers(masm); 3032 3033 // All of the register save area has been popped of the stack. Only the 3034 // return address remains. 3035 3036 // Pop all the frames we must move/replace. 3037 // 3038 // Frame picture (youngest to oldest) 3039 // 1: self-frame (no frame link) 3040 // 2: deopting frame (no frame link) 3041 // 3: caller of deopting frame (could be compiled/interpreted). 3042 // 3043 // Note: by leaving the return address of self-frame on the stack 3044 // and using the size of frame 2 to adjust the stack 3045 // when we are done the return to frame 3 will still be on the stack. 3046 3047 // Pop deoptimized frame 3048 __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset())); 3049 __ addptr(rsp, rcx); 3050 3051 // rsp should be pointing at the return address to the caller (3) 3052 3053 // Pick up the initial fp we should save 3054 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) 3055 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset())); 3056 3057 #ifdef ASSERT 3058 // Compilers generate code that bang the stack by as much as the 3059 // interpreter would need. So this stack banging should never 3060 // trigger a fault. Verify that it does not on non product builds. 3061 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset())); 3062 __ bang_stack_size(rbx, rcx); 3063 #endif 3064 3065 // Load address of array of frame pcs into rcx 3066 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset())); 3067 3068 // Trash the old pc 3069 __ addptr(rsp, wordSize); 3070 3071 // Load address of array of frame sizes into rsi 3072 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset())); 3073 3074 // Load counter into rdx 3075 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset())); 3076 3077 // Now adjust the caller's stack to make up for the extra locals 3078 // but record the original sp so that we can save it in the skeletal interpreter 3079 // frame and the stack walking of interpreter_sender will get the unextended sp 3080 // value and not the "real" sp value. 3081 3082 const Register sender_sp = r8; 3083 3084 __ mov(sender_sp, rsp); 3085 __ movl(rbx, Address(rdi, 3086 Deoptimization::UnrollBlock:: 3087 caller_adjustment_offset())); 3088 __ subptr(rsp, rbx); 3089 3090 // Push interpreter frames in a loop 3091 Label loop; 3092 __ bind(loop); 3093 __ movptr(rbx, Address(rsi, 0)); // Load frame size 3094 __ subptr(rbx, 2*wordSize); // We'll push pc and ebp by hand 3095 __ pushptr(Address(rcx, 0)); // Save return address 3096 __ enter(); // Save old & set new ebp 3097 __ subptr(rsp, rbx); // Prolog 3098 // This value is corrected by layout_activation_impl 3099 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); 3100 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable 3101 __ mov(sender_sp, rsp); // Pass sender_sp to next frame 3102 __ addptr(rsi, wordSize); // Bump array pointer (sizes) 3103 __ addptr(rcx, wordSize); // Bump array pointer (pcs) 3104 __ decrementl(rdx); // Decrement counter 3105 __ jcc(Assembler::notZero, loop); 3106 __ pushptr(Address(rcx, 0)); // Save final return address 3107 3108 // Re-push self-frame 3109 __ enter(); // Save old & set new ebp 3110 3111 // Allocate a full sized register save area. 3112 // Return address and rbp are in place, so we allocate two less words. 3113 __ subptr(rsp, (frame_size_in_words - 2) * wordSize); 3114 3115 // Restore frame locals after moving the frame 3116 __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0); 3117 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3118 3119 // Call C code. Need thread but NOT official VM entry 3120 // crud. We cannot block on this call, no GC can happen. Call should 3121 // restore return values to their stack-slots with the new SP. 3122 // 3123 // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) 3124 3125 // Use rbp because the frames look interpreted now 3126 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 3127 // Don't need the precise return PC here, just precise enough to point into this code blob. 3128 address the_pc = __ pc(); 3129 __ set_last_Java_frame(noreg, rbp, the_pc, rscratch1); 3130 3131 __ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by ABI 3132 __ mov(c_rarg0, r15_thread); 3133 __ movl(c_rarg1, r14); // second arg: exec_mode 3134 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 3135 // Revert SP alignment after call since we're going to do some SP relative addressing below 3136 __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset())); 3137 3138 // Set an oopmap for the call site 3139 // Use the same PC we used for the last java frame 3140 oop_maps->add_gc_map(the_pc - start, 3141 new OopMap( frame_size_in_words, 0 )); 3142 3143 // Clear fp AND pc 3144 __ reset_last_Java_frame(true); 3145 3146 // Collect return values 3147 __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes())); 3148 __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes())); 3149 // I think this is useless (throwing pc?) 3150 __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes())); 3151 3152 // Pop self-frame. 3153 __ leave(); // Epilog 3154 3155 // Jump to interpreter 3156 __ ret(0); 3157 3158 // Make sure all code is generated 3159 masm->flush(); 3160 3161 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); 3162 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); 3163 #if INCLUDE_JVMCI 3164 if (EnableJVMCI) { 3165 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); 3166 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); 3167 } 3168 #endif 3169 } 3170 3171 #ifdef COMPILER2 3172 //------------------------------generate_uncommon_trap_blob-------------------- 3173 void SharedRuntime::generate_uncommon_trap_blob() { 3174 // Allocate space for the code 3175 ResourceMark rm; 3176 // Setup code generation tools 3177 CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); 3178 MacroAssembler* masm = new MacroAssembler(&buffer); 3179 3180 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); 3181 3182 address start = __ pc(); 3183 3184 if (UseRTMLocking) { 3185 // Abort RTM transaction before possible nmethod deoptimization. 3186 __ xabort(0); 3187 } 3188 3189 // Push self-frame. We get here with a return address on the 3190 // stack, so rsp is 8-byte aligned until we allocate our frame. 3191 __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog! 3192 3193 // No callee saved registers. rbp is assumed implicitly saved 3194 __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); 3195 3196 // compiler left unloaded_class_index in j_rarg0 move to where the 3197 // runtime expects it. 3198 __ movl(c_rarg1, j_rarg0); 3199 3200 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 3201 3202 // Call C code. Need thread but NOT official VM entry 3203 // crud. We cannot block on this call, no GC can happen. Call should 3204 // capture callee-saved registers as well as return values. 3205 // Thread is in rdi already. 3206 // 3207 // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); 3208 3209 __ mov(c_rarg0, r15_thread); 3210 __ movl(c_rarg2, Deoptimization::Unpack_uncommon_trap); 3211 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); 3212 3213 // Set an oopmap for the call site 3214 OopMapSet* oop_maps = new OopMapSet(); 3215 OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); 3216 3217 // location of rbp is known implicitly by the frame sender code 3218 3219 oop_maps->add_gc_map(__ pc() - start, map); 3220 3221 __ reset_last_Java_frame(false); 3222 3223 // Load UnrollBlock* into rdi 3224 __ mov(rdi, rax); 3225 3226 #ifdef ASSERT 3227 { Label L; 3228 __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset()), 3229 Deoptimization::Unpack_uncommon_trap); 3230 __ jcc(Assembler::equal, L); 3231 __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap"); 3232 __ bind(L); 3233 } 3234 #endif 3235 3236 // Pop all the frames we must move/replace. 3237 // 3238 // Frame picture (youngest to oldest) 3239 // 1: self-frame (no frame link) 3240 // 2: deopting frame (no frame link) 3241 // 3: caller of deopting frame (could be compiled/interpreted). 3242 3243 // Pop self-frame. We have no frame, and must rely only on rax and rsp. 3244 __ addptr(rsp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt); // Epilog! 3245 3246 // Pop deoptimized frame (int) 3247 __ movl(rcx, Address(rdi, 3248 Deoptimization::UnrollBlock:: 3249 size_of_deoptimized_frame_offset())); 3250 __ addptr(rsp, rcx); 3251 3252 // rsp should be pointing at the return address to the caller (3) 3253 3254 // Pick up the initial fp we should save 3255 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) 3256 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset())); 3257 3258 #ifdef ASSERT 3259 // Compilers generate code that bang the stack by as much as the 3260 // interpreter would need. So this stack banging should never 3261 // trigger a fault. Verify that it does not on non product builds. 3262 __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset())); 3263 __ bang_stack_size(rbx, rcx); 3264 #endif 3265 3266 // Load address of array of frame pcs into rcx (address*) 3267 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset())); 3268 3269 // Trash the return pc 3270 __ addptr(rsp, wordSize); 3271 3272 // Load address of array of frame sizes into rsi (intptr_t*) 3273 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock:: frame_sizes_offset())); 3274 3275 // Counter 3276 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock:: number_of_frames_offset())); // (int) 3277 3278 // Now adjust the caller's stack to make up for the extra locals but 3279 // record the original sp so that we can save it in the skeletal 3280 // interpreter frame and the stack walking of interpreter_sender 3281 // will get the unextended sp value and not the "real" sp value. 3282 3283 const Register sender_sp = r8; 3284 3285 __ mov(sender_sp, rsp); 3286 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock:: caller_adjustment_offset())); // (int) 3287 __ subptr(rsp, rbx); 3288 3289 // Push interpreter frames in a loop 3290 Label loop; 3291 __ bind(loop); 3292 __ movptr(rbx, Address(rsi, 0)); // Load frame size 3293 __ subptr(rbx, 2 * wordSize); // We'll push pc and rbp by hand 3294 __ pushptr(Address(rcx, 0)); // Save return address 3295 __ enter(); // Save old & set new rbp 3296 __ subptr(rsp, rbx); // Prolog 3297 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), 3298 sender_sp); // Make it walkable 3299 // This value is corrected by layout_activation_impl 3300 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); 3301 __ mov(sender_sp, rsp); // Pass sender_sp to next frame 3302 __ addptr(rsi, wordSize); // Bump array pointer (sizes) 3303 __ addptr(rcx, wordSize); // Bump array pointer (pcs) 3304 __ decrementl(rdx); // Decrement counter 3305 __ jcc(Assembler::notZero, loop); 3306 __ pushptr(Address(rcx, 0)); // Save final return address 3307 3308 // Re-push self-frame 3309 __ enter(); // Save old & set new rbp 3310 __ subptr(rsp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt); 3311 // Prolog 3312 3313 // Use rbp because the frames look interpreted now 3314 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 3315 // Don't need the precise return PC here, just precise enough to point into this code blob. 3316 address the_pc = __ pc(); 3317 __ set_last_Java_frame(noreg, rbp, the_pc, rscratch1); 3318 3319 // Call C code. Need thread but NOT official VM entry 3320 // crud. We cannot block on this call, no GC can happen. Call should 3321 // restore return values to their stack-slots with the new SP. 3322 // Thread is in rdi already. 3323 // 3324 // BasicType unpack_frames(JavaThread* thread, int exec_mode); 3325 3326 __ andptr(rsp, -(StackAlignmentInBytes)); // Align SP as required by ABI 3327 __ mov(c_rarg0, r15_thread); 3328 __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap); 3329 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 3330 3331 // Set an oopmap for the call site 3332 // Use the same PC we used for the last java frame 3333 oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); 3334 3335 // Clear fp AND pc 3336 __ reset_last_Java_frame(true); 3337 3338 // Pop self-frame. 3339 __ leave(); // Epilog 3340 3341 // Jump to interpreter 3342 __ ret(0); 3343 3344 // Make sure all code is generated 3345 masm->flush(); 3346 3347 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, 3348 SimpleRuntimeFrame::framesize >> 1); 3349 } 3350 #endif // COMPILER2 3351 3352 //------------------------------generate_handler_blob------ 3353 // 3354 // Generate a special Compile2Runtime blob that saves all registers, 3355 // and setup oopmap. 3356 // 3357 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { 3358 assert(StubRoutines::forward_exception_entry() != nullptr, 3359 "must be generated before"); 3360 3361 ResourceMark rm; 3362 OopMapSet *oop_maps = new OopMapSet(); 3363 OopMap* map; 3364 3365 // Allocate space for the code. Setup code generation tools. 3366 CodeBuffer buffer("handler_blob", 2048, 1024); 3367 MacroAssembler* masm = new MacroAssembler(&buffer); 3368 3369 address start = __ pc(); 3370 address call_pc = nullptr; 3371 int frame_size_in_words; 3372 bool cause_return = (poll_type == POLL_AT_RETURN); 3373 bool save_wide_vectors = (poll_type == POLL_AT_VECTOR_LOOP); 3374 3375 if (UseRTMLocking) { 3376 // Abort RTM transaction before calling runtime 3377 // because critical section will be large and will be 3378 // aborted anyway. Also nmethod could be deoptimized. 3379 __ xabort(0); 3380 } 3381 3382 // Make room for return address (or push it again) 3383 if (!cause_return) { 3384 __ push(rbx); 3385 } 3386 3387 // Save registers, fpu state, and flags 3388 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_wide_vectors); 3389 3390 // The following is basically a call_VM. However, we need the precise 3391 // address of the call in order to generate an oopmap. Hence, we do all the 3392 // work ourselves. 3393 3394 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); // JavaFrameAnchor::capture_last_Java_pc() will get the pc from the return address, which we store next: 3395 3396 // The return address must always be correct so that frame constructor never 3397 // sees an invalid pc. 3398 3399 if (!cause_return) { 3400 // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack. 3401 // Additionally, rbx is a callee saved register and we can look at it later to determine 3402 // if someone changed the return address for us! 3403 __ movptr(rbx, Address(r15_thread, JavaThread::saved_exception_pc_offset())); 3404 __ movptr(Address(rbp, wordSize), rbx); 3405 } 3406 3407 // Do the call 3408 __ mov(c_rarg0, r15_thread); 3409 __ call(RuntimeAddress(call_ptr)); 3410 3411 // Set an oopmap for the call site. This oopmap will map all 3412 // oop-registers and debug-info registers as callee-saved. This 3413 // will allow deoptimization at this safepoint to find all possible 3414 // debug-info recordings, as well as let GC find all oops. 3415 3416 oop_maps->add_gc_map( __ pc() - start, map); 3417 3418 Label noException; 3419 3420 __ reset_last_Java_frame(false); 3421 3422 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 3423 __ jcc(Assembler::equal, noException); 3424 3425 // Exception pending 3426 3427 RegisterSaver::restore_live_registers(masm, save_wide_vectors); 3428 3429 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3430 3431 // No exception case 3432 __ bind(noException); 3433 3434 Label no_adjust; 3435 #ifdef ASSERT 3436 Label bail; 3437 #endif 3438 if (!cause_return) { 3439 Label no_prefix, not_special; 3440 3441 // If our stashed return pc was modified by the runtime we avoid touching it 3442 __ cmpptr(rbx, Address(rbp, wordSize)); 3443 __ jccb(Assembler::notEqual, no_adjust); 3444 3445 // Skip over the poll instruction. 3446 // See NativeInstruction::is_safepoint_poll() 3447 // Possible encodings: 3448 // 85 00 test %eax,(%rax) 3449 // 85 01 test %eax,(%rcx) 3450 // 85 02 test %eax,(%rdx) 3451 // 85 03 test %eax,(%rbx) 3452 // 85 06 test %eax,(%rsi) 3453 // 85 07 test %eax,(%rdi) 3454 // 3455 // 41 85 00 test %eax,(%r8) 3456 // 41 85 01 test %eax,(%r9) 3457 // 41 85 02 test %eax,(%r10) 3458 // 41 85 03 test %eax,(%r11) 3459 // 41 85 06 test %eax,(%r14) 3460 // 41 85 07 test %eax,(%r15) 3461 // 3462 // 85 04 24 test %eax,(%rsp) 3463 // 41 85 04 24 test %eax,(%r12) 3464 // 85 45 00 test %eax,0x0(%rbp) 3465 // 41 85 45 00 test %eax,0x0(%r13) 3466 3467 __ cmpb(Address(rbx, 0), NativeTstRegMem::instruction_rex_b_prefix); 3468 __ jcc(Assembler::notEqual, no_prefix); 3469 __ addptr(rbx, 1); 3470 __ bind(no_prefix); 3471 #ifdef ASSERT 3472 __ movptr(rax, rbx); // remember where 0x85 should be, for verification below 3473 #endif 3474 // r12/r13/rsp/rbp base encoding takes 3 bytes with the following register values: 3475 // r12/rsp 0x04 3476 // r13/rbp 0x05 3477 __ movzbq(rcx, Address(rbx, 1)); 3478 __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05 3479 __ subptr(rcx, 4); // looking for 0x00 .. 0x01 3480 __ cmpptr(rcx, 1); 3481 __ jcc(Assembler::above, not_special); 3482 __ addptr(rbx, 1); 3483 __ bind(not_special); 3484 #ifdef ASSERT 3485 // Verify the correct encoding of the poll we're about to skip. 3486 __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl); 3487 __ jcc(Assembler::notEqual, bail); 3488 // Mask out the modrm bits 3489 __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask); 3490 // rax encodes to 0, so if the bits are nonzero it's incorrect 3491 __ jcc(Assembler::notZero, bail); 3492 #endif 3493 // Adjust return pc forward to step over the safepoint poll instruction 3494 __ addptr(rbx, 2); 3495 __ movptr(Address(rbp, wordSize), rbx); 3496 } 3497 3498 __ bind(no_adjust); 3499 // Normal exit, restore registers and exit. 3500 RegisterSaver::restore_live_registers(masm, save_wide_vectors); 3501 __ ret(0); 3502 3503 #ifdef ASSERT 3504 __ bind(bail); 3505 __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); 3506 #endif 3507 3508 // Make sure all code is generated 3509 masm->flush(); 3510 3511 // Fill-out other meta info 3512 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); 3513 } 3514 3515 // 3516 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss 3517 // 3518 // Generate a stub that calls into vm to find out the proper destination 3519 // of a java call. All the argument registers are live at this point 3520 // but since this is generic code we don't know what they are and the caller 3521 // must do any gc of the args. 3522 // 3523 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { 3524 assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before"); 3525 3526 // allocate space for the code 3527 ResourceMark rm; 3528 3529 CodeBuffer buffer(name, 1200, 512); 3530 MacroAssembler* masm = new MacroAssembler(&buffer); 3531 3532 int frame_size_in_words; 3533 3534 OopMapSet *oop_maps = new OopMapSet(); 3535 OopMap* map = nullptr; 3536 3537 int start = __ offset(); 3538 3539 // No need to save vector registers since they are caller-saved anyway. 3540 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ false); 3541 3542 int frame_complete = __ offset(); 3543 3544 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 3545 3546 __ mov(c_rarg0, r15_thread); 3547 3548 __ call(RuntimeAddress(destination)); 3549 3550 3551 // Set an oopmap for the call site. 3552 // We need this not only for callee-saved registers, but also for volatile 3553 // registers that the compiler might be keeping live across a safepoint. 3554 3555 oop_maps->add_gc_map( __ offset() - start, map); 3556 3557 // rax contains the address we are going to jump to assuming no exception got installed 3558 3559 // clear last_Java_sp 3560 __ reset_last_Java_frame(false); 3561 // check for pending exceptions 3562 Label pending; 3563 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 3564 __ jcc(Assembler::notEqual, pending); 3565 3566 // get the returned Method* 3567 __ get_vm_result_2(rbx, r15_thread); 3568 __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx); 3569 3570 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3571 3572 RegisterSaver::restore_live_registers(masm); 3573 3574 // We are back to the original state on entry and ready to go. 3575 3576 __ jmp(rax); 3577 3578 // Pending exception after the safepoint 3579 3580 __ bind(pending); 3581 3582 RegisterSaver::restore_live_registers(masm); 3583 3584 // exception pending => remove activation and forward to exception handler 3585 3586 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD); 3587 3588 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 3589 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3590 3591 // ------------- 3592 // make sure all code is generated 3593 masm->flush(); 3594 3595 // return the blob 3596 // frame_size_words or bytes?? 3597 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); 3598 } 3599 3600 //------------------------------Montgomery multiplication------------------------ 3601 // 3602 3603 #ifndef _WINDOWS 3604 3605 // Subtract 0:b from carry:a. Return carry. 3606 static julong 3607 sub(julong a[], julong b[], julong carry, long len) { 3608 long long i = 0, cnt = len; 3609 julong tmp; 3610 asm volatile("clc; " 3611 "0: ; " 3612 "mov (%[b], %[i], 8), %[tmp]; " 3613 "sbb %[tmp], (%[a], %[i], 8); " 3614 "inc %[i]; dec %[cnt]; " 3615 "jne 0b; " 3616 "mov %[carry], %[tmp]; sbb $0, %[tmp]; " 3617 : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp) 3618 : [a]"r"(a), [b]"r"(b), [carry]"r"(carry) 3619 : "memory"); 3620 return tmp; 3621 } 3622 3623 // Multiply (unsigned) Long A by Long B, accumulating the double- 3624 // length result into the accumulator formed of T0, T1, and T2. 3625 #define MACC(A, B, T0, T1, T2) \ 3626 do { \ 3627 unsigned long hi, lo; \ 3628 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 3629 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 3630 : "r"(A), "a"(B) : "cc"); \ 3631 } while(0) 3632 3633 // As above, but add twice the double-length result into the 3634 // accumulator. 3635 #define MACC2(A, B, T0, T1, T2) \ 3636 do { \ 3637 unsigned long hi, lo; \ 3638 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \ 3639 "add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 3640 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 3641 : "r"(A), "a"(B) : "cc"); \ 3642 } while(0) 3643 3644 #else //_WINDOWS 3645 3646 static julong 3647 sub(julong a[], julong b[], julong carry, long len) { 3648 long i; 3649 julong tmp; 3650 unsigned char c = 1; 3651 for (i = 0; i < len; i++) { 3652 c = _addcarry_u64(c, a[i], ~b[i], &tmp); 3653 a[i] = tmp; 3654 } 3655 c = _addcarry_u64(c, carry, ~0, &tmp); 3656 return tmp; 3657 } 3658 3659 // Multiply (unsigned) Long A by Long B, accumulating the double- 3660 // length result into the accumulator formed of T0, T1, and T2. 3661 #define MACC(A, B, T0, T1, T2) \ 3662 do { \ 3663 julong hi, lo; \ 3664 lo = _umul128(A, B, &hi); \ 3665 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \ 3666 c = _addcarry_u64(c, hi, T1, &T1); \ 3667 _addcarry_u64(c, T2, 0, &T2); \ 3668 } while(0) 3669 3670 // As above, but add twice the double-length result into the 3671 // accumulator. 3672 #define MACC2(A, B, T0, T1, T2) \ 3673 do { \ 3674 julong hi, lo; \ 3675 lo = _umul128(A, B, &hi); \ 3676 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \ 3677 c = _addcarry_u64(c, hi, T1, &T1); \ 3678 _addcarry_u64(c, T2, 0, &T2); \ 3679 c = _addcarry_u64(0, lo, T0, &T0); \ 3680 c = _addcarry_u64(c, hi, T1, &T1); \ 3681 _addcarry_u64(c, T2, 0, &T2); \ 3682 } while(0) 3683 3684 #endif //_WINDOWS 3685 3686 // Fast Montgomery multiplication. The derivation of the algorithm is 3687 // in A Cryptographic Library for the Motorola DSP56000, 3688 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 3689 3690 static void NOINLINE 3691 montgomery_multiply(julong a[], julong b[], julong n[], 3692 julong m[], julong inv, int len) { 3693 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 3694 int i; 3695 3696 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery multiply"); 3697 3698 for (i = 0; i < len; i++) { 3699 int j; 3700 for (j = 0; j < i; j++) { 3701 MACC(a[j], b[i-j], t0, t1, t2); 3702 MACC(m[j], n[i-j], t0, t1, t2); 3703 } 3704 MACC(a[i], b[0], t0, t1, t2); 3705 m[i] = t0 * inv; 3706 MACC(m[i], n[0], t0, t1, t2); 3707 3708 assert(t0 == 0, "broken Montgomery multiply"); 3709 3710 t0 = t1; t1 = t2; t2 = 0; 3711 } 3712 3713 for (i = len; i < 2*len; i++) { 3714 int j; 3715 for (j = i-len+1; j < len; j++) { 3716 MACC(a[j], b[i-j], t0, t1, t2); 3717 MACC(m[j], n[i-j], t0, t1, t2); 3718 } 3719 m[i-len] = t0; 3720 t0 = t1; t1 = t2; t2 = 0; 3721 } 3722 3723 while (t0) 3724 t0 = sub(m, n, t0, len); 3725 } 3726 3727 // Fast Montgomery squaring. This uses asymptotically 25% fewer 3728 // multiplies so it should be up to 25% faster than Montgomery 3729 // multiplication. However, its loop control is more complex and it 3730 // may actually run slower on some machines. 3731 3732 static void NOINLINE 3733 montgomery_square(julong a[], julong n[], 3734 julong m[], julong inv, int len) { 3735 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 3736 int i; 3737 3738 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery square"); 3739 3740 for (i = 0; i < len; i++) { 3741 int j; 3742 int end = (i+1)/2; 3743 for (j = 0; j < end; j++) { 3744 MACC2(a[j], a[i-j], t0, t1, t2); 3745 MACC(m[j], n[i-j], t0, t1, t2); 3746 } 3747 if ((i & 1) == 0) { 3748 MACC(a[j], a[j], t0, t1, t2); 3749 } 3750 for (; j < i; j++) { 3751 MACC(m[j], n[i-j], t0, t1, t2); 3752 } 3753 m[i] = t0 * inv; 3754 MACC(m[i], n[0], t0, t1, t2); 3755 3756 assert(t0 == 0, "broken Montgomery square"); 3757 3758 t0 = t1; t1 = t2; t2 = 0; 3759 } 3760 3761 for (i = len; i < 2*len; i++) { 3762 int start = i-len+1; 3763 int end = start + (len - start)/2; 3764 int j; 3765 for (j = start; j < end; j++) { 3766 MACC2(a[j], a[i-j], t0, t1, t2); 3767 MACC(m[j], n[i-j], t0, t1, t2); 3768 } 3769 if ((i & 1) == 0) { 3770 MACC(a[j], a[j], t0, t1, t2); 3771 } 3772 for (; j < len; j++) { 3773 MACC(m[j], n[i-j], t0, t1, t2); 3774 } 3775 m[i-len] = t0; 3776 t0 = t1; t1 = t2; t2 = 0; 3777 } 3778 3779 while (t0) 3780 t0 = sub(m, n, t0, len); 3781 } 3782 3783 // Swap words in a longword. 3784 static julong swap(julong x) { 3785 return (x << 32) | (x >> 32); 3786 } 3787 3788 // Copy len longwords from s to d, word-swapping as we go. The 3789 // destination array is reversed. 3790 static void reverse_words(julong *s, julong *d, int len) { 3791 d += len; 3792 while(len-- > 0) { 3793 d--; 3794 *d = swap(*s); 3795 s++; 3796 } 3797 } 3798 3799 // The threshold at which squaring is advantageous was determined 3800 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. 3801 #define MONTGOMERY_SQUARING_THRESHOLD 64 3802 3803 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, 3804 jint len, jlong inv, 3805 jint *m_ints) { 3806 assert(len % 2 == 0, "array length in montgomery_multiply must be even"); 3807 int longwords = len/2; 3808 3809 // Make very sure we don't use so much space that the stack might 3810 // overflow. 512 jints corresponds to an 16384-bit integer and 3811 // will use here a total of 8k bytes of stack space. 3812 int divisor = sizeof(julong) * 4; 3813 guarantee(longwords <= 8192 / divisor, "must be"); 3814 int total_allocation = longwords * sizeof (julong) * 4; 3815 julong *scratch = (julong *)alloca(total_allocation); 3816 3817 // Local scratch arrays 3818 julong 3819 *a = scratch + 0 * longwords, 3820 *b = scratch + 1 * longwords, 3821 *n = scratch + 2 * longwords, 3822 *m = scratch + 3 * longwords; 3823 3824 reverse_words((julong *)a_ints, a, longwords); 3825 reverse_words((julong *)b_ints, b, longwords); 3826 reverse_words((julong *)n_ints, n, longwords); 3827 3828 ::montgomery_multiply(a, b, n, m, (julong)inv, longwords); 3829 3830 reverse_words(m, (julong *)m_ints, longwords); 3831 } 3832 3833 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, 3834 jint len, jlong inv, 3835 jint *m_ints) { 3836 assert(len % 2 == 0, "array length in montgomery_square must be even"); 3837 int longwords = len/2; 3838 3839 // Make very sure we don't use so much space that the stack might 3840 // overflow. 512 jints corresponds to an 16384-bit integer and 3841 // will use here a total of 6k bytes of stack space. 3842 int divisor = sizeof(julong) * 3; 3843 guarantee(longwords <= (8192 / divisor), "must be"); 3844 int total_allocation = longwords * sizeof (julong) * 3; 3845 julong *scratch = (julong *)alloca(total_allocation); 3846 3847 // Local scratch arrays 3848 julong 3849 *a = scratch + 0 * longwords, 3850 *n = scratch + 1 * longwords, 3851 *m = scratch + 2 * longwords; 3852 3853 reverse_words((julong *)a_ints, a, longwords); 3854 reverse_words((julong *)n_ints, n, longwords); 3855 3856 if (len >= MONTGOMERY_SQUARING_THRESHOLD) { 3857 ::montgomery_square(a, n, m, (julong)inv, longwords); 3858 } else { 3859 ::montgomery_multiply(a, a, n, m, (julong)inv, longwords); 3860 } 3861 3862 reverse_words(m, (julong *)m_ints, longwords); 3863 } 3864 3865 #ifdef COMPILER2 3866 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame 3867 // 3868 //------------------------------generate_exception_blob--------------------------- 3869 // creates exception blob at the end 3870 // Using exception blob, this code is jumped from a compiled method. 3871 // (see emit_exception_handler in x86_64.ad file) 3872 // 3873 // Given an exception pc at a call we call into the runtime for the 3874 // handler in this method. This handler might merely restore state 3875 // (i.e. callee save registers) unwind the frame and jump to the 3876 // exception handler for the nmethod if there is no Java level handler 3877 // for the nmethod. 3878 // 3879 // This code is entered with a jmp. 3880 // 3881 // Arguments: 3882 // rax: exception oop 3883 // rdx: exception pc 3884 // 3885 // Results: 3886 // rax: exception oop 3887 // rdx: exception pc in caller or ??? 3888 // destination: exception handler of caller 3889 // 3890 // Note: the exception pc MUST be at a call (precise debug information) 3891 // Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved. 3892 // 3893 3894 void OptoRuntime::generate_exception_blob() { 3895 assert(!OptoRuntime::is_callee_saved_register(RDX_num), ""); 3896 assert(!OptoRuntime::is_callee_saved_register(RAX_num), ""); 3897 assert(!OptoRuntime::is_callee_saved_register(RCX_num), ""); 3898 3899 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); 3900 3901 // Allocate space for the code 3902 ResourceMark rm; 3903 // Setup code generation tools 3904 CodeBuffer buffer("exception_blob", 2048, 1024); 3905 MacroAssembler* masm = new MacroAssembler(&buffer); 3906 3907 3908 address start = __ pc(); 3909 3910 // Exception pc is 'return address' for stack walker 3911 __ push(rdx); 3912 __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog 3913 3914 // Save callee-saved registers. See x86_64.ad. 3915 3916 // rbp is an implicitly saved callee saved register (i.e., the calling 3917 // convention will save/restore it in the prolog/epilog). Other than that 3918 // there are no callee save registers now that adapter frames are gone. 3919 3920 __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); 3921 3922 // Store exception in Thread object. We cannot pass any arguments to the 3923 // handle_exception call, since we do not want to make any assumption 3924 // about the size of the frame where the exception happened in. 3925 // c_rarg0 is either rdi (Linux) or rcx (Windows). 3926 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()),rax); 3927 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx); 3928 3929 // This call does all the hard work. It checks if an exception handler 3930 // exists in the method. 3931 // If so, it returns the handler address. 3932 // If not, it prepares for stack-unwinding, restoring the callee-save 3933 // registers of the frame being removed. 3934 // 3935 // address OptoRuntime::handle_exception_C(JavaThread* thread) 3936 3937 // At a method handle call, the stack may not be properly aligned 3938 // when returning with an exception. 3939 address the_pc = __ pc(); 3940 __ set_last_Java_frame(noreg, noreg, the_pc, rscratch1); 3941 __ mov(c_rarg0, r15_thread); 3942 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack 3943 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C))); 3944 3945 // Set an oopmap for the call site. This oopmap will only be used if we 3946 // are unwinding the stack. Hence, all locations will be dead. 3947 // Callee-saved registers will be the same as the frame above (i.e., 3948 // handle_exception_stub), since they were restored when we got the 3949 // exception. 3950 3951 OopMapSet* oop_maps = new OopMapSet(); 3952 3953 oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); 3954 3955 __ reset_last_Java_frame(false); 3956 3957 // Restore callee-saved registers 3958 3959 // rbp is an implicitly saved callee-saved register (i.e., the calling 3960 // convention will save restore it in prolog/epilog) Other than that 3961 // there are no callee save registers now that adapter frames are gone. 3962 3963 __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt)); 3964 3965 __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog 3966 __ pop(rdx); // No need for exception pc anymore 3967 3968 // rax: exception handler 3969 3970 // We have a handler in rax (could be deopt blob). 3971 __ mov(r8, rax); 3972 3973 // Get the exception oop 3974 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 3975 // Get the exception pc in case we are deoptimized 3976 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 3977 #ifdef ASSERT 3978 __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), NULL_WORD); 3979 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD); 3980 #endif 3981 // Clear the exception oop so GC no longer processes it as a root. 3982 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD); 3983 3984 // rax: exception oop 3985 // r8: exception handler 3986 // rdx: exception pc 3987 // Jump to handler 3988 3989 __ jmp(r8); 3990 3991 // Make sure all code is generated 3992 masm->flush(); 3993 3994 // Set exception blob 3995 _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); 3996 } 3997 #endif // COMPILER2 3998 3999 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { 4000 BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K); 4001 CodeBuffer buffer(buf); 4002 short buffer_locs[20]; 4003 buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, 4004 sizeof(buffer_locs)/sizeof(relocInfo)); 4005 4006 MacroAssembler* masm = new MacroAssembler(&buffer); 4007 4008 const Array<SigEntry>* sig_vk = vk->extended_sig(); 4009 const Array<VMRegPair>* regs = vk->return_regs(); 4010 4011 int pack_fields_jobject_off = __ offset(); 4012 // Resolve pre-allocated buffer from JNI handle. 4013 // We cannot do this in generate_call_stub() because it requires GC code to be initialized. 4014 __ movptr(rax, Address(r13, 0)); 4015 __ resolve_jobject(rax /* value */, 4016 r15_thread /* thread */, 4017 r12 /* tmp */); 4018 __ movptr(Address(r13, 0), rax); 4019 4020 int pack_fields_off = __ offset(); 4021 4022 int j = 1; 4023 for (int i = 0; i < sig_vk->length(); i++) { 4024 BasicType bt = sig_vk->at(i)._bt; 4025 if (bt == T_METADATA) { 4026 continue; 4027 } 4028 if (bt == T_VOID) { 4029 if (sig_vk->at(i-1)._bt == T_LONG || 4030 sig_vk->at(i-1)._bt == T_DOUBLE) { 4031 j++; 4032 } 4033 continue; 4034 } 4035 int off = sig_vk->at(i)._offset; 4036 assert(off > 0, "offset in object should be positive"); 4037 VMRegPair pair = regs->at(j); 4038 VMReg r_1 = pair.first(); 4039 VMReg r_2 = pair.second(); 4040 Address to(rax, off); 4041 if (bt == T_FLOAT) { 4042 __ movflt(to, r_1->as_XMMRegister()); 4043 } else if (bt == T_DOUBLE) { 4044 __ movdbl(to, r_1->as_XMMRegister()); 4045 } else { 4046 Register val = r_1->as_Register(); 4047 assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1); 4048 if (is_reference_type(bt)) { 4049 __ store_heap_oop(to, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); 4050 } else { 4051 __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt)); 4052 } 4053 } 4054 j++; 4055 } 4056 assert(j == regs->length(), "missed a field?"); 4057 4058 __ ret(0); 4059 4060 int unpack_fields_off = __ offset(); 4061 4062 Label skip; 4063 __ testptr(rax, rax); 4064 __ jcc(Assembler::zero, skip); 4065 4066 j = 1; 4067 for (int i = 0; i < sig_vk->length(); i++) { 4068 BasicType bt = sig_vk->at(i)._bt; 4069 if (bt == T_METADATA) { 4070 continue; 4071 } 4072 if (bt == T_VOID) { 4073 if (sig_vk->at(i-1)._bt == T_LONG || 4074 sig_vk->at(i-1)._bt == T_DOUBLE) { 4075 j++; 4076 } 4077 continue; 4078 } 4079 int off = sig_vk->at(i)._offset; 4080 assert(off > 0, "offset in object should be positive"); 4081 VMRegPair pair = regs->at(j); 4082 VMReg r_1 = pair.first(); 4083 VMReg r_2 = pair.second(); 4084 Address from(rax, off); 4085 if (bt == T_FLOAT) { 4086 __ movflt(r_1->as_XMMRegister(), from); 4087 } else if (bt == T_DOUBLE) { 4088 __ movdbl(r_1->as_XMMRegister(), from); 4089 } else if (bt == T_OBJECT || bt == T_ARRAY) { 4090 assert_different_registers(rax, r_1->as_Register()); 4091 __ load_heap_oop(r_1->as_Register(), from); 4092 } else { 4093 assert(is_java_primitive(bt), "unexpected basic type"); 4094 assert_different_registers(rax, r_1->as_Register()); 4095 size_t size_in_bytes = type2aelembytes(bt); 4096 __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); 4097 } 4098 j++; 4099 } 4100 assert(j == regs->length(), "missed a field?"); 4101 4102 __ bind(skip); 4103 __ ret(0); 4104 4105 __ flush(); 4106 4107 return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off); 4108 }