/*
 * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#ifndef _WINDOWS
#include "alloca.h"
#endif
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/symbolTable.hpp"
#include "code/compiledIC.hpp"
#include "code/debugInfoRec.hpp"
#include "code/nativeInst.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/gcLocker.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/klass.inline.hpp"
#include "oops/method.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/continuation.hpp"
#include "runtime/continuationEntry.inline.hpp"
#include "runtime/globals.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/vframeArray.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/align.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/formatBuffer.hpp"
#include "vmreg_x86.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
#if INCLUDE_JVMCI
#include "jvmci/jvmciJavaClasses.hpp"
#endif

#define __ masm->

const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;

class SimpleRuntimeFrame {

 public:

  // Most of the runtime stubs have this simple frame layout.
  // This class exists to make the layout shared in one place.
  // Offsets are for compiler stack slots, which are jints.
  enum layout {
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
    rbp_off2,
    return_off, return_off2,
    framesize
  };
};

class RegisterSaver {
  // Capture info about frame layout. Layout offsets are in jint
  // units because compiler frame slots are jints.
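  // The magic byte offsets below correspond to the standard x86-64 XSAVE
  // image, assuming the processor-default (non-compacted) extended-state
  // layout: the legacy FXSAVE region places xmm0 at byte 160, the XSAVE
  // header occupies bytes 512..575, and the extended regions for the upper
  // YMM halves, the opmask registers, the upper ZMM halves and the
  // upper-bank ZMM registers (16..31) start at bytes 576, 1088, 1152 and
  // 1664 respectively.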
#define XSAVE_AREA_BEGIN 160
#define XSAVE_AREA_YMM_BEGIN 576
#define XSAVE_AREA_OPMASK_BEGIN 1088
#define XSAVE_AREA_ZMM_BEGIN 1152
#define XSAVE_AREA_UPPERBANK 1664
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
#define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
#define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum)*32/BytesPerInt, zmm ## regnum ## H_off
#define DEF_OPMASK_OFFS(regnum) opmask ## regnum ## _off = opmask_off + (regnum)*8/BytesPerInt, opmask ## regnum ## H_off
#define DEF_ZMM_UPPER_OFFS(regnum) zmm ## regnum ## _off = zmm_upper_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
  enum layout {
    fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
    xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt,     // offset in fxsave save area
    DEF_XMM_OFFS(0),
    DEF_XMM_OFFS(1),
    // 2..15 are implied in range usage
    ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_YMM_OFFS(0),
    DEF_YMM_OFFS(1),
    // 2..15 are implied in range usage
    opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_OPMASK_OFFS(0),
    DEF_OPMASK_OFFS(1),
    // 2..7 are implied in range usage
    zmm_off = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_ZMM_OFFS(0),
    DEF_ZMM_OFFS(1),
    zmm_upper_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_ZMM_UPPER_OFFS(16),
    DEF_ZMM_UPPER_OFFS(17),
    // 18..31 are implied in range usage
    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
    fpu_stateH_end,
    r15_off, r15H_off,
    r14_off, r14H_off,
    r13_off, r13H_off,
    r12_off, r12H_off,
    r11_off, r11H_off,
    r10_off, r10H_off,
    r9_off,  r9H_off,
    r8_off,  r8H_off,
    rdi_off, rdiH_off,
    rsi_off, rsiH_off,
    ignore_off, ignoreH_off,  // extra copy of rbp
    rsp_off, rspH_off,
    rbx_off, rbxH_off,
    rdx_off, rdxH_off,
    rcx_off, rcxH_off,
    rax_off, raxH_off,
    // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
    align_off, alignH_off,
    flags_off, flagsH_off,
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off, rbpH_off,        // copy of rbp we will restore
    return_off, returnH_off,  // slot for return address
    reg_save_size             // size in compiler stack slots
  };

 public:
  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors);
  static void restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors = false);

  // Offsets into the register save area
  // Used by deoptimization when it is managing result register
  // values on its own

  static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
  static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
  static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
  static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
  static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }

  // During deoptimization only the result registers need to be restored,
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};

OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) {
  int off = 0;
  int num_xmm_regs = XMMRegister::available_xmm_registers();
#if COMPILER2_OR_JVMCI
  if (save_wide_vectors && UseAVX == 0) {
    save_wide_vectors = false; // vectors larger than 16 bytes are supported only with AVX
  }
  assert(!save_wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
#else
  save_wide_vectors = false; // vectors are generated only by C2 and JVMCI
#endif

  // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
  int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
  // OopMap frame size is in compiler stack slots (jints) not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;

  // Save registers, fpu state, and flags.
  // We assume caller has already pushed the return address onto the
  // stack, so rsp is 8-byte aligned here.
  // We push rbp twice in this sequence because we want the real rbp
  // to be under the return like a normal enter.
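  // The resulting frame therefore looks like (highest addresses first):
  //   [return address]           (pushed by the caller)
  //   [saved rbp]                (enter())
  //   [flags, alignment word, GP registers, FXSAVE/XSAVE area]
  //                              (push_CPU_state(), matching the layout enum above)
  //   [arg_reg_save_area_bytes]  (outgoing argument shadow space, if any)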

  __ enter();          // rsp becomes 16-byte aligned here
  __ push_CPU_state(); // Push a multiple of 16 bytes

  // push cpu state handles this on EVEX enabled targets
  if (save_wide_vectors) {
    // Save upper half of YMM registers (0..15)
    int base_addr = XSAVE_AREA_YMM_BEGIN;
    for (int n = 0; n < 16; n++) {
      __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n));
    }
    if (VM_Version::supports_evex()) {
      // Save upper half of ZMM registers (0..15)
      base_addr = XSAVE_AREA_ZMM_BEGIN;
      for (int n = 0; n < 16; n++) {
        __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
      }
      // Save full ZMM registers (16..num_xmm_regs)
      base_addr = XSAVE_AREA_UPPERBANK;
      off = 0;
      int vector_len = Assembler::AVX_512bit;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
      }
#if COMPILER2_OR_JVMCI
      base_addr = XSAVE_AREA_OPMASK_BEGIN;
      off = 0;
      for(int n = 0; n < KRegister::number_of_registers; n++) {
        __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
      }
#endif
    }
  } else {
    if (VM_Version::supports_evex()) {
      // Save upper bank of XMM registers (16..31) for scalar or 16-byte vector usage
      int base_addr = XSAVE_AREA_UPPERBANK;
      off = 0;
      int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
      }
#if COMPILER2_OR_JVMCI
      base_addr = XSAVE_AREA_OPMASK_BEGIN;
      off = 0;
      for(int n = 0; n < KRegister::number_of_registers; n++) {
        __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
      }
#endif
    }
  }
  __ vzeroupper();
  if (frame::arg_reg_save_area_bytes != 0) {
    // Allocate argument register save area
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }

  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.
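  // Each 64-bit register is described to the OopMap as two 32-bit halves
  // (the *_off/*H_off pairs in the layout enum), since OopMap offsets are
  // expressed in 32-bit VMRegImpl stack slots.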

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = new OopMap(frame_size_in_slots, 0);

#define STACK_OFFSET(x) VMRegImpl::stack2reg((x))

  map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
  // rbp location is known implicitly by the frame sender code, needs no oopmap,
  // and the location where rbp was saved is ignored
  map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r8_off  ), r8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r9_off  ), r9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
  // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15;
  // on EVEX enabled targets, we get it included in the xsave area
  off = xmm0_off;
  int delta = xmm1_off - off;
  for (int n = 0; n < 16; n++) {
    XMMRegister xmm_name = as_XMMRegister(n);
    map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
    off += delta;
  }
  if (UseAVX > 2) {
    // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
    off = zmm16_off;
    delta = zmm17_off - off;
    for (int n = 16; n < num_xmm_regs; n++) {
      XMMRegister zmm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
      off += delta;
    }
  }

#if COMPILER2_OR_JVMCI
  if (save_wide_vectors) {
    // Save upper half of YMM registers (0..15)
    off = ymm0_off;
    delta = ymm1_off - ymm0_off;
    for (int n = 0; n < 16; n++) {
      XMMRegister ymm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
      off += delta;
    }
    if (VM_Version::supports_evex()) {
      // Save upper half of ZMM registers (0..15)
      off = zmm0_off;
      delta = zmm1_off - zmm0_off;
      for (int n = 0; n < 16; n++) {
        XMMRegister zmm_name = as_XMMRegister(n);
        map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next(8));
        off += delta;
      }
    }
  }
#endif // COMPILER2_OR_JVMCI

  // %%% These should all be a waste but we'll keep things as they were for now
  if (true) {
    map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
    // rbp location is known implicitly by the frame sender code, needs no oopmap
    map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r8H_off  ), r8->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r9H_off  ), r9->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
    // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15;
    // on EVEX enabled targets, we get it included in the xsave area
    off = xmm0H_off;
    delta = xmm1H_off - off;
    for (int n = 0; n < 16; n++) {
      XMMRegister xmm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
      off += delta;
    }
    if (UseAVX > 2) {
      // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
      off = zmm16H_off;
      delta = zmm17H_off - off;
      for (int n = 16; n < num_xmm_regs; n++) {
        XMMRegister zmm_name = as_XMMRegister(n);
        map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
        off += delta;
      }
    }
  }

  return map;
}

void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors) {
  int num_xmm_regs = XMMRegister::available_xmm_registers();
  if (frame::arg_reg_save_area_bytes != 0) {
    // Pop arg register save area
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }

#if COMPILER2_OR_JVMCI
  if (restore_wide_vectors) {
    assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
    assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
  }
#else
  assert(!restore_wide_vectors, "vectors are generated only by C2");
#endif

  __ vzeroupper();

  // On EVEX enabled targets everything is handled in pop fpu state
  if (restore_wide_vectors) {
    // Restore upper half of YMM registers (0..15)
    int base_addr = XSAVE_AREA_YMM_BEGIN;
    for (int n = 0; n < 16; n++) {
      __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
    }
    if (VM_Version::supports_evex()) {
      // Restore upper half of ZMM registers (0..15)
      base_addr = XSAVE_AREA_ZMM_BEGIN;
      for (int n = 0; n < 16; n++) {
        __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
      }
      // Restore full ZMM registers (16..num_xmm_regs)
      base_addr = XSAVE_AREA_UPPERBANK;
      int vector_len = Assembler::AVX_512bit;
      int off = 0;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
      }
#if COMPILER2_OR_JVMCI
      base_addr = XSAVE_AREA_OPMASK_BEGIN;
      off = 0;
      for (int n = 0; n < KRegister::number_of_registers; n++) {
        __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
      }
#endif
    }
  } else {
    if (VM_Version::supports_evex()) {
      // Restore upper bank of XMM registers (16..31) for scalar or 16-byte vector usage
      int base_addr = XSAVE_AREA_UPPERBANK;
      int off = 0;
      int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
      }
#if COMPILER2_OR_JVMCI
      base_addr = XSAVE_AREA_OPMASK_BEGIN;
      off = 0;
      for (int n = 0; n < KRegister::number_of_registers; n++) {
        __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
      }
#endif
    }
  }

  // Recover CPU state
  __ pop_CPU_state();
  // Get the rbp described implicitly by the calling convention (no oopMap)
  __ pop(rbp);
}

void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore result registers. Only used by deoptimization. By
  // now any callee save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration, so only result registers need to be restored here.

  // Restore fp result register
  __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
  // Restore integer result register
  __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
  __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));

  // Pop all of the register save area off the stack except the return address
  __ addptr(rsp, return_offset_in_bytes());
}

// Is vector's size (in bytes) bigger than a size saved by default?
// 16 byte XMM registers are saved by default using fxsave/fxrstor instructions.
bool SharedRuntime::is_wide_vector(int size) {
  return size > 16;
}

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.
// Registers up to Register::number_of_registers are the 64-bit
// integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build. Of course for i486 there is no 64 bit build.

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.

int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed) {

  // Create the mapping between argument positions and
  // registers.
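  // For example (a sketch in terms of the platform j_rarg/j_farg aliases):
  // a signature (long, int, Object, double) is assigned
  //   long   -> j_rarg0 (plus a dead T_VOID half),
  //   int    -> j_rarg1,
  //   Object -> j_rarg2,
  //   double -> j_farg0 (plus a dead T_VOID half),
  // and only once the six j_rargs / eight j_fargs are exhausted do
  // arguments spill to aligned stack slots.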
  static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
    j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
    j_farg0, j_farg1, j_farg2, j_farg3,
    j_farg4, j_farg5, j_farg6, j_farg7
  };


  uint int_args = 0;
  uint fp_args = 0;
  uint stk_args = 0;

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (int_args < Argument::n_int_register_parameters_j) {
        regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
      } else {
        stk_args = align_up(stk_args, 2);
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 1;
      }
      break;
    case T_VOID:
      // halves of T_LONG or T_DOUBLE
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_LONG:
      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
      if (int_args < Argument::n_int_register_parameters_j) {
        regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
      } else {
        stk_args = align_up(stk_args, 2);
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
      } else {
        stk_args = align_up(stk_args, 2);
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 1;
      }
      break;
    case T_DOUBLE:
      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
      } else {
        stk_args = align_up(stk_args, 2);
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  return stk_args;
}

// Same as java_calling_convention() but for multiple return
// values. There's no way to store them on the stack so if we don't
// have enough registers, multiple values can't be returned.
const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
int SharedRuntime::java_return_convention(const BasicType *sig_bt,
                                          VMRegPair *regs,
                                          int total_args_passed) {
  // Create the mapping between argument positions and
  // registers.
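  // Note that rax comes first: a single scalar return value thus lands in
  // rax, as in the regular return convention, and any additional values
  // consume the j_rarg registers starting from the highest one down.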
  static const Register INT_ArgReg[java_return_convention_max_int] = {
    rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
  };
  static const XMMRegister FP_ArgReg[java_return_convention_max_float] = {
    j_farg0, j_farg1, j_farg2, j_farg3,
    j_farg4, j_farg5, j_farg6, j_farg7
  };


  uint int_args = 0;
  uint fp_args = 0;

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (int_args < Argument::n_int_register_parameters_j+1) {
        regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
        int_args++;
      } else {
        return -1;
      }
      break;
    case T_VOID:
      // halves of T_LONG or T_DOUBLE
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_LONG:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
    case T_METADATA:
      if (int_args < Argument::n_int_register_parameters_j+1) {
        regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
        int_args++;
      } else {
        return -1;
      }
      break;
    case T_FLOAT:
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
        fp_args++;
      } else {
        return -1;
      }
      break;
    case T_DOUBLE:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
        fp_args++;
      } else {
        return -1;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  return int_args + fp_args;
}

// Patch the caller's callsite with entry to compiled code if it exists.
static void patch_callers_callsite(MacroAssembler *masm) {
  Label L;
  __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
  __ jcc(Assembler::equal, L);

  // Save the current stack pointer
  __ mov(r13, rsp);
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee
  // rax isn't live so capture return address while we easily can
  __ movptr(rax, Address(rsp, 0));

  // align stack so push_CPU_state doesn't fault
  __ andptr(rsp, -(StackAlignmentInBytes));
  __ push_CPU_state();
  __ vzeroupper();
  // VM needs caller's callsite
  // VM needs target method
  // This needs to be a long call since we will relocate this adapter to
  // the codeBuffer and it may not reach

  // Allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }
  __ mov(c_rarg0, rbx);
  __ mov(c_rarg1, rax);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));

  // De-allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }

  __ vzeroupper();
  __ pop_CPU_state();
  // restore sp
  __ mov(rsp, r13);
  __ bind(L);
}

// For each inline type argument, sig includes the list of fields of
// the inline type. This utility function computes the number of
// arguments for the call if inline types are passed by reference (the
// calling convention the interpreter expects).
static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
  int total_args_passed = 0;
  if (InlineTypePassFieldsAsArgs) {
    for (int i = 0; i < sig_extended->length(); i++) {
      BasicType bt = sig_extended->at(i)._bt;
      if (bt == T_METADATA) {
        // In sig_extended, an inline type argument starts with:
        // T_METADATA, followed by the types of the fields of the
        // inline type and T_VOID to mark the end of the value
        // type. Inline types are flattened so, for instance, in the
        // case of an inline type with an int field and an inline type
        // field that itself has 2 fields, an int and a long:
        // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second
        // slot for the T_LONG) T_VOID (inner inline type) T_VOID
        // (outer inline type)
        total_args_passed++;
        int vt = 1;
        do {
          i++;
          BasicType bt = sig_extended->at(i)._bt;
          BasicType prev_bt = sig_extended->at(i-1)._bt;
          if (bt == T_METADATA) {
            vt++;
          } else if (bt == T_VOID &&
                     prev_bt != T_LONG &&
                     prev_bt != T_DOUBLE) {
            vt--;
          }
        } while (vt != 0);
      } else {
        total_args_passed++;
      }
    }
  } else {
    total_args_passed = sig_extended->length();
  }
  return total_args_passed;
}


static void gen_c2i_adapter_helper(MacroAssembler* masm,
                                   BasicType bt,
                                   BasicType prev_bt,
                                   size_t size_in_bytes,
                                   const VMRegPair& reg_pair,
                                   const Address& to,
                                   int extraspace,
                                   bool is_oop) {
  if (bt == T_VOID) {
    assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
    return;
  }

  // Say 4 args:
  //  i   st_off
  //  0   32 T_LONG
  //  1   24 T_VOID
  //  2   16 T_OBJECT
  //  3    8 T_BOOL
  //  -    0 return address
  //
  // However, to make things extra confusing: because we can fit a long/double
  // in a single slot on a 64-bit VM and it would be silly to break them up,
  // the interpreter leaves one slot empty and only stores to a single slot.
  // In this case the slot that is occupied is the T_VOID slot. See, I said it
  // was confusing.
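  // Concretely, for the T_LONG in the example above the 64-bit value is
  // written at st_off 24 (the slot labeled T_VOID) and slot 32 is left
  // unused (debug builds fill it with known junk, see the caller).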

  bool wide = (size_in_bytes == wordSize);
  VMReg r_1 = reg_pair.first();
  VMReg r_2 = reg_pair.second();
  assert(r_2->is_valid() == wide, "invalid size");
  if (!r_1->is_valid()) {
    assert(!r_2->is_valid(), "must be invalid");
    return;
  }

  if (!r_1->is_XMMRegister()) {
    Register val = rax;
    if (r_1->is_stack()) {
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
      __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
    } else {
      val = r_1->as_Register();
    }
    assert_different_registers(to.base(), val, rscratch1);
    if (is_oop) {
      __ push(r13);
      __ push(rbx);
      __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
      __ pop(rbx);
      __ pop(r13);
    } else {
      __ store_sized_value(to, val, size_in_bytes);
    }
  } else {
    if (wide) {
      __ movdbl(to, r_1->as_XMMRegister());
    } else {
      __ movflt(to, r_1->as_XMMRegister());
    }
  }
}

static void gen_c2i_adapter(MacroAssembler *masm,
                            const GrowableArray<SigEntry>* sig_extended,
                            const VMRegPair *regs,
                            bool requires_clinit_barrier,
                            address& c2i_no_clinit_check_entry,
                            Label& skip_fixup,
                            address start,
                            OopMapSet* oop_maps,
                            int& frame_complete,
                            int& frame_size_in_words,
                            bool alloc_inline_receiver) {
  if (requires_clinit_barrier && VM_Version::supports_fast_class_init_checks()) {
    Label L_skip_barrier;
    Register method = rbx;

    { // Bypass the barrier for non-static methods
      Register flags = rscratch1;
      __ movl(flags, Address(method, Method::access_flags_offset()));
      __ testl(flags, JVM_ACC_STATIC);
      __ jcc(Assembler::zero, L_skip_barrier); // non-static
    }

    Register klass = rscratch1;
    __ load_method_holder(klass, method);
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
    c2i_no_clinit_check_entry = __ pc();
  }

  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bs->c2i_entry_barrier(masm);

  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all. We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one). Check for a
  // compiled target. If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

  __ bind(skip_fixup);

  if (InlineTypePassFieldsAsArgs) {
    // Is there an inline type argument?
    bool has_inline_argument = false;
    for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
      has_inline_argument = (sig_extended->at(i)._bt == T_METADATA);
    }
    if (has_inline_argument) {
      // There is at least one inline type argument: we're coming from
      // compiled code so we have no buffers to back the inline types.
      // Allocate the buffers here with a runtime call.
      OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false);

      frame_complete = __ offset();

      __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);

      __ mov(c_rarg0, r15_thread);
      __ mov(c_rarg1, rbx);
      __ mov64(c_rarg2, (int64_t)alloc_inline_receiver);
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types)));

      oop_maps->add_gc_map((int)(__ pc() - start), map);
      __ reset_last_Java_frame(false);

      RegisterSaver::restore_live_registers(masm);

      Label no_exception;
      __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
      __ jcc(Assembler::equal, no_exception);

      __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD);
      __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
      __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

      __ bind(no_exception);

      // We get an array of objects from the runtime call
      __ get_vm_result(rscratch2, r15_thread); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr()
      __ get_vm_result_2(rbx, r15_thread); // TODO: required to keep the callee Method live?
    }
  }

  // Since all args are passed on the stack, total_args_passed *
  // Interpreter::stackElementSize is the space we need.
  int total_args_passed = compute_total_args_passed_int(sig_extended);
  assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed);

  int extraspace = (total_args_passed * Interpreter::stackElementSize);

  // stack is aligned, keep it that way
  // This is not currently needed or enforced by the interpreter, but
  // we might as well conform to the ABI.
  extraspace = align_up(extraspace, 2*wordSize);

  // set senderSP value
  __ lea(r13, Address(rsp, wordSize));

#ifdef ASSERT
  __ check_stack_alignment(r13, "sender stack not aligned");
#endif
  if (extraspace > 0) {
    // Pop the return address
    __ pop(rax);

    __ subptr(rsp, extraspace);

    // Push the return address
    __ push(rax);

    // Account for the return address location since we store it first rather
    // than hold it in a register across all the shuffling
    extraspace += wordSize;
  }

#ifdef ASSERT
  __ check_stack_alignment(rsp, "callee stack not aligned", wordSize, rax);
#endif

  // Now write the args into the outgoing interpreter space

  // next_arg_comp is the next argument from the compiler point of
  // view (inline type fields are passed in registers/on the stack). In
  // sig_extended, an inline type argument starts with: T_METADATA,
  // followed by the types of the fields of the inline type and T_VOID
  // to mark the end of the inline type. ignored counts the number of
  // T_METADATA/T_VOID. next_vt_arg is the next inline type argument:
  // used to get the buffer for that argument from the pool of buffers
  // we allocated above and want to pass to the
  // interpreter. next_arg_int is the next argument from the
  // interpreter point of view (inline types are passed by reference).
  for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
       next_arg_comp < sig_extended->length(); next_arg_comp++) {
    assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
    assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?");
    BasicType bt = sig_extended->at(next_arg_comp)._bt;
    int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
    if (!InlineTypePassFieldsAsArgs || bt != T_METADATA) {
      int next_off = st_off - Interpreter::stackElementSize;
      const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
      const VMRegPair reg_pair = regs[next_arg_comp-ignored];
      size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
      gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
                             size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false);
      next_arg_int++;
#ifdef ASSERT
      if (bt == T_LONG || bt == T_DOUBLE) {
        // Overwrite the unused slot with known junk
        __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
        __ movptr(Address(rsp, st_off), rax);
      }
#endif /* ASSERT */
    } else {
      ignored++;
      // get the buffer from the just allocated pool of buffers
      int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_OBJECT);
      __ load_heap_oop(r14, Address(rscratch2, index));
      next_vt_arg++; next_arg_int++;
      int vt = 1;
      // write fields we get from compiled code in registers/stack
      // slots to the buffer: we know we are done with that inline type
      // argument when we hit the T_VOID that acts as an end of inline
      // type delimiter for this inline type. Inline types are flattened
      // so we might encounter embedded inline types. Each entry in
      // sig_extended contains a field offset in the buffer.
      Label L_null;
      do {
        next_arg_comp++;
        BasicType bt = sig_extended->at(next_arg_comp)._bt;
        BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt;
        if (bt == T_METADATA) {
          vt++;
          ignored++;
        } else if (bt == T_VOID &&
                   prev_bt != T_LONG &&
                   prev_bt != T_DOUBLE) {
          vt--;
          ignored++;
        } else {
          int off = sig_extended->at(next_arg_comp)._offset;
          if (off == -1) {
            // Nullable inline type argument, emit null check
            VMReg reg = regs[next_arg_comp-ignored].first();
            Label L_notNull;
            if (reg->is_stack()) {
              int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
              __ testb(Address(rsp, ld_off), 1);
            } else {
              __ testb(reg->as_Register(), 1);
            }
            __ jcc(Assembler::notZero, L_notNull);
            __ movptr(Address(rsp, st_off), 0);
            __ jmp(L_null);
            __ bind(L_notNull);
            continue;
          }
          assert(off > 0, "offset in object should be positive");
          size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
          bool is_oop = is_reference_type(bt);
          gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
                                 size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop);
        }
      } while (vt != 0);
      // pass the buffer to the interpreter
      __ movptr(Address(rsp, st_off), r14);
      __ bind(L_null);
    }
  }

  // Schedule the branch target address early.
  __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
  __ jmp(rcx);
}

static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
                        address code_start, address code_end,
                        Label& L_ok) {
  Label L_fail;
  __ lea(temp_reg, ExternalAddress(code_start));
  __ cmpptr(pc_reg, temp_reg);
  __ jcc(Assembler::belowEqual, L_fail);
  __ lea(temp_reg, ExternalAddress(code_end));
  __ cmpptr(pc_reg, temp_reg);
  __ jcc(Assembler::below, L_ok);
  __ bind(L_fail);
}

void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int comp_args_on_stack,
                                    const GrowableArray<SigEntry>* sig,
                                    const VMRegPair *regs) {

  // Note: r13 contains the senderSP on entry. We must preserve it since
  // we may do an i2c -> c2i transition if we lose a race where compiled
  // code goes non-entrant while we get args ready.
  // In addition we use r13 to locate all the interpreter args because
  // we must align the stack to 16 bytes on an i2c entry, else we lose the
  // alignment we expect in all compiled code, and the register save code
  // can segv when fxsave instructions find an improperly aligned stack
  // pointer.

  // Adapters can be frameless because they do not require the caller
  // to perform additional cleanup work, such as correcting the stack pointer.
  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
  // even if a callee has modified the stack pointer.
  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
  // routinely repairs its caller's stack pointer (from sender_sp, which is set
  // up via the senderSP register).
  // In other words, if *either* the caller or callee is interpreted, we can
  // get the stack pointer repaired after a call.
  // This is why c2i and i2c adapters cannot be indefinitely composed.
  // In particular, if a c2i adapter were to somehow call an i2c adapter,
  // both caller and callee would be compiled methods, and neither would
  // clean up the stack pointer changes performed by the two adapters.
  // If this happens, control eventually transfers back to the compiled
  // caller, but with an uncorrected stack, causing delayed havoc.

  if (VerifyAdapterCalls &&
      (Interpreter::code() != nullptr || StubRoutines::final_stubs_code() != nullptr)) {
    // So, let's test for cascading c2i/i2c adapters right now.
    //  assert(Interpreter::contains($return_addr) ||
    //         StubRoutines::contains($return_addr),
    //         "i2c adapter must return to an interpreter frame");
    __ block_comment("verify_i2c { ");
    // Pick up the return address
    __ movptr(rax, Address(rsp, 0));
    Label L_ok;
    if (Interpreter::code() != nullptr) {
      range_check(masm, rax, r11,
                  Interpreter::code()->code_start(),
                  Interpreter::code()->code_end(),
                  L_ok);
    }
    if (StubRoutines::initial_stubs_code() != nullptr) {
      range_check(masm, rax, r11,
                  StubRoutines::initial_stubs_code()->code_begin(),
                  StubRoutines::initial_stubs_code()->code_end(),
                  L_ok);
    }
    if (StubRoutines::final_stubs_code() != nullptr) {
      range_check(masm, rax, r11,
                  StubRoutines::final_stubs_code()->code_begin(),
                  StubRoutines::final_stubs_code()->code_end(),
                  L_ok);
    }
    const char* msg = "i2c adapter must return to an interpreter frame";
    __ block_comment(msg);
    __ stop(msg);
    __ bind(L_ok);
    __ block_comment("} verify_i2ce ");
  }

  // Must preserve original SP for loading incoming arguments because
  // we need to align the outgoing SP for compiled code.
  __ movptr(r11, rsp);

  // Pick up the return address
  __ pop(rax);

  // Convert 4-byte c2 stack slots to words.
  int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;

  if (comp_args_on_stack) {
    __ subptr(rsp, comp_words_on_stack * wordSize);
  }

  // Ensure compiled code always sees stack at proper alignment
  __ andptr(rsp, -16);

  // Push the return address and misalign the stack so that the youngest
  // frame sees the same alignment it would see right after a call instruction
  __ push(rax);

  // Put saved SP in another register
  const Register saved_sp = rax;
  __ movptr(saved_sp, r11);

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset())));

#if INCLUDE_JVMCI
  if (EnableJVMCI) {
    // check if this call should be routed towards a specific entry point
    __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
    Label no_alternative_target;
    __ jcc(Assembler::equal, no_alternative_target);
    __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
    __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
    __ bind(no_alternative_target);
  }
#endif // INCLUDE_JVMCI

  int total_args_passed = sig->length();

  // Now generate the shuffle code. Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    BasicType bt = sig->at(i)._bt;
    if (bt == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      BasicType prev_bt = (i > 0) ? sig->at(i-1)._bt : T_ILLEGAL;
      assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to account for return address)
      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;

      // We can use r13 as a temp here because compiled code doesn't need r13 as an input
      // and if we end up going thru a c2i because of a miss a reasonable value of r13
      // will be generated.
      if (!r_2->is_valid()) {
        // sign extend???
        __ movl(r13, Address(saved_sp, ld_off));
        __ movptr(Address(rsp, st_off), r13);
      } else {
        //
        // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
        // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
        // so we must adjust where to pick up the data to match the interpreter.
        //
        // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        const int offset = (bt==T_LONG||bt==T_DOUBLE)?
                           next_off : ld_off;
        __ movq(r13, Address(saved_sp, offset));
        // st_off is LSW (i.e. reg.first())
        __ movq(Address(rsp, st_off), r13);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      assert(r != rax, "must be different");
      if (r_2->is_valid()) {
        //
        // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
        // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
        // so we must adjust where to pick up the data to match the interpreter.

        const int offset = (bt==T_LONG||bt==T_DOUBLE)?
                           next_off : ld_off;

        // this can be a misaligned move
        __ movq(r, Address(saved_sp, offset));
      } else {
        // sign extend and use a full word?
        __ movl(r, Address(saved_sp, ld_off));
      }
    } else {
      if (!r_2->is_valid()) {
        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
      } else {
        __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
      }
    }
  }

  __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.
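  // (SharedRuntime::handle_wrong_method retrieves the stashed Method* from
  // JavaThread::callee_target, so the call can be re-resolved without
  // walking the now inconsistent-looking frame.)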

  __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);

  // put Method* where a c2i would expect should we end up there;
  // only needed because c2 resolve stubs return Method* as a result in
  // rax
  __ mov(rax, rbx);
  __ jmp(r11);
}

static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
  Register data = rax;
  __ ic_check(1 /* end_alignment */);
  __ movptr(rbx, Address(data, CompiledICData::speculated_method_offset()));

  // Method might have been compiled since the call site was patched to
  // interpreted; if that is the case treat it as a miss so we can get
  // the call site corrected.
  __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
  __ jcc(Assembler::equal, skip_fixup);
  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
}

// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
                                                            int comp_args_on_stack,
                                                            const GrowableArray<SigEntry>* sig,
                                                            const VMRegPair* regs,
                                                            const GrowableArray<SigEntry>* sig_cc,
                                                            const VMRegPair* regs_cc,
                                                            const GrowableArray<SigEntry>* sig_cc_ro,
                                                            const VMRegPair* regs_cc_ro,
                                                            AdapterFingerPrint* fingerprint,
                                                            AdapterBlob*& new_adapter,
                                                            bool allocate_code_blob) {
  address i2c_entry = __ pc();
  gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);

  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
  // to the interpreter. The args start out packed in the compiled layout. They
  // need to be unpacked into the interpreter layout. This will almost always
  // require some stack space. We grow the current (compiled) stack, then repack
  // the args. We finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not RBP, get sick).
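  // With inline types there are several distinct c2i entry points besides the
  // i2c entry generated above: a scalarized c2i entry (verified and
  // unverified), a scalarized entry with non-scalarized receiver
  // (c2i_inline_ro), a non-scalarized variant when the two calling
  // conventions differ, and an entry that skips the class initialization
  // barrier (c2i_no_clinit_check).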

  address c2i_unverified_entry        = __ pc();
  address c2i_unverified_inline_entry = __ pc();
  Label skip_fixup;

  gen_inline_cache_check(masm, skip_fixup);

  OopMapSet* oop_maps = new OopMapSet();
  int frame_complete = CodeOffsets::frame_never_safe;
  int frame_size_in_words = 0;

  // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
  address c2i_no_clinit_check_entry = nullptr;
  address c2i_inline_ro_entry = __ pc();
  if (regs_cc != regs_cc_ro) {
    // No class init barrier needed because method is guaranteed to be non-static
    gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, c2i_no_clinit_check_entry,
                    skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
    skip_fixup.reset();
  }

  // Scalarized c2i adapter
  address c2i_entry        = __ pc();
  address c2i_inline_entry = __ pc();
  gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry,
                  skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true);

  // Non-scalarized c2i adapter
  if (regs != regs_cc) {
    c2i_unverified_inline_entry = __ pc();
    Label inline_entry_skip_fixup;
    gen_inline_cache_check(masm, inline_entry_skip_fixup);

    c2i_inline_entry = __ pc();
    gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry,
                    inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
  }

  // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
  // the GC knows about the locations of oop arguments passed to the c2i adapter.
  if (allocate_code_blob) {
    bool caller_must_gc_arguments = (regs != regs_cc);
    new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
  }

  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, c2i_unverified_inline_entry, c2i_no_clinit_check_entry);
}

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        int total_args_passed) {

  // We return the number of VMRegImpl stack slots we need to reserve for all
  // the arguments NOT counting out_preserve_stack_slots.
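  // Unlike the Java convention above, the native convention differs by
  // platform: Win64 passes the first four arguments in registers (integer
  // and FP registers are consumed positionally, in lockstep) and requires
  // 32 bytes of caller-allocated shadow space, while System V targets pass
  // up to six integer and eight FP arguments in registers with no shadow
  // space.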

  // NOTE: These arrays will have to change when c1 is ported
#ifdef _WIN64
  static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
    c_rarg0, c_rarg1, c_rarg2, c_rarg3
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
    c_farg0, c_farg1, c_farg2, c_farg3
  };
#else
  static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
    c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
    c_farg0, c_farg1, c_farg2, c_farg3,
    c_farg4, c_farg5, c_farg6, c_farg7
  };
#endif // _WIN64


  uint int_args = 0;
  uint fp_args = 0;
  uint stk_args = 0; // inc by 2 each time

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (int_args < Argument::n_int_register_parameters_c) {
        regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
        fp_args++;
        // Allocate slots for callee to stuff register args on the stack.
        stk_args += 2;
#endif
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_LONG:
      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
    case T_METADATA:
      if (int_args < Argument::n_int_register_parameters_c) {
        regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
        fp_args++;
        stk_args += 2;
#endif
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (fp_args < Argument::n_float_register_parameters_c) {
        regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
#ifdef _WIN64
        int_args++;
        // Allocate slots for callee to stuff register args on the stack.
        stk_args += 2;
#endif
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
      if (fp_args < Argument::n_float_register_parameters_c) {
        regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
#ifdef _WIN64
        int_args++;
        // Allocate slots for callee to stuff register args on the stack.
        stk_args += 2;
#endif
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_VOID: // Halves of longs and doubles
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }
#ifdef _WIN64
  // The Windows ABI requires that we always allocate enough stack space
  // for four 64-bit registers to be stored down.
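  // Four 8-byte shadow slots = 32 bytes = 8 four-byte VMRegImpl slots,
  // hence the minimum of 8 below.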
  if (stk_args < 8) {
    stk_args = 8;
  }
#endif // _WIN64

  return stk_args;
}

int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                             uint num_bits,
                                             uint total_args_passed) {
  assert(num_bits == 64 || num_bits == 128 || num_bits == 256 || num_bits == 512,
         "only certain vector sizes are supported for now");

  static const XMMRegister VEC_ArgReg[32] = {
     xmm0,  xmm1,  xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
     xmm8,  xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
    xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23,
    xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31
  };

  uint stk_args = 0;
  uint fp_args = 0;

  for (uint i = 0; i < total_args_passed; i++) {
    VMReg vmreg = VEC_ArgReg[fp_args++]->as_VMReg();
    int next_val = num_bits == 64 ? 1 : (num_bits == 128 ? 3 : (num_bits == 256 ? 7 : 15));
    regs[i].set_pair(vmreg->next(next_val), vmreg);
  }

  return stk_args;
}

void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below frame pointer
  // which by this time is free to use
  switch (ret_type) {
  case T_FLOAT:
    __ movflt(Address(rbp, -wordSize), xmm0);
    break;
  case T_DOUBLE:
    __ movdbl(Address(rbp, -wordSize), xmm0);
    break;
  case T_VOID:  break;
  default: {
    __ movptr(Address(rbp, -wordSize), rax);
  }
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below frame pointer
  // which by this time is free to use
  switch (ret_type) {
  case T_FLOAT:
    __ movflt(xmm0, Address(rbp, -wordSize));
    break;
  case T_DOUBLE:
    __ movdbl(xmm0, Address(rbp, -wordSize));
    break;
  case T_VOID:  break;
  default: {
    __ movptr(rax, Address(rbp, -wordSize));
  }
  }
}

static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  for ( int i = first_arg ; i < arg_count ; i++ ) {
    if (args[i].first()->is_Register()) {
      __ push(args[i].first()->as_Register());
    } else if (args[i].first()->is_XMMRegister()) {
      __ subptr(rsp, 2*wordSize);
      __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister());
    }
  }
}

static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
    if (args[i].first()->is_Register()) {
      __ pop(args[i].first()->as_Register());
    } else if (args[i].first()->is_XMMRegister()) {
      __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0));
      __ addptr(rsp, 2*wordSize);
    }
  }
}

static void verify_oop_args(MacroAssembler* masm,
                            const methodHandle& method,
                            const BasicType* sig_bt,
                            const VMRegPair* regs) {
  Register temp_reg = rbx;  // not part of any compiled calling seq
  if (VerifyOops) {
    for (int i = 0; i < method->size_of_parameters(); i++) {
      if (is_reference_type(sig_bt[i])) {
        VMReg r = regs[i].first();
        assert(r->is_valid(), "bad oop arg");
        if (r->is_stack()) {
          __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
          __ verify_oop(temp_reg);
        } else {
verify_oop(r->as_Register()); 1530 } 1531 } 1532 } 1533 } 1534 } 1535 1536 static void check_continuation_enter_argument(VMReg actual_vmreg, 1537 Register expected_reg, 1538 const char* name) { 1539 assert(!actual_vmreg->is_stack(), "%s cannot be on stack", name); 1540 assert(actual_vmreg->as_Register() == expected_reg, 1541 "%s is in unexpected register: %s instead of %s", 1542 name, actual_vmreg->as_Register()->name(), expected_reg->name()); 1543 } 1544 1545 1546 //---------------------------- continuation_enter_setup --------------------------- 1547 // 1548 // Arguments: 1549 // None. 1550 // 1551 // Results: 1552 // rsp: pointer to blank ContinuationEntry 1553 // 1554 // Kills: 1555 // rax 1556 // 1557 static OopMap* continuation_enter_setup(MacroAssembler* masm, int& stack_slots) { 1558 assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, ""); 1559 assert(in_bytes(ContinuationEntry::cont_offset()) % VMRegImpl::stack_slot_size == 0, ""); 1560 assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, ""); 1561 1562 stack_slots += checked_cast<int>(ContinuationEntry::size()) / wordSize; 1563 __ subptr(rsp, checked_cast<int32_t>(ContinuationEntry::size())); 1564 1565 int frame_size = (checked_cast<int>(ContinuationEntry::size()) + wordSize) / VMRegImpl::stack_slot_size; 1566 OopMap* map = new OopMap(frame_size, 0); 1567 1568 __ movptr(rax, Address(r15_thread, JavaThread::cont_entry_offset())); 1569 __ movptr(Address(rsp, ContinuationEntry::parent_offset()), rax); 1570 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rsp); 1571 1572 return map; 1573 } 1574 1575 //---------------------------- fill_continuation_entry --------------------------- 1576 // 1577 // Arguments: 1578 // rsp: pointer to blank Continuation entry 1579 // reg_cont_obj: pointer to the continuation 1580 // reg_flags: flags 1581 // 1582 // Results: 1583 // rsp: pointer to filled out ContinuationEntry 1584 // 1585 // Kills: 1586 // rax 1587 // 1588 static void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Register reg_flags) { 1589 assert_different_registers(rax, reg_cont_obj, reg_flags); 1590 #ifdef ASSERT 1591 __ movl(Address(rsp, ContinuationEntry::cookie_offset()), ContinuationEntry::cookie_value()); 1592 #endif 1593 __ movptr(Address(rsp, ContinuationEntry::cont_offset()), reg_cont_obj); 1594 __ movl (Address(rsp, ContinuationEntry::flags_offset()), reg_flags); 1595 __ movptr(Address(rsp, ContinuationEntry::chunk_offset()), 0); 1596 __ movl(Address(rsp, ContinuationEntry::argsize_offset()), 0); 1597 __ movl(Address(rsp, ContinuationEntry::pin_count_offset()), 0); 1598 1599 __ movptr(rax, Address(r15_thread, JavaThread::cont_fastpath_offset())); 1600 __ movptr(Address(rsp, ContinuationEntry::parent_cont_fastpath_offset()), rax); 1601 __ movq(rax, Address(r15_thread, JavaThread::held_monitor_count_offset())); 1602 __ movq(Address(rsp, ContinuationEntry::parent_held_monitor_count_offset()), rax); 1603 1604 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), 0); 1605 __ movq(Address(r15_thread, JavaThread::held_monitor_count_offset()), 0); 1606 } 1607 1608 //---------------------------- continuation_enter_cleanup --------------------------- 1609 // 1610 // Arguments: 1611 // rsp: pointer to the ContinuationEntry 1612 // 1613 // Results: 1614 // rsp: pointer to the spilled rbp in the entry frame 1615 // 1616 // Kills: 1617 // rbx 1618 // 1619 void static continuation_enter_cleanup(MacroAssembler* masm) { 1620 #ifdef ASSERT 1621 
Label L_good_sp; 1622 __ cmpptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset())); 1623 __ jcc(Assembler::equal, L_good_sp); 1624 __ stop("Incorrect rsp at continuation_enter_cleanup"); 1625 __ bind(L_good_sp); 1626 #endif 1627 1628 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_cont_fastpath_offset())); 1629 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), rbx); 1630 __ movq(rbx, Address(rsp, ContinuationEntry::parent_held_monitor_count_offset())); 1631 __ movq(Address(r15_thread, JavaThread::held_monitor_count_offset()), rbx); 1632 1633 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_offset())); 1634 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rbx); 1635 __ addptr(rsp, checked_cast<int32_t>(ContinuationEntry::size())); 1636 } 1637 1638 static void gen_continuation_enter(MacroAssembler* masm, 1639 const VMRegPair* regs, 1640 int& exception_offset, 1641 OopMapSet* oop_maps, 1642 int& frame_complete, 1643 int& stack_slots, 1644 int& interpreted_entry_offset, 1645 int& compiled_entry_offset) { 1646 1647 // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread) 1648 int pos_cont_obj = 0; 1649 int pos_is_cont = 1; 1650 int pos_is_virtual = 2; 1651 1652 // The platform-specific calling convention may present the arguments in various registers. 1653 // To simplify the rest of the code, we expect the arguments to reside at these known 1654 // registers, and we additionally check the placement here in case calling convention ever 1655 // changes. 1656 Register reg_cont_obj = c_rarg1; 1657 Register reg_is_cont = c_rarg2; 1658 Register reg_is_virtual = c_rarg3; 1659 1660 check_continuation_enter_argument(regs[pos_cont_obj].first(), reg_cont_obj, "Continuation object"); 1661 check_continuation_enter_argument(regs[pos_is_cont].first(), reg_is_cont, "isContinue"); 1662 check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread"); 1663 1664 // Utility methods kill rax, make sure there are no collisions 1665 assert_different_registers(rax, reg_cont_obj, reg_is_cont, reg_is_virtual); 1666 1667 AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), 1668 relocInfo::static_call_type); 1669 1670 address start = __ pc(); 1671 1672 Label L_thaw, L_exit; 1673 1674 // i2i entry used at interp_only_mode only 1675 interpreted_entry_offset = __ pc() - start; 1676 { 1677 #ifdef ASSERT 1678 Label is_interp_only; 1679 __ cmpb(Address(r15_thread, JavaThread::interp_only_mode_offset()), 0); 1680 __ jcc(Assembler::notEqual, is_interp_only); 1681 __ stop("enterSpecial interpreter entry called when not in interp_only_mode"); 1682 __ bind(is_interp_only); 1683 #endif 1684 1685 __ pop(rax); // return address 1686 // Read interpreter arguments into registers (this is an ad-hoc i2c adapter) 1687 __ movptr(c_rarg1, Address(rsp, Interpreter::stackElementSize*2)); 1688 __ movl(c_rarg2, Address(rsp, Interpreter::stackElementSize*1)); 1689 __ movl(c_rarg3, Address(rsp, Interpreter::stackElementSize*0)); 1690 __ andptr(rsp, -16); // Ensure compiled code always sees stack at proper alignment 1691 __ push(rax); // return address 1692 __ push_cont_fastpath(); 1693 1694 __ enter(); 1695 1696 stack_slots = 2; // will be adjusted in setup 1697 OopMap* map = continuation_enter_setup(masm, stack_slots); 1698 // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe, 1699 // but that's okay because at the very worst we'll miss an async sample, but we're in 
interp_only_mode anyway. 1700 1701 __ verify_oop(reg_cont_obj); 1702 1703 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual); 1704 1705 // If continuation, call to thaw. Otherwise, resolve the call and exit. 1706 __ testptr(reg_is_cont, reg_is_cont); 1707 __ jcc(Assembler::notZero, L_thaw); 1708 1709 // --- Resolve path 1710 1711 // Make sure the call is patchable 1712 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset); 1713 // Emit stub for static call 1714 CodeBuffer* cbuf = masm->code_section()->outer(); 1715 address stub = CompiledDirectCall::emit_to_interp_stub(*cbuf, __ pc()); 1716 if (stub == nullptr) { 1717 fatal("CodeCache is full at gen_continuation_enter"); 1718 } 1719 __ call(resolve); 1720 oop_maps->add_gc_map(__ pc() - start, map); 1721 __ post_call_nop(); 1722 1723 __ jmp(L_exit); 1724 } 1725 1726 // compiled entry 1727 __ align(CodeEntryAlignment); 1728 compiled_entry_offset = __ pc() - start; 1729 __ enter(); 1730 1731 stack_slots = 2; // will be adjusted in setup 1732 OopMap* map = continuation_enter_setup(masm, stack_slots); 1733 1734 // Frame is now completed as far as size and linkage. 1735 frame_complete = __ pc() - start; 1736 1737 __ verify_oop(reg_cont_obj); 1738 1739 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual); 1740 1741 // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue) 1742 __ testptr(reg_is_cont, reg_is_cont); 1743 __ jccb(Assembler::notZero, L_thaw); 1744 1745 // --- call Continuation.enter(Continuation c, boolean isContinue) 1746 1747 // Make sure the call is patchable 1748 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset); 1749 1750 // Emit stub for static call 1751 CodeBuffer* cbuf = masm->code_section()->outer(); 1752 address stub = CompiledDirectCall::emit_to_interp_stub(*cbuf, __ pc()); 1753 if (stub == nullptr) { 1754 fatal("CodeCache is full at gen_continuation_enter"); 1755 } 1756 1757 // The call needs to be resolved. There's a special case for this in 1758 // SharedRuntime::find_callee_info_helper() which calls 1759 // LinkResolver::resolve_continuation_enter() which resolves the call to 1760 // Continuation.enter(Continuation c, boolean isContinue). 
1761 __ call(resolve); 1762 1763 oop_maps->add_gc_map(__ pc() - start, map); 1764 __ post_call_nop(); 1765 1766 __ jmpb(L_exit); 1767 1768 // --- Thawing path 1769 1770 __ bind(L_thaw); 1771 1772 __ call(RuntimeAddress(StubRoutines::cont_thaw())); 1773 1774 ContinuationEntry::_return_pc_offset = __ pc() - start; 1775 oop_maps->add_gc_map(__ pc() - start, map->deep_copy()); 1776 __ post_call_nop(); 1777 1778 // --- Normal exit (resolve/thawing) 1779 1780 __ bind(L_exit); 1781 1782 continuation_enter_cleanup(masm); 1783 __ pop(rbp); 1784 __ ret(0); 1785 1786 // --- Exception handling path 1787 1788 exception_offset = __ pc() - start; 1789 1790 continuation_enter_cleanup(masm); 1791 __ pop(rbp); 1792 1793 __ movptr(c_rarg0, r15_thread); 1794 __ movptr(c_rarg1, Address(rsp, 0)); // return address 1795 1796 // rax still holds the original exception oop, save it before the call 1797 __ push(rax); 1798 1799 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), 2); 1800 __ movptr(rbx, rax); 1801 1802 // Continue at exception handler: 1803 // rax: exception oop 1804 // rbx: exception handler 1805 // rdx: exception pc 1806 __ pop(rax); 1807 __ verify_oop(rax); 1808 __ pop(rdx); 1809 __ jmp(rbx); 1810 } 1811 1812 static void gen_continuation_yield(MacroAssembler* masm, 1813 const VMRegPair* regs, 1814 OopMapSet* oop_maps, 1815 int& frame_complete, 1816 int& stack_slots, 1817 int& compiled_entry_offset) { 1818 enum layout { 1819 rbp_off, 1820 rbpH_off, 1821 return_off, 1822 return_off2, 1823 framesize // inclusive of return address 1824 }; 1825 stack_slots = framesize / VMRegImpl::slots_per_word; 1826 assert(stack_slots == 2, "recheck layout"); 1827 1828 address start = __ pc(); 1829 compiled_entry_offset = __ pc() - start; 1830 __ enter(); 1831 address the_pc = __ pc(); 1832 1833 frame_complete = the_pc - start; 1834 1835 // This nop must be exactly at the PC we push into the frame info. 1836 // We use this nop for fast CodeBlob lookup, associate the OopMap 1837 // with it right away. 
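  // (Sketch of the mechanism, not the exact encoding: post_call_nop() emits a
  // recognizable nop pattern at this pc, which lets the stack walker map a
  // return address back to this blob and its oop map cheaply; the details
  // live in NativePostCallNop.)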
1838 __ post_call_nop(); 1839 OopMap* map = new OopMap(framesize, 1); 1840 oop_maps->add_gc_map(frame_complete, map); 1841 1842 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1); 1843 __ movptr(c_rarg0, r15_thread); 1844 __ movptr(c_rarg1, rsp); 1845 __ call_VM_leaf(Continuation::freeze_entry(), 2); 1846 __ reset_last_Java_frame(true); 1847 1848 Label L_pinned; 1849 1850 __ testptr(rax, rax); 1851 __ jcc(Assembler::notZero, L_pinned); 1852 1853 __ movptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset())); 1854 continuation_enter_cleanup(masm); 1855 __ pop(rbp); 1856 __ ret(0); 1857 1858 __ bind(L_pinned); 1859 1860 // Pinned, return to caller 1861 1862 // handle pending exception thrown by freeze 1863 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 1864 Label ok; 1865 __ jcc(Assembler::equal, ok); 1866 __ leave(); 1867 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 1868 __ bind(ok); 1869 1870 __ leave(); 1871 __ ret(0); 1872 } 1873 1874 static void gen_special_dispatch(MacroAssembler* masm, 1875 const methodHandle& method, 1876 const BasicType* sig_bt, 1877 const VMRegPair* regs) { 1878 verify_oop_args(masm, method, sig_bt, regs); 1879 vmIntrinsics::ID iid = method->intrinsic_id(); 1880 1881 // Now write the args into the outgoing interpreter space 1882 bool has_receiver = false; 1883 Register receiver_reg = noreg; 1884 int member_arg_pos = -1; 1885 Register member_reg = noreg; 1886 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1887 if (ref_kind != 0) { 1888 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 1889 member_reg = rbx; // known to be free at this point 1890 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 1891 } else if (iid == vmIntrinsics::_invokeBasic) { 1892 has_receiver = true; 1893 } else if (iid == vmIntrinsics::_linkToNative) { 1894 member_arg_pos = method->size_of_parameters() - 1; // trailing NativeEntryPoint argument 1895 member_reg = rbx; // known to be free at this point 1896 } else { 1897 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); 1898 } 1899 1900 if (member_reg != noreg) { 1901 // Load the member_arg into register, if necessary. 1902 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 1903 VMReg r = regs[member_arg_pos].first(); 1904 if (r->is_stack()) { 1905 __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1906 } else { 1907 // no data motion is needed 1908 member_reg = r->as_Register(); 1909 } 1910 } 1911 1912 if (has_receiver) { 1913 // Make sure the receiver is loaded into a register. 1914 assert(method->size_of_parameters() > 0, "oob"); 1915 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 1916 VMReg r = regs[0].first(); 1917 assert(r->is_valid(), "bad receiver arg"); 1918 if (r->is_stack()) { 1919 // Porting note: This assumes that compiled calling conventions always 1920 // pass the receiver oop in a register. If this is not true on some 1921 // platform, pick a temp and load the receiver from stack. 
      fatal("receiver always in a register");
      receiver_reg = j_rarg0;  // known to be free at this point
      __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
    } else {
      // no data motion is needed
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, iid,
                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
}

// ---------------------------------------------------------------------------
// Generate a native wrapper for a given method. The method takes arguments
// in the Java compiled code convention, marshals them to the native
// convention (handlizes oops, etc), transitions to native, makes the call,
// returns to java state (possibly blocking), unhandlizes any result and
// returns.
//
// Critical native functions are a shorthand for the use of
// GetPrimitiveArrayCritical and disallow the use of any other JNI
// functions. The wrapper is expected to unpack the arguments before
// passing them to the callee. Critical native functions leave the state _in_Java,
// since they cannot stop for GC.
// Some other parts of JNI setup are skipped, like the tear down of the JNI handle
// block and the check for pending exceptions, because it's impossible for them
// to be thrown.
//
nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
                                                const methodHandle& method,
                                                int compile_id,
                                                BasicType* in_sig_bt,
                                                VMRegPair* in_regs,
                                                BasicType ret_type) {
  if (method->is_continuation_native_intrinsic()) {
    int exception_offset = -1;
    OopMapSet* oop_maps = new OopMapSet();
    int frame_complete = -1;
    int stack_slots = -1;
    int interpreted_entry_offset = -1;
    int vep_offset = -1;
    if (method->is_continuation_enter_intrinsic()) {
      gen_continuation_enter(masm,
                             in_regs,
                             exception_offset,
                             oop_maps,
                             frame_complete,
                             stack_slots,
                             interpreted_entry_offset,
                             vep_offset);
    } else if (method->is_continuation_yield_intrinsic()) {
      gen_continuation_yield(masm,
                             in_regs,
                             oop_maps,
                             frame_complete,
                             stack_slots,
                             vep_offset);
    } else {
      guarantee(false, "Unknown Continuation native intrinsic");
    }

#ifdef ASSERT
    if (method->is_continuation_enter_intrinsic()) {
      assert(interpreted_entry_offset != -1, "Must be set");
      assert(exception_offset != -1, "Must be set");
    } else {
      assert(interpreted_entry_offset == -1, "Must be unset");
      assert(exception_offset == -1, "Must be unset");
    }
    assert(frame_complete != -1, "Must be set");
    assert(stack_slots != -1, "Must be set");
    assert(vep_offset != -1, "Must be set");
#endif

    __ flush();
    nmethod* nm = nmethod::new_native_nmethod(method,
                                              compile_id,
                                              masm->code(),
                                              vep_offset,
                                              frame_complete,
                                              stack_slots,
                                              in_ByteSize(-1),
                                              in_ByteSize(-1),
                                              oop_maps,
                                              exception_offset);
    if (nm == nullptr) return nm;
    if (method->is_continuation_enter_intrinsic()) {
      ContinuationEntry::set_enter_code(nm, interpreted_entry_offset);
    } else if (method->is_continuation_yield_intrinsic()) {
      _cont_doYield_stub = nm;
    }
    return nm;
  }

  if (method->is_method_handle_intrinsic()) {
    vmIntrinsics::ID iid = method->intrinsic_id();
    intptr_t start = (intptr_t)__ pc();
    int vep_offset = ((intptr_t)__ pc()) - start;
    gen_special_dispatch(masm,
                         method,
                         in_sig_bt,
                         in_regs);
    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
    __ flush();
    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
    return nmethod::new_native_nmethod(method,
                                       compile_id,
                                       masm->code(),
                                       vep_offset,
                                       frame_complete,
                                       stack_slots / VMRegImpl::slots_per_word,
                                       in_ByteSize(-1),
                                       in_ByteSize(-1),
                                       nullptr);
  }
  address native_func = method->native_function();
  assert(native_func != nullptr, "must have function");

  // An OopMap for lock (and class if static)
  OopMapSet *oop_maps = new OopMapSet();
  intptr_t start = (intptr_t)__ pc();

  // We have received a description of where all the Java args are located
  // on entry to the wrapper. We need to convert these args to where
  // the jni function will expect them. To figure out where they go
  // we convert the java signature to a C signature by inserting
  // the hidden arguments as arg[0] and possibly arg[1] (static method)

  const int total_in_args = method->size_of_parameters();
  int total_c_args = total_in_args + (method->is_static() ? 2 : 1);

  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
  BasicType* in_elem_bt = nullptr;

  int argc = 0;
  out_sig_bt[argc++] = T_ADDRESS;
  if (method->is_static()) {
    out_sig_bt[argc++] = T_OBJECT;
  }

  for (int i = 0; i < total_in_args ; i++) {
    out_sig_bt[argc++] = in_sig_bt[i];
  }

  // Now figure out where the args must be stored and how much stack space
  // they require.
  int out_arg_slots;
  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);

  // Compute framesize for the wrapper. We need to handlize all oops in
  // incoming registers

  // Calculate the total number of stack slots we will need.
2078 2079 // First count the abi requirement plus all of the outgoing args 2080 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 2081 2082 // Now the space for the inbound oop handle area 2083 int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers 2084 2085 int oop_handle_offset = stack_slots; 2086 stack_slots += total_save_slots; 2087 2088 // Now any space we need for handlizing a klass if static method 2089 2090 int klass_slot_offset = 0; 2091 int klass_offset = -1; 2092 int lock_slot_offset = 0; 2093 bool is_static = false; 2094 2095 if (method->is_static()) { 2096 klass_slot_offset = stack_slots; 2097 stack_slots += VMRegImpl::slots_per_word; 2098 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 2099 is_static = true; 2100 } 2101 2102 // Plus a lock if needed 2103 2104 if (method->is_synchronized()) { 2105 lock_slot_offset = stack_slots; 2106 stack_slots += VMRegImpl::slots_per_word; 2107 } 2108 2109 // Now a place (+2) to save return values or temp during shuffling 2110 // + 4 for return address (which we own) and saved rbp 2111 stack_slots += 6; 2112 2113 // Ok The space we have allocated will look like: 2114 // 2115 // 2116 // FP-> | | 2117 // |---------------------| 2118 // | 2 slots for moves | 2119 // |---------------------| 2120 // | lock box (if sync) | 2121 // |---------------------| <- lock_slot_offset 2122 // | klass (if static) | 2123 // |---------------------| <- klass_slot_offset 2124 // | oopHandle area | 2125 // |---------------------| <- oop_handle_offset (6 java arg registers) 2126 // | outbound memory | 2127 // | based arguments | 2128 // | | 2129 // |---------------------| 2130 // | | 2131 // SP-> | out_preserved_slots | 2132 // 2133 // 2134 2135 2136 // Now compute actual number of stack words we need rounding to make 2137 // stack properly aligned. 2138 stack_slots = align_up(stack_slots, StackAlignmentInSlots); 2139 2140 int stack_size = stack_slots * VMRegImpl::stack_slot_size; 2141 2142 // First thing make an ic check to see if we should even be here 2143 2144 // We are free to use all registers as temps without saving them and 2145 // restoring them except rbp. rbp is the only callee save register 2146 // as far as the interpreter and the compiler(s) are concerned. 2147 2148 const Register receiver = j_rarg0; 2149 2150 Label exception_pending; 2151 2152 assert_different_registers(receiver, rscratch1, rscratch2); 2153 __ verify_oop(receiver); 2154 __ ic_check(8 /* end_alignment */); 2155 2156 int vep_offset = ((intptr_t)__ pc()) - start; 2157 2158 if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { 2159 Label L_skip_barrier; 2160 Register klass = r10; 2161 __ mov_metadata(klass, method->method_holder()); // InstanceKlass* 2162 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/); 2163 2164 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path 2165 2166 __ bind(L_skip_barrier); 2167 } 2168 2169 #ifdef COMPILER1 2170 // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available. 
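  // (Sketch of that fast path, assuming the usual 64-bit markWord layout: load
  // the mark word, and if the object is unlocked with its hash bits already
  // set, return them without calling into the VM; a zero hash field means
  // "not yet computed" and falls through to the normal entry.)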
  if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
    inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/);
  }
#endif // COMPILER1

  // The instruction at the verified entry point must be 5 bytes or longer
  // because it can be patched on the fly by make_non_entrant. The stack bang
  // instruction fits that requirement.

  // Generate stack overflow check
  __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size());

  // Generate a new frame for the wrapper.
  __ enter();
  // -2 because return address is already present and so is saved rbp
  __ subptr(rsp, stack_size - 2*wordSize);

  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  // The native wrapper is not hot enough to micro-optimize the nmethod entry barrier with an out-of-line stub.
  bs->nmethod_entry_barrier(masm, nullptr /* slow_path */, nullptr /* continuation */);

  // Frame is now completed as far as size and linkage.
  int frame_complete = ((intptr_t)__ pc()) - start;

  if (UseRTMLocking) {
    // Abort RTM transaction before calling JNI
    // because critical section will be large and will be
    // aborted anyway. Also nmethod could be deoptimized.
    __ xabort(0);
  }

#ifdef ASSERT
  __ check_stack_alignment(rsp, "improperly aligned stack");
#endif /* ASSERT */


  // We use r14 as the oop handle for the receiver/klass
  // It is callee save so it survives the call to native

  const Register oop_handle_reg = r14;

  //
  // We immediately shuffle the arguments so that any vm call we have to
  // make from here on out (sync slow path, jvmti, etc.) we will have
  // captured the oops from our caller and have a valid oopMap for
  // them.

  // -----------------
  // The Grand Shuffle

  // The Java calling convention is either equal (linux) or denser (win64) than the
  // c calling convention. However, because of the jni_env argument, the c calling
  // convention always has at least one more (and two for static) arguments than Java.
  // Therefore if we move the args from java -> c backwards then we will never have
  // a register->register conflict and we don't have to build a dependency graph
  // and figure out how to break any cycles.
  //

  // Record esp-based slot for receiver on stack for non-static methods
  int receiver_offset = -1;

  // This is a trick. We double the stack slots so we can claim
  // the oops in the caller's frame. Since we are sure to have
  // more args than the caller, doubling is enough to make
  // sure we can capture all the incoming oop args from the
  // caller.
  //
  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);

  // Mark location of rbp (someday)
  // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));

  // Use eax, ebx as temporaries during any memory-memory moves we have to do
  // All inbound args are referenced based on rbp and all outbound args via rsp.
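  // A worked example of why moving backwards is safe (registers here are
  // hypothetical): suppose Java passes a0 in R1 and a1 in R2, while the C
  // convention wants env in R1, a0 in R2 and a1 in R3. Copying forwards,
  // a0: R1 -> R2 would clobber a1 before it is read. Copying backwards,
  // a1: R2 -> R3 happens first, then a0: R1 -> R2, and no source is ever
  // overwritten, because each C position is strictly later than the Java
  // position that feeds it.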
2245 2246 2247 #ifdef ASSERT 2248 bool reg_destroyed[Register::number_of_registers]; 2249 bool freg_destroyed[XMMRegister::number_of_registers]; 2250 for ( int r = 0 ; r < Register::number_of_registers ; r++ ) { 2251 reg_destroyed[r] = false; 2252 } 2253 for ( int f = 0 ; f < XMMRegister::number_of_registers ; f++ ) { 2254 freg_destroyed[f] = false; 2255 } 2256 2257 #endif /* ASSERT */ 2258 2259 // For JNI natives the incoming and outgoing registers are offset upwards. 2260 GrowableArray<int> arg_order(2 * total_in_args); 2261 2262 VMRegPair tmp_vmreg; 2263 tmp_vmreg.set2(rbx->as_VMReg()); 2264 2265 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { 2266 arg_order.push(i); 2267 arg_order.push(c_arg); 2268 } 2269 2270 int temploc = -1; 2271 for (int ai = 0; ai < arg_order.length(); ai += 2) { 2272 int i = arg_order.at(ai); 2273 int c_arg = arg_order.at(ai + 1); 2274 __ block_comment(err_msg("move %d -> %d", i, c_arg)); 2275 #ifdef ASSERT 2276 if (in_regs[i].first()->is_Register()) { 2277 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); 2278 } else if (in_regs[i].first()->is_XMMRegister()) { 2279 assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!"); 2280 } 2281 if (out_regs[c_arg].first()->is_Register()) { 2282 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 2283 } else if (out_regs[c_arg].first()->is_XMMRegister()) { 2284 freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; 2285 } 2286 #endif /* ASSERT */ 2287 switch (in_sig_bt[i]) { 2288 case T_ARRAY: 2289 case T_OBJECT: 2290 __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 2291 ((i == 0) && (!is_static)), 2292 &receiver_offset); 2293 break; 2294 case T_VOID: 2295 break; 2296 2297 case T_FLOAT: 2298 __ float_move(in_regs[i], out_regs[c_arg]); 2299 break; 2300 2301 case T_DOUBLE: 2302 assert( i + 1 < total_in_args && 2303 in_sig_bt[i + 1] == T_VOID && 2304 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 2305 __ double_move(in_regs[i], out_regs[c_arg]); 2306 break; 2307 2308 case T_LONG : 2309 __ long_move(in_regs[i], out_regs[c_arg]); 2310 break; 2311 2312 case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 2313 2314 default: 2315 __ move32_64(in_regs[i], out_regs[c_arg]); 2316 } 2317 } 2318 2319 int c_arg; 2320 2321 // Pre-load a static method's oop into r14. Used both by locking code and 2322 // the normal JNI call code. 2323 // point c_arg at the first arg that is already loaded in case we 2324 // need to spill before we call out 2325 c_arg = total_c_args - total_in_args; 2326 2327 if (method->is_static()) { 2328 2329 // load oop into a register 2330 __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); 2331 2332 // Now handlize the static class mirror it's known not-null. 2333 __ movptr(Address(rsp, klass_offset), oop_handle_reg); 2334 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 2335 2336 // Now get the handle 2337 __ lea(oop_handle_reg, Address(rsp, klass_offset)); 2338 // store the klass handle as second argument 2339 __ movptr(c_rarg1, oop_handle_reg); 2340 // and protect the arg if we must spill 2341 c_arg--; 2342 } 2343 2344 // Change state to native (we save the return address in the thread, since it might not 2345 // be pushed on the stack when we do a stack traversal). It is enough that the pc() 2346 // points into the right code segment. 
// It does not have to be the correct return pc.
  // We use the same pc/oopMap repeatedly when we call out.

  intptr_t the_pc = (intptr_t) __ pc();
  oop_maps->add_gc_map(the_pc - start, map);

  __ set_last_Java_frame(rsp, noreg, (address)the_pc, rscratch1);


  // We have all of the arguments set up at this point. We must not touch any
  // register argument registers from here on (if we spilled and reloaded them,
  // the oop map would not describe those slots, and any oops in them could be
  // missed).

  {
    SkipIfEqual skip(masm, &DTraceMethodProbes, false, rscratch1);
    // protect the args we've loaded
    save_args(masm, total_c_args, c_arg, out_regs);
    __ mov_metadata(c_rarg1, method());
    __ call_VM_leaf(
      CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
      r15_thread, c_rarg1);
    restore_args(masm, total_c_args, c_arg, out_regs);
  }

  // RedefineClasses() tracing support for obsolete method entry
  if (log_is_enabled(Trace, redefine, class, obsolete)) {
    // protect the args we've loaded
    save_args(masm, total_c_args, c_arg, out_regs);
    __ mov_metadata(c_rarg1, method());
    __ call_VM_leaf(
      CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
      r15_thread, c_rarg1);
    restore_args(masm, total_c_args, c_arg, out_regs);
  }

  // Lock a synchronized method

  // Register definitions used by locking and unlocking

  const Register swap_reg = rax;  // Must use rax for cmpxchg instruction
  const Register obj_reg  = rbx;  // Will contain the oop
  const Register lock_reg = r13;  // Address of compiler lock object (BasicLock)
  const Register old_hdr  = r13;  // value of old header at unlock time

  Label slow_path_lock;
  Label lock_done;

  if (method->is_synchronized()) {
    Label count_mon;

    const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();

    // Get the handle (the 2nd argument)
    __ mov(oop_handle_reg, c_rarg1);

    // Get address of the box

    __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));

    // Load the oop from the handle
    __ movptr(obj_reg, Address(oop_handle_reg, 0));

    if (LockingMode == LM_MONITOR) {
      __ jmp(slow_path_lock);
    } else if (LockingMode == LM_LEGACY) {
      // Load immediate 1 into swap_reg %rax
      __ movl(swap_reg, 1);

      // Load (object->mark() | 1) into swap_reg %rax
      __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
      if (EnableValhalla) {
        // Mask inline_type bit such that we go to the slow path if object is an inline type
        __ andptr(swap_reg, ~((int) markWord::inline_type_bit_in_place));
      }

      // Save (object->mark() | 1) into BasicLock's displaced header
      __ movptr(Address(lock_reg, mark_word_offset), swap_reg);

      // src -> dest iff dest == rax else rax <- dest
      __ lock();
      __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
      __ jcc(Assembler::equal, count_mon);

      // Hmm should this move to the slow path code area???

      // Test if the oopMark is an obvious stack pointer, i.e.,
      //  1) (mark & 3) == 0, and
      //  2) rsp <= mark < rsp + os::vm_page_size()
      // These 3 tests can be done by evaluating the following
      // expression: ((mark - rsp) & (3 - os::vm_page_size())),
      // assuming both the stack pointer and the page size have their
      // least significant 2 bits clear.
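      // A worked example with 4K pages (values illustrative): the mask
      // 3 - 4096 is 0x...fffff003 in 64-bit two's complement. If the mark is
      // a slot in our own frame, e.g. mark == rsp + 0x40, then
      // (mark - rsp) & 0x...fffff003 == 0 and the lock is recursive. A
      // neutral (unlocked) mark word has its low lock bit set, so the low
      // bits survive the mask and we take the slow path.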
      // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg

      __ subptr(swap_reg, rsp);
      __ andptr(swap_reg, 3 - (int)os::vm_page_size());

      // Save the test result; for the recursive case, the result is zero
      __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
      __ jcc(Assembler::notEqual, slow_path_lock);
    } else {
      assert(LockingMode == LM_LIGHTWEIGHT, "must be");
      __ lightweight_lock(obj_reg, swap_reg, r15_thread, rscratch1, slow_path_lock);
    }
    __ bind(count_mon);
    __ inc_held_monitor_count();

    // Slow path will re-enter here
    __ bind(lock_done);
  }

  // Finally just about ready to make the JNI call

  // get JNIEnv* which is first argument to native
  __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));

  // Now set thread in native
  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);

  __ call(RuntimeAddress(native_func));

  // Verify or restore cpu control state after JNI call
  __ restore_cpu_control_state_after_jni(rscratch1);

  // Unpack native results.
  switch (ret_type) {
  case T_BOOLEAN: __ c2bool(rax);           break;
  case T_CHAR   : __ movzwl(rax, rax);      break;
  case T_BYTE   : __ sign_extend_byte(rax); break;
  case T_SHORT  : __ sign_extend_short(rax); break;
  case T_INT    : /* nothing to do */       break;
  case T_DOUBLE :
  case T_FLOAT  :
    // Result is in xmm0 we'll save as needed
    break;
  case T_ARRAY:  // Really a handle
  case T_OBJECT: // Really a handle
    break; // can't de-handlize until after safepoint check
  case T_VOID: break;
  case T_LONG: break;
  default       : ShouldNotReachHere();
  }

  Label after_transition;

  // Switch thread to "native transition" state before reading the synchronization state.
  // This additional state is necessary because reading and testing the synchronization
  // state is not atomic w.r.t. GC, as this scenario demonstrates:
  //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
  //     VM thread changes sync state to synchronizing and suspends threads for GC.
  //     Thread A is resumed to finish this native method, but doesn't block here since it
  //     didn't see any synchronization in progress, and escapes.
  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);

  // Force this write out before the read below
  if (!UseSystemMemoryBarrier) {
    __ membar(Assembler::Membar_mask_bits(
              Assembler::LoadLoad | Assembler::LoadStore |
              Assembler::StoreLoad | Assembler::StoreStore));
  }

  // check for safepoint operation in progress and/or pending suspend requests
  {
    Label Continue;
    Label slow_path;

    __ safepoint_poll(slow_path, r15_thread, true /* at_return */, false /* in_nmethod */);

    __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
    __ jcc(Assembler::equal, Continue);
    __ bind(slow_path);

    // Don't use call_VM, as it will see a possible pending exception and forward it
    // and never return here, preventing us from clearing _last_native_pc down below.
    // We also can't use call_VM_leaf, as it will check to see if rsi & rdi are
    // preserved and correspond to the bcp/locals pointers. So we do a runtime call
    // by hand.
2522 // 2523 __ vzeroupper(); 2524 save_native_result(masm, ret_type, stack_slots); 2525 __ mov(c_rarg0, r15_thread); 2526 __ mov(r12, rsp); // remember sp 2527 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2528 __ andptr(rsp, -16); // align stack as required by ABI 2529 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); 2530 __ mov(rsp, r12); // restore sp 2531 __ reinit_heapbase(); 2532 // Restore any method result value 2533 restore_native_result(masm, ret_type, stack_slots); 2534 __ bind(Continue); 2535 } 2536 2537 // change thread state 2538 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); 2539 __ bind(after_transition); 2540 2541 Label reguard; 2542 Label reguard_done; 2543 __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); 2544 __ jcc(Assembler::equal, reguard); 2545 __ bind(reguard_done); 2546 2547 // native result if any is live 2548 2549 // Unlock 2550 Label slow_path_unlock; 2551 Label unlock_done; 2552 if (method->is_synchronized()) { 2553 2554 Label fast_done; 2555 2556 // Get locked oop from the handle we passed to jni 2557 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 2558 2559 if (LockingMode == LM_LEGACY) { 2560 Label not_recur; 2561 // Simple recursive lock? 2562 __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), NULL_WORD); 2563 __ jcc(Assembler::notEqual, not_recur); 2564 __ dec_held_monitor_count(); 2565 __ jmpb(fast_done); 2566 __ bind(not_recur); 2567 } 2568 2569 // Must save rax if it is live now because cmpxchg must use it 2570 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2571 save_native_result(masm, ret_type, stack_slots); 2572 } 2573 2574 if (LockingMode == LM_MONITOR) { 2575 __ jmp(slow_path_unlock); 2576 } else if (LockingMode == LM_LEGACY) { 2577 // get address of the stack lock 2578 __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2579 // get old displaced header 2580 __ movptr(old_hdr, Address(rax, 0)); 2581 2582 // Atomic swap old header if oop still contains the stack lock 2583 __ lock(); 2584 __ cmpxchgptr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2585 __ jcc(Assembler::notEqual, slow_path_unlock); 2586 __ dec_held_monitor_count(); 2587 } else { 2588 assert(LockingMode == LM_LIGHTWEIGHT, "must be"); 2589 __ lightweight_unlock(obj_reg, swap_reg, r15_thread, lock_reg, slow_path_unlock); 2590 __ dec_held_monitor_count(); 2591 } 2592 2593 // slow path re-enters here 2594 __ bind(unlock_done); 2595 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2596 restore_native_result(masm, ret_type, stack_slots); 2597 } 2598 2599 __ bind(fast_done); 2600 } 2601 { 2602 SkipIfEqual skip(masm, &DTraceMethodProbes, false, rscratch1); 2603 save_native_result(masm, ret_type, stack_slots); 2604 __ mov_metadata(c_rarg1, method()); 2605 __ call_VM_leaf( 2606 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 2607 r15_thread, c_rarg1); 2608 restore_native_result(masm, ret_type, stack_slots); 2609 } 2610 2611 __ reset_last_Java_frame(false); 2612 2613 // Unbox oop result, e.g. JNIHandles::resolve value. 
2614 if (is_reference_type(ret_type)) { 2615 __ resolve_jobject(rax /* value */, 2616 r15_thread /* thread */, 2617 rcx /* tmp */); 2618 } 2619 2620 if (CheckJNICalls) { 2621 // clear_pending_jni_exception_check 2622 __ movptr(Address(r15_thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD); 2623 } 2624 2625 // reset handle block 2626 __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset())); 2627 __ movl(Address(rcx, JNIHandleBlock::top_offset()), NULL_WORD); 2628 2629 // pop our frame 2630 2631 __ leave(); 2632 2633 // Any exception pending? 2634 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2635 __ jcc(Assembler::notEqual, exception_pending); 2636 2637 // Return 2638 2639 __ ret(0); 2640 2641 // Unexpected paths are out of line and go here 2642 2643 // forward the exception 2644 __ bind(exception_pending); 2645 2646 // and forward the exception 2647 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 2648 2649 // Slow path locking & unlocking 2650 if (method->is_synchronized()) { 2651 2652 // BEGIN Slow path lock 2653 __ bind(slow_path_lock); 2654 2655 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM 2656 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2657 2658 // protect the args we've loaded 2659 save_args(masm, total_c_args, c_arg, out_regs); 2660 2661 __ mov(c_rarg0, obj_reg); 2662 __ mov(c_rarg1, lock_reg); 2663 __ mov(c_rarg2, r15_thread); 2664 2665 // Not a leaf but we have last_Java_frame setup as we want 2666 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); 2667 restore_args(masm, total_c_args, c_arg, out_regs); 2668 2669 #ifdef ASSERT 2670 { Label L; 2671 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2672 __ jcc(Assembler::equal, L); 2673 __ stop("no pending exception allowed on exit from monitorenter"); 2674 __ bind(L); 2675 } 2676 #endif 2677 __ jmp(lock_done); 2678 2679 // END Slow path lock 2680 2681 // BEGIN Slow path unlock 2682 __ bind(slow_path_unlock); 2683 2684 // If we haven't already saved the native result we must save it now as xmm registers 2685 // are still exposed. 
    __ vzeroupper();
    if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
      save_native_result(masm, ret_type, stack_slots);
    }

    __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));

    __ mov(c_rarg0, obj_reg);
    __ mov(c_rarg2, r15_thread);
    __ mov(r12, rsp); // remember sp
    __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
    __ andptr(rsp, -16); // align stack as required by ABI

    // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
    // NOTE that obj_reg == rbx currently
    __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset())));
    __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);

    // args are (oop obj, BasicLock* lock, JavaThread* thread)
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
    __ mov(rsp, r12); // restore sp
    __ reinit_heapbase();
#ifdef ASSERT
    {
      Label L;
      __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
      __ jcc(Assembler::equal, L);
      __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
      __ bind(L);
    }
#endif /* ASSERT */

    __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx);

    if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
      restore_native_result(masm, ret_type, stack_slots);
    }
    __ jmp(unlock_done);

    // END Slow path unlock

  } // synchronized

  // SLOW PATH Reguard the stack if needed

  __ bind(reguard);
  __ vzeroupper();
  save_native_result(masm, ret_type, stack_slots);
  __ mov(r12, rsp); // remember sp
  __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
  __ andptr(rsp, -16); // align stack as required by ABI
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
  __ mov(rsp, r12); // restore sp
  __ reinit_heapbase();
  restore_native_result(masm, ret_type, stack_slots);
  // and continue
  __ jmp(reguard_done);



  __ flush();

  nmethod *nm = nmethod::new_native_nmethod(method,
                                            compile_id,
                                            masm->code(),
                                            vep_offset,
                                            frame_complete,
                                            stack_slots / VMRegImpl::slots_per_word,
                                            (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                            in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
                                            oop_maps);

  return nm;
}

// this function returns the adjustment (in number of words) to a c2i adapter
// activation for use during deoptimization
int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
}


uint SharedRuntime::out_preserve_stack_slots() {
  return 0;
}


// Number of stack slots between incoming argument block and the start of
// a new frame. The PROLOG must add this many slots to the stack. The
// EPILOG must remove this many slots. amd64 needs two slots for
// return address.
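// (Accounting for the constant below: the return address and the saved rbp
// are one word -- two slots -- each, which gives 4; VerifyStackAtCalls
// reserves one extra word, presumably used as a stack-depth canary.)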
2777 uint SharedRuntime::in_preserve_stack_slots() { 2778 return 4 + 2 * VerifyStackAtCalls; 2779 } 2780 2781 //------------------------------generate_deopt_blob---------------------------- 2782 void SharedRuntime::generate_deopt_blob() { 2783 // Allocate space for the code 2784 ResourceMark rm; 2785 // Setup code generation tools 2786 int pad = 0; 2787 if (UseAVX > 2) { 2788 pad += 1024; 2789 } 2790 #if INCLUDE_JVMCI 2791 if (EnableJVMCI) { 2792 pad += 512; // Increase the buffer size when compiling for JVMCI 2793 } 2794 #endif 2795 CodeBuffer buffer("deopt_blob", 2560+pad, 1024); 2796 MacroAssembler* masm = new MacroAssembler(&buffer); 2797 int frame_size_in_words; 2798 OopMap* map = nullptr; 2799 OopMapSet *oop_maps = new OopMapSet(); 2800 2801 // ------------- 2802 // This code enters when returning to a de-optimized nmethod. A return 2803 // address has been pushed on the stack, and return values are in 2804 // registers. 2805 // If we are doing a normal deopt then we were called from the patched 2806 // nmethod from the point we returned to the nmethod. So the return 2807 // address on the stack is wrong by NativeCall::instruction_size 2808 // We will adjust the value so it looks like we have the original return 2809 // address on the stack (like when we eagerly deoptimized). 2810 // In the case of an exception pending when deoptimizing, we enter 2811 // with a return address on the stack that points after the call we patched 2812 // into the exception handler. We have the following register state from, 2813 // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). 2814 // rax: exception oop 2815 // rbx: exception handler 2816 // rdx: throwing pc 2817 // So in this case we simply jam rdx into the useless return address and 2818 // the stack looks just like we want. 2819 // 2820 // At this point we need to de-opt. We save the argument return 2821 // registers. We call the first C routine, fetch_unroll_info(). This 2822 // routine captures the return values and returns a structure which 2823 // describes the current frame size and the sizes of all replacement frames. 2824 // The current frame is compiled code and may contain many inlined 2825 // functions, each with their own JVM state. We pop the current frame, then 2826 // push all the new frames. Then we call the C routine unpack_frames() to 2827 // populate these frames. Finally unpack_frames() returns us the new target 2828 // address. Notice that callee-save registers are BLOWN here; they have 2829 // already been captured in the vframeArray at the time the return PC was 2830 // patched. 2831 address start = __ pc(); 2832 Label cont; 2833 2834 // Prolog for non exception case! 2835 2836 // Save everything in sight. 2837 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2838 2839 // Normal deoptimization. Save exec mode for unpack_frames. 
  __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved
  __ jmp(cont);

  int reexecute_offset = __ pc() - start;
#if INCLUDE_JVMCI && !defined(COMPILER1)
  if (EnableJVMCI && UseJVMCICompiler) {
    // JVMCI does not use this kind of deoptimization
    __ should_not_reach_here();
  }
#endif

  // Reexecute case
  // The return address is the pc that describes which bci to re-execute at.

  // No need to update map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true);

  __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
  __ jmp(cont);

#if INCLUDE_JVMCI
  Label after_fetch_unroll_info_call;
  int implicit_exception_uncommon_trap_offset = 0;
  int uncommon_trap_offset = 0;

  if (EnableJVMCI) {
    implicit_exception_uncommon_trap_offset = __ pc() - start;

    __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
    __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), NULL_WORD);

    uncommon_trap_offset = __ pc() - start;

    // Save everything in sight.
    RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true);
    // fetch_unroll_info needs to call last_java_frame()
    __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);

    __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())));
    __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1);

    __ movl(r14, Deoptimization::Unpack_reexecute);
    __ mov(c_rarg0, r15_thread);
    __ movl(c_rarg2, r14); // exec mode
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
    oop_maps->add_gc_map(__ pc() - start, map->deep_copy());

    __ reset_last_Java_frame(false);

    __ jmp(after_fetch_unroll_info_call);
  } // EnableJVMCI
#endif // INCLUDE_JVMCI

  int exception_offset = __ pc() - start;

  // Prolog for exception case

  // all registers are dead at this entry point, except for rax and
  // rdx, which contain the exception oop and exception pc
  // respectively. Set them in TLS and fall thru to the
  // unpack_with_exception_in_tls entry point.

  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);

  int exception_in_tls_offset = __ pc() - start;

  // new implementation because exception oop is now passed in JavaThread

  // Prolog for exception case
  // All registers must be preserved because they might be used by LinearScan
  // Exception oop and throwing PC are passed in JavaThread
  // tos: stack at point of call to method that threw the exception (i.e. only
  // args are on the stack, no return address)

  // make room on stack for the return address
  // It will be patched later with the throwing pc. The correct value is not
  // available now because loading it from memory would destroy registers.
  __ push(0);

  // Save everything in sight.
  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true);

  // Now it is safe to overwrite any register

  // Deopt during an exception. Save exec mode for unpack_frames.
  __ movl(r14, Deoptimization::Unpack_exception); // callee-saved

  // load throwing pc from JavaThread and patch it as the return address
  // of the current frame. Then clear the field in JavaThread

  __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
  __ movptr(Address(rbp, wordSize), rdx);
  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD);

#ifdef ASSERT
  // verify that there is really an exception oop in JavaThread
  __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
  __ verify_oop(rax);

  // verify that there is no pending exception
  Label no_pending_exception;
  __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
  __ testptr(rax, rax);
  __ jcc(Assembler::zero, no_pending_exception);
  __ stop("must not have pending exception here");
  __ bind(no_pending_exception);
#endif

  __ bind(cont);

  // Call C code. Need thread and this frame, but NOT official VM entry
  // crud. We cannot block on this call, no GC can happen.
  //
  // UnrollBlock* fetch_unroll_info(JavaThread* thread)

  // fetch_unroll_info needs to call last_java_frame().

  __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);
#ifdef ASSERT
  { Label L;
    __ cmpptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
    __ jcc(Assembler::equal, L);
    __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
    __ bind(L);
  }
#endif // ASSERT
  __ mov(c_rarg0, r15_thread);
  __ movl(c_rarg1, r14); // exec_mode
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));

  // Need to have an oopmap that tells fetch_unroll_info where to
  // find any register it might need.
  oop_maps->add_gc_map(__ pc() - start, map);

  __ reset_last_Java_frame(false);

#if INCLUDE_JVMCI
  if (EnableJVMCI) {
    __ bind(after_fetch_unroll_info_call);
  }
#endif

  // Load UnrollBlock* into rdi
  __ mov(rdi, rax);

  __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset()));
  Label noException;
  __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending?
  __ jcc(Assembler::notEqual, noException);
  __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
  // QQQ this is useless, it was null above
  __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD);
  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD);

  __ verify_oop(rax);

  // Overwrite the result registers with the exception results.
  __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
  // I think this is useless
  __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx);

  __ bind(noException);

  // Only register save data is on the stack.
  // Now restore the result registers. Everything else is either dead
  // or captured in the vframeArray.
  RegisterSaver::restore_result_registers(masm);

  // All of the register save area has been popped off the stack. Only the
  // return address remains.

  // Pop all the frames we must move/replace.
3014 // 3015 // Frame picture (youngest to oldest) 3016 // 1: self-frame (no frame link) 3017 // 2: deopting frame (no frame link) 3018 // 3: caller of deopting frame (could be compiled/interpreted). 3019 // 3020 // Note: by leaving the return address of self-frame on the stack 3021 // and using the size of frame 2 to adjust the stack 3022 // when we are done the return to frame 3 will still be on the stack. 3023 3024 // Pop deoptimized frame 3025 __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset())); 3026 __ addptr(rsp, rcx); 3027 3028 // rsp should be pointing at the return address to the caller (3) 3029 3030 // Pick up the initial fp we should save 3031 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) 3032 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset())); 3033 3034 #ifdef ASSERT 3035 // Compilers generate code that bang the stack by as much as the 3036 // interpreter would need. So this stack banging should never 3037 // trigger a fault. Verify that it does not on non product builds. 3038 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset())); 3039 __ bang_stack_size(rbx, rcx); 3040 #endif 3041 3042 // Load address of array of frame pcs into rcx 3043 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset())); 3044 3045 // Trash the old pc 3046 __ addptr(rsp, wordSize); 3047 3048 // Load address of array of frame sizes into rsi 3049 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset())); 3050 3051 // Load counter into rdx 3052 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset())); 3053 3054 // Now adjust the caller's stack to make up for the extra locals 3055 // but record the original sp so that we can save it in the skeletal interpreter 3056 // frame and the stack walking of interpreter_sender will get the unextended sp 3057 // value and not the "real" sp value. 3058 3059 const Register sender_sp = r8; 3060 3061 __ mov(sender_sp, rsp); 3062 __ movl(rbx, Address(rdi, 3063 Deoptimization::UnrollBlock:: 3064 caller_adjustment_offset())); 3065 __ subptr(rsp, rbx); 3066 3067 // Push interpreter frames in a loop 3068 Label loop; 3069 __ bind(loop); 3070 __ movptr(rbx, Address(rsi, 0)); // Load frame size 3071 __ subptr(rbx, 2*wordSize); // We'll push pc and ebp by hand 3072 __ pushptr(Address(rcx, 0)); // Save return address 3073 __ enter(); // Save old & set new ebp 3074 __ subptr(rsp, rbx); // Prolog 3075 // This value is corrected by layout_activation_impl 3076 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); 3077 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable 3078 __ mov(sender_sp, rsp); // Pass sender_sp to next frame 3079 __ addptr(rsi, wordSize); // Bump array pointer (sizes) 3080 __ addptr(rcx, wordSize); // Bump array pointer (pcs) 3081 __ decrementl(rdx); // Decrement counter 3082 __ jcc(Assembler::notZero, loop); 3083 __ pushptr(Address(rcx, 0)); // Save final return address 3084 3085 // Re-push self-frame 3086 __ enter(); // Save old & set new ebp 3087 3088 // Allocate a full sized register save area. 3089 // Return address and rbp are in place, so we allocate two less words. 
3090 __ subptr(rsp, (frame_size_in_words - 2) * wordSize);
3091
3092 // Restore frame locals after moving the frame
3093 __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0);
3094 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
3095
3096 // Call C code. Need thread but NOT official VM entry
3097 // crud. We cannot block on this call, no GC can happen. Call should
3098 // restore return values to their stack-slots with the new SP.
3099 //
3100 // BasicType unpack_frames(JavaThread* thread, int exec_mode)
3101
3102 // Use rbp because the frames look interpreted now
3103 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
3104 // Don't need the precise return PC here, just precise enough to point into this code blob.
3105 address the_pc = __ pc();
3106 __ set_last_Java_frame(noreg, rbp, the_pc, rscratch1);
3107
3108 __ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by the ABI
3109 __ mov(c_rarg0, r15_thread);
3110 __ movl(c_rarg1, r14); // second arg: exec_mode
3111 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
3112 // Revert SP alignment after call since we're going to do some SP relative addressing below
3113 __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset()));
3114
3115 // Set an oopmap for the call site
3116 // Use the same PC we used for the last java frame
3117 oop_maps->add_gc_map(the_pc - start,
3118 new OopMap(frame_size_in_words, 0));
3119
3120 // Clear fp AND pc
3121 __ reset_last_Java_frame(true);
3122
3123 // Collect return values
3124 __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
3125 __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes()));
3126 // I think this is useless (throwing pc?)
3127 __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes()));
3128
3129 // Pop self-frame.
3130 __ leave(); // Epilog
3131
3132 // Jump to interpreter
3133 __ ret(0);
3134
3135 // Make sure all code is generated
3136 masm->flush();
3137
3138 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
3139 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3140 #if INCLUDE_JVMCI
3141 if (EnableJVMCI) {
3142 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
3143 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
3144 }
3145 #endif
3146 }
3147
3148 #ifdef COMPILER2
3149 //------------------------------generate_uncommon_trap_blob--------------------
3150 void SharedRuntime::generate_uncommon_trap_blob() {
3151 // Allocate space for the code
3152 ResourceMark rm;
3153 // Setup code generation tools
3154 CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
3155 MacroAssembler* masm = new MacroAssembler(&buffer);
3156
3157 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
3158
3159 address start = __ pc();
3160
3161 if (UseRTMLocking) {
3162 // Abort RTM transaction before possible nmethod deoptimization.
3163 __ xabort(0);
3164 }
3165
3166 // Push self-frame. We get here with a return address on the
3167 // stack, so rsp is 8-byte aligned until we allocate our frame.
3168 __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog
3169
3170 // No callee-saved registers.
rbp is assumed implicitly saved
3171 __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
3172
3173 // compiler left unloaded_class_index in j_rarg0; move it to where the
3174 // runtime expects it.
3175 __ movl(c_rarg1, j_rarg0);
3176
3177 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);
3178
3179 // Call C code. Need thread but NOT official VM entry
3180 // crud. We cannot block on this call, no GC can happen. Call should
3181 // capture callee-saved registers as well as return values.
3182 // Thread is in rdi already.
3183 //
3184 // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode);
3185
3186 __ mov(c_rarg0, r15_thread);
3187 __ movl(c_rarg2, Deoptimization::Unpack_uncommon_trap);
3188 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
3189
3190 // Set an oopmap for the call site
3191 OopMapSet* oop_maps = new OopMapSet();
3192 OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
3193
3194 // location of rbp is known implicitly by the frame sender code
3195
3196 oop_maps->add_gc_map(__ pc() - start, map);
3197
3198 __ reset_last_Java_frame(false);
3199
3200 // Load UnrollBlock* into rdi
3201 __ mov(rdi, rax);
3202
3203 #ifdef ASSERT
3204 { Label L;
3205 __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset()),
3206 Deoptimization::Unpack_uncommon_trap);
3207 __ jcc(Assembler::equal, L);
3208 __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
3209 __ bind(L);
3210 }
3211 #endif
3212
3213 // Pop all the frames we must move/replace.
3214 //
3215 // Frame picture (youngest to oldest)
3216 // 1: self-frame (no frame link)
3217 // 2: deopting frame (no frame link)
3218 // 3: caller of deopting frame (could be compiled/interpreted).
3219
3220 // Pop self-frame. We have no frame, and must rely only on rax and rsp.
3221 __ addptr(rsp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt); // Epilog!
3222
3223 // Pop deoptimized frame (int)
3224 __ movl(rcx, Address(rdi,
3225 Deoptimization::UnrollBlock::
3226 size_of_deoptimized_frame_offset()));
3227 __ addptr(rsp, rcx);
3228
3229 // rsp should be pointing at the return address to the caller (3)
3230
3231 // Pick up the initial fp we should save
3232 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved)
3233 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset()));
3234
3235 #ifdef ASSERT
3236 // Compilers generate code that bangs the stack by as much as the
3237 // interpreter would need. So this stack banging should never
3238 // trigger a fault. Verify that it does not on non-product builds.
3239 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset()));
3240 __ bang_stack_size(rbx, rcx);
3241 #endif
3242
3243 // Load address of array of frame pcs into rcx (address*)
3244 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset()));
3245
3246 // Trash the return pc
3247 __ addptr(rsp, wordSize);
3248
3249 // Load address of array of frame sizes into rsi (intptr_t*)
3250 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset()));
3251
3252 // Counter
3253 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset())); // (int)
3254
3255 // Now adjust the caller's stack to make up for the extra locals but
3256 // record the original sp so that we can save it in the skeletal
3257 // interpreter frame and the stack walking of interpreter_sender
3258 // will get the unextended sp value and not the "real" sp value.
3259
3260 const Register sender_sp = r8;
3261
3262 __ mov(sender_sp, rsp);
3263 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset())); // (int)
3264 __ subptr(rsp, rbx);
3265
3266 // Push interpreter frames in a loop
3267 Label loop;
3268 __ bind(loop);
3269 __ movptr(rbx, Address(rsi, 0)); // Load frame size
3270 __ subptr(rbx, 2 * wordSize); // We'll push pc and rbp by hand
3271 __ pushptr(Address(rcx, 0)); // Save return address
3272 __ enter(); // Save old & set new rbp
3273 __ subptr(rsp, rbx); // Prolog
3274 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize),
3275 sender_sp); // Make it walkable
3276 // This value is corrected by layout_activation_impl
3277 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
3278 __ mov(sender_sp, rsp); // Pass sender_sp to next frame
3279 __ addptr(rsi, wordSize); // Bump array pointer (sizes)
3280 __ addptr(rcx, wordSize); // Bump array pointer (pcs)
3281 __ decrementl(rdx); // Decrement counter
3282 __ jcc(Assembler::notZero, loop);
3283 __ pushptr(Address(rcx, 0)); // Save final return address
3284
3285 // Re-push self-frame
3286 __ enter(); // Save old & set new rbp
3287 __ subptr(rsp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt);
3288 // Prolog
3289
3290 // Use rbp because the frames look interpreted now
3291 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
3292 // Don't need the precise return PC here, just precise enough to point into this code blob.
3293 address the_pc = __ pc();
3294 __ set_last_Java_frame(noreg, rbp, the_pc, rscratch1);
3295
3296 // Call C code. Need thread but NOT official VM entry
3297 // crud. We cannot block on this call, no GC can happen. Call should
3298 // restore return values to their stack-slots with the new SP.
3299 // Thread is in rdi already.
3300 //
3301 // BasicType unpack_frames(JavaThread* thread, int exec_mode);
3302
3303 __ andptr(rsp, -(StackAlignmentInBytes)); // Align SP as required by ABI
3304 __ mov(c_rarg0, r15_thread);
3305 __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap);
3306 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
3307
3308 // Set an oopmap for the call site
3309 // Use the same PC we used for the last java frame
3310 oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
3311
3312 // Clear fp AND pc
3313 __ reset_last_Java_frame(true);
3314
3315 // Pop self-frame.
3316 __ leave(); // Epilog
3317
3318 // Jump to interpreter
3319 __ ret(0);
3320
3321 // Make sure all code is generated
3322 masm->flush();
3323
3324 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps,
3325 SimpleRuntimeFrame::framesize >> 1);
3326 }
3327 #endif // COMPILER2
3328
3329 //------------------------------generate_handler_blob------------------------
3330 //
3331 // Generate a special Compile2Runtime blob that saves all registers,
3332 // and sets up an oopmap.
3333 //
3334 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
3335 assert(StubRoutines::forward_exception_entry() != nullptr,
3336 "must be generated before");
3337
3338 ResourceMark rm;
3339 OopMapSet *oop_maps = new OopMapSet();
3340 OopMap* map;
3341
3342 // Allocate space for the code. Setup code generation tools.
3343 CodeBuffer buffer("handler_blob", 2048, 1024);
3344 MacroAssembler* masm = new MacroAssembler(&buffer);
3345
3346 address start = __ pc();
3347 address call_pc = nullptr;
3348 int frame_size_in_words;
3349 bool cause_return = (poll_type == POLL_AT_RETURN);
3350 bool save_wide_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
3351
3352 if (UseRTMLocking) {
3353 // Abort RTM transaction before calling runtime
3354 // because critical section will be large and will be
3355 // aborted anyway. Also nmethod could be deoptimized.
3356 __ xabort(0);
3357 }
3358
3359 // Make room for return address (or push it again)
3360 if (!cause_return) {
3361 __ push(rbx);
3362 }
3363
3364 // Save registers, fpu state, and flags
3365 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_wide_vectors);
3366
3367 // The following is basically a call_VM. However, we need the precise
3368 // address of the call in order to generate an oopmap. Hence, we do all the
3369 // work ourselves.
3370
3371 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); // JavaFrameAnchor::capture_last_Java_pc() will get the pc from the return address, which we store next:
3372
3373 // The return address must always be correct so that the frame constructor
3374 // never sees an invalid pc.
3375
3376 if (!cause_return) {
3377 // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack.
3378 // Additionally, rbx is a callee-saved register, so we can look at it later to determine
3379 // if someone changed the return address for us!
3380 __ movptr(rbx, Address(r15_thread, JavaThread::saved_exception_pc_offset()));
3381 __ movptr(Address(rbp, wordSize), rbx);
3382 }
3383
3384 // Do the call
3385 __ mov(c_rarg0, r15_thread);
3386 __ call(RuntimeAddress(call_ptr));
3387
3388 // Set an oopmap for the call site. This oopmap will map all
3389 // oop-registers and debug-info registers as callee-saved. This
3390 // will allow deoptimization at this safepoint to find all possible
3391 // debug-info recordings, as well as let GC find all oops.
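// (Background: an OopMap records, for this exact call-site pc, which saved
// registers and stack slots hold oops, so a GC or deoptimization occurring
// while the thread is blocked here can locate and update them.)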
3392
3393 oop_maps->add_gc_map(__ pc() - start, map);
3394
3395 Label noException;
3396
3397 __ reset_last_Java_frame(false);
3398
3399 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
3400 __ jcc(Assembler::equal, noException);
3401
3402 // Exception pending
3403
3404 RegisterSaver::restore_live_registers(masm, save_wide_vectors);
3405
3406 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3407
3408 // No exception case
3409 __ bind(noException);
3410
3411 Label no_adjust;
3412 #ifdef ASSERT
3413 Label bail;
3414 #endif
3415 if (!cause_return) {
3416 Label no_prefix, not_special;
3417
3418 // If our stashed return pc was modified by the runtime we avoid touching it
3419 __ cmpptr(rbx, Address(rbp, wordSize));
3420 __ jccb(Assembler::notEqual, no_adjust);
3421
3422 // Skip over the poll instruction.
3423 // See NativeInstruction::is_safepoint_poll()
3424 // Possible encodings:
3425 // 85 00 test %eax,(%rax)
3426 // 85 01 test %eax,(%rcx)
3427 // 85 02 test %eax,(%rdx)
3428 // 85 03 test %eax,(%rbx)
3429 // 85 06 test %eax,(%rsi)
3430 // 85 07 test %eax,(%rdi)
3431 //
3432 // 41 85 00 test %eax,(%r8)
3433 // 41 85 01 test %eax,(%r9)
3434 // 41 85 02 test %eax,(%r10)
3435 // 41 85 03 test %eax,(%r11)
3436 // 41 85 06 test %eax,(%r14)
3437 // 41 85 07 test %eax,(%r15)
3438 //
3439 // 85 04 24 test %eax,(%rsp)
3440 // 41 85 04 24 test %eax,(%r12)
3441 // 85 45 00 test %eax,0x0(%rbp)
3442 // 41 85 45 00 test %eax,0x0(%r13)
3443
3444 __ cmpb(Address(rbx, 0), NativeTstRegMem::instruction_rex_b_prefix);
3445 __ jcc(Assembler::notEqual, no_prefix);
3446 __ addptr(rbx, 1);
3447 __ bind(no_prefix);
3448 #ifdef ASSERT
3449 __ movptr(rax, rbx); // remember where 0x85 should be, for verification below
3450 #endif
3451 // r12/r13/rsp/rbp base encoding takes 3 bytes with the following register values:
3452 // r12/rsp 0x04
3453 // r13/rbp 0x05
3454 __ movzbq(rcx, Address(rbx, 1));
3455 __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05
3456 __ subptr(rcx, 4); // looking for 0x00 .. 0x01
3457 __ cmpptr(rcx, 1);
3458 __ jcc(Assembler::above, not_special);
3459 __ addptr(rbx, 1);
3460 __ bind(not_special);
3461 #ifdef ASSERT
3462 // Verify the correct encoding of the poll we're about to skip.
3463 __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl);
3464 __ jcc(Assembler::notEqual, bail);
3465 // Mask out the modrm bits
3466 __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask);
3467 // rax encodes to 0, so if the bits are nonzero it's incorrect
3468 __ jcc(Assembler::notZero, bail);
3469 #endif
3470 // Adjust return pc forward to step over the safepoint poll instruction
3471 __ addptr(rbx, 2);
3472 __ movptr(Address(rbp, wordSize), rbx);
3473 }
3474
3475 __ bind(no_adjust);
3476 // Normal exit, restore registers and exit.
3477 RegisterSaver::restore_live_registers(masm, save_wide_vectors);
3478 __ ret(0);
3479
3480 #ifdef ASSERT
3481 __ bind(bail);
3482 __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
3483 #endif
3484
3485 // Make sure all code is generated
3486 masm->flush();
3487
3488 // Fill-out other meta info
3489 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
3490 }
3491
3492 //
3493 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3494 //
3495 // Generate a stub that calls into the vm to find out the proper destination
3496 // of a java call.
All the argument registers are live at this point 3497 // but since this is generic code we don't know what they are and the caller 3498 // must do any gc of the args. 3499 // 3500 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { 3501 assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before"); 3502 3503 // allocate space for the code 3504 ResourceMark rm; 3505 3506 CodeBuffer buffer(name, 1200, 512); 3507 MacroAssembler* masm = new MacroAssembler(&buffer); 3508 3509 int frame_size_in_words; 3510 3511 OopMapSet *oop_maps = new OopMapSet(); 3512 OopMap* map = nullptr; 3513 3514 int start = __ offset(); 3515 3516 // No need to save vector registers since they are caller-saved anyway. 3517 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ false); 3518 3519 int frame_complete = __ offset(); 3520 3521 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 3522 3523 __ mov(c_rarg0, r15_thread); 3524 3525 __ call(RuntimeAddress(destination)); 3526 3527 3528 // Set an oopmap for the call site. 3529 // We need this not only for callee-saved registers, but also for volatile 3530 // registers that the compiler might be keeping live across a safepoint. 3531 3532 oop_maps->add_gc_map( __ offset() - start, map); 3533 3534 // rax contains the address we are going to jump to assuming no exception got installed 3535 3536 // clear last_Java_sp 3537 __ reset_last_Java_frame(false); 3538 // check for pending exceptions 3539 Label pending; 3540 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 3541 __ jcc(Assembler::notEqual, pending); 3542 3543 // get the returned Method* 3544 __ get_vm_result_2(rbx, r15_thread); 3545 __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx); 3546 3547 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3548 3549 RegisterSaver::restore_live_registers(masm); 3550 3551 // We are back to the original state on entry and ready to go. 3552 3553 __ jmp(rax); 3554 3555 // Pending exception after the safepoint 3556 3557 __ bind(pending); 3558 3559 RegisterSaver::restore_live_registers(masm); 3560 3561 // exception pending => remove activation and forward to exception handler 3562 3563 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD); 3564 3565 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 3566 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3567 3568 // ------------- 3569 // make sure all code is generated 3570 masm->flush(); 3571 3572 // return the blob 3573 // frame_size_words or bytes?? 3574 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); 3575 } 3576 3577 //------------------------------Montgomery multiplication------------------------ 3578 // 3579 3580 #ifndef _WINDOWS 3581 3582 // Subtract 0:b from carry:a. Return carry. 
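// (Notation: "carry:a" denotes the (64*len + 64)-bit quantity whose low words
// are the array a and whose top word is carry; "0:b" likewise. The subtraction
// is performed in place on a, and the resulting top word is returned.)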
3583 static julong 3584 sub(julong a[], julong b[], julong carry, long len) { 3585 long long i = 0, cnt = len; 3586 julong tmp; 3587 asm volatile("clc; " 3588 "0: ; " 3589 "mov (%[b], %[i], 8), %[tmp]; " 3590 "sbb %[tmp], (%[a], %[i], 8); " 3591 "inc %[i]; dec %[cnt]; " 3592 "jne 0b; " 3593 "mov %[carry], %[tmp]; sbb $0, %[tmp]; " 3594 : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp) 3595 : [a]"r"(a), [b]"r"(b), [carry]"r"(carry) 3596 : "memory"); 3597 return tmp; 3598 } 3599 3600 // Multiply (unsigned) Long A by Long B, accumulating the double- 3601 // length result into the accumulator formed of T0, T1, and T2. 3602 #define MACC(A, B, T0, T1, T2) \ 3603 do { \ 3604 unsigned long hi, lo; \ 3605 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 3606 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 3607 : "r"(A), "a"(B) : "cc"); \ 3608 } while(0) 3609 3610 // As above, but add twice the double-length result into the 3611 // accumulator. 3612 #define MACC2(A, B, T0, T1, T2) \ 3613 do { \ 3614 unsigned long hi, lo; \ 3615 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \ 3616 "add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 3617 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 3618 : "r"(A), "a"(B) : "cc"); \ 3619 } while(0) 3620 3621 #else //_WINDOWS 3622 3623 static julong 3624 sub(julong a[], julong b[], julong carry, long len) { 3625 long i; 3626 julong tmp; 3627 unsigned char c = 1; 3628 for (i = 0; i < len; i++) { 3629 c = _addcarry_u64(c, a[i], ~b[i], &tmp); 3630 a[i] = tmp; 3631 } 3632 c = _addcarry_u64(c, carry, ~0, &tmp); 3633 return tmp; 3634 } 3635 3636 // Multiply (unsigned) Long A by Long B, accumulating the double- 3637 // length result into the accumulator formed of T0, T1, and T2. 3638 #define MACC(A, B, T0, T1, T2) \ 3639 do { \ 3640 julong hi, lo; \ 3641 lo = _umul128(A, B, &hi); \ 3642 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \ 3643 c = _addcarry_u64(c, hi, T1, &T1); \ 3644 _addcarry_u64(c, T2, 0, &T2); \ 3645 } while(0) 3646 3647 // As above, but add twice the double-length result into the 3648 // accumulator. 3649 #define MACC2(A, B, T0, T1, T2) \ 3650 do { \ 3651 julong hi, lo; \ 3652 lo = _umul128(A, B, &hi); \ 3653 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \ 3654 c = _addcarry_u64(c, hi, T1, &T1); \ 3655 _addcarry_u64(c, T2, 0, &T2); \ 3656 c = _addcarry_u64(0, lo, T0, &T0); \ 3657 c = _addcarry_u64(c, hi, T1, &T1); \ 3658 _addcarry_u64(c, T2, 0, &T2); \ 3659 } while(0) 3660 3661 #endif //_WINDOWS 3662 3663 // Fast Montgomery multiplication. The derivation of the algorithm is 3664 // in A Cryptographic Library for the Motorola DSP56000, 3665 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 
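// The reduction below relies on inv == -n[0]^-1 (mod 2^64); the asserts check
// inv * n[0] == ULLONG_MAX, i.e. -1 mod 2^64. Choosing m[i] = t0 * inv makes
// t0 + m[i]*n[0] == 0 (mod 2^64), so after the final MACC of each iteration
// the low accumulator word t0 is exactly zero and can be shifted out of the
// window. As an illustration only (not used by this code), MACC behaves like
// the following portable C++, assuming a compiler with unsigned __int128
// support:
//
//   static inline void macc_ref(julong a, julong b,
//                               julong& t0, julong& t1, julong& t2) {
//     unsigned __int128 p = (unsigned __int128)a * b;     // double-length product
//     unsigned __int128 s = (unsigned __int128)t0 + (julong)p;
//     t0 = (julong)s;                                     // add low word
//     s = (s >> 64) + t1 + (julong)(p >> 64);             // add high word + carry
//     t1 = (julong)s;
//     t2 += (julong)(s >> 64);                            // propagate final carry
//   }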
3666
3667 static void NOINLINE
3668 montgomery_multiply(julong a[], julong b[], julong n[],
3669 julong m[], julong inv, int len) {
3670 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3671 int i;
3672
3673 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery multiply");
3674
3675 for (i = 0; i < len; i++) {
3676 int j;
3677 for (j = 0; j < i; j++) {
3678 MACC(a[j], b[i-j], t0, t1, t2);
3679 MACC(m[j], n[i-j], t0, t1, t2);
3680 }
3681 MACC(a[i], b[0], t0, t1, t2);
3682 m[i] = t0 * inv;
3683 MACC(m[i], n[0], t0, t1, t2);
3684
3685 assert(t0 == 0, "broken Montgomery multiply");
3686
3687 t0 = t1; t1 = t2; t2 = 0;
3688 }
3689
3690 for (i = len; i < 2*len; i++) {
3691 int j;
3692 for (j = i-len+1; j < len; j++) {
3693 MACC(a[j], b[i-j], t0, t1, t2);
3694 MACC(m[j], n[i-j], t0, t1, t2);
3695 }
3696 m[i-len] = t0;
3697 t0 = t1; t1 = t2; t2 = 0;
3698 }
3699
3700 while (t0)
3701 t0 = sub(m, n, t0, len);
3702 }
3703
3704 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3705 // multiplies so it should be up to 25% faster than Montgomery
3706 // multiplication. However, its loop control is more complex and it
3707 // may actually run slower on some machines.
3708
3709 static void NOINLINE
3710 montgomery_square(julong a[], julong n[],
3711 julong m[], julong inv, int len) {
3712 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3713 int i;
3714
3715 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery square");
3716
3717 for (i = 0; i < len; i++) {
3718 int j;
3719 int end = (i+1)/2;
3720 for (j = 0; j < end; j++) {
3721 MACC2(a[j], a[i-j], t0, t1, t2);
3722 MACC(m[j], n[i-j], t0, t1, t2);
3723 }
3724 if ((i & 1) == 0) {
3725 MACC(a[j], a[j], t0, t1, t2);
3726 }
3727 for (; j < i; j++) {
3728 MACC(m[j], n[i-j], t0, t1, t2);
3729 }
3730 m[i] = t0 * inv;
3731 MACC(m[i], n[0], t0, t1, t2);
3732
3733 assert(t0 == 0, "broken Montgomery square");
3734
3735 t0 = t1; t1 = t2; t2 = 0;
3736 }
3737
3738 for (i = len; i < 2*len; i++) {
3739 int start = i-len+1;
3740 int end = start + (len - start)/2;
3741 int j;
3742 for (j = start; j < end; j++) {
3743 MACC2(a[j], a[i-j], t0, t1, t2);
3744 MACC(m[j], n[i-j], t0, t1, t2);
3745 }
3746 if ((i & 1) == 0) {
3747 MACC(a[j], a[j], t0, t1, t2);
3748 }
3749 for (; j < len; j++) {
3750 MACC(m[j], n[i-j], t0, t1, t2);
3751 }
3752 m[i-len] = t0;
3753 t0 = t1; t1 = t2; t2 = 0;
3754 }
3755
3756 while (t0)
3757 t0 = sub(m, n, t0, len);
3758 }
3759
3760 // Swap words in a longword.
3761 static julong swap(julong x) {
3762 return (x << 32) | (x >> 32);
3763 }
3764
3765 // Copy len longwords from s to d, word-swapping as we go. The
3766 // destination array is reversed.
3767 static void reverse_words(julong *s, julong *d, int len) {
3768 d += len;
3769 while(len-- > 0) {
3770 d--;
3771 *d = swap(*s);
3772 s++;
3773 }
3774 }
3775
3776 // The threshold at which squaring is advantageous was determined
3777 // experimentally on an i7-3930K (Sandy Bridge-E) CPU @ 3.5GHz.
3778 #define MONTGOMERY_SQUARING_THRESHOLD 64
3779
3780 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3781 jint len, jlong inv,
3782 jint *m_ints) {
3783 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3784 int longwords = len/2;
3785
3786 // Make very sure we don't use so much space that the stack might
3787 // overflow. 512 jints corresponds to a 16384-bit integer and
3788 // will use a total of 8k bytes of stack space here.
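// (Checking the arithmetic: len <= 512 jints gives longwords <= 256, and four
// julong arrays of 256 words at 8 bytes each come to 4 * 256 * 8 = 8192 bytes,
// matching the guarantee below.)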
3789 int divisor = sizeof(julong) * 4;
3790 guarantee(longwords <= 8192 / divisor, "must be");
3791 int total_allocation = longwords * sizeof (julong) * 4;
3792 julong *scratch = (julong *)alloca(total_allocation);
3793
3794 // Local scratch arrays
3795 julong
3796 *a = scratch + 0 * longwords,
3797 *b = scratch + 1 * longwords,
3798 *n = scratch + 2 * longwords,
3799 *m = scratch + 3 * longwords;
3800
3801 reverse_words((julong *)a_ints, a, longwords);
3802 reverse_words((julong *)b_ints, b, longwords);
3803 reverse_words((julong *)n_ints, n, longwords);
3804
3805 ::montgomery_multiply(a, b, n, m, (julong)inv, longwords);
3806
3807 reverse_words(m, (julong *)m_ints, longwords);
3808 }
3809
3810 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3811 jint len, jlong inv,
3812 jint *m_ints) {
3813 assert(len % 2 == 0, "array length in montgomery_square must be even");
3814 int longwords = len/2;
3815
3816 // Make very sure we don't use so much space that the stack might
3817 // overflow. 512 jints corresponds to a 16384-bit integer and
3818 // will use a total of 6k bytes of stack space here.
3819 int divisor = sizeof(julong) * 3;
3820 guarantee(longwords <= (8192 / divisor), "must be");
3821 int total_allocation = longwords * sizeof (julong) * 3;
3822 julong *scratch = (julong *)alloca(total_allocation);
3823
3824 // Local scratch arrays
3825 julong
3826 *a = scratch + 0 * longwords,
3827 *n = scratch + 1 * longwords,
3828 *m = scratch + 2 * longwords;
3829
3830 reverse_words((julong *)a_ints, a, longwords);
3831 reverse_words((julong *)n_ints, n, longwords);
3832
3833 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3834 ::montgomery_square(a, n, m, (julong)inv, longwords);
3835 } else {
3836 ::montgomery_multiply(a, a, n, m, (julong)inv, longwords);
3837 }
3838
3839 reverse_words(m, (julong *)m_ints, longwords);
3840 }
3841
3842 #ifdef COMPILER2
3843 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
3844 //
3845 //------------------------------generate_exception_blob---------------------------
3846 // creates the exception blob at the end
3847 // Using the exception blob, this code is jumped to from a compiled method.
3848 // (see emit_exception_handler in x86_64.ad file)
3849 //
3850 // Given an exception pc at a call we call into the runtime for the
3851 // handler in this method. This handler might merely restore state
3852 // (i.e. callee-save registers), unwind the frame, and jump to the
3853 // exception handler for the nmethod if there is no Java-level handler
3854 // for the nmethod.
3855 //
3856 // This code is entered with a jmp.
3857 //
3858 // Arguments:
3859 // rax: exception oop
3860 // rdx: exception pc
3861 //
3862 // Results:
3863 // rax: exception oop
3864 // rdx: exception pc in caller or ???
3865 // destination: exception handler of caller
3866 //
3867 // Note: the exception pc MUST be at a call (precise debug information)
3868 // Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved.
3869 //
3870
3871 void OptoRuntime::generate_exception_blob() {
3872 assert(!OptoRuntime::is_callee_saved_register(RDX_num), "");
3873 assert(!OptoRuntime::is_callee_saved_register(RAX_num), "");
3874 assert(!OptoRuntime::is_callee_saved_register(RCX_num), "");
3875
3876 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
3877
3878 // Allocate space for the code
3879 ResourceMark rm;
3880 // Setup code generation tools
3881 CodeBuffer buffer("exception_blob", 2048, 1024);
3882 MacroAssembler* masm = new MacroAssembler(&buffer);
3883
3884
3885 address start = __ pc();
3886
3887 // Exception pc is 'return address' for stack walker
3888 __ push(rdx);
3889 __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog
3890
3891 // Save callee-saved registers. See x86_64.ad.
3892
3893 // rbp is an implicitly saved callee-saved register (i.e., the calling
3894 // convention will save/restore it in the prolog/epilog). Other than that
3895 // there are no callee save registers now that adapter frames are gone.
3896
3897 __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
3898
3899 // Store exception in Thread object. We cannot pass any arguments to the
3900 // handle_exception call, since we do not want to make any assumption
3901 // about the size of the frame in which the exception happened.
3902 // c_rarg0 is either rdi (Linux) or rcx (Windows).
3903 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
3904 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
3905
3906 // This call does all the hard work. It checks if an exception handler
3907 // exists in the method.
3908 // If so, it returns the handler address.
3909 // If not, it prepares for stack-unwinding, restoring the callee-save
3910 // registers of the frame being removed.
3911 //
3912 // address OptoRuntime::handle_exception_C(JavaThread* thread)
3913
3914 // At a method handle call, the stack may not be properly aligned
3915 // when returning with an exception.
3916 address the_pc = __ pc();
3917 __ set_last_Java_frame(noreg, noreg, the_pc, rscratch1);
3918 __ mov(c_rarg0, r15_thread);
3919 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack
3920 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)));
3921
3922 // Set an oopmap for the call site. This oopmap will only be used if we
3923 // are unwinding the stack. Hence, all locations will be dead.
3924 // Callee-saved registers will be the same as the frame above (i.e.,
3925 // handle_exception_stub), since they were restored when we got the
3926 // exception.
3927
3928 OopMapSet* oop_maps = new OopMapSet();
3929
3930 oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
3931
3932 __ reset_last_Java_frame(false);
3933
3934 // Restore callee-saved registers
3935
3936 // rbp is an implicitly saved callee-saved register (i.e., the calling
3937 // convention will save/restore it in the prolog/epilog). Other than that
3938 // there are no callee save registers now that adapter frames are gone.
3939
3940 __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
3941
3942 __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog
3943 __ pop(rdx); // No need for exception pc anymore
3944
3945 // rax: exception handler
3946
3947 // We have a handler in rax (could be deopt blob).
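// (rax is about to be reloaded with the exception oop, so stash the handler
// address in r8, which is not touched by the loads below.)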
3948 __ mov(r8, rax); 3949 3950 // Get the exception oop 3951 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 3952 // Get the exception pc in case we are deoptimized 3953 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 3954 #ifdef ASSERT 3955 __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), NULL_WORD); 3956 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD); 3957 #endif 3958 // Clear the exception oop so GC no longer processes it as a root. 3959 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD); 3960 3961 // rax: exception oop 3962 // r8: exception handler 3963 // rdx: exception pc 3964 // Jump to handler 3965 3966 __ jmp(r8); 3967 3968 // Make sure all code is generated 3969 masm->flush(); 3970 3971 // Set exception blob 3972 _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); 3973 } 3974 #endif // COMPILER2 3975 3976 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { 3977 BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K); 3978 CodeBuffer buffer(buf); 3979 short buffer_locs[20]; 3980 buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, 3981 sizeof(buffer_locs)/sizeof(relocInfo)); 3982 3983 MacroAssembler* masm = new MacroAssembler(&buffer); 3984 3985 const Array<SigEntry>* sig_vk = vk->extended_sig(); 3986 const Array<VMRegPair>* regs = vk->return_regs(); 3987 3988 int pack_fields_jobject_off = __ offset(); 3989 // Resolve pre-allocated buffer from JNI handle. 3990 // We cannot do this in generate_call_stub() because it requires GC code to be initialized. 3991 __ movptr(rax, Address(r13, 0)); 3992 __ resolve_jobject(rax /* value */, 3993 r15_thread /* thread */, 3994 r12 /* tmp */); 3995 __ movptr(Address(r13, 0), rax); 3996 3997 int pack_fields_off = __ offset(); 3998 3999 int j = 1; 4000 for (int i = 0; i < sig_vk->length(); i++) { 4001 BasicType bt = sig_vk->at(i)._bt; 4002 if (bt == T_METADATA) { 4003 continue; 4004 } 4005 if (bt == T_VOID) { 4006 if (sig_vk->at(i-1)._bt == T_LONG || 4007 sig_vk->at(i-1)._bt == T_DOUBLE) { 4008 j++; 4009 } 4010 continue; 4011 } 4012 int off = sig_vk->at(i)._offset; 4013 assert(off > 0, "offset in object should be positive"); 4014 VMRegPair pair = regs->at(j); 4015 VMReg r_1 = pair.first(); 4016 VMReg r_2 = pair.second(); 4017 Address to(rax, off); 4018 if (bt == T_FLOAT) { 4019 __ movflt(to, r_1->as_XMMRegister()); 4020 } else if (bt == T_DOUBLE) { 4021 __ movdbl(to, r_1->as_XMMRegister()); 4022 } else { 4023 Register val = r_1->as_Register(); 4024 assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1); 4025 if (is_reference_type(bt)) { 4026 __ store_heap_oop(to, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); 4027 } else { 4028 __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt)); 4029 } 4030 } 4031 j++; 4032 } 4033 assert(j == regs->length(), "missed a field?"); 4034 4035 __ ret(0); 4036 4037 int unpack_fields_off = __ offset(); 4038 4039 Label skip; 4040 __ testptr(rax, rax); 4041 __ jcc(Assembler::zero, skip); 4042 4043 j = 1; 4044 for (int i = 0; i < sig_vk->length(); i++) { 4045 BasicType bt = sig_vk->at(i)._bt; 4046 if (bt == T_METADATA) { 4047 continue; 4048 } 4049 if (bt == T_VOID) { 4050 if (sig_vk->at(i-1)._bt == T_LONG || 4051 sig_vk->at(i-1)._bt == T_DOUBLE) { 4052 j++; 4053 } 4054 continue; 4055 } 4056 int off = sig_vk->at(i)._offset; 
4057 assert(off > 0, "offset in object should be positive"); 4058 VMRegPair pair = regs->at(j); 4059 VMReg r_1 = pair.first(); 4060 VMReg r_2 = pair.second(); 4061 Address from(rax, off); 4062 if (bt == T_FLOAT) { 4063 __ movflt(r_1->as_XMMRegister(), from); 4064 } else if (bt == T_DOUBLE) { 4065 __ movdbl(r_1->as_XMMRegister(), from); 4066 } else if (bt == T_OBJECT || bt == T_ARRAY) { 4067 assert_different_registers(rax, r_1->as_Register()); 4068 __ load_heap_oop(r_1->as_Register(), from); 4069 } else { 4070 assert(is_java_primitive(bt), "unexpected basic type"); 4071 assert_different_registers(rax, r_1->as_Register()); 4072 size_t size_in_bytes = type2aelembytes(bt); 4073 __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); 4074 } 4075 j++; 4076 } 4077 assert(j == regs->length(), "missed a field?"); 4078 4079 __ bind(skip); 4080 __ ret(0); 4081 4082 __ flush(); 4083 4084 return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off); 4085 }