/*
 * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#ifndef _WINDOWS
#include "alloca.h"
#endif
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/symbolTable.hpp"
#include "code/compiledIC.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/nativeInst.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/gcLocker.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
#include "oops/method.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/continuation.hpp"
#include "runtime/continuationEntry.inline.hpp"
#include "runtime/globals.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/vframeArray.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/align.hpp"
#include "utilities/formatBuffer.hpp"
#include "vmreg_x86.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
#if INCLUDE_JVMCI
#include "jvmci/jvmciJavaClasses.hpp"
#endif

#define __ masm->

const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;

class SimpleRuntimeFrame {

 public:

  // Most of the runtime stubs have this simple frame layout.
  // This class exists to make the layout shared in one place.
  // Offsets are for compiler stack slots, which are jints.
  enum layout {
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
    rbp_off2,
    return_off, return_off2,
    framesize
  };
};

class RegisterSaver {
  // Capture info about frame layout. Layout offsets are in jint
  // units because compiler frame slots are jints.
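// Note (added for clarity, a summary rather than a normative spec): the XSAVE_AREA_*
// constants below mirror the x86 FXSAVE/XSAVE image layout as used by
// push_CPU_state()/pop_CPU_state() -- XMM register storage starts at byte offset 160
// of the legacy FXSAVE image, and the YMM-high, opmask, ZMM-high and upper-bank ZMM
// components follow at the offsets given. They must stay consistent with what the
// save/restore code below actually writes into that image.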
#define XSAVE_AREA_BEGIN 160
#define XSAVE_AREA_YMM_BEGIN 576
#define XSAVE_AREA_OPMASK_BEGIN 1088
#define XSAVE_AREA_ZMM_BEGIN 1152
#define XSAVE_AREA_UPPERBANK 1664
#define DEF_XMM_OFFS(regnum)       xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
#define DEF_YMM_OFFS(regnum)       ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
#define DEF_ZMM_OFFS(regnum)       zmm ## regnum ## _off = zmm_off + (regnum)*32/BytesPerInt, zmm ## regnum ## H_off
#define DEF_OPMASK_OFFS(regnum)    opmask ## regnum ## _off = opmask_off + (regnum)*8/BytesPerInt, opmask ## regnum ## H_off
#define DEF_ZMM_UPPER_OFFS(regnum) zmm ## regnum ## _off = zmm_upper_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
  enum layout {
    fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
    xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt,     // offset in fxsave save area
    DEF_XMM_OFFS(0),
    DEF_XMM_OFFS(1),
    // 2..15 are implied in range usage
    ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_YMM_OFFS(0),
    DEF_YMM_OFFS(1),
    // 2..15 are implied in range usage
    opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_OPMASK_OFFS(0),
    DEF_OPMASK_OFFS(1),
    // 2..7 are implied in range usage
    zmm_off = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_ZMM_OFFS(0),
    DEF_ZMM_OFFS(1),
    zmm_upper_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_ZMM_UPPER_OFFS(16),
    DEF_ZMM_UPPER_OFFS(17),
    // 18..31 are implied in range usage
    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
    fpu_stateH_end,
    r15_off, r15H_off,
    r14_off, r14H_off,
    r13_off, r13H_off,
    r12_off, r12H_off,
    r11_off, r11H_off,
    r10_off, r10H_off,
    r9_off,  r9H_off,
    r8_off,  r8H_off,
    rdi_off, rdiH_off,
    rsi_off, rsiH_off,
    ignore_off, ignoreH_off,  // extra copy of rbp
    rsp_off, rspH_off,
    rbx_off, rbxH_off,
    rdx_off, rdxH_off,
    rcx_off, rcxH_off,
    rax_off, raxH_off,
    // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
    align_off, alignH_off,
    flags_off, flagsH_off,
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off, rbpH_off,        // copy of rbp we will restore
    return_off, returnH_off,  // slot for return address
    reg_save_size             // size in compiler stack slots
  };

 public:
  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors);
  static void restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors = false);

  // Offsets into the register save area
  // Used by deoptimization when it is managing result register
  // values on its own

  static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
  static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
  static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
  static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
  static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }

  // During deoptimization only the result registers need to be restored,
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};

OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) {
  int off = 0;
  int num_xmm_regs = XMMRegister::available_xmm_registers();
#if COMPILER2_OR_JVMCI
  if (save_wide_vectors && UseAVX == 0) {
    save_wide_vectors = false; // vectors larger than 16 byte long are supported only with AVX
  }
  assert(!save_wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
#else
  save_wide_vectors = false; // vectors are generated only by C2 and JVMCI
#endif

  // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
  int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;

  // Save registers, fpu state, and flags.
  // We assume the caller has already pushed the return address onto the
  // stack, so rsp is 8-byte aligned here.
  // We push rbp twice in this sequence because we want the real rbp
  // to be under the return address like a normal enter.
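  // Rough picture of the frame built below (high addresses at the top), derived from
  // the layout enum above; a sketch for orientation, not an exact byte map:
  //   return address                        (pushed by the caller)
  //   saved rbp                             (enter())
  //   flags / alignment word / rax .. r15   (push_CPU_state(): integer state)
  //   fxsave/xsave image                    (push_CPU_state(): FPU and vector state;
  //                                          wide-vector and opmask spills below go here)
  //   arg_reg_save_area_bytes               (outgoing C argument spill area, if any)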

  __ enter();          // rsp becomes 16-byte aligned here
  __ push_CPU_state(); // Push a multiple of 16 bytes

  // push_CPU_state handles this on EVEX enabled targets
  if (save_wide_vectors) {
    // Save upper half of YMM registers (0..15)
    int base_addr = XSAVE_AREA_YMM_BEGIN;
    for (int n = 0; n < 16; n++) {
      __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n));
    }
    if (VM_Version::supports_evex()) {
      // Save upper half of ZMM registers (0..15)
      base_addr = XSAVE_AREA_ZMM_BEGIN;
      for (int n = 0; n < 16; n++) {
        __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
      }
      // Save full ZMM registers (16..num_xmm_regs)
      base_addr = XSAVE_AREA_UPPERBANK;
      off = 0;
      int vector_len = Assembler::AVX_512bit;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
      }
#if COMPILER2_OR_JVMCI
      base_addr = XSAVE_AREA_OPMASK_BEGIN;
      off = 0;
      for(int n = 0; n < KRegister::number_of_registers; n++) {
        __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
      }
#endif
    }
  } else {
    if (VM_Version::supports_evex()) {
      // Save upper bank of XMM registers (16..31) for scalar or 16-byte vector usage
      int base_addr = XSAVE_AREA_UPPERBANK;
      off = 0;
      int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
      }
#if COMPILER2_OR_JVMCI
      base_addr = XSAVE_AREA_OPMASK_BEGIN;
      off = 0;
      for(int n = 0; n < KRegister::number_of_registers; n++) {
        __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
      }
#endif
    }
  }
  __ vzeroupper();
  if (frame::arg_reg_save_area_bytes != 0) {
    // Allocate argument register save area
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }

  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = new OopMap(frame_size_in_slots, 0);

#define STACK_OFFSET(x) VMRegImpl::stack2reg((x))

  map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
  // rbp location is known implicitly by the frame sender code, needs no oopmap,
  // and the location where rbp was saved is ignored
  map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
  // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
  // on EVEX enabled targets, we get it included in the xsave area
  off = xmm0_off;
  int delta = xmm1_off - off;
  for (int n = 0; n < 16; n++) {
    XMMRegister xmm_name = as_XMMRegister(n);
    map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
    off += delta;
  }
  if (UseAVX > 2) {
    // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
    off = zmm16_off;
    delta = zmm17_off - off;
    for (int n = 16; n < num_xmm_regs; n++) {
      XMMRegister zmm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
      off += delta;
    }
  }

#if COMPILER2_OR_JVMCI
  if (save_wide_vectors) {
    // Save upper half of YMM registers (0..15)
    off = ymm0_off;
    delta = ymm1_off - ymm0_off;
    for (int n = 0; n < 16; n++) {
      XMMRegister ymm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
      off += delta;
    }
    if (VM_Version::supports_evex()) {
      // Save upper half of ZMM registers (0..15)
      off = zmm0_off;
      delta = zmm1_off - zmm0_off;
      for (int n = 0; n < 16; n++) {
        XMMRegister zmm_name = as_XMMRegister(n);
        map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next(8));
        off += delta;
      }
    }
  }
#endif // COMPILER2_OR_JVMCI

  // %%% These should all be a waste but we'll keep things as they were for now
  if (true) {
    map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
    // rbp location is known implicitly by the frame sender code, needs no oopmap
    map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
    // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
    // on EVEX enabled targets, we get it included in the xsave area
    off = xmm0H_off;
    delta = xmm1H_off - off;
    for (int n = 0; n < 16; n++) {
      XMMRegister xmm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
      off += delta;
    }
    if (UseAVX > 2) {
      // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
      off = zmm16H_off;
      delta = zmm17H_off - off;
      for (int n = 16; n < num_xmm_regs; n++) {
        XMMRegister zmm_name = as_XMMRegister(n);
        map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
        off += delta;
      }
    }
  }

  return map;
}

void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors) {
  int num_xmm_regs = XMMRegister::available_xmm_registers();
  if (frame::arg_reg_save_area_bytes != 0) {
    // Pop arg register save area
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }

#if COMPILER2_OR_JVMCI
  if (restore_wide_vectors) {
    assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
    assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
  }
#else
  assert(!restore_wide_vectors, "vectors are generated only by C2");
#endif

  __ vzeroupper();

  // On EVEX enabled targets everything is handled in pop fpu state
  if (restore_wide_vectors) {
    // Restore upper half of YMM registers (0..15)
    int base_addr = XSAVE_AREA_YMM_BEGIN;
    for (int n = 0; n < 16; n++) {
      __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
    }
    if (VM_Version::supports_evex()) {
      // Restore upper half of ZMM registers (0..15)
      base_addr = XSAVE_AREA_ZMM_BEGIN;
      for (int n = 0; n < 16; n++) {
        __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
      }
      // Restore full ZMM registers (16..num_xmm_regs)
      base_addr = XSAVE_AREA_UPPERBANK;
      int vector_len = Assembler::AVX_512bit;
      int off = 0;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
      }
#if COMPILER2_OR_JVMCI
      base_addr = XSAVE_AREA_OPMASK_BEGIN;
      off = 0;
      for (int n = 0; n < KRegister::number_of_registers; n++) {
        __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
      }
#endif
    }
  } else {
    if (VM_Version::supports_evex()) {
      // Restore upper bank of XMM registers (16..31) for scalar or 16-byte vector usage
      int base_addr = XSAVE_AREA_UPPERBANK;
      int off = 0;
      int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
      }
#if COMPILER2_OR_JVMCI
      base_addr = XSAVE_AREA_OPMASK_BEGIN;
      off = 0;
      for (int n = 0; n < KRegister::number_of_registers; n++) {
        __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
      }
#endif
    }
  }

  // Recover CPU state
  __ pop_CPU_state();
  // Get the rbp described implicitly by the calling convention (no oopMap)
  __ pop(rbp);
}

void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore result register. Only used by deoptimization. By
  // now any callee save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.

  // Restore fp result register
  __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
  // Restore integer result register
  __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
  __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));

  // Pop all of the register save area off the stack except the return address
  __ addptr(rsp, return_offset_in_bytes());
}

// Is vector's size (in bytes) bigger than a size saved by default?
// 16-byte XMM registers are saved by default using fxsave/fxrstor instructions.
bool SharedRuntime::is_wide_vector(int size) {
  return size > 16;
}

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.
// Registers up to Register::number_of_registers are the 64-bit
// integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build. Of course for i486 there is no 64 bit build.

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.

int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed) {

  // Create the mapping between argument positions and
  // registers.
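  // Worked example (illustrative only): for a Java signature (int, long, Object, double)
  // the caller passes sig_bt = { T_INT, T_LONG, T_VOID, T_OBJECT, T_DOUBLE, T_VOID } and
  // the loop below assigns
  //   T_INT    -> j_rarg0 (one slot)
  //   T_LONG   -> j_rarg1 (register pair), with the trailing T_VOID half marked bad
  //   T_OBJECT -> j_rarg2
  //   T_DOUBLE -> j_farg0 (register pair), with the trailing T_VOID half marked bad
  // Only when the j_rarg*/j_farg* registers run out do arguments spill to stack slots.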
  static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
    j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
    j_farg0, j_farg1, j_farg2, j_farg3,
    j_farg4, j_farg5, j_farg6, j_farg7
  };


  uint int_args = 0;
  uint fp_args = 0;
  uint stk_args = 0; // inc by 2 each time

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (int_args < Argument::n_int_register_parameters_j) {
        regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_VOID:
      // halves of T_LONG or T_DOUBLE
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_LONG:
      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
    case T_PRIMITIVE_OBJECT:
      if (int_args < Argument::n_int_register_parameters_j) {
        regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  return align_up(stk_args, 2);
}

// Same as java_calling_convention() but for multiple return
// values. There's no way to store them on the stack so if we don't
// have enough registers, multiple values can't be returned.
const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
int SharedRuntime::java_return_convention(const BasicType *sig_bt,
                                          VMRegPair *regs,
                                          int total_args_passed) {
  // Create the mapping between argument positions and
  // registers.
  static const Register INT_ArgReg[java_return_convention_max_int] = {
    rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
  };
  static const XMMRegister FP_ArgReg[java_return_convention_max_float] = {
    j_farg0, j_farg1, j_farg2, j_farg3,
    j_farg4, j_farg5, j_farg6, j_farg7
  };


  uint int_args = 0;
  uint fp_args = 0;

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (int_args < Argument::n_int_register_parameters_j+1) {
        regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
        int_args++;
      } else {
        return -1;
      }
      break;
    case T_VOID:
      // halves of T_LONG or T_DOUBLE
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_LONG:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_PRIMITIVE_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
    case T_METADATA:
      if (int_args < Argument::n_int_register_parameters_j+1) {
        regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
        int_args++;
      } else {
        return -1;
      }
      break;
    case T_FLOAT:
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
        fp_args++;
      } else {
        return -1;
      }
      break;
    case T_DOUBLE:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
        fp_args++;
      } else {
        return -1;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  return int_args + fp_args;
}

// Patch the caller's callsite with entry to compiled code if it exists.
static void patch_callers_callsite(MacroAssembler *masm) {
  Label L;
  __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
  __ jcc(Assembler::equal, L);

  // Save the current stack pointer
  __ mov(r13, rsp);
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee
  // rax isn't live so capture return address while we easily can
  __ movptr(rax, Address(rsp, 0));

  // align stack so push_CPU_state doesn't fault
  __ andptr(rsp, -(StackAlignmentInBytes));
  __ push_CPU_state();
  __ vzeroupper();
  // VM needs caller's callsite
  // VM needs target method
  // This needs to be a long call since we will relocate this adapter to
  // the codeBuffer and it may not reach

  // Allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }
  __ mov(c_rarg0, rbx);
  __ mov(c_rarg1, rax);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));

  // De-allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }

  __ vzeroupper();
  __ pop_CPU_state();
  // restore sp
  __ mov(rsp, r13);
  __ bind(L);
}

// For each inline type argument, sig includes the list of fields of
// the inline type. This utility function computes the number of
// arguments for the call if inline types are passed by reference (the
// calling convention the interpreter expects).
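// Illustrative example (mirrors the comment inside the loop below): a single inline
// type argument with an int field and a nested inline type field {int, long} shows up
// in sig_extended as
//   T_PRIMITIVE_OBJECT, T_INT, T_PRIMITIVE_OBJECT, T_INT, T_LONG, T_VOID, T_VOID, T_VOID
// and contributes exactly 1 to total_args_passed, because the interpreter receives it
// as a single reference.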
static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
  int total_args_passed = 0;
  if (InlineTypePassFieldsAsArgs) {
    for (int i = 0; i < sig_extended->length(); i++) {
      BasicType bt = sig_extended->at(i)._bt;
      if (bt == T_PRIMITIVE_OBJECT) {
        // In sig_extended, an inline type argument starts with:
        // T_PRIMITIVE_OBJECT, followed by the types of the fields of the
        // inline type and T_VOID to mark the end of the value
        // type. Inline types are flattened so, for instance, in the
        // case of an inline type with an int field and an inline type
        // field that itself has 2 fields, an int and a long:
        // T_PRIMITIVE_OBJECT T_INT T_PRIMITIVE_OBJECT T_INT T_LONG T_VOID (second
        // slot for the T_LONG) T_VOID (inner T_PRIMITIVE_OBJECT) T_VOID
        // (outer T_PRIMITIVE_OBJECT)
        total_args_passed++;
        int vt = 1;
        do {
          i++;
          BasicType bt = sig_extended->at(i)._bt;
          BasicType prev_bt = sig_extended->at(i-1)._bt;
          if (bt == T_PRIMITIVE_OBJECT) {
            vt++;
          } else if (bt == T_VOID &&
                     prev_bt != T_LONG &&
                     prev_bt != T_DOUBLE) {
            vt--;
          }
        } while (vt != 0);
      } else {
        total_args_passed++;
      }
    }
  } else {
    total_args_passed = sig_extended->length();
  }
  return total_args_passed;
}


static void gen_c2i_adapter_helper(MacroAssembler* masm,
                                   BasicType bt,
                                   BasicType prev_bt,
                                   size_t size_in_bytes,
                                   const VMRegPair& reg_pair,
                                   const Address& to,
                                   int extraspace,
                                   bool is_oop) {
  assert(bt != T_PRIMITIVE_OBJECT || !InlineTypePassFieldsAsArgs, "no inline type here");
  if (bt == T_VOID) {
    assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
    return;
  }

  // Say 4 args:
  //  i   st_off
  //  0   32      T_LONG
  //  1   24      T_VOID
  //  2   16      T_OBJECT
  //  3    8      T_BOOL
  //  -    0      return address
  //
  // However, to make things extra confusing: because we can fit a long/double in
  // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
  // leaves one slot empty and only stores to a single slot. In this case the
  // slot that is occupied is the T_VOID slot. See, I said it was confusing.
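  // Concretely (a clarification, not new behavior): for the T_LONG above, the
  // interpreter slot pair is {st_off = 32, next_off = 24} and the caller passes the
  // address of the lower slot (24, the one tagged T_VOID) as the store target, so
  // the 8-byte value ends up in the T_VOID slot.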

  bool wide = (size_in_bytes == wordSize);
  VMReg r_1 = reg_pair.first();
  VMReg r_2 = reg_pair.second();
  assert(r_2->is_valid() == wide, "invalid size");
  if (!r_1->is_valid()) {
    assert(!r_2->is_valid(), "must be invalid");
    return;
  }

  if (!r_1->is_XMMRegister()) {
    Register val = rax;
    if (r_1->is_stack()) {
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
      __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
    } else {
      val = r_1->as_Register();
    }
    assert_different_registers(to.base(), val, rscratch1);
    if (is_oop) {
      __ push(r13);
      __ push(rbx);
      __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
      __ pop(rbx);
      __ pop(r13);
    } else {
      __ store_sized_value(to, val, size_in_bytes);
    }
  } else {
    if (wide) {
      __ movdbl(to, r_1->as_XMMRegister());
    } else {
      __ movflt(to, r_1->as_XMMRegister());
    }
  }
}

static void gen_c2i_adapter(MacroAssembler *masm,
                            const GrowableArray<SigEntry>* sig_extended,
                            const VMRegPair *regs,
                            bool requires_clinit_barrier,
                            address& c2i_no_clinit_check_entry,
                            Label& skip_fixup,
                            address start,
                            OopMapSet* oop_maps,
                            int& frame_complete,
                            int& frame_size_in_words,
                            bool alloc_inline_receiver) {
  if (requires_clinit_barrier && VM_Version::supports_fast_class_init_checks()) {
    Label L_skip_barrier;
    Register method = rbx;

    { // Bypass the barrier for non-static methods
      Register flags = rscratch1;
      __ movl(flags, Address(method, Method::access_flags_offset()));
      __ testl(flags, JVM_ACC_STATIC);
      __ jcc(Assembler::zero, L_skip_barrier); // non-static
    }

    Register klass = rscratch1;
    __ load_method_holder(klass, method);
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
    c2i_no_clinit_check_entry = __ pc();
  }

  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bs->c2i_entry_barrier(masm);

  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all. We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one). Check for a
  // compiled target. If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

  __ bind(skip_fixup);

  if (InlineTypePassFieldsAsArgs) {
    // Is there an inline type argument?
    bool has_inline_argument = false;
    for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
      has_inline_argument = (sig_extended->at(i)._bt == T_PRIMITIVE_OBJECT);
    }
    if (has_inline_argument) {
      // There is at least one inline type argument: we're coming from
      // compiled code so we have no buffers to back the inline types.
      // Allocate the buffers here with a runtime call.
      OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false);

      frame_complete = __ offset();

      __ set_last_Java_frame(noreg, noreg, NULL, rscratch1);

      __ mov(c_rarg0, r15_thread);
      __ mov(c_rarg1, rbx);
      __ mov64(c_rarg2, (int64_t)alloc_inline_receiver);
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types)));

      oop_maps->add_gc_map((int)(__ pc() - start), map);
      __ reset_last_Java_frame(false);

      RegisterSaver::restore_live_registers(masm);

      Label no_exception;
      __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
      __ jcc(Assembler::equal, no_exception);

      __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD);
      __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
      __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

      __ bind(no_exception);

      // We get an array of objects from the runtime call
      __ get_vm_result(rscratch2, r15_thread); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr()
      __ get_vm_result_2(rbx, r15_thread); // TODO: required to keep the callee Method live?
    }
  }

  // Since all args are passed on the stack, total_args_passed *
  // Interpreter::stackElementSize is the space we need.
  int total_args_passed = compute_total_args_passed_int(sig_extended);
  assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed);

  int extraspace = (total_args_passed * Interpreter::stackElementSize);

  // stack is aligned, keep it that way
  // This is not currently needed or enforced by the interpreter, but
  // we might as well conform to the ABI.
  extraspace = align_up(extraspace, 2*wordSize);

  // set senderSP value
  __ lea(r13, Address(rsp, wordSize));

#ifdef ASSERT
  __ check_stack_alignment(r13, "sender stack not aligned");
#endif
  if (extraspace > 0) {
    // Pop the return address
    __ pop(rax);

    __ subptr(rsp, extraspace);

    // Push the return address
    __ push(rax);

    // Account for the return address location since we store it first rather
    // than hold it in a register across all the shuffling
    extraspace += wordSize;
  }

#ifdef ASSERT
  __ check_stack_alignment(rsp, "callee stack not aligned", wordSize, rax);
#endif

  // Now write the args into the outgoing interpreter space

  // next_arg_comp is the next argument from the compiler point of
  // view (inline type fields are passed in registers/on the stack). In
  // sig_extended, an inline type argument starts with: T_PRIMITIVE_OBJECT,
  // followed by the types of the fields of the inline type and T_VOID
  // to mark the end of the inline type. ignored counts the number of
  // T_PRIMITIVE_OBJECT/T_VOID. next_vt_arg is the next inline type argument:
  // used to get the buffer for that argument from the pool of buffers
  // we allocated above and want to pass to the
  // interpreter. next_arg_int is the next argument from the
  // interpreter point of view (inline types are passed by reference).
  for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
       next_arg_comp < sig_extended->length(); next_arg_comp++) {
    assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
    assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?");
    BasicType bt = sig_extended->at(next_arg_comp)._bt;
    int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
    if (!InlineTypePassFieldsAsArgs || bt != T_PRIMITIVE_OBJECT) {
      int next_off = st_off - Interpreter::stackElementSize;
      const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
      const VMRegPair reg_pair = regs[next_arg_comp-ignored];
      size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
      gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
                             size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false);
      next_arg_int++;
#ifdef ASSERT
      if (bt == T_LONG || bt == T_DOUBLE) {
        // Overwrite the unused slot with known junk
        __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
        __ movptr(Address(rsp, st_off), rax);
      }
#endif /* ASSERT */
    } else {
      ignored++;
      // get the buffer from the just allocated pool of buffers
      int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_PRIMITIVE_OBJECT);
      __ load_heap_oop(r14, Address(rscratch2, index));
      next_vt_arg++; next_arg_int++;
      int vt = 1;
      // write fields we get from compiled code in registers/stack
      // slots to the buffer: we know we are done with that inline type
      // argument when we hit the T_VOID that acts as an end of inline
      // type delimiter for this inline type. Inline types are flattened
      // so we might encounter embedded inline types. Each entry in
      // sig_extended contains a field offset in the buffer.
      Label L_null;
      do {
        next_arg_comp++;
        BasicType bt = sig_extended->at(next_arg_comp)._bt;
        BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt;
        if (bt == T_PRIMITIVE_OBJECT) {
          vt++;
          ignored++;
        } else if (bt == T_VOID &&
                   prev_bt != T_LONG &&
                   prev_bt != T_DOUBLE) {
          vt--;
          ignored++;
        } else {
          int off = sig_extended->at(next_arg_comp)._offset;
          if (off == -1) {
            // Nullable inline type argument, emit null check
            VMReg reg = regs[next_arg_comp-ignored].first();
            Label L_notNull;
            if (reg->is_stack()) {
              int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
              __ testb(Address(rsp, ld_off), 1);
            } else {
              __ testb(reg->as_Register(), 1);
            }
            __ jcc(Assembler::notZero, L_notNull);
            __ movptr(Address(rsp, st_off), 0);
            __ jmp(L_null);
            __ bind(L_notNull);
            continue;
          }
          assert(off > 0, "offset in object should be positive");
          size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
          bool is_oop = is_reference_type(bt);
          gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
                                 size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop);
        }
      } while (vt != 0);
      // pass the buffer to the interpreter
      __ movptr(Address(rsp, st_off), r14);
      __ bind(L_null);
    }
  }

  // Schedule the branch target address early.
  __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
  __ jmp(rcx);
}

static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
                        address code_start, address code_end,
                        Label& L_ok) {
  Label L_fail;
  __ lea(temp_reg, ExternalAddress(code_start));
  __ cmpptr(pc_reg, temp_reg);
  __ jcc(Assembler::belowEqual, L_fail);
  __ lea(temp_reg, ExternalAddress(code_end));
  __ cmpptr(pc_reg, temp_reg);
  __ jcc(Assembler::below, L_ok);
  __ bind(L_fail);
}

void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int comp_args_on_stack,
                                    const GrowableArray<SigEntry>* sig,
                                    const VMRegPair *regs) {

  // Note: r13 contains the senderSP on entry. We must preserve it since
  // we may do an i2c -> c2i transition if we lose a race where compiled
  // code goes non-entrant while we get args ready.
  // In addition we use r13 to locate all the interpreter args as
  // we must align the stack to 16 bytes on an i2c entry else we
  // lose alignment we expect in all compiled code and register
  // save code can segv when fxsave instructions find an improperly
  // aligned stack pointer.

  // Adapters can be frameless because they do not require the caller
  // to perform additional cleanup work, such as correcting the stack pointer.
  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
  // even if a callee has modified the stack pointer.
  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
  // routinely repairs its caller's stack pointer (from sender_sp, which is set
  // up via the senderSP register).
  // In other words, if *either* the caller or callee is interpreted, we can
  // get the stack pointer repaired after a call.
  // This is why c2i and i2c adapters cannot be indefinitely composed.
  // In particular, if a c2i adapter were to somehow call an i2c adapter,
  // both caller and callee would be compiled methods, and neither would
  // clean up the stack pointer changes performed by the two adapters.
  // If this happens, control eventually transfers back to the compiled
  // caller, but with an uncorrected stack, causing delayed havoc.

  if (VerifyAdapterCalls &&
      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
    // So, let's test for cascading c2i/i2c adapters right now.
    //  assert(Interpreter::contains($return_addr) ||
    //         StubRoutines::contains($return_addr),
    //         "i2c adapter must return to an interpreter frame");
    __ block_comment("verify_i2c { ");
    // Pick up the return address
    __ movptr(rax, Address(rsp, 0));
    Label L_ok;
    if (Interpreter::code() != NULL)
      range_check(masm, rax, r11,
                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
                  L_ok);
    if (StubRoutines::code1() != NULL)
      range_check(masm, rax, r11,
                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
                  L_ok);
    if (StubRoutines::code2() != NULL)
      range_check(masm, rax, r11,
                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
                  L_ok);
    const char* msg = "i2c adapter must return to an interpreter frame";
    __ block_comment(msg);
    __ stop(msg);
    __ bind(L_ok);
    __ block_comment("} verify_i2ce ");
  }

  // Must preserve original SP for loading incoming arguments because
  // we need to align the outgoing SP for compiled code.
  __ movptr(r11, rsp);

  // Pick up the return address
  __ pop(rax);

  // Convert 4-byte c2 stack slots to words.
  int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;

  if (comp_args_on_stack) {
    __ subptr(rsp, comp_words_on_stack * wordSize);
  }

  // Ensure compiled code always sees stack at proper alignment
  __ andptr(rsp, -16);

  // push the return address and misalign the stack so that the youngest frame
  // sees the same stack placement it would see right after a call instruction
  __ push(rax);

  // Put saved SP in another register
  const Register saved_sp = rax;
  __ movptr(saved_sp, r11);

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset())));

#if INCLUDE_JVMCI
  if (EnableJVMCI) {
    // check if this call should be routed towards a specific entry point
    __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
    Label no_alternative_target;
    __ jcc(Assembler::equal, no_alternative_target);
    __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
    __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
    __ bind(no_alternative_target);
  }
#endif // INCLUDE_JVMCI

  int total_args_passed = sig->length();

  // Now generate the shuffle code.  Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    BasicType bt = sig->at(i)._bt;
    assert(bt != T_PRIMITIVE_OBJECT, "i2c adapter doesn't unpack inline type args");
    if (bt == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      BasicType prev_bt = (i > 0) ? sig->at(i-1)._bt : T_ILLEGAL;
      assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;
    //
    //
    //
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to account for return address )
      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;

      // We can use r13 as a temp here because compiled code doesn't need r13 as an input
      // and if we end up going thru a c2i because of a miss a reasonable value of r13
      // will be generated.
      if (!r_2->is_valid()) {
        // sign extend???
        __ movl(r13, Address(saved_sp, ld_off));
        __ movptr(Address(rsp, st_off), r13);
      } else {
        //
        // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE;
        // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
        // so we must adjust where to pick up the data to match the interpreter.
        //
        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        const int offset = (bt==T_LONG||bt==T_DOUBLE)?
                           next_off : ld_off;
        __ movq(r13, Address(saved_sp, offset));
        // st_off is LSW (i.e. reg.first())
        __ movq(Address(rsp, st_off), r13);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      assert(r != rax, "must be different");
      if (r_2->is_valid()) {
        //
        // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE;
        // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
        // so we must adjust where to pick up the data to match the interpreter.

        const int offset = (bt==T_LONG||bt==T_DOUBLE)?
                           next_off : ld_off;

        // this can be a misaligned move
        __ movq(r, Address(saved_sp, offset));
      } else {
        // sign extend and use a full word?
        __ movl(r, Address(saved_sp, ld_off));
      }
    } else {
      if (!r_2->is_valid()) {
        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
      } else {
        __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
      }
    }
  }

  __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.

  __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);

  // put Method* where a c2i would expect should we end up there.
  // Only needed because c2 resolve stubs return the Method* as a result in
  // rax.
  __ mov(rax, rbx);
  __ jmp(r11);
}

static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
  Label ok;

  Register holder = rax;
  Register receiver = j_rarg0;
  Register temp = rbx;

  __ load_klass(temp, receiver, rscratch1);
  __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
  __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
  __ jcc(Assembler::equal, ok);
  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

  __ bind(ok);
  // Method might have been compiled since the call site was patched to
  // interpreted; if that is the case treat it as a miss so we can get
  // the call site corrected.
  __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
  __ jcc(Assembler::equal, skip_fixup);
  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
}

// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
                                                            int comp_args_on_stack,
                                                            const GrowableArray<SigEntry>* sig,
                                                            const VMRegPair* regs,
                                                            const GrowableArray<SigEntry>* sig_cc,
                                                            const VMRegPair* regs_cc,
                                                            const GrowableArray<SigEntry>* sig_cc_ro,
                                                            const VMRegPair* regs_cc_ro,
                                                            AdapterFingerPrint* fingerprint,
                                                            AdapterBlob*& new_adapter,
                                                            bool allocate_code_blob) {
  address i2c_entry = __ pc();
  gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);

  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
  // to the interpreter. The args start out packed in the compiled layout. They
  // need to be unpacked into the interpreter layout. This will almost always
  // require some stack space. We grow the current (compiled) stack, then repack
  // the args. We finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not RBP, get sick).
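  // Overview (a summary for the reader, not generated code): the blob built below
  // exposes several entry points -- i2c_entry, c2i_unverified_entry and
  // c2i_unverified_inline_entry (inline cache check first), the scalarized c2i_entry
  // and c2i_inline_entry, the scalarized-with-unscalarized-receiver c2i_inline_ro_entry,
  // and optionally c2i_no_clinit_check_entry. Which of these alias one another depends
  // on whether the scalarized and non-scalarized calling conventions differ
  // (regs vs. regs_cc, regs_cc vs. regs_cc_ro).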

  address c2i_unverified_entry        = __ pc();
  address c2i_unverified_inline_entry = __ pc();
  Label skip_fixup;

  gen_inline_cache_check(masm, skip_fixup);

  OopMapSet* oop_maps = new OopMapSet();
  int frame_complete = CodeOffsets::frame_never_safe;
  int frame_size_in_words = 0;

  // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
  address c2i_no_clinit_check_entry = NULL;
  address c2i_inline_ro_entry = __ pc();
  if (regs_cc != regs_cc_ro) {
    // No class init barrier needed because method is guaranteed to be non-static
    gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, c2i_no_clinit_check_entry,
                    skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
    skip_fixup.reset();
  }

  // Scalarized c2i adapter
  address c2i_entry        = __ pc();
  address c2i_inline_entry = __ pc();
  gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry,
                  skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true);

  // Non-scalarized c2i adapter
  if (regs != regs_cc) {
    c2i_unverified_inline_entry = __ pc();
    Label inline_entry_skip_fixup;
    gen_inline_cache_check(masm, inline_entry_skip_fixup);

    c2i_inline_entry = __ pc();
    gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry,
                    inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
  }

  __ flush();

  // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
  // the GC knows about the location of oop argument locations passed to the c2i adapter.
  if (allocate_code_blob) {
    bool caller_must_gc_arguments = (regs != regs_cc);
    new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
  }

  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, c2i_unverified_inline_entry, c2i_no_clinit_check_entry);
}

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        VMRegPair *regs2,
                                        int total_args_passed) {
  assert(regs2 == NULL, "not needed on x86");
  // We return the amount of VMRegImpl stack slots we need to reserve for all
  // the arguments NOT counting out_preserve_stack_slots.
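  // Worked example (illustrative only): for a native signature (int, double, jobject)
  // the System V (non-Windows) path below yields c_rarg0, c_farg0, c_rarg1, since
  // integer and floating-point registers are counted independently. On Win64 the
  // shared argument slots mean the same signature maps to c_rarg0, c_farg1, c_rarg2,
  // and stack space for four register arguments is always reserved (see the clamp
  // of stk_args to 8 slots at the end).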

  // NOTE: These arrays will have to change when c1 is ported
#ifdef _WIN64
  static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
    c_rarg0, c_rarg1, c_rarg2, c_rarg3
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
    c_farg0, c_farg1, c_farg2, c_farg3
  };
#else
  static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
    c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
    c_farg0, c_farg1, c_farg2, c_farg3,
    c_farg4, c_farg5, c_farg6, c_farg7
  };
#endif // _WIN64


  uint int_args = 0;
  uint fp_args = 0;
  uint stk_args = 0; // inc by 2 each time

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (int_args < Argument::n_int_register_parameters_c) {
        regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
        fp_args++;
        // Allocate slots for callee to stuff register args on the stack.
        stk_args += 2;
#endif
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_LONG:
      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_PRIMITIVE_OBJECT:
    case T_ADDRESS:
    case T_METADATA:
      if (int_args < Argument::n_int_register_parameters_c) {
        regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
        fp_args++;
        stk_args += 2;
#endif
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (fp_args < Argument::n_float_register_parameters_c) {
        regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
#ifdef _WIN64
        int_args++;
        // Allocate slots for callee to stuff register args on the stack.
        stk_args += 2;
#endif
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
      if (fp_args < Argument::n_float_register_parameters_c) {
        regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
#ifdef _WIN64
        int_args++;
        // Allocate slots for callee to stuff register args on the stack.
        stk_args += 2;
#endif
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_VOID: // Halves of longs and doubles
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }
#ifdef _WIN64
  // windows abi requires that we always allocate enough stack space
  // for 4 64bit registers to be stored down.
  if (stk_args < 8) {
    stk_args = 8;
  }
#endif // _WIN64

  return stk_args;
}

int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                             uint num_bits,
                                             uint total_args_passed) {
  assert(num_bits == 64 || num_bits == 128 || num_bits == 256 || num_bits == 512,
         "only certain vector sizes are supported for now");

  static const XMMRegister VEC_ArgReg[32] = {
     xmm0,  xmm1,  xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
     xmm8,  xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
    xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23,
    xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31
  };

  uint stk_args = 0;
  uint fp_args = 0;

  for (uint i = 0; i < total_args_passed; i++) {
    VMReg vmreg = VEC_ArgReg[fp_args++]->as_VMReg();
    int next_val = num_bits == 64 ? 1 : (num_bits == 128 ? 3 : (num_bits == 256 ? 7 : 15));
    regs[i].set_pair(vmreg->next(next_val), vmreg);
  }

  return stk_args;
}

void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below frame pointer
  // which by this time is free to use
  switch (ret_type) {
  case T_FLOAT:
    __ movflt(Address(rbp, -wordSize), xmm0);
    break;
  case T_DOUBLE:
    __ movdbl(Address(rbp, -wordSize), xmm0);
    break;
  case T_VOID: break;
  default: {
    __ movptr(Address(rbp, -wordSize), rax);
    }
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below frame pointer
  // which by this time is free to use
  switch (ret_type) {
  case T_FLOAT:
    __ movflt(xmm0, Address(rbp, -wordSize));
    break;
  case T_DOUBLE:
    __ movdbl(xmm0, Address(rbp, -wordSize));
    break;
  case T_VOID: break;
  default: {
    __ movptr(rax, Address(rbp, -wordSize));
    }
  }
}

static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  for ( int i = first_arg ; i < arg_count ; i++ ) {
    if (args[i].first()->is_Register()) {
      __ push(args[i].first()->as_Register());
    } else if (args[i].first()->is_XMMRegister()) {
      __ subptr(rsp, 2*wordSize);
      __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister());
    }
  }
}

static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
    if (args[i].first()->is_Register()) {
      __ pop(args[i].first()->as_Register());
    } else if (args[i].first()->is_XMMRegister()) {
      __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0));
      __ addptr(rsp, 2*wordSize);
    }
  }
}

static void verify_oop_args(MacroAssembler* masm,
                            const methodHandle& method,
                            const BasicType* sig_bt,
                            const VMRegPair* regs) {
  Register temp_reg = rbx;  // not part of any compiled calling seq
  if (VerifyOops) {
    for (int i = 0; i < method->size_of_parameters(); i++) {
      if (is_reference_type(sig_bt[i])) {
        VMReg r = regs[i].first();
        assert(r->is_valid(), "bad oop arg");
        if (r->is_stack()) {
          __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
          __ verify_oop(temp_reg);
        } else {
          __ verify_oop(r->as_Register());
verify_oop(r->as_Register()); 1538 } 1539 } 1540 } 1541 } 1542 } 1543 1544 static void check_continuation_enter_argument(VMReg actual_vmreg, 1545 Register expected_reg, 1546 const char* name) { 1547 assert(!actual_vmreg->is_stack(), "%s cannot be on stack", name); 1548 assert(actual_vmreg->as_Register() == expected_reg, 1549 "%s is in unexpected register: %s instead of %s", 1550 name, actual_vmreg->as_Register()->name(), expected_reg->name()); 1551 } 1552 1553 1554 //---------------------------- continuation_enter_setup --------------------------- 1555 // 1556 // Arguments: 1557 // None. 1558 // 1559 // Results: 1560 // rsp: pointer to blank ContinuationEntry 1561 // 1562 // Kills: 1563 // rax 1564 // 1565 static OopMap* continuation_enter_setup(MacroAssembler* masm, int& stack_slots) { 1566 assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, ""); 1567 assert(in_bytes(ContinuationEntry::cont_offset()) % VMRegImpl::stack_slot_size == 0, ""); 1568 assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, ""); 1569 1570 stack_slots += checked_cast<int>(ContinuationEntry::size()) / wordSize; 1571 __ subptr(rsp, checked_cast<int32_t>(ContinuationEntry::size())); 1572 1573 int frame_size = (checked_cast<int>(ContinuationEntry::size()) + wordSize) / VMRegImpl::stack_slot_size; 1574 OopMap* map = new OopMap(frame_size, 0); 1575 1576 __ movptr(rax, Address(r15_thread, JavaThread::cont_entry_offset())); 1577 __ movptr(Address(rsp, ContinuationEntry::parent_offset()), rax); 1578 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rsp); 1579 1580 return map; 1581 } 1582 1583 //---------------------------- fill_continuation_entry --------------------------- 1584 // 1585 // Arguments: 1586 // rsp: pointer to blank Continuation entry 1587 // reg_cont_obj: pointer to the continuation 1588 // reg_flags: flags 1589 // 1590 // Results: 1591 // rsp: pointer to filled out ContinuationEntry 1592 // 1593 // Kills: 1594 // rax 1595 // 1596 static void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Register reg_flags) { 1597 assert_different_registers(rax, reg_cont_obj, reg_flags); 1598 #ifdef ASSERT 1599 __ movl(Address(rsp, ContinuationEntry::cookie_offset()), ContinuationEntry::cookie_value()); 1600 #endif 1601 __ movptr(Address(rsp, ContinuationEntry::cont_offset()), reg_cont_obj); 1602 __ movl (Address(rsp, ContinuationEntry::flags_offset()), reg_flags); 1603 __ movptr(Address(rsp, ContinuationEntry::chunk_offset()), 0); 1604 __ movl(Address(rsp, ContinuationEntry::argsize_offset()), 0); 1605 __ movl(Address(rsp, ContinuationEntry::pin_count_offset()), 0); 1606 1607 __ movptr(rax, Address(r15_thread, JavaThread::cont_fastpath_offset())); 1608 __ movptr(Address(rsp, ContinuationEntry::parent_cont_fastpath_offset()), rax); 1609 __ movq(rax, Address(r15_thread, JavaThread::held_monitor_count_offset())); 1610 __ movq(Address(rsp, ContinuationEntry::parent_held_monitor_count_offset()), rax); 1611 1612 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), 0); 1613 __ movq(Address(r15_thread, JavaThread::held_monitor_count_offset()), 0); 1614 } 1615 1616 //---------------------------- continuation_enter_cleanup --------------------------- 1617 // 1618 // Arguments: 1619 // rsp: pointer to the ContinuationEntry 1620 // 1621 // Results: 1622 // rsp: pointer to the spilled rbp in the entry frame 1623 // 1624 // Kills: 1625 // rbx 1626 // 1627 void static continuation_enter_cleanup(MacroAssembler* masm) { 1628 #ifdef ASSERT 1629 
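// Sanity check: rsp must still point at the ContinuationEntry recorded in the thread.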
Label L_good_sp; 1630 __ cmpptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset())); 1631 __ jcc(Assembler::equal, L_good_sp); 1632 __ stop("Incorrect rsp at continuation_enter_cleanup"); 1633 __ bind(L_good_sp); 1634 #endif 1635 1636 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_cont_fastpath_offset())); 1637 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), rbx); 1638 __ movq(rbx, Address(rsp, ContinuationEntry::parent_held_monitor_count_offset())); 1639 __ movq(Address(r15_thread, JavaThread::held_monitor_count_offset()), rbx); 1640 1641 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_offset())); 1642 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rbx); 1643 __ addptr(rsp, checked_cast<int32_t>(ContinuationEntry::size())); 1644 } 1645 1646 static void gen_continuation_enter(MacroAssembler* masm, 1647 const VMRegPair* regs, 1648 int& exception_offset, 1649 OopMapSet* oop_maps, 1650 int& frame_complete, 1651 int& stack_slots, 1652 int& interpreted_entry_offset, 1653 int& compiled_entry_offset) { 1654 1655 // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread) 1656 int pos_cont_obj = 0; 1657 int pos_is_cont = 1; 1658 int pos_is_virtual = 2; 1659 1660 // The platform-specific calling convention may present the arguments in various registers. 1661 // To simplify the rest of the code, we expect the arguments to reside at these known 1662 // registers, and we additionally check the placement here in case calling convention ever 1663 // changes. 1664 Register reg_cont_obj = c_rarg1; 1665 Register reg_is_cont = c_rarg2; 1666 Register reg_is_virtual = c_rarg3; 1667 1668 check_continuation_enter_argument(regs[pos_cont_obj].first(), reg_cont_obj, "Continuation object"); 1669 check_continuation_enter_argument(regs[pos_is_cont].first(), reg_is_cont, "isContinue"); 1670 check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread"); 1671 1672 // Utility methods kill rax, make sure there are no collisions 1673 assert_different_registers(rax, reg_cont_obj, reg_is_cont, reg_is_virtual); 1674 1675 AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), 1676 relocInfo::static_call_type); 1677 1678 address start = __ pc(); 1679 1680 Label L_thaw, L_exit; 1681 1682 // i2i entry used at interp_only_mode only 1683 interpreted_entry_offset = __ pc() - start; 1684 { 1685 #ifdef ASSERT 1686 Label is_interp_only; 1687 __ cmpb(Address(r15_thread, JavaThread::interp_only_mode_offset()), 0); 1688 __ jcc(Assembler::notEqual, is_interp_only); 1689 __ stop("enterSpecial interpreter entry called when not in interp_only_mode"); 1690 __ bind(is_interp_only); 1691 #endif 1692 1693 __ pop(rax); // return address 1694 // Read interpreter arguments into registers (this is an ad-hoc i2c adapter) 1695 __ movptr(c_rarg1, Address(rsp, Interpreter::stackElementSize*2)); 1696 __ movl(c_rarg2, Address(rsp, Interpreter::stackElementSize*1)); 1697 __ movl(c_rarg3, Address(rsp, Interpreter::stackElementSize*0)); 1698 __ andptr(rsp, -16); // Ensure compiled code always sees stack at proper alignment 1699 __ push(rax); // return address 1700 __ push_cont_fastpath(); 1701 1702 __ enter(); 1703 1704 stack_slots = 2; // will be adjusted in setup 1705 OopMap* map = continuation_enter_setup(masm, stack_slots); 1706 // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe, 1707 // but that's okay because at the very worst we'll miss an async sample, but we're in 
interp_only_mode anyway. 1708 1709 __ verify_oop(reg_cont_obj); 1710 1711 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual); 1712 1713 // If continuation, call to thaw. Otherwise, resolve the call and exit. 1714 __ testptr(reg_is_cont, reg_is_cont); 1715 __ jcc(Assembler::notZero, L_thaw); 1716 1717 // --- Resolve path 1718 1719 // Make sure the call is patchable 1720 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset); 1721 // Emit stub for static call 1722 CodeBuffer* cbuf = masm->code_section()->outer(); 1723 address stub = CompiledStaticCall::emit_to_interp_stub(*cbuf, __ pc()); 1724 if (stub == nullptr) { 1725 fatal("CodeCache is full at gen_continuation_enter"); 1726 } 1727 __ call(resolve); 1728 oop_maps->add_gc_map(__ pc() - start, map); 1729 __ post_call_nop(); 1730 1731 __ jmp(L_exit); 1732 } 1733 1734 // compiled entry 1735 __ align(CodeEntryAlignment); 1736 compiled_entry_offset = __ pc() - start; 1737 __ enter(); 1738 1739 stack_slots = 2; // will be adjusted in setup 1740 OopMap* map = continuation_enter_setup(masm, stack_slots); 1741 1742 // Frame is now completed as far as size and linkage. 1743 frame_complete = __ pc() - start; 1744 1745 __ verify_oop(reg_cont_obj); 1746 1747 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual); 1748 1749 // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue) 1750 __ testptr(reg_is_cont, reg_is_cont); 1751 __ jccb(Assembler::notZero, L_thaw); 1752 1753 // --- call Continuation.enter(Continuation c, boolean isContinue) 1754 1755 // Make sure the call is patchable 1756 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset); 1757 1758 // Emit stub for static call 1759 CodeBuffer* cbuf = masm->code_section()->outer(); 1760 address stub = CompiledStaticCall::emit_to_interp_stub(*cbuf, __ pc()); 1761 if (stub == nullptr) { 1762 fatal("CodeCache is full at gen_continuation_enter"); 1763 } 1764 1765 // The call needs to be resolved. There's a special case for this in 1766 // SharedRuntime::find_callee_info_helper() which calls 1767 // LinkResolver::resolve_continuation_enter() which resolves the call to 1768 // Continuation.enter(Continuation c, boolean isContinue). 
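// The GC map for this call is registered at the return address, i.e. pc() immediately after the call below.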
1769 __ call(resolve); 1770 1771 oop_maps->add_gc_map(__ pc() - start, map); 1772 __ post_call_nop(); 1773 1774 __ jmpb(L_exit); 1775 1776 // --- Thawing path 1777 1778 __ bind(L_thaw); 1779 1780 __ call(RuntimeAddress(StubRoutines::cont_thaw())); 1781 1782 ContinuationEntry::_return_pc_offset = __ pc() - start; 1783 oop_maps->add_gc_map(__ pc() - start, map->deep_copy()); 1784 __ post_call_nop(); 1785 1786 // --- Normal exit (resolve/thawing) 1787 1788 __ bind(L_exit); 1789 1790 continuation_enter_cleanup(masm); 1791 __ pop(rbp); 1792 __ ret(0); 1793 1794 // --- Exception handling path 1795 1796 exception_offset = __ pc() - start; 1797 1798 continuation_enter_cleanup(masm); 1799 __ pop(rbp); 1800 1801 __ movptr(c_rarg0, r15_thread); 1802 __ movptr(c_rarg1, Address(rsp, 0)); // return address 1803 1804 // rax still holds the original exception oop, save it before the call 1805 __ push(rax); 1806 1807 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), 2); 1808 __ movptr(rbx, rax); 1809 1810 // Continue at exception handler: 1811 // rax: exception oop 1812 // rbx: exception handler 1813 // rdx: exception pc 1814 __ pop(rax); 1815 __ verify_oop(rax); 1816 __ pop(rdx); 1817 __ jmp(rbx); 1818 } 1819 1820 static void gen_continuation_yield(MacroAssembler* masm, 1821 const VMRegPair* regs, 1822 OopMapSet* oop_maps, 1823 int& frame_complete, 1824 int& stack_slots, 1825 int& compiled_entry_offset) { 1826 enum layout { 1827 rbp_off, 1828 rbpH_off, 1829 return_off, 1830 return_off2, 1831 framesize // inclusive of return address 1832 }; 1833 stack_slots = framesize / VMRegImpl::slots_per_word; 1834 assert(stack_slots == 2, "recheck layout"); 1835 1836 address start = __ pc(); 1837 compiled_entry_offset = __ pc() - start; 1838 __ enter(); 1839 address the_pc = __ pc(); 1840 1841 frame_complete = the_pc - start; 1842 1843 // This nop must be exactly at the PC we push into the frame info. 1844 // We use this nop for fast CodeBlob lookup, associate the OopMap 1845 // with it right away. 
1846 __ post_call_nop(); 1847 OopMap* map = new OopMap(framesize, 1); 1848 oop_maps->add_gc_map(frame_complete, map); 1849 1850 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1); 1851 __ movptr(c_rarg0, r15_thread); 1852 __ movptr(c_rarg1, rsp); 1853 __ call_VM_leaf(Continuation::freeze_entry(), 2); 1854 __ reset_last_Java_frame(true); 1855 1856 Label L_pinned; 1857 1858 __ testptr(rax, rax); 1859 __ jcc(Assembler::notZero, L_pinned); 1860 1861 __ movptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset())); 1862 continuation_enter_cleanup(masm); 1863 __ pop(rbp); 1864 __ ret(0); 1865 1866 __ bind(L_pinned); 1867 1868 // Pinned, return to caller 1869 __ leave(); 1870 __ ret(0); 1871 } 1872 1873 static void gen_special_dispatch(MacroAssembler* masm, 1874 const methodHandle& method, 1875 const BasicType* sig_bt, 1876 const VMRegPair* regs) { 1877 verify_oop_args(masm, method, sig_bt, regs); 1878 vmIntrinsics::ID iid = method->intrinsic_id(); 1879 1880 // Now write the args into the outgoing interpreter space 1881 bool has_receiver = false; 1882 Register receiver_reg = noreg; 1883 int member_arg_pos = -1; 1884 Register member_reg = noreg; 1885 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1886 if (ref_kind != 0) { 1887 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 1888 member_reg = rbx; // known to be free at this point 1889 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 1890 } else if (iid == vmIntrinsics::_invokeBasic) { 1891 has_receiver = true; 1892 } else if (iid == vmIntrinsics::_linkToNative) { 1893 member_arg_pos = method->size_of_parameters() - 1; // trailing NativeEntryPoint argument 1894 member_reg = rbx; // known to be free at this point 1895 } else { 1896 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); 1897 } 1898 1899 if (member_reg != noreg) { 1900 // Load the member_arg into register, if necessary. 1901 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 1902 VMReg r = regs[member_arg_pos].first(); 1903 if (r->is_stack()) { 1904 __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1905 } else { 1906 // no data motion is needed 1907 member_reg = r->as_Register(); 1908 } 1909 } 1910 1911 if (has_receiver) { 1912 // Make sure the receiver is loaded into a register. 1913 assert(method->size_of_parameters() > 0, "oob"); 1914 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 1915 VMReg r = regs[0].first(); 1916 assert(r->is_valid(), "bad receiver arg"); 1917 if (r->is_stack()) { 1918 // Porting note: This assumes that compiled calling conventions always 1919 // pass the receiver oop in a register. If this is not true on some 1920 // platform, pick a temp and load the receiver from stack. 1921 fatal("receiver always in a register"); 1922 receiver_reg = j_rarg0; // known to be free at this point 1923 __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1924 } else { 1925 // no data motion is needed 1926 receiver_reg = r->as_Register(); 1927 } 1928 } 1929 1930 // Figure out which address we are really jumping to: 1931 MethodHandles::generate_method_handle_dispatch(masm, iid, 1932 receiver_reg, member_reg, /*for_compiler_entry:*/ true); 1933 } 1934 1935 // --------------------------------------------------------------------------- 1936 // Generate a native wrapper for a given method. 
The method takes arguments 1937 // in the Java compiled code convention, marshals them to the native 1938 // convention (handlizes oops, etc), transitions to native, makes the call, 1939 // returns to java state (possibly blocking), unhandlizes any result and 1940 // returns. 1941 // 1942 // Critical native functions are a shorthand for the use of 1943 // GetPrimitiveArrayCritical and disallow the use of any other JNI 1944 // functions. The wrapper is expected to unpack the arguments before 1945 // passing them to the callee. Critical native functions leave the state _in_Java, 1946 // since they cannot stop for GC. 1947 // Some other parts of JNI setup are skipped like the tear down of the JNI handle 1948 // block and the check for pending exceptions because it's impossible for them 1949 // to be thrown. 1950 // 1951 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, 1952 const methodHandle& method, 1953 int compile_id, 1954 BasicType* in_sig_bt, 1955 VMRegPair* in_regs, 1956 BasicType ret_type) { 1957 if (method->is_continuation_native_intrinsic()) { 1958 int exception_offset = -1; 1959 OopMapSet* oop_maps = new OopMapSet(); 1960 int frame_complete = -1; 1961 int stack_slots = -1; 1962 int interpreted_entry_offset = -1; 1963 int vep_offset = -1; 1964 if (method->is_continuation_enter_intrinsic()) { 1965 gen_continuation_enter(masm, 1966 in_regs, 1967 exception_offset, 1968 oop_maps, 1969 frame_complete, 1970 stack_slots, 1971 interpreted_entry_offset, 1972 vep_offset); 1973 } else if (method->is_continuation_yield_intrinsic()) { 1974 gen_continuation_yield(masm, 1975 in_regs, 1976 oop_maps, 1977 frame_complete, 1978 stack_slots, 1979 vep_offset); 1980 } else { 1981 guarantee(false, "Unknown Continuation native intrinsic"); 1982 } 1983 1984 #ifdef ASSERT 1985 if (method->is_continuation_enter_intrinsic()) { 1986 assert(interpreted_entry_offset != -1, "Must be set"); 1987 assert(exception_offset != -1, "Must be set"); 1988 } else { 1989 assert(interpreted_entry_offset == -1, "Must be unset"); 1990 assert(exception_offset == -1, "Must be unset"); 1991 } 1992 assert(frame_complete != -1, "Must be set"); 1993 assert(stack_slots != -1, "Must be set"); 1994 assert(vep_offset != -1, "Must be set"); 1995 #endif 1996 1997 __ flush(); 1998 nmethod* nm = nmethod::new_native_nmethod(method, 1999 compile_id, 2000 masm->code(), 2001 vep_offset, 2002 frame_complete, 2003 stack_slots, 2004 in_ByteSize(-1), 2005 in_ByteSize(-1), 2006 oop_maps, 2007 exception_offset); 2008 if (method->is_continuation_enter_intrinsic()) { 2009 ContinuationEntry::set_enter_code(nm, interpreted_entry_offset); 2010 } else if (method->is_continuation_yield_intrinsic()) { 2011 _cont_doYield_stub = nm; 2012 } 2013 return nm; 2014 } 2015 2016 if (method->is_method_handle_intrinsic()) { 2017 vmIntrinsics::ID iid = method->intrinsic_id(); 2018 intptr_t start = (intptr_t)__ pc(); 2019 int vep_offset = ((intptr_t)__ pc()) - start; 2020 gen_special_dispatch(masm, 2021 method, 2022 in_sig_bt, 2023 in_regs); 2024 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period 2025 __ flush(); 2026 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually 2027 return nmethod::new_native_nmethod(method, 2028 compile_id, 2029 masm->code(), 2030 vep_offset, 2031 frame_complete, 2032 stack_slots / VMRegImpl::slots_per_word, 2033 in_ByteSize(-1), 2034 in_ByteSize(-1), 2035 (OopMapSet*)NULL); 2036 } 2037 address native_func = method->native_function(); 2038 assert(native_func != NULL, "must
have function"); 2039 2040 // An OopMap for lock (and class if static) 2041 OopMapSet *oop_maps = new OopMapSet(); 2042 intptr_t start = (intptr_t)__ pc(); 2043 2044 // We have received a description of where all the java arg are located 2045 // on entry to the wrapper. We need to convert these args to where 2046 // the jni function will expect them. To figure out where they go 2047 // we convert the java signature to a C signature by inserting 2048 // the hidden arguments as arg[0] and possibly arg[1] (static method) 2049 2050 const int total_in_args = method->size_of_parameters(); 2051 int total_c_args = total_in_args + (method->is_static() ? 2 : 1); 2052 2053 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 2054 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 2055 BasicType* in_elem_bt = NULL; 2056 2057 int argc = 0; 2058 out_sig_bt[argc++] = T_ADDRESS; 2059 if (method->is_static()) { 2060 out_sig_bt[argc++] = T_OBJECT; 2061 } 2062 2063 for (int i = 0; i < total_in_args ; i++ ) { 2064 out_sig_bt[argc++] = in_sig_bt[i]; 2065 } 2066 2067 // Now figure out where the args must be stored and how much stack space 2068 // they require. 2069 int out_arg_slots; 2070 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); 2071 2072 // Compute framesize for the wrapper. We need to handlize all oops in 2073 // incoming registers 2074 2075 // Calculate the total number of stack slots we will need. 2076 2077 // First count the abi requirement plus all of the outgoing args 2078 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 2079 2080 // Now the space for the inbound oop handle area 2081 int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers 2082 2083 int oop_handle_offset = stack_slots; 2084 stack_slots += total_save_slots; 2085 2086 // Now any space we need for handlizing a klass if static method 2087 2088 int klass_slot_offset = 0; 2089 int klass_offset = -1; 2090 int lock_slot_offset = 0; 2091 bool is_static = false; 2092 2093 if (method->is_static()) { 2094 klass_slot_offset = stack_slots; 2095 stack_slots += VMRegImpl::slots_per_word; 2096 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 2097 is_static = true; 2098 } 2099 2100 // Plus a lock if needed 2101 2102 if (method->is_synchronized()) { 2103 lock_slot_offset = stack_slots; 2104 stack_slots += VMRegImpl::slots_per_word; 2105 } 2106 2107 // Now a place (+2) to save return values or temp during shuffling 2108 // + 4 for return address (which we own) and saved rbp 2109 stack_slots += 6; 2110 2111 // Ok The space we have allocated will look like: 2112 // 2113 // 2114 // FP-> | | 2115 // |---------------------| 2116 // | 2 slots for moves | 2117 // |---------------------| 2118 // | lock box (if sync) | 2119 // |---------------------| <- lock_slot_offset 2120 // | klass (if static) | 2121 // |---------------------| <- klass_slot_offset 2122 // | oopHandle area | 2123 // |---------------------| <- oop_handle_offset (6 java arg registers) 2124 // | outbound memory | 2125 // | based arguments | 2126 // | | 2127 // |---------------------| 2128 // | | 2129 // SP-> | out_preserved_slots | 2130 // 2131 // 2132 2133 2134 // Now compute actual number of stack words we need rounding to make 2135 // stack properly aligned. 
2136 stack_slots = align_up(stack_slots, StackAlignmentInSlots); 2137 2138 int stack_size = stack_slots * VMRegImpl::stack_slot_size; 2139 2140 // First thing make an ic check to see if we should even be here 2141 2142 // We are free to use all registers as temps without saving them and 2143 // restoring them except rbp. rbp is the only callee save register 2144 // as far as the interpreter and the compiler(s) are concerned. 2145 2146 2147 const Register ic_reg = rax; 2148 const Register receiver = j_rarg0; 2149 2150 Label hit; 2151 Label exception_pending; 2152 2153 assert_different_registers(ic_reg, receiver, rscratch1, rscratch2); 2154 __ verify_oop(receiver); 2155 __ load_klass(rscratch1, receiver, rscratch2); 2156 __ cmpq(ic_reg, rscratch1); 2157 __ jcc(Assembler::equal, hit); 2158 2159 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 2160 2161 // Verified entry point must be aligned 2162 __ align(8); 2163 2164 __ bind(hit); 2165 2166 int vep_offset = ((intptr_t)__ pc()) - start; 2167 2168 if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { 2169 Label L_skip_barrier; 2170 Register klass = r10; 2171 __ mov_metadata(klass, method->method_holder()); // InstanceKlass* 2172 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/); 2173 2174 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path 2175 2176 __ bind(L_skip_barrier); 2177 } 2178 2179 #ifdef COMPILER1 2180 // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available. 2181 if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { 2182 inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/); 2183 } 2184 #endif // COMPILER1 2185 2186 // The instruction at the verified entry point must be 5 bytes or longer 2187 // because it can be patched on the fly by make_non_entrant. The stack bang 2188 // instruction fits that requirement. 2189 2190 // Generate stack overflow check 2191 __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); 2192 2193 // Generate a new frame for the wrapper. 2194 __ enter(); 2195 // -2 because return address is already present and so is saved rbp 2196 __ subptr(rsp, stack_size - 2*wordSize); 2197 2198 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2199 // native wrapper is not hot enough to micro optimize the nmethod entry barrier with an out-of-line stub 2200 bs->nmethod_entry_barrier(masm, NULL /* slow_path */, NULL /* continuation */); 2201 2202 // Frame is now completed as far as size and linkage. 2203 int frame_complete = ((intptr_t)__ pc()) - start; 2204 2205 if (UseRTMLocking) { 2206 // Abort RTM transaction before calling JNI 2207 // because critical section will be large and will be 2208 // aborted anyway. Also nmethod could be deoptimized. 2209 __ xabort(0); 2210 } 2211 2212 #ifdef ASSERT 2213 __ check_stack_alignment(rsp, "improperly aligned stack"); 2214 #endif /* ASSERT */ 2215 2216 2217 // We use r14 as the oop handle for the receiver/klass 2218 // It is callee save so it survives the call to native 2219 2220 const Register oop_handle_reg = r14; 2221 2222 // 2223 // We immediately shuffle the arguments so that any vm call we have to 2224 // make from here on out (sync slow path, jvmti, etc.) we will have 2225 // captured the oops from our caller and have a valid oopMap for 2226 // them. 
2227 2228 // ----------------- 2229 // The Grand Shuffle 2230 2231 // The Java calling convention is either equal (linux) or denser (win64) than the 2232 // c calling convention. However, because of the jni_env argument, the c calling 2233 // convention always has at least one more (and two for static) arguments than Java. 2234 // Therefore if we move the args from java -> c backwards then we will never have 2235 // a register->register conflict and we don't have to build a dependency graph 2236 // and figure out how to break any cycles. 2237 // 2238 2239 // Record esp-based slot for receiver on stack for non-static methods 2240 int receiver_offset = -1; 2241 2242 // This is a trick. We double the stack slots so we can claim 2243 // the oops in the caller's frame. Since we are sure to have 2244 // more args than the caller, doubling is enough to make 2245 // sure we can capture all the incoming oop args from the 2246 // caller. 2247 // 2248 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 2249 2250 // Mark location of rbp (someday) 2251 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp)); 2252 2253 // Use eax, ebx as temporaries during any memory-memory moves we have to do 2254 // All inbound args are referenced based on rbp and all outbound args via rsp. 2255 2256 2257 #ifdef ASSERT 2258 bool reg_destroyed[Register::number_of_registers]; 2259 bool freg_destroyed[XMMRegister::number_of_registers]; 2260 for ( int r = 0 ; r < Register::number_of_registers ; r++ ) { 2261 reg_destroyed[r] = false; 2262 } 2263 for ( int f = 0 ; f < XMMRegister::number_of_registers ; f++ ) { 2264 freg_destroyed[f] = false; 2265 } 2266 2267 #endif /* ASSERT */ 2268 2269 // For JNI natives the incoming and outgoing registers are offset upwards.
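// arg_order holds (java_arg_index, c_arg_index) pairs, pushed from the last argument down to the first so the moves below run in reverse order.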
2270 GrowableArray<int> arg_order(2 * total_in_args); 2271 2272 VMRegPair tmp_vmreg; 2273 tmp_vmreg.set2(rbx->as_VMReg()); 2274 2275 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { 2276 arg_order.push(i); 2277 arg_order.push(c_arg); 2278 } 2279 2280 int temploc = -1; 2281 for (int ai = 0; ai < arg_order.length(); ai += 2) { 2282 int i = arg_order.at(ai); 2283 int c_arg = arg_order.at(ai + 1); 2284 __ block_comment(err_msg("move %d -> %d", i, c_arg)); 2285 #ifdef ASSERT 2286 if (in_regs[i].first()->is_Register()) { 2287 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); 2288 } else if (in_regs[i].first()->is_XMMRegister()) { 2289 assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!"); 2290 } 2291 if (out_regs[c_arg].first()->is_Register()) { 2292 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 2293 } else if (out_regs[c_arg].first()->is_XMMRegister()) { 2294 freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; 2295 } 2296 #endif /* ASSERT */ 2297 switch (in_sig_bt[i]) { 2298 case T_ARRAY: 2299 case T_PRIMITIVE_OBJECT: 2300 case T_OBJECT: 2301 __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 2302 ((i == 0) && (!is_static)), 2303 &receiver_offset); 2304 break; 2305 case T_VOID: 2306 break; 2307 2308 case T_FLOAT: 2309 __ float_move(in_regs[i], out_regs[c_arg]); 2310 break; 2311 2312 case T_DOUBLE: 2313 assert( i + 1 < total_in_args && 2314 in_sig_bt[i + 1] == T_VOID && 2315 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 2316 __ double_move(in_regs[i], out_regs[c_arg]); 2317 break; 2318 2319 case T_LONG : 2320 __ long_move(in_regs[i], out_regs[c_arg]); 2321 break; 2322 2323 case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 2324 2325 default: 2326 __ move32_64(in_regs[i], out_regs[c_arg]); 2327 } 2328 } 2329 2330 int c_arg; 2331 2332 // Pre-load a static method's oop into r14. Used both by locking code and 2333 // the normal JNI call code. 2334 // point c_arg at the first arg that is already loaded in case we 2335 // need to spill before we call out 2336 c_arg = total_c_args - total_in_args; 2337 2338 if (method->is_static()) { 2339 2340 // load oop into a register 2341 __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); 2342 2343 // Now handlize the static class mirror; it's known to be not-null. 2344 __ movptr(Address(rsp, klass_offset), oop_handle_reg); 2345 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 2346 2347 // Now get the handle 2348 __ lea(oop_handle_reg, Address(rsp, klass_offset)); 2349 // store the klass handle as second argument 2350 __ movptr(c_rarg1, oop_handle_reg); 2351 // and protect the arg if we must spill 2352 c_arg--; 2353 } 2354 2355 // Change state to native (we save the return address in the thread, since it might not 2356 // be pushed on the stack when we do a stack traversal). It is enough that the pc() 2357 // points into the right code segment. It does not have to be the correct return pc. 2358 // We use the same pc/oopMap repeatedly when we call out 2359 2360 intptr_t the_pc = (intptr_t) __ pc(); 2361 oop_maps->add_gc_map(the_pc - start, map); 2362 2363 __ set_last_Java_frame(rsp, noreg, (address)the_pc, rscratch1); 2364 2365 2366 // We have all of the arguments setup at this point. We must not touch any of the 2367 // argument registers at this point (what if we save/restore them and there are no oops?)
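// DTrace method-entry probe: the block below is skipped unless DTraceMethodProbes is set.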
2368 2369 { 2370 SkipIfEqual skip(masm, &DTraceMethodProbes, false, rscratch1); 2371 // protect the args we've loaded 2372 save_args(masm, total_c_args, c_arg, out_regs); 2373 __ mov_metadata(c_rarg1, method()); 2374 __ call_VM_leaf( 2375 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 2376 r15_thread, c_rarg1); 2377 restore_args(masm, total_c_args, c_arg, out_regs); 2378 } 2379 2380 // RedefineClasses() tracing support for obsolete method entry 2381 if (log_is_enabled(Trace, redefine, class, obsolete)) { 2382 // protect the args we've loaded 2383 save_args(masm, total_c_args, c_arg, out_regs); 2384 __ mov_metadata(c_rarg1, method()); 2385 __ call_VM_leaf( 2386 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), 2387 r15_thread, c_rarg1); 2388 restore_args(masm, total_c_args, c_arg, out_regs); 2389 } 2390 2391 // Lock a synchronized method 2392 2393 // Register definitions used by locking and unlocking 2394 2395 const Register swap_reg = rax; // Must use rax for cmpxchg instruction 2396 const Register obj_reg = rbx; // Will contain the oop 2397 const Register lock_reg = r13; // Address of compiler lock object (BasicLock) 2398 const Register old_hdr = r13; // value of old header at unlock time 2399 2400 Label slow_path_lock; 2401 Label lock_done; 2402 2403 if (method->is_synchronized()) { 2404 Label count_mon; 2405 2406 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); 2407 2408 // Get the handle (the 2nd argument) 2409 __ mov(oop_handle_reg, c_rarg1); 2410 2411 // Get address of the box 2412 2413 __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2414 2415 // Load the oop from the handle 2416 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 2417 2418 if (!UseHeavyMonitors) { 2419 2420 // Load immediate 1 into swap_reg %rax 2421 __ movl(swap_reg, 1); 2422 2423 // Load (object->mark() | 1) into swap_reg %rax 2424 __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2425 if (EnableValhalla) { 2426 // Mask inline_type bit such that we go to the slow path if object is an inline type 2427 __ andptr(swap_reg, ~((int) markWord::inline_type_bit_in_place)); 2428 } 2429 2430 // Save (object->mark() | 1) into BasicLock's displaced header 2431 __ movptr(Address(lock_reg, mark_word_offset), swap_reg); 2432 2433 // src -> dest iff dest == rax else rax <- dest 2434 __ lock(); 2435 __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2436 __ jcc(Assembler::equal, count_mon); 2437 2438 // Hmm should this move to the slow path code area??? 2439 2440 // Test if the oopMark is an obvious stack pointer, i.e., 2441 // 1) (mark & 3) == 0, and 2442 // 2) rsp <= mark < mark + os::pagesize() 2443 // These 3 tests can be done by evaluating the following 2444 // expression: ((mark - rsp) & (3 - os::vm_page_size())), 2445 // assuming both stack pointer and pagesize have their 2446 // least significant 2 bits clear. 
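// For example, with a 4K page, 3 - os::vm_page_size() is 0xfffffffffffff003, so the AND is zero exactly when (mark & 3) == 0 and 0 <= mark - rsp < 4096.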
2447 // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg 2448 2449 __ subptr(swap_reg, rsp); 2450 __ andptr(swap_reg, 3 - os::vm_page_size()); 2451 2452 // Save the test result, for recursive case, the result is zero 2453 __ movptr(Address(lock_reg, mark_word_offset), swap_reg); 2454 __ jcc(Assembler::notEqual, slow_path_lock); 2455 } else { 2456 __ jmp(slow_path_lock); 2457 } 2458 __ bind(count_mon); 2459 __ inc_held_monitor_count(); 2460 2461 // Slow path will re-enter here 2462 __ bind(lock_done); 2463 } 2464 2465 // Finally just about ready to make the JNI call 2466 2467 // get JNIEnv* which is first argument to native 2468 __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset()))); 2469 2470 // Now set thread in native 2471 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native); 2472 2473 __ call(RuntimeAddress(native_func)); 2474 2475 // Verify or restore cpu control state after JNI call 2476 __ restore_cpu_control_state_after_jni(rscratch1); 2477 2478 // Unpack native results. 2479 switch (ret_type) { 2480 case T_BOOLEAN: __ c2bool(rax); break; 2481 case T_CHAR : __ movzwl(rax, rax); break; 2482 case T_BYTE : __ sign_extend_byte (rax); break; 2483 case T_SHORT : __ sign_extend_short(rax); break; 2484 case T_INT : /* nothing to do */ break; 2485 case T_DOUBLE : 2486 case T_FLOAT : 2487 // Result is in xmm0 we'll save as needed 2488 break; 2489 case T_ARRAY: // Really a handle 2490 case T_PRIMITIVE_OBJECT: // Really a handle 2491 case T_OBJECT: // Really a handle 2492 break; // can't de-handlize until after safepoint check 2493 case T_VOID: break; 2494 case T_LONG: break; 2495 default : ShouldNotReachHere(); 2496 } 2497 2498 Label after_transition; 2499 2500 // Switch thread to "native transition" state before reading the synchronization state. 2501 // This additional state is necessary because reading and testing the synchronization 2502 // state is not atomic w.r.t. GC, as this scenario demonstrates: 2503 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. 2504 // VM thread changes sync state to synchronizing and suspends threads for GC. 2505 // Thread A is resumed to finish this native method, but doesn't block here since it 2506 // didn't see any synchronization is progress, and escapes. 2507 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans); 2508 2509 // Force this write out before the read below 2510 if (!UseSystemMemoryBarrier) { 2511 __ membar(Assembler::Membar_mask_bits( 2512 Assembler::LoadLoad | Assembler::LoadStore | 2513 Assembler::StoreLoad | Assembler::StoreStore)); 2514 } 2515 2516 // check for safepoint operation in progress and/or pending suspend requests 2517 { 2518 Label Continue; 2519 Label slow_path; 2520 2521 __ safepoint_poll(slow_path, r15_thread, true /* at_return */, false /* in_nmethod */); 2522 2523 __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); 2524 __ jcc(Assembler::equal, Continue); 2525 __ bind(slow_path); 2526 2527 // Don't use call_VM as it will see a possible pending exception and forward it 2528 // and never return here preventing us from clearing _last_native_pc down below. 2529 // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are 2530 // preserved and correspond to the bcp/locals pointers. So we do a runtime call 2531 // by hand. 
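// vzeroupper clears the upper YMM/ZMM state so the runtime call below does not pay AVX-SSE transition penalties.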
2532 // 2533 __ vzeroupper(); 2534 save_native_result(masm, ret_type, stack_slots); 2535 __ mov(c_rarg0, r15_thread); 2536 __ mov(r12, rsp); // remember sp 2537 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2538 __ andptr(rsp, -16); // align stack as required by ABI 2539 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); 2540 __ mov(rsp, r12); // restore sp 2541 __ reinit_heapbase(); 2542 // Restore any method result value 2543 restore_native_result(masm, ret_type, stack_slots); 2544 __ bind(Continue); 2545 } 2546 2547 // change thread state 2548 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); 2549 __ bind(after_transition); 2550 2551 Label reguard; 2552 Label reguard_done; 2553 __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); 2554 __ jcc(Assembler::equal, reguard); 2555 __ bind(reguard_done); 2556 2557 // native result if any is live 2558 2559 // Unlock 2560 Label slow_path_unlock; 2561 Label unlock_done; 2562 if (method->is_synchronized()) { 2563 2564 Label fast_done; 2565 2566 // Get locked oop from the handle we passed to jni 2567 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 2568 2569 if (!UseHeavyMonitors) { 2570 Label not_recur; 2571 // Simple recursive lock? 2572 __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), NULL_WORD); 2573 __ jcc(Assembler::notEqual, not_recur); 2574 __ dec_held_monitor_count(); 2575 __ jmpb(fast_done); 2576 __ bind(not_recur); 2577 } 2578 2579 // Must save rax if it is live now because cmpxchg must use it 2580 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2581 save_native_result(masm, ret_type, stack_slots); 2582 } 2583 2584 if (!UseHeavyMonitors) { 2585 // get address of the stack lock 2586 __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2587 // get old displaced header 2588 __ movptr(old_hdr, Address(rax, 0)); 2589 2590 // Atomic swap old header if oop still contains the stack lock 2591 __ lock(); 2592 __ cmpxchgptr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2593 __ jcc(Assembler::notEqual, slow_path_unlock); 2594 __ dec_held_monitor_count(); 2595 } else { 2596 __ jmp(slow_path_unlock); 2597 } 2598 2599 // slow path re-enters here 2600 __ bind(unlock_done); 2601 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2602 restore_native_result(masm, ret_type, stack_slots); 2603 } 2604 2605 __ bind(fast_done); 2606 } 2607 { 2608 SkipIfEqual skip(masm, &DTraceMethodProbes, false, rscratch1); 2609 save_native_result(masm, ret_type, stack_slots); 2610 __ mov_metadata(c_rarg1, method()); 2611 __ call_VM_leaf( 2612 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 2613 r15_thread, c_rarg1); 2614 restore_native_result(masm, ret_type, stack_slots); 2615 } 2616 2617 __ reset_last_Java_frame(false); 2618 2619 // Unbox oop result, e.g. JNIHandles::resolve value. 
2620 if (is_reference_type(ret_type)) { 2621 __ resolve_jobject(rax /* value */, 2622 r15_thread /* thread */, 2623 rcx /* tmp */); 2624 } 2625 2626 if (CheckJNICalls) { 2627 // clear_pending_jni_exception_check 2628 __ movptr(Address(r15_thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD); 2629 } 2630 2631 // reset handle block 2632 __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset())); 2633 __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD); 2634 2635 // pop our frame 2636 2637 __ leave(); 2638 2639 // Any exception pending? 2640 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2641 __ jcc(Assembler::notEqual, exception_pending); 2642 2643 // Return 2644 2645 __ ret(0); 2646 2647 // Unexpected paths are out of line and go here 2648 2649 // forward the exception 2650 __ bind(exception_pending); 2651 2652 // and forward the exception 2653 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 2654 2655 // Slow path locking & unlocking 2656 if (method->is_synchronized()) { 2657 2658 // BEGIN Slow path lock 2659 __ bind(slow_path_lock); 2660 2661 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM 2662 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2663 2664 // protect the args we've loaded 2665 save_args(masm, total_c_args, c_arg, out_regs); 2666 2667 __ mov(c_rarg0, obj_reg); 2668 __ mov(c_rarg1, lock_reg); 2669 __ mov(c_rarg2, r15_thread); 2670 2671 // Not a leaf but we have last_Java_frame setup as we want 2672 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); 2673 restore_args(masm, total_c_args, c_arg, out_regs); 2674 2675 #ifdef ASSERT 2676 { Label L; 2677 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2678 __ jcc(Assembler::equal, L); 2679 __ stop("no pending exception allowed on exit from monitorenter"); 2680 __ bind(L); 2681 } 2682 #endif 2683 __ jmp(lock_done); 2684 2685 // END Slow path lock 2686 2687 // BEGIN Slow path unlock 2688 __ bind(slow_path_unlock); 2689 2690 // If we haven't already saved the native result we must save it now as xmm registers 2691 // are still exposed. 
2692 __ vzeroupper(); 2693 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2694 save_native_result(masm, ret_type, stack_slots); 2695 } 2696 2697 __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2698 2699 __ mov(c_rarg0, obj_reg); 2700 __ mov(c_rarg2, r15_thread); 2701 __ mov(r12, rsp); // remember sp 2702 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2703 __ andptr(rsp, -16); // align stack as required by ABI 2704 2705 // Save pending exception around call to VM (which contains an EXCEPTION_MARK) 2706 // NOTE that obj_reg == rbx currently 2707 __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset()))); 2708 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2709 2710 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2711 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); 2712 __ mov(rsp, r12); // restore sp 2713 __ reinit_heapbase(); 2714 #ifdef ASSERT 2715 { 2716 Label L; 2717 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2718 __ jcc(Assembler::equal, L); 2719 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); 2720 __ bind(L); 2721 } 2722 #endif /* ASSERT */ 2723 2724 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx); 2725 2726 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2727 restore_native_result(masm, ret_type, stack_slots); 2728 } 2729 __ jmp(unlock_done); 2730 2731 // END Slow path unlock 2732 2733 } // synchronized 2734 2735 // SLOW PATH Reguard the stack if needed 2736 2737 __ bind(reguard); 2738 __ vzeroupper(); 2739 save_native_result(masm, ret_type, stack_slots); 2740 __ mov(r12, rsp); // remember sp 2741 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2742 __ andptr(rsp, -16); // align stack as required by ABI 2743 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); 2744 __ mov(rsp, r12); // restore sp 2745 __ reinit_heapbase(); 2746 restore_native_result(masm, ret_type, stack_slots); 2747 // and continue 2748 __ jmp(reguard_done); 2749 2750 2751 2752 __ flush(); 2753 2754 nmethod *nm = nmethod::new_native_nmethod(method, 2755 compile_id, 2756 masm->code(), 2757 vep_offset, 2758 frame_complete, 2759 stack_slots / VMRegImpl::slots_per_word, 2760 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), 2761 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), 2762 oop_maps); 2763 2764 return nm; 2765 } 2766 2767 // this function returns the adjust size (in number of words) to a c2i adapter 2768 // activation for use during deoptimization 2769 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) { 2770 return (callee_locals - callee_parameters) * Interpreter::stackElementWords; 2771 } 2772 2773 2774 uint SharedRuntime::out_preserve_stack_slots() { 2775 return 0; 2776 } 2777 2778 2779 // Number of stack slots between incoming argument block and the start of 2780 // a new frame. The PROLOG must add this many slots to the stack. The 2781 // EPILOG must remove this many slots. amd64 needs two slots for 2782 // return address. 
2783 uint SharedRuntime::in_preserve_stack_slots() { 2784 return 4 + 2 * VerifyStackAtCalls; 2785 } 2786 2787 //------------------------------generate_deopt_blob---------------------------- 2788 void SharedRuntime::generate_deopt_blob() { 2789 // Allocate space for the code 2790 ResourceMark rm; 2791 // Setup code generation tools 2792 int pad = 0; 2793 if (UseAVX > 2) { 2794 pad += 1024; 2795 } 2796 #if INCLUDE_JVMCI 2797 if (EnableJVMCI) { 2798 pad += 512; // Increase the buffer size when compiling for JVMCI 2799 } 2800 #endif 2801 CodeBuffer buffer("deopt_blob", 2560+pad, 1024); 2802 MacroAssembler* masm = new MacroAssembler(&buffer); 2803 int frame_size_in_words; 2804 OopMap* map = NULL; 2805 OopMapSet *oop_maps = new OopMapSet(); 2806 2807 // ------------- 2808 // This code enters when returning to a de-optimized nmethod. A return 2809 // address has been pushed on the stack, and return values are in 2810 // registers. 2811 // If we are doing a normal deopt then we were called from the patched 2812 // nmethod from the point we returned to the nmethod. So the return 2813 // address on the stack is wrong by NativeCall::instruction_size 2814 // We will adjust the value so it looks like we have the original return 2815 // address on the stack (like when we eagerly deoptimized). 2816 // In the case of an exception pending when deoptimizing, we enter 2817 // with a return address on the stack that points after the call we patched 2818 // into the exception handler. We have the following register state from, 2819 // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). 2820 // rax: exception oop 2821 // rbx: exception handler 2822 // rdx: throwing pc 2823 // So in this case we simply jam rdx into the useless return address and 2824 // the stack looks just like we want. 2825 // 2826 // At this point we need to de-opt. We save the argument return 2827 // registers. We call the first C routine, fetch_unroll_info(). This 2828 // routine captures the return values and returns a structure which 2829 // describes the current frame size and the sizes of all replacement frames. 2830 // The current frame is compiled code and may contain many inlined 2831 // functions, each with their own JVM state. We pop the current frame, then 2832 // push all the new frames. Then we call the C routine unpack_frames() to 2833 // populate these frames. Finally unpack_frames() returns us the new target 2834 // address. Notice that callee-save registers are BLOWN here; they have 2835 // already been captured in the vframeArray at the time the return PC was 2836 // patched. 2837 address start = __ pc(); 2838 Label cont; 2839 2840 // Prolog for non exception case! 2841 2842 // Save everything in sight. 2843 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2844 2845 // Normal deoptimization. Save exec mode for unpack_frames. 
2846 __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved 2847 __ jmp(cont); 2848 2849 int reexecute_offset = __ pc() - start; 2850 #if INCLUDE_JVMCI && !defined(COMPILER1) 2851 if (EnableJVMCI && UseJVMCICompiler) { 2852 // JVMCI does not use this kind of deoptimization 2853 __ should_not_reach_here(); 2854 } 2855 #endif 2856 2857 // Reexecute case 2858 // return address is the pc that describes what bci to re-execute at 2859 2860 // No need to update map as each call to save_live_registers will produce identical oopmap 2861 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2862 2863 __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved 2864 __ jmp(cont); 2865 2866 #if INCLUDE_JVMCI 2867 Label after_fetch_unroll_info_call; 2868 int implicit_exception_uncommon_trap_offset = 0; 2869 int uncommon_trap_offset = 0; 2870 2871 if (EnableJVMCI) { 2872 implicit_exception_uncommon_trap_offset = __ pc() - start; 2873 2874 __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); 2875 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), NULL_WORD); 2876 2877 uncommon_trap_offset = __ pc() - start; 2878 2879 // Save everything in sight. 2880 RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2881 // fetch_unroll_info needs to call last_java_frame() 2882 __ set_last_Java_frame(noreg, noreg, NULL, rscratch1); 2883 2884 __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset()))); 2885 __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1); 2886 2887 __ movl(r14, Deoptimization::Unpack_reexecute); 2888 __ mov(c_rarg0, r15_thread); 2889 __ movl(c_rarg2, r14); // exec mode 2890 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); 2891 oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); 2892 2893 __ reset_last_Java_frame(false); 2894 2895 __ jmp(after_fetch_unroll_info_call); 2896 } // EnableJVMCI 2897 #endif // INCLUDE_JVMCI 2898 2899 int exception_offset = __ pc() - start; 2900 2901 // Prolog for exception case 2902 2903 // all registers are dead at this entry point, except for rax, and 2904 // rdx which contain the exception oop and exception pc 2905 // respectively. Set them in TLS and fall thru to the 2906 // unpack_with_exception_in_tls entry point. 2907 2908 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx); 2909 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax); 2910 2911 int exception_in_tls_offset = __ pc() - start; 2912 2913 // new implementation because exception oop is now passed in JavaThread 2914 2915 // Prolog for exception case 2916 // All registers must be preserved because they might be used by LinearScan 2917 // Exception oop and throwing PC are passed in JavaThread 2918 // tos: stack at point of call to method that threw the exception (i.e. only 2919 // args are on the stack, no return address) 2920 2921 // make room on stack for the return address 2922 // It will be patched later with the throwing pc. The correct value is not 2923 // available now because loading it from memory would destroy registers. 2924 __ push(0); 2925 2926 // Save everything in sight. 2927 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2928 2929 // Now it is safe to overwrite any register 2930 2931 // Deopt during an exception.
Save exec mode for unpack_frames. 2932 __ movl(r14, Deoptimization::Unpack_exception); // callee-saved 2933 2934 // load throwing pc from JavaThread and patch it as the return address 2935 // of the current frame. Then clear the field in JavaThread 2936 2937 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 2938 __ movptr(Address(rbp, wordSize), rdx); 2939 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD); 2940 2941 #ifdef ASSERT 2942 // verify that there is really an exception oop in JavaThread 2943 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 2944 __ verify_oop(rax); 2945 2946 // verify that there is no pending exception 2947 Label no_pending_exception; 2948 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 2949 __ testptr(rax, rax); 2950 __ jcc(Assembler::zero, no_pending_exception); 2951 __ stop("must not have pending exception here"); 2952 __ bind(no_pending_exception); 2953 #endif 2954 2955 __ bind(cont); 2956 2957 // Call C code. Need thread and this frame, but NOT official VM entry 2958 // crud. We cannot block on this call, no GC can happen. 2959 // 2960 // UnrollBlock* fetch_unroll_info(JavaThread* thread) 2961 2962 // fetch_unroll_info needs to call last_java_frame(). 2963 2964 __ set_last_Java_frame(noreg, noreg, NULL, rscratch1); 2965 #ifdef ASSERT 2966 { Label L; 2967 __ cmpptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 2968 __ jcc(Assembler::equal, L); 2969 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); 2970 __ bind(L); 2971 } 2972 #endif // ASSERT 2973 __ mov(c_rarg0, r15_thread); 2974 __ movl(c_rarg1, r14); // exec_mode 2975 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); 2976 2977 // Need to have an oopmap that tells fetch_unroll_info where to 2978 // find any register it might need. 2979 oop_maps->add_gc_map(__ pc() - start, map); 2980 2981 __ reset_last_Java_frame(false); 2982 2983 #if INCLUDE_JVMCI 2984 if (EnableJVMCI) { 2985 __ bind(after_fetch_unroll_info_call); 2986 } 2987 #endif 2988 2989 // Load UnrollBlock* into rdi 2990 __ mov(rdi, rax); 2991 2992 __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); 2993 Label noException; 2994 __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending? 2995 __ jcc(Assembler::notEqual, noException); 2996 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 2997 // QQQ this is useless it was NULL above 2998 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 2999 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD); 3000 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD); 3001 3002 __ verify_oop(rax); 3003 3004 // Overwrite the result registers with the exception results. 3005 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3006 // I think this is useless 3007 __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx); 3008 3009 __ bind(noException); 3010 3011 // Only register save data is on the stack. 3012 // Now restore the result registers. Everything else is either dead 3013 // or captured in the vframeArray. 3014 RegisterSaver::restore_result_registers(masm); 3015 3016 // All of the register save area has been popped of the stack. Only the 3017 // return address remains. 3018 3019 // Pop all the frames we must move/replace. 
3020 // 3021 // Frame picture (youngest to oldest) 3022 // 1: self-frame (no frame link) 3023 // 2: deopting frame (no frame link) 3024 // 3: caller of deopting frame (could be compiled/interpreted). 3025 // 3026 // Note: by leaving the return address of self-frame on the stack 3027 // and using the size of frame 2 to adjust the stack 3028 // when we are done the return to frame 3 will still be on the stack. 3029 3030 // Pop deoptimized frame 3031 __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); 3032 __ addptr(rsp, rcx); 3033 3034 // rsp should be pointing at the return address to the caller (3) 3035 3036 // Pick up the initial fp we should save 3037 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) 3038 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); 3039 3040 #ifdef ASSERT 3041 // Compilers generate code that bang the stack by as much as the 3042 // interpreter would need. So this stack banging should never 3043 // trigger a fault. Verify that it does not on non product builds. 3044 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); 3045 __ bang_stack_size(rbx, rcx); 3046 #endif 3047 3048 // Load address of array of frame pcs into rcx 3049 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); 3050 3051 // Trash the old pc 3052 __ addptr(rsp, wordSize); 3053 3054 // Load address of array of frame sizes into rsi 3055 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); 3056 3057 // Load counter into rdx 3058 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); 3059 3060 // Now adjust the caller's stack to make up for the extra locals 3061 // but record the original sp so that we can save it in the skeletal interpreter 3062 // frame and the stack walking of interpreter_sender will get the unextended sp 3063 // value and not the "real" sp value. 3064 3065 const Register sender_sp = r8; 3066 3067 __ mov(sender_sp, rsp); 3068 __ movl(rbx, Address(rdi, 3069 Deoptimization::UnrollBlock:: 3070 caller_adjustment_offset_in_bytes())); 3071 __ subptr(rsp, rbx); 3072 3073 // Push interpreter frames in a loop 3074 Label loop; 3075 __ bind(loop); 3076 __ movptr(rbx, Address(rsi, 0)); // Load frame size 3077 __ subptr(rbx, 2*wordSize); // We'll push pc and ebp by hand 3078 __ pushptr(Address(rcx, 0)); // Save return address 3079 __ enter(); // Save old & set new ebp 3080 __ subptr(rsp, rbx); // Prolog 3081 // This value is corrected by layout_activation_impl 3082 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); 3083 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable 3084 __ mov(sender_sp, rsp); // Pass sender_sp to next frame 3085 __ addptr(rsi, wordSize); // Bump array pointer (sizes) 3086 __ addptr(rcx, wordSize); // Bump array pointer (pcs) 3087 __ decrementl(rdx); // Decrement counter 3088 __ jcc(Assembler::notZero, loop); 3089 __ pushptr(Address(rcx, 0)); // Save final return address 3090 3091 // Re-push self-frame 3092 __ enter(); // Save old & set new ebp 3093 3094 // Allocate a full sized register save area. 3095 // Return address and rbp are in place, so we allocate two less words. 
3096 __ subptr(rsp, (frame_size_in_words - 2) * wordSize); 3097 3098 // Restore frame locals after moving the frame 3099 __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0); 3100 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3101 3102 // Call C code. Need thread but NOT official VM entry 3103 // crud. We cannot block on this call, no GC can happen. Call should 3104 // restore return values to their stack-slots with the new SP. 3105 // 3106 // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) 3107 3108 // Use rbp because the frames look interpreted now 3109 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 3110 // Don't need the precise return PC here, just precise enough to point into this code blob. 3111 address the_pc = __ pc(); 3112 __ set_last_Java_frame(noreg, rbp, the_pc, rscratch1); 3113 3114 __ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by ABI 3115 __ mov(c_rarg0, r15_thread); 3116 __ movl(c_rarg1, r14); // second arg: exec_mode 3117 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 3118 // Revert SP alignment after call since we're going to do some SP relative addressing below 3119 __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset())); 3120 3121 // Set an oopmap for the call site 3122 // Use the same PC we used for the last java frame 3123 oop_maps->add_gc_map(the_pc - start, 3124 new OopMap( frame_size_in_words, 0 )); 3125 3126 // Clear fp AND pc 3127 __ reset_last_Java_frame(true); 3128 3129 // Collect return values 3130 __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes())); 3131 __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes())); 3132 // I think this is useless (throwing pc?) 3133 __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes())); 3134 3135 // Pop self-frame. 3136 __ leave(); // Epilog 3137 3138 // Jump to interpreter 3139 __ ret(0); 3140 3141 // Make sure all code is generated 3142 masm->flush(); 3143 3144 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); 3145 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); 3146 #if INCLUDE_JVMCI 3147 if (EnableJVMCI) { 3148 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); 3149 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); 3150 } 3151 #endif 3152 } 3153 3154 #ifdef COMPILER2 3155 //------------------------------generate_uncommon_trap_blob-------------------- 3156 void SharedRuntime::generate_uncommon_trap_blob() { 3157 // Allocate space for the code 3158 ResourceMark rm; 3159 // Setup code generation tools 3160 CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); 3161 MacroAssembler* masm = new MacroAssembler(&buffer); 3162 3163 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); 3164 3165 address start = __ pc(); 3166 3167 if (UseRTMLocking) { 3168 // Abort RTM transaction before possible nmethod deoptimization. 3169 __ xabort(0); 3170 } 3171 3172 // Push self-frame. We get here with a return address on the 3173 // stack, so rsp is 8-byte aligned until we allocate our frame. 3174 __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog! 3175 3176 // No callee saved registers. 
rbp is assumed implicitly saved 3177 __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); 3178 3179 // The compiler left unloaded_class_index in j_rarg0; move it to where the 3180 // runtime expects it. 3181 __ movl(c_rarg1, j_rarg0); 3182 3183 __ set_last_Java_frame(noreg, noreg, NULL, rscratch1); 3184 3185 // Call C code. Need thread but NOT official VM entry 3186 // crud. We cannot block on this call, no GC can happen. Call should 3187 // capture callee-saved registers as well as return values. 3188 // Thread is in rdi already. 3189 // 3190 // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); 3191 3192 __ mov(c_rarg0, r15_thread); 3193 __ movl(c_rarg2, Deoptimization::Unpack_uncommon_trap); 3194 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); 3195 3196 // Set an oopmap for the call site 3197 OopMapSet* oop_maps = new OopMapSet(); 3198 OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); 3199 3200 // location of rbp is known implicitly by the frame sender code 3201 3202 oop_maps->add_gc_map(__ pc() - start, map); 3203 3204 __ reset_last_Java_frame(false); 3205 3206 // Load UnrollBlock* into rdi 3207 __ mov(rdi, rax); 3208 3209 #ifdef ASSERT 3210 { Label L; 3211 __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()), 3212 Deoptimization::Unpack_uncommon_trap); 3213 __ jcc(Assembler::equal, L); 3214 __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap"); 3215 __ bind(L); 3216 } 3217 #endif 3218 3219 // Pop all the frames we must move/replace. 3220 // 3221 // Frame picture (youngest to oldest) 3222 // 1: self-frame (no frame link) 3223 // 2: deopting frame (no frame link) 3224 // 3: caller of deopting frame (could be compiled/interpreted). 3225 3226 // Pop self-frame. We have no frame, and must rely only on rax and rsp. 3227 __ addptr(rsp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt); // Epilog! 3228 3229 // Pop deoptimized frame (int) 3230 __ movl(rcx, Address(rdi, 3231 Deoptimization::UnrollBlock:: 3232 size_of_deoptimized_frame_offset_in_bytes())); 3233 __ addptr(rsp, rcx); 3234 3235 // rsp should be pointing at the return address to the caller (3) 3236 3237 // Pick up the initial fp we should save 3238 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) 3239 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); 3240 3241 #ifdef ASSERT 3242 // Compilers generate code that bangs the stack by as much as the 3243 // interpreter would need. So this stack banging should never 3244 // trigger a fault. Verify that it does not on non-product builds.
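// For reference, the bang below touches the stack one page at a time over the
// whole range the new frames will need, so a missing guard page faults here in
// the debug check rather than in the middle of frame construction. A minimal
// portable sketch of the idea (illustrative only, not what
// MacroAssembler::bang_stack_size emits); 'sp' is the current stack pointer as
// a char*, and 'total_frame_size' / 'page_size' are assumed names:
//
//   for (int offset = page_size; offset <= total_frame_size; offset += page_size) {
//     *(volatile char*)(sp - offset) = 0;   // touch one byte in each page
//   }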
3245 __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); 3246 __ bang_stack_size(rbx, rcx); 3247 #endif 3248 3249 // Load address of array of frame pcs into rcx (address*) 3250 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); 3251 3252 // Trash the return pc 3253 __ addptr(rsp, wordSize); 3254 3255 // Load address of array of frame sizes into rsi (intptr_t*) 3256 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock:: frame_sizes_offset_in_bytes())); 3257 3258 // Counter 3259 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock:: number_of_frames_offset_in_bytes())); // (int) 3260 3261 // Now adjust the caller's stack to make up for the extra locals but 3262 // record the original sp so that we can save it in the skeletal 3263 // interpreter frame and the stack walking of interpreter_sender 3264 // will get the unextended sp value and not the "real" sp value. 3265 3266 const Register sender_sp = r8; 3267 3268 __ mov(sender_sp, rsp); 3269 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock:: caller_adjustment_offset_in_bytes())); // (int) 3270 __ subptr(rsp, rbx); 3271 3272 // Push interpreter frames in a loop 3273 Label loop; 3274 __ bind(loop); 3275 __ movptr(rbx, Address(rsi, 0)); // Load frame size 3276 __ subptr(rbx, 2 * wordSize); // We'll push pc and rbp by hand 3277 __ pushptr(Address(rcx, 0)); // Save return address 3278 __ enter(); // Save old & set new rbp 3279 __ subptr(rsp, rbx); // Prolog 3280 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), 3281 sender_sp); // Make it walkable 3282 // This value is corrected by layout_activation_impl 3283 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); 3284 __ mov(sender_sp, rsp); // Pass sender_sp to next frame 3285 __ addptr(rsi, wordSize); // Bump array pointer (sizes) 3286 __ addptr(rcx, wordSize); // Bump array pointer (pcs) 3287 __ decrementl(rdx); // Decrement counter 3288 __ jcc(Assembler::notZero, loop); 3289 __ pushptr(Address(rcx, 0)); // Save final return address 3290 3291 // Re-push self-frame 3292 __ enter(); // Save old & set new rbp 3293 __ subptr(rsp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt); 3294 // Prolog 3295 3296 // Use rbp because the frames look interpreted now 3297 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 3298 // Don't need the precise return PC here, just precise enough to point into this code blob. 3299 address the_pc = __ pc(); 3300 __ set_last_Java_frame(noreg, rbp, the_pc, rscratch1); 3301 3302 // Call C code. Need thread but NOT official VM entry 3303 // crud. We cannot block on this call, no GC can happen. Call should 3304 // restore return values to their stack-slots with the new SP. 3305 // Thread is in rdi already. 3306 // 3307 // BasicType unpack_frames(JavaThread* thread, int exec_mode); 3308 3309 __ andptr(rsp, -(StackAlignmentInBytes)); // Align SP as required by ABI 3310 __ mov(c_rarg0, r15_thread); 3311 __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap); 3312 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 3313 3314 // Set an oopmap for the call site 3315 // Use the same PC we used for the last java frame 3316 oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); 3317 3318 // Clear fp AND pc 3319 __ reset_last_Java_frame(true); 3320 3321 // Pop self-frame. 
3322 __ leave(); // Epilog 3323 3324 // Jump to interpreter 3325 __ ret(0); 3326 3327 // Make sure all code is generated 3328 masm->flush(); 3329 3330 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, 3331 SimpleRuntimeFrame::framesize >> 1); 3332 } 3333 #endif // COMPILER2 3334 3335 //------------------------------generate_handler_blob------ 3336 // 3337 // Generate a special Compile2Runtime blob that saves all registers, 3338 // and sets up an oopmap. 3339 // 3340 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { 3341 assert(StubRoutines::forward_exception_entry() != NULL, 3342 "must be generated before"); 3343 3344 ResourceMark rm; 3345 OopMapSet *oop_maps = new OopMapSet(); 3346 OopMap* map; 3347 3348 // Allocate space for the code. Setup code generation tools. 3349 CodeBuffer buffer("handler_blob", 2048, 1024); 3350 MacroAssembler* masm = new MacroAssembler(&buffer); 3351 3352 address start = __ pc(); 3353 address call_pc = NULL; 3354 int frame_size_in_words; 3355 bool cause_return = (poll_type == POLL_AT_RETURN); 3356 bool save_wide_vectors = (poll_type == POLL_AT_VECTOR_LOOP); 3357 3358 if (UseRTMLocking) { 3359 // Abort RTM transaction before calling runtime 3360 // because critical section will be large and will be 3361 // aborted anyway. Also nmethod could be deoptimized. 3362 __ xabort(0); 3363 } 3364 3365 // Make room for return address (or push it again) 3366 if (!cause_return) { 3367 __ push(rbx); 3368 } 3369 3370 // Save registers, fpu state, and flags 3371 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_wide_vectors); 3372 3373 // The following is basically a call_VM. However, we need the precise 3374 // address of the call in order to generate an oopmap. Hence, we do all the 3375 // work ourselves. 3376 3377 __ set_last_Java_frame(noreg, noreg, NULL, rscratch1); // JavaFrameAnchor::capture_last_Java_pc() will get the pc from the return address, which we store next: 3378 3379 // The return address must always be correct so that the frame constructor never 3380 // sees an invalid pc. 3381 3382 if (!cause_return) { 3383 // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack. 3384 // Additionally, rbx is a callee-saved register and we can look at it later to determine 3385 // if someone changed the return address for us! 3386 __ movptr(rbx, Address(r15_thread, JavaThread::saved_exception_pc_offset())); 3387 __ movptr(Address(rbp, wordSize), rbx); 3388 } 3389 3390 // Do the call 3391 __ mov(c_rarg0, r15_thread); 3392 __ call(RuntimeAddress(call_ptr)); 3393 3394 // Set an oopmap for the call site. This oopmap will map all 3395 // oop-registers and debug-info registers as callee-saved. This 3396 // will allow deoptimization at this safepoint to find all possible 3397 // debug-info recordings, as well as let GC find all oops.
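// Conceptually, the call just below records the pair
//
//   ( __ pc() - start /* pc offset of this call site */ , map /* register save layout */ )
//
// roughly one entry of a pc-offset-indexed table (purely illustrative, not the
// real OopMap representation):
//
//   struct GcMapEntry { int pc_offset; OopMap* saved_register_slots; };
//
// so that a thread stopped at this safepoint can be walked: GC visits the slots
// that may hold oops, and deoptimization reads debug-info values out of the
// same register save area.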
3398 3399 oop_maps->add_gc_map( __ pc() - start, map); 3400 3401 Label noException; 3402 3403 __ reset_last_Java_frame(false); 3404 3405 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 3406 __ jcc(Assembler::equal, noException); 3407 3408 // Exception pending 3409 3410 RegisterSaver::restore_live_registers(masm, save_wide_vectors); 3411 3412 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3413 3414 // No exception case 3415 __ bind(noException); 3416 3417 Label no_adjust; 3418 #ifdef ASSERT 3419 Label bail; 3420 #endif 3421 if (!cause_return) { 3422 Label no_prefix, not_special; 3423 3424 // If our stashed return pc was modified by the runtime we avoid touching it 3425 __ cmpptr(rbx, Address(rbp, wordSize)); 3426 __ jccb(Assembler::notEqual, no_adjust); 3427 3428 // Skip over the poll instruction. 3429 // See NativeInstruction::is_safepoint_poll() 3430 // Possible encodings: 3431 // 85 00 test %eax,(%rax) 3432 // 85 01 test %eax,(%rcx) 3433 // 85 02 test %eax,(%rdx) 3434 // 85 03 test %eax,(%rbx) 3435 // 85 06 test %eax,(%rsi) 3436 // 85 07 test %eax,(%rdi) 3437 // 3438 // 41 85 00 test %eax,(%r8) 3439 // 41 85 01 test %eax,(%r9) 3440 // 41 85 02 test %eax,(%r10) 3441 // 41 85 03 test %eax,(%r11) 3442 // 41 85 06 test %eax,(%r14) 3443 // 41 85 07 test %eax,(%r15) 3444 // 3445 // 85 04 24 test %eax,(%rsp) 3446 // 41 85 04 24 test %eax,(%r12) 3447 // 85 45 00 test %eax,0x0(%rbp) 3448 // 41 85 45 00 test %eax,0x0(%r13) 3449 3450 __ cmpb(Address(rbx, 0), NativeTstRegMem::instruction_rex_b_prefix); 3451 __ jcc(Assembler::notEqual, no_prefix); 3452 __ addptr(rbx, 1); 3453 __ bind(no_prefix); 3454 #ifdef ASSERT 3455 __ movptr(rax, rbx); // remember where 0x85 should be, for verification below 3456 #endif 3457 // r12/r13/rsp/rbp base encoding takes 3 bytes with the following register values: 3458 // r12/rsp 0x04 3459 // r13/rbp 0x05 3460 __ movzbq(rcx, Address(rbx, 1)); 3461 __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05 3462 __ subptr(rcx, 4); // looking for 0x00 .. 0x01 3463 __ cmpptr(rcx, 1); 3464 __ jcc(Assembler::above, not_special); 3465 __ addptr(rbx, 1); 3466 __ bind(not_special); 3467 #ifdef ASSERT 3468 // Verify the correct encoding of the poll we're about to skip. 3469 __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl); 3470 __ jcc(Assembler::notEqual, bail); 3471 // Mask out the modrm bits 3472 __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask); 3473 // rax encodes to 0, so if the bits are nonzero it's incorrect 3474 __ jcc(Assembler::notZero, bail); 3475 #endif 3476 // Adjust return pc forward to step over the safepoint poll instruction 3477 __ addptr(rbx, 2); 3478 __ movptr(Address(rbp, wordSize), rbx); 3479 } 3480 3481 __ bind(no_adjust); 3482 // Normal exit, restore registers and exit. 3483 RegisterSaver::restore_live_registers(masm, save_wide_vectors); 3484 __ ret(0); 3485 3486 #ifdef ASSERT 3487 __ bind(bail); 3488 __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); 3489 #endif 3490 3491 // Make sure all code is generated 3492 masm->flush(); 3493 3494 // Fill-out other meta info 3495 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); 3496 } 3497 3498 // 3499 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss 3500 // 3501 // Generate a stub that calls into vm to find out the proper destination 3502 // of a java call. 
All the argument registers are live at this point 3503 // but since this is generic code we don't know what they are and the caller 3504 // must do any gc of the args. 3505 // 3506 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { 3507 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); 3508 3509 // allocate space for the code 3510 ResourceMark rm; 3511 3512 CodeBuffer buffer(name, 1200, 512); 3513 MacroAssembler* masm = new MacroAssembler(&buffer); 3514 3515 int frame_size_in_words; 3516 3517 OopMapSet *oop_maps = new OopMapSet(); 3518 OopMap* map = NULL; 3519 3520 int start = __ offset(); 3521 3522 // No need to save vector registers since they are caller-saved anyway. 3523 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ false); 3524 3525 int frame_complete = __ offset(); 3526 3527 __ set_last_Java_frame(noreg, noreg, NULL, rscratch1); 3528 3529 __ mov(c_rarg0, r15_thread); 3530 3531 __ call(RuntimeAddress(destination)); 3532 3533 3534 // Set an oopmap for the call site. 3535 // We need this not only for callee-saved registers, but also for volatile 3536 // registers that the compiler might be keeping live across a safepoint. 3537 3538 oop_maps->add_gc_map( __ offset() - start, map); 3539 3540 // rax contains the address we are going to jump to assuming no exception got installed 3541 3542 // clear last_Java_sp 3543 __ reset_last_Java_frame(false); 3544 // check for pending exceptions 3545 Label pending; 3546 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 3547 __ jcc(Assembler::notEqual, pending); 3548 3549 // get the returned Method* 3550 __ get_vm_result_2(rbx, r15_thread); 3551 __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx); 3552 3553 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3554 3555 RegisterSaver::restore_live_registers(masm); 3556 3557 // We are back to the original state on entry and ready to go. 3558 3559 __ jmp(rax); 3560 3561 // Pending exception after the safepoint 3562 3563 __ bind(pending); 3564 3565 RegisterSaver::restore_live_registers(masm); 3566 3567 // exception pending => remove activation and forward to exception handler 3568 3569 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD); 3570 3571 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 3572 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3573 3574 // ------------- 3575 // make sure all code is generated 3576 masm->flush(); 3577 3578 // return the blob 3579 // frame_size_words or bytes?? 3580 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); 3581 } 3582 3583 //------------------------------Montgomery multiplication------------------------ 3584 // 3585 3586 #ifndef _WINDOWS 3587 3588 // Subtract 0:b from carry:a. Return carry. 
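// A portable sketch of what the assembly below computes: the multi-word
// subtraction a[0..len) -= b[0..len) with a borrow chain, returning carry minus
// the final borrow, i.e. the high word of (carry:a) - (0:b). It assumes a
// compiler with unsigned __int128 (as in this non-Windows branch) and is
// illustrative only; the real code keeps the borrow in the CPU carry flag:
//
//   static julong sub_sketch(julong a[], julong b[], julong carry, long len) {
//     julong borrow = 0;
//     for (long i = 0; i < len; i++) {
//       unsigned __int128 d = (unsigned __int128)a[i] - b[i] - borrow;
//       a[i]   = (julong)d;
//       borrow = (julong)((d >> 64) & 1);   // 1 iff the subtraction wrapped
//     }
//     return carry - borrow;
//   }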
3589 static julong 3590 sub(julong a[], julong b[], julong carry, long len) { 3591 long long i = 0, cnt = len; 3592 julong tmp; 3593 asm volatile("clc; " 3594 "0: ; " 3595 "mov (%[b], %[i], 8), %[tmp]; " 3596 "sbb %[tmp], (%[a], %[i], 8); " 3597 "inc %[i]; dec %[cnt]; " 3598 "jne 0b; " 3599 "mov %[carry], %[tmp]; sbb $0, %[tmp]; " 3600 : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp) 3601 : [a]"r"(a), [b]"r"(b), [carry]"r"(carry) 3602 : "memory"); 3603 return tmp; 3604 } 3605 3606 // Multiply (unsigned) Long A by Long B, accumulating the double- 3607 // length result into the accumulator formed of T0, T1, and T2. 3608 #define MACC(A, B, T0, T1, T2) \ 3609 do { \ 3610 unsigned long hi, lo; \ 3611 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 3612 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 3613 : "r"(A), "a"(B) : "cc"); \ 3614 } while(0) 3615 3616 // As above, but add twice the double-length result into the 3617 // accumulator. 3618 #define MACC2(A, B, T0, T1, T2) \ 3619 do { \ 3620 unsigned long hi, lo; \ 3621 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \ 3622 "add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 3623 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 3624 : "r"(A), "a"(B) : "cc"); \ 3625 } while(0) 3626 3627 #else //_WINDOWS 3628 3629 static julong 3630 sub(julong a[], julong b[], julong carry, long len) { 3631 long i; 3632 julong tmp; 3633 unsigned char c = 1; 3634 for (i = 0; i < len; i++) { 3635 c = _addcarry_u64(c, a[i], ~b[i], &tmp); 3636 a[i] = tmp; 3637 } 3638 c = _addcarry_u64(c, carry, ~0, &tmp); 3639 return tmp; 3640 } 3641 3642 // Multiply (unsigned) Long A by Long B, accumulating the double- 3643 // length result into the accumulator formed of T0, T1, and T2. 3644 #define MACC(A, B, T0, T1, T2) \ 3645 do { \ 3646 julong hi, lo; \ 3647 lo = _umul128(A, B, &hi); \ 3648 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \ 3649 c = _addcarry_u64(c, hi, T1, &T1); \ 3650 _addcarry_u64(c, T2, 0, &T2); \ 3651 } while(0) 3652 3653 // As above, but add twice the double-length result into the 3654 // accumulator. 3655 #define MACC2(A, B, T0, T1, T2) \ 3656 do { \ 3657 julong hi, lo; \ 3658 lo = _umul128(A, B, &hi); \ 3659 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \ 3660 c = _addcarry_u64(c, hi, T1, &T1); \ 3661 _addcarry_u64(c, T2, 0, &T2); \ 3662 c = _addcarry_u64(0, lo, T0, &T0); \ 3663 c = _addcarry_u64(c, hi, T1, &T1); \ 3664 _addcarry_u64(c, T2, 0, &T2); \ 3665 } while(0) 3666 3667 #endif //_WINDOWS 3668 3669 // Fast Montgomery multiplication. The derivation of the algorithm is 3670 // in A Cryptographic Library for the Motorola DSP56000, 3671 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 
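// For readers not fluent in the MACC/MACC2 macros above: MACC(A, B, T0, T1, T2)
// accumulates the 128-bit product A*B into the 192-bit accumulator (T2:T1:T0),
// and MACC2 adds the product in twice. A portable sketch of MACC, assuming a
// compiler with unsigned __int128 (e.g. the non-Windows build); illustrative
// only, not the code that actually runs:
//
//   static inline void macc_sketch(julong a, julong b,
//                                  julong& t0, julong& t1, julong& t2) {
//     unsigned __int128 p = (unsigned __int128)a * b;           // full 128-bit product
//     unsigned __int128 s = (unsigned __int128)t0 + (julong)p;  // add low half to t0
//     t0 = (julong)s;
//     s  = (unsigned __int128)t1 + (julong)(p >> 64) + (julong)(s >> 64);
//     t1 = (julong)s;                                           // add high half plus carry
//     t2 += (julong)(s >> 64);                                  // propagate the final carry
//   }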
3672 3673 static void NOINLINE 3674 montgomery_multiply(julong a[], julong b[], julong n[], 3675 julong m[], julong inv, int len) { 3676 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 3677 int i; 3678 3679 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery multiply"); 3680 3681 for (i = 0; i < len; i++) { 3682 int j; 3683 for (j = 0; j < i; j++) { 3684 MACC(a[j], b[i-j], t0, t1, t2); 3685 MACC(m[j], n[i-j], t0, t1, t2); 3686 } 3687 MACC(a[i], b[0], t0, t1, t2); 3688 m[i] = t0 * inv; 3689 MACC(m[i], n[0], t0, t1, t2); 3690 3691 assert(t0 == 0, "broken Montgomery multiply"); 3692 3693 t0 = t1; t1 = t2; t2 = 0; 3694 } 3695 3696 for (i = len; i < 2*len; i++) { 3697 int j; 3698 for (j = i-len+1; j < len; j++) { 3699 MACC(a[j], b[i-j], t0, t1, t2); 3700 MACC(m[j], n[i-j], t0, t1, t2); 3701 } 3702 m[i-len] = t0; 3703 t0 = t1; t1 = t2; t2 = 0; 3704 } 3705 3706 while (t0) 3707 t0 = sub(m, n, t0, len); 3708 } 3709 3710 // Fast Montgomery squaring. This uses asymptotically 25% fewer 3711 // multiplies so it should be up to 25% faster than Montgomery 3712 // multiplication. However, its loop control is more complex and it 3713 // may actually run slower on some machines. 3714 3715 static void NOINLINE 3716 montgomery_square(julong a[], julong n[], 3717 julong m[], julong inv, int len) { 3718 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 3719 int i; 3720 3721 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery square"); 3722 3723 for (i = 0; i < len; i++) { 3724 int j; 3725 int end = (i+1)/2; 3726 for (j = 0; j < end; j++) { 3727 MACC2(a[j], a[i-j], t0, t1, t2); 3728 MACC(m[j], n[i-j], t0, t1, t2); 3729 } 3730 if ((i & 1) == 0) { 3731 MACC(a[j], a[j], t0, t1, t2); 3732 } 3733 for (; j < i; j++) { 3734 MACC(m[j], n[i-j], t0, t1, t2); 3735 } 3736 m[i] = t0 * inv; 3737 MACC(m[i], n[0], t0, t1, t2); 3738 3739 assert(t0 == 0, "broken Montgomery square"); 3740 3741 t0 = t1; t1 = t2; t2 = 0; 3742 } 3743 3744 for (i = len; i < 2*len; i++) { 3745 int start = i-len+1; 3746 int end = start + (len - start)/2; 3747 int j; 3748 for (j = start; j < end; j++) { 3749 MACC2(a[j], a[i-j], t0, t1, t2); 3750 MACC(m[j], n[i-j], t0, t1, t2); 3751 } 3752 if ((i & 1) == 0) { 3753 MACC(a[j], a[j], t0, t1, t2); 3754 } 3755 for (; j < len; j++) { 3756 MACC(m[j], n[i-j], t0, t1, t2); 3757 } 3758 m[i-len] = t0; 3759 t0 = t1; t1 = t2; t2 = 0; 3760 } 3761 3762 while (t0) 3763 t0 = sub(m, n, t0, len); 3764 } 3765 3766 // Swap words in a longword. 3767 static julong swap(julong x) { 3768 return (x << 32) | (x >> 32); 3769 } 3770 3771 // Copy len longwords from s to d, word-swapping as we go. The 3772 // destination array is reversed. 3773 static void reverse_words(julong *s, julong *d, int len) { 3774 d += len; 3775 while(len-- > 0) { 3776 d--; 3777 *d = swap(*s); 3778 s++; 3779 } 3780 } 3781 3782 // The threshold at which squaring is advantageous was determined 3783 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. 3784 #define MONTGOMERY_SQUARING_THRESHOLD 64 3785 3786 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, 3787 jint len, jlong inv, 3788 jint *m_ints) { 3789 assert(len % 2 == 0, "array length in montgomery_multiply must be even"); 3790 int longwords = len/2; 3791 3792 // Make very sure we don't use so much space that the stack might 3793 // overflow. 512 jints corresponds to an 16384-bit integer and 3794 // will use here a total of 8k bytes of stack space. 
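// Worked out (for illustration): 512 jints = 512 * 32 = 16384 bits = 256 julongs,
// and the four scratch arrays (a, b, n, m) allocated below then take
// 4 * 256 * sizeof(julong) = 4 * 256 * 8 = 8192 bytes -- the 8K bound that the
// guarantee below enforces via longwords <= 8192 / (sizeof(julong) * 4).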
3795 int divisor = sizeof(julong) * 4; 3796 guarantee(longwords <= 8192 / divisor, "must be"); 3797 int total_allocation = longwords * sizeof (julong) * 4; 3798 julong *scratch = (julong *)alloca(total_allocation); 3799 3800 // Local scratch arrays 3801 julong 3802 *a = scratch + 0 * longwords, 3803 *b = scratch + 1 * longwords, 3804 *n = scratch + 2 * longwords, 3805 *m = scratch + 3 * longwords; 3806 3807 reverse_words((julong *)a_ints, a, longwords); 3808 reverse_words((julong *)b_ints, b, longwords); 3809 reverse_words((julong *)n_ints, n, longwords); 3810 3811 ::montgomery_multiply(a, b, n, m, (julong)inv, longwords); 3812 3813 reverse_words(m, (julong *)m_ints, longwords); 3814 } 3815 3816 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, 3817 jint len, jlong inv, 3818 jint *m_ints) { 3819 assert(len % 2 == 0, "array length in montgomery_square must be even"); 3820 int longwords = len/2; 3821 3822 // Make very sure we don't use so much space that the stack might 3823 // overflow. 512 jints corresponds to a 16384-bit integer and 3824 // will use a total of 6k bytes of stack space here. 3825 int divisor = sizeof(julong) * 3; 3826 guarantee(longwords <= (8192 / divisor), "must be"); 3827 int total_allocation = longwords * sizeof (julong) * 3; 3828 julong *scratch = (julong *)alloca(total_allocation); 3829 3830 // Local scratch arrays 3831 julong 3832 *a = scratch + 0 * longwords, 3833 *n = scratch + 1 * longwords, 3834 *m = scratch + 2 * longwords; 3835 3836 reverse_words((julong *)a_ints, a, longwords); 3837 reverse_words((julong *)n_ints, n, longwords); 3838 3839 if (len >= MONTGOMERY_SQUARING_THRESHOLD) { 3840 ::montgomery_square(a, n, m, (julong)inv, longwords); 3841 } else { 3842 ::montgomery_multiply(a, a, n, m, (julong)inv, longwords); 3843 } 3844 3845 reverse_words(m, (julong *)m_ints, longwords); 3846 } 3847 3848 #ifdef COMPILER2 3849 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame 3850 // 3851 //------------------------------generate_exception_blob--------------------------- 3852 // Creates the exception blob at the end. 3853 // This code is jumped to from a compiled method. 3854 // (see emit_exception_handler in x86_64.ad file) 3855 // 3856 // Given an exception pc at a call, we call into the runtime for the 3857 // handler in this method. This handler might merely restore state 3858 // (i.e., callee-saved registers), unwind the frame, and jump to the 3859 // exception handler for the nmethod if there is no Java-level handler 3860 // for the nmethod. 3861 // 3862 // This code is entered with a jmp. 3863 // 3864 // Arguments: 3865 // rax: exception oop 3866 // rdx: exception pc 3867 // 3868 // Results: 3869 // rax: exception oop 3870 // rdx: exception pc in caller or ??? 3871 // destination: exception handler of caller 3872 // 3873 // Note: the exception pc MUST be at a call (precise debug information) 3874 // Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved.
3875 // 3876 3877 void OptoRuntime::generate_exception_blob() { 3878 assert(!OptoRuntime::is_callee_saved_register(RDX_num), ""); 3879 assert(!OptoRuntime::is_callee_saved_register(RAX_num), ""); 3880 assert(!OptoRuntime::is_callee_saved_register(RCX_num), ""); 3881 3882 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); 3883 3884 // Allocate space for the code 3885 ResourceMark rm; 3886 // Setup code generation tools 3887 CodeBuffer buffer("exception_blob", 2048, 1024); 3888 MacroAssembler* masm = new MacroAssembler(&buffer); 3889 3890 3891 address start = __ pc(); 3892 3893 // Exception pc is 'return address' for stack walker 3894 __ push(rdx); 3895 __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog 3896 3897 // Save callee-saved registers. See x86_64.ad. 3898 3899 // rbp is an implicitly saved callee-saved register (i.e., the calling 3900 // convention will save/restore it in the prolog/epilog). Other than that 3901 // there are no callee-saved registers now that adapter frames are gone. 3902 3903 __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); 3904 3905 // Store exception in Thread object. We cannot pass any arguments to the 3906 // handle_exception call, since we do not want to make any assumption 3907 // about the size of the frame in which the exception happened. 3908 // c_rarg0 is either rdi (Linux) or rcx (Windows). 3909 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax); 3910 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx); 3911 3912 // This call does all the hard work. It checks if an exception handler 3913 // exists in the method. 3914 // If so, it returns the handler address. 3915 // If not, it prepares for stack-unwinding, restoring the callee-save 3916 // registers of the frame being removed. 3917 // 3918 // address OptoRuntime::handle_exception_C(JavaThread* thread) 3919 3920 // At a method handle call, the stack may not be properly aligned 3921 // when returning with an exception. 3922 address the_pc = __ pc(); 3923 __ set_last_Java_frame(noreg, noreg, the_pc, rscratch1); 3924 __ mov(c_rarg0, r15_thread); 3925 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack 3926 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C))); 3927 3928 // Set an oopmap for the call site. This oopmap will only be used if we 3929 // are unwinding the stack. Hence, all locations will be dead. 3930 // Callee-saved registers will be the same as the frame above (i.e., 3931 // handle_exception_stub), since they were restored when we got the 3932 // exception. 3933 3934 OopMapSet* oop_maps = new OopMapSet(); 3935 3936 oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); 3937 3938 __ reset_last_Java_frame(false); 3939 3940 // Restore callee-saved registers 3941 3942 // rbp is an implicitly saved callee-saved register (i.e., the calling 3943 // convention will save/restore it in the prolog/epilog). Other than that 3944 // there are no callee-saved registers now that adapter frames are gone. 3945 3946 __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt)); 3947 3948 __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog 3949 __ pop(rdx); // No need for exception pc anymore 3950 3951 // rax: exception handler 3952 3953 // We have a handler in rax (could be deopt blob).
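// The hand-off below, roughly, in pseudocode (the register roles are restated
// in the comments further down):
//
//   r8  = rax;                        // stash the handler address
//   rax = thread->exception_oop();    // reload the exception oop
//   rdx = thread->exception_pc();     // and the exception pc, for deopt
//   thread->exception_oop() = NULL;   // so GC no longer sees it as a root
//   goto handler_in_r8;               // i.e. jmp r8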
3954 __ mov(r8, rax); 3955 3956 // Get the exception oop 3957 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 3958 // Get the exception pc in case we are deoptimized 3959 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 3960 #ifdef ASSERT 3961 __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), NULL_WORD); 3962 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD); 3963 #endif 3964 // Clear the exception oop so GC no longer processes it as a root. 3965 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD); 3966 3967 // rax: exception oop 3968 // r8: exception handler 3969 // rdx: exception pc 3970 // Jump to handler 3971 3972 __ jmp(r8); 3973 3974 // Make sure all code is generated 3975 masm->flush(); 3976 3977 // Set exception blob 3978 _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); 3979 } 3980 #endif // COMPILER2 3981 3982 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { 3983 BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K); 3984 CodeBuffer buffer(buf); 3985 short buffer_locs[20]; 3986 buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, 3987 sizeof(buffer_locs)/sizeof(relocInfo)); 3988 3989 MacroAssembler* masm = new MacroAssembler(&buffer); 3990 3991 const Array<SigEntry>* sig_vk = vk->extended_sig(); 3992 const Array<VMRegPair>* regs = vk->return_regs(); 3993 3994 int pack_fields_jobject_off = __ offset(); 3995 // Resolve pre-allocated buffer from JNI handle. 3996 // We cannot do this in generate_call_stub() because it requires GC code to be initialized. 3997 __ movptr(rax, Address(r13, 0)); 3998 __ resolve_jobject(rax /* value */, 3999 r15_thread /* thread */, 4000 r12 /* tmp */); 4001 __ movptr(Address(r13, 0), rax); 4002 4003 int pack_fields_off = __ offset(); 4004 4005 int j = 1; 4006 for (int i = 0; i < sig_vk->length(); i++) { 4007 BasicType bt = sig_vk->at(i)._bt; 4008 if (bt == T_PRIMITIVE_OBJECT) { 4009 continue; 4010 } 4011 if (bt == T_VOID) { 4012 if (sig_vk->at(i-1)._bt == T_LONG || 4013 sig_vk->at(i-1)._bt == T_DOUBLE) { 4014 j++; 4015 } 4016 continue; 4017 } 4018 int off = sig_vk->at(i)._offset; 4019 assert(off > 0, "offset in object should be positive"); 4020 VMRegPair pair = regs->at(j); 4021 VMReg r_1 = pair.first(); 4022 VMReg r_2 = pair.second(); 4023 Address to(rax, off); 4024 if (bt == T_FLOAT) { 4025 __ movflt(to, r_1->as_XMMRegister()); 4026 } else if (bt == T_DOUBLE) { 4027 __ movdbl(to, r_1->as_XMMRegister()); 4028 } else { 4029 Register val = r_1->as_Register(); 4030 assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1); 4031 if (is_reference_type(bt)) { 4032 __ store_heap_oop(to, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); 4033 } else { 4034 __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt)); 4035 } 4036 } 4037 j++; 4038 } 4039 assert(j == regs->length(), "missed a field?"); 4040 4041 __ ret(0); 4042 4043 int unpack_fields_off = __ offset(); 4044 4045 Label skip; 4046 __ testptr(rax, rax); 4047 __ jcc(Assembler::zero, skip); 4048 4049 j = 1; 4050 for (int i = 0; i < sig_vk->length(); i++) { 4051 BasicType bt = sig_vk->at(i)._bt; 4052 if (bt == T_PRIMITIVE_OBJECT) { 4053 continue; 4054 } 4055 if (bt == T_VOID) { 4056 if (sig_vk->at(i-1)._bt == T_LONG || 4057 sig_vk->at(i-1)._bt == T_DOUBLE) { 4058 j++; 4059 } 4060 continue; 4061 } 4062 int off = 
sig_vk->at(i)._offset; 4063 assert(off > 0, "offset in object should be positive"); 4064 VMRegPair pair = regs->at(j); 4065 VMReg r_1 = pair.first(); 4066 VMReg r_2 = pair.second(); 4067 Address from(rax, off); 4068 if (bt == T_FLOAT) { 4069 __ movflt(r_1->as_XMMRegister(), from); 4070 } else if (bt == T_DOUBLE) { 4071 __ movdbl(r_1->as_XMMRegister(), from); 4072 } else if (bt == T_OBJECT || bt == T_ARRAY) { 4073 assert_different_registers(rax, r_1->as_Register()); 4074 __ load_heap_oop(r_1->as_Register(), from); 4075 } else { 4076 assert(is_java_primitive(bt), "unexpected basic type"); 4077 assert_different_registers(rax, r_1->as_Register()); 4078 size_t size_in_bytes = type2aelembytes(bt); 4079 __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); 4080 } 4081 j++; 4082 } 4083 assert(j == regs->length(), "missed a field?"); 4084 4085 __ bind(skip); 4086 __ ret(0); 4087 4088 __ flush(); 4089 4090 return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off); 4091 }
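// Note on the two field loops above: the extended signature has one entry per
// declared field, where a T_PRIMITIVE_OBJECT entry is only a marker carrying no
// register, and a T_VOID entry that follows a T_LONG or T_DOUBLE stands for the
// unused high half of that value. Condensed, the indexing convention both loops
// follow is (a restatement, not additional code):
//
//   int j = 1;                                   // field registers start at regs->at(1)
//   for (int i = 0; i < sig_vk->length(); i++) {
//     BasicType bt = sig_vk->at(i)._bt;
//     if (bt == T_PRIMITIVE_OBJECT) continue;    // marker only
//     if (bt == T_VOID) {                        // high half of a long/double
//       if (sig_vk->at(i-1)._bt == T_LONG || sig_vk->at(i-1)._bt == T_DOUBLE) j++;
//       continue;
//     }
//     /* move field i to or from regs->at(j) */  // the real work shown above
//     j++;
//   }
//   // afterwards j == regs->length(), as the asserts above check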