/*
 * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#ifndef _WINDOWS
#include "alloca.h"
#endif
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/symbolTable.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/nativeInst.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/gcLocker.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/vframeArray.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/align.hpp"
#include "utilities/formatBuffer.hpp"
#include "vmreg_x86.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
#if INCLUDE_JVMCI
#include "jvmci/jvmciJavaClasses.hpp"
#endif

#define __ masm->

const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;

class SimpleRuntimeFrame {

 public:

  // Most of the runtime stubs have this simple frame layout.
  // This class exists to make the layout shared in one place.
  // Offsets are for compiler stack slots, which are jints.
  enum layout {
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
    rbp_off2,
    return_off, return_off2,
    framesize
  };
};

class RegisterSaver {
  // Capture info about frame layout.  Layout offsets are in jint
  // units because compiler frame slots are jints.
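// Note: the constants below mirror the standard (non-compacted) XSAVE save area
// layout as enumerated by the processor: the legacy FXSAVE image holds xmm0..xmm15
// starting at byte offset 160, and the extended region holds the YMM upper halves,
// the opmask (k) registers, the ZMM upper halves and the upper-bank ZMM registers
// at the offsets named here. As an example of the DEF_* helpers, DEF_XMM_OFFS(0)
// expands to "xmm0_off = xmm_off + (0)*16/BytesPerInt, xmm0H_off".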
#define XSAVE_AREA_BEGIN 160
#define XSAVE_AREA_YMM_BEGIN 576
#define XSAVE_AREA_OPMASK_BEGIN 1088
#define XSAVE_AREA_ZMM_BEGIN 1152
#define XSAVE_AREA_UPPERBANK 1664
#define DEF_XMM_OFFS(regnum)       xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
#define DEF_YMM_OFFS(regnum)       ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
#define DEF_ZMM_OFFS(regnum)       zmm ## regnum ## _off = zmm_off + (regnum)*32/BytesPerInt, zmm ## regnum ## H_off
#define DEF_OPMASK_OFFS(regnum)    opmask ## regnum ## _off = opmask_off + (regnum)*8/BytesPerInt, opmask ## regnum ## H_off
#define DEF_ZMM_UPPER_OFFS(regnum) zmm ## regnum ## _off = zmm_upper_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
  enum layout {
    fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
    xmm_off       = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area
    DEF_XMM_OFFS(0),
    DEF_XMM_OFFS(1),
    // 2..15 are implied in range usage
    ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_YMM_OFFS(0),
    DEF_YMM_OFFS(1),
    // 2..15 are implied in range usage
    opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_OPMASK_OFFS(0),
    DEF_OPMASK_OFFS(1),
    // 2..7 are implied in range usage
    zmm_off = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_ZMM_OFFS(0),
    DEF_ZMM_OFFS(1),
    zmm_upper_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
    DEF_ZMM_UPPER_OFFS(16),
    DEF_ZMM_UPPER_OFFS(17),
    // 18..31 are implied in range usage
    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
    fpu_stateH_end,
    r15_off, r15H_off,
    r14_off, r14H_off,
    r13_off, r13H_off,
    r12_off, r12H_off,
    r11_off, r11H_off,
    r10_off, r10H_off,
    r9_off,  r9H_off,
    r8_off,  r8H_off,
    rdi_off, rdiH_off,
    rsi_off, rsiH_off,
    ignore_off, ignoreH_off,  // extra copy of rbp
    rsp_off, rspH_off,
    rbx_off, rbxH_off,
    rdx_off, rdxH_off,
    rcx_off, rcxH_off,
    rax_off, raxH_off,
    // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
    align_off, alignH_off,
    flags_off, flagsH_off,
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off, rbpH_off,        // copy of rbp we will restore
    return_off, returnH_off,  // slot for return address
    reg_save_size             // size in compiler stack slots
  };

 public:
  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors);
  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);

  // Offsets into the register save area
  // Used by deoptimization when it is managing result register
  // values on its own

  static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
  static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
  static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
  static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
  static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }

  // During deoptimization only the result registers need to be restored,
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};

// Register is a class, but it would be assigned a numerical value.
// "0" is assigned for rax. Thus we need to ignore -Wnonnull.
PRAGMA_DIAG_PUSH
PRAGMA_NONNULL_IGNORED
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
  int off = 0;
  int num_xmm_regs = XMMRegisterImpl::number_of_registers;
  if (UseAVX < 3) {
    num_xmm_regs = num_xmm_regs/2;
  }
#if COMPILER2_OR_JVMCI
  if (save_vectors && UseAVX == 0) {
    save_vectors = false; // vectors larger than 16 bytes are supported only with AVX
  }
  assert(!save_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
#else
  save_vectors = false; // vectors are generated only by C2 and JVMCI
#endif

  // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
  int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;

  // Save registers, fpu state, and flags.
  // We assume caller has already pushed the return address onto the
  // stack, so rsp is 8-byte aligned here.
  // We push rbp twice in this sequence because we want the real rbp
  // to be under the return like a normal enter.
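  //
  // A rough sketch of the resulting frame (high addresses first), matching the
  // layout enum above:
  //   [ return address          ]  pushed by the caller's call
  //   [ saved rbp               ]  pushed by enter() below (the "real" rbp slot)
  //   [ flags / align word      ] \
  //   [ rax .. r15, including   ]  > pushed by push_CPU_state(); the extra rbp
  //   [   an ignored rbp copy   ] /  copy is the ignore_off slot in the enum
  //   [ fxsave/xsave area       ]
  //   [ arg register save area  ]  optional, frame::arg_reg_save_area_bytes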

  __ enter();          // rsp becomes 16-byte aligned here
  __ push_CPU_state(); // Push a multiple of 16 bytes

  // push_CPU_state() handles this on EVEX enabled targets
  if (save_vectors) {
    // Save upper half of YMM registers(0..15)
    int base_addr = XSAVE_AREA_YMM_BEGIN;
    for (int n = 0; n < 16; n++) {
      __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n));
    }
    if (VM_Version::supports_evex()) {
      // Save upper half of ZMM registers(0..15)
      base_addr = XSAVE_AREA_ZMM_BEGIN;
      for (int n = 0; n < 16; n++) {
        __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
      }
      // Save full ZMM registers(16..num_xmm_regs)
      base_addr = XSAVE_AREA_UPPERBANK;
      off = 0;
      int vector_len = Assembler::AVX_512bit;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
      }
#if COMPILER2_OR_JVMCI
      base_addr = XSAVE_AREA_OPMASK_BEGIN;
      off = 0;
      for(int n = 0; n < KRegisterImpl::number_of_registers; n++) {
        __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
      }
#endif
    }
  } else {
    if (VM_Version::supports_evex()) {
      // Save upper bank of ZMM registers(16..31) for double/float usage
      int base_addr = XSAVE_AREA_UPPERBANK;
      off = 0;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
      }
#if COMPILER2_OR_JVMCI
      base_addr = XSAVE_AREA_OPMASK_BEGIN;
      off = 0;
      for(int n = 0; n < KRegisterImpl::number_of_registers; n++) {
        __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
      }
#endif
    }
  }
  __ vzeroupper();
  if (frame::arg_reg_save_area_bytes != 0) {
    // Allocate argument register save area
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }

  // Set an oopmap for the call site.  This oopmap will map all
  // oop-registers and debug-info registers as callee-saved.  This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.
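  // Each set_callee_saved() call below records, for one VMReg, the stack slot
  // (in jint-sized VMRegImpl slots within this frame) where its value was saved,
  // e.g. rax is findable at slot rax_off. That is what lets GC update oops in
  // place and lets deoptimization read register values out of this frame.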

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = new OopMap(frame_size_in_slots, 0);

#define STACK_OFFSET(x) VMRegImpl::stack2reg((x))

  map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
  // rbp location is known implicitly by the frame sender code, needs no oopmap
  // and the location where rbp was saved is ignored
  map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r8_off  ), r8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r9_off  ), r9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
  // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
  // on EVEX enabled targets, we get it included in the xsave area
  off = xmm0_off;
  int delta = xmm1_off - off;
  for (int n = 0; n < 16; n++) {
    XMMRegister xmm_name = as_XMMRegister(n);
    map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
    off += delta;
  }
  if (UseAVX > 2) {
    // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
    off = zmm16_off;
    delta = zmm17_off - off;
    for (int n = 16; n < num_xmm_regs; n++) {
      XMMRegister zmm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
      off += delta;
    }
  }

#if COMPILER2_OR_JVMCI
  if (save_vectors) {
    // Save upper half of YMM registers(0..15)
    off = ymm0_off;
    delta = ymm1_off - ymm0_off;
    for (int n = 0; n < 16; n++) {
      XMMRegister ymm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
      off += delta;
    }
    if (VM_Version::supports_evex()) {
      // Save upper half of ZMM registers(0..15)
      off = zmm0_off;
      delta = zmm1_off - zmm0_off;
      for (int n = 0; n < 16; n++) {
        XMMRegister zmm_name = as_XMMRegister(n);
        map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next(8));
        off += delta;
      }
    }
  }
#endif // COMPILER2_OR_JVMCI

  // %%% These should all be a waste but we'll keep things as they were for now
  if (true) {
    map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
    // rbp location is known implicitly by the frame sender code, needs no oopmap
    map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r8H_off  ), r8->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r9H_off  ), r9->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
    // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
    // on EVEX enabled targets, we get it included in the xsave area
    off = xmm0H_off;
    delta = xmm1H_off - off;
    for (int n = 0; n < 16; n++) {
      XMMRegister xmm_name = as_XMMRegister(n);
      map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
      off += delta;
    }
    if (UseAVX > 2) {
      // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
      off = zmm16H_off;
      delta = zmm17H_off - off;
      for (int n = 16; n < num_xmm_regs; n++) {
        XMMRegister zmm_name = as_XMMRegister(n);
        map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
        off += delta;
      }
    }
  }

  return map;
}
PRAGMA_DIAG_POP

void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
  int num_xmm_regs = XMMRegisterImpl::number_of_registers;
  if (UseAVX < 3) {
    num_xmm_regs = num_xmm_regs/2;
  }
  if (frame::arg_reg_save_area_bytes != 0) {
    // Pop arg register save area
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }

#if COMPILER2_OR_JVMCI
  if (restore_vectors) {
    assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
    assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
  }
#else
  assert(!restore_vectors, "vectors are generated only by C2");
#endif

  __ vzeroupper();

  // On EVEX enabled targets everything is handled in pop fpu state
  if (restore_vectors) {
    // Restore upper half of YMM registers (0..15)
    int base_addr = XSAVE_AREA_YMM_BEGIN;
    for (int n = 0; n < 16; n++) {
      __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
    }
    if (VM_Version::supports_evex()) {
      // Restore upper half of ZMM registers (0..15)
      base_addr = XSAVE_AREA_ZMM_BEGIN;
      for (int n = 0; n < 16; n++) {
        __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
      }
      // Restore full ZMM registers(16..num_xmm_regs)
      base_addr = XSAVE_AREA_UPPERBANK;
      int vector_len = Assembler::AVX_512bit;
      int off = 0;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
      }
#if COMPILER2_OR_JVMCI
      base_addr = XSAVE_AREA_OPMASK_BEGIN;
      off = 0;
      for (int n = 0; n < KRegisterImpl::number_of_registers; n++) {
        __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
      }
#endif
    }
  } else {
    if (VM_Version::supports_evex()) {
      // Restore upper bank of ZMM registers(16..31) for double/float usage
      int base_addr = XSAVE_AREA_UPPERBANK;
      int off = 0;
      for (int n = 16; n < num_xmm_regs; n++) {
        __ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)));
      }
#if COMPILER2_OR_JVMCI
      base_addr = XSAVE_AREA_OPMASK_BEGIN;
      off = 0;
      for (int n = 0; n < KRegisterImpl::number_of_registers; n++) {
        __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
      }
#endif
    }
  }

  // Recover CPU state
  __ pop_CPU_state();
  // Get the rbp described implicitly by the calling convention (no oopMap)
  __ pop(rbp);
}

void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore result register. Only used by deoptimization. By
  // now any callee save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.

  // Restore fp result register
  __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
  // Restore integer result register
  __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
  __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));

  // Pop all of the register save area off the stack except the return address
  __ addptr(rsp, return_offset_in_bytes());
}

// Is vector's size (in bytes) bigger than a size saved by default?
// 16-byte XMM registers are saved by default using fxsave/fxrstor instructions.
bool SharedRuntime::is_wide_vector(int size) {
  return size > 16;
}

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
// quantities.  Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Registers
// up to RegisterImpl::number_of_registers are the 64-bit
// integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
// units regardless of build. Of course for i486 there is no 64 bit build.

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.

int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed) {

  // Create the mapping between argument positions and
  // registers.
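  // For example (illustration only), a signature of (int, long, float, Object)
  // arrives here as { T_INT, T_LONG, T_VOID, T_FLOAT, T_OBJECT } and maps to
  // j_rarg0, j_rarg1 (the T_VOID half gets no register), j_farg0 and j_rarg2;
  // only once the six j_rargs or eight j_fargs run out do arguments spill to
  // 4-byte stack slots, two slots per argument.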
  static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
    j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
    j_farg0, j_farg1, j_farg2, j_farg3,
    j_farg4, j_farg5, j_farg6, j_farg7
  };


  uint int_args = 0;
  uint fp_args = 0;
  uint stk_args = 0; // inc by 2 each time

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (int_args < Argument::n_int_register_parameters_j) {
        regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_VOID:
      // halves of T_LONG or T_DOUBLE
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_LONG:
      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
    case T_PRIMITIVE_OBJECT:
      if (int_args < Argument::n_int_register_parameters_j) {
        regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  return align_up(stk_args, 2);
}

// Same as java_calling_convention() but for multiple return
// values. There's no way to store them on the stack so if we don't
// have enough registers, multiple values can't be returned.
const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
int SharedRuntime::java_return_convention(const BasicType *sig_bt,
                                          VMRegPair *regs,
                                          int total_args_passed) {
  // Create the mapping between argument positions and
  // registers.
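  // Note that rax comes first in INT_ArgReg below, so a single scalar return
  // value still ends up in the usual return register; additional values use the
  // Java argument registers in reverse order. If the values don't all fit in
  // registers, -1 is returned and this convention cannot be used.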
  static const Register INT_ArgReg[java_return_convention_max_int] = {
    rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
  };
  static const XMMRegister FP_ArgReg[java_return_convention_max_float] = {
    j_farg0, j_farg1, j_farg2, j_farg3,
    j_farg4, j_farg5, j_farg6, j_farg7
  };


  uint int_args = 0;
  uint fp_args = 0;

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (int_args < Argument::n_int_register_parameters_j+1) {
        regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
        int_args++;
      } else {
        return -1;
      }
      break;
    case T_VOID:
      // halves of T_LONG or T_DOUBLE
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_LONG:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_PRIMITIVE_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
    case T_METADATA:
      if (int_args < Argument::n_int_register_parameters_j+1) {
        regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
        int_args++;
      } else {
        return -1;
      }
      break;
    case T_FLOAT:
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
        fp_args++;
      } else {
        return -1;
      }
      break;
    case T_DOUBLE:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
        fp_args++;
      } else {
        return -1;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  return int_args + fp_args;
}

// Patch the caller's callsite with entry to compiled code if it exists.
static void patch_callers_callsite(MacroAssembler *masm) {
  Label L;
  __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, L);

  // Save the current stack pointer
  __ mov(r13, rsp);
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee
  // rax isn't live so capture return address while we easily can
  __ movptr(rax, Address(rsp, 0));

  // align stack so push_CPU_state doesn't fault
  __ andptr(rsp, -(StackAlignmentInBytes));
  __ push_CPU_state();
  __ vzeroupper();
  // VM needs caller's callsite
  // VM needs target method
  // This needs to be a long call since we will relocate this adapter to
  // the codeBuffer and it may not reach

  // Allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }
  __ mov(c_rarg0, rbx);
  __ mov(c_rarg1, rax);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));

  // De-allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }

  __ vzeroupper();
  __ pop_CPU_state();
  // restore sp
  __ mov(rsp, r13);
  __ bind(L);
}

// For each inline type argument, sig includes the list of fields of
// the inline type. This utility function computes the number of
// arguments for the call if inline types are passed by reference (the
// calling convention the interpreter expects).
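// For example (illustration only): for a method taking (int, MyValue) where the
// hypothetical MyValue has an int field and a long field, sig_extended is
//   { T_INT, T_PRIMITIVE_OBJECT, T_INT, T_LONG, T_VOID, T_VOID }
// and this function returns 2 when InlineTypePassFieldsAsArgs is true (the
// interpreter sees two reference-style arguments); with the flag off it simply
// returns sig_extended->length().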
static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
  int total_args_passed = 0;
  if (InlineTypePassFieldsAsArgs) {
    for (int i = 0; i < sig_extended->length(); i++) {
      BasicType bt = sig_extended->at(i)._bt;
      if (bt == T_PRIMITIVE_OBJECT) {
        // In sig_extended, an inline type argument starts with:
        // T_PRIMITIVE_OBJECT, followed by the types of the fields of the
        // inline type and T_VOID to mark the end of the value
        // type. Inline types are flattened so, for instance, in the
        // case of an inline type with an int field and an inline type
        // field that itself has 2 fields, an int and a long:
        // T_PRIMITIVE_OBJECT T_INT T_PRIMITIVE_OBJECT T_INT T_LONG T_VOID (second
        // slot for the T_LONG) T_VOID (inner T_PRIMITIVE_OBJECT) T_VOID
        // (outer T_PRIMITIVE_OBJECT)
        total_args_passed++;
        int vt = 1;
        do {
          i++;
          BasicType bt = sig_extended->at(i)._bt;
          BasicType prev_bt = sig_extended->at(i-1)._bt;
          if (bt == T_PRIMITIVE_OBJECT) {
            vt++;
          } else if (bt == T_VOID &&
                     prev_bt != T_LONG &&
                     prev_bt != T_DOUBLE) {
            vt--;
          }
        } while (vt != 0);
      } else {
        total_args_passed++;
      }
    }
  } else {
    total_args_passed = sig_extended->length();
  }
  return total_args_passed;
}


static void gen_c2i_adapter_helper(MacroAssembler* masm,
                                   BasicType bt,
                                   BasicType prev_bt,
                                   size_t size_in_bytes,
                                   const VMRegPair& reg_pair,
                                   const Address& to,
                                   int extraspace,
                                   bool is_oop) {
  assert(bt != T_PRIMITIVE_OBJECT || !InlineTypePassFieldsAsArgs, "no inline type here");
  if (bt == T_VOID) {
    assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
    return;
  }

  // Say 4 args:
  //  i   st_off
  //  0   32 T_LONG
  //  1   24 T_VOID
  //  2   16 T_OBJECT
  //  3    8 T_BOOL
  //  -    0 return address
  //
  // However, to make things extra confusing: because we can fit a long/double in
  // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
  // leaves one slot empty and only stores to a single slot. In this case the
  // slot that is occupied is the T_VOID slot. See, I said it was confusing.
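  //
  // Concretely, for the T_LONG at i == 0 above: st_off is 32 but the 8-byte
  // value is written at next_off == 24 (the T_VOID slot), and the slot at 32 is
  // left unused (the caller fills it with known junk under ASSERT).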

  bool wide = (size_in_bytes == wordSize);
  VMReg r_1 = reg_pair.first();
  VMReg r_2 = reg_pair.second();
  assert(r_2->is_valid() == wide, "invalid size");
  if (!r_1->is_valid()) {
    assert(!r_2->is_valid(), "must be invalid");
    return;
  }

  if (!r_1->is_XMMRegister()) {
    Register val = rax;
    if (r_1->is_stack()) {
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
      __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
    } else {
      val = r_1->as_Register();
    }
    assert_different_registers(to.base(), val, rscratch1);
    if (is_oop) {
      __ push(r13);
      __ push(rbx);
      __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
      __ pop(rbx);
      __ pop(r13);
    } else {
      __ store_sized_value(to, val, size_in_bytes);
    }
  } else {
    if (wide) {
      __ movdbl(to, r_1->as_XMMRegister());
    } else {
      __ movflt(to, r_1->as_XMMRegister());
    }
  }
}

static void gen_c2i_adapter(MacroAssembler *masm,
                            const GrowableArray<SigEntry>* sig_extended,
                            const VMRegPair *regs,
                            Label& skip_fixup,
                            address start,
                            OopMapSet* oop_maps,
                            int& frame_complete,
                            int& frame_size_in_words,
                            bool alloc_inline_receiver) {
  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

  __ bind(skip_fixup);

  if (InlineTypePassFieldsAsArgs) {
    // Is there an inline type argument?
    bool has_inline_argument = false;
    for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
      has_inline_argument = (sig_extended->at(i)._bt == T_PRIMITIVE_OBJECT);
    }
    if (has_inline_argument) {
      // There is at least one inline type argument: we're coming from
      // compiled code so we have no buffers to back the inline types.
      // Allocate the buffers here with a runtime call.
      OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false);

      frame_complete = __ offset();

      __ set_last_Java_frame(noreg, noreg, NULL);

      __ mov(c_rarg0, r15_thread);
      __ mov(c_rarg1, rbx);
      __ mov64(c_rarg2, (int64_t)alloc_inline_receiver);
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types)));

      oop_maps->add_gc_map((int)(__ pc() - start), map);
      __ reset_last_Java_frame(false);

      RegisterSaver::restore_live_registers(masm);

      Label no_exception;
      __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
      __ jcc(Assembler::equal, no_exception);

      __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
      __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
      __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

      __ bind(no_exception);

      // We get an array of objects from the runtime call
      __ get_vm_result(rscratch2, r15_thread); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr()
      __ get_vm_result_2(rbx, r15_thread); // TODO: required to keep the callee Method live?
    }
  }

  // Since all args are passed on the stack, total_args_passed *
  // Interpreter::stackElementSize is the space we need. Plus one extra word
  // because we also account for the return address location since
  // we store it first rather than hold it in rax across all the shuffling.
  int total_args_passed = compute_total_args_passed_int(sig_extended);
  int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;

  // stack is aligned, keep it that way
  extraspace = align_up(extraspace, 2*wordSize);

  // Get return address
  __ pop(rax);

  // set senderSP value
  __ mov(r13, rsp);

  __ subptr(rsp, extraspace);

  // Store the return address in the expected location
  __ movptr(Address(rsp, 0), rax);

  // Now write the args into the outgoing interpreter space

  // next_arg_comp is the next argument from the compiler point of
  // view (inline type fields are passed in registers/on the stack). In
  // sig_extended, an inline type argument starts with: T_PRIMITIVE_OBJECT,
  // followed by the types of the fields of the inline type and T_VOID
  // to mark the end of the inline type. ignored counts the number of
  // T_PRIMITIVE_OBJECT/T_VOID. next_vt_arg is the next inline type argument:
  // used to get the buffer for that argument from the pool of buffers
  // we allocated above and want to pass to the
  // interpreter. next_arg_int is the next argument from the
  // interpreter point of view (inline types are passed by reference).
  for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
       next_arg_comp < sig_extended->length(); next_arg_comp++) {
    assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
    assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?");
    BasicType bt = sig_extended->at(next_arg_comp)._bt;
    int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
    if (!InlineTypePassFieldsAsArgs || bt != T_PRIMITIVE_OBJECT) {
      int next_off = st_off - Interpreter::stackElementSize;
      const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
      const VMRegPair reg_pair = regs[next_arg_comp-ignored];
      size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
      gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
                             size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false);
      next_arg_int++;
#ifdef ASSERT
      if (bt == T_LONG || bt == T_DOUBLE) {
        // Overwrite the unused slot with known junk
        __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
        __ movptr(Address(rsp, st_off), rax);
      }
#endif /* ASSERT */
    } else {
      ignored++;
      // get the buffer from the just allocated pool of buffers
      int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_PRIMITIVE_OBJECT);
      __ load_heap_oop(r14, Address(rscratch2, index));
      next_vt_arg++; next_arg_int++;
      int vt = 1;
      // write fields we get from compiled code in registers/stack
      // slots to the buffer: we know we are done with that inline type
      // argument when we hit the T_VOID that acts as an end of inline
      // type delimiter for this inline type. Inline types are flattened
      // so we might encounter embedded inline types. Each entry in
      // sig_extended contains a field offset in the buffer.
      Label L_null;
      do {
        next_arg_comp++;
        BasicType bt = sig_extended->at(next_arg_comp)._bt;
        BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt;
        if (bt == T_PRIMITIVE_OBJECT) {
          vt++;
          ignored++;
        } else if (bt == T_VOID &&
                   prev_bt != T_LONG &&
                   prev_bt != T_DOUBLE) {
          vt--;
          ignored++;
        } else {
          int off = sig_extended->at(next_arg_comp)._offset;
          if (off == -1) {
            // Nullable inline type argument, emit null check
            VMReg reg = regs[next_arg_comp-ignored].first();
            Label L_notNull;
            if (reg->is_stack()) {
              int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
              __ testb(Address(rsp, ld_off), 1);
            } else {
              __ testb(reg->as_Register(), 1);
            }
            __ jcc(Assembler::notZero, L_notNull);
            __ movptr(Address(rsp, st_off), 0);
            __ jmp(L_null);
            __ bind(L_notNull);
            continue;
          }
          assert(off > 0, "offset in object should be positive");
          size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
          bool is_oop = is_reference_type(bt);
          gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
                                 size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop);
        }
      } while (vt != 0);
      // pass the buffer to the interpreter
      __ movptr(Address(rsp, st_off), r14);
      __ bind(L_null);
    }
  }

  // Schedule the branch target address early.
  __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
  __ jmp(rcx);
}

static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
                        address code_start, address code_end,
                        Label& L_ok) {
  Label L_fail;
  __ lea(temp_reg, ExternalAddress(code_start));
  __ cmpptr(pc_reg, temp_reg);
  __ jcc(Assembler::belowEqual, L_fail);
  __ lea(temp_reg, ExternalAddress(code_end));
  __ cmpptr(pc_reg, temp_reg);
  __ jcc(Assembler::below, L_ok);
  __ bind(L_fail);
}

void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int comp_args_on_stack,
                                    const GrowableArray<SigEntry>* sig,
                                    const VMRegPair *regs) {

  // Note: r13 contains the senderSP on entry. We must preserve it since
  // we may do an i2c -> c2i transition if we lose a race where compiled
  // code goes non-entrant while we get args ready.
  // In addition we use r13 to locate all the interpreter args as
  // we must align the stack to 16 bytes on an i2c entry else we
  // lose alignment we expect in all compiled code and register
  // save code can segv when fxsave instructions find an improperly
  // aligned stack pointer.

  // Adapters can be frameless because they do not require the caller
  // to perform additional cleanup work, such as correcting the stack pointer.
  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
  // even if a callee has modified the stack pointer.
  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
  // routinely repairs its caller's stack pointer (from sender_sp, which is set
  // up via the senderSP register).
  // In other words, if *either* the caller or callee is interpreted, we can
  // get the stack pointer repaired after a call.
  // This is why c2i and i2c adapters cannot be indefinitely composed.
  // In particular, if a c2i adapter were to somehow call an i2c adapter,
  // both caller and callee would be compiled methods, and neither would
  // clean up the stack pointer changes performed by the two adapters.
  // If this happens, control eventually transfers back to the compiled
  // caller, but with an uncorrected stack, causing delayed havoc.

  // Pick up the return address
  __ movptr(rax, Address(rsp, 0));

  if (VerifyAdapterCalls &&
      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
    // So, let's test for cascading c2i/i2c adapters right now.
    //  assert(Interpreter::contains($return_addr) ||
    //         StubRoutines::contains($return_addr),
    //         "i2c adapter must return to an interpreter frame");
    __ block_comment("verify_i2c { ");
    Label L_ok;
    if (Interpreter::code() != NULL)
      range_check(masm, rax, r11,
                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
                  L_ok);
    if (StubRoutines::code1() != NULL)
      range_check(masm, rax, r11,
                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
                  L_ok);
    if (StubRoutines::code2() != NULL)
      range_check(masm, rax, r11,
                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
                  L_ok);
    const char* msg = "i2c adapter must return to an interpreter frame";
    __ block_comment(msg);
    __ stop(msg);
    __ bind(L_ok);
    __ block_comment("} verify_i2ce ");
  }

  // Must preserve original SP for loading incoming arguments because
  // we need to align the outgoing SP for compiled code.
  __ movptr(r11, rsp);

  // Cut-out for having no stack args.  Since up to 2 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater-than VMRegImpl::stack0.  Those in
    // registers are below.  By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.

    // Convert 4-byte c2 stack slots to words.
    comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = align_up(comp_words_on_stack, 2);
    __ subptr(rsp, comp_words_on_stack * wordSize);
  }


  // Ensure compiled code always sees stack at proper alignment
  __ andptr(rsp, -16);

  // Push the return address and misalign the stack so that the youngest frame
  // sees the same alignment it would see right after a call instruction.
  __ push(rax);

  // Put saved SP in another register
  const Register saved_sp = rax;
  __ movptr(saved_sp, r11);

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset())));

#if INCLUDE_JVMCI
  if (EnableJVMCI) {
    // check if this call should be routed towards a specific entry point
    __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
    Label no_alternative_target;
    __ jcc(Assembler::equal, no_alternative_target);
    __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
    __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
    __ bind(no_alternative_target);
  }
#endif // INCLUDE_JVMCI

  int total_args_passed = sig->length();

  // Now generate the shuffle code.  Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    BasicType bt = sig->at(i)._bt;
    assert(bt != T_PRIMITIVE_OBJECT, "i2c adapter doesn't unpack inline type args");
    if (bt == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      BasicType prev_bt = (i > 0) ? sig->at(i-1)._bt : T_ILLEGAL;
      assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;
    //
    //
    //
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to account for return address )
      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;

      // We can use r13 as a temp here because compiled code doesn't need r13 as an input
      // and if we end up going thru a c2i because of a miss a reasonable value of r13
      // will be generated.
      if (!r_2->is_valid()) {
        // sign extend???
        __ movl(r13, Address(saved_sp, ld_off));
        __ movptr(Address(rsp, st_off), r13);
      } else {
        //
        // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
        // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
        // So we must adjust where to pick up the data to match the interpreter.
        //
        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        const int offset = (bt==T_LONG||bt==T_DOUBLE)?
                           next_off : ld_off;
        __ movq(r13, Address(saved_sp, offset));
        // st_off is LSW (i.e. reg.first())
        __ movq(Address(rsp, st_off), r13);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      assert(r != rax, "must be different");
      if (r_2->is_valid()) {
        //
        // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
        // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
        // So we must adjust where to pick up the data to match the interpreter.

        const int offset = (bt==T_LONG||bt==T_DOUBLE)?
                           next_off : ld_off;

        // this can be a misaligned move
        __ movq(r, Address(saved_sp, offset));
      } else {
        // sign extend and use a full word?
        __ movl(r, Address(saved_sp, ld_off));
      }
    } else {
      if (!r_2->is_valid()) {
        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
      } else {
        __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
      }
    }
  }

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.

  __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);

  // put Method* where a c2i would expect should we end up there
  // only needed because of c2 resolve stubs return Method* as a result in
  // rax
  __ mov(rax, rbx);
  __ jmp(r11);
}

static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
  Label ok;

  Register holder = rax;
  Register receiver = j_rarg0;
  Register temp = rbx;

  __ load_klass(temp, receiver, rscratch1);
  __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
  __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
  __ jcc(Assembler::equal, ok);
  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

  __ bind(ok);
  // Method might have been compiled since the call site was patched to
  // interpreted; if that is the case treat it as a miss so we can get
  // the call site corrected.
  __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, skip_fixup);
  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
}

// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
                                                            int comp_args_on_stack,
                                                            const GrowableArray<SigEntry>* sig,
                                                            const VMRegPair* regs,
                                                            const GrowableArray<SigEntry>* sig_cc,
                                                            const VMRegPair* regs_cc,
                                                            const GrowableArray<SigEntry>* sig_cc_ro,
                                                            const VMRegPair* regs_cc_ro,
                                                            AdapterFingerPrint* fingerprint,
                                                            AdapterBlob*& new_adapter,
                                                            bool allocate_code_blob) {
  address i2c_entry = __ pc();
  gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);

  // -------------------------------------------------------------------------
  // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
  // to the interpreter.  The args start out packed in the compiled layout.  They
  // need to be unpacked into the interpreter layout.  This will almost always
  // require some stack space.  We grow the current (compiled) stack, then repack
  // the args.
  // We finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not RBP, get sick).

  address c2i_unverified_entry = __ pc();
  Label skip_fixup;

  gen_inline_cache_check(masm, skip_fixup);

  OopMapSet* oop_maps = new OopMapSet();
  int frame_complete = CodeOffsets::frame_never_safe;
  int frame_size_in_words = 0;

  // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
  address c2i_inline_ro_entry = __ pc();
  if (regs_cc != regs_cc_ro) {
    gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
    skip_fixup.reset();
  }

  // Scalarized c2i adapter
  address c2i_entry = __ pc();

  // Class initialization barrier for static methods
  address c2i_no_clinit_check_entry = NULL;
  if (VM_Version::supports_fast_class_init_checks()) {
    Label L_skip_barrier;
    Register method = rbx;

    { // Bypass the barrier for non-static methods
      Register flags = rscratch1;
      __ movl(flags, Address(method, Method::access_flags_offset()));
      __ testl(flags, JVM_ACC_STATIC);
      __ jcc(Assembler::zero, L_skip_barrier); // non-static
    }

    Register klass = rscratch1;
    __ load_method_holder(klass, method);
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
    c2i_no_clinit_check_entry = __ pc();
  }

  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bs->c2i_entry_barrier(masm);

  gen_c2i_adapter(masm, sig_cc, regs_cc, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, true);

  address c2i_unverified_inline_entry = c2i_unverified_entry;

  // Non-scalarized c2i adapter
  address c2i_inline_entry = c2i_entry;
  if (regs != regs_cc) {
    Label inline_entry_skip_fixup;
    c2i_unverified_inline_entry = __ pc();
    gen_inline_cache_check(masm, inline_entry_skip_fixup);

    c2i_inline_entry = __ pc();
    gen_c2i_adapter(masm, sig, regs, inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
  }

  __ flush();

  // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
  // the GC knows the location of oop arguments passed to the c2i adapter.
  if (allocate_code_blob) {
    bool caller_must_gc_arguments = (regs != regs_cc);
    new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
  }

  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, c2i_unverified_inline_entry, c2i_no_clinit_check_entry);
}

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        VMRegPair *regs2,
                                        int total_args_passed) {
  assert(regs2 == NULL, "not needed on x86");
  // We return the number of VMRegImpl stack slots we need to reserve for all
  // the arguments NOT counting out_preserve_stack_slots.
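  // The register tables below differ by ABI: Win64 has four integer and four FP
  // argument registers that share positions (hence the cross-increments of
  // int_args/fp_args under _WIN64 below) and always reserves shadow slots for
  // them, while the SysV ABI used elsewhere has six integer and eight FP
  // argument registers counted independently.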

  // NOTE: These arrays will have to change when c1 is ported
#ifdef _WIN64
  static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
    c_rarg0, c_rarg1, c_rarg2, c_rarg3
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
    c_farg0, c_farg1, c_farg2, c_farg3
  };
#else
  static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
    c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
    c_farg0, c_farg1, c_farg2, c_farg3,
    c_farg4, c_farg5, c_farg6, c_farg7
  };
#endif // _WIN64


  uint int_args = 0;
  uint fp_args = 0;
  uint stk_args = 0; // inc by 2 each time

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (int_args < Argument::n_int_register_parameters_c) {
        regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
        fp_args++;
        // Allocate slots for the callee to stuff register args on the stack.
        stk_args += 2;
#endif
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_LONG:
      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_PRIMITIVE_OBJECT:
    case T_ADDRESS:
    case T_METADATA:
      if (int_args < Argument::n_int_register_parameters_c) {
        regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
        fp_args++;
        stk_args += 2;
#endif
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (fp_args < Argument::n_float_register_parameters_c) {
        regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
#ifdef _WIN64
        int_args++;
        // Allocate slots for the callee to stuff register args on the stack.
        stk_args += 2;
#endif
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
      if (fp_args < Argument::n_float_register_parameters_c) {
        regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
#ifdef _WIN64
        int_args++;
        // Allocate slots for the callee to stuff register args on the stack.
        stk_args += 2;
#endif
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_VOID: // Halves of longs and doubles
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }
#ifdef _WIN64
  // The Windows ABI requires that we always allocate enough stack space
  // for 4 64-bit registers to be stored down.
  if (stk_args < 8) {
    stk_args = 8;
  }
#endif // _WIN64

  return stk_args;
}

int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                             uint num_bits,
                                             uint total_args_passed) {
  assert(num_bits == 64 || num_bits == 128 || num_bits == 256 || num_bits == 512,
         "only certain vector sizes are supported for now");

  static const XMMRegister VEC_ArgReg[32] = {
     xmm0,  xmm1,  xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
     xmm8,  xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
    xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23,
    xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31
  };

  uint stk_args = 0;
  uint fp_args = 0;

  for (uint i = 0; i < total_args_passed; i++) {
    VMReg vmreg = VEC_ArgReg[fp_args++]->as_VMReg();
    int next_val = num_bits == 64 ? 1 : (num_bits == 128 ? 3 : (num_bits == 256 ? 7 : 15));
    regs[i].set_pair(vmreg->next(next_val), vmreg);
  }

  return stk_args;
}

void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below the
  // frame pointer, which by this time is free to use.
  switch (ret_type) {
  case T_FLOAT:
    __ movflt(Address(rbp, -wordSize), xmm0);
    break;
  case T_DOUBLE:
    __ movdbl(Address(rbp, -wordSize), xmm0);
    break;
  case T_VOID:  break;
  default: {
    __ movptr(Address(rbp, -wordSize), rax);
    }
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below the
  // frame pointer, which by this time is free to use.
  switch (ret_type) {
  case T_FLOAT:
    __ movflt(xmm0, Address(rbp, -wordSize));
    break;
  case T_DOUBLE:
    __ movdbl(xmm0, Address(rbp, -wordSize));
    break;
  case T_VOID:  break;
  default: {
    __ movptr(rax, Address(rbp, -wordSize));
    }
  }
}

static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  for ( int i = first_arg ; i < arg_count ; i++ ) {
    if (args[i].first()->is_Register()) {
      __ push(args[i].first()->as_Register());
    } else if (args[i].first()->is_XMMRegister()) {
      __ subptr(rsp, 2*wordSize);
      __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister());
    }
  }
}

static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
    if (args[i].first()->is_Register()) {
      __ pop(args[i].first()->as_Register());
    } else if (args[i].first()->is_XMMRegister()) {
      __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0));
      __ addptr(rsp, 2*wordSize);
    }
  }
}

// Different signatures may require very different orders for the moves
// to avoid clobbering other arguments. There's no simple way to
// order them safely. Compute a safe order for issuing stores and
// break any cycles in those stores. This code is fairly general but
// it's not necessary on the other platforms so we keep it in the
// platform dependent code instead of moving it into a shared file.
// (See bugs 7013347 & 7145024.)
// Note that this code is specific to LP64.
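// For example (illustration only): if one argument must move from register A to
// register B while another must move from B to A, the two stores form a cycle;
// ComputeMoveOrder breaks it by redirecting one store into tmp_vmreg first
// (see MoveOperation::break_cycle) and emitting the move out of the temp last.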
1522 class ComputeMoveOrder: public StackObj { 1523 class MoveOperation: public ResourceObj { 1524 friend class ComputeMoveOrder; 1525 private: 1526 VMRegPair _src; 1527 VMRegPair _dst; 1528 int _src_index; 1529 int _dst_index; 1530 bool _processed; 1531 MoveOperation* _next; 1532 MoveOperation* _prev; 1533 1534 static int get_id(VMRegPair r) { 1535 return r.first()->value(); 1536 } 1537 1538 public: 1539 MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): 1540 _src(src) 1541 , _dst(dst) 1542 , _src_index(src_index) 1543 , _dst_index(dst_index) 1544 , _processed(false) 1545 , _next(NULL) 1546 , _prev(NULL) { 1547 } 1548 1549 VMRegPair src() const { return _src; } 1550 int src_id() const { return get_id(src()); } 1551 int src_index() const { return _src_index; } 1552 VMRegPair dst() const { return _dst; } 1553 void set_dst(int i, VMRegPair dst) { _dst_index = i, _dst = dst; } 1554 int dst_index() const { return _dst_index; } 1555 int dst_id() const { return get_id(dst()); } 1556 MoveOperation* next() const { return _next; } 1557 MoveOperation* prev() const { return _prev; } 1558 void set_processed() { _processed = true; } 1559 bool is_processed() const { return _processed; } 1560 1561 // insert 1562 void break_cycle(VMRegPair temp_register) { 1563 // create a new store following the last store 1564 // to move from the temp_register to the original 1565 MoveOperation* new_store = new MoveOperation(-1, temp_register, dst_index(), dst()); 1566 1567 // break the cycle of links and insert new_store at the end 1568 // break the reverse link. 1569 MoveOperation* p = prev(); 1570 assert(p->next() == this, "must be"); 1571 _prev = NULL; 1572 p->_next = new_store; 1573 new_store->_prev = p; 1574 1575 // change the original store to save it's value in the temp. 1576 set_dst(-1, temp_register); 1577 } 1578 1579 void link(GrowableArray<MoveOperation*>& killer) { 1580 // link this store in front the store that it depends on 1581 MoveOperation* n = killer.at_grow(src_id(), NULL); 1582 if (n != NULL) { 1583 assert(_next == NULL && n->_prev == NULL, "shouldn't have been set yet"); 1584 _next = n; 1585 n->_prev = this; 1586 } 1587 } 1588 }; 1589 1590 private: 1591 GrowableArray<MoveOperation*> edges; 1592 1593 public: 1594 ComputeMoveOrder(int total_in_args, const VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, 1595 const BasicType* in_sig_bt, GrowableArray<int>& arg_order, VMRegPair tmp_vmreg) { 1596 // Move operations where the dest is the stack can all be 1597 // scheduled first since they can't interfere with the other moves. 
    for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
      if (in_sig_bt[i] == T_ARRAY) {
        c_arg--;
        if (out_regs[c_arg].first()->is_stack() &&
            out_regs[c_arg + 1].first()->is_stack()) {
          arg_order.push(i);
          arg_order.push(c_arg);
        } else {
          if (out_regs[c_arg].first()->is_stack() ||
              in_regs[i].first() == out_regs[c_arg].first()) {
            add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg + 1]);
          } else {
            add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]);
          }
        }
      } else if (in_sig_bt[i] == T_VOID) {
        arg_order.push(i);
        arg_order.push(c_arg);
      } else {
        if (out_regs[c_arg].first()->is_stack() ||
            in_regs[i].first() == out_regs[c_arg].first()) {
          arg_order.push(i);
          arg_order.push(c_arg);
        } else {
          add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]);
        }
      }
    }
    // Break any cycles in the register moves and emit them in the
    // proper order.
    GrowableArray<MoveOperation*>* stores = get_store_order(tmp_vmreg);
    for (int i = 0; i < stores->length(); i++) {
      arg_order.push(stores->at(i)->src_index());
      arg_order.push(stores->at(i)->dst_index());
    }
  }

  // Collect all the move operations
  void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) {
    if (src.first() == dst.first()) return;
    edges.append(new MoveOperation(src_index, src, dst_index, dst));
  }

  // Walk the edges breaking cycles between moves. The result list
  // can be walked in order to produce the proper set of loads.
  GrowableArray<MoveOperation*>* get_store_order(VMRegPair temp_register) {
    // Record which moves kill which values
    GrowableArray<MoveOperation*> killer;
    for (int i = 0; i < edges.length(); i++) {
      MoveOperation* s = edges.at(i);
      assert(killer.at_grow(s->dst_id(), NULL) == NULL, "only one killer");
      killer.at_put_grow(s->dst_id(), s, NULL);
    }
    assert(killer.at_grow(MoveOperation::get_id(temp_register), NULL) == NULL,
           "make sure temp isn't in the registers that are killed");

    // create links between loads and stores
    for (int i = 0; i < edges.length(); i++) {
      edges.at(i)->link(killer);
    }

    // at this point, all the move operations are chained together
    // in a doubly linked list. Processing it backwards finds
    // the beginning of the chain, forwards finds the end. If there's
    // a cycle it can be broken at any point, so pick an edge and walk
    // backward until the list ends or we end where we started.
1664 GrowableArray<MoveOperation*>* stores = new GrowableArray<MoveOperation*>(); 1665 for (int e = 0; e < edges.length(); e++) { 1666 MoveOperation* s = edges.at(e); 1667 if (!s->is_processed()) { 1668 MoveOperation* start = s; 1669 // search for the beginning of the chain or cycle 1670 while (start->prev() != NULL && start->prev() != s) { 1671 start = start->prev(); 1672 } 1673 if (start->prev() == s) { 1674 start->break_cycle(temp_register); 1675 } 1676 // walk the chain forward inserting to store list 1677 while (start != NULL) { 1678 stores->append(start); 1679 start->set_processed(); 1680 start = start->next(); 1681 } 1682 } 1683 } 1684 return stores; 1685 } 1686 }; 1687 1688 static void verify_oop_args(MacroAssembler* masm, 1689 const methodHandle& method, 1690 const BasicType* sig_bt, 1691 const VMRegPair* regs) { 1692 Register temp_reg = rbx; // not part of any compiled calling seq 1693 if (VerifyOops) { 1694 for (int i = 0; i < method->size_of_parameters(); i++) { 1695 if (is_reference_type(sig_bt[i])) { 1696 VMReg r = regs[i].first(); 1697 assert(r->is_valid(), "bad oop arg"); 1698 if (r->is_stack()) { 1699 __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1700 __ verify_oop(temp_reg); 1701 } else { 1702 __ verify_oop(r->as_Register()); 1703 } 1704 } 1705 } 1706 } 1707 } 1708 1709 static void gen_special_dispatch(MacroAssembler* masm, 1710 const methodHandle& method, 1711 const BasicType* sig_bt, 1712 const VMRegPair* regs) { 1713 verify_oop_args(masm, method, sig_bt, regs); 1714 vmIntrinsics::ID iid = method->intrinsic_id(); 1715 1716 // Now write the args into the outgoing interpreter space 1717 bool has_receiver = false; 1718 Register receiver_reg = noreg; 1719 int member_arg_pos = -1; 1720 Register member_reg = noreg; 1721 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1722 if (ref_kind != 0) { 1723 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 1724 member_reg = rbx; // known to be free at this point 1725 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 1726 } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { 1727 has_receiver = true; 1728 } else { 1729 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); 1730 } 1731 1732 if (member_reg != noreg) { 1733 // Load the member_arg into register, if necessary. 1734 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 1735 VMReg r = regs[member_arg_pos].first(); 1736 if (r->is_stack()) { 1737 __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1738 } else { 1739 // no data motion is needed 1740 member_reg = r->as_Register(); 1741 } 1742 } 1743 1744 if (has_receiver) { 1745 // Make sure the receiver is loaded into a register. 1746 assert(method->size_of_parameters() > 0, "oob"); 1747 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 1748 VMReg r = regs[0].first(); 1749 assert(r->is_valid(), "bad receiver arg"); 1750 if (r->is_stack()) { 1751 // Porting note: This assumes that compiled calling conventions always 1752 // pass the receiver oop in a register. If this is not true on some 1753 // platform, pick a temp and load the receiver from stack. 
1754 fatal("receiver always in a register"); 1755 receiver_reg = j_rarg0; // known to be free at this point 1756 __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1757 } else { 1758 // no data motion is needed 1759 receiver_reg = r->as_Register(); 1760 } 1761 } 1762 1763 // Figure out which address we are really jumping to: 1764 MethodHandles::generate_method_handle_dispatch(masm, iid, 1765 receiver_reg, member_reg, /*for_compiler_entry:*/ true); 1766 } 1767 1768 // --------------------------------------------------------------------------- 1769 // Generate a native wrapper for a given method. The method takes arguments 1770 // in the Java compiled code convention, marshals them to the native 1771 // convention (handlizes oops, etc), transitions to native, makes the call, 1772 // returns to java state (possibly blocking), unhandlizes any result and 1773 // returns. 1774 // 1775 // Critical native functions are a shorthand for the use of 1776 // GetPrimtiveArrayCritical and disallow the use of any other JNI 1777 // functions. The wrapper is expected to unpack the arguments before 1778 // passing them to the callee. Critical native functions leave the state _in_Java, 1779 // since they cannot stop for GC. 1780 // Some other parts of JNI setup are skipped like the tear down of the JNI handle 1781 // block and the check for pending exceptions it's impossible for them 1782 // to be thrown. 1783 // 1784 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, 1785 const methodHandle& method, 1786 int compile_id, 1787 BasicType* in_sig_bt, 1788 VMRegPair* in_regs, 1789 BasicType ret_type) { 1790 if (method->is_method_handle_intrinsic()) { 1791 vmIntrinsics::ID iid = method->intrinsic_id(); 1792 intptr_t start = (intptr_t)__ pc(); 1793 int vep_offset = ((intptr_t)__ pc()) - start; 1794 gen_special_dispatch(masm, 1795 method, 1796 in_sig_bt, 1797 in_regs); 1798 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period 1799 __ flush(); 1800 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually 1801 return nmethod::new_native_nmethod(method, 1802 compile_id, 1803 masm->code(), 1804 vep_offset, 1805 frame_complete, 1806 stack_slots / VMRegImpl::slots_per_word, 1807 in_ByteSize(-1), 1808 in_ByteSize(-1), 1809 (OopMapSet*)NULL); 1810 } 1811 address native_func = method->native_function(); 1812 assert(native_func != NULL, "must have function"); 1813 1814 // An OopMap for lock (and class if static) 1815 OopMapSet *oop_maps = new OopMapSet(); 1816 intptr_t start = (intptr_t)__ pc(); 1817 1818 // We have received a description of where all the java arg are located 1819 // on entry to the wrapper. We need to convert these args to where 1820 // the jni function will expect them. To figure out where they go 1821 // we convert the java signature to a C signature by inserting 1822 // the hidden arguments as arg[0] and possibly arg[1] (static method) 1823 1824 const int total_in_args = method->size_of_parameters(); 1825 int total_c_args = total_in_args + (method->is_static() ? 
2 : 1); 1826 1827 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 1828 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 1829 BasicType* in_elem_bt = NULL; 1830 1831 int argc = 0; 1832 out_sig_bt[argc++] = T_ADDRESS; 1833 if (method->is_static()) { 1834 out_sig_bt[argc++] = T_OBJECT; 1835 } 1836 1837 for (int i = 0; i < total_in_args ; i++ ) { 1838 out_sig_bt[argc++] = in_sig_bt[i]; 1839 } 1840 1841 // Now figure out where the args must be stored and how much stack space 1842 // they require. 1843 int out_arg_slots; 1844 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); 1845 1846 // Compute framesize for the wrapper. We need to handlize all oops in 1847 // incoming registers 1848 1849 // Calculate the total number of stack slots we will need. 1850 1851 // First count the abi requirement plus all of the outgoing args 1852 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 1853 1854 // Now the space for the inbound oop handle area 1855 int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers 1856 1857 int oop_handle_offset = stack_slots; 1858 stack_slots += total_save_slots; 1859 1860 // Now any space we need for handlizing a klass if static method 1861 1862 int klass_slot_offset = 0; 1863 int klass_offset = -1; 1864 int lock_slot_offset = 0; 1865 bool is_static = false; 1866 1867 if (method->is_static()) { 1868 klass_slot_offset = stack_slots; 1869 stack_slots += VMRegImpl::slots_per_word; 1870 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 1871 is_static = true; 1872 } 1873 1874 // Plus a lock if needed 1875 1876 if (method->is_synchronized()) { 1877 lock_slot_offset = stack_slots; 1878 stack_slots += VMRegImpl::slots_per_word; 1879 } 1880 1881 // Now a place (+2) to save return values or temp during shuffling 1882 // + 4 for return address (which we own) and saved rbp 1883 stack_slots += 6; 1884 1885 // Ok The space we have allocated will look like: 1886 // 1887 // 1888 // FP-> | | 1889 // |---------------------| 1890 // | 2 slots for moves | 1891 // |---------------------| 1892 // | lock box (if sync) | 1893 // |---------------------| <- lock_slot_offset 1894 // | klass (if static) | 1895 // |---------------------| <- klass_slot_offset 1896 // | oopHandle area | 1897 // |---------------------| <- oop_handle_offset (6 java arg registers) 1898 // | outbound memory | 1899 // | based arguments | 1900 // | | 1901 // |---------------------| 1902 // | | 1903 // SP-> | out_preserved_slots | 1904 // 1905 // 1906 1907 1908 // Now compute actual number of stack words we need rounding to make 1909 // stack properly aligned. 1910 stack_slots = align_up(stack_slots, StackAlignmentInSlots); 1911 1912 int stack_size = stack_slots * VMRegImpl::stack_slot_size; 1913 1914 // First thing make an ic check to see if we should even be here 1915 1916 // We are free to use all registers as temps without saving them and 1917 // restoring them except rbp. rbp is the only callee save register 1918 // as far as the interpreter and the compiler(s) are concerned. 
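// Editor's sketch: the frame-size accounting done above, redone as standalone host
// arithmetic with made-up inputs so the rounding is easy to follow. The constants
// (4-byte stack slots, 16-byte ABI alignment) match x86_64; out_arg_slots and the
// method shape (static and synchronized) are hypothetical. Compiled out with #if 0.
#if 0
#include <cstdio>

static int align_up(int x, int n) { return (x + n - 1) & ~(n - 1); } // n must be a power of two

int main() {
  const int slot_size      = 4;               // VMRegImpl::stack_slot_size on x86_64
  const int slots_per_word = 2;               // 8-byte words / 4-byte slots
  const int align_slots    = 16 / slot_size;  // StackAlignmentInSlots

  int out_arg_slots = 8;                      // hypothetical c_calling_convention() result
  int stack_slots   = 0 /* out_preserve */ + out_arg_slots;
  stack_slots      += 6 * slots_per_word;     // oop handle area (6 register args)
  stack_slots      += slots_per_word;         // klass handle (static method)
  stack_slots      += slots_per_word;         // lock slot (synchronized method)
  stack_slots      += 6;                      // 2 spill slots + 4 slots for ret addr and saved rbp

  stack_slots = align_up(stack_slots, align_slots);
  std::printf("stack_slots=%d  stack_size=%d bytes\n", stack_slots, stack_slots * slot_size);
  return 0;
}
#endif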
1919 1920 1921 const Register ic_reg = rax; 1922 const Register receiver = j_rarg0; 1923 1924 Label hit; 1925 Label exception_pending; 1926 1927 assert_different_registers(ic_reg, receiver, rscratch1); 1928 __ verify_oop(receiver); 1929 __ load_klass(rscratch1, receiver, rscratch2); 1930 __ cmpq(ic_reg, rscratch1); 1931 __ jcc(Assembler::equal, hit); 1932 1933 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1934 1935 // Verified entry point must be aligned 1936 __ align(8); 1937 1938 __ bind(hit); 1939 1940 int vep_offset = ((intptr_t)__ pc()) - start; 1941 1942 if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { 1943 Label L_skip_barrier; 1944 Register klass = r10; 1945 __ mov_metadata(klass, method->method_holder()); // InstanceKlass* 1946 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/); 1947 1948 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path 1949 1950 __ bind(L_skip_barrier); 1951 } 1952 1953 #ifdef COMPILER1 1954 // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available. 1955 if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { 1956 inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/); 1957 } 1958 #endif // COMPILER1 1959 1960 // The instruction at the verified entry point must be 5 bytes or longer 1961 // because it can be patched on the fly by make_non_entrant. The stack bang 1962 // instruction fits that requirement. 1963 1964 // Generate stack overflow check 1965 __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); 1966 1967 // Generate a new frame for the wrapper. 1968 __ enter(); 1969 // -2 because return address is already present and so is saved rbp 1970 __ subptr(rsp, stack_size - 2*wordSize); 1971 1972 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 1973 bs->nmethod_entry_barrier(masm); 1974 1975 // Frame is now completed as far as size and linkage. 1976 int frame_complete = ((intptr_t)__ pc()) - start; 1977 1978 if (UseRTMLocking) { 1979 // Abort RTM transaction before calling JNI 1980 // because critical section will be large and will be 1981 // aborted anyway. Also nmethod could be deoptimized. 1982 __ xabort(0); 1983 } 1984 1985 #ifdef ASSERT 1986 { 1987 Label L; 1988 __ mov(rax, rsp); 1989 __ andptr(rax, -16); // must be 16 byte boundary (see amd64 ABI) 1990 __ cmpptr(rax, rsp); 1991 __ jcc(Assembler::equal, L); 1992 __ stop("improperly aligned stack"); 1993 __ bind(L); 1994 } 1995 #endif /* ASSERT */ 1996 1997 1998 // We use r14 as the oop handle for the receiver/klass 1999 // It is callee save so it survives the call to native 2000 2001 const Register oop_handle_reg = r14; 2002 2003 // 2004 // We immediately shuffle the arguments so that any vm call we have to 2005 // make from here on out (sync slow path, jvmti, etc.) we will have 2006 // captured the oops from our caller and have a valid oopMap for 2007 // them. 2008 2009 // ----------------- 2010 // The Grand Shuffle 2011 2012 // The Java calling convention is either equal (linux) or denser (win64) than the 2013 // c calling convention. However the because of the jni_env argument the c calling 2014 // convention always has at least one more (and two for static) arguments than Java. 
2015 // Therefore if we move the args from java -> c backwards then we will never have 2016 // a register->register conflict and we don't have to build a dependency graph 2017 // and figure out how to break any cycles. 2018 // 2019 2020 // Record esp-based slot for receiver on stack for non-static methods 2021 int receiver_offset = -1; 2022 2023 // This is a trick. We double the stack slots so we can claim 2024 // the oops in the caller's frame. Since we are sure to have 2025 // more args than the caller doubling is enough to make 2026 // sure we can capture all the incoming oop args from the 2027 // caller. 2028 // 2029 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 2030 2031 // Mark location of rbp (someday) 2032 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp)); 2033 2034 // Use eax, ebx as temporaries during any memory-memory moves we have to do 2035 // All inbound args are referenced based on rbp and all outbound args via rsp. 2036 2037 2038 #ifdef ASSERT 2039 bool reg_destroyed[RegisterImpl::number_of_registers]; 2040 bool freg_destroyed[XMMRegisterImpl::number_of_registers]; 2041 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { 2042 reg_destroyed[r] = false; 2043 } 2044 for ( int f = 0 ; f < XMMRegisterImpl::number_of_registers ; f++ ) { 2045 freg_destroyed[f] = false; 2046 } 2047 2048 #endif /* ASSERT */ 2049 2050 // For JNI natives the incoming and outgoing registers are offset upwards. 2051 GrowableArray<int> arg_order(2 * total_in_args); 2052 2053 VMRegPair tmp_vmreg; 2054 tmp_vmreg.set2(rbx->as_VMReg()); 2055 2056 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { 2057 arg_order.push(i); 2058 arg_order.push(c_arg); 2059 } 2060 2061 int temploc = -1; 2062 for (int ai = 0; ai < arg_order.length(); ai += 2) { 2063 int i = arg_order.at(ai); 2064 int c_arg = arg_order.at(ai + 1); 2065 __ block_comment(err_msg("move %d -> %d", i, c_arg)); 2066 #ifdef ASSERT 2067 if (in_regs[i].first()->is_Register()) { 2068 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); 2069 } else if (in_regs[i].first()->is_XMMRegister()) { 2070 assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!"); 2071 } 2072 if (out_regs[c_arg].first()->is_Register()) { 2073 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 2074 } else if (out_regs[c_arg].first()->is_XMMRegister()) { 2075 freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; 2076 } 2077 #endif /* ASSERT */ 2078 switch (in_sig_bt[i]) { 2079 case T_ARRAY: 2080 case T_PRIMITIVE_OBJECT: 2081 case T_OBJECT: 2082 __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 2083 ((i == 0) && (!is_static)), 2084 &receiver_offset); 2085 break; 2086 case T_VOID: 2087 break; 2088 2089 case T_FLOAT: 2090 __ float_move(in_regs[i], out_regs[c_arg]); 2091 break; 2092 2093 case T_DOUBLE: 2094 assert( i + 1 < total_in_args && 2095 in_sig_bt[i + 1] == T_VOID && 2096 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 2097 __ double_move(in_regs[i], out_regs[c_arg]); 2098 break; 2099 2100 case T_LONG : 2101 __ long_move(in_regs[i], out_regs[c_arg]); 2102 break; 2103 2104 case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 2105 2106 default: 2107 __ move32_64(in_regs[i], out_regs[c_arg]); 2108 } 2109 } 2110 2111 int c_arg; 2112 2113 // Pre-load a static method's oop into r14. 
Used both by locking code and 2114 // the normal JNI call code. 2115 // point c_arg at the first arg that is already loaded in case we 2116 // need to spill before we call out 2117 c_arg = total_c_args - total_in_args; 2118 2119 if (method->is_static()) { 2120 2121 // load oop into a register 2122 __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); 2123 2124 // Now handlize the static class mirror it's known not-null. 2125 __ movptr(Address(rsp, klass_offset), oop_handle_reg); 2126 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 2127 2128 // Now get the handle 2129 __ lea(oop_handle_reg, Address(rsp, klass_offset)); 2130 // store the klass handle as second argument 2131 __ movptr(c_rarg1, oop_handle_reg); 2132 // and protect the arg if we must spill 2133 c_arg--; 2134 } 2135 2136 // Change state to native (we save the return address in the thread, since it might not 2137 // be pushed on the stack when we do a a stack traversal). It is enough that the pc() 2138 // points into the right code segment. It does not have to be the correct return pc. 2139 // We use the same pc/oopMap repeatedly when we call out 2140 2141 intptr_t the_pc = (intptr_t) __ pc(); 2142 oop_maps->add_gc_map(the_pc - start, map); 2143 2144 __ set_last_Java_frame(rsp, noreg, (address)the_pc); 2145 2146 2147 // We have all of the arguments setup at this point. We must not touch any register 2148 // argument registers at this point (what if we save/restore them there are no oop? 2149 2150 { 2151 SkipIfEqual skip(masm, &DTraceMethodProbes, false); 2152 // protect the args we've loaded 2153 save_args(masm, total_c_args, c_arg, out_regs); 2154 __ mov_metadata(c_rarg1, method()); 2155 __ call_VM_leaf( 2156 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 2157 r15_thread, c_rarg1); 2158 restore_args(masm, total_c_args, c_arg, out_regs); 2159 } 2160 2161 // RedefineClasses() tracing support for obsolete method entry 2162 if (log_is_enabled(Trace, redefine, class, obsolete)) { 2163 // protect the args we've loaded 2164 save_args(masm, total_c_args, c_arg, out_regs); 2165 __ mov_metadata(c_rarg1, method()); 2166 __ call_VM_leaf( 2167 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), 2168 r15_thread, c_rarg1); 2169 restore_args(masm, total_c_args, c_arg, out_regs); 2170 } 2171 2172 // Lock a synchronized method 2173 2174 // Register definitions used by locking and unlocking 2175 2176 const Register swap_reg = rax; // Must use rax for cmpxchg instruction 2177 const Register obj_reg = rbx; // Will contain the oop 2178 const Register lock_reg = r13; // Address of compiler lock object (BasicLock) 2179 const Register old_hdr = r13; // value of old header at unlock time 2180 2181 Label slow_path_lock; 2182 Label lock_done; 2183 2184 if (method->is_synchronized()) { 2185 2186 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); 2187 2188 // Get the handle (the 2nd argument) 2189 __ mov(oop_handle_reg, c_rarg1); 2190 2191 // Get address of the box 2192 2193 __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2194 2195 // Load the oop from the handle 2196 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 2197 2198 if (!UseHeavyMonitors) { 2199 // Load immediate 1 into swap_reg %rax 2200 __ movl(swap_reg, 1); 2201 2202 // Load (object->mark() | 1) into swap_reg %rax 2203 __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2204 if (EnableValhalla) { 2205 // Mask inline_type bit such that we go to the 
slow path if object is an inline type 2206 __ andptr(swap_reg, ~((int) markWord::inline_type_bit_in_place)); 2207 } 2208 2209 // Save (object->mark() | 1) into BasicLock's displaced header 2210 __ movptr(Address(lock_reg, mark_word_offset), swap_reg); 2211 2212 // src -> dest iff dest == rax else rax <- dest 2213 __ lock(); 2214 __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2215 __ jcc(Assembler::equal, lock_done); 2216 2217 // Hmm should this move to the slow path code area??? 2218 2219 // Test if the oopMark is an obvious stack pointer, i.e., 2220 // 1) (mark & 3) == 0, and 2221 // 2) rsp <= mark < mark + os::pagesize() 2222 // These 3 tests can be done by evaluating the following 2223 // expression: ((mark - rsp) & (3 - os::vm_page_size())), 2224 // assuming both stack pointer and pagesize have their 2225 // least significant 2 bits clear. 2226 // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg 2227 2228 __ subptr(swap_reg, rsp); 2229 __ andptr(swap_reg, 3 - os::vm_page_size()); 2230 2231 // Save the test result, for recursive case, the result is zero 2232 __ movptr(Address(lock_reg, mark_word_offset), swap_reg); 2233 __ jcc(Assembler::notEqual, slow_path_lock); 2234 } else { 2235 __ jmp(slow_path_lock); 2236 } 2237 2238 // Slow path will re-enter here 2239 2240 __ bind(lock_done); 2241 } 2242 2243 // Finally just about ready to make the JNI call 2244 2245 // get JNIEnv* which is first argument to native 2246 __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset()))); 2247 2248 // Now set thread in native 2249 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native); 2250 2251 __ call(RuntimeAddress(native_func)); 2252 2253 // Verify or restore cpu control state after JNI call 2254 __ restore_cpu_control_state_after_jni(); 2255 2256 // Unpack native results. 2257 switch (ret_type) { 2258 case T_BOOLEAN: __ c2bool(rax); break; 2259 case T_CHAR : __ movzwl(rax, rax); break; 2260 case T_BYTE : __ sign_extend_byte (rax); break; 2261 case T_SHORT : __ sign_extend_short(rax); break; 2262 case T_INT : /* nothing to do */ break; 2263 case T_DOUBLE : 2264 case T_FLOAT : 2265 // Result is in xmm0 we'll save as needed 2266 break; 2267 case T_ARRAY: // Really a handle 2268 case T_PRIMITIVE_OBJECT: // Really a handle 2269 case T_OBJECT: // Really a handle 2270 break; // can't de-handlize until after safepoint check 2271 case T_VOID: break; 2272 case T_LONG: break; 2273 default : ShouldNotReachHere(); 2274 } 2275 2276 Label after_transition; 2277 2278 // Switch thread to "native transition" state before reading the synchronization state. 2279 // This additional state is necessary because reading and testing the synchronization 2280 // state is not atomic w.r.t. GC, as this scenario demonstrates: 2281 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. 2282 // VM thread changes sync state to synchronizing and suspends threads for GC. 2283 // Thread A is resumed to finish this native method, but doesn't block here since it 2284 // didn't see any synchronization is progress, and escapes. 
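// Editor's sketch: the ordering constraint described in the comment above, expressed
// with std::atomic on the host. The store of the new thread state must become visible
// before the safepoint/suspend state is read, otherwise the thread can miss a pending
// safepoint and escape. ToyThreadState and the flag names are invented for the example;
// the fence plays the role of the Assembler::membar(...) emitted just below. Compiled
// out with #if 0.
#if 0
#include <atomic>

enum ToyThreadState { _toy_in_native, _toy_in_native_trans, _toy_in_Java };

static std::atomic<int>  toy_thread_state{_toy_in_native};
static std::atomic<bool> toy_safepoint_pending{false};   // stands in for the poll/suspend flags

static bool toy_transition_needs_slow_path() {
  toy_thread_state.store(_toy_in_native_trans, std::memory_order_release);
  // Full fence: the state store must be ordered before the flag load (StoreLoad).
  std::atomic_thread_fence(std::memory_order_seq_cst);
  return toy_safepoint_pending.load(std::memory_order_acquire);
}
#endif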
2285 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans); 2286 2287 // Force this write out before the read below 2288 __ membar(Assembler::Membar_mask_bits( 2289 Assembler::LoadLoad | Assembler::LoadStore | 2290 Assembler::StoreLoad | Assembler::StoreStore)); 2291 2292 // check for safepoint operation in progress and/or pending suspend requests 2293 { 2294 Label Continue; 2295 Label slow_path; 2296 2297 __ safepoint_poll(slow_path, r15_thread, true /* at_return */, false /* in_nmethod */); 2298 2299 __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); 2300 __ jcc(Assembler::equal, Continue); 2301 __ bind(slow_path); 2302 2303 // Don't use call_VM as it will see a possible pending exception and forward it 2304 // and never return here preventing us from clearing _last_native_pc down below. 2305 // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are 2306 // preserved and correspond to the bcp/locals pointers. So we do a runtime call 2307 // by hand. 2308 // 2309 __ vzeroupper(); 2310 save_native_result(masm, ret_type, stack_slots); 2311 __ mov(c_rarg0, r15_thread); 2312 __ mov(r12, rsp); // remember sp 2313 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2314 __ andptr(rsp, -16); // align stack as required by ABI 2315 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); 2316 __ mov(rsp, r12); // restore sp 2317 __ reinit_heapbase(); 2318 // Restore any method result value 2319 restore_native_result(masm, ret_type, stack_slots); 2320 __ bind(Continue); 2321 } 2322 2323 // change thread state 2324 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); 2325 __ bind(after_transition); 2326 2327 Label reguard; 2328 Label reguard_done; 2329 __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); 2330 __ jcc(Assembler::equal, reguard); 2331 __ bind(reguard_done); 2332 2333 // native result if any is live 2334 2335 // Unlock 2336 Label unlock_done; 2337 Label slow_path_unlock; 2338 if (method->is_synchronized()) { 2339 2340 // Get locked oop from the handle we passed to jni 2341 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 2342 2343 Label done; 2344 2345 if (!UseHeavyMonitors) { 2346 // Simple recursive lock? 
2347 __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD); 2348 __ jcc(Assembler::equal, done); 2349 } 2350 2351 // Must save rax if it is live now because cmpxchg must use it 2352 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2353 save_native_result(masm, ret_type, stack_slots); 2354 } 2355 2356 2357 if (!UseHeavyMonitors) { 2358 // get address of the stack lock 2359 __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2360 // get old displaced header 2361 __ movptr(old_hdr, Address(rax, 0)); 2362 2363 // Atomic swap old header if oop still contains the stack lock 2364 __ lock(); 2365 __ cmpxchgptr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2366 __ jcc(Assembler::notEqual, slow_path_unlock); 2367 } else { 2368 __ jmp(slow_path_unlock); 2369 } 2370 2371 // slow path re-enters here 2372 __ bind(unlock_done); 2373 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2374 restore_native_result(masm, ret_type, stack_slots); 2375 } 2376 2377 __ bind(done); 2378 2379 } 2380 { 2381 SkipIfEqual skip(masm, &DTraceMethodProbes, false); 2382 save_native_result(masm, ret_type, stack_slots); 2383 __ mov_metadata(c_rarg1, method()); 2384 __ call_VM_leaf( 2385 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 2386 r15_thread, c_rarg1); 2387 restore_native_result(masm, ret_type, stack_slots); 2388 } 2389 2390 __ reset_last_Java_frame(false); 2391 2392 // Unbox oop result, e.g. JNIHandles::resolve value. 2393 if (is_reference_type(ret_type)) { 2394 __ resolve_jobject(rax /* value */, 2395 r15_thread /* thread */, 2396 rcx /* tmp */); 2397 } 2398 2399 if (CheckJNICalls) { 2400 // clear_pending_jni_exception_check 2401 __ movptr(Address(r15_thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD); 2402 } 2403 2404 // reset handle block 2405 __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset())); 2406 __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD); 2407 2408 // pop our frame 2409 2410 __ leave(); 2411 2412 // Any exception pending? 2413 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); 2414 __ jcc(Assembler::notEqual, exception_pending); 2415 2416 // Return 2417 2418 __ ret(0); 2419 2420 // Unexpected paths are out of line and go here 2421 2422 // forward the exception 2423 __ bind(exception_pending); 2424 2425 // and forward the exception 2426 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 2427 2428 // Slow path locking & unlocking 2429 if (method->is_synchronized()) { 2430 2431 // BEGIN Slow path lock 2432 __ bind(slow_path_lock); 2433 2434 // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM 2435 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2436 2437 // protect the args we've loaded 2438 save_args(masm, total_c_args, c_arg, out_regs); 2439 2440 __ mov(c_rarg0, obj_reg); 2441 __ mov(c_rarg1, lock_reg); 2442 __ mov(c_rarg2, r15_thread); 2443 2444 // Not a leaf but we have last_Java_frame setup as we want 2445 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); 2446 restore_args(masm, total_c_args, c_arg, out_regs); 2447 2448 #ifdef ASSERT 2449 { Label L; 2450 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); 2451 __ jcc(Assembler::equal, L); 2452 __ stop("no pending exception allowed on exit from monitorenter"); 2453 __ bind(L); 2454 } 2455 #endif 2456 __ jmp(lock_done); 2457 2458 // END Slow path lock 2459 2460 // BEGIN Slow path unlock 2461 __ bind(slow_path_unlock); 2462 2463 // If we haven't already saved the native result we must save it now as xmm registers 2464 // are still exposed. 2465 __ vzeroupper(); 2466 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2467 save_native_result(masm, ret_type, stack_slots); 2468 } 2469 2470 __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2471 2472 __ mov(c_rarg0, obj_reg); 2473 __ mov(c_rarg2, r15_thread); 2474 __ mov(r12, rsp); // remember sp 2475 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2476 __ andptr(rsp, -16); // align stack as required by ABI 2477 2478 // Save pending exception around call to VM (which contains an EXCEPTION_MARK) 2479 // NOTE that obj_reg == rbx currently 2480 __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset()))); 2481 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); 2482 2483 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2484 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); 2485 __ mov(rsp, r12); // restore sp 2486 __ reinit_heapbase(); 2487 #ifdef ASSERT 2488 { 2489 Label L; 2490 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD); 2491 __ jcc(Assembler::equal, L); 2492 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); 2493 __ bind(L); 2494 } 2495 #endif /* ASSERT */ 2496 2497 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx); 2498 2499 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2500 restore_native_result(masm, ret_type, stack_slots); 2501 } 2502 __ jmp(unlock_done); 2503 2504 // END Slow path unlock 2505 2506 } // synchronized 2507 2508 // SLOW PATH Reguard the stack if needed 2509 2510 __ bind(reguard); 2511 __ vzeroupper(); 2512 save_native_result(masm, ret_type, stack_slots); 2513 __ mov(r12, rsp); // remember sp 2514 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2515 __ andptr(rsp, -16); // align stack as required by ABI 2516 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); 2517 __ mov(rsp, r12); // restore sp 2518 __ reinit_heapbase(); 2519 restore_native_result(masm, ret_type, stack_slots); 2520 // and continue 2521 __ jmp(reguard_done); 2522 2523 2524 2525 __ flush(); 2526 2527 nmethod *nm = nmethod::new_native_nmethod(method, 2528 compile_id, 2529 masm->code(), 2530 vep_offset, 2531 frame_complete, 2532 stack_slots / VMRegImpl::slots_per_word, 2533 (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), 2534 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), 2535 oop_maps); 2536 2537 return nm; 2538 } 2539 2540 // this function returns the adjust size (in number of words) to a c2i adapter 2541 // activation for use during deoptimization 2542 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) { 2543 return (callee_locals - callee_parameters) * Interpreter::stackElementWords; 2544 } 2545 2546 2547 uint SharedRuntime::out_preserve_stack_slots() { 2548 return 0; 2549 } 2550 2551 2552 // Number of stack slots between incoming argument block and the start of 2553 // a new frame. The PROLOG must add this many slots to the stack. The 2554 // EPILOG must remove this many slots. amd64 needs two slots for 2555 // return address. 2556 uint SharedRuntime::in_preserve_stack_slots() { 2557 return 4 + 2 * VerifyStackAtCalls; 2558 } 2559 2560 //------------------------------generate_deopt_blob---------------------------- 2561 void SharedRuntime::generate_deopt_blob() { 2562 // Allocate space for the code 2563 ResourceMark rm; 2564 // Setup code generation tools 2565 int pad = 0; 2566 if (UseAVX > 2) { 2567 pad += 1024; 2568 } 2569 #if INCLUDE_JVMCI 2570 if (EnableJVMCI) { 2571 pad += 512; // Increase the buffer size when compiling for JVMCI 2572 } 2573 #endif 2574 CodeBuffer buffer("deopt_blob", 2560+pad, 1024); 2575 MacroAssembler* masm = new MacroAssembler(&buffer); 2576 int frame_size_in_words; 2577 OopMap* map = NULL; 2578 OopMapSet *oop_maps = new OopMapSet(); 2579 2580 // ------------- 2581 // This code enters when returning to a de-optimized nmethod. A return 2582 // address has been pushed on the the stack, and return values are in 2583 // registers. 2584 // If we are doing a normal deopt then we were called from the patched 2585 // nmethod from the point we returned to the nmethod. So the return 2586 // address on the stack is wrong by NativeCall::instruction_size 2587 // We will adjust the value so it looks like we have the original return 2588 // address on the stack (like when we eagerly deoptimized). 2589 // In the case of an exception pending when deoptimizing, we enter 2590 // with a return address on the stack that points after the call we patched 2591 // into the exception handler. We have the following register state from, 2592 // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). 2593 // rax: exception oop 2594 // rbx: exception handler 2595 // rdx: throwing pc 2596 // So in this case we simply jam rdx into the useless return address and 2597 // the stack looks just like we want. 2598 // 2599 // At this point we need to de-opt. We save the argument return 2600 // registers. We call the first C routine, fetch_unroll_info(). This 2601 // routine captures the return values and returns a structure which 2602 // describes the current frame size and the sizes of all replacement frames. 2603 // The current frame is compiled code and may contain many inlined 2604 // functions, each with their own JVM state. We pop the current frame, then 2605 // push all the new frames. Then we call the C routine unpack_frames() to 2606 // populate these frames. Finally unpack_frames() returns us the new target 2607 // address. Notice that callee-save registers are BLOWN here; they have 2608 // already been captured in the vframeArray at the time the return PC was 2609 // patched. 2610 address start = __ pc(); 2611 Label cont; 2612 2613 // Prolog for non exception case! 
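// Editor's sketch (heavily simplified, invented names, not the real UnrollBlock layout):
// a host-C++ model of the hand-off described in the comment above. fetch_unroll_info()
// returns a description of the replacement frames; the blob pops the deoptimized frame
// and pushes one skeletal frame per entry; unpack_frames() then fills them in. Only the
// bookkeeping is modeled here. Compiled out with #if 0.
#if 0
#include <cstdint>
#include <vector>

struct ToyUnrollBlock {                 // stands in for Deoptimization::UnrollBlock
  int                   size_of_deoptimized_frame;
  int                   caller_adjustment;
  std::vector<int>      frame_sizes;    // one per skeletal interpreter frame to push
  std::vector<intptr_t> frame_pcs;      // return pc recorded for each pushed frame
};

struct ToyFrame { int size; intptr_t pc; };

static std::vector<ToyFrame> build_skeletal_frames(const ToyUnrollBlock& ub) {
  std::vector<ToyFrame> frames;
  for (size_t i = 0; i < ub.frame_sizes.size(); i++) {
    frames.push_back({ub.frame_sizes[i], ub.frame_pcs[i]});  // the blob's push loop
  }
  return frames;                        // unpack_frames() would populate locals and stacks
}
#endif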
2614 2615 // Save everything in sight. 2616 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ true); 2617 2618 // Normal deoptimization. Save exec mode for unpack_frames. 2619 __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved 2620 __ jmp(cont); 2621 2622 int reexecute_offset = __ pc() - start; 2623 #if INCLUDE_JVMCI && !defined(COMPILER1) 2624 if (EnableJVMCI && UseJVMCICompiler) { 2625 // JVMCI does not use this kind of deoptimization 2626 __ should_not_reach_here(); 2627 } 2628 #endif 2629 2630 // Reexecute case 2631 // return address is the pc describes what bci to do re-execute at 2632 2633 // No need to update map as each call to save_live_registers will produce identical oopmap 2634 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ true); 2635 2636 __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved 2637 __ jmp(cont); 2638 2639 #if INCLUDE_JVMCI 2640 Label after_fetch_unroll_info_call; 2641 int implicit_exception_uncommon_trap_offset = 0; 2642 int uncommon_trap_offset = 0; 2643 2644 if (EnableJVMCI) { 2645 implicit_exception_uncommon_trap_offset = __ pc() - start; 2646 2647 __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); 2648 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), (int32_t)NULL_WORD); 2649 2650 uncommon_trap_offset = __ pc() - start; 2651 2652 // Save everything in sight. 2653 RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ true); 2654 // fetch_unroll_info needs to call last_java_frame() 2655 __ set_last_Java_frame(noreg, noreg, NULL); 2656 2657 __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset()))); 2658 __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1); 2659 2660 __ movl(r14, (int32_t)Deoptimization::Unpack_reexecute); 2661 __ mov(c_rarg0, r15_thread); 2662 __ movl(c_rarg2, r14); // exec mode 2663 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); 2664 oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); 2665 2666 __ reset_last_Java_frame(false); 2667 2668 __ jmp(after_fetch_unroll_info_call); 2669 } // EnableJVMCI 2670 #endif // INCLUDE_JVMCI 2671 2672 int exception_offset = __ pc() - start; 2673 2674 // Prolog for exception case 2675 2676 // all registers are dead at this entry point, except for rax, and 2677 // rdx which contain the exception oop and exception pc 2678 // respectively. Set them in TLS and fall thru to the 2679 // unpack_with_exception_in_tls entry point. 2680 2681 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx); 2682 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax); 2683 2684 int exception_in_tls_offset = __ pc() - start; 2685 2686 // new implementation because exception oop is now passed in JavaThread 2687 2688 // Prolog for exception case 2689 // All registers must be preserved because they might be used by LinearScan 2690 // Exceptiop oop and throwing PC are passed in JavaThread 2691 // tos: stack at point of call to method that threw the exception (i.e. only 2692 // args are on the stack, no return address) 2693 2694 // make room on stack for the return address 2695 // It will be patched later with the throwing pc. The correct value is not 2696 // available now because loading it from memory would destroy registers. 2697 __ push(0); 2698 2699 // Save everything in sight. 
2700 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ true); 2701 2702 // Now it is safe to overwrite any register 2703 2704 // Deopt during an exception. Save exec mode for unpack_frames. 2705 __ movl(r14, Deoptimization::Unpack_exception); // callee-saved 2706 2707 // load throwing pc from JavaThread and patch it as the return address 2708 // of the current frame. Then clear the field in JavaThread 2709 2710 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 2711 __ movptr(Address(rbp, wordSize), rdx); 2712 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD); 2713 2714 #ifdef ASSERT 2715 // verify that there is really an exception oop in JavaThread 2716 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 2717 __ verify_oop(rax); 2718 2719 // verify that there is no pending exception 2720 Label no_pending_exception; 2721 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 2722 __ testptr(rax, rax); 2723 __ jcc(Assembler::zero, no_pending_exception); 2724 __ stop("must not have pending exception here"); 2725 __ bind(no_pending_exception); 2726 #endif 2727 2728 __ bind(cont); 2729 2730 // Call C code. Need thread and this frame, but NOT official VM entry 2731 // crud. We cannot block on this call, no GC can happen. 2732 // 2733 // UnrollBlock* fetch_unroll_info(JavaThread* thread) 2734 2735 // fetch_unroll_info needs to call last_java_frame(). 2736 2737 __ set_last_Java_frame(noreg, noreg, NULL); 2738 #ifdef ASSERT 2739 { Label L; 2740 __ cmpptr(Address(r15_thread, 2741 JavaThread::last_Java_fp_offset()), 2742 (int32_t)0); 2743 __ jcc(Assembler::equal, L); 2744 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); 2745 __ bind(L); 2746 } 2747 #endif // ASSERT 2748 __ mov(c_rarg0, r15_thread); 2749 __ movl(c_rarg1, r14); // exec_mode 2750 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); 2751 2752 // Need to have an oopmap that tells fetch_unroll_info where to 2753 // find any register it might need. 2754 oop_maps->add_gc_map(__ pc() - start, map); 2755 2756 __ reset_last_Java_frame(false); 2757 2758 #if INCLUDE_JVMCI 2759 if (EnableJVMCI) { 2760 __ bind(after_fetch_unroll_info_call); 2761 } 2762 #endif 2763 2764 // Load UnrollBlock* into rdi 2765 __ mov(rdi, rax); 2766 2767 __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); 2768 Label noException; 2769 __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending? 2770 __ jcc(Assembler::notEqual, noException); 2771 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 2772 // QQQ this is useless it was NULL above 2773 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 2774 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD); 2775 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD); 2776 2777 __ verify_oop(rax); 2778 2779 // Overwrite the result registers with the exception results. 2780 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 2781 // I think this is useless 2782 __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx); 2783 2784 __ bind(noException); 2785 2786 // Only register save data is on the stack. 2787 // Now restore the result registers. Everything else is either dead 2788 // or captured in the vframeArray. 
  RegisterSaver::restore_result_registers(masm);

  // All of the register save area has been popped off the stack. Only the
  // return address remains.

  // Pop all the frames we must move/replace.
  //
  // Frame picture (youngest to oldest)
  // 1: self-frame (no frame link)
  // 2: deopting frame  (no frame link)
  // 3: caller of deopting frame (could be compiled/interpreted).
  //
  // Note: by leaving the return address of self-frame on the stack
  // and using the size of frame 2 to adjust the stack
  // when we are done, the return to frame 3 will still be on the stack.

  // Pop deoptimized frame
  __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
  __ addptr(rsp, rcx);

  // rsp should be pointing at the return address to the caller (3)

  // Pick up the initial fp we should save
  // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved)
  __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));

#ifdef ASSERT
  // Compilers generate code that bangs the stack by as much as the
  // interpreter would need. So this stack banging should never
  // trigger a fault. Verify that it does not on non-product builds.
  __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
  __ bang_stack_size(rbx, rcx);
#endif

  // Load address of array of frame pcs into rcx
  __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));

  // Trash the old pc
  __ addptr(rsp, wordSize);

  // Load address of array of frame sizes into rsi
  __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));

  // Load counter into rdx
  __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));

  // Now adjust the caller's stack to make up for the extra locals
  // but record the original sp so that we can save it in the skeletal interpreter
  // frame and the stack walking of interpreter_sender will get the unextended sp
  // value and not the "real" sp value.
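// Editor's worked example (made-up numbers) of the "extra locals" adjustment referred
// to above: the interpreter frame needs room for locals beyond the incoming parameters,
// which is the kind of quantity Deoptimization::last_frame_adjust() earlier in this file
// computes. stackElementWords is assumed to be 1 word per slot on 64-bit. Compiled out
// with #if 0.
#if 0
#include <cassert>

int main() {
  const int stackElementWords = 1;   // assumed Interpreter::stackElementWords on 64-bit
  int callee_parameters = 3;         // hypothetical method: 3 incoming parameters
  int callee_locals     = 7;         // ...but 7 interpreter local slots
  int adjust_words = (callee_locals - callee_parameters) * stackElementWords;
  assert(adjust_words == 4);         // the caller's sp must be lowered by 4 words
  return 0;
}
#endif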
2839 2840 const Register sender_sp = r8; 2841 2842 __ mov(sender_sp, rsp); 2843 __ movl(rbx, Address(rdi, 2844 Deoptimization::UnrollBlock:: 2845 caller_adjustment_offset_in_bytes())); 2846 __ subptr(rsp, rbx); 2847 2848 // Push interpreter frames in a loop 2849 Label loop; 2850 __ bind(loop); 2851 __ movptr(rbx, Address(rsi, 0)); // Load frame size 2852 __ subptr(rbx, 2*wordSize); // We'll push pc and ebp by hand 2853 __ pushptr(Address(rcx, 0)); // Save return address 2854 __ enter(); // Save old & set new ebp 2855 __ subptr(rsp, rbx); // Prolog 2856 // This value is corrected by layout_activation_impl 2857 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD ); 2858 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable 2859 __ mov(sender_sp, rsp); // Pass sender_sp to next frame 2860 __ addptr(rsi, wordSize); // Bump array pointer (sizes) 2861 __ addptr(rcx, wordSize); // Bump array pointer (pcs) 2862 __ decrementl(rdx); // Decrement counter 2863 __ jcc(Assembler::notZero, loop); 2864 __ pushptr(Address(rcx, 0)); // Save final return address 2865 2866 // Re-push self-frame 2867 __ enter(); // Save old & set new ebp 2868 2869 // Allocate a full sized register save area. 2870 // Return address and rbp are in place, so we allocate two less words. 2871 __ subptr(rsp, (frame_size_in_words - 2) * wordSize); 2872 2873 // Restore frame locals after moving the frame 2874 __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0); 2875 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 2876 2877 // Call C code. Need thread but NOT official VM entry 2878 // crud. We cannot block on this call, no GC can happen. Call should 2879 // restore return values to their stack-slots with the new SP. 2880 // 2881 // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) 2882 2883 // Use rbp because the frames look interpreted now 2884 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 2885 // Don't need the precise return PC here, just precise enough to point into this code blob. 2886 address the_pc = __ pc(); 2887 __ set_last_Java_frame(noreg, rbp, the_pc); 2888 2889 __ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by ABI 2890 __ mov(c_rarg0, r15_thread); 2891 __ movl(c_rarg1, r14); // second arg: exec_mode 2892 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 2893 // Revert SP alignment after call since we're going to do some SP relative addressing below 2894 __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset())); 2895 2896 // Set an oopmap for the call site 2897 // Use the same PC we used for the last java frame 2898 oop_maps->add_gc_map(the_pc - start, 2899 new OopMap( frame_size_in_words, 0 )); 2900 2901 // Clear fp AND pc 2902 __ reset_last_Java_frame(true); 2903 2904 // Collect return values 2905 __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes())); 2906 __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes())); 2907 // I think this is useless (throwing pc?) 2908 __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes())); 2909 2910 // Pop self-frame. 
2911 __ leave(); // Epilog 2912 2913 // Jump to interpreter 2914 __ ret(0); 2915 2916 // Make sure all code is generated 2917 masm->flush(); 2918 2919 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); 2920 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); 2921 #if INCLUDE_JVMCI 2922 if (EnableJVMCI) { 2923 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); 2924 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); 2925 } 2926 #endif 2927 } 2928 2929 #ifdef COMPILER2 2930 //------------------------------generate_uncommon_trap_blob-------------------- 2931 void SharedRuntime::generate_uncommon_trap_blob() { 2932 // Allocate space for the code 2933 ResourceMark rm; 2934 // Setup code generation tools 2935 CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); 2936 MacroAssembler* masm = new MacroAssembler(&buffer); 2937 2938 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); 2939 2940 address start = __ pc(); 2941 2942 if (UseRTMLocking) { 2943 // Abort RTM transaction before possible nmethod deoptimization. 2944 __ xabort(0); 2945 } 2946 2947 // Push self-frame. We get here with a return address on the 2948 // stack, so rsp is 8-byte aligned until we allocate our frame. 2949 __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog! 2950 2951 // No callee saved registers. rbp is assumed implicitly saved 2952 __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); 2953 2954 // compiler left unloaded_class_index in j_rarg0 move to where the 2955 // runtime expects it. 2956 __ movl(c_rarg1, j_rarg0); 2957 2958 __ set_last_Java_frame(noreg, noreg, NULL); 2959 2960 // Call C code. Need thread but NOT official VM entry 2961 // crud. We cannot block on this call, no GC can happen. Call should 2962 // capture callee-saved registers as well as return values. 2963 // Thread is in rdi already. 2964 // 2965 // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); 2966 2967 __ mov(c_rarg0, r15_thread); 2968 __ movl(c_rarg2, Deoptimization::Unpack_uncommon_trap); 2969 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); 2970 2971 // Set an oopmap for the call site 2972 OopMapSet* oop_maps = new OopMapSet(); 2973 OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); 2974 2975 // location of rbp is known implicitly by the frame sender code 2976 2977 oop_maps->add_gc_map(__ pc() - start, map); 2978 2979 __ reset_last_Java_frame(false); 2980 2981 // Load UnrollBlock* into rdi 2982 __ mov(rdi, rax); 2983 2984 #ifdef ASSERT 2985 { Label L; 2986 __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()), 2987 (int32_t)Deoptimization::Unpack_uncommon_trap); 2988 __ jcc(Assembler::equal, L); 2989 __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); 2990 __ bind(L); 2991 } 2992 #endif 2993 2994 // Pop all the frames we must move/replace. 2995 // 2996 // Frame picture (youngest to oldest) 2997 // 1: self-frame (no frame link) 2998 // 2: deopting frame (no frame link) 2999 // 3: caller of deopting frame (could be compiled/interpreted). 3000 3001 // Pop self-frame. We have no frame, and must rely only on rax and rsp. 3002 __ addptr(rsp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt); // Epilog! 
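// Editor's quick sanity check of the slot/byte/word conversions used throughout these
// blobs: compiler stack slots are jints, so "slots << LogBytesPerInt" converts slots to
// bytes and dividing by slots-per-word converts slots to 64-bit words. Purely
// illustrative; compiled out with #if 0.
#if 0
#include <cassert>

int main() {
  const int BytesPerInt    = 4;
  const int LogBytesPerInt = 2;
  const int wordSize       = 8;                        // 64-bit word
  const int slots_per_word = wordSize / BytesPerInt;   // assumed VMRegImpl::slots_per_word

  int slots = 6;                                       // some number of 32-bit stack slots
  assert((slots << LogBytesPerInt) == slots * BytesPerInt);  // slots -> bytes
  assert(slots / slots_per_word == 3);                       // slots -> words
  return 0;
}
#endif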
3003 3004 // Pop deoptimized frame (int) 3005 __ movl(rcx, Address(rdi, 3006 Deoptimization::UnrollBlock:: 3007 size_of_deoptimized_frame_offset_in_bytes())); 3008 __ addptr(rsp, rcx); 3009 3010 // rsp should be pointing at the return address to the caller (3) 3011 3012 // Pick up the initial fp we should save 3013 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) 3014 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); 3015 3016 #ifdef ASSERT 3017 // Compilers generate code that bang the stack by as much as the 3018 // interpreter would need. So this stack banging should never 3019 // trigger a fault. Verify that it does not on non product builds. 3020 __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); 3021 __ bang_stack_size(rbx, rcx); 3022 #endif 3023 3024 // Load address of array of frame pcs into rcx (address*) 3025 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); 3026 3027 // Trash the return pc 3028 __ addptr(rsp, wordSize); 3029 3030 // Load address of array of frame sizes into rsi (intptr_t*) 3031 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock:: frame_sizes_offset_in_bytes())); 3032 3033 // Counter 3034 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock:: number_of_frames_offset_in_bytes())); // (int) 3035 3036 // Now adjust the caller's stack to make up for the extra locals but 3037 // record the original sp so that we can save it in the skeletal 3038 // interpreter frame and the stack walking of interpreter_sender 3039 // will get the unextended sp value and not the "real" sp value. 3040 3041 const Register sender_sp = r8; 3042 3043 __ mov(sender_sp, rsp); 3044 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock:: caller_adjustment_offset_in_bytes())); // (int) 3045 __ subptr(rsp, rbx); 3046 3047 // Push interpreter frames in a loop 3048 Label loop; 3049 __ bind(loop); 3050 __ movptr(rbx, Address(rsi, 0)); // Load frame size 3051 __ subptr(rbx, 2 * wordSize); // We'll push pc and rbp by hand 3052 __ pushptr(Address(rcx, 0)); // Save return address 3053 __ enter(); // Save old & set new rbp 3054 __ subptr(rsp, rbx); // Prolog 3055 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), 3056 sender_sp); // Make it walkable 3057 // This value is corrected by layout_activation_impl 3058 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD ); 3059 __ mov(sender_sp, rsp); // Pass sender_sp to next frame 3060 __ addptr(rsi, wordSize); // Bump array pointer (sizes) 3061 __ addptr(rcx, wordSize); // Bump array pointer (pcs) 3062 __ decrementl(rdx); // Decrement counter 3063 __ jcc(Assembler::notZero, loop); 3064 __ pushptr(Address(rcx, 0)); // Save final return address 3065 3066 // Re-push self-frame 3067 __ enter(); // Save old & set new rbp 3068 __ subptr(rsp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt); 3069 // Prolog 3070 3071 // Use rbp because the frames look interpreted now 3072 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 3073 // Don't need the precise return PC here, just precise enough to point into this code blob. 3074 address the_pc = __ pc(); 3075 __ set_last_Java_frame(noreg, rbp, the_pc); 3076 3077 // Call C code. Need thread but NOT official VM entry 3078 // crud. We cannot block on this call, no GC can happen. 
Call should
3079 // restore return values to their stack-slots with the new SP.
3080 // Thread is in rdi already.
3081 //
3082 // BasicType unpack_frames(JavaThread* thread, int exec_mode);
3083
3084 __ andptr(rsp, -(StackAlignmentInBytes)); // Align SP as required by ABI
3085 __ mov(c_rarg0, r15_thread);
3086 __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap);
3087 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
3088
3089 // Set an oopmap for the call site
3090 // Use the same PC we used for the last java frame
3091 oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
3092
3093 // Clear fp AND pc
3094 __ reset_last_Java_frame(true);
3095
3096 // Pop self-frame.
3097 __ leave(); // Epilog
3098
3099 // Jump to interpreter
3100 __ ret(0);
3101
3102 // Make sure all code is generated
3103 masm->flush();
3104
3105 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps,
3106 SimpleRuntimeFrame::framesize >> 1);
3107 }
3108 #endif // COMPILER2
3109
3110 //------------------------------generate_handler_blob------
3111 //
3112 // Generate a special Compile2Runtime blob that saves all registers,
3113 // and sets up the oopmap.
3114 //
3115 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
3116 assert(StubRoutines::forward_exception_entry() != NULL,
3117 "must be generated before");
3118
3119 ResourceMark rm;
3120 OopMapSet *oop_maps = new OopMapSet();
3121 OopMap* map;
3122
3123 // Allocate space for the code. Setup code generation tools.
3124 CodeBuffer buffer("handler_blob", 2048, 1024);
3125 MacroAssembler* masm = new MacroAssembler(&buffer);
3126
3127 address start = __ pc();
3128 address call_pc = NULL;
3129 int frame_size_in_words;
3130 bool cause_return = (poll_type == POLL_AT_RETURN);
3131 bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
3132
3133 if (UseRTMLocking) {
3134 // Abort RTM transaction before calling runtime
3135 // because critical section will be large and will be
3136 // aborted anyway. Also nmethod could be deoptimized.
3137 __ xabort(0);
3138 }
3139
3140 // Make room for return address (or push it again)
3141 if (!cause_return) {
3142 __ push(rbx);
3143 }
3144
3145 // Save registers, fpu state, and flags
3146 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
3147
3148 // The following is basically a call_VM. However, we need the precise
3149 // address of the call in order to generate an oopmap. Hence, we do all the
3150 // work ourselves.
3151
3152 __ set_last_Java_frame(noreg, noreg, NULL);
3153
3154 // The return address must always be correct so that the frame constructor
3155 // never sees an invalid pc.
3156
3157 if (!cause_return) {
3158 // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack.
3159 // Additionally, rbx is a callee saved register and we can look at it later to determine
3160 // if someone changed the return address for us!
3161 __ movptr(rbx, Address(r15_thread, JavaThread::saved_exception_pc_offset()));
3162 __ movptr(Address(rbp, wordSize), rbx);
3163 }
3164
3165 // Do the call
3166 __ mov(c_rarg0, r15_thread);
3167 __ call(RuntimeAddress(call_ptr));
3168
3169 // Set an oopmap for the call site. This oopmap will map all
3170 // oop-registers and debug-info registers as callee-saved. This
3171 // will allow deoptimization at this safepoint to find all possible
3172 // debug-info recordings, as well as let GC find all oops.
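// The map is keyed by the offset of the instruction following the call (the
// return address), which is why "__ pc() - start" is taken immediately after
// emitting the call above.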
3173 3174 oop_maps->add_gc_map( __ pc() - start, map); 3175 3176 Label noException; 3177 3178 __ reset_last_Java_frame(false); 3179 3180 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); 3181 __ jcc(Assembler::equal, noException); 3182 3183 // Exception pending 3184 3185 RegisterSaver::restore_live_registers(masm, save_vectors); 3186 3187 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3188 3189 // No exception case 3190 __ bind(noException); 3191 3192 Label no_adjust; 3193 #ifdef ASSERT 3194 Label bail; 3195 #endif 3196 if (!cause_return) { 3197 Label no_prefix, not_special; 3198 3199 // If our stashed return pc was modified by the runtime we avoid touching it 3200 __ cmpptr(rbx, Address(rbp, wordSize)); 3201 __ jccb(Assembler::notEqual, no_adjust); 3202 3203 // Skip over the poll instruction. 3204 // See NativeInstruction::is_safepoint_poll() 3205 // Possible encodings: 3206 // 85 00 test %eax,(%rax) 3207 // 85 01 test %eax,(%rcx) 3208 // 85 02 test %eax,(%rdx) 3209 // 85 03 test %eax,(%rbx) 3210 // 85 06 test %eax,(%rsi) 3211 // 85 07 test %eax,(%rdi) 3212 // 3213 // 41 85 00 test %eax,(%r8) 3214 // 41 85 01 test %eax,(%r9) 3215 // 41 85 02 test %eax,(%r10) 3216 // 41 85 03 test %eax,(%r11) 3217 // 41 85 06 test %eax,(%r14) 3218 // 41 85 07 test %eax,(%r15) 3219 // 3220 // 85 04 24 test %eax,(%rsp) 3221 // 41 85 04 24 test %eax,(%r12) 3222 // 85 45 00 test %eax,0x0(%rbp) 3223 // 41 85 45 00 test %eax,0x0(%r13) 3224 3225 __ cmpb(Address(rbx, 0), NativeTstRegMem::instruction_rex_b_prefix); 3226 __ jcc(Assembler::notEqual, no_prefix); 3227 __ addptr(rbx, 1); 3228 __ bind(no_prefix); 3229 #ifdef ASSERT 3230 __ movptr(rax, rbx); // remember where 0x85 should be, for verification below 3231 #endif 3232 // r12/r13/rsp/rbp base encoding takes 3 bytes with the following register values: 3233 // r12/rsp 0x04 3234 // r13/rbp 0x05 3235 __ movzbq(rcx, Address(rbx, 1)); 3236 __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05 3237 __ subptr(rcx, 4); // looking for 0x00 .. 0x01 3238 __ cmpptr(rcx, 1); 3239 __ jcc(Assembler::above, not_special); 3240 __ addptr(rbx, 1); 3241 __ bind(not_special); 3242 #ifdef ASSERT 3243 // Verify the correct encoding of the poll we're about to skip. 3244 __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl); 3245 __ jcc(Assembler::notEqual, bail); 3246 // Mask out the modrm bits 3247 __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask); 3248 // rax encodes to 0, so if the bits are nonzero it's incorrect 3249 __ jcc(Assembler::notZero, bail); 3250 #endif 3251 // Adjust return pc forward to step over the safepoint poll instruction 3252 __ addptr(rbx, 2); 3253 __ movptr(Address(rbp, wordSize), rbx); 3254 } 3255 3256 __ bind(no_adjust); 3257 // Normal exit, restore registers and exit. 3258 RegisterSaver::restore_live_registers(masm, save_vectors); 3259 __ ret(0); 3260 3261 #ifdef ASSERT 3262 __ bind(bail); 3263 __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); 3264 #endif 3265 3266 // Make sure all code is generated 3267 masm->flush(); 3268 3269 // Fill-out other meta info 3270 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); 3271 } 3272 3273 // 3274 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss 3275 // 3276 // Generate a stub that calls into vm to find out the proper destination 3277 // of a java call. 
All the argument registers are live at this point
3278 // but since this is generic code we don't know what they are and the caller
3279 // must do any gc of the args.
3280 //
3281 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
3282 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3283
3284 // allocate space for the code
3285 ResourceMark rm;
3286
3287 CodeBuffer buffer(name, 1200, 512);
3288 MacroAssembler* masm = new MacroAssembler(&buffer);
3289
3290 int frame_size_in_words;
3291
3292 OopMapSet *oop_maps = new OopMapSet();
3293 OopMap* map = NULL;
3294
3295 int start = __ offset();
3296
3297 // No need to save vector registers since they are caller-saved anyway.
3298 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false);
3299
3300 int frame_complete = __ offset();
3301
3302 __ set_last_Java_frame(noreg, noreg, NULL);
3303
3304 __ mov(c_rarg0, r15_thread);
3305
3306 __ call(RuntimeAddress(destination));
3307
3308
3309 // Set an oopmap for the call site.
3310 // We need this not only for callee-saved registers, but also for volatile
3311 // registers that the compiler might be keeping live across a safepoint.
3312
3313 oop_maps->add_gc_map( __ offset() - start, map);
3314
3315 // rax contains the address we are going to jump to assuming no exception got installed
3316
3317 // clear last_Java_sp
3318 __ reset_last_Java_frame(false);
3319 // check for pending exceptions
3320 Label pending;
3321 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
3322 __ jcc(Assembler::notEqual, pending);
3323
3324 // get the returned Method*
3325 __ get_vm_result_2(rbx, r15_thread);
3326 __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx);
3327
3328 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
3329
3330 RegisterSaver::restore_live_registers(masm);
3331
3332 // We are back to the original state on entry and ready to go.
3333
3334 __ jmp(rax);
3335
3336 // Pending exception after the safepoint
3337
3338 __ bind(pending);
3339
3340 RegisterSaver::restore_live_registers(masm);
3341
3342 // exception pending => remove activation and forward to exception handler
3343
3344 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
3345
3346 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
3347 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3348
3349 // -------------
3350 // make sure all code is generated
3351 masm->flush();
3352
3353 // return the blob
3354 // frame_size_words or bytes??
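// frame_size_in_words is filled in (in words) by RegisterSaver::save_live_registers
// above, which is the unit new_runtime_stub() below expects. The trailing "true" is
// the caller_must_gc_arguments flag: as the block comment at the top of this stub
// notes, the argument registers are live here and the caller must do any GC of the args.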
3355 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); 3356 } 3357 3358 #ifdef COMPILER2 3359 static const int native_invoker_code_size = MethodHandles::adapter_code_size; 3360 3361 class NativeInvokerGenerator : public StubCodeGenerator { 3362 address _call_target; 3363 int _shadow_space_bytes; 3364 3365 const GrowableArray<VMReg>& _input_registers; 3366 const GrowableArray<VMReg>& _output_registers; 3367 3368 int _frame_complete; 3369 int _framesize; 3370 OopMapSet* _oop_maps; 3371 public: 3372 NativeInvokerGenerator(CodeBuffer* buffer, 3373 address call_target, 3374 int shadow_space_bytes, 3375 const GrowableArray<VMReg>& input_registers, 3376 const GrowableArray<VMReg>& output_registers) 3377 : StubCodeGenerator(buffer, PrintMethodHandleStubs), 3378 _call_target(call_target), 3379 _shadow_space_bytes(shadow_space_bytes), 3380 _input_registers(input_registers), 3381 _output_registers(output_registers), 3382 _frame_complete(0), 3383 _framesize(0), 3384 _oop_maps(NULL) { 3385 assert(_output_registers.length() <= 1 3386 || (_output_registers.length() == 2 && !_output_registers.at(1)->is_valid()), "no multi-reg returns"); 3387 3388 } 3389 3390 void generate(); 3391 3392 int spill_size_in_bytes() const { 3393 if (_output_registers.length() == 0) { 3394 return 0; 3395 } 3396 VMReg reg = _output_registers.at(0); 3397 assert(reg->is_reg(), "must be a register"); 3398 if (reg->is_Register()) { 3399 return 8; 3400 } else if (reg->is_XMMRegister()) { 3401 if (UseAVX >= 3) { 3402 return 64; 3403 } else if (UseAVX >= 1) { 3404 return 32; 3405 } else { 3406 return 16; 3407 } 3408 } else { 3409 ShouldNotReachHere(); 3410 } 3411 return 0; 3412 } 3413 3414 void spill_out_registers() { 3415 if (_output_registers.length() == 0) { 3416 return; 3417 } 3418 VMReg reg = _output_registers.at(0); 3419 assert(reg->is_reg(), "must be a register"); 3420 MacroAssembler* masm = _masm; 3421 if (reg->is_Register()) { 3422 __ movptr(Address(rsp, 0), reg->as_Register()); 3423 } else if (reg->is_XMMRegister()) { 3424 if (UseAVX >= 3) { 3425 __ evmovdqul(Address(rsp, 0), reg->as_XMMRegister(), Assembler::AVX_512bit); 3426 } else if (UseAVX >= 1) { 3427 __ vmovdqu(Address(rsp, 0), reg->as_XMMRegister()); 3428 } else { 3429 __ movdqu(Address(rsp, 0), reg->as_XMMRegister()); 3430 } 3431 } else { 3432 ShouldNotReachHere(); 3433 } 3434 } 3435 3436 void fill_out_registers() { 3437 if (_output_registers.length() == 0) { 3438 return; 3439 } 3440 VMReg reg = _output_registers.at(0); 3441 assert(reg->is_reg(), "must be a register"); 3442 MacroAssembler* masm = _masm; 3443 if (reg->is_Register()) { 3444 __ movptr(reg->as_Register(), Address(rsp, 0)); 3445 } else if (reg->is_XMMRegister()) { 3446 if (UseAVX >= 3) { 3447 __ evmovdqul(reg->as_XMMRegister(), Address(rsp, 0), Assembler::AVX_512bit); 3448 } else if (UseAVX >= 1) { 3449 __ vmovdqu(reg->as_XMMRegister(), Address(rsp, 0)); 3450 } else { 3451 __ movdqu(reg->as_XMMRegister(), Address(rsp, 0)); 3452 } 3453 } else { 3454 ShouldNotReachHere(); 3455 } 3456 } 3457 3458 int frame_complete() const { 3459 return _frame_complete; 3460 } 3461 3462 int framesize() const { 3463 return (_framesize >> (LogBytesPerWord - LogBytesPerInt)); 3464 } 3465 3466 OopMapSet* oop_maps() const { 3467 return _oop_maps; 3468 } 3469 3470 private: 3471 #ifdef ASSERT 3472 bool target_uses_register(VMReg reg) { 3473 return _input_registers.contains(reg) || _output_registers.contains(reg); 3474 } 3475 #endif 3476 }; 3477 3478 RuntimeStub* 
SharedRuntime::make_native_invoker(address call_target, 3479 int shadow_space_bytes, 3480 const GrowableArray<VMReg>& input_registers, 3481 const GrowableArray<VMReg>& output_registers) { 3482 int locs_size = 64; 3483 CodeBuffer code("nep_invoker_blob", native_invoker_code_size, locs_size); 3484 NativeInvokerGenerator g(&code, call_target, shadow_space_bytes, input_registers, output_registers); 3485 g.generate(); 3486 code.log_section_sizes("nep_invoker_blob"); 3487 3488 RuntimeStub* stub = 3489 RuntimeStub::new_runtime_stub("nep_invoker_blob", 3490 &code, 3491 g.frame_complete(), 3492 g.framesize(), 3493 g.oop_maps(), false); 3494 return stub; 3495 } 3496 3497 void NativeInvokerGenerator::generate() { 3498 assert(!(target_uses_register(r15_thread->as_VMReg()) || target_uses_register(rscratch1->as_VMReg())), "Register conflict"); 3499 3500 enum layout { 3501 rbp_off, 3502 rbp_off2, 3503 return_off, 3504 return_off2, 3505 framesize // inclusive of return address 3506 }; 3507 3508 _framesize = align_up(framesize + ((_shadow_space_bytes + spill_size_in_bytes()) >> LogBytesPerInt), 4); 3509 assert(is_even(_framesize/2), "sp not 16-byte aligned"); 3510 3511 _oop_maps = new OopMapSet(); 3512 MacroAssembler* masm = _masm; 3513 3514 address start = __ pc(); 3515 3516 __ enter(); 3517 3518 // return address and rbp are already in place 3519 __ subptr(rsp, (_framesize-4) << LogBytesPerInt); // prolog 3520 3521 _frame_complete = __ pc() - start; 3522 3523 address the_pc = __ pc(); 3524 3525 __ set_last_Java_frame(rsp, rbp, (address)the_pc); 3526 OopMap* map = new OopMap(_framesize, 0); 3527 _oop_maps->add_gc_map(the_pc - start, map); 3528 3529 // State transition 3530 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native); 3531 3532 __ call(RuntimeAddress(_call_target)); 3533 3534 __ restore_cpu_control_state_after_jni(); 3535 3536 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans); 3537 3538 // Force this write out before the read below 3539 __ membar(Assembler::Membar_mask_bits( 3540 Assembler::LoadLoad | Assembler::LoadStore | 3541 Assembler::StoreLoad | Assembler::StoreStore)); 3542 3543 Label L_after_safepoint_poll; 3544 Label L_safepoint_poll_slow_path; 3545 3546 __ safepoint_poll(L_safepoint_poll_slow_path, r15_thread, true /* at_return */, false /* in_nmethod */); 3547 __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); 3548 __ jcc(Assembler::notEqual, L_safepoint_poll_slow_path); 3549 3550 __ bind(L_after_safepoint_poll); 3551 3552 // change thread state 3553 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); 3554 3555 __ block_comment("reguard stack check"); 3556 Label L_reguard; 3557 Label L_after_reguard; 3558 __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); 3559 __ jcc(Assembler::equal, L_reguard); 3560 __ bind(L_after_reguard); 3561 3562 __ reset_last_Java_frame(r15_thread, true); 3563 3564 __ leave(); // required for proper stackwalking of RuntimeStub frame 3565 __ ret(0); 3566 3567 ////////////////////////////////////////////////////////////////////////////// 3568 3569 __ block_comment("{ L_safepoint_poll_slow_path"); 3570 __ bind(L_safepoint_poll_slow_path); 3571 __ vzeroupper(); 3572 3573 spill_out_registers(); 3574 3575 __ mov(c_rarg0, r15_thread); 3576 __ mov(r12, rsp); // remember sp 3577 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 3578 __ andptr(rsp, -16); // align stack as 
required by ABI 3579 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); 3580 __ mov(rsp, r12); // restore sp 3581 __ reinit_heapbase(); 3582 3583 fill_out_registers(); 3584 3585 __ jmp(L_after_safepoint_poll); 3586 __ block_comment("} L_safepoint_poll_slow_path"); 3587 3588 ////////////////////////////////////////////////////////////////////////////// 3589 3590 __ block_comment("{ L_reguard"); 3591 __ bind(L_reguard); 3592 __ vzeroupper(); 3593 3594 spill_out_registers(); 3595 3596 __ mov(r12, rsp); // remember sp 3597 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 3598 __ andptr(rsp, -16); // align stack as required by ABI 3599 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); 3600 __ mov(rsp, r12); // restore sp 3601 __ reinit_heapbase(); 3602 3603 fill_out_registers(); 3604 3605 __ jmp(L_after_reguard); 3606 3607 __ block_comment("} L_reguard"); 3608 3609 ////////////////////////////////////////////////////////////////////////////// 3610 3611 __ flush(); 3612 } 3613 #endif // COMPILER2 3614 3615 //------------------------------Montgomery multiplication------------------------ 3616 // 3617 3618 #ifndef _WINDOWS 3619 3620 // Subtract 0:b from carry:a. Return carry. 3621 static julong 3622 sub(julong a[], julong b[], julong carry, long len) { 3623 long long i = 0, cnt = len; 3624 julong tmp; 3625 asm volatile("clc; " 3626 "0: ; " 3627 "mov (%[b], %[i], 8), %[tmp]; " 3628 "sbb %[tmp], (%[a], %[i], 8); " 3629 "inc %[i]; dec %[cnt]; " 3630 "jne 0b; " 3631 "mov %[carry], %[tmp]; sbb $0, %[tmp]; " 3632 : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp) 3633 : [a]"r"(a), [b]"r"(b), [carry]"r"(carry) 3634 : "memory"); 3635 return tmp; 3636 } 3637 3638 // Multiply (unsigned) Long A by Long B, accumulating the double- 3639 // length result into the accumulator formed of T0, T1, and T2. 3640 #define MACC(A, B, T0, T1, T2) \ 3641 do { \ 3642 unsigned long hi, lo; \ 3643 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 3644 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 3645 : "r"(A), "a"(B) : "cc"); \ 3646 } while(0) 3647 3648 // As above, but add twice the double-length result into the 3649 // accumulator. 3650 #define MACC2(A, B, T0, T1, T2) \ 3651 do { \ 3652 unsigned long hi, lo; \ 3653 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \ 3654 "add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 3655 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 3656 : "r"(A), "a"(B) : "cc"); \ 3657 } while(0) 3658 3659 #else //_WINDOWS 3660 3661 static julong 3662 sub(julong a[], julong b[], julong carry, long len) { 3663 long i; 3664 julong tmp; 3665 unsigned char c = 1; 3666 for (i = 0; i < len; i++) { 3667 c = _addcarry_u64(c, a[i], ~b[i], &tmp); 3668 a[i] = tmp; 3669 } 3670 c = _addcarry_u64(c, carry, ~0, &tmp); 3671 return tmp; 3672 } 3673 3674 // Multiply (unsigned) Long A by Long B, accumulating the double- 3675 // length result into the accumulator formed of T0, T1, and T2. 3676 #define MACC(A, B, T0, T1, T2) \ 3677 do { \ 3678 julong hi, lo; \ 3679 lo = _umul128(A, B, &hi); \ 3680 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \ 3681 c = _addcarry_u64(c, hi, T1, &T1); \ 3682 _addcarry_u64(c, T2, 0, &T2); \ 3683 } while(0) 3684 3685 // As above, but add twice the double-length result into the 3686 // accumulator. 
3686 // accumulator.
3687 #define MACC2(A, B, T0, T1, T2) \ 3688 do { \ 3689 julong hi, lo; \ 3690 lo = _umul128(A, B, &hi); \ 3691 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \ 3692 c = _addcarry_u64(c, hi, T1, &T1); \ 3693 _addcarry_u64(c, T2, 0, &T2); \ 3694 c = _addcarry_u64(0, lo, T0, &T0); \ 3695 c = _addcarry_u64(c, hi, T1, &T1); \ 3696 _addcarry_u64(c, T2, 0, &T2); \ 3697 } while(0) 3698 3699 #endif //_WINDOWS 3700 3701 // Fast Montgomery multiplication. The derivation of the algorithm is 3702 // in A Cryptographic Library for the Motorola DSP56000, 3703 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 3704 3705 static void NOINLINE 3706 montgomery_multiply(julong a[], julong b[], julong n[], 3707 julong m[], julong inv, int len) { 3708 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 3709 int i; 3710 3711 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery multiply"); 3712 3713 for (i = 0; i < len; i++) { 3714 int j; 3715 for (j = 0; j < i; j++) { 3716 MACC(a[j], b[i-j], t0, t1, t2); 3717 MACC(m[j], n[i-j], t0, t1, t2); 3718 } 3719 MACC(a[i], b[0], t0, t1, t2); 3720 m[i] = t0 * inv; 3721 MACC(m[i], n[0], t0, t1, t2); 3722 3723 assert(t0 == 0, "broken Montgomery multiply"); 3724 3725 t0 = t1; t1 = t2; t2 = 0; 3726 } 3727 3728 for (i = len; i < 2*len; i++) { 3729 int j; 3730 for (j = i-len+1; j < len; j++) { 3731 MACC(a[j], b[i-j], t0, t1, t2); 3732 MACC(m[j], n[i-j], t0, t1, t2); 3733 } 3734 m[i-len] = t0; 3735 t0 = t1; t1 = t2; t2 = 0; 3736 } 3737 3738 while (t0) 3739 t0 = sub(m, n, t0, len); 3740 } 3741 3742 // Fast Montgomery squaring. This uses asymptotically 25% fewer 3743 // multiplies so it should be up to 25% faster than Montgomery 3744 // multiplication. However, its loop control is more complex and it 3745 // may actually run slower on some machines. 3746 3747 static void NOINLINE 3748 montgomery_square(julong a[], julong n[], 3749 julong m[], julong inv, int len) { 3750 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 3751 int i; 3752 3753 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery square"); 3754 3755 for (i = 0; i < len; i++) { 3756 int j; 3757 int end = (i+1)/2; 3758 for (j = 0; j < end; j++) { 3759 MACC2(a[j], a[i-j], t0, t1, t2); 3760 MACC(m[j], n[i-j], t0, t1, t2); 3761 } 3762 if ((i & 1) == 0) { 3763 MACC(a[j], a[j], t0, t1, t2); 3764 } 3765 for (; j < i; j++) { 3766 MACC(m[j], n[i-j], t0, t1, t2); 3767 } 3768 m[i] = t0 * inv; 3769 MACC(m[i], n[0], t0, t1, t2); 3770 3771 assert(t0 == 0, "broken Montgomery square"); 3772 3773 t0 = t1; t1 = t2; t2 = 0; 3774 } 3775 3776 for (i = len; i < 2*len; i++) { 3777 int start = i-len+1; 3778 int end = start + (len - start)/2; 3779 int j; 3780 for (j = start; j < end; j++) { 3781 MACC2(a[j], a[i-j], t0, t1, t2); 3782 MACC(m[j], n[i-j], t0, t1, t2); 3783 } 3784 if ((i & 1) == 0) { 3785 MACC(a[j], a[j], t0, t1, t2); 3786 } 3787 for (; j < len; j++) { 3788 MACC(m[j], n[i-j], t0, t1, t2); 3789 } 3790 m[i-len] = t0; 3791 t0 = t1; t1 = t2; t2 = 0; 3792 } 3793 3794 while (t0) 3795 t0 = sub(m, n, t0, len); 3796 } 3797 3798 // Swap words in a longword. 3799 static julong swap(julong x) { 3800 return (x << 32) | (x >> 32); 3801 } 3802 3803 // Copy len longwords from s to d, word-swapping as we go. The 3804 // destination array is reversed. 
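// For example, with len == 2 the loop performs
//   d[1] = swap(s[0]);
//   d[0] = swap(s[1]);
// turning the most-significant-first jint layout of the Java arrays into the
// least-significant-first julong layout used by the Montgomery routines above
// (and back again on the way out).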
3805 static void reverse_words(julong *s, julong *d, int len) {
3806 d += len;
3807 while(len-- > 0) {
3808 d--;
3809 *d = swap(*s);
3810 s++;
3811 }
3812 }
3813
3814 // The threshold at which squaring is advantageous was determined
3815 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3816 #define MONTGOMERY_SQUARING_THRESHOLD 64
3817
3818 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3819 jint len, jlong inv,
3820 jint *m_ints) {
3821 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3822 int longwords = len/2;
3823
3824 // Make very sure we don't use so much space that the stack might
3825 // overflow. 512 jints corresponds to a 16384-bit integer and
3826 // will use a total of 8k bytes of stack space here.
3827 int divisor = sizeof(julong) * 4;
3828 guarantee(longwords <= 8192 / divisor, "must be");
3829 int total_allocation = longwords * sizeof (julong) * 4;
3830 julong *scratch = (julong *)alloca(total_allocation);
3831
3832 // Local scratch arrays
3833 julong
3834 *a = scratch + 0 * longwords,
3835 *b = scratch + 1 * longwords,
3836 *n = scratch + 2 * longwords,
3837 *m = scratch + 3 * longwords;
3838
3839 reverse_words((julong *)a_ints, a, longwords);
3840 reverse_words((julong *)b_ints, b, longwords);
3841 reverse_words((julong *)n_ints, n, longwords);
3842
3843 ::montgomery_multiply(a, b, n, m, (julong)inv, longwords);
3844
3845 reverse_words(m, (julong *)m_ints, longwords);
3846 }
3847
3848 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3849 jint len, jlong inv,
3850 jint *m_ints) {
3851 assert(len % 2 == 0, "array length in montgomery_square must be even");
3852 int longwords = len/2;
3853
3854 // Make very sure we don't use so much space that the stack might
3855 // overflow. 512 jints corresponds to a 16384-bit integer and
3856 // will use a total of 6k bytes of stack space here.
3857 int divisor = sizeof(julong) * 3;
3858 guarantee(longwords <= (8192 / divisor), "must be");
3859 int total_allocation = longwords * sizeof (julong) * 3;
3860 julong *scratch = (julong *)alloca(total_allocation);
3861
3862 // Local scratch arrays
3863 julong
3864 *a = scratch + 0 * longwords,
3865 *n = scratch + 1 * longwords,
3866 *m = scratch + 2 * longwords;
3867
3868 reverse_words((julong *)a_ints, a, longwords);
3869 reverse_words((julong *)n_ints, n, longwords);
3870
3871 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3872 ::montgomery_square(a, n, m, (julong)inv, longwords);
3873 } else {
3874 ::montgomery_multiply(a, a, n, m, (julong)inv, longwords);
3875 }
3876
3877 reverse_words(m, (julong *)m_ints, longwords);
3878 }
3879
3880 #ifdef COMPILER2
3881 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
3882 //
3883 //------------------------------generate_exception_blob---------------------------
3884 // Creates the exception blob at the end.
3885 // Compiled methods jump to this code via the exception blob
3886 // (see emit_exception_handler in x86_64.ad file).
3887 //
3888 // Given an exception pc at a call, we call into the runtime for the
3889 // handler in this method. This handler might merely restore state
3890 // (i.e. callee-saved registers), unwind the frame, and jump to the
3891 // exception handler for the nmethod if there is no Java-level handler
3892 // for the nmethod.
3893 //
3894 // This code is entered with a jmp.
3895 // 3896 // Arguments: 3897 // rax: exception oop 3898 // rdx: exception pc 3899 // 3900 // Results: 3901 // rax: exception oop 3902 // rdx: exception pc in caller or ??? 3903 // destination: exception handler of caller 3904 // 3905 // Note: the exception pc MUST be at a call (precise debug information) 3906 // Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved. 3907 // 3908 3909 void OptoRuntime::generate_exception_blob() { 3910 assert(!OptoRuntime::is_callee_saved_register(RDX_num), ""); 3911 assert(!OptoRuntime::is_callee_saved_register(RAX_num), ""); 3912 assert(!OptoRuntime::is_callee_saved_register(RCX_num), ""); 3913 3914 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); 3915 3916 // Allocate space for the code 3917 ResourceMark rm; 3918 // Setup code generation tools 3919 CodeBuffer buffer("exception_blob", 2048, 1024); 3920 MacroAssembler* masm = new MacroAssembler(&buffer); 3921 3922 3923 address start = __ pc(); 3924 3925 // Exception pc is 'return address' for stack walker 3926 __ push(rdx); 3927 __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog 3928 3929 // Save callee-saved registers. See x86_64.ad. 3930 3931 // rbp is an implicitly saved callee saved register (i.e., the calling 3932 // convention will save/restore it in the prolog/epilog). Other than that 3933 // there are no callee save registers now that adapter frames are gone. 3934 3935 __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); 3936 3937 // Store exception in Thread object. We cannot pass any arguments to the 3938 // handle_exception call, since we do not want to make any assumption 3939 // about the size of the frame where the exception happened in. 3940 // c_rarg0 is either rdi (Linux) or rcx (Windows). 3941 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()),rax); 3942 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx); 3943 3944 // This call does all the hard work. It checks if an exception handler 3945 // exists in the method. 3946 // If so, it returns the handler address. 3947 // If not, it prepares for stack-unwinding, restoring the callee-save 3948 // registers of the frame being removed. 3949 // 3950 // address OptoRuntime::handle_exception_C(JavaThread* thread) 3951 3952 // At a method handle call, the stack may not be properly aligned 3953 // when returning with an exception. 3954 address the_pc = __ pc(); 3955 __ set_last_Java_frame(noreg, noreg, the_pc); 3956 __ mov(c_rarg0, r15_thread); 3957 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack 3958 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C))); 3959 3960 // Set an oopmap for the call site. This oopmap will only be used if we 3961 // are unwinding the stack. Hence, all locations will be dead. 3962 // Callee-saved registers will be the same as the frame above (i.e., 3963 // handle_exception_stub), since they were restored when we got the 3964 // exception. 3965 3966 OopMapSet* oop_maps = new OopMapSet(); 3967 3968 oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); 3969 3970 __ reset_last_Java_frame(false); 3971 3972 // Restore callee-saved registers 3973 3974 // rbp is an implicitly saved callee-saved register (i.e., the calling 3975 // convention will save restore it in prolog/epilog) Other than that 3976 // there are no callee save registers now that adapter frames are gone. 
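// Reload rbp from the slot it was stored to in the prolog of this blob; the
// SimpleRuntimeFrame offsets are jint slots, scaled to bytes with LogBytesPerInt.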
3977 3978 __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt)); 3979 3980 __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog 3981 __ pop(rdx); // No need for exception pc anymore 3982 3983 // rax: exception handler 3984 3985 // We have a handler in rax (could be deopt blob). 3986 __ mov(r8, rax); 3987 3988 // Get the exception oop 3989 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 3990 // Get the exception pc in case we are deoptimized 3991 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 3992 #ifdef ASSERT 3993 __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD); 3994 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD); 3995 #endif 3996 // Clear the exception oop so GC no longer processes it as a root. 3997 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD); 3998 3999 // rax: exception oop 4000 // r8: exception handler 4001 // rdx: exception pc 4002 // Jump to handler 4003 4004 __ jmp(r8); 4005 4006 // Make sure all code is generated 4007 masm->flush(); 4008 4009 // Set exception blob 4010 _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); 4011 } 4012 #endif // COMPILER2 4013 4014 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { 4015 BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K); 4016 CodeBuffer buffer(buf); 4017 short buffer_locs[20]; 4018 buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, 4019 sizeof(buffer_locs)/sizeof(relocInfo)); 4020 4021 MacroAssembler* masm = new MacroAssembler(&buffer); 4022 4023 const Array<SigEntry>* sig_vk = vk->extended_sig(); 4024 const Array<VMRegPair>* regs = vk->return_regs(); 4025 4026 int pack_fields_jobject_off = __ offset(); 4027 // Resolve pre-allocated buffer from JNI handle. 4028 // We cannot do this in generate_call_stub() because it requires GC code to be initialized. 
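// r13 is expected to point at the slot holding the jobject handle for the
// pre-allocated buffer: load the handle, resolve it to a raw oop (r12 serves
// as a temporary), and store the oop back into the same slot.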
4029 __ movptr(rax, Address(r13, 0)); 4030 __ resolve_jobject(rax /* value */, 4031 r15_thread /* thread */, 4032 r12 /* tmp */); 4033 __ movptr(Address(r13, 0), rax); 4034 4035 int pack_fields_off = __ offset(); 4036 4037 int j = 1; 4038 for (int i = 0; i < sig_vk->length(); i++) { 4039 BasicType bt = sig_vk->at(i)._bt; 4040 if (bt == T_PRIMITIVE_OBJECT) { 4041 continue; 4042 } 4043 if (bt == T_VOID) { 4044 if (sig_vk->at(i-1)._bt == T_LONG || 4045 sig_vk->at(i-1)._bt == T_DOUBLE) { 4046 j++; 4047 } 4048 continue; 4049 } 4050 int off = sig_vk->at(i)._offset; 4051 assert(off > 0, "offset in object should be positive"); 4052 VMRegPair pair = regs->at(j); 4053 VMReg r_1 = pair.first(); 4054 VMReg r_2 = pair.second(); 4055 Address to(rax, off); 4056 if (bt == T_FLOAT) { 4057 __ movflt(to, r_1->as_XMMRegister()); 4058 } else if (bt == T_DOUBLE) { 4059 __ movdbl(to, r_1->as_XMMRegister()); 4060 } else { 4061 Register val = r_1->as_Register(); 4062 assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1); 4063 if (is_reference_type(bt)) { 4064 __ store_heap_oop(to, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); 4065 } else { 4066 __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt)); 4067 } 4068 } 4069 j++; 4070 } 4071 assert(j == regs->length(), "missed a field?"); 4072 4073 __ ret(0); 4074 4075 int unpack_fields_off = __ offset(); 4076 4077 Label skip; 4078 __ testptr(rax, rax); 4079 __ jcc(Assembler::zero, skip); 4080 4081 j = 1; 4082 for (int i = 0; i < sig_vk->length(); i++) { 4083 BasicType bt = sig_vk->at(i)._bt; 4084 if (bt == T_PRIMITIVE_OBJECT) { 4085 continue; 4086 } 4087 if (bt == T_VOID) { 4088 if (sig_vk->at(i-1)._bt == T_LONG || 4089 sig_vk->at(i-1)._bt == T_DOUBLE) { 4090 j++; 4091 } 4092 continue; 4093 } 4094 int off = sig_vk->at(i)._offset; 4095 assert(off > 0, "offset in object should be positive"); 4096 VMRegPair pair = regs->at(j); 4097 VMReg r_1 = pair.first(); 4098 VMReg r_2 = pair.second(); 4099 Address from(rax, off); 4100 if (bt == T_FLOAT) { 4101 __ movflt(r_1->as_XMMRegister(), from); 4102 } else if (bt == T_DOUBLE) { 4103 __ movdbl(r_1->as_XMMRegister(), from); 4104 } else if (bt == T_OBJECT || bt == T_ARRAY) { 4105 assert_different_registers(rax, r_1->as_Register()); 4106 __ load_heap_oop(r_1->as_Register(), from); 4107 } else { 4108 assert(is_java_primitive(bt), "unexpected basic type"); 4109 assert_different_registers(rax, r_1->as_Register()); 4110 size_t size_in_bytes = type2aelembytes(bt); 4111 __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); 4112 } 4113 j++; 4114 } 4115 assert(j == regs->length(), "missed a field?"); 4116 4117 __ bind(skip); 4118 __ ret(0); 4119 4120 __ flush(); 4121 4122 return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off); 4123 } 4124 4125 void SharedRuntime::compute_move_order(const BasicType* in_sig_bt, 4126 int total_in_args, const VMRegPair* in_regs, 4127 int total_out_args, VMRegPair* out_regs, 4128 GrowableArray<int>& arg_order, 4129 VMRegPair tmp_vmreg) { 4130 ComputeMoveOrder order(total_in_args, in_regs, 4131 total_out_args, out_regs, 4132 in_sig_bt, arg_order, tmp_vmreg); 4133 }
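// ComputeMoveOrder's constructor is expected to do all the work here, filling
// arg_order in place with an ordering of the argument moves that never reads a
// source after it has been clobbered, using tmp_vmreg to break cycles; the
// temporary object itself is not needed afterwards.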