1 /* 2 * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #ifndef _WINDOWS 27 #include "alloca.h" 28 #endif 29 #include "asm/macroAssembler.hpp" 30 #include "asm/macroAssembler.inline.hpp" 31 #include "code/debugInfoRec.hpp" 32 #include "code/icBuffer.hpp" 33 #include "code/nativeInst.hpp" 34 #include "code/vtableStubs.hpp" 35 #include "compiler/disassembler.hpp" 36 #include "compiler/oopMap.hpp" 37 #include "gc/shared/collectedHeap.hpp" 38 #include "gc/shared/gcLocker.hpp" 39 #include "gc/shared/barrierSet.hpp" 40 #include "gc/shared/barrierSetAssembler.hpp" 41 #include "interpreter/interpreter.hpp" 42 #include "logging/log.hpp" 43 #include "logging/logStream.hpp" 44 #include "memory/resourceArea.hpp" 45 #include "memory/universe.hpp" 46 #include "oops/compiledICHolder.hpp" 47 #include "oops/klass.inline.hpp" 48 #include "prims/methodHandles.hpp" 49 #include "runtime/jniHandles.hpp" 50 #include "runtime/safepointMechanism.hpp" 51 #include "runtime/sharedRuntime.hpp" 52 #include "runtime/signature.hpp" 53 #include "runtime/stubRoutines.hpp" 54 #include "runtime/vframeArray.hpp" 55 #include "runtime/vm_version.hpp" 56 #include "utilities/align.hpp" 57 #include "utilities/formatBuffer.hpp" 58 #include "vmreg_x86.inline.hpp" 59 #ifdef COMPILER1 60 #include "c1/c1_Runtime1.hpp" 61 #endif 62 #ifdef COMPILER2 63 #include "opto/runtime.hpp" 64 #endif 65 #if INCLUDE_JVMCI 66 #include "jvmci/jvmciJavaClasses.hpp" 67 #endif 68 69 #define __ masm-> 70 71 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; 72 73 class SimpleRuntimeFrame { 74 75 public: 76 77 // Most of the runtime stubs have this simple frame layout. 78 // This class exists to make the layout shared in one place. 79 // Offsets are for compiler stack slots, which are jints. 80 enum layout { 81 // The frame sender code expects that rbp will be in the "natural" place and 82 // will override any oopMap setting for it. We must therefore force the layout 83 // so that it agrees with the frame sender code. 84 rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt, 85 rbp_off2, 86 return_off, return_off2, 87 framesize 88 }; 89 }; 90 91 class RegisterSaver { 92 // Capture info about frame layout. Layout offsets are in jint 93 // units because compiler frame slots are jints. 
94 #define XSAVE_AREA_BEGIN 160 95 #define XSAVE_AREA_YMM_BEGIN 576 96 #define XSAVE_AREA_OPMASK_BEGIN 1088 97 #define XSAVE_AREA_ZMM_BEGIN 1152 98 #define XSAVE_AREA_UPPERBANK 1664 99 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off 100 #define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off 101 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum)*32/BytesPerInt, zmm ## regnum ## H_off 102 #define DEF_OPMASK_OFFS(regnum) opmask ## regnum ## _off = opmask_off + (regnum)*8/BytesPerInt, opmask ## regnum ## H_off 103 #define DEF_ZMM_UPPER_OFFS(regnum) zmm ## regnum ## _off = zmm_upper_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off 104 enum layout { 105 fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area 106 xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area 107 DEF_XMM_OFFS(0), 108 DEF_XMM_OFFS(1), 109 // 2..15 are implied in range usage 110 ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, 111 DEF_YMM_OFFS(0), 112 DEF_YMM_OFFS(1), 113 // 2..15 are implied in range usage 114 opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, 115 DEF_OPMASK_OFFS(0), 116 DEF_OPMASK_OFFS(1), 117 // 2..7 are implied in range usage 118 zmm_off = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, 119 DEF_ZMM_OFFS(0), 120 DEF_ZMM_OFFS(1), 121 zmm_upper_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt, 122 DEF_ZMM_UPPER_OFFS(16), 123 DEF_ZMM_UPPER_OFFS(17), 124 // 18..31 are implied in range usage 125 fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt), 126 fpu_stateH_end, 127 r15_off, r15H_off, 128 r14_off, r14H_off, 129 r13_off, r13H_off, 130 r12_off, r12H_off, 131 r11_off, r11H_off, 132 r10_off, r10H_off, 133 r9_off, r9H_off, 134 r8_off, r8H_off, 135 rdi_off, rdiH_off, 136 rsi_off, rsiH_off, 137 ignore_off, ignoreH_off, // extra copy of rbp 138 rsp_off, rspH_off, 139 rbx_off, rbxH_off, 140 rdx_off, rdxH_off, 141 rcx_off, rcxH_off, 142 rax_off, raxH_off, 143 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state 144 align_off, alignH_off, 145 flags_off, flagsH_off, 146 // The frame sender code expects that rbp will be in the "natural" place and 147 // will override any oopMap setting for it. We must therefore force the layout 148 // so that it agrees with the frame sender code. 
    rbp_off, rbpH_off,        // copy of rbp we will restore
    return_off, returnH_off,  // slot for return address
    reg_save_size             // size in compiler stack slots
  };

 public:
  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors);
  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);

  // Offsets into the register save area
  // Used by deoptimization when it is managing result register
  // values on its own

  static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
  static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
  static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
  static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
  static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }

  // During deoptimization only the result registers need to be restored,
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};

// Register is a class, but it gets assigned a numerical value.
// "0" is assigned for rax. Thus we need to ignore -Wnonnull.
PRAGMA_DIAG_PUSH
PRAGMA_NONNULL_IGNORED
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
  int off = 0;
  int num_xmm_regs = XMMRegisterImpl::available_xmm_registers();
#if COMPILER2_OR_JVMCI
  if (save_vectors && UseAVX == 0) {
    save_vectors = false; // vectors larger than 16 bytes are supported only with AVX
  }
  assert(!save_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
#else
  save_vectors = false; // vectors are generated only by C2 and JVMCI
#endif

  // Always make the frame size 16-byte aligned, both vector and non-vector stacks are always allocated
  int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;

  // Save registers, fpu state, and flags.
  // We assume the caller has already pushed the return address onto the
  // stack, so rsp is 8-byte aligned here.
  // We push rbp twice in this sequence because we want the real rbp
  // to be under the return address like a normal enter.
202 203 __ enter(); // rsp becomes 16-byte aligned here 204 __ push_CPU_state(); // Push a multiple of 16 bytes 205 206 // push cpu state handles this on EVEX enabled targets 207 if (save_vectors) { 208 // Save upper half of YMM registers(0..15) 209 int base_addr = XSAVE_AREA_YMM_BEGIN; 210 for (int n = 0; n < 16; n++) { 211 __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n)); 212 } 213 if (VM_Version::supports_evex()) { 214 // Save upper half of ZMM registers(0..15) 215 base_addr = XSAVE_AREA_ZMM_BEGIN; 216 for (int n = 0; n < 16; n++) { 217 __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n)); 218 } 219 // Save full ZMM registers(16..num_xmm_regs) 220 base_addr = XSAVE_AREA_UPPERBANK; 221 off = 0; 222 int vector_len = Assembler::AVX_512bit; 223 for (int n = 16; n < num_xmm_regs; n++) { 224 __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len); 225 } 226 #if COMPILER2_OR_JVMCI 227 base_addr = XSAVE_AREA_OPMASK_BEGIN; 228 off = 0; 229 for(int n = 0; n < KRegisterImpl::number_of_registers; n++) { 230 __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n)); 231 } 232 #endif 233 } 234 } else { 235 if (VM_Version::supports_evex()) { 236 // Save upper bank of ZMM registers(16..31) for double/float usage 237 int base_addr = XSAVE_AREA_UPPERBANK; 238 off = 0; 239 for (int n = 16; n < num_xmm_regs; n++) { 240 __ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n)); 241 } 242 #if COMPILER2_OR_JVMCI 243 base_addr = XSAVE_AREA_OPMASK_BEGIN; 244 off = 0; 245 for(int n = 0; n < KRegisterImpl::number_of_registers; n++) { 246 __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n)); 247 } 248 #endif 249 } 250 } 251 __ vzeroupper(); 252 if (frame::arg_reg_save_area_bytes != 0) { 253 // Allocate argument register save area 254 __ subptr(rsp, frame::arg_reg_save_area_bytes); 255 } 256 257 // Set an oopmap for the call site. This oopmap will map all 258 // oop-registers and debug-info registers as callee-saved. This 259 // will allow deoptimization at this safepoint to find all possible 260 // debug-info recordings, as well as let GC find all oops. 
261 262 OopMapSet *oop_maps = new OopMapSet(); 263 OopMap* map = new OopMap(frame_size_in_slots, 0); 264 265 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x)) 266 267 map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg()); 268 map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg()); 269 map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg()); 270 map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg()); 271 // rbp location is known implicitly by the frame sender code, needs no oopmap 272 // and the location where rbp was saved by is ignored 273 map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg()); 274 map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg()); 275 map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg()); 276 map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg()); 277 map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg()); 278 map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg()); 279 map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg()); 280 map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg()); 281 map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg()); 282 map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg()); 283 // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15, 284 // on EVEX enabled targets, we get it included in the xsave area 285 off = xmm0_off; 286 int delta = xmm1_off - off; 287 for (int n = 0; n < 16; n++) { 288 XMMRegister xmm_name = as_XMMRegister(n); 289 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()); 290 off += delta; 291 } 292 if (UseAVX > 2) { 293 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets 294 off = zmm16_off; 295 delta = zmm17_off - off; 296 for (int n = 16; n < num_xmm_regs; n++) { 297 XMMRegister zmm_name = as_XMMRegister(n); 298 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()); 299 off += delta; 300 } 301 } 302 303 #if COMPILER2_OR_JVMCI 304 if (save_vectors) { 305 // Save upper half of YMM registers(0..15) 306 off = ymm0_off; 307 delta = ymm1_off - ymm0_off; 308 for (int n = 0; n < 16; n++) { 309 XMMRegister ymm_name = as_XMMRegister(n); 310 map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4)); 311 off += delta; 312 } 313 if (VM_Version::supports_evex()) { 314 // Save upper half of ZMM registers(0..15) 315 off = zmm0_off; 316 delta = zmm1_off - zmm0_off; 317 for (int n = 0; n < 16; n++) { 318 XMMRegister zmm_name = as_XMMRegister(n); 319 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next(8)); 320 off += delta; 321 } 322 } 323 } 324 #endif // COMPILER2_OR_JVMCI 325 326 // %%% These should all be a waste but we'll keep things as they were for now 327 if (true) { 328 map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next()); 329 map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next()); 330 map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next()); 331 map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next()); 332 // rbp location is known implicitly by the frame sender code, needs no oopmap 333 map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next()); 334 map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next()); 335 map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next()); 336 map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next()); 337 map->set_callee_saved(STACK_OFFSET( r10H_off ), 
r10->as_VMReg()->next()); 338 map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next()); 339 map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next()); 340 map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next()); 341 map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next()); 342 map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next()); 343 // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15, 344 // on EVEX enabled targets, we get it included in the xsave area 345 off = xmm0H_off; 346 delta = xmm1H_off - off; 347 for (int n = 0; n < 16; n++) { 348 XMMRegister xmm_name = as_XMMRegister(n); 349 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next()); 350 off += delta; 351 } 352 if (UseAVX > 2) { 353 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets 354 off = zmm16H_off; 355 delta = zmm17H_off - off; 356 for (int n = 16; n < num_xmm_regs; n++) { 357 XMMRegister zmm_name = as_XMMRegister(n); 358 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next()); 359 off += delta; 360 } 361 } 362 } 363 364 return map; 365 } 366 PRAGMA_DIAG_POP 367 368 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { 369 int num_xmm_regs = XMMRegisterImpl::available_xmm_registers(); 370 if (frame::arg_reg_save_area_bytes != 0) { 371 // Pop arg register save area 372 __ addptr(rsp, frame::arg_reg_save_area_bytes); 373 } 374 375 #if COMPILER2_OR_JVMCI 376 if (restore_vectors) { 377 assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); 378 assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); 379 } 380 #else 381 assert(!restore_vectors, "vectors are generated only by C2"); 382 #endif 383 384 __ vzeroupper(); 385 386 // On EVEX enabled targets everything is handled in pop fpu state 387 if (restore_vectors) { 388 // Restore upper half of YMM registers (0..15) 389 int base_addr = XSAVE_AREA_YMM_BEGIN; 390 for (int n = 0; n < 16; n++) { 391 __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16)); 392 } 393 if (VM_Version::supports_evex()) { 394 // Restore upper half of ZMM registers (0..15) 395 base_addr = XSAVE_AREA_ZMM_BEGIN; 396 for (int n = 0; n < 16; n++) { 397 __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32)); 398 } 399 // Restore full ZMM registers(16..num_xmm_regs) 400 base_addr = XSAVE_AREA_UPPERBANK; 401 int vector_len = Assembler::AVX_512bit; 402 int off = 0; 403 for (int n = 16; n < num_xmm_regs; n++) { 404 __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len); 405 } 406 #if COMPILER2_OR_JVMCI 407 base_addr = XSAVE_AREA_OPMASK_BEGIN; 408 off = 0; 409 for (int n = 0; n < KRegisterImpl::number_of_registers; n++) { 410 __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8))); 411 } 412 #endif 413 } 414 } else { 415 if (VM_Version::supports_evex()) { 416 // Restore upper bank of ZMM registers(16..31) for double/float usage 417 int base_addr = XSAVE_AREA_UPPERBANK; 418 int off = 0; 419 for (int n = 16; n < num_xmm_regs; n++) { 420 __ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64))); 421 } 422 #if COMPILER2_OR_JVMCI 423 base_addr = XSAVE_AREA_OPMASK_BEGIN; 424 off = 0; 425 for (int n = 0; n < KRegisterImpl::number_of_registers; n++) { 426 __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8))); 427 } 428 #endif 429 } 430 } 431 432 // Recover CPU state 433 __ pop_CPU_state(); 434 // Get the rbp described 
implicitly by the calling convention (no oopMap)
  __ pop(rbp);
}

void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore result register. Only used by deoptimization. By
  // now any callee save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.

  // Restore fp result register
  __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
  // Restore integer result register
  __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
  __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));

  // Pop all of the register save area off the stack except the return address
  __ addptr(rsp, return_offset_in_bytes());
}

// Is vector's size (in bytes) bigger than a size saved by default?
// 16-byte XMM registers are saved by default using fxsave/fxrstor instructions.
bool SharedRuntime::is_wide_vector(int size) {
  return size > 16;
}

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Register
// values up to RegisterImpl::number_of_registers are the 64-bit
// integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build. Of course for i486 there is no 64-bit build.

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.

int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed) {

  // Create the mapping between argument positions and
  // registers.
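  // For illustration only (a hypothetical signature, not taken from this file):
  // given the register tables below, a Java signature (int, long, Object, double, float)
  // is assigned as
  //   int    -> j_rarg0
  //   long   -> j_rarg1 (its trailing T_VOID half gets set_bad())
  //   Object -> j_rarg2
  //   double -> j_farg0 (its trailing T_VOID half gets set_bad())
  //   float  -> j_farg1
  // Only once the six j_rargN (or eight j_fargN) registers are exhausted do
  // arguments spill to stack slots, two VMRegImpl slots at a time.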
489 static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { 490 j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5 491 }; 492 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { 493 j_farg0, j_farg1, j_farg2, j_farg3, 494 j_farg4, j_farg5, j_farg6, j_farg7 495 }; 496 497 498 uint int_args = 0; 499 uint fp_args = 0; 500 uint stk_args = 0; // inc by 2 each time 501 502 for (int i = 0; i < total_args_passed; i++) { 503 switch (sig_bt[i]) { 504 case T_BOOLEAN: 505 case T_CHAR: 506 case T_BYTE: 507 case T_SHORT: 508 case T_INT: 509 if (int_args < Argument::n_int_register_parameters_j) { 510 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); 511 } else { 512 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 513 stk_args += 2; 514 } 515 break; 516 case T_VOID: 517 // halves of T_LONG or T_DOUBLE 518 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 519 regs[i].set_bad(); 520 break; 521 case T_LONG: 522 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 523 // fall through 524 case T_OBJECT: 525 case T_ARRAY: 526 case T_ADDRESS: 527 if (int_args < Argument::n_int_register_parameters_j) { 528 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); 529 } else { 530 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 531 stk_args += 2; 532 } 533 break; 534 case T_FLOAT: 535 if (fp_args < Argument::n_float_register_parameters_j) { 536 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); 537 } else { 538 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 539 stk_args += 2; 540 } 541 break; 542 case T_DOUBLE: 543 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 544 if (fp_args < Argument::n_float_register_parameters_j) { 545 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); 546 } else { 547 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 548 stk_args += 2; 549 } 550 break; 551 default: 552 ShouldNotReachHere(); 553 break; 554 } 555 } 556 557 return align_up(stk_args, 2); 558 } 559 560 // Patch the callers callsite with entry to compiled code if it exists. 561 static void patch_callers_callsite(MacroAssembler *masm) { 562 Label L; 563 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD); 564 __ jcc(Assembler::equal, L); 565 566 // Save the current stack pointer 567 __ mov(r13, rsp); 568 // Schedule the branch target address early. 
569 // Call into the VM to patch the caller, then jump to compiled callee 570 // rax isn't live so capture return address while we easily can 571 __ movptr(rax, Address(rsp, 0)); 572 573 // align stack so push_CPU_state doesn't fault 574 __ andptr(rsp, -(StackAlignmentInBytes)); 575 __ push_CPU_state(); 576 __ vzeroupper(); 577 // VM needs caller's callsite 578 // VM needs target method 579 // This needs to be a long call since we will relocate this adapter to 580 // the codeBuffer and it may not reach 581 582 // Allocate argument register save area 583 if (frame::arg_reg_save_area_bytes != 0) { 584 __ subptr(rsp, frame::arg_reg_save_area_bytes); 585 } 586 __ mov(c_rarg0, rbx); 587 __ mov(c_rarg1, rax); 588 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); 589 590 // De-allocate argument register save area 591 if (frame::arg_reg_save_area_bytes != 0) { 592 __ addptr(rsp, frame::arg_reg_save_area_bytes); 593 } 594 595 __ vzeroupper(); 596 __ pop_CPU_state(); 597 // restore sp 598 __ mov(rsp, r13); 599 __ bind(L); 600 } 601 602 603 static void gen_c2i_adapter(MacroAssembler *masm, 604 int total_args_passed, 605 int comp_args_on_stack, 606 const BasicType *sig_bt, 607 const VMRegPair *regs, 608 Label& skip_fixup) { 609 // Before we get into the guts of the C2I adapter, see if we should be here 610 // at all. We've come from compiled code and are attempting to jump to the 611 // interpreter, which means the caller made a static call to get here 612 // (vcalls always get a compiled target if there is one). Check for a 613 // compiled target. If there is one, we need to patch the caller's call. 614 patch_callers_callsite(masm); 615 616 __ bind(skip_fixup); 617 618 // Since all args are passed on the stack, total_args_passed * 619 // Interpreter::stackElementSize is the space we need. Plus 1 because 620 // we also account for the return address location since 621 // we store it first rather than hold it in rax across all the shuffling 622 623 int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize; 624 625 // stack is aligned, keep it that way 626 extraspace = align_up(extraspace, 2*wordSize); 627 628 // Get return address 629 __ pop(rax); 630 631 // set senderSP value 632 __ mov(r13, rsp); 633 634 __ subptr(rsp, extraspace); 635 636 // Store the return address in the expected location 637 __ movptr(Address(rsp, 0), rax); 638 639 // Now write the args into the outgoing interpreter space 640 for (int i = 0; i < total_args_passed; i++) { 641 if (sig_bt[i] == T_VOID) { 642 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); 643 continue; 644 } 645 646 // offset to start parameters 647 int st_off = (total_args_passed - i) * Interpreter::stackElementSize; 648 int next_off = st_off - Interpreter::stackElementSize; 649 650 // Say 4 args: 651 // i st_off 652 // 0 32 T_LONG 653 // 1 24 T_VOID 654 // 2 16 T_OBJECT 655 // 3 8 T_BOOL 656 // - 0 return address 657 // 658 // However to make thing extra confusing. Because we can fit a long/double in 659 // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter 660 // leaves one slot empty and only stores to a single slot. In this case the 661 // slot that is occupied is the T_VOID slot. See I said it was confusing. 
662 663 VMReg r_1 = regs[i].first(); 664 VMReg r_2 = regs[i].second(); 665 if (!r_1->is_valid()) { 666 assert(!r_2->is_valid(), ""); 667 continue; 668 } 669 if (r_1->is_stack()) { 670 // memory to memory use rax 671 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; 672 if (!r_2->is_valid()) { 673 // sign extend?? 674 __ movl(rax, Address(rsp, ld_off)); 675 __ movptr(Address(rsp, st_off), rax); 676 677 } else { 678 679 __ movq(rax, Address(rsp, ld_off)); 680 681 // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG 682 // T_DOUBLE and T_LONG use two slots in the interpreter 683 if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { 684 // ld_off == LSW, ld_off+wordSize == MSW 685 // st_off == MSW, next_off == LSW 686 __ movq(Address(rsp, next_off), rax); 687 #ifdef ASSERT 688 // Overwrite the unused slot with known junk 689 __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); 690 __ movptr(Address(rsp, st_off), rax); 691 #endif /* ASSERT */ 692 } else { 693 __ movq(Address(rsp, st_off), rax); 694 } 695 } 696 } else if (r_1->is_Register()) { 697 Register r = r_1->as_Register(); 698 if (!r_2->is_valid()) { 699 // must be only an int (or less ) so move only 32bits to slot 700 // why not sign extend?? 701 __ movl(Address(rsp, st_off), r); 702 } else { 703 // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG 704 // T_DOUBLE and T_LONG use two slots in the interpreter 705 if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { 706 // long/double in gpr 707 #ifdef ASSERT 708 // Overwrite the unused slot with known junk 709 __ mov64(rax, CONST64(0xdeadffffdeadaaab)); 710 __ movptr(Address(rsp, st_off), rax); 711 #endif /* ASSERT */ 712 __ movq(Address(rsp, next_off), r); 713 } else { 714 __ movptr(Address(rsp, st_off), r); 715 } 716 } 717 } else { 718 assert(r_1->is_XMMRegister(), ""); 719 if (!r_2->is_valid()) { 720 // only a float use just part of the slot 721 __ movflt(Address(rsp, st_off), r_1->as_XMMRegister()); 722 } else { 723 #ifdef ASSERT 724 // Overwrite the unused slot with known junk 725 __ mov64(rax, CONST64(0xdeadffffdeadaaac)); 726 __ movptr(Address(rsp, st_off), rax); 727 #endif /* ASSERT */ 728 __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister()); 729 } 730 } 731 } 732 733 // Schedule the branch target address early. 734 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset()))); 735 __ jmp(rcx); 736 } 737 738 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, 739 address code_start, address code_end, 740 Label& L_ok) { 741 Label L_fail; 742 __ lea(temp_reg, ExternalAddress(code_start)); 743 __ cmpptr(pc_reg, temp_reg); 744 __ jcc(Assembler::belowEqual, L_fail); 745 __ lea(temp_reg, ExternalAddress(code_end)); 746 __ cmpptr(pc_reg, temp_reg); 747 __ jcc(Assembler::below, L_ok); 748 __ bind(L_fail); 749 } 750 751 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, 752 int total_args_passed, 753 int comp_args_on_stack, 754 const BasicType *sig_bt, 755 const VMRegPair *regs) { 756 757 // Note: r13 contains the senderSP on entry. We must preserve it since 758 // we may do a i2c -> c2i transition if we lose a race where compiled 759 // code goes non-entrant while we get args ready. 760 // In addition we use r13 to locate all the interpreter args as 761 // we must align the stack to 16 bytes on an i2c entry else we 762 // lose alignment we expect in all compiled code and register 763 // save code can segv when fxsave instructions find improperly 764 // aligned stack pointer. 
765 766 // Adapters can be frameless because they do not require the caller 767 // to perform additional cleanup work, such as correcting the stack pointer. 768 // An i2c adapter is frameless because the *caller* frame, which is interpreted, 769 // routinely repairs its own stack pointer (from interpreter_frame_last_sp), 770 // even if a callee has modified the stack pointer. 771 // A c2i adapter is frameless because the *callee* frame, which is interpreted, 772 // routinely repairs its caller's stack pointer (from sender_sp, which is set 773 // up via the senderSP register). 774 // In other words, if *either* the caller or callee is interpreted, we can 775 // get the stack pointer repaired after a call. 776 // This is why c2i and i2c adapters cannot be indefinitely composed. 777 // In particular, if a c2i adapter were to somehow call an i2c adapter, 778 // both caller and callee would be compiled methods, and neither would 779 // clean up the stack pointer changes performed by the two adapters. 780 // If this happens, control eventually transfers back to the compiled 781 // caller, but with an uncorrected stack, causing delayed havoc. 782 783 // Pick up the return address 784 __ movptr(rax, Address(rsp, 0)); 785 786 if (VerifyAdapterCalls && 787 (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) { 788 // So, let's test for cascading c2i/i2c adapters right now. 789 // assert(Interpreter::contains($return_addr) || 790 // StubRoutines::contains($return_addr), 791 // "i2c adapter must return to an interpreter frame"); 792 __ block_comment("verify_i2c { "); 793 Label L_ok; 794 if (Interpreter::code() != NULL) 795 range_check(masm, rax, r11, 796 Interpreter::code()->code_start(), Interpreter::code()->code_end(), 797 L_ok); 798 if (StubRoutines::code1() != NULL) 799 range_check(masm, rax, r11, 800 StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(), 801 L_ok); 802 if (StubRoutines::code2() != NULL) 803 range_check(masm, rax, r11, 804 StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(), 805 L_ok); 806 const char* msg = "i2c adapter must return to an interpreter frame"; 807 __ block_comment(msg); 808 __ stop(msg); 809 __ bind(L_ok); 810 __ block_comment("} verify_i2ce "); 811 } 812 813 // Must preserve original SP for loading incoming arguments because 814 // we need to align the outgoing SP for compiled code. 815 __ movptr(r11, rsp); 816 817 // Cut-out for having no stack args. Since up to 2 int/oop args are passed 818 // in registers, we will occasionally have no stack args. 819 int comp_words_on_stack = 0; 820 if (comp_args_on_stack) { 821 // Sig words on the stack are greater-than VMRegImpl::stack0. Those in 822 // registers are below. By subtracting stack0, we either get a negative 823 // number (all values in registers) or the maximum stack slot accessed. 824 825 // Convert 4-byte c2 stack slots to words. 
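    // Worked example with made-up numbers: for comp_args_on_stack == 5, the five
    // 4-byte slots occupy 20 bytes; align_up(20, wordSize) == 24 bytes == 3 words,
    // and align_up(3, 2) == 4, so an even number of words (here 4) is reserved
    // before the stack is forcibly realigned with the andptr(rsp, -16) below.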
    comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = align_up(comp_words_on_stack, 2);
    __ subptr(rsp, comp_words_on_stack * wordSize);
  }


  // Ensure compiled code always sees stack at proper alignment
  __ andptr(rsp, -16);

  // Push the return address, misaligning the stack the way the youngest frame
  // always sees it with respect to the placement of the call instruction
  __ push(rax);

  // Put saved SP in another register
  const Register saved_sp = rax;
  __ movptr(saved_sp, r11);

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));

#if INCLUDE_JVMCI
  if (EnableJVMCI) {
    // check if this call should be routed towards a specific entry point
    __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
    Label no_alternative_target;
    __ jcc(Assembler::equal, no_alternative_target);
    __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
    __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
    __ bind(no_alternative_target);
  }
#endif // INCLUDE_JVMCI

  // Now generate the shuffle code. Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;
    //
    //
    //
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to account for return address )
      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;

      // We can use r13 as a temp here because compiled code doesn't need r13 as an input
      // and if we end up going thru a c2i because of a miss a reasonable value of r13
      // will be generated.
      if (!r_2->is_valid()) {
        // sign extend???
        __ movl(r13, Address(saved_sp, ld_off));
        __ movptr(Address(rsp, st_off), r13);
      } else {
        //
        // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
        // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
        // So we must adjust where to pick up the data to match the interpreter.
        //
        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
                           next_off : ld_off;
        __ movq(r13, Address(saved_sp, offset));
        // st_off is LSW (i.e. reg.first())
        __ movq(Address(rsp, st_off), r13);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      assert(r != rax, "must be different");
      if (r_2->is_valid()) {
        //
        // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
        // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
        // So we must adjust where to pick up the data to match the interpreter.

        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
                           next_off : ld_off;

        // this can be a misaligned move
        __ movq(r, Address(saved_sp, offset));
      } else {
        // sign extend and use a full word?
        __ movl(r, Address(saved_sp, ld_off));
      }
    } else {
      if (!r_2->is_valid()) {
        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
      } else {
        __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
      }
    }
  }

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the VM will find it there should this case occur.

  __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);

  // put Method* where a c2i would expect should we end up there
  // only needed because of c2 resolve stubs return Method* as a result in
  // rax
  __ mov(rax, rbx);
  __ jmp(r11);
}

// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
  // to the interpreter. The args start out packed in the compiled layout. They
  // need to be unpacked into the interpreter layout. This will almost always
  // require some stack space. We grow the current (compiled) stack, then repack
  // the args. We finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not RBP, get sick).
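  // A sketch (exact sizes depend on the signature) of the frame gen_c2i_adapter
  // above leaves for the interpreter:
  //
  //   caller's compiled frame
  //   ...                           <- r13 (senderSP saved for the interpreter)
  //   | arg 0, interpreter layout |    (highest st_off)
  //   | ...                       |
  //   | arg n-1                   |
  //   | return address            | <- rsp when we jump to the interpreter entry
  //
  // The interpreter entry finds its parameters just above the return address and
  // later restores the caller's SP from the senderSP value passed in r13.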
979 980 address c2i_unverified_entry = __ pc(); 981 Label skip_fixup; 982 Label ok; 983 984 Register holder = rax; 985 Register receiver = j_rarg0; 986 Register temp = rbx; 987 988 { 989 __ load_klass(temp, receiver, rscratch1); 990 __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset())); 991 __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset())); 992 __ jcc(Assembler::equal, ok); 993 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 994 995 __ bind(ok); 996 // Method might have been compiled since the call site was patched to 997 // interpreted if that is the case treat it as a miss so we can get 998 // the call site corrected. 999 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD); 1000 __ jcc(Assembler::equal, skip_fixup); 1001 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1002 } 1003 1004 address c2i_entry = __ pc(); 1005 1006 // Class initialization barrier for static methods 1007 address c2i_no_clinit_check_entry = NULL; 1008 if (VM_Version::supports_fast_class_init_checks()) { 1009 Label L_skip_barrier; 1010 Register method = rbx; 1011 1012 { // Bypass the barrier for non-static methods 1013 Register flags = rscratch1; 1014 __ movl(flags, Address(method, Method::access_flags_offset())); 1015 __ testl(flags, JVM_ACC_STATIC); 1016 __ jcc(Assembler::zero, L_skip_barrier); // non-static 1017 } 1018 1019 Register klass = rscratch1; 1020 __ load_method_holder(klass, method); 1021 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/); 1022 1023 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path 1024 1025 __ bind(L_skip_barrier); 1026 c2i_no_clinit_check_entry = __ pc(); 1027 } 1028 1029 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 1030 bs->c2i_entry_barrier(masm); 1031 1032 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); 1033 1034 __ flush(); 1035 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); 1036 } 1037 1038 int SharedRuntime::c_calling_convention(const BasicType *sig_bt, 1039 VMRegPair *regs, 1040 VMRegPair *regs2, 1041 int total_args_passed) { 1042 assert(regs2 == NULL, "not needed on x86"); 1043 // We return the amount of VMRegImpl stack slots we need to reserve for all 1044 // the arguments NOT counting out_preserve_stack_slots. 
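  // For illustration only (a hypothetical out-signature): for
  // (T_ADDRESS /* JNIEnv* */, T_OBJECT, T_INT, T_DOUBLE) the tables below yield
  //   non-Windows: c_rarg0, c_rarg1, c_rarg2, c_farg0, and stk_args == 0
  //   Windows:     c_rarg0, c_rarg1, c_rarg2, c_farg3, and stk_args == 8
  // because on win64 integer and FP arguments share the four positional register
  // slots (and shadow space for them is always reserved), while on the SysV ABI
  // the integer and FP register sequences are counted independently.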
1045 1046 // NOTE: These arrays will have to change when c1 is ported 1047 #ifdef _WIN64 1048 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { 1049 c_rarg0, c_rarg1, c_rarg2, c_rarg3 1050 }; 1051 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { 1052 c_farg0, c_farg1, c_farg2, c_farg3 1053 }; 1054 #else 1055 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { 1056 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5 1057 }; 1058 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { 1059 c_farg0, c_farg1, c_farg2, c_farg3, 1060 c_farg4, c_farg5, c_farg6, c_farg7 1061 }; 1062 #endif // _WIN64 1063 1064 1065 uint int_args = 0; 1066 uint fp_args = 0; 1067 uint stk_args = 0; // inc by 2 each time 1068 1069 for (int i = 0; i < total_args_passed; i++) { 1070 switch (sig_bt[i]) { 1071 case T_BOOLEAN: 1072 case T_CHAR: 1073 case T_BYTE: 1074 case T_SHORT: 1075 case T_INT: 1076 if (int_args < Argument::n_int_register_parameters_c) { 1077 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); 1078 #ifdef _WIN64 1079 fp_args++; 1080 // Allocate slots for callee to stuff register args the stack. 1081 stk_args += 2; 1082 #endif 1083 } else { 1084 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1085 stk_args += 2; 1086 } 1087 break; 1088 case T_LONG: 1089 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 1090 // fall through 1091 case T_OBJECT: 1092 case T_ARRAY: 1093 case T_ADDRESS: 1094 case T_METADATA: 1095 if (int_args < Argument::n_int_register_parameters_c) { 1096 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); 1097 #ifdef _WIN64 1098 fp_args++; 1099 stk_args += 2; 1100 #endif 1101 } else { 1102 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1103 stk_args += 2; 1104 } 1105 break; 1106 case T_FLOAT: 1107 if (fp_args < Argument::n_float_register_parameters_c) { 1108 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); 1109 #ifdef _WIN64 1110 int_args++; 1111 // Allocate slots for callee to stuff register args the stack. 1112 stk_args += 2; 1113 #endif 1114 } else { 1115 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1116 stk_args += 2; 1117 } 1118 break; 1119 case T_DOUBLE: 1120 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 1121 if (fp_args < Argument::n_float_register_parameters_c) { 1122 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); 1123 #ifdef _WIN64 1124 int_args++; 1125 // Allocate slots for callee to stuff register args the stack. 1126 stk_args += 2; 1127 #endif 1128 } else { 1129 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1130 stk_args += 2; 1131 } 1132 break; 1133 case T_VOID: // Halves of longs and doubles 1134 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 1135 regs[i].set_bad(); 1136 break; 1137 default: 1138 ShouldNotReachHere(); 1139 break; 1140 } 1141 } 1142 #ifdef _WIN64 1143 // windows abi requires that we always allocate enough stack space 1144 // for 4 64bit registers to be stored down. 
1145 if (stk_args < 8) { 1146 stk_args = 8; 1147 } 1148 #endif // _WIN64 1149 1150 return stk_args; 1151 } 1152 1153 int SharedRuntime::vector_calling_convention(VMRegPair *regs, 1154 uint num_bits, 1155 uint total_args_passed) { 1156 assert(num_bits == 64 || num_bits == 128 || num_bits == 256 || num_bits == 512, 1157 "only certain vector sizes are supported for now"); 1158 1159 static const XMMRegister VEC_ArgReg[32] = { 1160 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, 1161 xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, 1162 xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23, 1163 xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31 1164 }; 1165 1166 uint stk_args = 0; 1167 uint fp_args = 0; 1168 1169 for (uint i = 0; i < total_args_passed; i++) { 1170 VMReg vmreg = VEC_ArgReg[fp_args++]->as_VMReg(); 1171 int next_val = num_bits == 64 ? 1 : (num_bits == 128 ? 3 : (num_bits == 256 ? 7 : 15)); 1172 regs[i].set_pair(vmreg->next(next_val), vmreg); 1173 } 1174 1175 return stk_args; 1176 } 1177 1178 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { 1179 // We always ignore the frame_slots arg and just use the space just below frame pointer 1180 // which by this time is free to use 1181 switch (ret_type) { 1182 case T_FLOAT: 1183 __ movflt(Address(rbp, -wordSize), xmm0); 1184 break; 1185 case T_DOUBLE: 1186 __ movdbl(Address(rbp, -wordSize), xmm0); 1187 break; 1188 case T_VOID: break; 1189 default: { 1190 __ movptr(Address(rbp, -wordSize), rax); 1191 } 1192 } 1193 } 1194 1195 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { 1196 // We always ignore the frame_slots arg and just use the space just below frame pointer 1197 // which by this time is free to use 1198 switch (ret_type) { 1199 case T_FLOAT: 1200 __ movflt(xmm0, Address(rbp, -wordSize)); 1201 break; 1202 case T_DOUBLE: 1203 __ movdbl(xmm0, Address(rbp, -wordSize)); 1204 break; 1205 case T_VOID: break; 1206 default: { 1207 __ movptr(rax, Address(rbp, -wordSize)); 1208 } 1209 } 1210 } 1211 1212 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { 1213 for ( int i = first_arg ; i < arg_count ; i++ ) { 1214 if (args[i].first()->is_Register()) { 1215 __ push(args[i].first()->as_Register()); 1216 } else if (args[i].first()->is_XMMRegister()) { 1217 __ subptr(rsp, 2*wordSize); 1218 __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister()); 1219 } 1220 } 1221 } 1222 1223 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { 1224 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { 1225 if (args[i].first()->is_Register()) { 1226 __ pop(args[i].first()->as_Register()); 1227 } else if (args[i].first()->is_XMMRegister()) { 1228 __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0)); 1229 __ addptr(rsp, 2*wordSize); 1230 } 1231 } 1232 } 1233 1234 // Different signatures may require very different orders for the move 1235 // to avoid clobbering other arguments. There's no simple way to 1236 // order them safely. Compute a safe order for issuing stores and 1237 // break any cycles in those stores. This code is fairly general but 1238 // it's not necessary on the other platforms so we keep it in the 1239 // platform dependent code instead of moving it into a shared file. 1240 // (See bugs 7013347 & 7145024.) 1241 // Note that this code is specific to LP64. 
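// A minimal example of the problem being solved (register names are
// illustrative, not tied to a particular signature): if one incoming arg sits
// in rsi but must end up in rdx, while another sits in rdx and must end up in
// rsi, the two stores
//     rsi -> rdx
//     rdx -> rsi
// form a cycle; emitting either one first would clobber the other's source.
// ComputeMoveOrder breaks the cycle with the temp register, emitting
//     rdx -> tmp,   rsi -> rdx,   tmp -> rsi
// Chains without cycles are simply emitted in an order that reads every source
// before it is overwritten.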
1242 class ComputeMoveOrder: public StackObj { 1243 class MoveOperation: public ResourceObj { 1244 friend class ComputeMoveOrder; 1245 private: 1246 VMRegPair _src; 1247 VMRegPair _dst; 1248 int _src_index; 1249 int _dst_index; 1250 bool _processed; 1251 MoveOperation* _next; 1252 MoveOperation* _prev; 1253 1254 static int get_id(VMRegPair r) { 1255 return r.first()->value(); 1256 } 1257 1258 public: 1259 MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): 1260 _src(src) 1261 , _dst(dst) 1262 , _src_index(src_index) 1263 , _dst_index(dst_index) 1264 , _processed(false) 1265 , _next(NULL) 1266 , _prev(NULL) { 1267 } 1268 1269 VMRegPair src() const { return _src; } 1270 int src_id() const { return get_id(src()); } 1271 int src_index() const { return _src_index; } 1272 VMRegPair dst() const { return _dst; } 1273 void set_dst(int i, VMRegPair dst) { _dst_index = i, _dst = dst; } 1274 int dst_index() const { return _dst_index; } 1275 int dst_id() const { return get_id(dst()); } 1276 MoveOperation* next() const { return _next; } 1277 MoveOperation* prev() const { return _prev; } 1278 void set_processed() { _processed = true; } 1279 bool is_processed() const { return _processed; } 1280 1281 // insert 1282 void break_cycle(VMRegPair temp_register) { 1283 // create a new store following the last store 1284 // to move from the temp_register to the original 1285 MoveOperation* new_store = new MoveOperation(-1, temp_register, dst_index(), dst()); 1286 1287 // break the cycle of links and insert new_store at the end 1288 // break the reverse link. 1289 MoveOperation* p = prev(); 1290 assert(p->next() == this, "must be"); 1291 _prev = NULL; 1292 p->_next = new_store; 1293 new_store->_prev = p; 1294 1295 // change the original store to save it's value in the temp. 1296 set_dst(-1, temp_register); 1297 } 1298 1299 void link(GrowableArray<MoveOperation*>& killer) { 1300 // link this store in front the store that it depends on 1301 MoveOperation* n = killer.at_grow(src_id(), NULL); 1302 if (n != NULL) { 1303 assert(_next == NULL && n->_prev == NULL, "shouldn't have been set yet"); 1304 _next = n; 1305 n->_prev = this; 1306 } 1307 } 1308 }; 1309 1310 private: 1311 GrowableArray<MoveOperation*> edges; 1312 1313 public: 1314 ComputeMoveOrder(int total_in_args, const VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, 1315 const BasicType* in_sig_bt, GrowableArray<int>& arg_order, VMRegPair tmp_vmreg) { 1316 // Move operations where the dest is the stack can all be 1317 // scheduled first since they can't interfere with the other moves. 
1318 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { 1319 if (in_sig_bt[i] == T_ARRAY) { 1320 c_arg--; 1321 if (out_regs[c_arg].first()->is_stack() && 1322 out_regs[c_arg + 1].first()->is_stack()) { 1323 arg_order.push(i); 1324 arg_order.push(c_arg); 1325 } else { 1326 if (out_regs[c_arg].first()->is_stack() || 1327 in_regs[i].first() == out_regs[c_arg].first()) { 1328 add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg + 1]); 1329 } else { 1330 add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]); 1331 } 1332 } 1333 } else if (in_sig_bt[i] == T_VOID) { 1334 arg_order.push(i); 1335 arg_order.push(c_arg); 1336 } else { 1337 if (out_regs[c_arg].first()->is_stack() || 1338 in_regs[i].first() == out_regs[c_arg].first()) { 1339 arg_order.push(i); 1340 arg_order.push(c_arg); 1341 } else { 1342 add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]); 1343 } 1344 } 1345 } 1346 // Break any cycles in the register moves and emit the in the 1347 // proper order. 1348 GrowableArray<MoveOperation*>* stores = get_store_order(tmp_vmreg); 1349 for (int i = 0; i < stores->length(); i++) { 1350 arg_order.push(stores->at(i)->src_index()); 1351 arg_order.push(stores->at(i)->dst_index()); 1352 } 1353 } 1354 1355 // Collected all the move operations 1356 void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { 1357 if (src.first() == dst.first()) return; 1358 edges.append(new MoveOperation(src_index, src, dst_index, dst)); 1359 } 1360 1361 // Walk the edges breaking cycles between moves. The result list 1362 // can be walked in order to produce the proper set of loads 1363 GrowableArray<MoveOperation*>* get_store_order(VMRegPair temp_register) { 1364 // Record which moves kill which values 1365 GrowableArray<MoveOperation*> killer; 1366 for (int i = 0; i < edges.length(); i++) { 1367 MoveOperation* s = edges.at(i); 1368 assert(killer.at_grow(s->dst_id(), NULL) == NULL, "only one killer"); 1369 killer.at_put_grow(s->dst_id(), s, NULL); 1370 } 1371 assert(killer.at_grow(MoveOperation::get_id(temp_register), NULL) == NULL, 1372 "make sure temp isn't in the registers that are killed"); 1373 1374 // create links between loads and stores 1375 for (int i = 0; i < edges.length(); i++) { 1376 edges.at(i)->link(killer); 1377 } 1378 1379 // at this point, all the move operations are chained together 1380 // in a doubly linked list. Processing it backwards finds 1381 // the beginning of the chain, forwards finds the end. If there's 1382 // a cycle it can be broken at any point, so pick an edge and walk 1383 // backward until the list ends or we end where we started. 
1384 GrowableArray<MoveOperation*>* stores = new GrowableArray<MoveOperation*>(); 1385 for (int e = 0; e < edges.length(); e++) { 1386 MoveOperation* s = edges.at(e); 1387 if (!s->is_processed()) { 1388 MoveOperation* start = s; 1389 // search for the beginning of the chain or cycle 1390 while (start->prev() != NULL && start->prev() != s) { 1391 start = start->prev(); 1392 } 1393 if (start->prev() == s) { 1394 start->break_cycle(temp_register); 1395 } 1396 // walk the chain forward inserting to store list 1397 while (start != NULL) { 1398 stores->append(start); 1399 start->set_processed(); 1400 start = start->next(); 1401 } 1402 } 1403 } 1404 return stores; 1405 } 1406 }; 1407 1408 static void verify_oop_args(MacroAssembler* masm, 1409 const methodHandle& method, 1410 const BasicType* sig_bt, 1411 const VMRegPair* regs) { 1412 Register temp_reg = rbx; // not part of any compiled calling seq 1413 if (VerifyOops) { 1414 for (int i = 0; i < method->size_of_parameters(); i++) { 1415 if (is_reference_type(sig_bt[i])) { 1416 VMReg r = regs[i].first(); 1417 assert(r->is_valid(), "bad oop arg"); 1418 if (r->is_stack()) { 1419 __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1420 __ verify_oop(temp_reg); 1421 } else { 1422 __ verify_oop(r->as_Register()); 1423 } 1424 } 1425 } 1426 } 1427 } 1428 1429 static void gen_special_dispatch(MacroAssembler* masm, 1430 const methodHandle& method, 1431 const BasicType* sig_bt, 1432 const VMRegPair* regs) { 1433 verify_oop_args(masm, method, sig_bt, regs); 1434 vmIntrinsics::ID iid = method->intrinsic_id(); 1435 1436 // Now write the args into the outgoing interpreter space 1437 bool has_receiver = false; 1438 Register receiver_reg = noreg; 1439 int member_arg_pos = -1; 1440 Register member_reg = noreg; 1441 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1442 if (ref_kind != 0) { 1443 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 1444 member_reg = rbx; // known to be free at this point 1445 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 1446 } else if (iid == vmIntrinsics::_invokeBasic) { 1447 has_receiver = true; 1448 } else if (iid == vmIntrinsics::_linkToNative) { 1449 member_arg_pos = method->size_of_parameters() - 1; // trailing NativeEntryPoint argument 1450 member_reg = rbx; // known to be free at this point 1451 } else { 1452 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); 1453 } 1454 1455 if (member_reg != noreg) { 1456 // Load the member_arg into register, if necessary. 1457 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 1458 VMReg r = regs[member_arg_pos].first(); 1459 if (r->is_stack()) { 1460 __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1461 } else { 1462 // no data motion is needed 1463 member_reg = r->as_Register(); 1464 } 1465 } 1466 1467 if (has_receiver) { 1468 // Make sure the receiver is loaded into a register. 1469 assert(method->size_of_parameters() > 0, "oob"); 1470 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 1471 VMReg r = regs[0].first(); 1472 assert(r->is_valid(), "bad receiver arg"); 1473 if (r->is_stack()) { 1474 // Porting note: This assumes that compiled calling conventions always 1475 // pass the receiver oop in a register. If this is not true on some 1476 // platform, pick a temp and load the receiver from stack. 
      fatal("receiver always in a register");
      receiver_reg = j_rarg0;  // known to be free at this point
      __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
    } else {
      // no data motion is needed
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, iid,
                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
}

// ---------------------------------------------------------------------------
// Generate a native wrapper for a given method. The method takes arguments
// in the Java compiled code convention, marshals them to the native
// convention (handlizes oops, etc), transitions to native, makes the call,
// returns to java state (possibly blocking), unhandlizes any result and
// returns.
//
// Critical native functions are a shorthand for the use of
// GetPrimitiveArrayCritical and disallow the use of any other JNI
// functions. The wrapper is expected to unpack the arguments before
// passing them to the callee. Critical native functions leave the state _in_Java,
// since they cannot stop for GC.
// Some other parts of JNI setup are skipped, like the tear down of the JNI handle
// block and the check for pending exceptions, since it's impossible for them
// to be thrown.
//
nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
                                                const methodHandle& method,
                                                int compile_id,
                                                BasicType* in_sig_bt,
                                                VMRegPair* in_regs,
                                                BasicType ret_type) {
  if (method->is_method_handle_intrinsic()) {
    vmIntrinsics::ID iid = method->intrinsic_id();
    intptr_t start = (intptr_t)__ pc();
    int vep_offset = ((intptr_t)__ pc()) - start;
    gen_special_dispatch(masm,
                         method,
                         in_sig_bt,
                         in_regs);
    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
    __ flush();
    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
    return nmethod::new_native_nmethod(method,
                                       compile_id,
                                       masm->code(),
                                       vep_offset,
                                       frame_complete,
                                       stack_slots / VMRegImpl::slots_per_word,
                                       in_ByteSize(-1),
                                       in_ByteSize(-1),
                                       (OopMapSet*)NULL);
  }
  address native_func = method->native_function();
  assert(native_func != NULL, "must have function");

  // An OopMap for lock (and class if static)
  OopMapSet *oop_maps = new OopMapSet();
  intptr_t start = (intptr_t)__ pc();

  // We have received a description of where all the java args are located
  // on entry to the wrapper. We need to convert these args to where
  // the jni function will expect them. To figure out where they go
  // we convert the java signature to a C signature by inserting
  // the hidden arguments as arg[0] and possibly arg[1] (static method)

  const int total_in_args = method->size_of_parameters();
  int total_c_args = total_in_args + (method->is_static() ?
2 : 1); 1549 1550 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 1551 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 1552 BasicType* in_elem_bt = NULL; 1553 1554 int argc = 0; 1555 out_sig_bt[argc++] = T_ADDRESS; 1556 if (method->is_static()) { 1557 out_sig_bt[argc++] = T_OBJECT; 1558 } 1559 1560 for (int i = 0; i < total_in_args ; i++ ) { 1561 out_sig_bt[argc++] = in_sig_bt[i]; 1562 } 1563 1564 // Now figure out where the args must be stored and how much stack space 1565 // they require. 1566 int out_arg_slots; 1567 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); 1568 1569 // Compute framesize for the wrapper. We need to handlize all oops in 1570 // incoming registers 1571 1572 // Calculate the total number of stack slots we will need. 1573 1574 // First count the abi requirement plus all of the outgoing args 1575 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 1576 1577 // Now the space for the inbound oop handle area 1578 int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers 1579 1580 int oop_handle_offset = stack_slots; 1581 stack_slots += total_save_slots; 1582 1583 // Now any space we need for handlizing a klass if static method 1584 1585 int klass_slot_offset = 0; 1586 int klass_offset = -1; 1587 int lock_slot_offset = 0; 1588 bool is_static = false; 1589 1590 if (method->is_static()) { 1591 klass_slot_offset = stack_slots; 1592 stack_slots += VMRegImpl::slots_per_word; 1593 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 1594 is_static = true; 1595 } 1596 1597 // Plus a lock if needed 1598 1599 if (method->is_synchronized()) { 1600 lock_slot_offset = stack_slots; 1601 stack_slots += VMRegImpl::slots_per_word; 1602 } 1603 1604 // Now a place (+2) to save return values or temp during shuffling 1605 // + 4 for return address (which we own) and saved rbp 1606 stack_slots += 6; 1607 1608 // Ok The space we have allocated will look like: 1609 // 1610 // 1611 // FP-> | | 1612 // |---------------------| 1613 // | 2 slots for moves | 1614 // |---------------------| 1615 // | lock box (if sync) | 1616 // |---------------------| <- lock_slot_offset 1617 // | klass (if static) | 1618 // |---------------------| <- klass_slot_offset 1619 // | oopHandle area | 1620 // |---------------------| <- oop_handle_offset (6 java arg registers) 1621 // | outbound memory | 1622 // | based arguments | 1623 // | | 1624 // |---------------------| 1625 // | | 1626 // SP-> | out_preserved_slots | 1627 // 1628 // 1629 1630 1631 // Now compute actual number of stack words we need rounding to make 1632 // stack properly aligned. 1633 stack_slots = align_up(stack_slots, StackAlignmentInSlots); 1634 1635 int stack_size = stack_slots * VMRegImpl::stack_slot_size; 1636 1637 // First thing make an ic check to see if we should even be here 1638 1639 // We are free to use all registers as temps without saving them and 1640 // restoring them except rbp. rbp is the only callee save register 1641 // as far as the interpreter and the compiler(s) are concerned. 
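  // Inline cache check: compiled callers pass the expected klass in rax
  // (ic_reg) and the receiver in j_rarg0.  If the receiver's klass does not
  // match we tail-call the IC miss stub so the call site can be re-resolved.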
1642 1643 1644 const Register ic_reg = rax; 1645 const Register receiver = j_rarg0; 1646 1647 Label hit; 1648 Label exception_pending; 1649 1650 assert_different_registers(ic_reg, receiver, rscratch1); 1651 __ verify_oop(receiver); 1652 __ load_klass(rscratch1, receiver, rscratch2); 1653 __ cmpq(ic_reg, rscratch1); 1654 __ jcc(Assembler::equal, hit); 1655 1656 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1657 1658 // Verified entry point must be aligned 1659 __ align(8); 1660 1661 __ bind(hit); 1662 1663 int vep_offset = ((intptr_t)__ pc()) - start; 1664 1665 if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { 1666 Label L_skip_barrier; 1667 Register klass = r10; 1668 __ mov_metadata(klass, method->method_holder()); // InstanceKlass* 1669 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/); 1670 1671 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path 1672 1673 __ bind(L_skip_barrier); 1674 } 1675 1676 #ifdef COMPILER1 1677 // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available. 1678 if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { 1679 inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/); 1680 } 1681 #endif // COMPILER1 1682 1683 // The instruction at the verified entry point must be 5 bytes or longer 1684 // because it can be patched on the fly by make_non_entrant. The stack bang 1685 // instruction fits that requirement. 1686 1687 // Generate stack overflow check 1688 __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); 1689 1690 // Generate a new frame for the wrapper. 1691 __ enter(); 1692 // -2 because return address is already present and so is saved rbp 1693 __ subptr(rsp, stack_size - 2*wordSize); 1694 1695 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 1696 bs->nmethod_entry_barrier(masm); 1697 1698 // Frame is now completed as far as size and linkage. 1699 int frame_complete = ((intptr_t)__ pc()) - start; 1700 1701 if (UseRTMLocking) { 1702 // Abort RTM transaction before calling JNI 1703 // because critical section will be large and will be 1704 // aborted anyway. Also nmethod could be deoptimized. 1705 __ xabort(0); 1706 } 1707 1708 #ifdef ASSERT 1709 { 1710 Label L; 1711 __ mov(rax, rsp); 1712 __ andptr(rax, -16); // must be 16 byte boundary (see amd64 ABI) 1713 __ cmpptr(rax, rsp); 1714 __ jcc(Assembler::equal, L); 1715 __ stop("improperly aligned stack"); 1716 __ bind(L); 1717 } 1718 #endif /* ASSERT */ 1719 1720 1721 // We use r14 as the oop handle for the receiver/klass 1722 // It is callee save so it survives the call to native 1723 1724 const Register oop_handle_reg = r14; 1725 1726 // 1727 // We immediately shuffle the arguments so that any vm call we have to 1728 // make from here on out (sync slow path, jvmti, etc.) we will have 1729 // captured the oops from our caller and have a valid oopMap for 1730 // them. 1731 1732 // ----------------- 1733 // The Grand Shuffle 1734 1735 // The Java calling convention is either equal (linux) or denser (win64) than the 1736 // c calling convention. However the because of the jni_env argument the c calling 1737 // convention always has at least one more (and two for static) arguments than Java. 
1738 // Therefore if we move the args from java -> c backwards then we will never have 1739 // a register->register conflict and we don't have to build a dependency graph 1740 // and figure out how to break any cycles. 1741 // 1742 1743 // Record esp-based slot for receiver on stack for non-static methods 1744 int receiver_offset = -1; 1745 1746 // This is a trick. We double the stack slots so we can claim 1747 // the oops in the caller's frame. Since we are sure to have 1748 // more args than the caller doubling is enough to make 1749 // sure we can capture all the incoming oop args from the 1750 // caller. 1751 // 1752 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1753 1754 // Mark location of rbp (someday) 1755 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp)); 1756 1757 // Use eax, ebx as temporaries during any memory-memory moves we have to do 1758 // All inbound args are referenced based on rbp and all outbound args via rsp. 1759 1760 1761 #ifdef ASSERT 1762 bool reg_destroyed[RegisterImpl::number_of_registers]; 1763 bool freg_destroyed[XMMRegisterImpl::number_of_registers]; 1764 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { 1765 reg_destroyed[r] = false; 1766 } 1767 for ( int f = 0 ; f < XMMRegisterImpl::number_of_registers ; f++ ) { 1768 freg_destroyed[f] = false; 1769 } 1770 1771 #endif /* ASSERT */ 1772 1773 // For JNI natives the incoming and outgoing registers are offset upwards. 1774 GrowableArray<int> arg_order(2 * total_in_args); 1775 1776 VMRegPair tmp_vmreg; 1777 tmp_vmreg.set2(rbx->as_VMReg()); 1778 1779 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { 1780 arg_order.push(i); 1781 arg_order.push(c_arg); 1782 } 1783 1784 int temploc = -1; 1785 for (int ai = 0; ai < arg_order.length(); ai += 2) { 1786 int i = arg_order.at(ai); 1787 int c_arg = arg_order.at(ai + 1); 1788 __ block_comment(err_msg("move %d -> %d", i, c_arg)); 1789 #ifdef ASSERT 1790 if (in_regs[i].first()->is_Register()) { 1791 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); 1792 } else if (in_regs[i].first()->is_XMMRegister()) { 1793 assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!"); 1794 } 1795 if (out_regs[c_arg].first()->is_Register()) { 1796 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 1797 } else if (out_regs[c_arg].first()->is_XMMRegister()) { 1798 freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; 1799 } 1800 #endif /* ASSERT */ 1801 switch (in_sig_bt[i]) { 1802 case T_ARRAY: 1803 case T_OBJECT: 1804 __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 1805 ((i == 0) && (!is_static)), 1806 &receiver_offset); 1807 break; 1808 case T_VOID: 1809 break; 1810 1811 case T_FLOAT: 1812 __ float_move(in_regs[i], out_regs[c_arg]); 1813 break; 1814 1815 case T_DOUBLE: 1816 assert( i + 1 < total_in_args && 1817 in_sig_bt[i + 1] == T_VOID && 1818 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 1819 __ double_move(in_regs[i], out_regs[c_arg]); 1820 break; 1821 1822 case T_LONG : 1823 __ long_move(in_regs[i], out_regs[c_arg]); 1824 break; 1825 1826 case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 1827 1828 default: 1829 __ move32_64(in_regs[i], out_regs[c_arg]); 1830 } 1831 } 1832 1833 int c_arg; 1834 1835 // Pre-load a static method's oop into r14. Used both by locking code and 1836 // the normal JNI call code. 
1837 // point c_arg at the first arg that is already loaded in case we 1838 // need to spill before we call out 1839 c_arg = total_c_args - total_in_args; 1840 1841 if (method->is_static()) { 1842 1843 // load oop into a register 1844 __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); 1845 1846 // Now handlize the static class mirror it's known not-null. 1847 __ movptr(Address(rsp, klass_offset), oop_handle_reg); 1848 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 1849 1850 // Now get the handle 1851 __ lea(oop_handle_reg, Address(rsp, klass_offset)); 1852 // store the klass handle as second argument 1853 __ movptr(c_rarg1, oop_handle_reg); 1854 // and protect the arg if we must spill 1855 c_arg--; 1856 } 1857 1858 // Change state to native (we save the return address in the thread, since it might not 1859 // be pushed on the stack when we do a a stack traversal). It is enough that the pc() 1860 // points into the right code segment. It does not have to be the correct return pc. 1861 // We use the same pc/oopMap repeatedly when we call out 1862 1863 intptr_t the_pc = (intptr_t) __ pc(); 1864 oop_maps->add_gc_map(the_pc - start, map); 1865 1866 __ set_last_Java_frame(rsp, noreg, (address)the_pc); 1867 1868 1869 // We have all of the arguments setup at this point. We must not touch any register 1870 // argument registers at this point (what if we save/restore them there are no oop? 1871 1872 { 1873 SkipIfEqual skip(masm, &DTraceMethodProbes, false); 1874 // protect the args we've loaded 1875 save_args(masm, total_c_args, c_arg, out_regs); 1876 __ mov_metadata(c_rarg1, method()); 1877 __ call_VM_leaf( 1878 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 1879 r15_thread, c_rarg1); 1880 restore_args(masm, total_c_args, c_arg, out_regs); 1881 } 1882 1883 // RedefineClasses() tracing support for obsolete method entry 1884 if (log_is_enabled(Trace, redefine, class, obsolete)) { 1885 // protect the args we've loaded 1886 save_args(masm, total_c_args, c_arg, out_regs); 1887 __ mov_metadata(c_rarg1, method()); 1888 __ call_VM_leaf( 1889 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), 1890 r15_thread, c_rarg1); 1891 restore_args(masm, total_c_args, c_arg, out_regs); 1892 } 1893 1894 // Lock a synchronized method 1895 1896 // Register definitions used by locking and unlocking 1897 1898 const Register swap_reg = rax; // Must use rax for cmpxchg instruction 1899 const Register obj_reg = rbx; // Will contain the oop 1900 const Register lock_reg = r13; // Address of compiler lock object (BasicLock) 1901 const Register old_hdr = r13; // value of old header at unlock time 1902 1903 Label slow_path_lock; 1904 Label lock_done; 1905 1906 if (method->is_synchronized()) { 1907 1908 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); 1909 1910 // Get the handle (the 2nd argument) 1911 __ mov(oop_handle_reg, c_rarg1); 1912 1913 // Get address of the box 1914 1915 __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 1916 1917 // Load the oop from the handle 1918 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 1919 1920 if (!UseHeavyMonitors) { 1921 // Load immediate 1 into swap_reg %rax 1922 __ movl(swap_reg, 1); 1923 1924 // Load (object->mark() | 1) into swap_reg %rax 1925 __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 1926 1927 // Save (object->mark() | 1) into BasicLock's displaced header 1928 __ movptr(Address(lock_reg, mark_word_offset), swap_reg); 1929 1930 
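      // Fast path (stack locking): try to CAS a pointer to the on-stack
      // BasicLock into the object's mark word.  If the CAS fails, the code
      // below treats it as a recursive lock only when the current mark is a
      // stack address inside our own frame/page; anything else takes
      // slow_path_lock.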
// src -> dest iff dest == rax else rax <- dest
 1931       __ lock();
 1932       __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
 1933       __ jcc(Assembler::equal, lock_done);
 1934 
 1935       // Hmm should this move to the slow path code area???
 1936 
 1937       // Test if the oopMark is an obvious stack pointer, i.e.,
 1938       //  1) (mark & 3) == 0, and
 1939       //  2) rsp <= mark < mark + os::pagesize()
 1940       // These 3 tests can be done by evaluating the following
 1941       // expression: ((mark - rsp) & (3 - os::vm_page_size())),
 1942       // assuming both stack pointer and pagesize have their
 1943       // least significant 2 bits clear.
 1944       // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
 1945 
 1946       __ subptr(swap_reg, rsp);
 1947       __ andptr(swap_reg, 3 - os::vm_page_size());
 1948 
 1949       // Save the test result, for recursive case, the result is zero
 1950       __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
 1951       __ jcc(Assembler::notEqual, slow_path_lock);
 1952     } else {
 1953       __ jmp(slow_path_lock);
 1954     }
 1955 
 1956     // Slow path will re-enter here
 1957 
 1958     __ bind(lock_done);
 1959   }
 1960 
 1961   // Finally just about ready to make the JNI call
 1962 
 1963   // get JNIEnv* which is first argument to native
 1964   __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
 1965 
 1966   // Now set thread in native
 1967   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
 1968 
 1969   __ call(RuntimeAddress(native_func));
 1970 
 1971   // Verify or restore cpu control state after JNI call
 1972   __ restore_cpu_control_state_after_jni();
 1973 
 1974   // Unpack native results.
 1975   switch (ret_type) {
 1976   case T_BOOLEAN: __ c2bool(rax);            break;
 1977   case T_CHAR   : __ movzwl(rax, rax);       break;
 1978   case T_BYTE   : __ sign_extend_byte (rax); break;
 1979   case T_SHORT  : __ sign_extend_short(rax); break;
 1980   case T_INT    : /* nothing to do */        break;
 1981   case T_DOUBLE :
 1982   case T_FLOAT  :
 1983     // Result is in xmm0; we'll save as needed
 1984     break;
 1985   case T_ARRAY:                 // Really a handle
 1986   case T_OBJECT:                // Really a handle
 1987       break; // can't de-handlize until after safepoint check
 1988   case T_VOID: break;
 1989   case T_LONG: break;
 1990   default       : ShouldNotReachHere();
 1991   }
 1992 
 1993   Label after_transition;
 1994 
 1995   // Switch thread to "native transition" state before reading the synchronization state.
 1996   // This additional state is necessary because reading and testing the synchronization
 1997   // state is not atomic w.r.t. GC, as this scenario demonstrates:
 1998   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
 1999   //     VM thread changes sync state to synchronizing and suspends threads for GC.
 2000   //     Thread A is resumed to finish this native method, but doesn't block here since it
 2001   //     didn't see any synchronization in progress, and escapes.
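  // So the transition goes through _thread_in_native_trans: store the new
  // state, force it visible with a full fence, and only then poll for a
  // safepoint or a pending suspend request.  The slow path below calls
  // JavaThread::check_special_condition_for_native_trans() by hand because
  // call_VM would forward a pending exception and never return here.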
2002 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans); 2003 2004 // Force this write out before the read below 2005 __ membar(Assembler::Membar_mask_bits( 2006 Assembler::LoadLoad | Assembler::LoadStore | 2007 Assembler::StoreLoad | Assembler::StoreStore)); 2008 2009 // check for safepoint operation in progress and/or pending suspend requests 2010 { 2011 Label Continue; 2012 Label slow_path; 2013 2014 __ safepoint_poll(slow_path, r15_thread, true /* at_return */, false /* in_nmethod */); 2015 2016 __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); 2017 __ jcc(Assembler::equal, Continue); 2018 __ bind(slow_path); 2019 2020 // Don't use call_VM as it will see a possible pending exception and forward it 2021 // and never return here preventing us from clearing _last_native_pc down below. 2022 // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are 2023 // preserved and correspond to the bcp/locals pointers. So we do a runtime call 2024 // by hand. 2025 // 2026 __ vzeroupper(); 2027 save_native_result(masm, ret_type, stack_slots); 2028 __ mov(c_rarg0, r15_thread); 2029 __ mov(r12, rsp); // remember sp 2030 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2031 __ andptr(rsp, -16); // align stack as required by ABI 2032 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); 2033 __ mov(rsp, r12); // restore sp 2034 __ reinit_heapbase(); 2035 // Restore any method result value 2036 restore_native_result(masm, ret_type, stack_slots); 2037 __ bind(Continue); 2038 } 2039 2040 // change thread state 2041 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); 2042 __ bind(after_transition); 2043 2044 Label reguard; 2045 Label reguard_done; 2046 __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); 2047 __ jcc(Assembler::equal, reguard); 2048 __ bind(reguard_done); 2049 2050 // native result if any is live 2051 2052 // Unlock 2053 Label unlock_done; 2054 Label slow_path_unlock; 2055 if (method->is_synchronized()) { 2056 2057 // Get locked oop from the handle we passed to jni 2058 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 2059 2060 Label done; 2061 2062 if (!UseHeavyMonitors) { 2063 // Simple recursive lock? 
2064 __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD); 2065 __ jcc(Assembler::equal, done); 2066 } 2067 2068 // Must save rax if it is live now because cmpxchg must use it 2069 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2070 save_native_result(masm, ret_type, stack_slots); 2071 } 2072 2073 2074 if (!UseHeavyMonitors) { 2075 // get address of the stack lock 2076 __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2077 // get old displaced header 2078 __ movptr(old_hdr, Address(rax, 0)); 2079 2080 // Atomic swap old header if oop still contains the stack lock 2081 __ lock(); 2082 __ cmpxchgptr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2083 __ jcc(Assembler::notEqual, slow_path_unlock); 2084 } else { 2085 __ jmp(slow_path_unlock); 2086 } 2087 2088 // slow path re-enters here 2089 __ bind(unlock_done); 2090 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2091 restore_native_result(masm, ret_type, stack_slots); 2092 } 2093 2094 __ bind(done); 2095 2096 } 2097 { 2098 SkipIfEqual skip(masm, &DTraceMethodProbes, false); 2099 save_native_result(masm, ret_type, stack_slots); 2100 __ mov_metadata(c_rarg1, method()); 2101 __ call_VM_leaf( 2102 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 2103 r15_thread, c_rarg1); 2104 restore_native_result(masm, ret_type, stack_slots); 2105 } 2106 2107 __ reset_last_Java_frame(false); 2108 2109 // Unbox oop result, e.g. JNIHandles::resolve value. 2110 if (is_reference_type(ret_type)) { 2111 __ resolve_jobject(rax /* value */, 2112 r15_thread /* thread */, 2113 rcx /* tmp */); 2114 } 2115 2116 if (CheckJNICalls) { 2117 // clear_pending_jni_exception_check 2118 __ movptr(Address(r15_thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD); 2119 } 2120 2121 // reset handle block 2122 __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset())); 2123 __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD); 2124 2125 // pop our frame 2126 2127 __ leave(); 2128 2129 // Any exception pending? 2130 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); 2131 __ jcc(Assembler::notEqual, exception_pending); 2132 2133 // Return 2134 2135 __ ret(0); 2136 2137 // Unexpected paths are out of line and go here 2138 2139 // forward the exception 2140 __ bind(exception_pending); 2141 2142 // and forward the exception 2143 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 2144 2145 // Slow path locking & unlocking 2146 if (method->is_synchronized()) { 2147 2148 // BEGIN Slow path lock 2149 __ bind(slow_path_lock); 2150 2151 // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM 2152 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2153 2154 // protect the args we've loaded 2155 save_args(masm, total_c_args, c_arg, out_regs); 2156 2157 __ mov(c_rarg0, obj_reg); 2158 __ mov(c_rarg1, lock_reg); 2159 __ mov(c_rarg2, r15_thread); 2160 2161 // Not a leaf but we have last_Java_frame setup as we want 2162 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); 2163 restore_args(masm, total_c_args, c_arg, out_regs); 2164 2165 #ifdef ASSERT 2166 { Label L; 2167 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); 2168 __ jcc(Assembler::equal, L); 2169 __ stop("no pending exception allowed on exit from monitorenter"); 2170 __ bind(L); 2171 } 2172 #endif 2173 __ jmp(lock_done); 2174 2175 // END Slow path lock 2176 2177 // BEGIN Slow path unlock 2178 __ bind(slow_path_unlock); 2179 2180 // If we haven't already saved the native result we must save it now as xmm registers 2181 // are still exposed. 2182 __ vzeroupper(); 2183 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2184 save_native_result(masm, ret_type, stack_slots); 2185 } 2186 2187 __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2188 2189 __ mov(c_rarg0, obj_reg); 2190 __ mov(c_rarg2, r15_thread); 2191 __ mov(r12, rsp); // remember sp 2192 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2193 __ andptr(rsp, -16); // align stack as required by ABI 2194 2195 // Save pending exception around call to VM (which contains an EXCEPTION_MARK) 2196 // NOTE that obj_reg == rbx currently 2197 __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset()))); 2198 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); 2199 2200 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2201 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); 2202 __ mov(rsp, r12); // restore sp 2203 __ reinit_heapbase(); 2204 #ifdef ASSERT 2205 { 2206 Label L; 2207 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD); 2208 __ jcc(Assembler::equal, L); 2209 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); 2210 __ bind(L); 2211 } 2212 #endif /* ASSERT */ 2213 2214 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx); 2215 2216 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2217 restore_native_result(masm, ret_type, stack_slots); 2218 } 2219 __ jmp(unlock_done); 2220 2221 // END Slow path unlock 2222 2223 } // synchronized 2224 2225 // SLOW PATH Reguard the stack if needed 2226 2227 __ bind(reguard); 2228 __ vzeroupper(); 2229 save_native_result(masm, ret_type, stack_slots); 2230 __ mov(r12, rsp); // remember sp 2231 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2232 __ andptr(rsp, -16); // align stack as required by ABI 2233 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); 2234 __ mov(rsp, r12); // restore sp 2235 __ reinit_heapbase(); 2236 restore_native_result(masm, ret_type, stack_slots); 2237 // and continue 2238 __ jmp(reguard_done); 2239 2240 2241 2242 __ flush(); 2243 2244 nmethod *nm = nmethod::new_native_nmethod(method, 2245 compile_id, 2246 masm->code(), 2247 vep_offset, 2248 frame_complete, 2249 stack_slots / VMRegImpl::slots_per_word, 2250 (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
 2251                                             in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
 2252                                             oop_maps);
 2253 
 2254   return nm;
 2255 }
 2256 
 2257 // this function returns the adjustment size (in number of words) to a c2i adapter
 2258 // activation for use during deoptimization
 2259 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
 2260   return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
 2261 }
 2262 
 2263 
 2264 uint SharedRuntime::out_preserve_stack_slots() {
 2265   return 0;
 2266 }
 2267 
 2268 
 2269 // Number of stack slots between incoming argument block and the start of
 2270 // a new frame.  The PROLOG must add this many slots to the stack.  The
 2271 // EPILOG must remove this many slots.  amd64 needs two slots for
 2272 // return address.
 2273 uint SharedRuntime::in_preserve_stack_slots() {
 2274   return 4 + 2 * VerifyStackAtCalls;
 2275 }
 2276 
 2277 //------------------------------generate_deopt_blob----------------------------
 2278 void SharedRuntime::generate_deopt_blob() {
 2279   // Allocate space for the code
 2280   ResourceMark rm;
 2281   // Setup code generation tools
 2282   int pad = 0;
 2283   if (UseAVX > 2) {
 2284     pad += 1024;
 2285   }
 2286 #if INCLUDE_JVMCI
 2287   if (EnableJVMCI) {
 2288     pad += 512; // Increase the buffer size when compiling for JVMCI
 2289   }
 2290 #endif
 2291   CodeBuffer buffer("deopt_blob", 2560+pad, 1024);
 2292   MacroAssembler* masm = new MacroAssembler(&buffer);
 2293   int frame_size_in_words;
 2294   OopMap* map = NULL;
 2295   OopMapSet *oop_maps = new OopMapSet();
 2296 
 2297   // -------------
 2298   // This code enters when returning to a de-optimized nmethod.  A return
 2299   // address has been pushed on the stack, and return values are in
 2300   // registers.
 2301   // If we are doing a normal deopt then we were called from the patched
 2302   // nmethod from the point we returned to the nmethod. So the return
 2303   // address on the stack is wrong by NativeCall::instruction_size.
 2304   // We will adjust the value so it looks like we have the original return
 2305   // address on the stack (like when we eagerly deoptimized).
 2306   // In the case of an exception pending when deoptimizing, we enter
 2307   // with a return address on the stack that points after the call we patched
 2308   // into the exception handler. We have the following register state from,
 2309   // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
 2310   //    rax: exception oop
 2311   //    rbx: exception handler
 2312   //    rdx: throwing pc
 2313   // So in this case we simply jam rdx into the useless return address and
 2314   // the stack looks just like we want.
 2315   //
 2316   // At this point we need to de-opt.  We save the argument return
 2317   // registers.  We call the first C routine, fetch_unroll_info().  This
 2318   // routine captures the return values and returns a structure which
 2319   // describes the current frame size and the sizes of all replacement frames.
 2320   // The current frame is compiled code and may contain many inlined
 2321   // functions, each with their own JVM state.  We pop the current frame, then
 2322   // push all the new frames.  Then we call the C routine unpack_frames() to
 2323   // populate these frames.  Finally unpack_frames() returns us the new target
 2324   // address.  Notice that callee-save registers are BLOWN here; they have
 2325   // already been captured in the vframeArray at the time the return PC was
 2326   // patched.
 2327   address start = __ pc();
 2328   Label cont;
 2329 
 2330   // Prolog for non exception case!
2331 
 2332   // Save everything in sight.
 2333   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ true);
 2334 
 2335   // Normal deoptimization.  Save exec mode for unpack_frames.
 2336   __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved
 2337   __ jmp(cont);
 2338 
 2339   int reexecute_offset = __ pc() - start;
 2340 #if INCLUDE_JVMCI && !defined(COMPILER1)
 2341   if (EnableJVMCI && UseJVMCICompiler) {
 2342     // JVMCI does not use this kind of deoptimization
 2343     __ should_not_reach_here();
 2344   }
 2345 #endif
 2346 
 2347   // Reexecute case
 2348   // return address is the pc that describes what bci to re-execute at
 2349 
 2350   // No need to update map as each call to save_live_registers will produce identical oopmap
 2351   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ true);
 2352 
 2353   __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
 2354   __ jmp(cont);
 2355 
 2356 #if INCLUDE_JVMCI
 2357   Label after_fetch_unroll_info_call;
 2358   int implicit_exception_uncommon_trap_offset = 0;
 2359   int uncommon_trap_offset = 0;
 2360 
 2361   if (EnableJVMCI) {
 2362     implicit_exception_uncommon_trap_offset = __ pc() - start;
 2363 
 2364     __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
 2365     __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), (int32_t)NULL_WORD);
 2366 
 2367     uncommon_trap_offset = __ pc() - start;
 2368 
 2369     // Save everything in sight.
 2370     RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ true);
 2371     // fetch_unroll_info needs to call last_java_frame()
 2372     __ set_last_Java_frame(noreg, noreg, NULL);
 2373 
 2374     __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())));
 2375     __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1);
 2376 
 2377     __ movl(r14, (int32_t)Deoptimization::Unpack_reexecute);
 2378     __ mov(c_rarg0, r15_thread);
 2379     __ movl(c_rarg2, r14); // exec mode
 2380     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
 2381     oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
 2382 
 2383     __ reset_last_Java_frame(false);
 2384 
 2385     __ jmp(after_fetch_unroll_info_call);
 2386   } // EnableJVMCI
 2387 #endif // INCLUDE_JVMCI
 2388 
 2389   int exception_offset = __ pc() - start;
 2390 
 2391   // Prolog for exception case
 2392 
 2393   // all registers are dead at this entry point, except for rax, and
 2394   // rdx which contain the exception oop and exception pc
 2395   // respectively.  Set them in TLS and fall thru to the
 2396   // unpack_with_exception_in_tls entry point.
 2397 
 2398   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
 2399   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
 2400 
 2401   int exception_in_tls_offset = __ pc() - start;
 2402 
 2403   // new implementation because exception oop is now passed in JavaThread
 2404 
 2405   // Prolog for exception case
 2406   // All registers must be preserved because they might be used by LinearScan
 2407   // Exception oop and throwing PC are passed in JavaThread
 2408   // tos: stack at point of call to method that threw the exception (i.e. only
 2409   // args are on the stack, no return address)
 2410 
 2411   // make room on stack for the return address
 2412   // It will be patched later with the throwing pc. The correct value is not
 2413   // available now because loading it from memory would destroy registers.
 2414   __ push(0);
 2415 
 2416   // Save everything in sight.
2417 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ true); 2418 2419 // Now it is safe to overwrite any register 2420 2421 // Deopt during an exception. Save exec mode for unpack_frames. 2422 __ movl(r14, Deoptimization::Unpack_exception); // callee-saved 2423 2424 // load throwing pc from JavaThread and patch it as the return address 2425 // of the current frame. Then clear the field in JavaThread 2426 2427 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 2428 __ movptr(Address(rbp, wordSize), rdx); 2429 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD); 2430 2431 #ifdef ASSERT 2432 // verify that there is really an exception oop in JavaThread 2433 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 2434 __ verify_oop(rax); 2435 2436 // verify that there is no pending exception 2437 Label no_pending_exception; 2438 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 2439 __ testptr(rax, rax); 2440 __ jcc(Assembler::zero, no_pending_exception); 2441 __ stop("must not have pending exception here"); 2442 __ bind(no_pending_exception); 2443 #endif 2444 2445 __ bind(cont); 2446 2447 // Call C code. Need thread and this frame, but NOT official VM entry 2448 // crud. We cannot block on this call, no GC can happen. 2449 // 2450 // UnrollBlock* fetch_unroll_info(JavaThread* thread) 2451 2452 // fetch_unroll_info needs to call last_java_frame(). 2453 2454 __ set_last_Java_frame(noreg, noreg, NULL); 2455 #ifdef ASSERT 2456 { Label L; 2457 __ cmpptr(Address(r15_thread, 2458 JavaThread::last_Java_fp_offset()), 2459 (int32_t)0); 2460 __ jcc(Assembler::equal, L); 2461 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); 2462 __ bind(L); 2463 } 2464 #endif // ASSERT 2465 __ mov(c_rarg0, r15_thread); 2466 __ movl(c_rarg1, r14); // exec_mode 2467 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); 2468 2469 // Need to have an oopmap that tells fetch_unroll_info where to 2470 // find any register it might need. 2471 oop_maps->add_gc_map(__ pc() - start, map); 2472 2473 __ reset_last_Java_frame(false); 2474 2475 #if INCLUDE_JVMCI 2476 if (EnableJVMCI) { 2477 __ bind(after_fetch_unroll_info_call); 2478 } 2479 #endif 2480 2481 // Load UnrollBlock* into rdi 2482 __ mov(rdi, rax); 2483 2484 __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); 2485 Label noException; 2486 __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending? 2487 __ jcc(Assembler::notEqual, noException); 2488 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 2489 // QQQ this is useless it was NULL above 2490 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 2491 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD); 2492 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD); 2493 2494 __ verify_oop(rax); 2495 2496 // Overwrite the result registers with the exception results. 2497 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 2498 // I think this is useless 2499 __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx); 2500 2501 __ bind(noException); 2502 2503 // Only register save data is on the stack. 2504 // Now restore the result registers. Everything else is either dead 2505 // or captured in the vframeArray. 
2506   RegisterSaver::restore_result_registers(masm);
 2507 
 2508   // All of the register save area has been popped off the stack. Only the
 2509   // return address remains.
 2510 
 2511   // Pop all the frames we must move/replace.
 2512   //
 2513   // Frame picture (youngest to oldest)
 2514   // 1: self-frame (no frame link)
 2515   // 2: deopting frame  (no frame link)
 2516   // 3: caller of deopting frame (could be compiled/interpreted).
 2517   //
 2518   // Note: by leaving the return address of self-frame on the stack
 2519   // and using the size of frame 2 to adjust the stack
 2520   // when we are done the return to frame 3 will still be on the stack.
 2521 
 2522   // Pop deoptimized frame
 2523   __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
 2524   __ addptr(rsp, rcx);
 2525 
 2526   // rsp should be pointing at the return address to the caller (3)
 2527 
 2528   // Pick up the initial fp we should save
 2529   // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved)
 2530   __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
 2531 
 2532 #ifdef ASSERT
 2533   // Compilers generate code that bangs the stack by as much as the
 2534   // interpreter would need. So this stack banging should never
 2535   // trigger a fault. Verify that it does not on non product builds.
 2536   __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
 2537   __ bang_stack_size(rbx, rcx);
 2538 #endif
 2539 
 2540   // Load address of array of frame pcs into rcx
 2541   __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
 2542 
 2543   // Trash the old pc
 2544   __ addptr(rsp, wordSize);
 2545 
 2546   // Load address of array of frame sizes into rsi
 2547   __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
 2548 
 2549   // Load counter into rdx
 2550   __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
 2551 
 2552   // Now adjust the caller's stack to make up for the extra locals
 2553   // but record the original sp so that we can save it in the skeletal interpreter
 2554   // frame and the stack walking of interpreter_sender will get the unextended sp
 2555   // value and not the "real" sp value.
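  // The loop below builds one skeletal interpreter frame per frame to be
  // unpacked: push the saved return pc, link a new rbp with enter(), and
  // reserve the rest of the frame.  interpreter_frame_sender_sp is filled in
  // so the half-built frames stay walkable; interpreter_frame_last_sp is
  // corrected later by layout_activation_impl.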
2556 2557 const Register sender_sp = r8; 2558 2559 __ mov(sender_sp, rsp); 2560 __ movl(rbx, Address(rdi, 2561 Deoptimization::UnrollBlock:: 2562 caller_adjustment_offset_in_bytes())); 2563 __ subptr(rsp, rbx); 2564 2565 // Push interpreter frames in a loop 2566 Label loop; 2567 __ bind(loop); 2568 __ movptr(rbx, Address(rsi, 0)); // Load frame size 2569 __ subptr(rbx, 2*wordSize); // We'll push pc and ebp by hand 2570 __ pushptr(Address(rcx, 0)); // Save return address 2571 __ enter(); // Save old & set new ebp 2572 __ subptr(rsp, rbx); // Prolog 2573 // This value is corrected by layout_activation_impl 2574 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD ); 2575 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable 2576 __ mov(sender_sp, rsp); // Pass sender_sp to next frame 2577 __ addptr(rsi, wordSize); // Bump array pointer (sizes) 2578 __ addptr(rcx, wordSize); // Bump array pointer (pcs) 2579 __ decrementl(rdx); // Decrement counter 2580 __ jcc(Assembler::notZero, loop); 2581 __ pushptr(Address(rcx, 0)); // Save final return address 2582 2583 // Re-push self-frame 2584 __ enter(); // Save old & set new ebp 2585 2586 // Allocate a full sized register save area. 2587 // Return address and rbp are in place, so we allocate two less words. 2588 __ subptr(rsp, (frame_size_in_words - 2) * wordSize); 2589 2590 // Restore frame locals after moving the frame 2591 __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0); 2592 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 2593 2594 // Call C code. Need thread but NOT official VM entry 2595 // crud. We cannot block on this call, no GC can happen. Call should 2596 // restore return values to their stack-slots with the new SP. 2597 // 2598 // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) 2599 2600 // Use rbp because the frames look interpreted now 2601 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 2602 // Don't need the precise return PC here, just precise enough to point into this code blob. 2603 address the_pc = __ pc(); 2604 __ set_last_Java_frame(noreg, rbp, the_pc); 2605 2606 __ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by ABI 2607 __ mov(c_rarg0, r15_thread); 2608 __ movl(c_rarg1, r14); // second arg: exec_mode 2609 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 2610 // Revert SP alignment after call since we're going to do some SP relative addressing below 2611 __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset())); 2612 2613 // Set an oopmap for the call site 2614 // Use the same PC we used for the last java frame 2615 oop_maps->add_gc_map(the_pc - start, 2616 new OopMap( frame_size_in_words, 0 )); 2617 2618 // Clear fp AND pc 2619 __ reset_last_Java_frame(true); 2620 2621 // Collect return values 2622 __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes())); 2623 __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes())); 2624 // I think this is useless (throwing pc?) 2625 __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes())); 2626 2627 // Pop self-frame. 
2628 __ leave(); // Epilog 2629 2630 // Jump to interpreter 2631 __ ret(0); 2632 2633 // Make sure all code is generated 2634 masm->flush(); 2635 2636 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); 2637 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); 2638 #if INCLUDE_JVMCI 2639 if (EnableJVMCI) { 2640 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); 2641 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); 2642 } 2643 #endif 2644 } 2645 2646 #ifdef COMPILER2 2647 //------------------------------generate_uncommon_trap_blob-------------------- 2648 void SharedRuntime::generate_uncommon_trap_blob() { 2649 // Allocate space for the code 2650 ResourceMark rm; 2651 // Setup code generation tools 2652 CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); 2653 MacroAssembler* masm = new MacroAssembler(&buffer); 2654 2655 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); 2656 2657 address start = __ pc(); 2658 2659 if (UseRTMLocking) { 2660 // Abort RTM transaction before possible nmethod deoptimization. 2661 __ xabort(0); 2662 } 2663 2664 // Push self-frame. We get here with a return address on the 2665 // stack, so rsp is 8-byte aligned until we allocate our frame. 2666 __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog! 2667 2668 // No callee saved registers. rbp is assumed implicitly saved 2669 __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); 2670 2671 // compiler left unloaded_class_index in j_rarg0 move to where the 2672 // runtime expects it. 2673 __ movl(c_rarg1, j_rarg0); 2674 2675 __ set_last_Java_frame(noreg, noreg, NULL); 2676 2677 // Call C code. Need thread but NOT official VM entry 2678 // crud. We cannot block on this call, no GC can happen. Call should 2679 // capture callee-saved registers as well as return values. 2680 // Thread is in rdi already. 2681 // 2682 // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); 2683 2684 __ mov(c_rarg0, r15_thread); 2685 __ movl(c_rarg2, Deoptimization::Unpack_uncommon_trap); 2686 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); 2687 2688 // Set an oopmap for the call site 2689 OopMapSet* oop_maps = new OopMapSet(); 2690 OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); 2691 2692 // location of rbp is known implicitly by the frame sender code 2693 2694 oop_maps->add_gc_map(__ pc() - start, map); 2695 2696 __ reset_last_Java_frame(false); 2697 2698 // Load UnrollBlock* into rdi 2699 __ mov(rdi, rax); 2700 2701 #ifdef ASSERT 2702 { Label L; 2703 __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()), 2704 (int32_t)Deoptimization::Unpack_uncommon_trap); 2705 __ jcc(Assembler::equal, L); 2706 __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); 2707 __ bind(L); 2708 } 2709 #endif 2710 2711 // Pop all the frames we must move/replace. 2712 // 2713 // Frame picture (youngest to oldest) 2714 // 1: self-frame (no frame link) 2715 // 2: deopting frame (no frame link) 2716 // 3: caller of deopting frame (could be compiled/interpreted). 2717 2718 // Pop self-frame. We have no frame, and must rely only on rax and rsp. 2719 __ addptr(rsp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt); // Epilog! 
2720 2721 // Pop deoptimized frame (int) 2722 __ movl(rcx, Address(rdi, 2723 Deoptimization::UnrollBlock:: 2724 size_of_deoptimized_frame_offset_in_bytes())); 2725 __ addptr(rsp, rcx); 2726 2727 // rsp should be pointing at the return address to the caller (3) 2728 2729 // Pick up the initial fp we should save 2730 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) 2731 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); 2732 2733 #ifdef ASSERT 2734 // Compilers generate code that bang the stack by as much as the 2735 // interpreter would need. So this stack banging should never 2736 // trigger a fault. Verify that it does not on non product builds. 2737 __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); 2738 __ bang_stack_size(rbx, rcx); 2739 #endif 2740 2741 // Load address of array of frame pcs into rcx (address*) 2742 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); 2743 2744 // Trash the return pc 2745 __ addptr(rsp, wordSize); 2746 2747 // Load address of array of frame sizes into rsi (intptr_t*) 2748 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock:: frame_sizes_offset_in_bytes())); 2749 2750 // Counter 2751 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock:: number_of_frames_offset_in_bytes())); // (int) 2752 2753 // Now adjust the caller's stack to make up for the extra locals but 2754 // record the original sp so that we can save it in the skeletal 2755 // interpreter frame and the stack walking of interpreter_sender 2756 // will get the unextended sp value and not the "real" sp value. 2757 2758 const Register sender_sp = r8; 2759 2760 __ mov(sender_sp, rsp); 2761 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock:: caller_adjustment_offset_in_bytes())); // (int) 2762 __ subptr(rsp, rbx); 2763 2764 // Push interpreter frames in a loop 2765 Label loop; 2766 __ bind(loop); 2767 __ movptr(rbx, Address(rsi, 0)); // Load frame size 2768 __ subptr(rbx, 2 * wordSize); // We'll push pc and rbp by hand 2769 __ pushptr(Address(rcx, 0)); // Save return address 2770 __ enter(); // Save old & set new rbp 2771 __ subptr(rsp, rbx); // Prolog 2772 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), 2773 sender_sp); // Make it walkable 2774 // This value is corrected by layout_activation_impl 2775 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD ); 2776 __ mov(sender_sp, rsp); // Pass sender_sp to next frame 2777 __ addptr(rsi, wordSize); // Bump array pointer (sizes) 2778 __ addptr(rcx, wordSize); // Bump array pointer (pcs) 2779 __ decrementl(rdx); // Decrement counter 2780 __ jcc(Assembler::notZero, loop); 2781 __ pushptr(Address(rcx, 0)); // Save final return address 2782 2783 // Re-push self-frame 2784 __ enter(); // Save old & set new rbp 2785 __ subptr(rsp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt); 2786 // Prolog 2787 2788 // Use rbp because the frames look interpreted now 2789 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 2790 // Don't need the precise return PC here, just precise enough to point into this code blob. 2791 address the_pc = __ pc(); 2792 __ set_last_Java_frame(noreg, rbp, the_pc); 2793 2794 // Call C code. Need thread but NOT official VM entry 2795 // crud. We cannot block on this call, no GC can happen. 
Call should 2796 // restore return values to their stack-slots with the new SP. 2797 // Thread is in rdi already. 2798 // 2799 // BasicType unpack_frames(JavaThread* thread, int exec_mode); 2800 2801 __ andptr(rsp, -(StackAlignmentInBytes)); // Align SP as required by ABI 2802 __ mov(c_rarg0, r15_thread); 2803 __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap); 2804 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 2805 2806 // Set an oopmap for the call site 2807 // Use the same PC we used for the last java frame 2808 oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); 2809 2810 // Clear fp AND pc 2811 __ reset_last_Java_frame(true); 2812 2813 // Pop self-frame. 2814 __ leave(); // Epilog 2815 2816 // Jump to interpreter 2817 __ ret(0); 2818 2819 // Make sure all code is generated 2820 masm->flush(); 2821 2822 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, 2823 SimpleRuntimeFrame::framesize >> 1); 2824 } 2825 #endif // COMPILER2 2826 2827 //------------------------------generate_handler_blob------ 2828 // 2829 // Generate a special Compile2Runtime blob that saves all registers, 2830 // and setup oopmap. 2831 // 2832 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { 2833 assert(StubRoutines::forward_exception_entry() != NULL, 2834 "must be generated before"); 2835 2836 ResourceMark rm; 2837 OopMapSet *oop_maps = new OopMapSet(); 2838 OopMap* map; 2839 2840 // Allocate space for the code. Setup code generation tools. 2841 CodeBuffer buffer("handler_blob", 2048, 1024); 2842 MacroAssembler* masm = new MacroAssembler(&buffer); 2843 2844 address start = __ pc(); 2845 address call_pc = NULL; 2846 int frame_size_in_words; 2847 bool cause_return = (poll_type == POLL_AT_RETURN); 2848 bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); 2849 2850 if (UseRTMLocking) { 2851 // Abort RTM transaction before calling runtime 2852 // because critical section will be large and will be 2853 // aborted anyway. Also nmethod could be deoptimized. 2854 __ xabort(0); 2855 } 2856 2857 // Make room for return address (or push it again) 2858 if (!cause_return) { 2859 __ push(rbx); 2860 } 2861 2862 // Save registers, fpu state, and flags 2863 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors); 2864 2865 // The following is basically a call_VM. However, we need the precise 2866 // address of the call in order to generate an oopmap. Hence, we do all the 2867 // work ourselves. 2868 2869 __ set_last_Java_frame(noreg, noreg, NULL); 2870 2871 // The return address must always be correct so that frame constructor never 2872 // sees an invalid pc. 2873 2874 if (!cause_return) { 2875 // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack. 2876 // Additionally, rbx is a callee saved register and we can look at it later to determine 2877 // if someone changed the return address for us! 2878 __ movptr(rbx, Address(r15_thread, JavaThread::saved_exception_pc_offset())); 2879 __ movptr(Address(rbp, wordSize), rbx); 2880 } 2881 2882 // Do the call 2883 __ mov(c_rarg0, r15_thread); 2884 __ call(RuntimeAddress(call_ptr)); 2885 2886 // Set an oopmap for the call site. This oopmap will map all 2887 // oop-registers and debug-info registers as callee-saved. This 2888 // will allow deoptimization at this safepoint to find all possible 2889 // debug-info recordings, as well as let GC find all oops. 
2890 2891 oop_maps->add_gc_map( __ pc() - start, map); 2892 2893 Label noException; 2894 2895 __ reset_last_Java_frame(false); 2896 2897 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); 2898 __ jcc(Assembler::equal, noException); 2899 2900 // Exception pending 2901 2902 RegisterSaver::restore_live_registers(masm, save_vectors); 2903 2904 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 2905 2906 // No exception case 2907 __ bind(noException); 2908 2909 Label no_adjust; 2910 #ifdef ASSERT 2911 Label bail; 2912 #endif 2913 if (!cause_return) { 2914 Label no_prefix, not_special; 2915 2916 // If our stashed return pc was modified by the runtime we avoid touching it 2917 __ cmpptr(rbx, Address(rbp, wordSize)); 2918 __ jccb(Assembler::notEqual, no_adjust); 2919 2920 // Skip over the poll instruction. 2921 // See NativeInstruction::is_safepoint_poll() 2922 // Possible encodings: 2923 // 85 00 test %eax,(%rax) 2924 // 85 01 test %eax,(%rcx) 2925 // 85 02 test %eax,(%rdx) 2926 // 85 03 test %eax,(%rbx) 2927 // 85 06 test %eax,(%rsi) 2928 // 85 07 test %eax,(%rdi) 2929 // 2930 // 41 85 00 test %eax,(%r8) 2931 // 41 85 01 test %eax,(%r9) 2932 // 41 85 02 test %eax,(%r10) 2933 // 41 85 03 test %eax,(%r11) 2934 // 41 85 06 test %eax,(%r14) 2935 // 41 85 07 test %eax,(%r15) 2936 // 2937 // 85 04 24 test %eax,(%rsp) 2938 // 41 85 04 24 test %eax,(%r12) 2939 // 85 45 00 test %eax,0x0(%rbp) 2940 // 41 85 45 00 test %eax,0x0(%r13) 2941 2942 __ cmpb(Address(rbx, 0), NativeTstRegMem::instruction_rex_b_prefix); 2943 __ jcc(Assembler::notEqual, no_prefix); 2944 __ addptr(rbx, 1); 2945 __ bind(no_prefix); 2946 #ifdef ASSERT 2947 __ movptr(rax, rbx); // remember where 0x85 should be, for verification below 2948 #endif 2949 // r12/r13/rsp/rbp base encoding takes 3 bytes with the following register values: 2950 // r12/rsp 0x04 2951 // r13/rbp 0x05 2952 __ movzbq(rcx, Address(rbx, 1)); 2953 __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05 2954 __ subptr(rcx, 4); // looking for 0x00 .. 0x01 2955 __ cmpptr(rcx, 1); 2956 __ jcc(Assembler::above, not_special); 2957 __ addptr(rbx, 1); 2958 __ bind(not_special); 2959 #ifdef ASSERT 2960 // Verify the correct encoding of the poll we're about to skip. 2961 __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl); 2962 __ jcc(Assembler::notEqual, bail); 2963 // Mask out the modrm bits 2964 __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask); 2965 // rax encodes to 0, so if the bits are nonzero it's incorrect 2966 __ jcc(Assembler::notZero, bail); 2967 #endif 2968 // Adjust return pc forward to step over the safepoint poll instruction 2969 __ addptr(rbx, 2); 2970 __ movptr(Address(rbp, wordSize), rbx); 2971 } 2972 2973 __ bind(no_adjust); 2974 // Normal exit, restore registers and exit. 2975 RegisterSaver::restore_live_registers(masm, save_vectors); 2976 __ ret(0); 2977 2978 #ifdef ASSERT 2979 __ bind(bail); 2980 __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); 2981 #endif 2982 2983 // Make sure all code is generated 2984 masm->flush(); 2985 2986 // Fill-out other meta info 2987 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); 2988 } 2989 2990 // 2991 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss 2992 // 2993 // Generate a stub that calls into vm to find out the proper destination 2994 // of a java call. 
All the argument registers are live at this point 2995 // but since this is generic code we don't know what they are and the caller 2996 // must do any gc of the args. 2997 // 2998 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { 2999 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); 3000 3001 // allocate space for the code 3002 ResourceMark rm; 3003 3004 CodeBuffer buffer(name, 1200, 512); 3005 MacroAssembler* masm = new MacroAssembler(&buffer); 3006 3007 int frame_size_in_words; 3008 3009 OopMapSet *oop_maps = new OopMapSet(); 3010 OopMap* map = NULL; 3011 3012 int start = __ offset(); 3013 3014 // No need to save vector registers since they are caller-saved anyway. 3015 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false); 3016 3017 int frame_complete = __ offset(); 3018 3019 __ set_last_Java_frame(noreg, noreg, NULL); 3020 3021 __ mov(c_rarg0, r15_thread); 3022 3023 __ call(RuntimeAddress(destination)); 3024 3025 3026 // Set an oopmap for the call site. 3027 // We need this not only for callee-saved registers, but also for volatile 3028 // registers that the compiler might be keeping live across a safepoint. 3029 3030 oop_maps->add_gc_map( __ offset() - start, map); 3031 3032 // rax contains the address we are going to jump to assuming no exception got installed 3033 3034 // clear last_Java_sp 3035 __ reset_last_Java_frame(false); 3036 // check for pending exceptions 3037 Label pending; 3038 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); 3039 __ jcc(Assembler::notEqual, pending); 3040 3041 // get the returned Method* 3042 __ get_vm_result_2(rbx, r15_thread); 3043 __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx); 3044 3045 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3046 3047 RegisterSaver::restore_live_registers(masm); 3048 3049 // We are back the the original state on entry and ready to go. 3050 3051 __ jmp(rax); 3052 3053 // Pending exception after the safepoint 3054 3055 __ bind(pending); 3056 3057 RegisterSaver::restore_live_registers(masm); 3058 3059 // exception pending => remove activation and forward to exception handler 3060 3061 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD); 3062 3063 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 3064 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3065 3066 // ------------- 3067 // make sure all code is generated 3068 masm->flush(); 3069 3070 // return the blob 3071 // frame_size_words or bytes?? 3072 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); 3073 } 3074 3075 //------------------------------Montgomery multiplication------------------------ 3076 // 3077 3078 #ifndef _WINDOWS 3079 3080 // Subtract 0:b from carry:a. Return carry. 
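// In both variants below, a[] and b[] are little-endian arrays of len 64-bit
// words; a[] is updated in place with a - b, and the return value is the top
// word of (carry:a) - (0:b), i.e. carry minus the final borrow.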
static julong
sub(julong a[], julong b[], julong carry, long len) {
  long long i = 0, cnt = len;
  julong tmp;
  asm volatile("clc; "
               "0: ; "
               "mov (%[b], %[i], 8), %[tmp]; "
               "sbb %[tmp], (%[a], %[i], 8); "
               "inc %[i]; dec %[cnt]; "
               "jne 0b; "
               "mov %[carry], %[tmp]; sbb $0, %[tmp]; "
               : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp)
               : [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
               : "memory");
  return tmp;
}

// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
#define MACC(A, B, T0, T1, T2)                                  \
do {                                                            \
  unsigned long hi, lo;                                         \
  __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4"   \
           : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)  \
           : "r"(A), "a"(B) : "cc");                            \
 } while(0)

// As above, but add twice the double-length result into the
// accumulator.
#define MACC2(A, B, T0, T1, T2)                                 \
do {                                                            \
  unsigned long hi, lo;                                         \
  __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \
           "add %%rax, %2; adc %%rdx, %3; adc $0, %4"           \
           : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)  \
           : "r"(A), "a"(B) : "cc");                            \
 } while(0)

#else //_WINDOWS

static julong
sub(julong a[], julong b[], julong carry, long len) {
  long i;
  julong tmp;
  unsigned char c = 1;
  for (i = 0; i < len; i++) {
    c = _addcarry_u64(c, a[i], ~b[i], &tmp);
    a[i] = tmp;
  }
  c = _addcarry_u64(c, carry, ~0, &tmp);
  return tmp;
}

// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
#define MACC(A, B, T0, T1, T2)                          \
do {                                                    \
  julong hi, lo;                                        \
  lo = _umul128(A, B, &hi);                             \
  unsigned char c = _addcarry_u64(0, lo, T0, &T0);      \
  c = _addcarry_u64(c, hi, T1, &T1);                    \
  _addcarry_u64(c, T2, 0, &T2);                         \
 } while(0)

// As above, but add twice the double-length result into the
// accumulator.
#define MACC2(A, B, T0, T1, T2)                         \
do {                                                    \
  julong hi, lo;                                        \
  lo = _umul128(A, B, &hi);                             \
  unsigned char c = _addcarry_u64(0, lo, T0, &T0);      \
  c = _addcarry_u64(c, hi, T1, &T1);                    \
  _addcarry_u64(c, T2, 0, &T2);                         \
  c = _addcarry_u64(0, lo, T0, &T0);                    \
  c = _addcarry_u64(c, hi, T1, &T1);                    \
  _addcarry_u64(c, T2, 0, &T2);                         \
 } while(0)

#endif //_WINDOWS

// Fast Montgomery multiplication. The derivation of the algorithm is
// in A Cryptographic Library for the Motorola DSP56000,
// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
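//
// In outline: with R = 2^(64*len) and inv satisfying inv * n[0] == -1 (mod 2^64),
// each outer iteration picks m[i] = t0 * inv so that t0 + m[i]*n[0] == 0 (mod 2^64),
// which lets the triple-precision accumulator (t0, t1, t2) shift down one word.
// The result is congruent to a*b*R^-1 (mod n); the trailing sub() loop then
// subtracts n until the overflow word t0 is zero.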

static void NOINLINE
montgomery_multiply(julong a[], julong b[], julong n[],
                    julong m[], julong inv, int len) {
  julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery multiply");

  for (i = 0; i < len; i++) {
    int j;
    for (j = 0; j < i; j++) {
      MACC(a[j], b[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    MACC(a[i], b[0], t0, t1, t2);
    m[i] = t0 * inv;
    MACC(m[i], n[0], t0, t1, t2);

    assert(t0 == 0, "broken Montgomery multiply");

    t0 = t1; t1 = t2; t2 = 0;
  }

  for (i = len; i < 2*len; i++) {
    int j;
    for (j = i-len+1; j < len; j++) {
      MACC(a[j], b[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  while (t0)
    t0 = sub(m, n, t0, len);
}

// Fast Montgomery squaring. This uses asymptotically 25% fewer
// multiplies so it should be up to 25% faster than Montgomery
// multiplication. However, its loop control is more complex and it
// may actually run slower on some machines.

static void NOINLINE
montgomery_square(julong a[], julong n[],
                  julong m[], julong inv, int len) {
  julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery square");

  for (i = 0; i < len; i++) {
    int j;
    int end = (i+1)/2;
    for (j = 0; j < end; j++) {
      MACC2(a[j], a[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a[j], a[j], t0, t1, t2);
    }
    for (; j < i; j++) {
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    m[i] = t0 * inv;
    MACC(m[i], n[0], t0, t1, t2);

    assert(t0 == 0, "broken Montgomery square");

    t0 = t1; t1 = t2; t2 = 0;
  }

  for (i = len; i < 2*len; i++) {
    int start = i-len+1;
    int end = start + (len - start)/2;
    int j;
    for (j = start; j < end; j++) {
      MACC2(a[j], a[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a[j], a[j], t0, t1, t2);
    }
    for (; j < len; j++) {
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  while (t0)
    t0 = sub(m, n, t0, len);
}

// Swap words in a longword.
static julong swap(julong x) {
  return (x << 32) | (x >> 32);
}

// Copy len longwords from s to d, word-swapping as we go. The
// destination array is reversed.
static void reverse_words(julong *s, julong *d, int len) {
  d += len;
  while(len-- > 0) {
    d--;
    *d = swap(*s);
    s++;
  }
}

// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
#define MONTGOMERY_SQUARING_THRESHOLD 64

void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
                                        jint len, jlong inv,
                                        jint *m_ints) {
  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints corresponds to a 16384-bit integer and
  // will use a total of 8k bytes of stack space here.
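  // (With len == 512 jints, longwords == 256 and the four scratch arrays
  // below take 4 * 256 * sizeof(julong) == 8192 bytes, matching the
  // guarantee that follows.)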
  int divisor = sizeof(julong) * 4;
  guarantee(longwords <= 8192 / divisor, "must be");
  int total_allocation = longwords * sizeof (julong) * 4;
  julong *scratch = (julong *)alloca(total_allocation);

  // Local scratch arrays
  julong
    *a = scratch + 0 * longwords,
    *b = scratch + 1 * longwords,
    *n = scratch + 2 * longwords,
    *m = scratch + 3 * longwords;

  reverse_words((julong *)a_ints, a, longwords);
  reverse_words((julong *)b_ints, b, longwords);
  reverse_words((julong *)n_ints, n, longwords);

  ::montgomery_multiply(a, b, n, m, (julong)inv, longwords);

  reverse_words(m, (julong *)m_ints, longwords);
}

void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
                                      jint len, jlong inv,
                                      jint *m_ints) {
  assert(len % 2 == 0, "array length in montgomery_square must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints corresponds to a 16384-bit integer and
  // will use a total of 6k bytes of stack space here.
  int divisor = sizeof(julong) * 3;
  guarantee(longwords <= (8192 / divisor), "must be");
  int total_allocation = longwords * sizeof (julong) * 3;
  julong *scratch = (julong *)alloca(total_allocation);

  // Local scratch arrays
  julong
    *a = scratch + 0 * longwords,
    *n = scratch + 1 * longwords,
    *m = scratch + 2 * longwords;

  reverse_words((julong *)a_ints, a, longwords);
  reverse_words((julong *)n_ints, n, longwords);

  if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
    ::montgomery_square(a, n, m, (julong)inv, longwords);
  } else {
    ::montgomery_multiply(a, a, n, m, (julong)inv, longwords);
  }

  reverse_words(m, (julong *)m_ints, longwords);
}

#ifdef COMPILER2
// This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
//
//------------------------------generate_exception_blob---------------------------
// Creates the exception blob at the end.
// This code is reached by a jump from a compiled method
// (see emit_exception_handler in the x86_64.ad file).
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
// (i.e. callee-saved registers), unwind the frame, and jump to the
// exception handler for the nmethod if there is no Java-level handler
// for the nmethod.
//
// This code is entered with a jmp.
//
// Arguments:
//   rax: exception oop
//   rdx: exception pc
//
// Results:
//   rax: exception oop
//   rdx: exception pc in caller or ???
//   destination: exception handler of caller
//
// Note: the exception pc MUST be at a call (precise debug information)
//       Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved.
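//
// Note: SimpleRuntimeFrame::framesize is counted in 32-bit stack slots, so
//       the "framesize % 4 == 0" assert below is the 16-byte stack-alignment
//       check for this stub frame.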
//

void OptoRuntime::generate_exception_blob() {
  assert(!OptoRuntime::is_callee_saved_register(RDX_num), "");
  assert(!OptoRuntime::is_callee_saved_register(RAX_num), "");
  assert(!OptoRuntime::is_callee_saved_register(RCX_num), "");

  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");

  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  CodeBuffer buffer("exception_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);


  address start = __ pc();

  // Exception pc is 'return address' for stack walker
  __ push(rdx);
  __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog

  // Save callee-saved registers. See x86_64.ad.

  // rbp is an implicitly saved callee-saved register (i.e., the calling
  // convention will save/restore it in the prolog/epilog). Other than that
  // there are no callee save registers now that adapter frames are gone.

  __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);

  // Store exception in Thread object. We cannot pass any arguments to the
  // handle_exception call, since we do not want to make any assumption
  // about the size of the frame in which the exception happened.
  // c_rarg0 is either rdi (Linux) or rcx (Windows).
  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);

  // This call does all the hard work. It checks if an exception handler
  // exists in the method.
  // If so, it returns the handler address.
  // If not, it prepares for stack-unwinding, restoring the callee-save
  // registers of the frame being removed.
  //
  // address OptoRuntime::handle_exception_C(JavaThread* thread)

  // At a method handle call, the stack may not be properly aligned
  // when returning with an exception.
  address the_pc = __ pc();
  __ set_last_Java_frame(noreg, noreg, the_pc);
  __ mov(c_rarg0, r15_thread);
  __ andptr(rsp, -(StackAlignmentInBytes));    // Align stack
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)));

  // Set an oopmap for the call site. This oopmap will only be used if we
  // are unwinding the stack. Hence, all locations will be dead.
  // Callee-saved registers will be the same as the frame above (i.e.,
  // handle_exception_stub), since they were restored when we got the
  // exception.

  OopMapSet* oop_maps = new OopMapSet();

  oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));

  __ reset_last_Java_frame(false);

  // Restore callee-saved registers

  // rbp is an implicitly saved callee-saved register (i.e., the calling
  // convention will save/restore it in the prolog/epilog). Other than that
  // there are no callee save registers now that adapter frames are gone.

  __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));

  __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog
  __ pop(rdx);                  // No need for exception pc anymore

  // rax: exception handler

  // We have a handler in rax (could be deopt blob).
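  // Preserve the handler in r8 (caller-saved and not otherwise used below)
  // while rax and rdx are reloaded with the exception oop and pc.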
  __ mov(r8, rax);

  // Get the exception oop
  __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
  // Get the exception pc in case we are deoptimized
  __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
#ifdef ASSERT
  __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD);
  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
#endif
  // Clear the exception oop so GC no longer processes it as a root.
  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);

  // rax: exception oop
  // r8:  exception handler
  // rdx: exception pc
  // Jump to handler

  __ jmp(r8);

  // Make sure all code is generated
  masm->flush();

  // Set exception blob
  _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
}
#endif // COMPILER2