1 /* 2 * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef _WINDOWS 26 #include "alloca.h" 27 #endif 28 #include "asm/macroAssembler.hpp" 29 #include "asm/macroAssembler.inline.hpp" 30 #include "classfile/symbolTable.hpp" 31 #include "code/aotCodeCache.hpp" 32 #include "code/compiledIC.hpp" 33 #include "code/debugInfoRec.hpp" 34 #include "code/nativeInst.hpp" 35 #include "code/vtableStubs.hpp" 36 #include "compiler/oopMap.hpp" 37 #include "gc/shared/collectedHeap.hpp" 38 #include "gc/shared/gcLocker.hpp" 39 #include "gc/shared/barrierSet.hpp" 40 #include "gc/shared/barrierSetAssembler.hpp" 41 #include "interpreter/interpreter.hpp" 42 #include "logging/log.hpp" 43 #include "memory/resourceArea.hpp" 44 #include "memory/universe.hpp" 45 #include "oops/klass.inline.hpp" 46 #include "oops/method.inline.hpp" 47 #include "prims/methodHandles.hpp" 48 #include "runtime/continuation.hpp" 49 #include "runtime/continuationEntry.inline.hpp" 50 #include "runtime/globals.hpp" 51 #include "runtime/jniHandles.hpp" 52 #include "runtime/safepointMechanism.hpp" 53 #include "runtime/sharedRuntime.hpp" 54 #include "runtime/signature.hpp" 55 #include "runtime/stubRoutines.hpp" 56 #include "runtime/timerTrace.hpp" 57 #include "runtime/vframeArray.hpp" 58 #include "runtime/vm_version.hpp" 59 #include "utilities/align.hpp" 60 #include "utilities/checkedCast.hpp" 61 #include "utilities/formatBuffer.hpp" 62 #include "vmreg_x86.inline.hpp" 63 #ifdef COMPILER1 64 #include "c1/c1_Runtime1.hpp" 65 #endif 66 #ifdef COMPILER2 67 #include "opto/runtime.hpp" 68 #endif 69 #if INCLUDE_JVMCI 70 #include "jvmci/jvmciJavaClasses.hpp" 71 #endif 72 73 #define __ masm-> 74 75 #ifdef PRODUCT 76 #define BLOCK_COMMENT(str) /* nothing */ 77 #else 78 #define BLOCK_COMMENT(str) __ block_comment(str) 79 #endif // PRODUCT 80 81 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; 82 83 class RegisterSaver { 84 // Capture info about frame layout. Layout offsets are in jint 85 // units because compiler frame slots are jints. 
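// Note (editorial): the XSAVE_AREA_* constants below are the byte offsets used by
// push_FPU_state() and the explicit saves in save_live_registers(): xmm0..xmm15 in
// the legacy FXSAVE image (160), the upper halves of ymm0..ymm15 (576), the APX
// extended GPRs r16..r31 (960), the opmask registers k0..k7 (1088), the upper
// halves of zmm0..zmm15 (1152), and the full zmm16..zmm31 "upper bank" (1664).
// The DEF_*_OFFS macros and the layout enum convert these byte offsets into
// jint-sized compiler stack slot indices by dividing by BytesPerInt.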
86 #define XSAVE_AREA_BEGIN 160 87 #define XSAVE_AREA_YMM_BEGIN 576 88 #define XSAVE_AREA_EGPRS 960 89 #define XSAVE_AREA_OPMASK_BEGIN 1088 90 #define XSAVE_AREA_ZMM_BEGIN 1152 91 #define XSAVE_AREA_UPPERBANK 1664 92 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off 93 #define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off 94 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum)*32/BytesPerInt, zmm ## regnum ## H_off 95 #define DEF_OPMASK_OFFS(regnum) opmask ## regnum ## _off = opmask_off + (regnum)*8/BytesPerInt, opmask ## regnum ## H_off 96 #define DEF_ZMM_UPPER_OFFS(regnum) zmm ## regnum ## _off = zmm_upper_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off 97 enum layout { 98 fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area 99 xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area 100 DEF_XMM_OFFS(0), 101 DEF_XMM_OFFS(1), 102 // 2..15 are implied in range usage 103 ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, 104 DEF_YMM_OFFS(0), 105 DEF_YMM_OFFS(1), 106 r16_off = xmm_off + (XSAVE_AREA_EGPRS - XSAVE_AREA_BEGIN)/BytesPerInt, 107 r16H_off, 108 r17_off, r17H_off, 109 r18_off, r18H_off, 110 r19_off, r19H_off, 111 r20_off, r20H_off, 112 r21_off, r21H_off, 113 r22_off, r22H_off, 114 r23_off, r23H_off, 115 r24_off, r24H_off, 116 r25_off, r25H_off, 117 r26_off, r26H_off, 118 r27_off, r27H_off, 119 r28_off, r28H_off, 120 r29_off, r29H_off, 121 r30_off, r30H_off, 122 r31_off, r31H_off, 123 opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, 124 DEF_OPMASK_OFFS(0), 125 DEF_OPMASK_OFFS(1), 126 // 2..7 are implied in range usage 127 zmm_off = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, 128 DEF_ZMM_OFFS(0), 129 DEF_ZMM_OFFS(1), 130 zmm_upper_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt, 131 DEF_ZMM_UPPER_OFFS(16), 132 DEF_ZMM_UPPER_OFFS(17), 133 // 18..31 are implied in range usage 134 fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt), 135 fpu_stateH_end, 136 r15_off, r15H_off, 137 r14_off, r14H_off, 138 r13_off, r13H_off, 139 r12_off, r12H_off, 140 r11_off, r11H_off, 141 r10_off, r10H_off, 142 r9_off, r9H_off, 143 r8_off, r8H_off, 144 rdi_off, rdiH_off, 145 rsi_off, rsiH_off, 146 ignore_off, ignoreH_off, // extra copy of rbp 147 rsp_off, rspH_off, 148 rbx_off, rbxH_off, 149 rdx_off, rdxH_off, 150 rcx_off, rcxH_off, 151 rax_off, raxH_off, 152 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state 153 align_off, alignH_off, 154 flags_off, flagsH_off, 155 // The frame sender code expects that rbp will be in the "natural" place and 156 // will override any oopMap setting for it. We must therefore force the layout 157 // so that it agrees with the frame sender code. 
158 rbp_off, rbpH_off, // copy of rbp we will restore
159 return_off, returnH_off, // slot for return address
160 reg_save_size // size in compiler stack slots
161 };
162
163 public:
164 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors);
165 static void restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors = false);
166
167 // Offsets into the register save area
168 // Used by deoptimization when it is managing result register
169 // values on its own
170
171 static int rax_offset_in_bytes(void) { return BytesPerInt * rax_off; }
172 static int rdx_offset_in_bytes(void) { return BytesPerInt * rdx_off; }
173 static int rbx_offset_in_bytes(void) { return BytesPerInt * rbx_off; }
174 static int r15_offset_in_bytes(void) { return BytesPerInt * r15_off; }
175 static int xmm0_offset_in_bytes(void) { return BytesPerInt * xmm0_off; }
176 static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
177
178 // During deoptimization only the result registers need to be restored,
179 // all the other values have already been extracted.
180 static void restore_result_registers(MacroAssembler* masm);
181 };
182
183 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) {
184 int off = 0;
185 int num_xmm_regs = XMMRegister::available_xmm_registers();
186 #if COMPILER2_OR_JVMCI
187 if (save_wide_vectors && UseAVX == 0) {
188 save_wide_vectors = false; // vectors larger than 16 byte long are supported only with AVX
189 }
190 assert(!save_wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
191 #else
192 save_wide_vectors = false; // vectors are generated only by C2 and JVMCI
193 #endif
194
195 // Always make the frame size 16-byte aligned, both vector and non-vector stacks are always allocated
196 int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
197 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
198 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
199 // CodeBlob frame size is in words.
200 int frame_size_in_words = frame_size_in_bytes / wordSize;
201 *total_frame_words = frame_size_in_words;
202
203 // Save registers, fpu state, and flags.
204 // We assume the caller has already pushed the return address onto the
205 // stack, so rsp is 8-byte aligned here.
206 // We push rbp twice in this sequence because we want the real rbp
207 // to be under the return address like a normal enter.
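// From high to low addresses, the sequence below lays down: the return address
// (already pushed by the caller), the rbp saved by enter(), the flags word plus
// an 8-byte alignment filler, the sixteen legacy GPRs from save_legacy_gprs(),
// and finally the FPU/XSAVE area from push_FPU_state(). This matches the layout
// enum above, with return_off at the highest offset and fpu_state_off at the lowest.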
208 209 __ enter(); // rsp becomes 16-byte aligned here 210 __ pushf(); 211 // Make sure rsp stays 16-byte aligned 212 __ subq(rsp, 8); 213 // Push CPU state in multiple of 16 bytes 214 __ save_legacy_gprs(); 215 __ push_FPU_state(); 216 217 218 // push cpu state handles this on EVEX enabled targets 219 if (save_wide_vectors) { 220 // Save upper half of YMM registers(0..15) 221 int base_addr = XSAVE_AREA_YMM_BEGIN; 222 for (int n = 0; n < 16; n++) { 223 __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n)); 224 } 225 if (VM_Version::supports_evex()) { 226 // Save upper half of ZMM registers(0..15) 227 base_addr = XSAVE_AREA_ZMM_BEGIN; 228 for (int n = 0; n < 16; n++) { 229 __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n)); 230 } 231 // Save full ZMM registers(16..num_xmm_regs) 232 base_addr = XSAVE_AREA_UPPERBANK; 233 off = 0; 234 int vector_len = Assembler::AVX_512bit; 235 for (int n = 16; n < num_xmm_regs; n++) { 236 __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len); 237 } 238 #if COMPILER2_OR_JVMCI 239 base_addr = XSAVE_AREA_OPMASK_BEGIN; 240 off = 0; 241 for(int n = 0; n < KRegister::number_of_registers; n++) { 242 __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n)); 243 } 244 #endif 245 } 246 } else { 247 if (VM_Version::supports_evex()) { 248 // Save upper bank of XMM registers(16..31) for scalar or 16-byte vector usage 249 int base_addr = XSAVE_AREA_UPPERBANK; 250 off = 0; 251 int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit; 252 for (int n = 16; n < num_xmm_regs; n++) { 253 __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len); 254 } 255 #if COMPILER2_OR_JVMCI 256 base_addr = XSAVE_AREA_OPMASK_BEGIN; 257 off = 0; 258 for(int n = 0; n < KRegister::number_of_registers; n++) { 259 __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n)); 260 } 261 #endif 262 } 263 } 264 265 #if COMPILER2_OR_JVMCI 266 if (UseAPX) { 267 int base_addr = XSAVE_AREA_EGPRS; 268 off = 0; 269 for (int n = 16; n < Register::number_of_registers; n++) { 270 __ movq(Address(rsp, base_addr+(off++*8)), as_Register(n)); 271 } 272 } 273 #endif 274 275 __ vzeroupper(); 276 if (frame::arg_reg_save_area_bytes != 0) { 277 // Allocate argument register save area 278 __ subptr(rsp, frame::arg_reg_save_area_bytes); 279 } 280 281 // Set an oopmap for the call site. This oopmap will map all 282 // oop-registers and debug-info registers as callee-saved. This 283 // will allow deoptimization at this safepoint to find all possible 284 // debug-info recordings, as well as let GC find all oops. 
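// STACK_OFFSET turns a jint-sized slot index from the layout enum into a
// stack-based VMReg, so each entry recorded below says where a register's value
// lives relative to the SP of the fully set-up save frame (including the
// argument register save area just allocated).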
285 286 OopMapSet *oop_maps = new OopMapSet(); 287 OopMap* map = new OopMap(frame_size_in_slots, 0); 288 289 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x)) 290 291 map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg()); 292 map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg()); 293 map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg()); 294 map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg()); 295 // rbp location is known implicitly by the frame sender code, needs no oopmap 296 // and the location where rbp was saved by is ignored 297 map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg()); 298 map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg()); 299 map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg()); 300 map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg()); 301 map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg()); 302 map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg()); 303 map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg()); 304 map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg()); 305 map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg()); 306 map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg()); 307 308 if (UseAPX) { 309 map->set_callee_saved(STACK_OFFSET( r16_off ), r16->as_VMReg()); 310 map->set_callee_saved(STACK_OFFSET( r17_off ), r17->as_VMReg()); 311 map->set_callee_saved(STACK_OFFSET( r18_off ), r18->as_VMReg()); 312 map->set_callee_saved(STACK_OFFSET( r19_off ), r19->as_VMReg()); 313 map->set_callee_saved(STACK_OFFSET( r20_off ), r20->as_VMReg()); 314 map->set_callee_saved(STACK_OFFSET( r21_off ), r21->as_VMReg()); 315 map->set_callee_saved(STACK_OFFSET( r22_off ), r22->as_VMReg()); 316 map->set_callee_saved(STACK_OFFSET( r23_off ), r23->as_VMReg()); 317 map->set_callee_saved(STACK_OFFSET( r24_off ), r24->as_VMReg()); 318 map->set_callee_saved(STACK_OFFSET( r25_off ), r25->as_VMReg()); 319 map->set_callee_saved(STACK_OFFSET( r26_off ), r26->as_VMReg()); 320 map->set_callee_saved(STACK_OFFSET( r27_off ), r27->as_VMReg()); 321 map->set_callee_saved(STACK_OFFSET( r28_off ), r28->as_VMReg()); 322 map->set_callee_saved(STACK_OFFSET( r29_off ), r29->as_VMReg()); 323 map->set_callee_saved(STACK_OFFSET( r30_off ), r30->as_VMReg()); 324 map->set_callee_saved(STACK_OFFSET( r31_off ), r31->as_VMReg()); 325 } 326 // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15, 327 // on EVEX enabled targets, we get it included in the xsave area 328 off = xmm0_off; 329 int delta = xmm1_off - off; 330 for (int n = 0; n < 16; n++) { 331 XMMRegister xmm_name = as_XMMRegister(n); 332 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()); 333 off += delta; 334 } 335 if (UseAVX > 2) { 336 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets 337 off = zmm16_off; 338 delta = zmm17_off - off; 339 for (int n = 16; n < num_xmm_regs; n++) { 340 XMMRegister zmm_name = as_XMMRegister(n); 341 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()); 342 off += delta; 343 } 344 } 345 346 #if COMPILER2_OR_JVMCI 347 if (save_wide_vectors) { 348 // Save upper half of YMM registers(0..15) 349 off = ymm0_off; 350 delta = ymm1_off - ymm0_off; 351 for (int n = 0; n < 16; n++) { 352 XMMRegister ymm_name = as_XMMRegister(n); 353 map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4)); 354 off += delta; 355 } 356 if (VM_Version::supports_evex()) { 357 // Save upper half of ZMM registers(0..15) 358 off = zmm0_off; 
359 delta = zmm1_off - zmm0_off; 360 for (int n = 0; n < 16; n++) { 361 XMMRegister zmm_name = as_XMMRegister(n); 362 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next(8)); 363 off += delta; 364 } 365 } 366 } 367 #endif // COMPILER2_OR_JVMCI 368 369 // %%% These should all be a waste but we'll keep things as they were for now 370 if (true) { 371 map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next()); 372 map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next()); 373 map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next()); 374 map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next()); 375 // rbp location is known implicitly by the frame sender code, needs no oopmap 376 map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next()); 377 map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next()); 378 map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next()); 379 map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next()); 380 map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next()); 381 map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next()); 382 map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next()); 383 map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next()); 384 map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next()); 385 map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next()); 386 if (UseAPX) { 387 map->set_callee_saved(STACK_OFFSET( r16H_off ), r16->as_VMReg()->next()); 388 map->set_callee_saved(STACK_OFFSET( r17H_off ), r17->as_VMReg()->next()); 389 map->set_callee_saved(STACK_OFFSET( r18H_off ), r18->as_VMReg()->next()); 390 map->set_callee_saved(STACK_OFFSET( r19H_off ), r19->as_VMReg()->next()); 391 map->set_callee_saved(STACK_OFFSET( r20H_off ), r20->as_VMReg()->next()); 392 map->set_callee_saved(STACK_OFFSET( r21H_off ), r21->as_VMReg()->next()); 393 map->set_callee_saved(STACK_OFFSET( r22H_off ), r22->as_VMReg()->next()); 394 map->set_callee_saved(STACK_OFFSET( r23H_off ), r23->as_VMReg()->next()); 395 map->set_callee_saved(STACK_OFFSET( r24H_off ), r24->as_VMReg()->next()); 396 map->set_callee_saved(STACK_OFFSET( r25H_off ), r25->as_VMReg()->next()); 397 map->set_callee_saved(STACK_OFFSET( r26H_off ), r26->as_VMReg()->next()); 398 map->set_callee_saved(STACK_OFFSET( r27H_off ), r27->as_VMReg()->next()); 399 map->set_callee_saved(STACK_OFFSET( r28H_off ), r28->as_VMReg()->next()); 400 map->set_callee_saved(STACK_OFFSET( r29H_off ), r29->as_VMReg()->next()); 401 map->set_callee_saved(STACK_OFFSET( r30H_off ), r30->as_VMReg()->next()); 402 map->set_callee_saved(STACK_OFFSET( r31H_off ), r31->as_VMReg()->next()); 403 } 404 // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15, 405 // on EVEX enabled targets, we get it included in the xsave area 406 off = xmm0H_off; 407 delta = xmm1H_off - off; 408 for (int n = 0; n < 16; n++) { 409 XMMRegister xmm_name = as_XMMRegister(n); 410 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next()); 411 off += delta; 412 } 413 if (UseAVX > 2) { 414 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets 415 off = zmm16H_off; 416 delta = zmm17H_off - off; 417 for (int n = 16; n < num_xmm_regs; n++) { 418 XMMRegister zmm_name = as_XMMRegister(n); 419 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next()); 420 off += delta; 421 } 422 } 423 } 424 425 return map; 
426 } 427 428 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors) { 429 int num_xmm_regs = XMMRegister::available_xmm_registers(); 430 if (frame::arg_reg_save_area_bytes != 0) { 431 // Pop arg register save area 432 __ addptr(rsp, frame::arg_reg_save_area_bytes); 433 } 434 435 #if COMPILER2_OR_JVMCI 436 if (restore_wide_vectors) { 437 assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); 438 assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); 439 } 440 #else 441 assert(!restore_wide_vectors, "vectors are generated only by C2"); 442 #endif 443 444 __ vzeroupper(); 445 446 // On EVEX enabled targets everything is handled in pop fpu state 447 if (restore_wide_vectors) { 448 // Restore upper half of YMM registers (0..15) 449 int base_addr = XSAVE_AREA_YMM_BEGIN; 450 for (int n = 0; n < 16; n++) { 451 __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16)); 452 } 453 if (VM_Version::supports_evex()) { 454 // Restore upper half of ZMM registers (0..15) 455 base_addr = XSAVE_AREA_ZMM_BEGIN; 456 for (int n = 0; n < 16; n++) { 457 __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32)); 458 } 459 // Restore full ZMM registers(16..num_xmm_regs) 460 base_addr = XSAVE_AREA_UPPERBANK; 461 int vector_len = Assembler::AVX_512bit; 462 int off = 0; 463 for (int n = 16; n < num_xmm_regs; n++) { 464 __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len); 465 } 466 #if COMPILER2_OR_JVMCI 467 base_addr = XSAVE_AREA_OPMASK_BEGIN; 468 off = 0; 469 for (int n = 0; n < KRegister::number_of_registers; n++) { 470 __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8))); 471 } 472 #endif 473 } 474 } else { 475 if (VM_Version::supports_evex()) { 476 // Restore upper bank of XMM registers(16..31) for scalar or 16-byte vector usage 477 int base_addr = XSAVE_AREA_UPPERBANK; 478 int off = 0; 479 int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit; 480 for (int n = 16; n < num_xmm_regs; n++) { 481 __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len); 482 } 483 #if COMPILER2_OR_JVMCI 484 base_addr = XSAVE_AREA_OPMASK_BEGIN; 485 off = 0; 486 for (int n = 0; n < KRegister::number_of_registers; n++) { 487 __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8))); 488 } 489 #endif 490 } 491 } 492 493 #if COMPILER2_OR_JVMCI 494 if (UseAPX) { 495 int base_addr = XSAVE_AREA_EGPRS; 496 int off = 0; 497 for (int n = 16; n < Register::number_of_registers; n++) { 498 __ movq(as_Register(n), Address(rsp, base_addr+(off++*8))); 499 } 500 } 501 #endif 502 503 // Recover CPU state 504 __ pop_FPU_state(); 505 __ restore_legacy_gprs(); 506 __ addq(rsp, 8); 507 __ popf(); 508 // Get the rbp described implicitly by the calling convention (no oopMap) 509 __ pop(rbp); 510 } 511 512 void RegisterSaver::restore_result_registers(MacroAssembler* masm) { 513 514 // Just restore result register. Only used by deoptimization. By 515 // now any callee save register that needs to be restored to a c2 516 // caller of the deoptee has been extracted into the vframeArray 517 // and will be stuffed into the c2i adapter we create for later 518 // restoration so only result registers need to be restored here. 
519
520 // Restore fp result register
521 __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
522 // Restore integer result register
523 __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
524 __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));
525
526 // Pop all of the register save area off the stack except the return address
527 __ addptr(rsp, return_offset_in_bytes());
528 }
529
530 // Is the vector's size (in bytes) bigger than a size saved by default?
531 // 16-byte XMM registers are saved by default using fxsave/fxrstor instructions.
532 bool SharedRuntime::is_wide_vector(int size) {
533 return size > 16;
534 }
535
536 // ---------------------------------------------------------------------------
537 // Read the array of BasicTypes from a signature, and compute where the
538 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
539 // quantities. Values less than VMRegImpl::stack0 are registers, those above
540 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
541 // as framesizes are fixed.
542 // VMRegImpl::stack0 refers to the first slot 0(sp),
543 // and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.
544 // Registers up to Register::number_of_registers are the 64-bit
545 // integer registers.
546
547 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
548 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
549 // units regardless of build. Of course for i486 there is no 64-bit build.
550
551 // The Java calling convention is a "shifted" version of the C ABI.
552 // By skipping the first C ABI register we can call non-static JNI methods
553 // with small numbers of arguments without having to shuffle the arguments
554 // at all. Since we control the Java ABI we ought to at least get some
555 // advantage out of it.
556
557 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
558 VMRegPair *regs,
559 int total_args_passed) {
560
561 // Create the mapping between argument positions and
562 // registers.
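// For example (illustrative): for a signature (int, long, Object, double) the
// loop below assigns j_rarg0 to the int, j_rarg1 to the long, j_rarg2 to the
// Object and j_farg0 to the double. Once the six integer or eight float argument
// registers are exhausted, values fall back to stack slots, with stk_args kept
// 2-slot (8-byte) aligned.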
563 static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { 564 j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5 565 }; 566 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { 567 j_farg0, j_farg1, j_farg2, j_farg3, 568 j_farg4, j_farg5, j_farg6, j_farg7 569 }; 570 571 572 uint int_args = 0; 573 uint fp_args = 0; 574 uint stk_args = 0; 575 576 for (int i = 0; i < total_args_passed; i++) { 577 switch (sig_bt[i]) { 578 case T_BOOLEAN: 579 case T_CHAR: 580 case T_BYTE: 581 case T_SHORT: 582 case T_INT: 583 if (int_args < Argument::n_int_register_parameters_j) { 584 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); 585 } else { 586 stk_args = align_up(stk_args, 2); 587 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 588 stk_args += 1; 589 } 590 break; 591 case T_VOID: 592 // halves of T_LONG or T_DOUBLE 593 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 594 regs[i].set_bad(); 595 break; 596 case T_LONG: 597 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 598 // fall through 599 case T_OBJECT: 600 case T_ARRAY: 601 case T_ADDRESS: 602 if (int_args < Argument::n_int_register_parameters_j) { 603 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); 604 } else { 605 stk_args = align_up(stk_args, 2); 606 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 607 stk_args += 2; 608 } 609 break; 610 case T_FLOAT: 611 if (fp_args < Argument::n_float_register_parameters_j) { 612 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); 613 } else { 614 stk_args = align_up(stk_args, 2); 615 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 616 stk_args += 1; 617 } 618 break; 619 case T_DOUBLE: 620 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 621 if (fp_args < Argument::n_float_register_parameters_j) { 622 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); 623 } else { 624 stk_args = align_up(stk_args, 2); 625 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 626 stk_args += 2; 627 } 628 break; 629 default: 630 ShouldNotReachHere(); 631 break; 632 } 633 } 634 635 return stk_args; 636 } 637 638 // Same as java_calling_convention() but for multiple return 639 // values. There's no way to store them on the stack so if we don't 640 // have enough registers, multiple values can't be returned. 641 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1; 642 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; 643 int SharedRuntime::java_return_convention(const BasicType *sig_bt, 644 VMRegPair *regs, 645 int total_args_passed) { 646 // Create the mapping between argument positions and 647 // registers. 
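// Note that rax comes first in INT_ArgReg here: a single integer result stays in
// rax as usual, and any additional integer return values reuse the j_rarg
// registers in reverse order. If a value does not fit in the available registers
// the function returns -1, since multiple return values cannot be spilled to the
// stack.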
648 static const Register INT_ArgReg[java_return_convention_max_int] = { 649 rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0 650 }; 651 static const XMMRegister FP_ArgReg[java_return_convention_max_float] = { 652 j_farg0, j_farg1, j_farg2, j_farg3, 653 j_farg4, j_farg5, j_farg6, j_farg7 654 }; 655 656 657 uint int_args = 0; 658 uint fp_args = 0; 659 660 for (int i = 0; i < total_args_passed; i++) { 661 switch (sig_bt[i]) { 662 case T_BOOLEAN: 663 case T_CHAR: 664 case T_BYTE: 665 case T_SHORT: 666 case T_INT: 667 if (int_args < Argument::n_int_register_parameters_j+1) { 668 regs[i].set1(INT_ArgReg[int_args]->as_VMReg()); 669 int_args++; 670 } else { 671 return -1; 672 } 673 break; 674 case T_VOID: 675 // halves of T_LONG or T_DOUBLE 676 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 677 regs[i].set_bad(); 678 break; 679 case T_LONG: 680 assert(sig_bt[i + 1] == T_VOID, "expecting half"); 681 // fall through 682 case T_OBJECT: 683 case T_ARRAY: 684 case T_ADDRESS: 685 case T_METADATA: 686 if (int_args < Argument::n_int_register_parameters_j+1) { 687 regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); 688 int_args++; 689 } else { 690 return -1; 691 } 692 break; 693 case T_FLOAT: 694 if (fp_args < Argument::n_float_register_parameters_j) { 695 regs[i].set1(FP_ArgReg[fp_args]->as_VMReg()); 696 fp_args++; 697 } else { 698 return -1; 699 } 700 break; 701 case T_DOUBLE: 702 assert(sig_bt[i + 1] == T_VOID, "expecting half"); 703 if (fp_args < Argument::n_float_register_parameters_j) { 704 regs[i].set2(FP_ArgReg[fp_args]->as_VMReg()); 705 fp_args++; 706 } else { 707 return -1; 708 } 709 break; 710 default: 711 ShouldNotReachHere(); 712 break; 713 } 714 } 715 716 return int_args + fp_args; 717 } 718 719 // Patch the callers callsite with entry to compiled code if it exists. 720 static void patch_callers_callsite(MacroAssembler *masm) { 721 Label L; 722 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD); 723 __ jcc(Assembler::equal, L); 724 725 // Save the current stack pointer 726 __ mov(r13, rsp); 727 // Schedule the branch target address early. 728 // Call into the VM to patch the caller, then jump to compiled callee 729 // rax isn't live so capture return address while we easily can 730 __ movptr(rax, Address(rsp, 0)); 731 732 // align stack so push_CPU_state doesn't fault 733 __ andptr(rsp, -(StackAlignmentInBytes)); 734 __ push_CPU_state(); 735 __ vzeroupper(); 736 // VM needs caller's callsite 737 // VM needs target method 738 // This needs to be a long call since we will relocate this adapter to 739 // the codeBuffer and it may not reach 740 741 // Allocate argument register save area 742 if (frame::arg_reg_save_area_bytes != 0) { 743 __ subptr(rsp, frame::arg_reg_save_area_bytes); 744 } 745 __ mov(c_rarg0, rbx); 746 __ mov(c_rarg1, rax); 747 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); 748 749 // De-allocate argument register save area 750 if (frame::arg_reg_save_area_bytes != 0) { 751 __ addptr(rsp, frame::arg_reg_save_area_bytes); 752 } 753 754 __ vzeroupper(); 755 __ pop_CPU_state(); 756 // restore sp 757 __ mov(rsp, r13); 758 __ bind(L); 759 } 760 761 // For each inline type argument, sig includes the list of fields of 762 // the inline type. This utility function computes the number of 763 // arguments for the call if inline types are passed by reference (the 764 // calling convention the interpreter expects). 
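// Worked example: for the case described in the comment inside this function
// (an inline type with an int field plus a nested inline type holding an int and
// a long), the eight sig_extended entries collapse to a single interpreter
// argument, so that argument alone contributes 1 to total_args_passed.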
765 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
766 int total_args_passed = 0;
767 if (InlineTypePassFieldsAsArgs) {
768 for (int i = 0; i < sig_extended->length(); i++) {
769 BasicType bt = sig_extended->at(i)._bt;
770 if (bt == T_METADATA) {
771 // In sig_extended, an inline type argument starts with:
772 // T_METADATA, followed by the types of the fields of the
773 // inline type and T_VOID to mark the end of the inline
774 // type. Inline types are flattened so, for instance, in the
775 // case of an inline type with an int field and an inline type
776 // field that itself has 2 fields, an int and a long:
777 // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second
778 // slot for the T_LONG) T_VOID (inner inline type) T_VOID
779 // (outer inline type)
780 total_args_passed++;
781 int vt = 1;
782 do {
783 i++;
784 BasicType bt = sig_extended->at(i)._bt;
785 BasicType prev_bt = sig_extended->at(i-1)._bt;
786 if (bt == T_METADATA) {
787 vt++;
788 } else if (bt == T_VOID &&
789 prev_bt != T_LONG &&
790 prev_bt != T_DOUBLE) {
791 vt--;
792 }
793 } while (vt != 0);
794 } else {
795 total_args_passed++;
796 }
797 }
798 } else {
799 total_args_passed = sig_extended->length();
800 }
801 return total_args_passed;
802 }
803
804
805 static void gen_c2i_adapter_helper(MacroAssembler* masm,
806 BasicType bt,
807 BasicType prev_bt,
808 size_t size_in_bytes,
809 const VMRegPair& reg_pair,
810 const Address& to,
811 int extraspace,
812 bool is_oop) {
813 if (bt == T_VOID) {
814 assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
815 return;
816 }
817
818 // Say 4 args:
819 // i st_off
820 // 0 32 T_LONG
821 // 1 24 T_VOID
822 // 2 16 T_OBJECT
823 // 3 8 T_BOOL
824 // - 0 return address
825 //
826 // However, to make things extra confusing: because we can fit a long/double in
827 // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
828 // leaves one slot empty and only stores to a single slot. In this case the
829 // slot that is occupied is the T_VOID slot. See, I said it was confusing.
830 831 bool wide = (size_in_bytes == wordSize); 832 VMReg r_1 = reg_pair.first(); 833 VMReg r_2 = reg_pair.second(); 834 assert(r_2->is_valid() == wide, "invalid size"); 835 if (!r_1->is_valid()) { 836 assert(!r_2->is_valid(), "must be invalid"); 837 return; 838 } 839 840 if (!r_1->is_XMMRegister()) { 841 Register val = rax; 842 if (r_1->is_stack()) { 843 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; 844 __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false); 845 } else { 846 val = r_1->as_Register(); 847 } 848 assert_different_registers(to.base(), val, rscratch1); 849 if (is_oop) { 850 __ push(r13); 851 __ push(rbx); 852 __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); 853 __ pop(rbx); 854 __ pop(r13); 855 } else { 856 __ store_sized_value(to, val, size_in_bytes); 857 } 858 } else { 859 if (wide) { 860 __ movdbl(to, r_1->as_XMMRegister()); 861 } else { 862 __ movflt(to, r_1->as_XMMRegister()); 863 } 864 } 865 } 866 867 static void gen_c2i_adapter(MacroAssembler *masm, 868 const GrowableArray<SigEntry>* sig_extended, 869 const VMRegPair *regs, 870 bool requires_clinit_barrier, 871 address& c2i_no_clinit_check_entry, 872 Label& skip_fixup, 873 address start, 874 OopMapSet* oop_maps, 875 int& frame_complete, 876 int& frame_size_in_words, 877 bool alloc_inline_receiver) { 878 if (requires_clinit_barrier && VM_Version::supports_fast_class_init_checks()) { 879 Label L_skip_barrier; 880 Register method = rbx; 881 882 { // Bypass the barrier for non-static methods 883 Register flags = rscratch1; 884 __ load_unsigned_short(flags, Address(method, Method::access_flags_offset())); 885 __ testl(flags, JVM_ACC_STATIC); 886 __ jcc(Assembler::zero, L_skip_barrier); // non-static 887 } 888 889 Register klass = rscratch1; 890 __ load_method_holder(klass, method); 891 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/); 892 893 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path 894 895 __ bind(L_skip_barrier); 896 c2i_no_clinit_check_entry = __ pc(); 897 } 898 899 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 900 bs->c2i_entry_barrier(masm); 901 902 // Before we get into the guts of the C2I adapter, see if we should be here 903 // at all. We've come from compiled code and are attempting to jump to the 904 // interpreter, which means the caller made a static call to get here 905 // (vcalls always get a compiled target if there is one). Check for a 906 // compiled target. If there is one, we need to patch the caller's call. 907 patch_callers_callsite(masm); 908 909 __ bind(skip_fixup); 910 911 if (InlineTypePassFieldsAsArgs) { 912 // Is there an inline type argument? 913 bool has_inline_argument = false; 914 for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) { 915 has_inline_argument = (sig_extended->at(i)._bt == T_METADATA); 916 } 917 if (has_inline_argument) { 918 // There is at least an inline type argument: we're coming from 919 // compiled code so we have no buffers to back the inline types. 920 // Allocate the buffers here with a runtime call. 
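// The runtime call below to SharedRuntime::allocate_inline_types returns an
// object array with one freshly allocated buffer per inline type argument (the
// alloc_inline_receiver flag controls whether the receiver gets one as well).
// It is picked up from the thread's vm_result after the call and kept in
// rscratch2 for the unpacking loop further down.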
921 OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false); 922 923 frame_complete = __ offset(); 924 925 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 926 927 __ mov(c_rarg0, r15_thread); 928 __ mov(c_rarg1, rbx); 929 __ mov64(c_rarg2, (int64_t)alloc_inline_receiver); 930 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types))); 931 932 oop_maps->add_gc_map((int)(__ pc() - start), map); 933 __ reset_last_Java_frame(false); 934 935 RegisterSaver::restore_live_registers(masm); 936 937 Label no_exception; 938 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 939 __ jcc(Assembler::equal, no_exception); 940 941 __ movptr(Address(r15_thread, JavaThread::vm_result_oop_offset()), NULL_WORD); 942 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 943 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 944 945 __ bind(no_exception); 946 947 // We get an array of objects from the runtime call 948 __ get_vm_result_oop(rscratch2); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr() 949 __ get_vm_result_metadata(rbx); // TODO: required to keep the callee Method live? 950 } 951 } 952 953 // Since all args are passed on the stack, total_args_passed * 954 // Interpreter::stackElementSize is the space we need. 955 int total_args_passed = compute_total_args_passed_int(sig_extended); 956 assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed); 957 958 int extraspace = (total_args_passed * Interpreter::stackElementSize); 959 960 // stack is aligned, keep it that way 961 // This is not currently needed or enforced by the interpreter, but 962 // we might as well conform to the ABI. 963 extraspace = align_up(extraspace, 2*wordSize); 964 965 // set senderSP value 966 __ lea(r13, Address(rsp, wordSize)); 967 968 #ifdef ASSERT 969 __ check_stack_alignment(r13, "sender stack not aligned"); 970 #endif 971 if (extraspace > 0) { 972 // Pop the return address 973 __ pop(rax); 974 975 __ subptr(rsp, extraspace); 976 977 // Push the return address 978 __ push(rax); 979 980 // Account for the return address location since we store it first rather 981 // than hold it in a register across all the shuffling 982 extraspace += wordSize; 983 } 984 985 #ifdef ASSERT 986 __ check_stack_alignment(rsp, "callee stack not aligned", wordSize, rax); 987 #endif 988 989 // Now write the args into the outgoing interpreter space 990 991 // next_arg_comp is the next argument from the compiler point of 992 // view (inline type fields are passed in registers/on the stack). In 993 // sig_extended, an inline type argument starts with: T_METADATA, 994 // followed by the types of the fields of the inline type and T_VOID 995 // to mark the end of the inline type. ignored counts the number of 996 // T_METADATA/T_VOID. next_vt_arg is the next inline type argument: 997 // used to get the buffer for that argument from the pool of buffers 998 // we allocated above and want to pass to the 999 // interpreter. next_arg_int is the next argument from the 1000 // interpreter point of view (inline types are passed by reference). 
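// Note that regs[] is indexed with (next_arg_comp - ignored): the T_METADATA
// markers and the delimiter T_VOID entries occupy slots in sig_extended but have
// no corresponding register or stack slot in the compiled calling convention.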
1001 for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0; 1002 next_arg_comp < sig_extended->length(); next_arg_comp++) { 1003 assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments"); 1004 assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?"); 1005 BasicType bt = sig_extended->at(next_arg_comp)._bt; 1006 int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize; 1007 if (!InlineTypePassFieldsAsArgs || bt != T_METADATA) { 1008 int next_off = st_off - Interpreter::stackElementSize; 1009 const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; 1010 const VMRegPair reg_pair = regs[next_arg_comp-ignored]; 1011 size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4; 1012 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, 1013 size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false); 1014 next_arg_int++; 1015 #ifdef ASSERT 1016 if (bt == T_LONG || bt == T_DOUBLE) { 1017 // Overwrite the unused slot with known junk 1018 __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); 1019 __ movptr(Address(rsp, st_off), rax); 1020 } 1021 #endif /* ASSERT */ 1022 } else { 1023 ignored++; 1024 // get the buffer from the just allocated pool of buffers 1025 int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_OBJECT); 1026 __ load_heap_oop(r14, Address(rscratch2, index)); 1027 next_vt_arg++; next_arg_int++; 1028 int vt = 1; 1029 // write fields we get from compiled code in registers/stack 1030 // slots to the buffer: we know we are done with that inline type 1031 // argument when we hit the T_VOID that acts as an end of inline 1032 // type delimiter for this inline type. Inline types are flattened 1033 // so we might encounter embedded inline types. Each entry in 1034 // sig_extended contains a field offset in the buffer. 1035 Label L_null; 1036 do { 1037 next_arg_comp++; 1038 BasicType bt = sig_extended->at(next_arg_comp)._bt; 1039 BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt; 1040 if (bt == T_METADATA) { 1041 vt++; 1042 ignored++; 1043 } else if (bt == T_VOID && 1044 prev_bt != T_LONG && 1045 prev_bt != T_DOUBLE) { 1046 vt--; 1047 ignored++; 1048 } else { 1049 int off = sig_extended->at(next_arg_comp)._offset; 1050 if (off == -1) { 1051 // Nullable inline type argument, emit null check 1052 VMReg reg = regs[next_arg_comp-ignored].first(); 1053 Label L_notNull; 1054 if (reg->is_stack()) { 1055 int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace; 1056 __ testb(Address(rsp, ld_off), 1); 1057 } else { 1058 __ testb(reg->as_Register(), 1); 1059 } 1060 __ jcc(Assembler::notZero, L_notNull); 1061 __ movptr(Address(rsp, st_off), 0); 1062 __ jmp(L_null); 1063 __ bind(L_notNull); 1064 continue; 1065 } 1066 assert(off > 0, "offset in object should be positive"); 1067 size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize; 1068 bool is_oop = is_reference_type(bt); 1069 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, 1070 size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop); 1071 } 1072 } while (vt != 0); 1073 // pass the buffer to the interpreter 1074 __ movptr(Address(rsp, st_off), r14); 1075 __ bind(L_null); 1076 } 1077 } 1078 1079 // Schedule the branch target address early. 
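// At this point every incoming argument has been written into the layout the
// interpreter expects; rbx still holds the callee Method* and r13 the sender SP,
// which is what the interpreter entry point requires.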
1080 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset()))); 1081 __ jmp(rcx); 1082 } 1083 1084 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, 1085 int comp_args_on_stack, 1086 const GrowableArray<SigEntry>* sig, 1087 const VMRegPair *regs) { 1088 1089 // Note: r13 contains the senderSP on entry. We must preserve it since 1090 // we may do a i2c -> c2i transition if we lose a race where compiled 1091 // code goes non-entrant while we get args ready. 1092 // In addition we use r13 to locate all the interpreter args as 1093 // we must align the stack to 16 bytes on an i2c entry else we 1094 // lose alignment we expect in all compiled code and register 1095 // save code can segv when fxsave instructions find improperly 1096 // aligned stack pointer. 1097 1098 // Adapters can be frameless because they do not require the caller 1099 // to perform additional cleanup work, such as correcting the stack pointer. 1100 // An i2c adapter is frameless because the *caller* frame, which is interpreted, 1101 // routinely repairs its own stack pointer (from interpreter_frame_last_sp), 1102 // even if a callee has modified the stack pointer. 1103 // A c2i adapter is frameless because the *callee* frame, which is interpreted, 1104 // routinely repairs its caller's stack pointer (from sender_sp, which is set 1105 // up via the senderSP register). 1106 // In other words, if *either* the caller or callee is interpreted, we can 1107 // get the stack pointer repaired after a call. 1108 // This is why c2i and i2c adapters cannot be indefinitely composed. 1109 // In particular, if a c2i adapter were to somehow call an i2c adapter, 1110 // both caller and callee would be compiled methods, and neither would 1111 // clean up the stack pointer changes performed by the two adapters. 1112 // If this happens, control eventually transfers back to the compiled 1113 // caller, but with an uncorrected stack, causing delayed havoc. 1114 1115 // Must preserve original SP for loading incoming arguments because 1116 // we need to align the outgoing SP for compiled code. 1117 __ movptr(r11, rsp); 1118 1119 // Pick up the return address 1120 __ pop(rax); 1121 1122 // Convert 4-byte c2 stack slots to words. 1123 int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; 1124 1125 if (comp_args_on_stack) { 1126 __ subptr(rsp, comp_words_on_stack * wordSize); 1127 } 1128 1129 // Ensure compiled code always sees stack at proper alignment 1130 __ andptr(rsp, -16); 1131 1132 // push the return address and misalign the stack that youngest frame always sees 1133 // as far as the placement of the call instruction 1134 __ push(rax); 1135 1136 // Put saved SP in another register 1137 const Register saved_sp = rax; 1138 __ movptr(saved_sp, r11); 1139 1140 // Will jump to the compiled code just as if compiled code was doing it. 1141 // Pre-load the register-jump target early, to schedule it better. 
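// r11 now holds the compiled entry point we will eventually jump to; with JVMCI
// enabled it may be replaced below by an alternate call target stashed in the
// thread.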
1142 __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset()))); 1143 1144 #if INCLUDE_JVMCI 1145 if (EnableJVMCI) { 1146 // check if this call should be routed towards a specific entry point 1147 __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0); 1148 Label no_alternative_target; 1149 __ jcc(Assembler::equal, no_alternative_target); 1150 __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); 1151 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0); 1152 __ bind(no_alternative_target); 1153 } 1154 #endif // INCLUDE_JVMCI 1155 1156 int total_args_passed = sig->length(); 1157 1158 // Now generate the shuffle code. Pick up all register args and move the 1159 // rest through the floating point stack top. 1160 for (int i = 0; i < total_args_passed; i++) { 1161 BasicType bt = sig->at(i)._bt; 1162 if (bt == T_VOID) { 1163 // Longs and doubles are passed in native word order, but misaligned 1164 // in the 32-bit build. 1165 BasicType prev_bt = (i > 0) ? sig->at(i-1)._bt : T_ILLEGAL; 1166 assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half"); 1167 continue; 1168 } 1169 1170 // Pick up 0, 1 or 2 words from SP+offset. 1171 1172 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), 1173 "scrambled load targets?"); 1174 // Load in argument order going down. 1175 int ld_off = (total_args_passed - i)*Interpreter::stackElementSize; 1176 // Point to interpreter value (vs. tag) 1177 int next_off = ld_off - Interpreter::stackElementSize; 1178 // 1179 // 1180 // 1181 VMReg r_1 = regs[i].first(); 1182 VMReg r_2 = regs[i].second(); 1183 if (!r_1->is_valid()) { 1184 assert(!r_2->is_valid(), ""); 1185 continue; 1186 } 1187 if (r_1->is_stack()) { 1188 // Convert stack slot to an SP offset (+ wordSize to account for return address ) 1189 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize; 1190 1191 // We can use r13 as a temp here because compiled code doesn't need r13 as an input 1192 // and if we end up going thru a c2i because of a miss a reasonable value of r13 1193 // will be generated. 1194 if (!r_2->is_valid()) { 1195 // sign extend??? 1196 __ movl(r13, Address(saved_sp, ld_off)); 1197 __ movptr(Address(rsp, st_off), r13); 1198 } else { 1199 // 1200 // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE 1201 // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case 1202 // So we must adjust where to pick up the data to match the interpreter. 1203 // 1204 // Interpreter local[n] == MSW, local[n+1] == LSW however locals 1205 // are accessed as negative so LSW is at LOW address 1206 1207 // ld_off is MSW so get LSW 1208 const int offset = (bt==T_LONG||bt==T_DOUBLE)? 1209 next_off : ld_off; 1210 __ movq(r13, Address(saved_sp, offset)); 1211 // st_off is LSW (i.e. reg.first()) 1212 __ movq(Address(rsp, st_off), r13); 1213 } 1214 } else if (r_1->is_Register()) { // Register argument 1215 Register r = r_1->as_Register(); 1216 assert(r != rax, "must be different"); 1217 if (r_2->is_valid()) { 1218 // 1219 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE 1220 // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case 1221 // So we must adjust where to pick up the data to match the interpreter. 1222 1223 const int offset = (bt==T_LONG||bt==T_DOUBLE)? 
1224 next_off : ld_off;
1225
1226 // this can be a misaligned move
1227 __ movq(r, Address(saved_sp, offset));
1228 } else {
1229 // sign extend and use a full word?
1230 __ movl(r, Address(saved_sp, ld_off));
1231 }
1232 } else {
1233 if (!r_2->is_valid()) {
1234 __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
1235 } else {
1236 __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
1237 }
1238 }
1239 }
1240
1241 __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about
1242
1243 // 6243940 We might end up in handle_wrong_method if
1244 // the callee is deoptimized as we race thru here. If that
1245 // happens we don't want to take a safepoint because the
1246 // caller frame will look interpreted and arguments are now
1247 // "compiled" so it is much better to make this transition
1248 // invisible to the stack walking code. Unfortunately if
1249 // we try and find the callee by normal means a safepoint
1250 // is possible. So we stash the desired callee in the thread
1251 // and the VM will find it there should this case occur.
1252
1253 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
1254
1255 // put Method* where a c2i would expect it should we end up there.
1256 // This is only needed because c2 resolve stubs return Method* as a result in
1257 // rax.
1258 __ mov(rax, rbx);
1259 __ jmp(r11);
1260 }
1261
1262 static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
1263 Register data = rax;
1264 __ ic_check(1 /* end_alignment */);
1265 __ movptr(rbx, Address(data, CompiledICData::speculated_method_offset()));
1266
1267 // Method might have been compiled since the call site was patched to
1268 // interpreted. If that is the case, treat it as a miss so we can get
1269 // the call site corrected.
1270 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
1271 __ jcc(Assembler::equal, skip_fixup);
1272 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1273 }
1274
1275 // ---------------------------------------------------------------
1276 void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
1277 int comp_args_on_stack,
1278 const GrowableArray<SigEntry>* sig,
1279 const VMRegPair* regs,
1280 const GrowableArray<SigEntry>* sig_cc,
1281 const VMRegPair* regs_cc,
1282 const GrowableArray<SigEntry>* sig_cc_ro,
1283 const VMRegPair* regs_cc_ro,
1284 AdapterHandlerEntry* handler,
1285 AdapterBlob*& new_adapter,
1286 bool allocate_code_blob) {
1287 address i2c_entry = __ pc();
1288 gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);
1289
1290 // -------------------------------------------------------------------------
1291 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
1292 // to the interpreter. The args start out packed in the compiled layout. They
1293 // need to be unpacked into the interpreter layout. This will almost always
1294 // require some stack space. We grow the current (compiled) stack, then repack
1295 // the args. We finally end in a jump to the generic interpreter entry point.
1296 // On exit from the interpreter, the interpreter will restore our SP (lest the
1297 // compiled code, which relies solely on SP and not RBP, get sick).
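// What follows emits a small family of C2I entry points: an unverified entry
// that performs the inline cache check, a scalarized entry, a scalarized entry
// that leaves the receiver non-scalarized, and (when the compiled and scalarized
// calling conventions differ) a non-scalarized variant, all funneling into
// gen_c2i_adapter().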
1298 1299 address c2i_unverified_entry = __ pc(); 1300 address c2i_unverified_inline_entry = __ pc(); 1301 Label skip_fixup; 1302 1303 gen_inline_cache_check(masm, skip_fixup); 1304 1305 OopMapSet* oop_maps = new OopMapSet(); 1306 int frame_complete = CodeOffsets::frame_never_safe; 1307 int frame_size_in_words = 0; 1308 1309 // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver) 1310 address c2i_no_clinit_check_entry = nullptr; 1311 address c2i_inline_ro_entry = __ pc(); 1312 if (regs_cc != regs_cc_ro) { 1313 // No class init barrier needed because method is guaranteed to be non-static 1314 gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, c2i_no_clinit_check_entry, 1315 skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); 1316 skip_fixup.reset(); 1317 } 1318 1319 // Scalarized c2i adapter 1320 address c2i_entry = __ pc(); 1321 address c2i_inline_entry = __ pc(); 1322 gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry, 1323 skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true); 1324 1325 // Non-scalarized c2i adapter 1326 if (regs != regs_cc) { 1327 c2i_unverified_inline_entry = __ pc(); 1328 Label inline_entry_skip_fixup; 1329 gen_inline_cache_check(masm, inline_entry_skip_fixup); 1330 1331 c2i_inline_entry = __ pc(); 1332 gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry, 1333 inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); 1334 } 1335 1336 // The c2i adapters might safepoint and trigger a GC. The caller must make sure that 1337 // the GC knows about the location of oop argument locations passed to the c2i adapter. 1338 if (allocate_code_blob) { 1339 bool caller_must_gc_arguments = (regs != regs_cc); 1340 int entry_offset[AdapterHandlerEntry::ENTRIES_COUNT]; 1341 assert(AdapterHandlerEntry::ENTRIES_COUNT == 7, "sanity"); 1342 entry_offset[0] = 0; // i2c_entry offset 1343 entry_offset[1] = c2i_entry - i2c_entry; 1344 entry_offset[2] = c2i_inline_entry - i2c_entry; 1345 entry_offset[3] = c2i_inline_ro_entry - i2c_entry; 1346 entry_offset[4] = c2i_unverified_entry - i2c_entry; 1347 entry_offset[5] = c2i_unverified_inline_entry - i2c_entry; 1348 entry_offset[6] = c2i_no_clinit_check_entry - i2c_entry; 1349 1350 new_adapter = AdapterBlob::create(masm->code(), entry_offset, frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments); 1351 } 1352 1353 handler->set_entry_points(i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, 1354 c2i_unverified_inline_entry, c2i_no_clinit_check_entry); 1355 } 1356 1357 int SharedRuntime::c_calling_convention(const BasicType *sig_bt, 1358 VMRegPair *regs, 1359 int total_args_passed) { 1360 1361 // We return the amount of VMRegImpl stack slots we need to reserve for all 1362 // the arguments NOT counting out_preserve_stack_slots. 
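// The two register tables below reflect the native ABIs: Win64 passes only four
// arguments in registers and consumes an integer and a floating point register
// position together for each argument (hence int_args/fp_args being bumped in
// tandem and the 32-byte shadow space reserved at the end), while the System V
// ABI used elsewhere provides six integer and eight XMM argument registers
// counted independently.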
1363 1364 // NOTE: These arrays will have to change when c1 is ported 1365 #ifdef _WIN64 1366 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { 1367 c_rarg0, c_rarg1, c_rarg2, c_rarg3 1368 }; 1369 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { 1370 c_farg0, c_farg1, c_farg2, c_farg3 1371 }; 1372 #else 1373 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { 1374 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5 1375 }; 1376 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { 1377 c_farg0, c_farg1, c_farg2, c_farg3, 1378 c_farg4, c_farg5, c_farg6, c_farg7 1379 }; 1380 #endif // _WIN64 1381 1382 1383 uint int_args = 0; 1384 uint fp_args = 0; 1385 uint stk_args = 0; // inc by 2 each time 1386 1387 for (int i = 0; i < total_args_passed; i++) { 1388 switch (sig_bt[i]) { 1389 case T_BOOLEAN: 1390 case T_CHAR: 1391 case T_BYTE: 1392 case T_SHORT: 1393 case T_INT: 1394 if (int_args < Argument::n_int_register_parameters_c) { 1395 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); 1396 #ifdef _WIN64 1397 fp_args++; 1398 // Allocate slots for callee to stuff register args the stack. 1399 stk_args += 2; 1400 #endif 1401 } else { 1402 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1403 stk_args += 2; 1404 } 1405 break; 1406 case T_LONG: 1407 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 1408 // fall through 1409 case T_OBJECT: 1410 case T_ARRAY: 1411 case T_ADDRESS: 1412 case T_METADATA: 1413 if (int_args < Argument::n_int_register_parameters_c) { 1414 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); 1415 #ifdef _WIN64 1416 fp_args++; 1417 stk_args += 2; 1418 #endif 1419 } else { 1420 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1421 stk_args += 2; 1422 } 1423 break; 1424 case T_FLOAT: 1425 if (fp_args < Argument::n_float_register_parameters_c) { 1426 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); 1427 #ifdef _WIN64 1428 int_args++; 1429 // Allocate slots for callee to stuff register args the stack. 1430 stk_args += 2; 1431 #endif 1432 } else { 1433 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1434 stk_args += 2; 1435 } 1436 break; 1437 case T_DOUBLE: 1438 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 1439 if (fp_args < Argument::n_float_register_parameters_c) { 1440 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); 1441 #ifdef _WIN64 1442 int_args++; 1443 // Allocate slots for callee to stuff register args the stack. 1444 stk_args += 2; 1445 #endif 1446 } else { 1447 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1448 stk_args += 2; 1449 } 1450 break; 1451 case T_VOID: // Halves of longs and doubles 1452 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 1453 regs[i].set_bad(); 1454 break; 1455 default: 1456 ShouldNotReachHere(); 1457 break; 1458 } 1459 } 1460 #ifdef _WIN64 1461 // windows abi requires that we always allocate enough stack space 1462 // for 4 64bit registers to be stored down. 
1463 if (stk_args < 8) { 1464 stk_args = 8; 1465 } 1466 #endif // _WIN64 1467 1468 return stk_args; 1469 } 1470 1471 int SharedRuntime::vector_calling_convention(VMRegPair *regs, 1472 uint num_bits, 1473 uint total_args_passed) { 1474 assert(num_bits == 64 || num_bits == 128 || num_bits == 256 || num_bits == 512, 1475 "only certain vector sizes are supported for now"); 1476 1477 static const XMMRegister VEC_ArgReg[32] = { 1478 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, 1479 xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, 1480 xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23, 1481 xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31 1482 }; 1483 1484 uint stk_args = 0; 1485 uint fp_args = 0; 1486 1487 for (uint i = 0; i < total_args_passed; i++) { 1488 VMReg vmreg = VEC_ArgReg[fp_args++]->as_VMReg(); 1489 int next_val = num_bits == 64 ? 1 : (num_bits == 128 ? 3 : (num_bits == 256 ? 7 : 15)); 1490 regs[i].set_pair(vmreg->next(next_val), vmreg); 1491 } 1492 1493 return stk_args; 1494 } 1495 1496 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { 1497 // We always ignore the frame_slots arg and just use the space just below frame pointer 1498 // which by this time is free to use 1499 switch (ret_type) { 1500 case T_FLOAT: 1501 __ movflt(Address(rbp, -wordSize), xmm0); 1502 break; 1503 case T_DOUBLE: 1504 __ movdbl(Address(rbp, -wordSize), xmm0); 1505 break; 1506 case T_VOID: break; 1507 default: { 1508 __ movptr(Address(rbp, -wordSize), rax); 1509 } 1510 } 1511 } 1512 1513 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { 1514 // We always ignore the frame_slots arg and just use the space just below frame pointer 1515 // which by this time is free to use 1516 switch (ret_type) { 1517 case T_FLOAT: 1518 __ movflt(xmm0, Address(rbp, -wordSize)); 1519 break; 1520 case T_DOUBLE: 1521 __ movdbl(xmm0, Address(rbp, -wordSize)); 1522 break; 1523 case T_VOID: break; 1524 default: { 1525 __ movptr(rax, Address(rbp, -wordSize)); 1526 } 1527 } 1528 } 1529 1530 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { 1531 for ( int i = first_arg ; i < arg_count ; i++ ) { 1532 if (args[i].first()->is_Register()) { 1533 __ push(args[i].first()->as_Register()); 1534 } else if (args[i].first()->is_XMMRegister()) { 1535 __ subptr(rsp, 2*wordSize); 1536 __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister()); 1537 } 1538 } 1539 } 1540 1541 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { 1542 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { 1543 if (args[i].first()->is_Register()) { 1544 __ pop(args[i].first()->as_Register()); 1545 } else if (args[i].first()->is_XMMRegister()) { 1546 __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0)); 1547 __ addptr(rsp, 2*wordSize); 1548 } 1549 } 1550 } 1551 1552 static void verify_oop_args(MacroAssembler* masm, 1553 const methodHandle& method, 1554 const BasicType* sig_bt, 1555 const VMRegPair* regs) { 1556 Register temp_reg = rbx; // not part of any compiled calling seq 1557 if (VerifyOops) { 1558 for (int i = 0; i < method->size_of_parameters(); i++) { 1559 if (is_reference_type(sig_bt[i])) { 1560 VMReg r = regs[i].first(); 1561 assert(r->is_valid(), "bad oop arg"); 1562 if (r->is_stack()) { 1563 __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1564 __ verify_oop(temp_reg); 1565 } else { 1566 __ 
verify_oop(r->as_Register()); 1567 } 1568 } 1569 } 1570 } 1571 } 1572 1573 static void check_continuation_enter_argument(VMReg actual_vmreg, 1574 Register expected_reg, 1575 const char* name) { 1576 assert(!actual_vmreg->is_stack(), "%s cannot be on stack", name); 1577 assert(actual_vmreg->as_Register() == expected_reg, 1578 "%s is in unexpected register: %s instead of %s", 1579 name, actual_vmreg->as_Register()->name(), expected_reg->name()); 1580 } 1581 1582 1583 //---------------------------- continuation_enter_setup --------------------------- 1584 // 1585 // Arguments: 1586 // None. 1587 // 1588 // Results: 1589 // rsp: pointer to blank ContinuationEntry 1590 // 1591 // Kills: 1592 // rax 1593 // 1594 static OopMap* continuation_enter_setup(MacroAssembler* masm, int& stack_slots) { 1595 assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, ""); 1596 assert(in_bytes(ContinuationEntry::cont_offset()) % VMRegImpl::stack_slot_size == 0, ""); 1597 assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, ""); 1598 1599 stack_slots += checked_cast<int>(ContinuationEntry::size()) / wordSize; 1600 __ subptr(rsp, checked_cast<int32_t>(ContinuationEntry::size())); 1601 1602 int frame_size = (checked_cast<int>(ContinuationEntry::size()) + wordSize) / VMRegImpl::stack_slot_size; 1603 OopMap* map = new OopMap(frame_size, 0); 1604 1605 __ movptr(rax, Address(r15_thread, JavaThread::cont_entry_offset())); 1606 __ movptr(Address(rsp, ContinuationEntry::parent_offset()), rax); 1607 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rsp); 1608 1609 return map; 1610 } 1611 1612 //---------------------------- fill_continuation_entry --------------------------- 1613 // 1614 // Arguments: 1615 // rsp: pointer to blank Continuation entry 1616 // reg_cont_obj: pointer to the continuation 1617 // reg_flags: flags 1618 // 1619 // Results: 1620 // rsp: pointer to filled out ContinuationEntry 1621 // 1622 // Kills: 1623 // rax 1624 // 1625 static void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Register reg_flags) { 1626 assert_different_registers(rax, reg_cont_obj, reg_flags); 1627 #ifdef ASSERT 1628 __ movl(Address(rsp, ContinuationEntry::cookie_offset()), ContinuationEntry::cookie_value()); 1629 #endif 1630 __ movptr(Address(rsp, ContinuationEntry::cont_offset()), reg_cont_obj); 1631 __ movl (Address(rsp, ContinuationEntry::flags_offset()), reg_flags); 1632 __ movptr(Address(rsp, ContinuationEntry::chunk_offset()), 0); 1633 __ movl(Address(rsp, ContinuationEntry::argsize_offset()), 0); 1634 __ movl(Address(rsp, ContinuationEntry::pin_count_offset()), 0); 1635 1636 __ movptr(rax, Address(r15_thread, JavaThread::cont_fastpath_offset())); 1637 __ movptr(Address(rsp, ContinuationEntry::parent_cont_fastpath_offset()), rax); 1638 __ movq(rax, Address(r15_thread, JavaThread::held_monitor_count_offset())); 1639 __ movq(Address(rsp, ContinuationEntry::parent_held_monitor_count_offset()), rax); 1640 1641 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), 0); 1642 __ movq(Address(r15_thread, JavaThread::held_monitor_count_offset()), 0); 1643 } 1644 1645 //---------------------------- continuation_enter_cleanup --------------------------- 1646 // 1647 // Arguments: 1648 // rsp: pointer to the ContinuationEntry 1649 // 1650 // Results: 1651 // rsp: pointer to the spilled rbp in the entry frame 1652 // 1653 // Kills: 1654 // rbx 1655 // 1656 static void continuation_enter_cleanup(MacroAssembler* masm) { 1657 #ifdef ASSERT 1658 
Label L_good_sp; 1659 __ cmpptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset())); 1660 __ jcc(Assembler::equal, L_good_sp); 1661 __ stop("Incorrect rsp at continuation_enter_cleanup"); 1662 __ bind(L_good_sp); 1663 #endif 1664 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_cont_fastpath_offset())); 1665 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), rbx); 1666 1667 if (CheckJNICalls) { 1668 // Check if this is a virtual thread continuation 1669 Label L_skip_vthread_code; 1670 __ cmpl(Address(rsp, ContinuationEntry::flags_offset()), 0); 1671 __ jcc(Assembler::equal, L_skip_vthread_code); 1672 1673 // If the held monitor count is > 0 and this vthread is terminating then 1674 // it failed to release a JNI monitor. So we issue the same log message 1675 // that JavaThread::exit does. 1676 __ cmpptr(Address(r15_thread, JavaThread::jni_monitor_count_offset()), 0); 1677 __ jcc(Assembler::equal, L_skip_vthread_code); 1678 1679 // rax may hold an exception oop, save it before the call 1680 __ push(rax); 1681 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::log_jni_monitor_still_held)); 1682 __ pop(rax); 1683 1684 // For vthreads we have to explicitly zero the JNI monitor count of the carrier 1685 // on termination. The held count is implicitly zeroed below when we restore from 1686 // the parent held count (which has to be zero). 1687 __ movq(Address(r15_thread, JavaThread::jni_monitor_count_offset()), 0); 1688 1689 __ bind(L_skip_vthread_code); 1690 } 1691 #ifdef ASSERT 1692 else { 1693 // Check if this is a virtual thread continuation 1694 Label L_skip_vthread_code; 1695 __ cmpl(Address(rsp, ContinuationEntry::flags_offset()), 0); 1696 __ jcc(Assembler::equal, L_skip_vthread_code); 1697 1698 // See comment just above. If not checking JNI calls the JNI count is only 1699 // needed for assertion checking. 1700 __ movq(Address(r15_thread, JavaThread::jni_monitor_count_offset()), 0); 1701 1702 __ bind(L_skip_vthread_code); 1703 } 1704 #endif 1705 1706 __ movq(rbx, Address(rsp, ContinuationEntry::parent_held_monitor_count_offset())); 1707 __ movq(Address(r15_thread, JavaThread::held_monitor_count_offset()), rbx); 1708 1709 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_offset())); 1710 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rbx); 1711 __ addptr(rsp, checked_cast<int32_t>(ContinuationEntry::size())); 1712 } 1713 1714 static void gen_continuation_enter(MacroAssembler* masm, 1715 const VMRegPair* regs, 1716 int& exception_offset, 1717 OopMapSet* oop_maps, 1718 int& frame_complete, 1719 int& stack_slots, 1720 int& interpreted_entry_offset, 1721 int& compiled_entry_offset) { 1722 1723 // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread) 1724 int pos_cont_obj = 0; 1725 int pos_is_cont = 1; 1726 int pos_is_virtual = 2; 1727 1728 // The platform-specific calling convention may present the arguments in various registers. 1729 // To simplify the rest of the code, we expect the arguments to reside at these known 1730 // registers, and we additionally check the placement here in case calling convention ever 1731 // changes. 
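// (Illustrative note, assuming the usual x86_64 mapping j_rarg0..j_rarg2 == c_rarg1..c_rarg3:
// since enterSpecial takes (Continuation, boolean, boolean), its three arguments arrive
// exactly in the registers named below, and the placement checks that follow would catch
// any future change to that convention.)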
1732 Register reg_cont_obj = c_rarg1; 1733 Register reg_is_cont = c_rarg2; 1734 Register reg_is_virtual = c_rarg3; 1735 1736 check_continuation_enter_argument(regs[pos_cont_obj].first(), reg_cont_obj, "Continuation object"); 1737 check_continuation_enter_argument(regs[pos_is_cont].first(), reg_is_cont, "isContinue"); 1738 check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread"); 1739 1740 // Utility methods kill rax, make sure there are no collisions 1741 assert_different_registers(rax, reg_cont_obj, reg_is_cont, reg_is_virtual); 1742 1743 AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), 1744 relocInfo::static_call_type); 1745 1746 address start = __ pc(); 1747 1748 Label L_thaw, L_exit; 1749 1750 // i2i entry used at interp_only_mode only 1751 interpreted_entry_offset = __ pc() - start; 1752 { 1753 #ifdef ASSERT 1754 Label is_interp_only; 1755 __ cmpb(Address(r15_thread, JavaThread::interp_only_mode_offset()), 0); 1756 __ jcc(Assembler::notEqual, is_interp_only); 1757 __ stop("enterSpecial interpreter entry called when not in interp_only_mode"); 1758 __ bind(is_interp_only); 1759 #endif 1760 1761 __ pop(rax); // return address 1762 // Read interpreter arguments into registers (this is an ad-hoc i2c adapter) 1763 __ movptr(c_rarg1, Address(rsp, Interpreter::stackElementSize*2)); 1764 __ movl(c_rarg2, Address(rsp, Interpreter::stackElementSize*1)); 1765 __ movl(c_rarg3, Address(rsp, Interpreter::stackElementSize*0)); 1766 __ andptr(rsp, -16); // Ensure compiled code always sees stack at proper alignment 1767 __ push(rax); // return address 1768 __ push_cont_fastpath(); 1769 1770 __ enter(); 1771 1772 stack_slots = 2; // will be adjusted in setup 1773 OopMap* map = continuation_enter_setup(masm, stack_slots); 1774 // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe, 1775 // but that's okay because at the very worst we'll miss an async sample, but we're in interp_only_mode anyway. 1776 1777 __ verify_oop(reg_cont_obj); 1778 1779 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual); 1780 1781 // If continuation, call to thaw. Otherwise, resolve the call and exit. 1782 __ testptr(reg_is_cont, reg_is_cont); 1783 __ jcc(Assembler::notZero, L_thaw); 1784 1785 // --- Resolve path 1786 1787 // Make sure the call is patchable 1788 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset); 1789 // Emit stub for static call 1790 address stub = CompiledDirectCall::emit_to_interp_stub(masm, __ pc()); 1791 if (stub == nullptr) { 1792 fatal("CodeCache is full at gen_continuation_enter"); 1793 } 1794 __ call(resolve); 1795 oop_maps->add_gc_map(__ pc() - start, map); 1796 __ post_call_nop(); 1797 1798 __ jmp(L_exit); 1799 } 1800 1801 // compiled entry 1802 __ align(CodeEntryAlignment); 1803 compiled_entry_offset = __ pc() - start; 1804 __ enter(); 1805 1806 stack_slots = 2; // will be adjusted in setup 1807 OopMap* map = continuation_enter_setup(masm, stack_slots); 1808 1809 // Frame is now completed as far as size and linkage. 1810 frame_complete = __ pc() - start; 1811 1812 __ verify_oop(reg_cont_obj); 1813 1814 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual); 1815 1816 // If isContinue, call to thaw. 
Otherwise, call Continuation.enter(Continuation c, boolean isContinue) 1817 __ testptr(reg_is_cont, reg_is_cont); 1818 __ jccb(Assembler::notZero, L_thaw); 1819 1820 // --- call Continuation.enter(Continuation c, boolean isContinue) 1821 1822 // Make sure the call is patchable 1823 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset); 1824 1825 // Emit stub for static call 1826 address stub = CompiledDirectCall::emit_to_interp_stub(masm, __ pc()); 1827 if (stub == nullptr) { 1828 fatal("CodeCache is full at gen_continuation_enter"); 1829 } 1830 1831 // The call needs to be resolved. There's a special case for this in 1832 // SharedRuntime::find_callee_info_helper() which calls 1833 // LinkResolver::resolve_continuation_enter() which resolves the call to 1834 // Continuation.enter(Continuation c, boolean isContinue). 1835 __ call(resolve); 1836 1837 oop_maps->add_gc_map(__ pc() - start, map); 1838 __ post_call_nop(); 1839 1840 __ jmpb(L_exit); 1841 1842 // --- Thawing path 1843 1844 __ bind(L_thaw); 1845 1846 ContinuationEntry::_thaw_call_pc_offset = __ pc() - start; 1847 __ call(RuntimeAddress(StubRoutines::cont_thaw())); 1848 1849 ContinuationEntry::_return_pc_offset = __ pc() - start; 1850 oop_maps->add_gc_map(__ pc() - start, map->deep_copy()); 1851 __ post_call_nop(); 1852 1853 // --- Normal exit (resolve/thawing) 1854 1855 __ bind(L_exit); 1856 ContinuationEntry::_cleanup_offset = __ pc() - start; 1857 continuation_enter_cleanup(masm); 1858 __ pop(rbp); 1859 __ ret(0); 1860 1861 // --- Exception handling path 1862 1863 exception_offset = __ pc() - start; 1864 1865 continuation_enter_cleanup(masm); 1866 __ pop(rbp); 1867 1868 __ movptr(c_rarg0, r15_thread); 1869 __ movptr(c_rarg1, Address(rsp, 0)); // return address 1870 1871 // rax still holds the original exception oop, save it before the call 1872 __ push(rax); 1873 1874 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), 2); 1875 __ movptr(rbx, rax); 1876 1877 // Continue at exception handler: 1878 // rax: exception oop 1879 // rbx: exception handler 1880 // rdx: exception pc 1881 __ pop(rax); 1882 __ verify_oop(rax); 1883 __ pop(rdx); 1884 __ jmp(rbx); 1885 } 1886 1887 static void gen_continuation_yield(MacroAssembler* masm, 1888 const VMRegPair* regs, 1889 OopMapSet* oop_maps, 1890 int& frame_complete, 1891 int& stack_slots, 1892 int& compiled_entry_offset) { 1893 enum layout { 1894 rbp_off, 1895 rbpH_off, 1896 return_off, 1897 return_off2, 1898 framesize // inclusive of return address 1899 }; 1900 stack_slots = framesize / VMRegImpl::slots_per_word; 1901 assert(stack_slots == 2, "recheck layout"); 1902 1903 address start = __ pc(); 1904 compiled_entry_offset = __ pc() - start; 1905 __ enter(); 1906 address the_pc = __ pc(); 1907 1908 frame_complete = the_pc - start; 1909 1910 // This nop must be exactly at the PC we push into the frame info. 1911 // We use this nop for fast CodeBlob lookup, associate the OopMap 1912 // with it right away. 
1913 __ post_call_nop(); 1914 OopMap* map = new OopMap(framesize, 1); 1915 oop_maps->add_gc_map(frame_complete, map); 1916 1917 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1); 1918 __ movptr(c_rarg0, r15_thread); 1919 __ movptr(c_rarg1, rsp); 1920 __ call_VM_leaf(Continuation::freeze_entry(), 2); 1921 __ reset_last_Java_frame(true); 1922 1923 Label L_pinned; 1924 1925 __ testptr(rax, rax); 1926 __ jcc(Assembler::notZero, L_pinned); 1927 1928 __ movptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset())); 1929 continuation_enter_cleanup(masm); 1930 __ pop(rbp); 1931 __ ret(0); 1932 1933 __ bind(L_pinned); 1934 1935 // Pinned, return to caller 1936 1937 // handle pending exception thrown by freeze 1938 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 1939 Label ok; 1940 __ jcc(Assembler::equal, ok); 1941 __ leave(); 1942 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 1943 __ bind(ok); 1944 1945 __ leave(); 1946 __ ret(0); 1947 } 1948 1949 void SharedRuntime::continuation_enter_cleanup(MacroAssembler* masm) { 1950 ::continuation_enter_cleanup(masm); 1951 } 1952 1953 static void gen_special_dispatch(MacroAssembler* masm, 1954 const methodHandle& method, 1955 const BasicType* sig_bt, 1956 const VMRegPair* regs) { 1957 verify_oop_args(masm, method, sig_bt, regs); 1958 vmIntrinsics::ID iid = method->intrinsic_id(); 1959 1960 // Now write the args into the outgoing interpreter space 1961 bool has_receiver = false; 1962 Register receiver_reg = noreg; 1963 int member_arg_pos = -1; 1964 Register member_reg = noreg; 1965 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1966 if (ref_kind != 0) { 1967 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 1968 member_reg = rbx; // known to be free at this point 1969 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 1970 } else if (iid == vmIntrinsics::_invokeBasic) { 1971 has_receiver = true; 1972 } else if (iid == vmIntrinsics::_linkToNative) { 1973 member_arg_pos = method->size_of_parameters() - 1; // trailing NativeEntryPoint argument 1974 member_reg = rbx; // known to be free at this point 1975 } else { 1976 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); 1977 } 1978 1979 if (member_reg != noreg) { 1980 // Load the member_arg into register, if necessary. 1981 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 1982 VMReg r = regs[member_arg_pos].first(); 1983 if (r->is_stack()) { 1984 __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1985 } else { 1986 // no data motion is needed 1987 member_reg = r->as_Register(); 1988 } 1989 } 1990 1991 if (has_receiver) { 1992 // Make sure the receiver is loaded into a register. 1993 assert(method->size_of_parameters() > 0, "oob"); 1994 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 1995 VMReg r = regs[0].first(); 1996 assert(r->is_valid(), "bad receiver arg"); 1997 if (r->is_stack()) { 1998 // Porting note: This assumes that compiled calling conventions always 1999 // pass the receiver oop in a register. If this is not true on some 2000 // platform, pick a temp and load the receiver from stack. 
2001 fatal("receiver always in a register"); 2002 receiver_reg = j_rarg0; // known to be free at this point 2003 __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 2004 } else { 2005 // no data motion is needed 2006 receiver_reg = r->as_Register(); 2007 } 2008 } 2009 2010 // Figure out which address we are really jumping to: 2011 MethodHandles::generate_method_handle_dispatch(masm, iid, 2012 receiver_reg, member_reg, /*for_compiler_entry:*/ true); 2013 } 2014 2015 // --------------------------------------------------------------------------- 2016 // Generate a native wrapper for a given method. The method takes arguments 2017 // in the Java compiled code convention, marshals them to the native 2018 // convention (handlizes oops, etc), transitions to native, makes the call, 2019 // returns to java state (possibly blocking), unhandlizes any result and 2020 // returns. 2021 // 2022 // Critical native functions are a shorthand for the use of 2023 // GetPrimtiveArrayCritical and disallow the use of any other JNI 2024 // functions. The wrapper is expected to unpack the arguments before 2025 // passing them to the callee. Critical native functions leave the state _in_Java, 2026 // since they cannot stop for GC. 2027 // Some other parts of JNI setup are skipped like the tear down of the JNI handle 2028 // block and the check for pending exceptions it's impossible for them 2029 // to be thrown. 2030 // 2031 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, 2032 const methodHandle& method, 2033 int compile_id, 2034 BasicType* in_sig_bt, 2035 VMRegPair* in_regs, 2036 BasicType ret_type) { 2037 if (method->is_continuation_native_intrinsic()) { 2038 int exception_offset = -1; 2039 OopMapSet* oop_maps = new OopMapSet(); 2040 int frame_complete = -1; 2041 int stack_slots = -1; 2042 int interpreted_entry_offset = -1; 2043 int vep_offset = -1; 2044 if (method->is_continuation_enter_intrinsic()) { 2045 gen_continuation_enter(masm, 2046 in_regs, 2047 exception_offset, 2048 oop_maps, 2049 frame_complete, 2050 stack_slots, 2051 interpreted_entry_offset, 2052 vep_offset); 2053 } else if (method->is_continuation_yield_intrinsic()) { 2054 gen_continuation_yield(masm, 2055 in_regs, 2056 oop_maps, 2057 frame_complete, 2058 stack_slots, 2059 vep_offset); 2060 } else { 2061 guarantee(false, "Unknown Continuation native intrinsic"); 2062 } 2063 2064 #ifdef ASSERT 2065 if (method->is_continuation_enter_intrinsic()) { 2066 assert(interpreted_entry_offset != -1, "Must be set"); 2067 assert(exception_offset != -1, "Must be set"); 2068 } else { 2069 assert(interpreted_entry_offset == -1, "Must be unset"); 2070 assert(exception_offset == -1, "Must be unset"); 2071 } 2072 assert(frame_complete != -1, "Must be set"); 2073 assert(stack_slots != -1, "Must be set"); 2074 assert(vep_offset != -1, "Must be set"); 2075 #endif 2076 2077 __ flush(); 2078 nmethod* nm = nmethod::new_native_nmethod(method, 2079 compile_id, 2080 masm->code(), 2081 vep_offset, 2082 frame_complete, 2083 stack_slots, 2084 in_ByteSize(-1), 2085 in_ByteSize(-1), 2086 oop_maps, 2087 exception_offset); 2088 if (nm == nullptr) return nm; 2089 if (method->is_continuation_enter_intrinsic()) { 2090 ContinuationEntry::set_enter_code(nm, interpreted_entry_offset); 2091 } else if (method->is_continuation_yield_intrinsic()) { 2092 _cont_doYield_stub = nm; 2093 } 2094 return nm; 2095 } 2096 2097 if (method->is_method_handle_intrinsic()) { 2098 vmIntrinsics::ID iid = method->intrinsic_id(); 2099 intptr_t 
start = (intptr_t)__ pc(); 2100 int vep_offset = ((intptr_t)__ pc()) - start; 2101 gen_special_dispatch(masm, 2102 method, 2103 in_sig_bt, 2104 in_regs); 2105 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period 2106 __ flush(); 2107 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually 2108 return nmethod::new_native_nmethod(method, 2109 compile_id, 2110 masm->code(), 2111 vep_offset, 2112 frame_complete, 2113 stack_slots / VMRegImpl::slots_per_word, 2114 in_ByteSize(-1), 2115 in_ByteSize(-1), 2116 nullptr); 2117 } 2118 address native_func = method->native_function(); 2119 assert(native_func != nullptr, "must have function"); 2120 2121 // An OopMap for lock (and class if static) 2122 OopMapSet *oop_maps = new OopMapSet(); 2123 intptr_t start = (intptr_t)__ pc(); 2124 2125 // We have received a description of where all the java arg are located 2126 // on entry to the wrapper. We need to convert these args to where 2127 // the jni function will expect them. To figure out where they go 2128 // we convert the java signature to a C signature by inserting 2129 // the hidden arguments as arg[0] and possibly arg[1] (static method) 2130 2131 const int total_in_args = method->size_of_parameters(); 2132 int total_c_args = total_in_args + (method->is_static() ? 2 : 1); 2133 2134 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 2135 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 2136 2137 int argc = 0; 2138 out_sig_bt[argc++] = T_ADDRESS; 2139 if (method->is_static()) { 2140 out_sig_bt[argc++] = T_OBJECT; 2141 } 2142 2143 for (int i = 0; i < total_in_args ; i++ ) { 2144 out_sig_bt[argc++] = in_sig_bt[i]; 2145 } 2146 2147 // Now figure out where the args must be stored and how much stack space 2148 // they require. 2149 int out_arg_slots; 2150 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); 2151 2152 // Compute framesize for the wrapper. We need to handlize all oops in 2153 // incoming registers 2154 2155 // Calculate the total number of stack slots we will need. 
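// Illustration (assuming a Linux build, where a (JNIEnv*, jclass, jint, jint) signature
// needs no outgoing stack args): for a static synchronized native taking two ints, the
// sums below come to 0 + 0 + 12 (oop handle area) + 2 (klass) + 2 (lock) + 6 = 22 slots,
// aligned up to 24 slots, i.e. a stack_size of 96 bytes.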
2156 2157 // First count the abi requirement plus all of the outgoing args 2158 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 2159 2160 // Now the space for the inbound oop handle area 2161 int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers 2162 2163 int oop_handle_offset = stack_slots; 2164 stack_slots += total_save_slots; 2165 2166 // Now any space we need for handlizing a klass if static method 2167 2168 int klass_slot_offset = 0; 2169 int klass_offset = -1; 2170 int lock_slot_offset = 0; 2171 bool is_static = false; 2172 2173 if (method->is_static()) { 2174 klass_slot_offset = stack_slots; 2175 stack_slots += VMRegImpl::slots_per_word; 2176 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 2177 is_static = true; 2178 } 2179 2180 // Plus a lock if needed 2181 2182 if (method->is_synchronized()) { 2183 lock_slot_offset = stack_slots; 2184 stack_slots += VMRegImpl::slots_per_word; 2185 } 2186 2187 // Now a place (+2) to save return values or temp during shuffling 2188 // + 4 for return address (which we own) and saved rbp 2189 stack_slots += 6; 2190 2191 // Ok The space we have allocated will look like: 2192 // 2193 // 2194 // FP-> | | 2195 // |---------------------| 2196 // | 2 slots for moves | 2197 // |---------------------| 2198 // | lock box (if sync) | 2199 // |---------------------| <- lock_slot_offset 2200 // | klass (if static) | 2201 // |---------------------| <- klass_slot_offset 2202 // | oopHandle area | 2203 // |---------------------| <- oop_handle_offset (6 java arg registers) 2204 // | outbound memory | 2205 // | based arguments | 2206 // | | 2207 // |---------------------| 2208 // | | 2209 // SP-> | out_preserved_slots | 2210 // 2211 // 2212 2213 2214 // Now compute actual number of stack words we need rounding to make 2215 // stack properly aligned. 2216 stack_slots = align_up(stack_slots, StackAlignmentInSlots); 2217 2218 int stack_size = stack_slots * VMRegImpl::stack_slot_size; 2219 2220 // First thing make an ic check to see if we should even be here 2221 2222 // We are free to use all registers as temps without saving them and 2223 // restoring them except rbp. rbp is the only callee save register 2224 // as far as the interpreter and the compiler(s) are concerned. 2225 2226 const Register receiver = j_rarg0; 2227 2228 Label exception_pending; 2229 2230 assert_different_registers(receiver, rscratch1, rscratch2); 2231 __ verify_oop(receiver); 2232 __ ic_check(8 /* end_alignment */); 2233 2234 int vep_offset = ((intptr_t)__ pc()) - start; 2235 2236 if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { 2237 Label L_skip_barrier; 2238 Register klass = r10; 2239 __ mov_metadata(klass, method->method_holder()); // InstanceKlass* 2240 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/); 2241 2242 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path 2243 2244 __ bind(L_skip_barrier); 2245 } 2246 2247 #ifdef COMPILER1 2248 // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available. 
2249 if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { 2250 inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/); 2251 } 2252 #endif // COMPILER1 2253 2254 // The instruction at the verified entry point must be 5 bytes or longer 2255 // because it can be patched on the fly by make_non_entrant. The stack bang 2256 // instruction fits that requirement. 2257 2258 // Generate stack overflow check 2259 __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); 2260 2261 // Generate a new frame for the wrapper. 2262 __ enter(); 2263 // -2 because return address is already present and so is saved rbp 2264 __ subptr(rsp, stack_size - 2*wordSize); 2265 2266 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2267 // native wrapper is not hot enough to micro optimize the nmethod entry barrier with an out-of-line stub 2268 bs->nmethod_entry_barrier(masm, nullptr /* slow_path */, nullptr /* continuation */); 2269 2270 // Frame is now completed as far as size and linkage. 2271 int frame_complete = ((intptr_t)__ pc()) - start; 2272 2273 #ifdef ASSERT 2274 __ check_stack_alignment(rsp, "improperly aligned stack"); 2275 #endif /* ASSERT */ 2276 2277 2278 // We use r14 as the oop handle for the receiver/klass 2279 // It is callee save so it survives the call to native 2280 2281 const Register oop_handle_reg = r14; 2282 2283 // 2284 // We immediately shuffle the arguments so that any vm call we have to 2285 // make from here on out (sync slow path, jvmti, etc.) we will have 2286 // captured the oops from our caller and have a valid oopMap for 2287 // them. 2288 2289 // ----------------- 2290 // The Grand Shuffle 2291 2292 // The Java calling convention is either equal (linux) or denser (win64) than the 2293 // c calling convention. However the because of the jni_env argument the c calling 2294 // convention always has at least one more (and two for static) arguments than Java. 2295 // Therefore if we move the args from java -> c backwards then we will never have 2296 // a register->register conflict and we don't have to build a dependency graph 2297 // and figure out how to break any cycles. 2298 // 2299 2300 // Record esp-based slot for receiver on stack for non-static methods 2301 int receiver_offset = -1; 2302 2303 // This is a trick. We double the stack slots so we can claim 2304 // the oops in the caller's frame. Since we are sure to have 2305 // more args than the caller doubling is enough to make 2306 // sure we can capture all the incoming oop args from the 2307 // caller. 2308 // 2309 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 2310 2311 // Mark location of rbp (someday) 2312 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp)); 2313 2314 // Use eax, ebx as temporaries during any memory-memory moves we have to do 2315 // All inbound args are referenced based on rbp and all outbound args via rsp. 2316 2317 2318 #ifdef ASSERT 2319 bool reg_destroyed[Register::number_of_registers]; 2320 bool freg_destroyed[XMMRegister::number_of_registers]; 2321 for ( int r = 0 ; r < Register::number_of_registers ; r++ ) { 2322 reg_destroyed[r] = false; 2323 } 2324 for ( int f = 0 ; f < XMMRegister::number_of_registers ; f++ ) { 2325 freg_destroyed[f] = false; 2326 } 2327 2328 #endif /* ASSERT */ 2329 2330 // For JNI natives the incoming and outgoing registers are offset upwards. 
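// Worked example of why the backward order matters (register names assume the usual
// Linux x86_64 mapping j_rarg0 == c_rarg1, j_rarg1 == c_rarg2, ...): for a static
// native, incoming Java arg0 (c_rarg1) must end up in outgoing c_rarg2 and Java arg1
// (c_rarg2) in c_rarg3. Copying arg0 first would clobber arg1 before it is read;
// copying the last argument first never overwrites a source that is still needed.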
2331 GrowableArray<int> arg_order(2 * total_in_args); 2332 2333 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { 2334 arg_order.push(i); 2335 arg_order.push(c_arg); 2336 } 2337 2338 for (int ai = 0; ai < arg_order.length(); ai += 2) { 2339 int i = arg_order.at(ai); 2340 int c_arg = arg_order.at(ai + 1); 2341 __ block_comment(err_msg("move %d -> %d", i, c_arg)); 2342 #ifdef ASSERT 2343 if (in_regs[i].first()->is_Register()) { 2344 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); 2345 } else if (in_regs[i].first()->is_XMMRegister()) { 2346 assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!"); 2347 } 2348 if (out_regs[c_arg].first()->is_Register()) { 2349 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 2350 } else if (out_regs[c_arg].first()->is_XMMRegister()) { 2351 freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; 2352 } 2353 #endif /* ASSERT */ 2354 switch (in_sig_bt[i]) { 2355 case T_ARRAY: 2356 case T_OBJECT: 2357 __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 2358 ((i == 0) && (!is_static)), 2359 &receiver_offset); 2360 break; 2361 case T_VOID: 2362 break; 2363 2364 case T_FLOAT: 2365 __ float_move(in_regs[i], out_regs[c_arg]); 2366 break; 2367 2368 case T_DOUBLE: 2369 assert( i + 1 < total_in_args && 2370 in_sig_bt[i + 1] == T_VOID && 2371 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 2372 __ double_move(in_regs[i], out_regs[c_arg]); 2373 break; 2374 2375 case T_LONG : 2376 __ long_move(in_regs[i], out_regs[c_arg]); 2377 break; 2378 2379 case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 2380 2381 default: 2382 __ move32_64(in_regs[i], out_regs[c_arg]); 2383 } 2384 } 2385 2386 int c_arg; 2387 2388 // Pre-load a static method's oop into r14. Used both by locking code and 2389 // the normal JNI call code. 2390 // point c_arg at the first arg that is already loaded in case we 2391 // need to spill before we call out 2392 c_arg = total_c_args - total_in_args; 2393 2394 if (method->is_static()) { 2395 2396 // load oop into a register 2397 __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); 2398 2399 // Now handlize the static class mirror it's known not-null. 2400 __ movptr(Address(rsp, klass_offset), oop_handle_reg); 2401 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 2402 2403 // Now get the handle 2404 __ lea(oop_handle_reg, Address(rsp, klass_offset)); 2405 // store the klass handle as second argument 2406 __ movptr(c_rarg1, oop_handle_reg); 2407 // and protect the arg if we must spill 2408 c_arg--; 2409 } 2410 2411 // Change state to native (we save the return address in the thread, since it might not 2412 // be pushed on the stack when we do a stack traversal). It is enough that the pc() 2413 // points into the right code segment. It does not have to be the correct return pc. 2414 // We use the same pc/oopMap repeatedly when we call out 2415 2416 Label native_return; 2417 if (method->is_object_wait0()) { 2418 // For convenience we use the pc we want to resume to in case of preemption on Object.wait. 2419 __ set_last_Java_frame(rsp, noreg, native_return, rscratch1); 2420 } else { 2421 intptr_t the_pc = (intptr_t) __ pc(); 2422 oop_maps->add_gc_map(the_pc - start, map); 2423 2424 __ set_last_Java_frame(rsp, noreg, __ pc(), rscratch1); 2425 } 2426 2427 // We have all of the arguments setup at this point. 
We must not touch any of the 2428 // argument registers at this point (what if we save/restore them when there are no oops?). 2429 2430 if (DTraceMethodProbes) { 2431 // protect the args we've loaded 2432 save_args(masm, total_c_args, c_arg, out_regs); 2433 __ mov_metadata(c_rarg1, method()); 2434 __ call_VM_leaf( 2435 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 2436 r15_thread, c_rarg1); 2437 restore_args(masm, total_c_args, c_arg, out_regs); 2438 } 2439 2440 // RedefineClasses() tracing support for obsolete method entry 2441 if (log_is_enabled(Trace, redefine, class, obsolete)) { 2442 // protect the args we've loaded 2443 save_args(masm, total_c_args, c_arg, out_regs); 2444 __ mov_metadata(c_rarg1, method()); 2445 __ call_VM_leaf( 2446 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), 2447 r15_thread, c_rarg1); 2448 restore_args(masm, total_c_args, c_arg, out_regs); 2449 } 2450 2451 // Lock a synchronized method 2452 2453 // Register definitions used by locking and unlocking 2454 2455 const Register swap_reg = rax; // Must use rax for cmpxchg instruction 2456 const Register obj_reg = rbx; // Will contain the oop 2457 const Register lock_reg = r13; // Address of compiler lock object (BasicLock) 2458 2459 Label slow_path_lock; 2460 Label lock_done; 2461 2462 if (method->is_synchronized()) { 2463 // Get the handle (the 2nd argument) 2464 __ mov(oop_handle_reg, c_rarg1); 2465 2466 // Get address of the box 2467 2468 __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2469 2470 // Load the oop from the handle 2471 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 2472 2473 __ lightweight_lock(lock_reg, obj_reg, swap_reg, rscratch1, slow_path_lock); 2474 2475 // Slow path will re-enter here 2476 __ bind(lock_done); 2477 } 2478 2479 // Finally just about ready to make the JNI call 2480 2481 // get JNIEnv* which is first argument to native 2482 __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset()))); 2483 2484 // Now set thread in native 2485 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native); 2486 2487 __ call(RuntimeAddress(native_func)); 2488 2489 // Verify or restore cpu control state after JNI call 2490 __ restore_cpu_control_state_after_jni(rscratch1); 2491 2492 // Unpack native results. 2493 switch (ret_type) { 2494 case T_BOOLEAN: __ c2bool(rax); break; 2495 case T_CHAR : __ movzwl(rax, rax); break; 2496 case T_BYTE : __ sign_extend_byte (rax); break; 2497 case T_SHORT : __ sign_extend_short(rax); break; 2498 case T_INT : /* nothing to do */ break; 2499 case T_DOUBLE : 2500 case T_FLOAT : 2501 // Result is in xmm0 we'll save as needed 2502 break; 2503 case T_ARRAY: // Really a handle 2504 case T_OBJECT: // Really a handle 2505 break; // can't de-handlize until after safepoint check 2506 case T_VOID: break; 2507 case T_LONG: break; 2508 default : ShouldNotReachHere(); 2509 } 2510 2511 // Switch thread to "native transition" state before reading the synchronization state. 2512 // This additional state is necessary because reading and testing the synchronization 2513 // state is not atomic w.r.t. GC, as this scenario demonstrates: 2514 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. 2515 // VM thread changes sync state to synchronizing and suspends threads for GC. 2516 // Thread A is resumed to finish this native method, but doesn't block here since it 2517 // didn't see any synchronization in progress, and escapes.
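// In short, the protocol emitted below is:
//   _thread_in_native -> _thread_in_native_trans (plus a full membar unless
//   UseSystemMemoryBarrier) -> safepoint/suspend poll, possibly calling
//   JavaThread::check_special_condition_for_native_trans -> _thread_in_Java.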
2518 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans); 2519 2520 // Force this write out before the read below 2521 if (!UseSystemMemoryBarrier) { 2522 __ membar(Assembler::Membar_mask_bits( 2523 Assembler::LoadLoad | Assembler::LoadStore | 2524 Assembler::StoreLoad | Assembler::StoreStore)); 2525 } 2526 2527 // check for safepoint operation in progress and/or pending suspend requests 2528 { 2529 Label Continue; 2530 Label slow_path; 2531 2532 __ safepoint_poll(slow_path, true /* at_return */, false /* in_nmethod */); 2533 2534 __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); 2535 __ jcc(Assembler::equal, Continue); 2536 __ bind(slow_path); 2537 2538 // Don't use call_VM as it will see a possible pending exception and forward it 2539 // and never return here preventing us from clearing _last_native_pc down below. 2540 // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are 2541 // preserved and correspond to the bcp/locals pointers. So we do a runtime call 2542 // by hand. 2543 // 2544 __ vzeroupper(); 2545 save_native_result(masm, ret_type, stack_slots); 2546 __ mov(c_rarg0, r15_thread); 2547 __ mov(r12, rsp); // remember sp 2548 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2549 __ andptr(rsp, -16); // align stack as required by ABI 2550 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); 2551 __ mov(rsp, r12); // restore sp 2552 __ reinit_heapbase(); 2553 // Restore any method result value 2554 restore_native_result(masm, ret_type, stack_slots); 2555 __ bind(Continue); 2556 } 2557 2558 // change thread state 2559 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); 2560 2561 if (method->is_object_wait0()) { 2562 // Check preemption for Object.wait() 2563 __ movptr(rscratch1, Address(r15_thread, JavaThread::preempt_alternate_return_offset())); 2564 __ cmpptr(rscratch1, NULL_WORD); 2565 __ jccb(Assembler::equal, native_return); 2566 __ movptr(Address(r15_thread, JavaThread::preempt_alternate_return_offset()), NULL_WORD); 2567 __ jmp(rscratch1); 2568 __ bind(native_return); 2569 2570 intptr_t the_pc = (intptr_t) __ pc(); 2571 oop_maps->add_gc_map(the_pc - start, map); 2572 } 2573 2574 2575 Label reguard; 2576 Label reguard_done; 2577 __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); 2578 __ jcc(Assembler::equal, reguard); 2579 __ bind(reguard_done); 2580 2581 // native result if any is live 2582 2583 // Unlock 2584 Label slow_path_unlock; 2585 Label unlock_done; 2586 if (method->is_synchronized()) { 2587 2588 Label fast_done; 2589 2590 // Get locked oop from the handle we passed to jni 2591 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 2592 2593 // Must save rax if it is live now because cmpxchg must use it 2594 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2595 save_native_result(masm, ret_type, stack_slots); 2596 } 2597 2598 __ lightweight_unlock(obj_reg, swap_reg, lock_reg, slow_path_unlock); 2599 2600 // slow path re-enters here 2601 __ bind(unlock_done); 2602 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2603 restore_native_result(masm, ret_type, stack_slots); 2604 } 2605 2606 __ bind(fast_done); 2607 } 2608 if (DTraceMethodProbes) { 2609 save_native_result(masm, ret_type, stack_slots); 2610 __ mov_metadata(c_rarg1, method()); 2611 __ call_VM_leaf( 2612 CAST_FROM_FN_PTR(address, 
SharedRuntime::dtrace_method_exit), 2613 r15_thread, c_rarg1); 2614 restore_native_result(masm, ret_type, stack_slots); 2615 } 2616 2617 __ reset_last_Java_frame(false); 2618 2619 // Unbox oop result, e.g. JNIHandles::resolve value. 2620 if (is_reference_type(ret_type)) { 2621 __ resolve_jobject(rax /* value */, 2622 rcx /* tmp */); 2623 } 2624 2625 if (CheckJNICalls) { 2626 // clear_pending_jni_exception_check 2627 __ movptr(Address(r15_thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD); 2628 } 2629 2630 // reset handle block 2631 __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset())); 2632 __ movl(Address(rcx, JNIHandleBlock::top_offset()), NULL_WORD); 2633 2634 // pop our frame 2635 2636 __ leave(); 2637 2638 #if INCLUDE_JFR 2639 // We need to do a poll test after unwind in case the sampler 2640 // managed to sample the native frame after returning to Java. 2641 Label L_return; 2642 address poll_test_pc = __ pc(); 2643 __ relocate(relocInfo::poll_return_type); 2644 __ testb(Address(r15_thread, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit()); 2645 __ jccb(Assembler::zero, L_return); 2646 __ lea(rscratch1, InternalAddress(poll_test_pc)); 2647 __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1); 2648 assert(SharedRuntime::polling_page_return_handler_blob() != nullptr, 2649 "polling page return stub not created yet"); 2650 address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); 2651 __ jump(RuntimeAddress(stub)); 2652 __ bind(L_return); 2653 #endif // INCLUDE_JFR 2654 2655 // Any exception pending? 2656 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2657 __ jcc(Assembler::notEqual, exception_pending); 2658 2659 // Return 2660 2661 __ ret(0); 2662 2663 // Unexpected paths are out of line and go here 2664 2665 // forward the exception 2666 __ bind(exception_pending); 2667 2668 // and forward the exception 2669 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 2670 2671 // Slow path locking & unlocking 2672 if (method->is_synchronized()) { 2673 2674 // BEGIN Slow path lock 2675 __ bind(slow_path_lock); 2676 2677 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM 2678 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2679 2680 // protect the args we've loaded 2681 save_args(masm, total_c_args, c_arg, out_regs); 2682 2683 __ mov(c_rarg0, obj_reg); 2684 __ mov(c_rarg1, lock_reg); 2685 __ mov(c_rarg2, r15_thread); 2686 2687 // Not a leaf but we have last_Java_frame setup as we want. 2688 // We don't want to unmount in case of contention since that would complicate preserving 2689 // the arguments that had already been marshalled into the native convention. So we force 2690 // the freeze slow path to find this native wrapper frame (see recurse_freeze_native_frame()) 2691 // and pin the vthread. Otherwise the fast path won't find it since we don't walk the stack. 
2692 __ push_cont_fastpath(); 2693 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); 2694 __ pop_cont_fastpath(); 2695 restore_args(masm, total_c_args, c_arg, out_regs); 2696 2697 #ifdef ASSERT 2698 { Label L; 2699 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2700 __ jcc(Assembler::equal, L); 2701 __ stop("no pending exception allowed on exit from monitorenter"); 2702 __ bind(L); 2703 } 2704 #endif 2705 __ jmp(lock_done); 2706 2707 // END Slow path lock 2708 2709 // BEGIN Slow path unlock 2710 __ bind(slow_path_unlock); 2711 2712 // If we haven't already saved the native result we must save it now as xmm registers 2713 // are still exposed. 2714 __ vzeroupper(); 2715 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2716 save_native_result(masm, ret_type, stack_slots); 2717 } 2718 2719 __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2720 2721 __ mov(c_rarg0, obj_reg); 2722 __ mov(c_rarg2, r15_thread); 2723 __ mov(r12, rsp); // remember sp 2724 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2725 __ andptr(rsp, -16); // align stack as required by ABI 2726 2727 // Save pending exception around call to VM (which contains an EXCEPTION_MARK) 2728 // NOTE that obj_reg == rbx currently 2729 __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset()))); 2730 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2731 2732 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2733 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); 2734 __ mov(rsp, r12); // restore sp 2735 __ reinit_heapbase(); 2736 #ifdef ASSERT 2737 { 2738 Label L; 2739 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2740 __ jcc(Assembler::equal, L); 2741 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); 2742 __ bind(L); 2743 } 2744 #endif /* ASSERT */ 2745 2746 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx); 2747 2748 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2749 restore_native_result(masm, ret_type, stack_slots); 2750 } 2751 __ jmp(unlock_done); 2752 2753 // END Slow path unlock 2754 2755 } // synchronized 2756 2757 // SLOW PATH Reguard the stack if needed 2758 2759 __ bind(reguard); 2760 __ vzeroupper(); 2761 save_native_result(masm, ret_type, stack_slots); 2762 __ mov(r12, rsp); // remember sp 2763 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2764 __ andptr(rsp, -16); // align stack as required by ABI 2765 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); 2766 __ mov(rsp, r12); // restore sp 2767 __ reinit_heapbase(); 2768 restore_native_result(masm, ret_type, stack_slots); 2769 // and continue 2770 __ jmp(reguard_done); 2771 2772 2773 2774 __ flush(); 2775 2776 nmethod *nm = nmethod::new_native_nmethod(method, 2777 compile_id, 2778 masm->code(), 2779 vep_offset, 2780 frame_complete, 2781 stack_slots / VMRegImpl::slots_per_word, 2782 (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), 2783 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), 2784 oop_maps); 2785 2786 return nm; 2787 } 2788 2789 // this function returns the adjust size (in number of words) to a c2i adapter 2790 // activation for use during deoptimization 2791 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) { 2792 return (callee_locals - callee_parameters) * Interpreter::stackElementWords; 2793 } 2794 2795 2796 uint SharedRuntime::out_preserve_stack_slots() { 2797 return 0; 2798 } 2799 2800 2801 // Number of stack slots between incoming argument block and the start of 2802 // a new frame. The PROLOG must add this many slots to the stack. The 2803 // EPILOG must remove this many slots. amd64 needs two slots for 2804 // return address. 2805 uint SharedRuntime::in_preserve_stack_slots() { 2806 return 4 + 2 * VerifyStackAtCalls; 2807 } 2808 2809 VMReg SharedRuntime::thread_register() { 2810 return r15_thread->as_VMReg(); 2811 } 2812 2813 //------------------------------generate_deopt_blob---------------------------- 2814 void SharedRuntime::generate_deopt_blob() { 2815 // Allocate space for the code 2816 ResourceMark rm; 2817 // Setup code generation tools 2818 int pad = 0; 2819 if (UseAVX > 2) { 2820 pad += 1024; 2821 } 2822 if (UseAPX) { 2823 pad += 1024; 2824 } 2825 #if INCLUDE_JVMCI 2826 if (EnableJVMCI) { 2827 pad += 512; // Increase the buffer size when compiling for JVMCI 2828 } 2829 #endif 2830 const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id); 2831 CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, BlobId::shared_deopt_id); 2832 if (blob != nullptr) { 2833 _deopt_blob = blob->as_deoptimization_blob(); 2834 return; 2835 } 2836 2837 CodeBuffer buffer(name, 2560+pad, 1024); 2838 MacroAssembler* masm = new MacroAssembler(&buffer); 2839 int frame_size_in_words; 2840 OopMap* map = nullptr; 2841 OopMapSet *oop_maps = new OopMapSet(); 2842 2843 // ------------- 2844 // This code enters when returning to a de-optimized nmethod. A return 2845 // address has been pushed on the stack, and return values are in 2846 // registers. 2847 // If we are doing a normal deopt then we were called from the patched 2848 // nmethod from the point we returned to the nmethod. So the return 2849 // address on the stack is wrong by NativeCall::instruction_size 2850 // We will adjust the value so it looks like we have the original return 2851 // address on the stack (like when we eagerly deoptimized). 2852 // In the case of an exception pending when deoptimizing, we enter 2853 // with a return address on the stack that points after the call we patched 2854 // into the exception handler. We have the following register state from, 2855 // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). 2856 // rax: exception oop 2857 // rbx: exception handler 2858 // rdx: throwing pc 2859 // So in this case we simply jam rdx into the useless return address and 2860 // the stack looks just like we want. 2861 // 2862 // At this point we need to de-opt. We save the argument return 2863 // registers. We call the first C routine, fetch_unroll_info(). This 2864 // routine captures the return values and returns a structure which 2865 // describes the current frame size and the sizes of all replacement frames. 2866 // The current frame is compiled code and may contain many inlined 2867 // functions, each with their own JVM state. We pop the current frame, then 2868 // push all the new frames. 
Then we call the C routine unpack_frames() to 2869 // populate these frames. Finally unpack_frames() returns us the new target 2870 // address. Notice that callee-save registers are BLOWN here; they have 2871 // already been captured in the vframeArray at the time the return PC was 2872 // patched. 2873 address start = __ pc(); 2874 Label cont; 2875 2876 // Prolog for non exception case! 2877 2878 // Save everything in sight. 2879 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2880 2881 // Normal deoptimization. Save exec mode for unpack_frames. 2882 __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved 2883 __ jmp(cont); 2884 2885 int reexecute_offset = __ pc() - start; 2886 #if INCLUDE_JVMCI && !defined(COMPILER1) 2887 if (UseJVMCICompiler) { 2888 // JVMCI does not use this kind of deoptimization 2889 __ should_not_reach_here(); 2890 } 2891 #endif 2892 2893 // Reexecute case 2894 // return address is the pc describes what bci to do re-execute at 2895 2896 // No need to update map as each call to save_live_registers will produce identical oopmap 2897 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2898 2899 __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved 2900 __ jmp(cont); 2901 2902 #if INCLUDE_JVMCI 2903 Label after_fetch_unroll_info_call; 2904 int implicit_exception_uncommon_trap_offset = 0; 2905 int uncommon_trap_offset = 0; 2906 2907 if (EnableJVMCI) { 2908 implicit_exception_uncommon_trap_offset = __ pc() - start; 2909 2910 __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); 2911 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), NULL_WORD); 2912 2913 uncommon_trap_offset = __ pc() - start; 2914 2915 // Save everything in sight. 2916 RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2917 // fetch_unroll_info needs to call last_java_frame() 2918 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 2919 2920 __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset()))); 2921 __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1); 2922 2923 __ movl(r14, Deoptimization::Unpack_reexecute); 2924 __ mov(c_rarg0, r15_thread); 2925 __ movl(c_rarg2, r14); // exec mode 2926 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); 2927 oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); 2928 2929 __ reset_last_Java_frame(false); 2930 2931 __ jmp(after_fetch_unroll_info_call); 2932 } // EnableJVMCI 2933 #endif // INCLUDE_JVMCI 2934 2935 int exception_offset = __ pc() - start; 2936 2937 // Prolog for exception case 2938 2939 // all registers are dead at this entry point, except for rax, and 2940 // rdx which contain the exception oop and exception pc 2941 // respectively. Set them in TLS and fall thru to the 2942 // unpack_with_exception_in_tls entry point. 
2943 2944 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx); 2945 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax); 2946 2947 int exception_in_tls_offset = __ pc() - start; 2948 2949 // new implementation because exception oop is now passed in JavaThread 2950 2951 // Prolog for exception case 2952 // All registers must be preserved because they might be used by LinearScan 2953 // Exceptiop oop and throwing PC are passed in JavaThread 2954 // tos: stack at point of call to method that threw the exception (i.e. only 2955 // args are on the stack, no return address) 2956 2957 // make room on stack for the return address 2958 // It will be patched later with the throwing pc. The correct value is not 2959 // available now because loading it from memory would destroy registers. 2960 __ push(0); 2961 2962 // Save everything in sight. 2963 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2964 2965 // Now it is safe to overwrite any register 2966 2967 // Deopt during an exception. Save exec mode for unpack_frames. 2968 __ movl(r14, Deoptimization::Unpack_exception); // callee-saved 2969 2970 // load throwing pc from JavaThread and patch it as the return address 2971 // of the current frame. Then clear the field in JavaThread 2972 2973 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 2974 __ movptr(Address(rbp, wordSize), rdx); 2975 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD); 2976 2977 #ifdef ASSERT 2978 // verify that there is really an exception oop in JavaThread 2979 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 2980 __ verify_oop(rax); 2981 2982 // verify that there is no pending exception 2983 Label no_pending_exception; 2984 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 2985 __ testptr(rax, rax); 2986 __ jcc(Assembler::zero, no_pending_exception); 2987 __ stop("must not have pending exception here"); 2988 __ bind(no_pending_exception); 2989 #endif 2990 2991 __ bind(cont); 2992 2993 // Call C code. Need thread and this frame, but NOT official VM entry 2994 // crud. We cannot block on this call, no GC can happen. 2995 // 2996 // UnrollBlock* fetch_unroll_info(JavaThread* thread) 2997 2998 // fetch_unroll_info needs to call last_java_frame(). 2999 3000 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 3001 #ifdef ASSERT 3002 { Label L; 3003 __ cmpptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 3004 __ jcc(Assembler::equal, L); 3005 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); 3006 __ bind(L); 3007 } 3008 #endif // ASSERT 3009 __ mov(c_rarg0, r15_thread); 3010 __ movl(c_rarg1, r14); // exec_mode 3011 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); 3012 3013 // Need to have an oopmap that tells fetch_unroll_info where to 3014 // find any register it might need. 3015 oop_maps->add_gc_map(__ pc() - start, map); 3016 3017 __ reset_last_Java_frame(false); 3018 3019 #if INCLUDE_JVMCI 3020 if (EnableJVMCI) { 3021 __ bind(after_fetch_unroll_info_call); 3022 } 3023 #endif 3024 3025 // Load UnrollBlock* into rdi 3026 __ mov(rdi, rax); 3027 3028 __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset())); 3029 Label noException; 3030 __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending? 
3031 __ jcc(Assembler::notEqual, noException); 3032 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 3033 // QQQ this is useless it was null above 3034 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 3035 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD); 3036 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD); 3037 3038 __ verify_oop(rax); 3039 3040 // Overwrite the result registers with the exception results. 3041 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3042 // I think this is useless 3043 __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx); 3044 3045 __ bind(noException); 3046 3047 // Only register save data is on the stack. 3048 // Now restore the result registers. Everything else is either dead 3049 // or captured in the vframeArray. 3050 RegisterSaver::restore_result_registers(masm); 3051 3052 // All of the register save area has been popped of the stack. Only the 3053 // return address remains. 3054 3055 // Pop all the frames we must move/replace. 3056 // 3057 // Frame picture (youngest to oldest) 3058 // 1: self-frame (no frame link) 3059 // 2: deopting frame (no frame link) 3060 // 3: caller of deopting frame (could be compiled/interpreted). 3061 // 3062 // Note: by leaving the return address of self-frame on the stack 3063 // and using the size of frame 2 to adjust the stack 3064 // when we are done the return to frame 3 will still be on the stack. 3065 3066 // Pop deoptimized frame 3067 __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset())); 3068 __ addptr(rsp, rcx); 3069 3070 // rsp should be pointing at the return address to the caller (3) 3071 3072 // Pick up the initial fp we should save 3073 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) 3074 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset())); 3075 3076 #ifdef ASSERT 3077 // Compilers generate code that bang the stack by as much as the 3078 // interpreter would need. So this stack banging should never 3079 // trigger a fault. Verify that it does not on non product builds. 3080 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset())); 3081 __ bang_stack_size(rbx, rcx); 3082 #endif 3083 3084 // Load address of array of frame pcs into rcx 3085 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset())); 3086 3087 // Trash the old pc 3088 __ addptr(rsp, wordSize); 3089 3090 // Load address of array of frame sizes into rsi 3091 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset())); 3092 3093 // Load counter into rdx 3094 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset())); 3095 3096 // Now adjust the caller's stack to make up for the extra locals 3097 // but record the original sp so that we can save it in the skeletal interpreter 3098 // frame and the stack walking of interpreter_sender will get the unextended sp 3099 // value and not the "real" sp value. 
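  // The loop that follows can be read as roughly this C-level model
  // (names taken from the UnrollBlock accessors used above; purely
  // illustrative, the real work is the assembly below):
  //
  //   sp -= caller_adjustment;                 // make up for the extra locals
  //   for (int k = 0; k < number_of_frames; k++) {
  //     push(frame_pcs[k]);                    // return address for this frame
  //     push(fp); fp = sp;                     // enter()
  //     sp -= frame_sizes[k] - 2 * wordSize;   // pc and fp are already in place
  //   }
  //   push(frame_pcs[number_of_frames]);       // final return address
  //
  // Between iterations r8 carries the unextended sender sp that is stored
  // into each skeletal frame, and layout_activation_impl() later fills in
  // the rest of each skeletal interpreter frame.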
3100 3101 const Register sender_sp = r8; 3102 3103 __ mov(sender_sp, rsp); 3104 __ movl(rbx, Address(rdi, 3105 Deoptimization::UnrollBlock:: 3106 caller_adjustment_offset())); 3107 __ subptr(rsp, rbx); 3108 3109 // Push interpreter frames in a loop 3110 Label loop; 3111 __ bind(loop); 3112 __ movptr(rbx, Address(rsi, 0)); // Load frame size 3113 __ subptr(rbx, 2*wordSize); // We'll push pc and ebp by hand 3114 __ pushptr(Address(rcx, 0)); // Save return address 3115 __ enter(); // Save old & set new ebp 3116 __ subptr(rsp, rbx); // Prolog 3117 // This value is corrected by layout_activation_impl 3118 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); 3119 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable 3120 __ mov(sender_sp, rsp); // Pass sender_sp to next frame 3121 __ addptr(rsi, wordSize); // Bump array pointer (sizes) 3122 __ addptr(rcx, wordSize); // Bump array pointer (pcs) 3123 __ decrementl(rdx); // Decrement counter 3124 __ jcc(Assembler::notZero, loop); 3125 __ pushptr(Address(rcx, 0)); // Save final return address 3126 3127 // Re-push self-frame 3128 __ enter(); // Save old & set new ebp 3129 3130 // Allocate a full sized register save area. 3131 // Return address and rbp are in place, so we allocate two less words. 3132 __ subptr(rsp, (frame_size_in_words - 2) * wordSize); 3133 3134 // Restore frame locals after moving the frame 3135 __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0); 3136 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3137 3138 // Call C code. Need thread but NOT official VM entry 3139 // crud. We cannot block on this call, no GC can happen. Call should 3140 // restore return values to their stack-slots with the new SP. 3141 // 3142 // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) 3143 3144 // Use rbp because the frames look interpreted now 3145 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 3146 // Don't need the precise return PC here, just precise enough to point into this code blob. 3147 address the_pc = __ pc(); 3148 __ set_last_Java_frame(noreg, rbp, the_pc, rscratch1); 3149 3150 __ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by ABI 3151 __ mov(c_rarg0, r15_thread); 3152 __ movl(c_rarg1, r14); // second arg: exec_mode 3153 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 3154 // Revert SP alignment after call since we're going to do some SP relative addressing below 3155 __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset())); 3156 3157 // Set an oopmap for the call site 3158 // Use the same PC we used for the last java frame 3159 oop_maps->add_gc_map(the_pc - start, 3160 new OopMap( frame_size_in_words, 0 )); 3161 3162 // Clear fp AND pc 3163 __ reset_last_Java_frame(true); 3164 3165 // Collect return values 3166 __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes())); 3167 __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes())); 3168 // I think this is useless (throwing pc?) 3169 __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes())); 3170 3171 // Pop self-frame. 
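  // (leave() below undoes the enter() that re-pushed the self-frame.)
  // Note how rax and xmm0 are spilled into the register-save slots just
  // before the unpack_frames() call and reloaded just above: they are the
  // only possible Java return values that must survive the C call; the rdx
  // reload is kept for symmetry but, as the existing comment notes, is
  // probably unnecessary.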
3172 __ leave(); // Epilog 3173 3174 // Jump to interpreter 3175 __ ret(0); 3176 3177 // Make sure all code is generated 3178 masm->flush(); 3179 3180 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); 3181 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); 3182 #if INCLUDE_JVMCI 3183 if (EnableJVMCI) { 3184 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); 3185 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); 3186 } 3187 #endif 3188 3189 AOTCodeCache::store_code_blob(*_deopt_blob, AOTCodeEntry::SharedBlob, BlobId::shared_deopt_id); 3190 } 3191 3192 //------------------------------generate_handler_blob------ 3193 // 3194 // Generate a special Compile2Runtime blob that saves all registers, 3195 // and setup oopmap. 3196 // 3197 SafepointBlob* SharedRuntime::generate_handler_blob(StubId id, address call_ptr) { 3198 assert(StubRoutines::forward_exception_entry() != nullptr, 3199 "must be generated before"); 3200 assert(is_polling_page_id(id), "expected a polling page stub id"); 3201 3202 // Allocate space for the code. Setup code generation tools. 3203 const char* name = SharedRuntime::stub_name(id); 3204 CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, StubInfo::blob(id)); 3205 if (blob != nullptr) { 3206 return blob->as_safepoint_blob(); 3207 } 3208 3209 ResourceMark rm; 3210 OopMapSet *oop_maps = new OopMapSet(); 3211 OopMap* map; 3212 CodeBuffer buffer(name, 2548, 1024); 3213 MacroAssembler* masm = new MacroAssembler(&buffer); 3214 3215 address start = __ pc(); 3216 address call_pc = nullptr; 3217 int frame_size_in_words; 3218 bool cause_return = (id == StubId::shared_polling_page_return_handler_id); 3219 bool save_wide_vectors = (id == StubId::shared_polling_page_vectors_safepoint_handler_id); 3220 3221 // Make room for return address (or push it again) 3222 if (!cause_return) { 3223 __ push(rbx); 3224 } 3225 3226 // Save registers, fpu state, and flags 3227 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_wide_vectors); 3228 3229 // The following is basically a call_VM. However, we need the precise 3230 // address of the call in order to generate an oopmap. Hence, we do all the 3231 // work ourselves. 3232 3233 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); // JavaFrameAnchor::capture_last_Java_pc() will get the pc from the return address, which we store next: 3234 3235 // The return address must always be correct so that frame constructor never 3236 // sees an invalid pc. 3237 3238 if (!cause_return) { 3239 // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack. 3240 // Additionally, rbx is a callee saved register and we can look at it later to determine 3241 // if someone changed the return address for us! 3242 __ movptr(rbx, Address(r15_thread, JavaThread::saved_exception_pc_offset())); 3243 __ movptr(Address(rbp, wordSize), rbx); 3244 } 3245 3246 // Do the call 3247 __ mov(c_rarg0, r15_thread); 3248 __ call(RuntimeAddress(call_ptr)); 3249 3250 // Set an oopmap for the call site. This oopmap will map all 3251 // oop-registers and debug-info registers as callee-saved. This 3252 // will allow deoptimization at this safepoint to find all possible 3253 // debug-info recordings, as well as let GC find all oops. 
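  // The sequence above and below is the usual hand-rolled call_VM shape,
  // roughly:
  //
  //   __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); // publish anchor
  //   __ mov(c_rarg0, r15_thread);
  //   __ call(RuntimeAddress(call_ptr));                        // into the VM
  //   oop_maps->add_gc_map(__ pc() - start, map);               // oopmap at the call pc
  //   __ reset_last_Java_frame(false);                          // tear anchor down
  //
  // done by hand only because the exact call pc is needed for the oopmap.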
3254 3255 oop_maps->add_gc_map( __ pc() - start, map); 3256 3257 Label noException; 3258 3259 __ reset_last_Java_frame(false); 3260 3261 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 3262 __ jcc(Assembler::equal, noException); 3263 3264 // Exception pending 3265 3266 RegisterSaver::restore_live_registers(masm, save_wide_vectors); 3267 3268 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3269 3270 // No exception case 3271 __ bind(noException); 3272 3273 Label no_adjust; 3274 #ifdef ASSERT 3275 Label bail; 3276 #endif 3277 if (!cause_return) { 3278 Label no_prefix, not_special, check_rex_prefix; 3279 3280 // If our stashed return pc was modified by the runtime we avoid touching it 3281 __ cmpptr(rbx, Address(rbp, wordSize)); 3282 __ jcc(Assembler::notEqual, no_adjust); 3283 3284 // Skip over the poll instruction. 3285 // See NativeInstruction::is_safepoint_poll() 3286 // Possible encodings: 3287 // 85 00 test %eax,(%rax) 3288 // 85 01 test %eax,(%rcx) 3289 // 85 02 test %eax,(%rdx) 3290 // 85 03 test %eax,(%rbx) 3291 // 85 06 test %eax,(%rsi) 3292 // 85 07 test %eax,(%rdi) 3293 // 3294 // 41 85 00 test %eax,(%r8) 3295 // 41 85 01 test %eax,(%r9) 3296 // 41 85 02 test %eax,(%r10) 3297 // 41 85 03 test %eax,(%r11) 3298 // 41 85 06 test %eax,(%r14) 3299 // 41 85 07 test %eax,(%r15) 3300 // 3301 // 85 04 24 test %eax,(%rsp) 3302 // 41 85 04 24 test %eax,(%r12) 3303 // 85 45 00 test %eax,0x0(%rbp) 3304 // 41 85 45 00 test %eax,0x0(%r13) 3305 // 3306 // Notes: 3307 // Format of legacy MAP0 test instruction:- 3308 // [REX/REX2] [OPCODE] [ModRM] [SIB] [DISP] [IMM32] 3309 // o For safepoint polling instruction "test %eax,(%rax)", encoding of first register 3310 // operand and base register of memory operand is b/w [0-8), hence we do not require 3311 // additional REX prefix where REX.B bit stores MSB bit of register encoding, which 3312 // is why two bytes encoding is sufficient here. 3313 // o For safepoint polling instruction like "test %eax,(%r8)", register encoding of BASE 3314 // register of memory operand is 1000, thus we need additional REX prefix in this case, 3315 // there by adding additional byte to instruction encoding. 3316 // o In case BASE register is one of the 32 extended GPR registers available only on targets 3317 // supporting Intel APX extension, then we need to emit two bytes REX2 prefix to hold 3318 // most significant two bits of 5 bit register encoding. 3319 3320 if (VM_Version::supports_apx_f()) { 3321 __ cmpb(Address(rbx, 0), Assembler::REX2); 3322 __ jccb(Assembler::notEqual, check_rex_prefix); 3323 __ addptr(rbx, 2); 3324 __ bind(check_rex_prefix); 3325 } 3326 __ cmpb(Address(rbx, 0), NativeTstRegMem::instruction_rex_b_prefix); 3327 __ jccb(Assembler::notEqual, no_prefix); 3328 __ addptr(rbx, 1); 3329 __ bind(no_prefix); 3330 #ifdef ASSERT 3331 __ movptr(rax, rbx); // remember where 0x85 should be, for verification below 3332 #endif 3333 // r12/r13/rsp/rbp base encoding takes 3 bytes with the following register values: 3334 // r12/rsp 0x04 3335 // r13/rbp 0x05 3336 __ movzbq(rcx, Address(rbx, 1)); 3337 __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05 3338 __ subptr(rcx, 4); // looking for 0x00 .. 0x01 3339 __ cmpptr(rcx, 1); 3340 __ jccb(Assembler::above, not_special); 3341 __ addptr(rbx, 1); 3342 __ bind(not_special); 3343 #ifdef ASSERT 3344 // Verify the correct encoding of the poll we're about to skip. 
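  // The net effect of the prefix/ModRM checks above is, in C-like terms
  // (illustrative only; the byte values come from the encoding table above):
  //
  //   u_char* pc = stashed_return_pc;                // rbx
  //   if (APX && pc[0] == REX2)               pc += 2;  // two-byte REX2 prefix
  //   if (pc[0] == REX.B prefix)              pc += 1;  // one-byte REX prefix
  //   if ((pc[1] & 0x7) == 4 || (pc[1] & 0x7) == 5)
  //                                           pc += 1;  // SIB (rsp/r12) or disp8 (rbp/r13)
  //   pc += 2;                                          // opcode 0x85 + ModRM
  //
  // which leaves pc just past the poll; the ASSERT code below checks that
  // the bytes we are about to skip really are the expected test instruction.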
3345 __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl); 3346 __ jcc(Assembler::notEqual, bail); 3347 // Mask out the modrm bits 3348 __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask); 3349 // rax encodes to 0, so if the bits are nonzero it's incorrect 3350 __ jcc(Assembler::notZero, bail); 3351 #endif 3352 // Adjust return pc forward to step over the safepoint poll instruction 3353 __ addptr(rbx, 2); 3354 __ movptr(Address(rbp, wordSize), rbx); 3355 } 3356 3357 __ bind(no_adjust); 3358 // Normal exit, restore registers and exit. 3359 RegisterSaver::restore_live_registers(masm, save_wide_vectors); 3360 __ ret(0); 3361 3362 #ifdef ASSERT 3363 __ bind(bail); 3364 __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); 3365 #endif 3366 3367 // Make sure all code is generated 3368 masm->flush(); 3369 3370 // Fill-out other meta info 3371 SafepointBlob* sp_blob = SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); 3372 3373 AOTCodeCache::store_code_blob(*sp_blob, AOTCodeEntry::SharedBlob, StubInfo::blob(id)); 3374 return sp_blob; 3375 } 3376 3377 // 3378 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss 3379 // 3380 // Generate a stub that calls into vm to find out the proper destination 3381 // of a java call. All the argument registers are live at this point 3382 // but since this is generic code we don't know what they are and the caller 3383 // must do any gc of the args. 3384 // 3385 RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination) { 3386 assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before"); 3387 assert(is_resolve_id(id), "expected a resolve stub id"); 3388 3389 const char* name = SharedRuntime::stub_name(id); 3390 CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, StubInfo::blob(id)); 3391 if (blob != nullptr) { 3392 return blob->as_runtime_stub(); 3393 } 3394 3395 // allocate space for the code 3396 ResourceMark rm; 3397 CodeBuffer buffer(name, 1552, 512); 3398 MacroAssembler* masm = new MacroAssembler(&buffer); 3399 3400 int frame_size_in_words; 3401 3402 OopMapSet *oop_maps = new OopMapSet(); 3403 OopMap* map = nullptr; 3404 3405 int start = __ offset(); 3406 3407 // No need to save vector registers since they are caller-saved anyway. 3408 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ false); 3409 3410 int frame_complete = __ offset(); 3411 3412 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 3413 3414 __ mov(c_rarg0, r15_thread); 3415 3416 __ call(RuntimeAddress(destination)); 3417 3418 3419 // Set an oopmap for the call site. 3420 // We need this not only for callee-saved registers, but also for volatile 3421 // registers that the compiler might be keeping live across a safepoint. 
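  // From here the blob follows a simple shape (sketched; the interesting
  // part is only how the results are smuggled through the register restore):
  //
  //   call destination(thread);            // resolves the call site
  //   if (thread->has_pending_exception()) goto forward_exception;
  //   rbx = thread->vm_result_metadata;    // the resolved Method*
  //   save_area[rbx_slot] = rbx;           // survive restore_live_registers()
  //   save_area[rax_slot] = rax;           // the code entry point to jump to
  //   restore_live_registers();
  //   jmp rax;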
3422 3423 oop_maps->add_gc_map( __ offset() - start, map); 3424 3425 // rax contains the address we are going to jump to assuming no exception got installed 3426 3427 // clear last_Java_sp 3428 __ reset_last_Java_frame(false); 3429 // check for pending exceptions 3430 Label pending; 3431 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 3432 __ jcc(Assembler::notEqual, pending); 3433 3434 // get the returned Method* 3435 __ get_vm_result_metadata(rbx); 3436 __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx); 3437 3438 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3439 3440 RegisterSaver::restore_live_registers(masm); 3441 3442 // We are back to the original state on entry and ready to go. 3443 3444 __ jmp(rax); 3445 3446 // Pending exception after the safepoint 3447 3448 __ bind(pending); 3449 3450 RegisterSaver::restore_live_registers(masm); 3451 3452 // exception pending => remove activation and forward to exception handler 3453 3454 __ movptr(Address(r15_thread, JavaThread::vm_result_oop_offset()), NULL_WORD); 3455 3456 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 3457 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3458 3459 // ------------- 3460 // make sure all code is generated 3461 masm->flush(); 3462 3463 // return the blob 3464 // frame_size_words or bytes?? 3465 RuntimeStub* rs_blob = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); 3466 3467 AOTCodeCache::store_code_blob(*rs_blob, AOTCodeEntry::SharedBlob, StubInfo::blob(id)); 3468 return rs_blob; 3469 } 3470 3471 // Continuation point for throwing of implicit exceptions that are 3472 // not handled in the current activation. Fabricates an exception 3473 // oop and initiates normal exception dispatching in this 3474 // frame. Since we need to preserve callee-saved values (currently 3475 // only for C2, but done for C1 as well) we need a callee-saved oop 3476 // map and therefore have to make these stubs into RuntimeStubs 3477 // rather than BufferBlobs. If the compiler needs all registers to 3478 // be preserved between the fault point and the exception handler 3479 // then it must assume responsibility for that in 3480 // AbstractCompiler::continuation_for_implicit_null_exception or 3481 // continuation_for_implicit_division_by_zero_exception. All other 3482 // implicit exceptions (e.g., NullPointerException or 3483 // AbstractMethodError on entry) are either at call sites or 3484 // otherwise assume that stack unwinding will be initiated, so 3485 // caller saved registers were assumed volatile in the compiler. 3486 RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) { 3487 assert(is_throw_id(id), "expected a throw stub id"); 3488 3489 const char* name = SharedRuntime::stub_name(id); 3490 3491 // Information about frame layout at time of blocking runtime call. 3492 // Note that we only have to preserve callee-saved registers since 3493 // the compilers are responsible for supplying a continuation point 3494 // if they expect all registers to be preserved. 
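  // The enum below counts in 32-bit stack slots (VMRegImpl slots), so each
  // saved word contributes two entries; on a target with no argument
  // register save area the frame is just the saved rbp plus the return
  // address. The later conversion back to words is
  //   framesize >> (LogBytesPerWord - LogBytesPerInt)
  // i.e. slots / 2.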
3495 enum layout { 3496 rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt, 3497 rbp_off2, 3498 return_off, 3499 return_off2, 3500 framesize // inclusive of return address 3501 }; 3502 3503 int insts_size = 512; 3504 int locs_size = 64; 3505 3506 const char* timer_msg = "SharedRuntime generate_throw_exception"; 3507 TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime)); 3508 3509 CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, StubInfo::blob(id)); 3510 if (blob != nullptr) { 3511 return blob->as_runtime_stub(); 3512 } 3513 3514 ResourceMark rm; 3515 CodeBuffer code(name, insts_size, locs_size); 3516 OopMapSet* oop_maps = new OopMapSet(); 3517 MacroAssembler* masm = new MacroAssembler(&code); 3518 3519 address start = __ pc(); 3520 3521 // This is an inlined and slightly modified version of call_VM 3522 // which has the ability to fetch the return PC out of 3523 // thread-local storage and also sets up last_Java_sp slightly 3524 // differently than the real call_VM 3525 3526 __ enter(); // required for proper stackwalking of RuntimeStub frame 3527 3528 assert(is_even(framesize/2), "sp not 16-byte aligned"); 3529 3530 // return address and rbp are already in place 3531 __ subptr(rsp, (framesize-4) << LogBytesPerInt); // prolog 3532 3533 int frame_complete = __ pc() - start; 3534 3535 // Set up last_Java_sp and last_Java_fp 3536 address the_pc = __ pc(); 3537 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1); 3538 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack 3539 3540 // Call runtime 3541 __ movptr(c_rarg0, r15_thread); 3542 BLOCK_COMMENT("call runtime_entry"); 3543 __ call(RuntimeAddress(runtime_entry)); 3544 3545 // Generate oop map 3546 OopMap* map = new OopMap(framesize, 0); 3547 3548 oop_maps->add_gc_map(the_pc - start, map); 3549 3550 __ reset_last_Java_frame(true); 3551 3552 __ leave(); // required for proper stackwalking of RuntimeStub frame 3553 3554 // check for pending exceptions 3555 #ifdef ASSERT 3556 Label L; 3557 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 3558 __ jcc(Assembler::notEqual, L); 3559 __ should_not_reach_here(); 3560 __ bind(L); 3561 #endif // ASSERT 3562 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3563 3564 3565 // codeBlob framesize is in words (not VMRegImpl::slot_size) 3566 RuntimeStub* stub = 3567 RuntimeStub::new_runtime_stub(name, 3568 &code, 3569 frame_complete, 3570 (framesize >> (LogBytesPerWord - LogBytesPerInt)), 3571 oop_maps, false); 3572 AOTCodeCache::store_code_blob(*stub, AOTCodeEntry::SharedBlob, StubInfo::blob(id)); 3573 3574 return stub; 3575 } 3576 3577 //------------------------------Montgomery multiplication------------------------ 3578 // 3579 3580 #ifndef _WINDOWS 3581 3582 // Subtract 0:b from carry:a. Return carry. 3583 static julong 3584 sub(julong a[], julong b[], julong carry, long len) { 3585 long long i = 0, cnt = len; 3586 julong tmp; 3587 asm volatile("clc; " 3588 "0: ; " 3589 "mov (%[b], %[i], 8), %[tmp]; " 3590 "sbb %[tmp], (%[a], %[i], 8); " 3591 "inc %[i]; dec %[cnt]; " 3592 "jne 0b; " 3593 "mov %[carry], %[tmp]; sbb $0, %[tmp]; " 3594 : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp) 3595 : [a]"r"(a), [b]"r"(b), [carry]"r"(carry) 3596 : "memory"); 3597 return tmp; 3598 } 3599 3600 // Multiply (unsigned) Long A by Long B, accumulating the double- 3601 // length result into the accumulator formed of T0, T1, and T2. 
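// In portable terms the macro below computes, over a 192-bit accumulator
// spread across three 64-bit words,
//
//   (T2:T1:T0) += (unsigned __int128)A * B;
//
// and MACC2 adds the product in twice. The inline-asm form exists so the
// 128-bit partial product can be consumed straight out of rdx:rax, where
// the MUL instruction leaves it.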
3602 #define MACC(A, B, T0, T1, T2) \ 3603 do { \ 3604 unsigned long hi, lo; \ 3605 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 3606 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 3607 : "r"(A), "a"(B) : "cc"); \ 3608 } while(0) 3609 3610 // As above, but add twice the double-length result into the 3611 // accumulator. 3612 #define MACC2(A, B, T0, T1, T2) \ 3613 do { \ 3614 unsigned long hi, lo; \ 3615 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \ 3616 "add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 3617 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 3618 : "r"(A), "a"(B) : "cc"); \ 3619 } while(0) 3620 3621 #else //_WINDOWS 3622 3623 static julong 3624 sub(julong a[], julong b[], julong carry, long len) { 3625 long i; 3626 julong tmp; 3627 unsigned char c = 1; 3628 for (i = 0; i < len; i++) { 3629 c = _addcarry_u64(c, a[i], ~b[i], &tmp); 3630 a[i] = tmp; 3631 } 3632 c = _addcarry_u64(c, carry, ~0, &tmp); 3633 return tmp; 3634 } 3635 3636 // Multiply (unsigned) Long A by Long B, accumulating the double- 3637 // length result into the accumulator formed of T0, T1, and T2. 3638 #define MACC(A, B, T0, T1, T2) \ 3639 do { \ 3640 julong hi, lo; \ 3641 lo = _umul128(A, B, &hi); \ 3642 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \ 3643 c = _addcarry_u64(c, hi, T1, &T1); \ 3644 _addcarry_u64(c, T2, 0, &T2); \ 3645 } while(0) 3646 3647 // As above, but add twice the double-length result into the 3648 // accumulator. 3649 #define MACC2(A, B, T0, T1, T2) \ 3650 do { \ 3651 julong hi, lo; \ 3652 lo = _umul128(A, B, &hi); \ 3653 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \ 3654 c = _addcarry_u64(c, hi, T1, &T1); \ 3655 _addcarry_u64(c, T2, 0, &T2); \ 3656 c = _addcarry_u64(0, lo, T0, &T0); \ 3657 c = _addcarry_u64(c, hi, T1, &T1); \ 3658 _addcarry_u64(c, T2, 0, &T2); \ 3659 } while(0) 3660 3661 #endif //_WINDOWS 3662 3663 // Fast Montgomery multiplication. The derivation of the algorithm is 3664 // in A Cryptographic Library for the Motorola DSP56000, 3665 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 3666 3667 static void NOINLINE 3668 montgomery_multiply(julong a[], julong b[], julong n[], 3669 julong m[], julong inv, int len) { 3670 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 3671 int i; 3672 3673 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery multiply"); 3674 3675 for (i = 0; i < len; i++) { 3676 int j; 3677 for (j = 0; j < i; j++) { 3678 MACC(a[j], b[i-j], t0, t1, t2); 3679 MACC(m[j], n[i-j], t0, t1, t2); 3680 } 3681 MACC(a[i], b[0], t0, t1, t2); 3682 m[i] = t0 * inv; 3683 MACC(m[i], n[0], t0, t1, t2); 3684 3685 assert(t0 == 0, "broken Montgomery multiply"); 3686 3687 t0 = t1; t1 = t2; t2 = 0; 3688 } 3689 3690 for (i = len; i < 2*len; i++) { 3691 int j; 3692 for (j = i-len+1; j < len; j++) { 3693 MACC(a[j], b[i-j], t0, t1, t2); 3694 MACC(m[j], n[i-j], t0, t1, t2); 3695 } 3696 m[i-len] = t0; 3697 t0 = t1; t1 = t2; t2 = 0; 3698 } 3699 3700 while (t0) 3701 t0 = sub(m, n, t0, len); 3702 } 3703 3704 // Fast Montgomery squaring. This uses asymptotically 25% fewer 3705 // multiplies so it should be up to 25% faster than Montgomery 3706 // multiplication. However, its loop control is more complex and it 3707 // may actually run slower on some machines. 
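// For both routines the contract is the usual Montgomery one (stated here
// informally): with R = 2^(64*len) and inv satisfying inv * n[0] == -1
// (mod 2^64) -- which is what the asserts check via ULLONG_MAX -- the
// result written to m is a * b * R^-1 mod n (a^2 * R^-1 mod n for the
// squaring variant), with all operands held as little-endian arrays of
// len 64-bit words.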
3708 3709 static void NOINLINE 3710 montgomery_square(julong a[], julong n[], 3711 julong m[], julong inv, int len) { 3712 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 3713 int i; 3714 3715 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery square"); 3716 3717 for (i = 0; i < len; i++) { 3718 int j; 3719 int end = (i+1)/2; 3720 for (j = 0; j < end; j++) { 3721 MACC2(a[j], a[i-j], t0, t1, t2); 3722 MACC(m[j], n[i-j], t0, t1, t2); 3723 } 3724 if ((i & 1) == 0) { 3725 MACC(a[j], a[j], t0, t1, t2); 3726 } 3727 for (; j < i; j++) { 3728 MACC(m[j], n[i-j], t0, t1, t2); 3729 } 3730 m[i] = t0 * inv; 3731 MACC(m[i], n[0], t0, t1, t2); 3732 3733 assert(t0 == 0, "broken Montgomery square"); 3734 3735 t0 = t1; t1 = t2; t2 = 0; 3736 } 3737 3738 for (i = len; i < 2*len; i++) { 3739 int start = i-len+1; 3740 int end = start + (len - start)/2; 3741 int j; 3742 for (j = start; j < end; j++) { 3743 MACC2(a[j], a[i-j], t0, t1, t2); 3744 MACC(m[j], n[i-j], t0, t1, t2); 3745 } 3746 if ((i & 1) == 0) { 3747 MACC(a[j], a[j], t0, t1, t2); 3748 } 3749 for (; j < len; j++) { 3750 MACC(m[j], n[i-j], t0, t1, t2); 3751 } 3752 m[i-len] = t0; 3753 t0 = t1; t1 = t2; t2 = 0; 3754 } 3755 3756 while (t0) 3757 t0 = sub(m, n, t0, len); 3758 } 3759 3760 // Swap words in a longword. 3761 static julong swap(julong x) { 3762 return (x << 32) | (x >> 32); 3763 } 3764 3765 // Copy len longwords from s to d, word-swapping as we go. The 3766 // destination array is reversed. 3767 static void reverse_words(julong *s, julong *d, int len) { 3768 d += len; 3769 while(len-- > 0) { 3770 d--; 3771 *d = swap(*s); 3772 s++; 3773 } 3774 } 3775 3776 // The threshold at which squaring is advantageous was determined 3777 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. 3778 #define MONTGOMERY_SQUARING_THRESHOLD 64 3779 3780 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, 3781 jint len, jlong inv, 3782 jint *m_ints) { 3783 assert(len % 2 == 0, "array length in montgomery_multiply must be even"); 3784 int longwords = len/2; 3785 3786 // Make very sure we don't use so much space that the stack might 3787 // overflow. 512 jints corresponds to an 16384-bit integer and 3788 // will use here a total of 8k bytes of stack space. 3789 int divisor = sizeof(julong) * 4; 3790 guarantee(longwords <= 8192 / divisor, "must be"); 3791 int total_allocation = longwords * sizeof (julong) * 4; 3792 julong *scratch = (julong *)alloca(total_allocation); 3793 3794 // Local scratch arrays 3795 julong 3796 *a = scratch + 0 * longwords, 3797 *b = scratch + 1 * longwords, 3798 *n = scratch + 2 * longwords, 3799 *m = scratch + 3 * longwords; 3800 3801 reverse_words((julong *)a_ints, a, longwords); 3802 reverse_words((julong *)b_ints, b, longwords); 3803 reverse_words((julong *)n_ints, n, longwords); 3804 3805 ::montgomery_multiply(a, b, n, m, (julong)inv, longwords); 3806 3807 reverse_words(m, (julong *)m_ints, longwords); 3808 } 3809 3810 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, 3811 jint len, jlong inv, 3812 jint *m_ints) { 3813 assert(len % 2 == 0, "array length in montgomery_square must be even"); 3814 int longwords = len/2; 3815 3816 // Make very sure we don't use so much space that the stack might 3817 // overflow. 512 jints corresponds to an 16384-bit integer and 3818 // will use here a total of 6k bytes of stack space. 
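  // Spelled out: 512 jints == 256 julongs, and the squaring path needs
  // three scratch arrays, so 256 * 3 * sizeof(julong) = 6144 bytes; the
  // multiply path above needs four arrays, hence its 8K figure. The
  // guarantee below enforces the same budget directly from 'longwords'.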
3819 int divisor = sizeof(julong) * 3; 3820 guarantee(longwords <= (8192 / divisor), "must be"); 3821 int total_allocation = longwords * sizeof (julong) * 3; 3822 julong *scratch = (julong *)alloca(total_allocation); 3823 3824 // Local scratch arrays 3825 julong 3826 *a = scratch + 0 * longwords, 3827 *n = scratch + 1 * longwords, 3828 *m = scratch + 2 * longwords; 3829 3830 reverse_words((julong *)a_ints, a, longwords); 3831 reverse_words((julong *)n_ints, n, longwords); 3832 3833 if (len >= MONTGOMERY_SQUARING_THRESHOLD) { 3834 ::montgomery_square(a, n, m, (julong)inv, longwords); 3835 } else { 3836 ::montgomery_multiply(a, a, n, m, (julong)inv, longwords); 3837 } 3838 3839 reverse_words(m, (julong *)m_ints, longwords); 3840 } 3841 3842 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { 3843 BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K); 3844 if (buf == nullptr) { 3845 return nullptr; 3846 } 3847 CodeBuffer buffer(buf); 3848 short buffer_locs[20]; 3849 buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, 3850 sizeof(buffer_locs)/sizeof(relocInfo)); 3851 3852 MacroAssembler* masm = new MacroAssembler(&buffer); 3853 3854 const Array<SigEntry>* sig_vk = vk->extended_sig(); 3855 const Array<VMRegPair>* regs = vk->return_regs(); 3856 3857 int pack_fields_jobject_off = __ offset(); 3858 // Resolve pre-allocated buffer from JNI handle. 3859 // We cannot do this in generate_call_stub() because it requires GC code to be initialized. 3860 __ movptr(rax, Address(r13, 0)); 3861 __ resolve_jobject(rax /* value */, 3862 r12 /* tmp */); 3863 __ movptr(Address(r13, 0), rax); 3864 3865 int pack_fields_off = __ offset(); 3866 3867 int j = 1; 3868 for (int i = 0; i < sig_vk->length(); i++) { 3869 BasicType bt = sig_vk->at(i)._bt; 3870 if (bt == T_METADATA) { 3871 continue; 3872 } 3873 if (bt == T_VOID) { 3874 if (sig_vk->at(i-1)._bt == T_LONG || 3875 sig_vk->at(i-1)._bt == T_DOUBLE) { 3876 j++; 3877 } 3878 continue; 3879 } 3880 int off = sig_vk->at(i)._offset; 3881 assert(off > 0, "offset in object should be positive"); 3882 VMRegPair pair = regs->at(j); 3883 VMReg r_1 = pair.first(); 3884 VMReg r_2 = pair.second(); 3885 Address to(rax, off); 3886 if (bt == T_FLOAT) { 3887 __ movflt(to, r_1->as_XMMRegister()); 3888 } else if (bt == T_DOUBLE) { 3889 __ movdbl(to, r_1->as_XMMRegister()); 3890 } else { 3891 Register val = r_1->as_Register(); 3892 assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1); 3893 if (is_reference_type(bt)) { 3894 __ store_heap_oop(to, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); 3895 } else { 3896 __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt)); 3897 } 3898 } 3899 j++; 3900 } 3901 assert(j == regs->length(), "missed a field?"); 3902 if (vk->has_nullable_atomic_layout()) { 3903 // Set the null marker 3904 __ movb(Address(rax, vk->null_marker_offset()), 1); 3905 } 3906 __ ret(0); 3907 3908 int unpack_fields_off = __ offset(); 3909 3910 Label skip; 3911 Label not_null; 3912 __ testptr(rax, rax); 3913 __ jcc(Assembler::notZero, not_null); 3914 3915 // Return value is null. Zero oop registers to make the GC happy. 
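  // (Same walking convention as the pack loop above and the non-null
  // unpack loop below:) the extended signature is visited with i while j
  // indexes the VMRegPair array; T_METADATA entries are skipped entirely,
  // a T_VOID entry marks the second half of a preceding T_LONG/T_DOUBLE
  // and only bumps j, and every other entry pairs regs->at(j) with a field
  // offset in the buffered value. Here only the oop-carrying registers
  // need touching, so the rest are left alone.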
3916 j = 1; 3917 for (int i = 0; i < sig_vk->length(); i++) { 3918 BasicType bt = sig_vk->at(i)._bt; 3919 if (bt == T_METADATA) { 3920 continue; 3921 } 3922 if (bt == T_VOID) { 3923 if (sig_vk->at(i-1)._bt == T_LONG || 3924 sig_vk->at(i-1)._bt == T_DOUBLE) { 3925 j++; 3926 } 3927 continue; 3928 } 3929 if (bt == T_OBJECT || bt == T_ARRAY) { 3930 VMRegPair pair = regs->at(j); 3931 VMReg r_1 = pair.first(); 3932 __ xorq(r_1->as_Register(), r_1->as_Register()); 3933 } 3934 j++; 3935 } 3936 __ jmp(skip); 3937 __ bind(not_null); 3938 3939 j = 1; 3940 for (int i = 0; i < sig_vk->length(); i++) { 3941 BasicType bt = sig_vk->at(i)._bt; 3942 if (bt == T_METADATA) { 3943 continue; 3944 } 3945 if (bt == T_VOID) { 3946 if (sig_vk->at(i-1)._bt == T_LONG || 3947 sig_vk->at(i-1)._bt == T_DOUBLE) { 3948 j++; 3949 } 3950 continue; 3951 } 3952 int off = sig_vk->at(i)._offset; 3953 assert(off > 0, "offset in object should be positive"); 3954 VMRegPair pair = regs->at(j); 3955 VMReg r_1 = pair.first(); 3956 VMReg r_2 = pair.second(); 3957 Address from(rax, off); 3958 if (bt == T_FLOAT) { 3959 __ movflt(r_1->as_XMMRegister(), from); 3960 } else if (bt == T_DOUBLE) { 3961 __ movdbl(r_1->as_XMMRegister(), from); 3962 } else if (bt == T_OBJECT || bt == T_ARRAY) { 3963 assert_different_registers(rax, r_1->as_Register()); 3964 __ load_heap_oop(r_1->as_Register(), from); 3965 } else { 3966 assert(is_java_primitive(bt), "unexpected basic type"); 3967 assert_different_registers(rax, r_1->as_Register()); 3968 size_t size_in_bytes = type2aelembytes(bt); 3969 __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); 3970 } 3971 j++; 3972 } 3973 assert(j == regs->length(), "missed a field?"); 3974 3975 __ bind(skip); 3976 __ ret(0); 3977 3978 __ flush(); 3979 3980 return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off); 3981 } 3982 3983 #if INCLUDE_JFR 3984 3985 // For c2: c_rarg0 is junk, call to runtime to write a checkpoint. 3986 // It returns a jobject handle to the event writer. 3987 // The handle is dereferenced and the return value is the event writer oop. 3988 RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() { 3989 enum layout { 3990 rbp_off, 3991 rbpH_off, 3992 return_off, 3993 return_off2, 3994 framesize // inclusive of return address 3995 }; 3996 3997 const char* name = SharedRuntime::stub_name(StubId::shared_jfr_write_checkpoint_id); 3998 CodeBuffer code(name, 1024, 64); 3999 MacroAssembler* masm = new MacroAssembler(&code); 4000 address start = __ pc(); 4001 4002 __ enter(); 4003 address the_pc = __ pc(); 4004 4005 int frame_complete = the_pc - start; 4006 4007 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1); 4008 __ movptr(c_rarg0, r15_thread); 4009 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), 1); 4010 __ reset_last_Java_frame(true); 4011 4012 // rax is jobject handle result, unpack and process it through a barrier. 4013 __ resolve_global_jobject(rax, c_rarg0); 4014 4015 __ leave(); 4016 __ ret(0); 4017 4018 OopMapSet* oop_maps = new OopMapSet(); 4019 OopMap* map = new OopMap(framesize, 1); 4020 oop_maps->add_gc_map(frame_complete, map); 4021 4022 RuntimeStub* stub = 4023 RuntimeStub::new_runtime_stub(name, 4024 &code, 4025 frame_complete, 4026 (framesize >> (LogBytesPerWord - LogBytesPerInt)), 4027 oop_maps, 4028 false); 4029 return stub; 4030 } 4031 4032 // For c2: call to return a leased buffer. 
4033 RuntimeStub* SharedRuntime::generate_jfr_return_lease() { 4034 enum layout { 4035 rbp_off, 4036 rbpH_off, 4037 return_off, 4038 return_off2, 4039 framesize // inclusive of return address 4040 }; 4041 4042 const char* name = SharedRuntime::stub_name(StubId::shared_jfr_return_lease_id); 4043 CodeBuffer code(name, 1024, 64); 4044 MacroAssembler* masm = new MacroAssembler(&code); 4045 address start = __ pc(); 4046 4047 __ enter(); 4048 address the_pc = __ pc(); 4049 4050 int frame_complete = the_pc - start; 4051 4052 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch2); 4053 __ movptr(c_rarg0, r15_thread); 4054 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), 1); 4055 __ reset_last_Java_frame(true); 4056 4057 __ leave(); 4058 __ ret(0); 4059 4060 OopMapSet* oop_maps = new OopMapSet(); 4061 OopMap* map = new OopMap(framesize, 1); 4062 oop_maps->add_gc_map(frame_complete, map); 4063 4064 RuntimeStub* stub = 4065 RuntimeStub::new_runtime_stub(name, 4066 &code, 4067 frame_complete, 4068 (framesize >> (LogBytesPerWord - LogBytesPerInt)), 4069 oop_maps, 4070 false); 4071 return stub; 4072 } 4073 4074 #endif // INCLUDE_JFR