1 /* 2 * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #ifndef _WINDOWS 27 #include "alloca.h" 28 #endif 29 #include "asm/macroAssembler.hpp" 30 #include "asm/macroAssembler.inline.hpp" 31 #include "classfile/symbolTable.hpp" 32 #include "code/compiledIC.hpp" 33 #include "code/debugInfoRec.hpp" 34 #include "code/nativeInst.hpp" 35 #include "code/vtableStubs.hpp" 36 #include "compiler/oopMap.hpp" 37 #include "gc/shared/collectedHeap.hpp" 38 #include "gc/shared/gcLocker.hpp" 39 #include "gc/shared/barrierSet.hpp" 40 #include "gc/shared/barrierSetAssembler.hpp" 41 #include "interpreter/interpreter.hpp" 42 #include "logging/log.hpp" 43 #include "memory/resourceArea.hpp" 44 #include "memory/universe.hpp" 45 #include "oops/klass.inline.hpp" 46 #include "oops/method.inline.hpp" 47 #include "prims/methodHandles.hpp" 48 #include "runtime/continuation.hpp" 49 #include "runtime/continuationEntry.inline.hpp" 50 #include "runtime/globals.hpp" 51 #include "runtime/jniHandles.hpp" 52 #include "runtime/safepointMechanism.hpp" 53 #include "runtime/sharedRuntime.hpp" 54 #include "runtime/signature.hpp" 55 #include "runtime/stubRoutines.hpp" 56 #include "runtime/timerTrace.hpp" 57 #include "runtime/vframeArray.hpp" 58 #include "runtime/vm_version.hpp" 59 #include "utilities/align.hpp" 60 #include "utilities/checkedCast.hpp" 61 #include "utilities/formatBuffer.hpp" 62 #include "vmreg_x86.inline.hpp" 63 #ifdef COMPILER1 64 #include "c1/c1_Runtime1.hpp" 65 #endif 66 #ifdef COMPILER2 67 #include "opto/runtime.hpp" 68 #endif 69 #if INCLUDE_JVMCI 70 #include "jvmci/jvmciJavaClasses.hpp" 71 #endif 72 73 #define __ masm-> 74 75 #ifdef PRODUCT 76 #define BLOCK_COMMENT(str) /* nothing */ 77 #else 78 #define BLOCK_COMMENT(str) __ block_comment(str) 79 #endif // PRODUCT 80 81 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; 82 83 class RegisterSaver { 84 // Capture info about frame layout. Layout offsets are in jint 85 // units because compiler frame slots are jints. 
86 #define XSAVE_AREA_BEGIN 160 87 #define XSAVE_AREA_YMM_BEGIN 576 88 #define XSAVE_AREA_EGPRS 960 89 #define XSAVE_AREA_OPMASK_BEGIN 1088 90 #define XSAVE_AREA_ZMM_BEGIN 1152 91 #define XSAVE_AREA_UPPERBANK 1664 92 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off 93 #define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off 94 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum)*32/BytesPerInt, zmm ## regnum ## H_off 95 #define DEF_OPMASK_OFFS(regnum) opmask ## regnum ## _off = opmask_off + (regnum)*8/BytesPerInt, opmask ## regnum ## H_off 96 #define DEF_ZMM_UPPER_OFFS(regnum) zmm ## regnum ## _off = zmm_upper_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off 97 enum layout { 98 fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area 99 xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area 100 DEF_XMM_OFFS(0), 101 DEF_XMM_OFFS(1), 102 // 2..15 are implied in range usage 103 ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, 104 DEF_YMM_OFFS(0), 105 DEF_YMM_OFFS(1), 106 // 2..15 are implied in range usage 107 r31_off = xmm_off + (XSAVE_AREA_EGPRS - XSAVE_AREA_BEGIN)/BytesPerInt, 108 r31H_off, 109 r30_off, r30H_off, 110 r29_off, r29H_off, 111 r28_off, r28H_off, 112 r27_off, r27H_off, 113 r26_off, r26H_off, 114 r25_off, r25H_off, 115 r24_off, r24H_off, 116 r23_off, r23H_off, 117 r22_off, r22H_off, 118 r21_off, r21H_off, 119 r20_off, r20H_off, 120 r19_off, r19H_off, 121 r18_off, r18H_off, 122 r17_off, r17H_off, 123 r16_off, r16H_off, 124 opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, 125 DEF_OPMASK_OFFS(0), 126 DEF_OPMASK_OFFS(1), 127 // 2..7 are implied in range usage 128 zmm_off = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt, 129 DEF_ZMM_OFFS(0), 130 DEF_ZMM_OFFS(1), 131 zmm_upper_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt, 132 DEF_ZMM_UPPER_OFFS(16), 133 DEF_ZMM_UPPER_OFFS(17), 134 // 18..31 are implied in range usage 135 fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt), 136 fpu_stateH_end, 137 r15_off, r15H_off, 138 r14_off, r14H_off, 139 r13_off, r13H_off, 140 r12_off, r12H_off, 141 r11_off, r11H_off, 142 r10_off, r10H_off, 143 r9_off, r9H_off, 144 r8_off, r8H_off, 145 rdi_off, rdiH_off, 146 rsi_off, rsiH_off, 147 ignore_off, ignoreH_off, // extra copy of rbp 148 rsp_off, rspH_off, 149 rbx_off, rbxH_off, 150 rdx_off, rdxH_off, 151 rcx_off, rcxH_off, 152 rax_off, raxH_off, 153 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state 154 align_off, alignH_off, 155 flags_off, flagsH_off, 156 // The frame sender code expects that rbp will be in the "natural" place and 157 // will override any oopMap setting for it. We must therefore force the layout 158 // so that it agrees with the frame sender code. 
159 rbp_off, rbpH_off, // copy of rbp we will restore 160 return_off, returnH_off, // slot for return address 161 reg_save_size // size in compiler stack slots 162 }; 163 164 public: 165 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors); 166 static void restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors = false); 167 168 // Offsets into the register save area 169 // Used by deoptimization when it is managing result register 170 // values on its own 171 172 static int rax_offset_in_bytes(void) { return BytesPerInt * rax_off; } 173 static int rdx_offset_in_bytes(void) { return BytesPerInt * rdx_off; } 174 static int rbx_offset_in_bytes(void) { return BytesPerInt * rbx_off; } 175 static int xmm0_offset_in_bytes(void) { return BytesPerInt * xmm0_off; } 176 static int return_offset_in_bytes(void) { return BytesPerInt * return_off; } 177 178 // During deoptimization only the result registers need to be restored, 179 // all the other values have already been extracted. 180 static void restore_result_registers(MacroAssembler* masm); 181 }; 182 183 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) { 184 int off = 0; 185 int num_xmm_regs = XMMRegister::available_xmm_registers(); 186 #if COMPILER2_OR_JVMCI 187 if (save_wide_vectors && UseAVX == 0) { 188 save_wide_vectors = false; // vectors larger than 16 byte long are supported only with AVX 189 } 190 assert(!save_wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); 191 #else 192 save_wide_vectors = false; // vectors are generated only by C2 and JVMCI 193 #endif 194 195 // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated 196 int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs); 197 // OopMap frame size is in compiler stack slots (jint's) not bytes or words 198 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; 199 // CodeBlob frame size is in words. 200 int frame_size_in_words = frame_size_in_bytes / wordSize; 201 *total_frame_words = frame_size_in_words; 202 203 // Save registers, fpu state, and flags. 204 // We assume caller has already pushed the return address onto the 205 // stack, so rsp is 8-byte aligned here. 206 // We push rpb twice in this sequence because we want the real rbp 207 // to be under the return like a normal enter. 
208 209 __ enter(); // rsp becomes 16-byte aligned here 210 __ pushf(); 211 // Make sure rsp stays 16-byte aligned 212 __ subq(rsp, 8); 213 // Push CPU state in multiple of 16 bytes 214 __ save_legacy_gprs(); 215 __ push_FPU_state(); 216 217 218 // push cpu state handles this on EVEX enabled targets 219 if (save_wide_vectors) { 220 // Save upper half of YMM registers(0..15) 221 int base_addr = XSAVE_AREA_YMM_BEGIN; 222 for (int n = 0; n < 16; n++) { 223 __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n)); 224 } 225 if (VM_Version::supports_evex()) { 226 // Save upper half of ZMM registers(0..15) 227 base_addr = XSAVE_AREA_ZMM_BEGIN; 228 for (int n = 0; n < 16; n++) { 229 __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n)); 230 } 231 // Save full ZMM registers(16..num_xmm_regs) 232 base_addr = XSAVE_AREA_UPPERBANK; 233 off = 0; 234 int vector_len = Assembler::AVX_512bit; 235 for (int n = 16; n < num_xmm_regs; n++) { 236 __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len); 237 } 238 #if COMPILER2_OR_JVMCI 239 base_addr = XSAVE_AREA_OPMASK_BEGIN; 240 off = 0; 241 for(int n = 0; n < KRegister::number_of_registers; n++) { 242 __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n)); 243 } 244 #endif 245 } 246 } else { 247 if (VM_Version::supports_evex()) { 248 // Save upper bank of XMM registers(16..31) for scalar or 16-byte vector usage 249 int base_addr = XSAVE_AREA_UPPERBANK; 250 off = 0; 251 int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit; 252 for (int n = 16; n < num_xmm_regs; n++) { 253 __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len); 254 } 255 #if COMPILER2_OR_JVMCI 256 base_addr = XSAVE_AREA_OPMASK_BEGIN; 257 off = 0; 258 for(int n = 0; n < KRegister::number_of_registers; n++) { 259 __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n)); 260 } 261 #endif 262 } 263 } 264 265 #if COMPILER2_OR_JVMCI 266 if (UseAPX) { 267 int base_addr = XSAVE_AREA_EGPRS; 268 off = 0; 269 for(int n = 16; n < Register::number_of_registers; n++) { 270 __ movq(Address(rsp, base_addr+(off++*8)), as_Register(n)); 271 } 272 } 273 #endif 274 275 __ vzeroupper(); 276 if (frame::arg_reg_save_area_bytes != 0) { 277 // Allocate argument register save area 278 __ subptr(rsp, frame::arg_reg_save_area_bytes); 279 } 280 281 // Set an oopmap for the call site. This oopmap will map all 282 // oop-registers and debug-info registers as callee-saved. This 283 // will allow deoptimization at this safepoint to find all possible 284 // debug-info recordings, as well as let GC find all oops. 
285 286 OopMapSet *oop_maps = new OopMapSet(); 287 OopMap* map = new OopMap(frame_size_in_slots, 0); 288 289 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x)) 290 291 map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg()); 292 map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg()); 293 map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg()); 294 map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg()); 295 // rbp location is known implicitly by the frame sender code, needs no oopmap 296 // and the location where rbp was saved by is ignored 297 map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg()); 298 map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg()); 299 map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg()); 300 map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg()); 301 map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg()); 302 map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg()); 303 map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg()); 304 map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg()); 305 map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg()); 306 map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg()); 307 308 if (UseAPX) { 309 map->set_callee_saved(STACK_OFFSET( r16_off ), r16->as_VMReg()); 310 map->set_callee_saved(STACK_OFFSET( r17_off ), r17->as_VMReg()); 311 map->set_callee_saved(STACK_OFFSET( r18_off ), r18->as_VMReg()); 312 map->set_callee_saved(STACK_OFFSET( r19_off ), r19->as_VMReg()); 313 map->set_callee_saved(STACK_OFFSET( r20_off ), r20->as_VMReg()); 314 map->set_callee_saved(STACK_OFFSET( r21_off ), r21->as_VMReg()); 315 map->set_callee_saved(STACK_OFFSET( r22_off ), r22->as_VMReg()); 316 map->set_callee_saved(STACK_OFFSET( r23_off ), r23->as_VMReg()); 317 map->set_callee_saved(STACK_OFFSET( r24_off ), r24->as_VMReg()); 318 map->set_callee_saved(STACK_OFFSET( r25_off ), r25->as_VMReg()); 319 map->set_callee_saved(STACK_OFFSET( r26_off ), r26->as_VMReg()); 320 map->set_callee_saved(STACK_OFFSET( r27_off ), r27->as_VMReg()); 321 map->set_callee_saved(STACK_OFFSET( r28_off ), r28->as_VMReg()); 322 map->set_callee_saved(STACK_OFFSET( r29_off ), r29->as_VMReg()); 323 map->set_callee_saved(STACK_OFFSET( r30_off ), r30->as_VMReg()); 324 map->set_callee_saved(STACK_OFFSET( r31_off ), r31->as_VMReg()); 325 } 326 // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15, 327 // on EVEX enabled targets, we get it included in the xsave area 328 off = xmm0_off; 329 int delta = xmm1_off - off; 330 for (int n = 0; n < 16; n++) { 331 XMMRegister xmm_name = as_XMMRegister(n); 332 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()); 333 off += delta; 334 } 335 if (UseAVX > 2) { 336 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets 337 off = zmm16_off; 338 delta = zmm17_off - off; 339 for (int n = 16; n < num_xmm_regs; n++) { 340 XMMRegister zmm_name = as_XMMRegister(n); 341 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()); 342 off += delta; 343 } 344 } 345 346 #if COMPILER2_OR_JVMCI 347 if (save_wide_vectors) { 348 // Save upper half of YMM registers(0..15) 349 off = ymm0_off; 350 delta = ymm1_off - ymm0_off; 351 for (int n = 0; n < 16; n++) { 352 XMMRegister ymm_name = as_XMMRegister(n); 353 map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4)); 354 off += delta; 355 } 356 if (VM_Version::supports_evex()) { 357 // Save upper half of ZMM registers(0..15) 358 off = zmm0_off; 
359 delta = zmm1_off - zmm0_off; 360 for (int n = 0; n < 16; n++) { 361 XMMRegister zmm_name = as_XMMRegister(n); 362 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next(8)); 363 off += delta; 364 } 365 } 366 } 367 #endif // COMPILER2_OR_JVMCI 368 369 // %%% These should all be a waste but we'll keep things as they were for now 370 if (true) { 371 map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next()); 372 map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next()); 373 map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next()); 374 map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next()); 375 // rbp location is known implicitly by the frame sender code, needs no oopmap 376 map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next()); 377 map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next()); 378 map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next()); 379 map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next()); 380 map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next()); 381 map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next()); 382 map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next()); 383 map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next()); 384 map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next()); 385 map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next()); 386 if (UseAPX) { 387 map->set_callee_saved(STACK_OFFSET( r16H_off ), r16->as_VMReg()->next()); 388 map->set_callee_saved(STACK_OFFSET( r17H_off ), r17->as_VMReg()->next()); 389 map->set_callee_saved(STACK_OFFSET( r18H_off ), r18->as_VMReg()->next()); 390 map->set_callee_saved(STACK_OFFSET( r19H_off ), r19->as_VMReg()->next()); 391 map->set_callee_saved(STACK_OFFSET( r20H_off ), r20->as_VMReg()->next()); 392 map->set_callee_saved(STACK_OFFSET( r21H_off ), r21->as_VMReg()->next()); 393 map->set_callee_saved(STACK_OFFSET( r22H_off ), r22->as_VMReg()->next()); 394 map->set_callee_saved(STACK_OFFSET( r23H_off ), r23->as_VMReg()->next()); 395 map->set_callee_saved(STACK_OFFSET( r24H_off ), r24->as_VMReg()->next()); 396 map->set_callee_saved(STACK_OFFSET( r25H_off ), r25->as_VMReg()->next()); 397 map->set_callee_saved(STACK_OFFSET( r26H_off ), r26->as_VMReg()->next()); 398 map->set_callee_saved(STACK_OFFSET( r27H_off ), r27->as_VMReg()->next()); 399 map->set_callee_saved(STACK_OFFSET( r28H_off ), r28->as_VMReg()->next()); 400 map->set_callee_saved(STACK_OFFSET( r29H_off ), r29->as_VMReg()->next()); 401 map->set_callee_saved(STACK_OFFSET( r30H_off ), r30->as_VMReg()->next()); 402 map->set_callee_saved(STACK_OFFSET( r31H_off ), r31->as_VMReg()->next()); 403 } 404 // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15, 405 // on EVEX enabled targets, we get it included in the xsave area 406 off = xmm0H_off; 407 delta = xmm1H_off - off; 408 for (int n = 0; n < 16; n++) { 409 XMMRegister xmm_name = as_XMMRegister(n); 410 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next()); 411 off += delta; 412 } 413 if (UseAVX > 2) { 414 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets 415 off = zmm16H_off; 416 delta = zmm17H_off - off; 417 for (int n = 16; n < num_xmm_regs; n++) { 418 XMMRegister zmm_name = as_XMMRegister(n); 419 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next()); 420 off += delta; 421 } 422 } 423 } 424 425 return map; 
426 } 427 428 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors) { 429 int num_xmm_regs = XMMRegister::available_xmm_registers(); 430 if (frame::arg_reg_save_area_bytes != 0) { 431 // Pop arg register save area 432 __ addptr(rsp, frame::arg_reg_save_area_bytes); 433 } 434 435 #if COMPILER2_OR_JVMCI 436 if (restore_wide_vectors) { 437 assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); 438 assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); 439 } 440 #else 441 assert(!restore_wide_vectors, "vectors are generated only by C2"); 442 #endif 443 444 __ vzeroupper(); 445 446 // On EVEX enabled targets everything is handled in pop fpu state 447 if (restore_wide_vectors) { 448 // Restore upper half of YMM registers (0..15) 449 int base_addr = XSAVE_AREA_YMM_BEGIN; 450 for (int n = 0; n < 16; n++) { 451 __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16)); 452 } 453 if (VM_Version::supports_evex()) { 454 // Restore upper half of ZMM registers (0..15) 455 base_addr = XSAVE_AREA_ZMM_BEGIN; 456 for (int n = 0; n < 16; n++) { 457 __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32)); 458 } 459 // Restore full ZMM registers(16..num_xmm_regs) 460 base_addr = XSAVE_AREA_UPPERBANK; 461 int vector_len = Assembler::AVX_512bit; 462 int off = 0; 463 for (int n = 16; n < num_xmm_regs; n++) { 464 __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len); 465 } 466 #if COMPILER2_OR_JVMCI 467 base_addr = XSAVE_AREA_OPMASK_BEGIN; 468 off = 0; 469 for (int n = 0; n < KRegister::number_of_registers; n++) { 470 __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8))); 471 } 472 #endif 473 } 474 } else { 475 if (VM_Version::supports_evex()) { 476 // Restore upper bank of XMM registers(16..31) for scalar or 16-byte vector usage 477 int base_addr = XSAVE_AREA_UPPERBANK; 478 int off = 0; 479 int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit; 480 for (int n = 16; n < num_xmm_regs; n++) { 481 __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len); 482 } 483 #if COMPILER2_OR_JVMCI 484 base_addr = XSAVE_AREA_OPMASK_BEGIN; 485 off = 0; 486 for (int n = 0; n < KRegister::number_of_registers; n++) { 487 __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8))); 488 } 489 #endif 490 } 491 } 492 493 #if COMPILER2_OR_JVMCI 494 if (UseAPX) { 495 int base_addr = XSAVE_AREA_EGPRS; 496 int off = 0; 497 for (int n = 16; n < Register::number_of_registers; n++) { 498 __ movq(as_Register(n), Address(rsp, base_addr+(off++*8))); 499 } 500 } 501 #endif 502 503 // Recover CPU state 504 __ pop_FPU_state(); 505 __ restore_legacy_gprs(); 506 __ addq(rsp, 8); 507 __ popf(); 508 // Get the rbp described implicitly by the calling convention (no oopMap) 509 __ pop(rbp); 510 } 511 512 void RegisterSaver::restore_result_registers(MacroAssembler* masm) { 513 514 // Just restore result register. Only used by deoptimization. By 515 // now any callee save register that needs to be restored to a c2 516 // caller of the deoptee has been extracted into the vframeArray 517 // and will be stuffed into the c2i adapter we create for later 518 // restoration so only result registers need to be restored here. 
519 520 // Restore fp result register 521 __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes())); 522 // Restore integer result register 523 __ movptr(rax, Address(rsp, rax_offset_in_bytes())); 524 __ movptr(rdx, Address(rsp, rdx_offset_in_bytes())); 525 526 // Pop all of the register save are off the stack except the return address 527 __ addptr(rsp, return_offset_in_bytes()); 528 } 529 530 // Is vector's size (in bytes) bigger than a size saved by default? 531 // 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. 532 bool SharedRuntime::is_wide_vector(int size) { 533 return size > 16; 534 } 535 536 // --------------------------------------------------------------------------- 537 // Read the array of BasicTypes from a signature, and compute where the 538 // arguments should go. Values in the VMRegPair regs array refer to 4-byte 539 // quantities. Values less than VMRegImpl::stack0 are registers, those above 540 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer 541 // as framesizes are fixed. 542 // VMRegImpl::stack0 refers to the first slot 0(sp). 543 // and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. 544 // Register up to Register::number_of_registers are the 64-bit 545 // integer registers. 546 547 // Note: the INPUTS in sig_bt are in units of Java argument words, which are 548 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit 549 // units regardless of build. Of course for i486 there is no 64 bit build 550 551 // The Java calling convention is a "shifted" version of the C ABI. 552 // By skipping the first C ABI register we can call non-static jni methods 553 // with small numbers of arguments without having to shuffle the arguments 554 // at all. Since we control the java ABI we ought to at least get some 555 // advantage out of it. 556 557 int SharedRuntime::java_calling_convention(const BasicType *sig_bt, 558 VMRegPair *regs, 559 int total_args_passed) { 560 561 // Create the mapping between argument positions and 562 // registers. 
563 static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { 564 j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5 565 }; 566 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { 567 j_farg0, j_farg1, j_farg2, j_farg3, 568 j_farg4, j_farg5, j_farg6, j_farg7 569 }; 570 571 572 uint int_args = 0; 573 uint fp_args = 0; 574 uint stk_args = 0; 575 576 for (int i = 0; i < total_args_passed; i++) { 577 switch (sig_bt[i]) { 578 case T_BOOLEAN: 579 case T_CHAR: 580 case T_BYTE: 581 case T_SHORT: 582 case T_INT: 583 if (int_args < Argument::n_int_register_parameters_j) { 584 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); 585 } else { 586 stk_args = align_up(stk_args, 2); 587 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 588 stk_args += 1; 589 } 590 break; 591 case T_VOID: 592 // halves of T_LONG or T_DOUBLE 593 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 594 regs[i].set_bad(); 595 break; 596 case T_LONG: 597 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 598 // fall through 599 case T_OBJECT: 600 case T_ARRAY: 601 case T_ADDRESS: 602 if (int_args < Argument::n_int_register_parameters_j) { 603 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); 604 } else { 605 stk_args = align_up(stk_args, 2); 606 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 607 stk_args += 2; 608 } 609 break; 610 case T_FLOAT: 611 if (fp_args < Argument::n_float_register_parameters_j) { 612 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); 613 } else { 614 stk_args = align_up(stk_args, 2); 615 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 616 stk_args += 1; 617 } 618 break; 619 case T_DOUBLE: 620 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 621 if (fp_args < Argument::n_float_register_parameters_j) { 622 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); 623 } else { 624 stk_args = align_up(stk_args, 2); 625 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 626 stk_args += 2; 627 } 628 break; 629 default: 630 ShouldNotReachHere(); 631 break; 632 } 633 } 634 635 return stk_args; 636 } 637 638 // Same as java_calling_convention() but for multiple return 639 // values. There's no way to store them on the stack so if we don't 640 // have enough registers, multiple values can't be returned. 641 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1; 642 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; 643 int SharedRuntime::java_return_convention(const BasicType *sig_bt, 644 VMRegPair *regs, 645 int total_args_passed) { 646 // Create the mapping between argument positions and 647 // registers. 
648 static const Register INT_ArgReg[java_return_convention_max_int] = { 649 rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0 650 }; 651 static const XMMRegister FP_ArgReg[java_return_convention_max_float] = { 652 j_farg0, j_farg1, j_farg2, j_farg3, 653 j_farg4, j_farg5, j_farg6, j_farg7 654 }; 655 656 657 uint int_args = 0; 658 uint fp_args = 0; 659 660 for (int i = 0; i < total_args_passed; i++) { 661 switch (sig_bt[i]) { 662 case T_BOOLEAN: 663 case T_CHAR: 664 case T_BYTE: 665 case T_SHORT: 666 case T_INT: 667 if (int_args < Argument::n_int_register_parameters_j+1) { 668 regs[i].set1(INT_ArgReg[int_args]->as_VMReg()); 669 int_args++; 670 } else { 671 return -1; 672 } 673 break; 674 case T_VOID: 675 // halves of T_LONG or T_DOUBLE 676 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 677 regs[i].set_bad(); 678 break; 679 case T_LONG: 680 assert(sig_bt[i + 1] == T_VOID, "expecting half"); 681 // fall through 682 case T_OBJECT: 683 case T_ARRAY: 684 case T_ADDRESS: 685 case T_METADATA: 686 if (int_args < Argument::n_int_register_parameters_j+1) { 687 regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); 688 int_args++; 689 } else { 690 return -1; 691 } 692 break; 693 case T_FLOAT: 694 if (fp_args < Argument::n_float_register_parameters_j) { 695 regs[i].set1(FP_ArgReg[fp_args]->as_VMReg()); 696 fp_args++; 697 } else { 698 return -1; 699 } 700 break; 701 case T_DOUBLE: 702 assert(sig_bt[i + 1] == T_VOID, "expecting half"); 703 if (fp_args < Argument::n_float_register_parameters_j) { 704 regs[i].set2(FP_ArgReg[fp_args]->as_VMReg()); 705 fp_args++; 706 } else { 707 return -1; 708 } 709 break; 710 default: 711 ShouldNotReachHere(); 712 break; 713 } 714 } 715 716 return int_args + fp_args; 717 } 718 719 // Patch the callers callsite with entry to compiled code if it exists. 720 static void patch_callers_callsite(MacroAssembler *masm) { 721 Label L; 722 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD); 723 __ jcc(Assembler::equal, L); 724 725 // Save the current stack pointer 726 __ mov(r13, rsp); 727 // Schedule the branch target address early. 728 // Call into the VM to patch the caller, then jump to compiled callee 729 // rax isn't live so capture return address while we easily can 730 __ movptr(rax, Address(rsp, 0)); 731 732 // align stack so push_CPU_state doesn't fault 733 __ andptr(rsp, -(StackAlignmentInBytes)); 734 __ push_CPU_state(); 735 __ vzeroupper(); 736 // VM needs caller's callsite 737 // VM needs target method 738 // This needs to be a long call since we will relocate this adapter to 739 // the codeBuffer and it may not reach 740 741 // Allocate argument register save area 742 if (frame::arg_reg_save_area_bytes != 0) { 743 __ subptr(rsp, frame::arg_reg_save_area_bytes); 744 } 745 __ mov(c_rarg0, rbx); 746 __ mov(c_rarg1, rax); 747 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); 748 749 // De-allocate argument register save area 750 if (frame::arg_reg_save_area_bytes != 0) { 751 __ addptr(rsp, frame::arg_reg_save_area_bytes); 752 } 753 754 __ vzeroupper(); 755 __ pop_CPU_state(); 756 // restore sp 757 __ mov(rsp, r13); 758 __ bind(L); 759 } 760 761 // For each inline type argument, sig includes the list of fields of 762 // the inline type. This utility function computes the number of 763 // arguments for the call if inline types are passed by reference (the 764 // calling convention the interpreter expects). 
765 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) { 766 int total_args_passed = 0; 767 if (InlineTypePassFieldsAsArgs) { 768 for (int i = 0; i < sig_extended->length(); i++) { 769 BasicType bt = sig_extended->at(i)._bt; 770 if (bt == T_METADATA) { 771 // In sig_extended, an inline type argument starts with: 772 // T_METADATA, followed by the types of the fields of the 773 // inline type and T_VOID to mark the end of the value 774 // type. Inline types are flattened so, for instance, in the 775 // case of an inline type with an int field and an inline type 776 // field that itself has 2 fields, an int and a long: 777 // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second 778 // slot for the T_LONG) T_VOID (inner inline type) T_VOID 779 // (outer inline type) 780 total_args_passed++; 781 int vt = 1; 782 do { 783 i++; 784 BasicType bt = sig_extended->at(i)._bt; 785 BasicType prev_bt = sig_extended->at(i-1)._bt; 786 if (bt == T_METADATA) { 787 vt++; 788 } else if (bt == T_VOID && 789 prev_bt != T_LONG && 790 prev_bt != T_DOUBLE) { 791 vt--; 792 } 793 } while (vt != 0); 794 } else { 795 total_args_passed++; 796 } 797 } 798 } else { 799 total_args_passed = sig_extended->length(); 800 } 801 return total_args_passed; 802 } 803 804 805 static void gen_c2i_adapter_helper(MacroAssembler* masm, 806 BasicType bt, 807 BasicType prev_bt, 808 size_t size_in_bytes, 809 const VMRegPair& reg_pair, 810 const Address& to, 811 int extraspace, 812 bool is_oop) { 813 if (bt == T_VOID) { 814 assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half"); 815 return; 816 } 817 818 // Say 4 args: 819 // i st_off 820 // 0 32 T_LONG 821 // 1 24 T_VOID 822 // 2 16 T_OBJECT 823 // 3 8 T_BOOL 824 // - 0 return address 825 // 826 // However to make thing extra confusing. Because we can fit a long/double in 827 // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter 828 // leaves one slot empty and only stores to a single slot. In this case the 829 // slot that is occupied is the T_VOID slot. See I said it was confusing. 
830 831 bool wide = (size_in_bytes == wordSize); 832 VMReg r_1 = reg_pair.first(); 833 VMReg r_2 = reg_pair.second(); 834 assert(r_2->is_valid() == wide, "invalid size"); 835 if (!r_1->is_valid()) { 836 assert(!r_2->is_valid(), "must be invalid"); 837 return; 838 } 839 840 if (!r_1->is_XMMRegister()) { 841 Register val = rax; 842 if (r_1->is_stack()) { 843 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; 844 __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false); 845 } else { 846 val = r_1->as_Register(); 847 } 848 assert_different_registers(to.base(), val, rscratch1); 849 if (is_oop) { 850 __ push(r13); 851 __ push(rbx); 852 __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); 853 __ pop(rbx); 854 __ pop(r13); 855 } else { 856 __ store_sized_value(to, val, size_in_bytes); 857 } 858 } else { 859 if (wide) { 860 __ movdbl(to, r_1->as_XMMRegister()); 861 } else { 862 __ movflt(to, r_1->as_XMMRegister()); 863 } 864 } 865 } 866 867 static void gen_c2i_adapter(MacroAssembler *masm, 868 const GrowableArray<SigEntry>* sig_extended, 869 const VMRegPair *regs, 870 bool requires_clinit_barrier, 871 address& c2i_no_clinit_check_entry, 872 Label& skip_fixup, 873 address start, 874 OopMapSet* oop_maps, 875 int& frame_complete, 876 int& frame_size_in_words, 877 bool alloc_inline_receiver) { 878 if (requires_clinit_barrier && VM_Version::supports_fast_class_init_checks()) { 879 Label L_skip_barrier; 880 Register method = rbx; 881 882 { // Bypass the barrier for non-static methods 883 Register flags = rscratch1; 884 __ movl(flags, Address(method, Method::access_flags_offset())); 885 __ testl(flags, JVM_ACC_STATIC); 886 __ jcc(Assembler::zero, L_skip_barrier); // non-static 887 } 888 889 Register klass = rscratch1; 890 __ load_method_holder(klass, method); 891 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/); 892 893 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path 894 895 __ bind(L_skip_barrier); 896 c2i_no_clinit_check_entry = __ pc(); 897 } 898 899 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 900 bs->c2i_entry_barrier(masm); 901 902 // Before we get into the guts of the C2I adapter, see if we should be here 903 // at all. We've come from compiled code and are attempting to jump to the 904 // interpreter, which means the caller made a static call to get here 905 // (vcalls always get a compiled target if there is one). Check for a 906 // compiled target. If there is one, we need to patch the caller's call. 907 patch_callers_callsite(masm); 908 909 __ bind(skip_fixup); 910 911 if (InlineTypePassFieldsAsArgs) { 912 // Is there an inline type argument? 913 bool has_inline_argument = false; 914 for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) { 915 has_inline_argument = (sig_extended->at(i)._bt == T_METADATA); 916 } 917 if (has_inline_argument) { 918 // There is at least an inline type argument: we're coming from 919 // compiled code so we have no buffers to back the inline types. 920 // Allocate the buffers here with a runtime call. 
921 OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false); 922 923 frame_complete = __ offset(); 924 925 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 926 927 __ mov(c_rarg0, r15_thread); 928 __ mov(c_rarg1, rbx); 929 __ mov64(c_rarg2, (int64_t)alloc_inline_receiver); 930 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types))); 931 932 oop_maps->add_gc_map((int)(__ pc() - start), map); 933 __ reset_last_Java_frame(false); 934 935 RegisterSaver::restore_live_registers(masm); 936 937 Label no_exception; 938 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 939 __ jcc(Assembler::equal, no_exception); 940 941 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD); 942 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 943 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 944 945 __ bind(no_exception); 946 947 // We get an array of objects from the runtime call 948 __ get_vm_result(rscratch2, r15_thread); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr() 949 __ get_vm_result_2(rbx, r15_thread); // TODO: required to keep the callee Method live? 950 } 951 } 952 953 // Since all args are passed on the stack, total_args_passed * 954 // Interpreter::stackElementSize is the space we need. 955 int total_args_passed = compute_total_args_passed_int(sig_extended); 956 assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed); 957 958 int extraspace = (total_args_passed * Interpreter::stackElementSize); 959 960 // stack is aligned, keep it that way 961 // This is not currently needed or enforced by the interpreter, but 962 // we might as well conform to the ABI. 963 extraspace = align_up(extraspace, 2*wordSize); 964 965 // set senderSP value 966 __ lea(r13, Address(rsp, wordSize)); 967 968 #ifdef ASSERT 969 __ check_stack_alignment(r13, "sender stack not aligned"); 970 #endif 971 if (extraspace > 0) { 972 // Pop the return address 973 __ pop(rax); 974 975 __ subptr(rsp, extraspace); 976 977 // Push the return address 978 __ push(rax); 979 980 // Account for the return address location since we store it first rather 981 // than hold it in a register across all the shuffling 982 extraspace += wordSize; 983 } 984 985 #ifdef ASSERT 986 __ check_stack_alignment(rsp, "callee stack not aligned", wordSize, rax); 987 #endif 988 989 // Now write the args into the outgoing interpreter space 990 991 // next_arg_comp is the next argument from the compiler point of 992 // view (inline type fields are passed in registers/on the stack). In 993 // sig_extended, an inline type argument starts with: T_METADATA, 994 // followed by the types of the fields of the inline type and T_VOID 995 // to mark the end of the inline type. ignored counts the number of 996 // T_METADATA/T_VOID. next_vt_arg is the next inline type argument: 997 // used to get the buffer for that argument from the pool of buffers 998 // we allocated above and want to pass to the 999 // interpreter. next_arg_int is the next argument from the 1000 // interpreter point of view (inline types are passed by reference). 
1001 for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0; 1002 next_arg_comp < sig_extended->length(); next_arg_comp++) { 1003 assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments"); 1004 assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?"); 1005 BasicType bt = sig_extended->at(next_arg_comp)._bt; 1006 int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize; 1007 if (!InlineTypePassFieldsAsArgs || bt != T_METADATA) { 1008 int next_off = st_off - Interpreter::stackElementSize; 1009 const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; 1010 const VMRegPair reg_pair = regs[next_arg_comp-ignored]; 1011 size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4; 1012 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, 1013 size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false); 1014 next_arg_int++; 1015 #ifdef ASSERT 1016 if (bt == T_LONG || bt == T_DOUBLE) { 1017 // Overwrite the unused slot with known junk 1018 __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); 1019 __ movptr(Address(rsp, st_off), rax); 1020 } 1021 #endif /* ASSERT */ 1022 } else { 1023 ignored++; 1024 // get the buffer from the just allocated pool of buffers 1025 int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_OBJECT); 1026 __ load_heap_oop(r14, Address(rscratch2, index)); 1027 next_vt_arg++; next_arg_int++; 1028 int vt = 1; 1029 // write fields we get from compiled code in registers/stack 1030 // slots to the buffer: we know we are done with that inline type 1031 // argument when we hit the T_VOID that acts as an end of inline 1032 // type delimiter for this inline type. Inline types are flattened 1033 // so we might encounter embedded inline types. Each entry in 1034 // sig_extended contains a field offset in the buffer. 1035 Label L_null; 1036 do { 1037 next_arg_comp++; 1038 BasicType bt = sig_extended->at(next_arg_comp)._bt; 1039 BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt; 1040 if (bt == T_METADATA) { 1041 vt++; 1042 ignored++; 1043 } else if (bt == T_VOID && 1044 prev_bt != T_LONG && 1045 prev_bt != T_DOUBLE) { 1046 vt--; 1047 ignored++; 1048 } else { 1049 int off = sig_extended->at(next_arg_comp)._offset; 1050 if (off == -1) { 1051 // Nullable inline type argument, emit null check 1052 VMReg reg = regs[next_arg_comp-ignored].first(); 1053 Label L_notNull; 1054 if (reg->is_stack()) { 1055 int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace; 1056 __ testb(Address(rsp, ld_off), 1); 1057 } else { 1058 __ testb(reg->as_Register(), 1); 1059 } 1060 __ jcc(Assembler::notZero, L_notNull); 1061 __ movptr(Address(rsp, st_off), 0); 1062 __ jmp(L_null); 1063 __ bind(L_notNull); 1064 continue; 1065 } 1066 assert(off > 0, "offset in object should be positive"); 1067 size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize; 1068 bool is_oop = is_reference_type(bt); 1069 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, 1070 size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop); 1071 } 1072 } while (vt != 0); 1073 // pass the buffer to the interpreter 1074 __ movptr(Address(rsp, st_off), r14); 1075 __ bind(L_null); 1076 } 1077 } 1078 1079 // Schedule the branch target address early. 
1080 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset()))); 1081 __ jmp(rcx); 1082 } 1083 1084 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, 1085 address code_start, address code_end, 1086 Label& L_ok) { 1087 Label L_fail; 1088 __ lea(temp_reg, AddressLiteral(code_start, relocInfo::none)); 1089 __ cmpptr(pc_reg, temp_reg); 1090 __ jcc(Assembler::belowEqual, L_fail); 1091 __ lea(temp_reg, AddressLiteral(code_end, relocInfo::none)); 1092 __ cmpptr(pc_reg, temp_reg); 1093 __ jcc(Assembler::below, L_ok); 1094 __ bind(L_fail); 1095 } 1096 1097 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, 1098 int comp_args_on_stack, 1099 const GrowableArray<SigEntry>* sig, 1100 const VMRegPair *regs) { 1101 1102 // Note: r13 contains the senderSP on entry. We must preserve it since 1103 // we may do a i2c -> c2i transition if we lose a race where compiled 1104 // code goes non-entrant while we get args ready. 1105 // In addition we use r13 to locate all the interpreter args as 1106 // we must align the stack to 16 bytes on an i2c entry else we 1107 // lose alignment we expect in all compiled code and register 1108 // save code can segv when fxsave instructions find improperly 1109 // aligned stack pointer. 1110 1111 // Adapters can be frameless because they do not require the caller 1112 // to perform additional cleanup work, such as correcting the stack pointer. 1113 // An i2c adapter is frameless because the *caller* frame, which is interpreted, 1114 // routinely repairs its own stack pointer (from interpreter_frame_last_sp), 1115 // even if a callee has modified the stack pointer. 1116 // A c2i adapter is frameless because the *callee* frame, which is interpreted, 1117 // routinely repairs its caller's stack pointer (from sender_sp, which is set 1118 // up via the senderSP register). 1119 // In other words, if *either* the caller or callee is interpreted, we can 1120 // get the stack pointer repaired after a call. 1121 // This is why c2i and i2c adapters cannot be indefinitely composed. 1122 // In particular, if a c2i adapter were to somehow call an i2c adapter, 1123 // both caller and callee would be compiled methods, and neither would 1124 // clean up the stack pointer changes performed by the two adapters. 1125 // If this happens, control eventually transfers back to the compiled 1126 // caller, but with an uncorrected stack, causing delayed havoc. 1127 1128 if (VerifyAdapterCalls && 1129 (Interpreter::code() != nullptr || StubRoutines::final_stubs_code() != nullptr)) { 1130 // So, let's test for cascading c2i/i2c adapters right now. 
1131 // assert(Interpreter::contains($return_addr) || 1132 // StubRoutines::contains($return_addr), 1133 // "i2c adapter must return to an interpreter frame"); 1134 __ block_comment("verify_i2c { "); 1135 // Pick up the return address 1136 __ movptr(rax, Address(rsp, 0)); 1137 Label L_ok; 1138 if (Interpreter::code() != nullptr) { 1139 range_check(masm, rax, r11, 1140 Interpreter::code()->code_start(), 1141 Interpreter::code()->code_end(), 1142 L_ok); 1143 } 1144 if (StubRoutines::initial_stubs_code() != nullptr) { 1145 range_check(masm, rax, r11, 1146 StubRoutines::initial_stubs_code()->code_begin(), 1147 StubRoutines::initial_stubs_code()->code_end(), 1148 L_ok); 1149 } 1150 if (StubRoutines::final_stubs_code() != nullptr) { 1151 range_check(masm, rax, r11, 1152 StubRoutines::final_stubs_code()->code_begin(), 1153 StubRoutines::final_stubs_code()->code_end(), 1154 L_ok); 1155 } 1156 const char* msg = "i2c adapter must return to an interpreter frame"; 1157 __ block_comment(msg); 1158 __ stop(msg); 1159 __ bind(L_ok); 1160 __ block_comment("} verify_i2ce "); 1161 } 1162 1163 // Must preserve original SP for loading incoming arguments because 1164 // we need to align the outgoing SP for compiled code. 1165 __ movptr(r11, rsp); 1166 1167 // Pick up the return address 1168 __ pop(rax); 1169 1170 // Convert 4-byte c2 stack slots to words. 1171 int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; 1172 1173 if (comp_args_on_stack) { 1174 __ subptr(rsp, comp_words_on_stack * wordSize); 1175 } 1176 1177 // Ensure compiled code always sees stack at proper alignment 1178 __ andptr(rsp, -16); 1179 1180 // push the return address and misalign the stack that youngest frame always sees 1181 // as far as the placement of the call instruction 1182 __ push(rax); 1183 1184 // Put saved SP in another register 1185 const Register saved_sp = rax; 1186 __ movptr(saved_sp, r11); 1187 1188 // Will jump to the compiled code just as if compiled code was doing it. 1189 // Pre-load the register-jump target early, to schedule it better. 1190 __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset()))); 1191 1192 #if INCLUDE_JVMCI 1193 if (EnableJVMCI) { 1194 // check if this call should be routed towards a specific entry point 1195 __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0); 1196 Label no_alternative_target; 1197 __ jcc(Assembler::equal, no_alternative_target); 1198 __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); 1199 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0); 1200 __ bind(no_alternative_target); 1201 } 1202 #endif // INCLUDE_JVMCI 1203 1204 int total_args_passed = sig->length(); 1205 1206 // Now generate the shuffle code. Pick up all register args and move the 1207 // rest through the floating point stack top. 1208 for (int i = 0; i < total_args_passed; i++) { 1209 BasicType bt = sig->at(i)._bt; 1210 if (bt == T_VOID) { 1211 // Longs and doubles are passed in native word order, but misaligned 1212 // in the 32-bit build. 1213 BasicType prev_bt = (i > 0) ? sig->at(i-1)._bt : T_ILLEGAL; 1214 assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half"); 1215 continue; 1216 } 1217 1218 // Pick up 0, 1 or 2 words from SP+offset. 
1219 1220 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), 1221 "scrambled load targets?"); 1222 // Load in argument order going down. 1223 int ld_off = (total_args_passed - i)*Interpreter::stackElementSize; 1224 // Point to interpreter value (vs. tag) 1225 int next_off = ld_off - Interpreter::stackElementSize; 1226 // 1227 // 1228 // 1229 VMReg r_1 = regs[i].first(); 1230 VMReg r_2 = regs[i].second(); 1231 if (!r_1->is_valid()) { 1232 assert(!r_2->is_valid(), ""); 1233 continue; 1234 } 1235 if (r_1->is_stack()) { 1236 // Convert stack slot to an SP offset (+ wordSize to account for return address ) 1237 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize; 1238 1239 // We can use r13 as a temp here because compiled code doesn't need r13 as an input 1240 // and if we end up going thru a c2i because of a miss a reasonable value of r13 1241 // will be generated. 1242 if (!r_2->is_valid()) { 1243 // sign extend??? 1244 __ movl(r13, Address(saved_sp, ld_off)); 1245 __ movptr(Address(rsp, st_off), r13); 1246 } else { 1247 // 1248 // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE 1249 // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case 1250 // So we must adjust where to pick up the data to match the interpreter. 1251 // 1252 // Interpreter local[n] == MSW, local[n+1] == LSW however locals 1253 // are accessed as negative so LSW is at LOW address 1254 1255 // ld_off is MSW so get LSW 1256 const int offset = (bt==T_LONG||bt==T_DOUBLE)? 1257 next_off : ld_off; 1258 __ movq(r13, Address(saved_sp, offset)); 1259 // st_off is LSW (i.e. reg.first()) 1260 __ movq(Address(rsp, st_off), r13); 1261 } 1262 } else if (r_1->is_Register()) { // Register argument 1263 Register r = r_1->as_Register(); 1264 assert(r != rax, "must be different"); 1265 if (r_2->is_valid()) { 1266 // 1267 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE 1268 // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case 1269 // So we must adjust where to pick up the data to match the interpreter. 1270 1271 const int offset = (bt==T_LONG||bt==T_DOUBLE)? 1272 next_off : ld_off; 1273 1274 // this can be a misaligned move 1275 __ movq(r, Address(saved_sp, offset)); 1276 } else { 1277 // sign extend and use a full word? 1278 __ movl(r, Address(saved_sp, ld_off)); 1279 } 1280 } else { 1281 if (!r_2->is_valid()) { 1282 __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off)); 1283 } else { 1284 __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off)); 1285 } 1286 } 1287 } 1288 1289 __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about 1290 1291 // 6243940 We might end up in handle_wrong_method if 1292 // the callee is deoptimized as we race thru here. If that 1293 // happens we don't want to take a safepoint because the 1294 // caller frame will look interpreted and arguments are now 1295 // "compiled" so it is much better to make this transition 1296 // invisible to the stack walking code. Unfortunately if 1297 // we try and find the callee by normal means a safepoint 1298 // is possible. So we stash the desired callee in the thread 1299 // and the vm will find there should this case occur. 
1300 1301 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); 1302 1303 // put Method* where a c2i would expect should we end up there 1304 // only needed because of c2 resolve stubs return Method* as a result in 1305 // rax 1306 __ mov(rax, rbx); 1307 __ jmp(r11); 1308 } 1309 1310 static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) { 1311 Register data = rax; 1312 __ ic_check(1 /* end_alignment */); 1313 __ movptr(rbx, Address(data, CompiledICData::speculated_method_offset())); 1314 1315 // Method might have been compiled since the call site was patched to 1316 // interpreted if that is the case treat it as a miss so we can get 1317 // the call site corrected. 1318 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD); 1319 __ jcc(Assembler::equal, skip_fixup); 1320 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1321 } 1322 1323 // --------------------------------------------------------------- 1324 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm, 1325 int comp_args_on_stack, 1326 const GrowableArray<SigEntry>* sig, 1327 const VMRegPair* regs, 1328 const GrowableArray<SigEntry>* sig_cc, 1329 const VMRegPair* regs_cc, 1330 const GrowableArray<SigEntry>* sig_cc_ro, 1331 const VMRegPair* regs_cc_ro, 1332 AdapterFingerPrint* fingerprint, 1333 AdapterBlob*& new_adapter, 1334 bool allocate_code_blob) { 1335 address i2c_entry = __ pc(); 1336 gen_i2c_adapter(masm, comp_args_on_stack, sig, regs); 1337 1338 // ------------------------------------------------------------------------- 1339 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls 1340 // to the interpreter. The args start out packed in the compiled layout. They 1341 // need to be unpacked into the interpreter layout. This will almost always 1342 // require some stack space. We grow the current (compiled) stack, then repack 1343 // the args. We finally end in a jump to the generic interpreter entry point. 1344 // On exit from the interpreter, the interpreter will restore our SP (lest the 1345 // compiled code, which relies solely on SP and not RBP, get sick). 
1346 1347 address c2i_unverified_entry = __ pc(); 1348 address c2i_unverified_inline_entry = __ pc(); 1349 Label skip_fixup; 1350 1351 gen_inline_cache_check(masm, skip_fixup); 1352 1353 OopMapSet* oop_maps = new OopMapSet(); 1354 int frame_complete = CodeOffsets::frame_never_safe; 1355 int frame_size_in_words = 0; 1356 1357 // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver) 1358 address c2i_no_clinit_check_entry = nullptr; 1359 address c2i_inline_ro_entry = __ pc(); 1360 if (regs_cc != regs_cc_ro) { 1361 // No class init barrier needed because method is guaranteed to be non-static 1362 gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, c2i_no_clinit_check_entry, 1363 skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); 1364 skip_fixup.reset(); 1365 } 1366 1367 // Scalarized c2i adapter 1368 address c2i_entry = __ pc(); 1369 address c2i_inline_entry = __ pc(); 1370 gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry, 1371 skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true); 1372 1373 // Non-scalarized c2i adapter 1374 if (regs != regs_cc) { 1375 c2i_unverified_inline_entry = __ pc(); 1376 Label inline_entry_skip_fixup; 1377 gen_inline_cache_check(masm, inline_entry_skip_fixup); 1378 1379 c2i_inline_entry = __ pc(); 1380 gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry, 1381 inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false); 1382 } 1383 1384 // The c2i adapters might safepoint and trigger a GC. The caller must make sure that 1385 // the GC knows about the location of oop argument locations passed to the c2i adapter. 1386 if (allocate_code_blob) { 1387 bool caller_must_gc_arguments = (regs != regs_cc); 1388 new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments); 1389 } 1390 1391 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, c2i_unverified_inline_entry, c2i_no_clinit_check_entry); 1392 } 1393 1394 int SharedRuntime::c_calling_convention(const BasicType *sig_bt, 1395 VMRegPair *regs, 1396 int total_args_passed) { 1397 1398 // We return the amount of VMRegImpl stack slots we need to reserve for all 1399 // the arguments NOT counting out_preserve_stack_slots. 
1400 1401 // NOTE: These arrays will have to change when c1 is ported 1402 #ifdef _WIN64 1403 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { 1404 c_rarg0, c_rarg1, c_rarg2, c_rarg3 1405 }; 1406 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { 1407 c_farg0, c_farg1, c_farg2, c_farg3 1408 }; 1409 #else 1410 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { 1411 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5 1412 }; 1413 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { 1414 c_farg0, c_farg1, c_farg2, c_farg3, 1415 c_farg4, c_farg5, c_farg6, c_farg7 1416 }; 1417 #endif // _WIN64 1418 1419 1420 uint int_args = 0; 1421 uint fp_args = 0; 1422 uint stk_args = 0; // inc by 2 each time 1423 1424 for (int i = 0; i < total_args_passed; i++) { 1425 switch (sig_bt[i]) { 1426 case T_BOOLEAN: 1427 case T_CHAR: 1428 case T_BYTE: 1429 case T_SHORT: 1430 case T_INT: 1431 if (int_args < Argument::n_int_register_parameters_c) { 1432 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); 1433 #ifdef _WIN64 1434 fp_args++; 1435 // Allocate slots for callee to stuff register args the stack. 1436 stk_args += 2; 1437 #endif 1438 } else { 1439 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1440 stk_args += 2; 1441 } 1442 break; 1443 case T_LONG: 1444 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 1445 // fall through 1446 case T_OBJECT: 1447 case T_ARRAY: 1448 case T_ADDRESS: 1449 case T_METADATA: 1450 if (int_args < Argument::n_int_register_parameters_c) { 1451 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); 1452 #ifdef _WIN64 1453 fp_args++; 1454 stk_args += 2; 1455 #endif 1456 } else { 1457 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1458 stk_args += 2; 1459 } 1460 break; 1461 case T_FLOAT: 1462 if (fp_args < Argument::n_float_register_parameters_c) { 1463 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); 1464 #ifdef _WIN64 1465 int_args++; 1466 // Allocate slots for callee to stuff register args the stack. 1467 stk_args += 2; 1468 #endif 1469 } else { 1470 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1471 stk_args += 2; 1472 } 1473 break; 1474 case T_DOUBLE: 1475 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 1476 if (fp_args < Argument::n_float_register_parameters_c) { 1477 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); 1478 #ifdef _WIN64 1479 int_args++; 1480 // Allocate slots for callee to stuff register args the stack. 1481 stk_args += 2; 1482 #endif 1483 } else { 1484 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1485 stk_args += 2; 1486 } 1487 break; 1488 case T_VOID: // Halves of longs and doubles 1489 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 1490 regs[i].set_bad(); 1491 break; 1492 default: 1493 ShouldNotReachHere(); 1494 break; 1495 } 1496 } 1497 #ifdef _WIN64 1498 // windows abi requires that we always allocate enough stack space 1499 // for 4 64bit registers to be stored down. 
1500 if (stk_args < 8) { 1501 stk_args = 8; 1502 } 1503 #endif // _WIN64 1504 1505 return stk_args; 1506 } 1507 1508 int SharedRuntime::vector_calling_convention(VMRegPair *regs, 1509 uint num_bits, 1510 uint total_args_passed) { 1511 assert(num_bits == 64 || num_bits == 128 || num_bits == 256 || num_bits == 512, 1512 "only certain vector sizes are supported for now"); 1513 1514 static const XMMRegister VEC_ArgReg[32] = { 1515 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, 1516 xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, 1517 xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23, 1518 xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31 1519 }; 1520 1521 uint stk_args = 0; 1522 uint fp_args = 0; 1523 1524 for (uint i = 0; i < total_args_passed; i++) { 1525 VMReg vmreg = VEC_ArgReg[fp_args++]->as_VMReg(); 1526 int next_val = num_bits == 64 ? 1 : (num_bits == 128 ? 3 : (num_bits == 256 ? 7 : 15)); 1527 regs[i].set_pair(vmreg->next(next_val), vmreg); 1528 } 1529 1530 return stk_args; 1531 } 1532 1533 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { 1534 // We always ignore the frame_slots arg and just use the space just below frame pointer 1535 // which by this time is free to use 1536 switch (ret_type) { 1537 case T_FLOAT: 1538 __ movflt(Address(rbp, -wordSize), xmm0); 1539 break; 1540 case T_DOUBLE: 1541 __ movdbl(Address(rbp, -wordSize), xmm0); 1542 break; 1543 case T_VOID: break; 1544 default: { 1545 __ movptr(Address(rbp, -wordSize), rax); 1546 } 1547 } 1548 } 1549 1550 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { 1551 // We always ignore the frame_slots arg and just use the space just below frame pointer 1552 // which by this time is free to use 1553 switch (ret_type) { 1554 case T_FLOAT: 1555 __ movflt(xmm0, Address(rbp, -wordSize)); 1556 break; 1557 case T_DOUBLE: 1558 __ movdbl(xmm0, Address(rbp, -wordSize)); 1559 break; 1560 case T_VOID: break; 1561 default: { 1562 __ movptr(rax, Address(rbp, -wordSize)); 1563 } 1564 } 1565 } 1566 1567 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { 1568 for ( int i = first_arg ; i < arg_count ; i++ ) { 1569 if (args[i].first()->is_Register()) { 1570 __ push(args[i].first()->as_Register()); 1571 } else if (args[i].first()->is_XMMRegister()) { 1572 __ subptr(rsp, 2*wordSize); 1573 __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister()); 1574 } 1575 } 1576 } 1577 1578 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { 1579 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { 1580 if (args[i].first()->is_Register()) { 1581 __ pop(args[i].first()->as_Register()); 1582 } else if (args[i].first()->is_XMMRegister()) { 1583 __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0)); 1584 __ addptr(rsp, 2*wordSize); 1585 } 1586 } 1587 } 1588 1589 static void verify_oop_args(MacroAssembler* masm, 1590 const methodHandle& method, 1591 const BasicType* sig_bt, 1592 const VMRegPair* regs) { 1593 Register temp_reg = rbx; // not part of any compiled calling seq 1594 if (VerifyOops) { 1595 for (int i = 0; i < method->size_of_parameters(); i++) { 1596 if (is_reference_type(sig_bt[i])) { 1597 VMReg r = regs[i].first(); 1598 assert(r->is_valid(), "bad oop arg"); 1599 if (r->is_stack()) { 1600 __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1601 __ verify_oop(temp_reg); 1602 } else { 1603 __ 
verify_oop(r->as_Register()); 1604 } 1605 } 1606 } 1607 } 1608 } 1609 1610 static void check_continuation_enter_argument(VMReg actual_vmreg, 1611 Register expected_reg, 1612 const char* name) { 1613 assert(!actual_vmreg->is_stack(), "%s cannot be on stack", name); 1614 assert(actual_vmreg->as_Register() == expected_reg, 1615 "%s is in unexpected register: %s instead of %s", 1616 name, actual_vmreg->as_Register()->name(), expected_reg->name()); 1617 } 1618 1619 1620 //---------------------------- continuation_enter_setup --------------------------- 1621 // 1622 // Arguments: 1623 // None. 1624 // 1625 // Results: 1626 // rsp: pointer to blank ContinuationEntry 1627 // 1628 // Kills: 1629 // rax 1630 // 1631 static OopMap* continuation_enter_setup(MacroAssembler* masm, int& stack_slots) { 1632 assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, ""); 1633 assert(in_bytes(ContinuationEntry::cont_offset()) % VMRegImpl::stack_slot_size == 0, ""); 1634 assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, ""); 1635 1636 stack_slots += checked_cast<int>(ContinuationEntry::size()) / wordSize; 1637 __ subptr(rsp, checked_cast<int32_t>(ContinuationEntry::size())); 1638 1639 int frame_size = (checked_cast<int>(ContinuationEntry::size()) + wordSize) / VMRegImpl::stack_slot_size; 1640 OopMap* map = new OopMap(frame_size, 0); 1641 1642 __ movptr(rax, Address(r15_thread, JavaThread::cont_entry_offset())); 1643 __ movptr(Address(rsp, ContinuationEntry::parent_offset()), rax); 1644 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rsp); 1645 1646 return map; 1647 } 1648 1649 //---------------------------- fill_continuation_entry --------------------------- 1650 // 1651 // Arguments: 1652 // rsp: pointer to blank Continuation entry 1653 // reg_cont_obj: pointer to the continuation 1654 // reg_flags: flags 1655 // 1656 // Results: 1657 // rsp: pointer to filled out ContinuationEntry 1658 // 1659 // Kills: 1660 // rax 1661 // 1662 static void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Register reg_flags) { 1663 assert_different_registers(rax, reg_cont_obj, reg_flags); 1664 #ifdef ASSERT 1665 __ movl(Address(rsp, ContinuationEntry::cookie_offset()), ContinuationEntry::cookie_value()); 1666 #endif 1667 __ movptr(Address(rsp, ContinuationEntry::cont_offset()), reg_cont_obj); 1668 __ movl (Address(rsp, ContinuationEntry::flags_offset()), reg_flags); 1669 __ movptr(Address(rsp, ContinuationEntry::chunk_offset()), 0); 1670 __ movl(Address(rsp, ContinuationEntry::argsize_offset()), 0); 1671 __ movl(Address(rsp, ContinuationEntry::pin_count_offset()), 0); 1672 1673 __ movptr(rax, Address(r15_thread, JavaThread::cont_fastpath_offset())); 1674 __ movptr(Address(rsp, ContinuationEntry::parent_cont_fastpath_offset()), rax); 1675 __ movq(rax, Address(r15_thread, JavaThread::held_monitor_count_offset())); 1676 __ movq(Address(rsp, ContinuationEntry::parent_held_monitor_count_offset()), rax); 1677 1678 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), 0); 1679 __ movq(Address(r15_thread, JavaThread::held_monitor_count_offset()), 0); 1680 } 1681 1682 //---------------------------- continuation_enter_cleanup --------------------------- 1683 // 1684 // Arguments: 1685 // rsp: pointer to the ContinuationEntry 1686 // 1687 // Results: 1688 // rsp: pointer to the spilled rbp in the entry frame 1689 // 1690 // Kills: 1691 // rbx 1692 // 1693 void static continuation_enter_cleanup(MacroAssembler* masm) { 1694 #ifdef ASSERT 1695 
Label L_good_sp; 1696 __ cmpptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset())); 1697 __ jcc(Assembler::equal, L_good_sp); 1698 __ stop("Incorrect rsp at continuation_enter_cleanup"); 1699 __ bind(L_good_sp); 1700 #endif 1701 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_cont_fastpath_offset())); 1702 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), rbx); 1703 1704 if (CheckJNICalls) { 1705 // Check if this is a virtual thread continuation 1706 Label L_skip_vthread_code; 1707 __ cmpl(Address(rsp, ContinuationEntry::flags_offset()), 0); 1708 __ jcc(Assembler::equal, L_skip_vthread_code); 1709 1710 // If the held monitor count is > 0 and this vthread is terminating then 1711 // it failed to release a JNI monitor. So we issue the same log message 1712 // that JavaThread::exit does. 1713 __ cmpptr(Address(r15_thread, JavaThread::jni_monitor_count_offset()), 0); 1714 __ jcc(Assembler::equal, L_skip_vthread_code); 1715 1716 // rax may hold an exception oop, save it before the call 1717 __ push(rax); 1718 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::log_jni_monitor_still_held)); 1719 __ pop(rax); 1720 1721 // For vthreads we have to explicitly zero the JNI monitor count of the carrier 1722 // on termination. The held count is implicitly zeroed below when we restore from 1723 // the parent held count (which has to be zero). 1724 __ movq(Address(r15_thread, JavaThread::jni_monitor_count_offset()), 0); 1725 1726 __ bind(L_skip_vthread_code); 1727 } 1728 #ifdef ASSERT 1729 else { 1730 // Check if this is a virtual thread continuation 1731 Label L_skip_vthread_code; 1732 __ cmpl(Address(rsp, ContinuationEntry::flags_offset()), 0); 1733 __ jcc(Assembler::equal, L_skip_vthread_code); 1734 1735 // See comment just above. If not checking JNI calls the JNI count is only 1736 // needed for assertion checking. 1737 __ movq(Address(r15_thread, JavaThread::jni_monitor_count_offset()), 0); 1738 1739 __ bind(L_skip_vthread_code); 1740 } 1741 #endif 1742 1743 __ movq(rbx, Address(rsp, ContinuationEntry::parent_held_monitor_count_offset())); 1744 __ movq(Address(r15_thread, JavaThread::held_monitor_count_offset()), rbx); 1745 1746 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_offset())); 1747 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rbx); 1748 __ addptr(rsp, checked_cast<int32_t>(ContinuationEntry::size())); 1749 } 1750 1751 static void gen_continuation_enter(MacroAssembler* masm, 1752 const VMRegPair* regs, 1753 int& exception_offset, 1754 OopMapSet* oop_maps, 1755 int& frame_complete, 1756 int& stack_slots, 1757 int& interpreted_entry_offset, 1758 int& compiled_entry_offset) { 1759 1760 // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread) 1761 int pos_cont_obj = 0; 1762 int pos_is_cont = 1; 1763 int pos_is_virtual = 2; 1764 1765 // The platform-specific calling convention may present the arguments in various registers. 1766 // To simplify the rest of the code, we expect the arguments to reside at these known 1767 // registers, and we additionally check the placement here in case calling convention ever 1768 // changes. 
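// Hedged note, for illustration: on x86_64 the compiled Java convention maps
// j_rarg0..j_rarg2 onto c_rarg1..c_rarg3, so the three arguments of
// enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread)
// are expected to arrive exactly in the registers named below; the checks that
// follow only verify that assumption, they do not move anything.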
1769 Register reg_cont_obj = c_rarg1; 1770 Register reg_is_cont = c_rarg2; 1771 Register reg_is_virtual = c_rarg3; 1772 1773 check_continuation_enter_argument(regs[pos_cont_obj].first(), reg_cont_obj, "Continuation object"); 1774 check_continuation_enter_argument(regs[pos_is_cont].first(), reg_is_cont, "isContinue"); 1775 check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread"); 1776 1777 // Utility methods kill rax, make sure there are no collisions 1778 assert_different_registers(rax, reg_cont_obj, reg_is_cont, reg_is_virtual); 1779 1780 AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), 1781 relocInfo::static_call_type); 1782 1783 address start = __ pc(); 1784 1785 Label L_thaw, L_exit; 1786 1787 // i2i entry used at interp_only_mode only 1788 interpreted_entry_offset = __ pc() - start; 1789 { 1790 #ifdef ASSERT 1791 Label is_interp_only; 1792 __ cmpb(Address(r15_thread, JavaThread::interp_only_mode_offset()), 0); 1793 __ jcc(Assembler::notEqual, is_interp_only); 1794 __ stop("enterSpecial interpreter entry called when not in interp_only_mode"); 1795 __ bind(is_interp_only); 1796 #endif 1797 1798 __ pop(rax); // return address 1799 // Read interpreter arguments into registers (this is an ad-hoc i2c adapter) 1800 __ movptr(c_rarg1, Address(rsp, Interpreter::stackElementSize*2)); 1801 __ movl(c_rarg2, Address(rsp, Interpreter::stackElementSize*1)); 1802 __ movl(c_rarg3, Address(rsp, Interpreter::stackElementSize*0)); 1803 __ andptr(rsp, -16); // Ensure compiled code always sees stack at proper alignment 1804 __ push(rax); // return address 1805 __ push_cont_fastpath(); 1806 1807 __ enter(); 1808 1809 stack_slots = 2; // will be adjusted in setup 1810 OopMap* map = continuation_enter_setup(masm, stack_slots); 1811 // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe, 1812 // but that's okay because at the very worst we'll miss an async sample, but we're in interp_only_mode anyway. 1813 1814 __ verify_oop(reg_cont_obj); 1815 1816 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual); 1817 1818 // If continuation, call to thaw. Otherwise, resolve the call and exit. 1819 __ testptr(reg_is_cont, reg_is_cont); 1820 __ jcc(Assembler::notZero, L_thaw); 1821 1822 // --- Resolve path 1823 1824 // Make sure the call is patchable 1825 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset); 1826 // Emit stub for static call 1827 address stub = CompiledDirectCall::emit_to_interp_stub(masm, __ pc()); 1828 if (stub == nullptr) { 1829 fatal("CodeCache is full at gen_continuation_enter"); 1830 } 1831 __ call(resolve); 1832 oop_maps->add_gc_map(__ pc() - start, map); 1833 __ post_call_nop(); 1834 1835 __ jmp(L_exit); 1836 } 1837 1838 // compiled entry 1839 __ align(CodeEntryAlignment); 1840 compiled_entry_offset = __ pc() - start; 1841 __ enter(); 1842 1843 stack_slots = 2; // will be adjusted in setup 1844 OopMap* map = continuation_enter_setup(masm, stack_slots); 1845 1846 // Frame is now completed as far as size and linkage. 1847 frame_complete = __ pc() - start; 1848 1849 __ verify_oop(reg_cont_obj); 1850 1851 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual); 1852 1853 // If isContinue, call to thaw. 
Otherwise, call Continuation.enter(Continuation c, boolean isContinue) 1854 __ testptr(reg_is_cont, reg_is_cont); 1855 __ jccb(Assembler::notZero, L_thaw); 1856 1857 // --- call Continuation.enter(Continuation c, boolean isContinue) 1858 1859 // Make sure the call is patchable 1860 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset); 1861 1862 // Emit stub for static call 1863 address stub = CompiledDirectCall::emit_to_interp_stub(masm, __ pc()); 1864 if (stub == nullptr) { 1865 fatal("CodeCache is full at gen_continuation_enter"); 1866 } 1867 1868 // The call needs to be resolved. There's a special case for this in 1869 // SharedRuntime::find_callee_info_helper() which calls 1870 // LinkResolver::resolve_continuation_enter() which resolves the call to 1871 // Continuation.enter(Continuation c, boolean isContinue). 1872 __ call(resolve); 1873 1874 oop_maps->add_gc_map(__ pc() - start, map); 1875 __ post_call_nop(); 1876 1877 __ jmpb(L_exit); 1878 1879 // --- Thawing path 1880 1881 __ bind(L_thaw); 1882 1883 __ call(RuntimeAddress(StubRoutines::cont_thaw())); 1884 1885 ContinuationEntry::_return_pc_offset = __ pc() - start; 1886 oop_maps->add_gc_map(__ pc() - start, map->deep_copy()); 1887 __ post_call_nop(); 1888 1889 // --- Normal exit (resolve/thawing) 1890 1891 __ bind(L_exit); 1892 1893 continuation_enter_cleanup(masm); 1894 __ pop(rbp); 1895 __ ret(0); 1896 1897 // --- Exception handling path 1898 1899 exception_offset = __ pc() - start; 1900 1901 continuation_enter_cleanup(masm); 1902 __ pop(rbp); 1903 1904 __ movptr(c_rarg0, r15_thread); 1905 __ movptr(c_rarg1, Address(rsp, 0)); // return address 1906 1907 // rax still holds the original exception oop, save it before the call 1908 __ push(rax); 1909 1910 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), 2); 1911 __ movptr(rbx, rax); 1912 1913 // Continue at exception handler: 1914 // rax: exception oop 1915 // rbx: exception handler 1916 // rdx: exception pc 1917 __ pop(rax); 1918 __ verify_oop(rax); 1919 __ pop(rdx); 1920 __ jmp(rbx); 1921 } 1922 1923 static void gen_continuation_yield(MacroAssembler* masm, 1924 const VMRegPair* regs, 1925 OopMapSet* oop_maps, 1926 int& frame_complete, 1927 int& stack_slots, 1928 int& compiled_entry_offset) { 1929 enum layout { 1930 rbp_off, 1931 rbpH_off, 1932 return_off, 1933 return_off2, 1934 framesize // inclusive of return address 1935 }; 1936 stack_slots = framesize / VMRegImpl::slots_per_word; 1937 assert(stack_slots == 2, "recheck layout"); 1938 1939 address start = __ pc(); 1940 compiled_entry_offset = __ pc() - start; 1941 __ enter(); 1942 address the_pc = __ pc(); 1943 1944 frame_complete = the_pc - start; 1945 1946 // This nop must be exactly at the PC we push into the frame info. 1947 // We use this nop for fast CodeBlob lookup, associate the OopMap 1948 // with it right away. 
1949 __ post_call_nop(); 1950 OopMap* map = new OopMap(framesize, 1); 1951 oop_maps->add_gc_map(frame_complete, map); 1952 1953 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1); 1954 __ movptr(c_rarg0, r15_thread); 1955 __ movptr(c_rarg1, rsp); 1956 __ call_VM_leaf(Continuation::freeze_entry(), 2); 1957 __ reset_last_Java_frame(true); 1958 1959 Label L_pinned; 1960 1961 __ testptr(rax, rax); 1962 __ jcc(Assembler::notZero, L_pinned); 1963 1964 __ movptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset())); 1965 continuation_enter_cleanup(masm); 1966 __ pop(rbp); 1967 __ ret(0); 1968 1969 __ bind(L_pinned); 1970 1971 // Pinned, return to caller 1972 1973 // handle pending exception thrown by freeze 1974 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 1975 Label ok; 1976 __ jcc(Assembler::equal, ok); 1977 __ leave(); 1978 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 1979 __ bind(ok); 1980 1981 __ leave(); 1982 __ ret(0); 1983 } 1984 1985 static void gen_special_dispatch(MacroAssembler* masm, 1986 const methodHandle& method, 1987 const BasicType* sig_bt, 1988 const VMRegPair* regs) { 1989 verify_oop_args(masm, method, sig_bt, regs); 1990 vmIntrinsics::ID iid = method->intrinsic_id(); 1991 1992 // Now write the args into the outgoing interpreter space 1993 bool has_receiver = false; 1994 Register receiver_reg = noreg; 1995 int member_arg_pos = -1; 1996 Register member_reg = noreg; 1997 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1998 if (ref_kind != 0) { 1999 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 2000 member_reg = rbx; // known to be free at this point 2001 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 2002 } else if (iid == vmIntrinsics::_invokeBasic) { 2003 has_receiver = true; 2004 } else if (iid == vmIntrinsics::_linkToNative) { 2005 member_arg_pos = method->size_of_parameters() - 1; // trailing NativeEntryPoint argument 2006 member_reg = rbx; // known to be free at this point 2007 } else { 2008 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); 2009 } 2010 2011 if (member_reg != noreg) { 2012 // Load the member_arg into register, if necessary. 2013 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 2014 VMReg r = regs[member_arg_pos].first(); 2015 if (r->is_stack()) { 2016 __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 2017 } else { 2018 // no data motion is needed 2019 member_reg = r->as_Register(); 2020 } 2021 } 2022 2023 if (has_receiver) { 2024 // Make sure the receiver is loaded into a register. 2025 assert(method->size_of_parameters() > 0, "oob"); 2026 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 2027 VMReg r = regs[0].first(); 2028 assert(r->is_valid(), "bad receiver arg"); 2029 if (r->is_stack()) { 2030 // Porting note: This assumes that compiled calling conventions always 2031 // pass the receiver oop in a register. If this is not true on some 2032 // platform, pick a temp and load the receiver from stack. 
2033 fatal("receiver always in a register"); 2034 receiver_reg = j_rarg0; // known to be free at this point 2035 __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 2036 } else { 2037 // no data motion is needed 2038 receiver_reg = r->as_Register(); 2039 } 2040 } 2041 2042 // Figure out which address we are really jumping to: 2043 MethodHandles::generate_method_handle_dispatch(masm, iid, 2044 receiver_reg, member_reg, /*for_compiler_entry:*/ true); 2045 } 2046 2047 // --------------------------------------------------------------------------- 2048 // Generate a native wrapper for a given method. The method takes arguments 2049 // in the Java compiled code convention, marshals them to the native 2050 // convention (handlizes oops, etc), transitions to native, makes the call, 2051 // returns to java state (possibly blocking), unhandlizes any result and 2052 // returns. 2053 // 2054 // Critical native functions are a shorthand for the use of 2055 // GetPrimitiveArrayCritical and disallow the use of any other JNI 2056 // functions. The wrapper is expected to unpack the arguments before 2057 // passing them to the callee. Critical native functions leave the state _in_Java, 2058 // since they cannot stop for GC. 2059 // Some other parts of JNI setup are skipped like the tear down of the JNI handle 2060 // block and the check for pending exceptions, because it's impossible for them 2061 // to be thrown. 2062 // 2063 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, 2064 const methodHandle& method, 2065 int compile_id, 2066 BasicType* in_sig_bt, 2067 VMRegPair* in_regs, 2068 BasicType ret_type) { 2069 if (method->is_continuation_native_intrinsic()) { 2070 int exception_offset = -1; 2071 OopMapSet* oop_maps = new OopMapSet(); 2072 int frame_complete = -1; 2073 int stack_slots = -1; 2074 int interpreted_entry_offset = -1; 2075 int vep_offset = -1; 2076 if (method->is_continuation_enter_intrinsic()) { 2077 gen_continuation_enter(masm, 2078 in_regs, 2079 exception_offset, 2080 oop_maps, 2081 frame_complete, 2082 stack_slots, 2083 interpreted_entry_offset, 2084 vep_offset); 2085 } else if (method->is_continuation_yield_intrinsic()) { 2086 gen_continuation_yield(masm, 2087 in_regs, 2088 oop_maps, 2089 frame_complete, 2090 stack_slots, 2091 vep_offset); 2092 } else { 2093 guarantee(false, "Unknown Continuation native intrinsic"); 2094 } 2095 2096 #ifdef ASSERT 2097 if (method->is_continuation_enter_intrinsic()) { 2098 assert(interpreted_entry_offset != -1, "Must be set"); 2099 assert(exception_offset != -1, "Must be set"); 2100 } else { 2101 assert(interpreted_entry_offset == -1, "Must be unset"); 2102 assert(exception_offset == -1, "Must be unset"); 2103 } 2104 assert(frame_complete != -1, "Must be set"); 2105 assert(stack_slots != -1, "Must be set"); 2106 assert(vep_offset != -1, "Must be set"); 2107 #endif 2108 2109 __ flush(); 2110 nmethod* nm = nmethod::new_native_nmethod(method, 2111 compile_id, 2112 masm->code(), 2113 vep_offset, 2114 frame_complete, 2115 stack_slots, 2116 in_ByteSize(-1), 2117 in_ByteSize(-1), 2118 oop_maps, 2119 exception_offset); 2120 if (nm == nullptr) return nm; 2121 if (method->is_continuation_enter_intrinsic()) { 2122 ContinuationEntry::set_enter_code(nm, interpreted_entry_offset); 2123 } else if (method->is_continuation_yield_intrinsic()) { 2124 _cont_doYield_stub = nm; 2125 } 2126 return nm; 2127 } 2128 2129 if (method->is_method_handle_intrinsic()) { 2130 vmIntrinsics::ID iid = method->intrinsic_id(); 2131 intptr_t
start = (intptr_t)__ pc(); 2132 int vep_offset = ((intptr_t)__ pc()) - start; 2133 gen_special_dispatch(masm, 2134 method, 2135 in_sig_bt, 2136 in_regs); 2137 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period 2138 __ flush(); 2139 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually 2140 return nmethod::new_native_nmethod(method, 2141 compile_id, 2142 masm->code(), 2143 vep_offset, 2144 frame_complete, 2145 stack_slots / VMRegImpl::slots_per_word, 2146 in_ByteSize(-1), 2147 in_ByteSize(-1), 2148 nullptr); 2149 } 2150 address native_func = method->native_function(); 2151 assert(native_func != nullptr, "must have function"); 2152 2153 // An OopMap for lock (and class if static) 2154 OopMapSet *oop_maps = new OopMapSet(); 2155 intptr_t start = (intptr_t)__ pc(); 2156 2157 // We have received a description of where all the java arg are located 2158 // on entry to the wrapper. We need to convert these args to where 2159 // the jni function will expect them. To figure out where they go 2160 // we convert the java signature to a C signature by inserting 2161 // the hidden arguments as arg[0] and possibly arg[1] (static method) 2162 2163 const int total_in_args = method->size_of_parameters(); 2164 int total_c_args = total_in_args + (method->is_static() ? 2 : 1); 2165 2166 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 2167 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 2168 BasicType* in_elem_bt = nullptr; 2169 2170 int argc = 0; 2171 out_sig_bt[argc++] = T_ADDRESS; 2172 if (method->is_static()) { 2173 out_sig_bt[argc++] = T_OBJECT; 2174 } 2175 2176 for (int i = 0; i < total_in_args ; i++ ) { 2177 out_sig_bt[argc++] = in_sig_bt[i]; 2178 } 2179 2180 // Now figure out where the args must be stored and how much stack space 2181 // they require. 2182 int out_arg_slots; 2183 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); 2184 2185 // Compute framesize for the wrapper. We need to handlize all oops in 2186 // incoming registers 2187 2188 // Calculate the total number of stack slots we will need. 
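// A worked example of the accounting below, with hypothetical numbers: for a static
// synchronized native with out_arg_slots == 8 the running total is
//   0 (out_preserve) + 8 (outgoing args) + 12 (6-word oop handle area)
//   + 2 (klass handle) + 2 (lock box) + 6 (result temp + return address + saved rbp) = 30,
// which align_up(30, StackAlignmentInSlots == 4) rounds to 32 slots, i.e. 128 bytes.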
2189 2190 // First count the abi requirement plus all of the outgoing args 2191 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 2192 2193 // Now the space for the inbound oop handle area 2194 int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers 2195 2196 int oop_handle_offset = stack_slots; 2197 stack_slots += total_save_slots; 2198 2199 // Now any space we need for handlizing a klass if static method 2200 2201 int klass_slot_offset = 0; 2202 int klass_offset = -1; 2203 int lock_slot_offset = 0; 2204 bool is_static = false; 2205 2206 if (method->is_static()) { 2207 klass_slot_offset = stack_slots; 2208 stack_slots += VMRegImpl::slots_per_word; 2209 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 2210 is_static = true; 2211 } 2212 2213 // Plus a lock if needed 2214 2215 if (method->is_synchronized()) { 2216 lock_slot_offset = stack_slots; 2217 stack_slots += VMRegImpl::slots_per_word; 2218 } 2219 2220 // Now a place (+2) to save return values or temp during shuffling 2221 // + 4 for return address (which we own) and saved rbp 2222 stack_slots += 6; 2223 2224 // Ok The space we have allocated will look like: 2225 // 2226 // 2227 // FP-> | | 2228 // |---------------------| 2229 // | 2 slots for moves | 2230 // |---------------------| 2231 // | lock box (if sync) | 2232 // |---------------------| <- lock_slot_offset 2233 // | klass (if static) | 2234 // |---------------------| <- klass_slot_offset 2235 // | oopHandle area | 2236 // |---------------------| <- oop_handle_offset (6 java arg registers) 2237 // | outbound memory | 2238 // | based arguments | 2239 // | | 2240 // |---------------------| 2241 // | | 2242 // SP-> | out_preserved_slots | 2243 // 2244 // 2245 2246 2247 // Now compute actual number of stack words we need rounding to make 2248 // stack properly aligned. 2249 stack_slots = align_up(stack_slots, StackAlignmentInSlots); 2250 2251 int stack_size = stack_slots * VMRegImpl::stack_slot_size; 2252 2253 // First thing make an ic check to see if we should even be here 2254 2255 // We are free to use all registers as temps without saving them and 2256 // restoring them except rbp. rbp is the only callee save register 2257 // as far as the interpreter and the compiler(s) are concerned. 2258 2259 const Register receiver = j_rarg0; 2260 2261 Label exception_pending; 2262 2263 assert_different_registers(receiver, rscratch1, rscratch2); 2264 __ verify_oop(receiver); 2265 __ ic_check(8 /* end_alignment */); 2266 2267 int vep_offset = ((intptr_t)__ pc()) - start; 2268 2269 if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { 2270 Label L_skip_barrier; 2271 Register klass = r10; 2272 __ mov_metadata(klass, method->method_holder()); // InstanceKlass* 2273 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/); 2274 2275 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path 2276 2277 __ bind(L_skip_barrier); 2278 } 2279 2280 #ifdef COMPILER1 2281 // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available. 
2282 if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { 2283 inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/); 2284 } 2285 #endif // COMPILER1 2286 2287 // The instruction at the verified entry point must be 5 bytes or longer 2288 // because it can be patched on the fly by make_non_entrant. The stack bang 2289 // instruction fits that requirement. 2290 2291 // Generate stack overflow check 2292 __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); 2293 2294 // Generate a new frame for the wrapper. 2295 __ enter(); 2296 // -2 because return address is already present and so is saved rbp 2297 __ subptr(rsp, stack_size - 2*wordSize); 2298 2299 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2300 // native wrapper is not hot enough to micro optimize the nmethod entry barrier with an out-of-line stub 2301 bs->nmethod_entry_barrier(masm, nullptr /* slow_path */, nullptr /* continuation */); 2302 2303 // Frame is now completed as far as size and linkage. 2304 int frame_complete = ((intptr_t)__ pc()) - start; 2305 2306 #ifdef ASSERT 2307 __ check_stack_alignment(rsp, "improperly aligned stack"); 2308 #endif /* ASSERT */ 2309 2310 2311 // We use r14 as the oop handle for the receiver/klass 2312 // It is callee save so it survives the call to native 2313 2314 const Register oop_handle_reg = r14; 2315 2316 // 2317 // We immediately shuffle the arguments so that any vm call we have to 2318 // make from here on out (sync slow path, jvmti, etc.) we will have 2319 // captured the oops from our caller and have a valid oopMap for 2320 // them. 2321 2322 // ----------------- 2323 // The Grand Shuffle 2324 2325 // The Java calling convention is either equal (linux) or denser (win64) than the 2326 // c calling convention. However, because of the jni_env argument the c calling 2327 // convention always has at least one more (and two for static) arguments than Java. 2328 // Therefore if we move the args from java -> c backwards then we will never have 2329 // a register->register conflict and we don't have to build a dependency graph 2330 // and figure out how to break any cycles. 2331 // 2332 2333 // Record esp-based slot for receiver on stack for non-static methods 2334 int receiver_offset = -1; 2335 2336 // This is a trick. We double the stack slots so we can claim 2337 // the oops in the caller's frame. Since we are sure to have 2338 // more args than the caller, doubling is enough to make 2339 // sure we can capture all the incoming oop args from the 2340 // caller. 2341 // 2342 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 2343 2344 // Mark location of rbp (someday) 2345 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp)); 2346 2347 // Use eax, ebx as temporaries during any memory-memory moves we have to do 2348 // All inbound args are referenced based on rbp and all outbound args via rsp. 2349 2350 2351 #ifdef ASSERT 2352 bool reg_destroyed[Register::number_of_registers]; 2353 bool freg_destroyed[XMMRegister::number_of_registers]; 2354 for ( int r = 0 ; r < Register::number_of_registers ; r++ ) { 2355 reg_destroyed[r] = false; 2356 } 2357 for ( int f = 0 ; f < XMMRegister::number_of_registers ; f++ ) { 2358 freg_destroyed[f] = false; 2359 } 2360 2361 #endif /* ASSERT */ 2362 2363 // For JNI natives the incoming and outgoing registers are offset upwards.
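// For illustration, a hypothetical SysV example of the backwards walk: for
// static native void m(long x, Object o) the Java args arrive in j_rarg0/j_rarg1
// (rsi, rdx) while the C signature (JNIEnv*, jclass, jlong, jobject) wants
// rdi, rsi, rdx, rcx. Starting from the last argument, o is handlized and its
// handle ends up in rcx, then x moves rsi -> rdx; the mirror and the JNIEnv* are
// filled into rsi and rdi later. Every destination register has already been read
// by the time it is overwritten, which is why no cycle breaking is needed.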
GrowableArray<int> arg_order(2 * total_in_args); 2365 2366 VMRegPair tmp_vmreg; 2367 tmp_vmreg.set2(rbx->as_VMReg()); 2368 2369 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { 2370 arg_order.push(i); 2371 arg_order.push(c_arg); 2372 } 2373 2374 int temploc = -1; 2375 for (int ai = 0; ai < arg_order.length(); ai += 2) { 2376 int i = arg_order.at(ai); 2377 int c_arg = arg_order.at(ai + 1); 2378 __ block_comment(err_msg("move %d -> %d", i, c_arg)); 2379 #ifdef ASSERT 2380 if (in_regs[i].first()->is_Register()) { 2381 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); 2382 } else if (in_regs[i].first()->is_XMMRegister()) { 2383 assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!"); 2384 } 2385 if (out_regs[c_arg].first()->is_Register()) { 2386 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 2387 } else if (out_regs[c_arg].first()->is_XMMRegister()) { 2388 freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; 2389 } 2390 #endif /* ASSERT */ 2391 switch (in_sig_bt[i]) { 2392 case T_ARRAY: 2393 case T_OBJECT: 2394 __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 2395 ((i == 0) && (!is_static)), 2396 &receiver_offset); 2397 break; 2398 case T_VOID: 2399 break; 2400 2401 case T_FLOAT: 2402 __ float_move(in_regs[i], out_regs[c_arg]); 2403 break; 2404 2405 case T_DOUBLE: 2406 assert( i + 1 < total_in_args && 2407 in_sig_bt[i + 1] == T_VOID && 2408 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 2409 __ double_move(in_regs[i], out_regs[c_arg]); 2410 break; 2411 2412 case T_LONG : 2413 __ long_move(in_regs[i], out_regs[c_arg]); 2414 break; 2415 2416 case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 2417 2418 default: 2419 __ move32_64(in_regs[i], out_regs[c_arg]); 2420 } 2421 } 2422 2423 int c_arg; 2424 2425 // Pre-load a static method's oop into r14. Used both by locking code and 2426 // the normal JNI call code. 2427 // point c_arg at the first arg that is already loaded in case we 2428 // need to spill before we call out 2429 c_arg = total_c_args - total_in_args; 2430 2431 if (method->is_static()) { 2432 2433 // load oop into a register 2434 __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); 2435 2436 // Now handlize the static class mirror; it's known not-null. 2437 __ movptr(Address(rsp, klass_offset), oop_handle_reg); 2438 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 2439 2440 // Now get the handle 2441 __ lea(oop_handle_reg, Address(rsp, klass_offset)); 2442 // store the klass handle as second argument 2443 __ movptr(c_rarg1, oop_handle_reg); 2444 // and protect the arg if we must spill 2445 c_arg--; 2446 } 2447 2448 // Change state to native (we save the return address in the thread, since it might not 2449 // be pushed on the stack when we do a stack traversal). It is enough that the pc() 2450 // points into the right code segment. It does not have to be the correct return pc. 2451 // We use the same pc/oopMap repeatedly when we call out 2452 2453 intptr_t the_pc = (intptr_t) __ pc(); 2454 oop_maps->add_gc_map(the_pc - start, map); 2455 2456 __ set_last_Java_frame(rsp, noreg, (address)the_pc, rscratch1); 2457 2458 2459 // We have all of the arguments set up at this point. We must not touch any of the register 2460 // argument registers from here on (if we were to save/restore them there would be no oopMap covering the spill slots).
2461 2462 if (DTraceMethodProbes) { 2463 // protect the args we've loaded 2464 save_args(masm, total_c_args, c_arg, out_regs); 2465 __ mov_metadata(c_rarg1, method()); 2466 __ call_VM_leaf( 2467 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 2468 r15_thread, c_rarg1); 2469 restore_args(masm, total_c_args, c_arg, out_regs); 2470 } 2471 2472 // RedefineClasses() tracing support for obsolete method entry 2473 if (log_is_enabled(Trace, redefine, class, obsolete)) { 2474 // protect the args we've loaded 2475 save_args(masm, total_c_args, c_arg, out_regs); 2476 __ mov_metadata(c_rarg1, method()); 2477 __ call_VM_leaf( 2478 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), 2479 r15_thread, c_rarg1); 2480 restore_args(masm, total_c_args, c_arg, out_regs); 2481 } 2482 2483 // Lock a synchronized method 2484 2485 // Register definitions used by locking and unlocking 2486 2487 const Register swap_reg = rax; // Must use rax for cmpxchg instruction 2488 const Register obj_reg = rbx; // Will contain the oop 2489 const Register lock_reg = r13; // Address of compiler lock object (BasicLock) 2490 const Register old_hdr = r13; // value of old header at unlock time 2491 2492 Label slow_path_lock; 2493 Label lock_done; 2494 2495 if (method->is_synchronized()) { 2496 Label count_mon; 2497 2498 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); 2499 2500 // Get the handle (the 2nd argument) 2501 __ mov(oop_handle_reg, c_rarg1); 2502 2503 // Get address of the box 2504 2505 __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2506 2507 // Load the oop from the handle 2508 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 2509 2510 if (LockingMode == LM_MONITOR) { 2511 __ jmp(slow_path_lock); 2512 } else if (LockingMode == LM_LEGACY) { 2513 // Load immediate 1 into swap_reg %rax 2514 __ movl(swap_reg, 1); 2515 2516 // Load (object->mark() | 1) into swap_reg %rax 2517 __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2518 if (EnableValhalla) { 2519 // Mask inline_type bit such that we go to the slow path if object is an inline type 2520 __ andptr(swap_reg, ~((int) markWord::inline_type_bit_in_place)); 2521 } 2522 2523 // Save (object->mark() | 1) into BasicLock's displaced header 2524 __ movptr(Address(lock_reg, mark_word_offset), swap_reg); 2525 2526 // src -> dest iff dest == rax else rax <- dest 2527 __ lock(); 2528 __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2529 __ jcc(Assembler::equal, count_mon); 2530 2531 // Hmm should this move to the slow path code area??? 2532 2533 // Test if the oopMark is an obvious stack pointer, i.e., 2534 // 1) (mark & 3) == 0, and 2535 // 2) rsp <= mark < mark + os::pagesize() 2536 // These 3 tests can be done by evaluating the following 2537 // expression: ((mark - rsp) & (3 - os::vm_page_size())), 2538 // assuming both stack pointer and pagesize have their 2539 // least significant 2 bits clear. 
2540 // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg 2541 2542 __ subptr(swap_reg, rsp); 2543 __ andptr(swap_reg, 3 - (int)os::vm_page_size()); 2544 2545 // Save the test result, for recursive case, the result is zero 2546 __ movptr(Address(lock_reg, mark_word_offset), swap_reg); 2547 __ jcc(Assembler::notEqual, slow_path_lock); 2548 } else { 2549 assert(LockingMode == LM_LIGHTWEIGHT, "must be"); 2550 __ lightweight_lock(lock_reg, obj_reg, swap_reg, r15_thread, rscratch1, slow_path_lock); 2551 } 2552 __ bind(count_mon); 2553 __ inc_held_monitor_count(); 2554 2555 // Slow path will re-enter here 2556 __ bind(lock_done); 2557 } 2558 2559 // Finally just about ready to make the JNI call 2560 2561 // get JNIEnv* which is first argument to native 2562 __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset()))); 2563 2564 // Now set thread in native 2565 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native); 2566 2567 __ call(RuntimeAddress(native_func)); 2568 2569 // Verify or restore cpu control state after JNI call 2570 __ restore_cpu_control_state_after_jni(rscratch1); 2571 2572 // Unpack native results. 2573 switch (ret_type) { 2574 case T_BOOLEAN: __ c2bool(rax); break; 2575 case T_CHAR : __ movzwl(rax, rax); break; 2576 case T_BYTE : __ sign_extend_byte (rax); break; 2577 case T_SHORT : __ sign_extend_short(rax); break; 2578 case T_INT : /* nothing to do */ break; 2579 case T_DOUBLE : 2580 case T_FLOAT : 2581 // Result is in xmm0 we'll save as needed 2582 break; 2583 case T_ARRAY: // Really a handle 2584 case T_OBJECT: // Really a handle 2585 break; // can't de-handlize until after safepoint check 2586 case T_VOID: break; 2587 case T_LONG: break; 2588 default : ShouldNotReachHere(); 2589 } 2590 2591 Label after_transition; 2592 2593 // Switch thread to "native transition" state before reading the synchronization state. 2594 // This additional state is necessary because reading and testing the synchronization 2595 // state is not atomic w.r.t. GC, as this scenario demonstrates: 2596 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. 2597 // VM thread changes sync state to synchronizing and suspends threads for GC. 2598 // Thread A is resumed to finish this native method, but doesn't block here since it 2599 // didn't see any synchronization is progress, and escapes. 2600 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans); 2601 2602 // Force this write out before the read below 2603 if (!UseSystemMemoryBarrier) { 2604 __ membar(Assembler::Membar_mask_bits( 2605 Assembler::LoadLoad | Assembler::LoadStore | 2606 Assembler::StoreLoad | Assembler::StoreStore)); 2607 } 2608 2609 // check for safepoint operation in progress and/or pending suspend requests 2610 { 2611 Label Continue; 2612 Label slow_path; 2613 2614 __ safepoint_poll(slow_path, r15_thread, true /* at_return */, false /* in_nmethod */); 2615 2616 __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); 2617 __ jcc(Assembler::equal, Continue); 2618 __ bind(slow_path); 2619 2620 // Don't use call_VM as it will see a possible pending exception and forward it 2621 // and never return here preventing us from clearing _last_native_pc down below. 2622 // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are 2623 // preserved and correspond to the bcp/locals pointers. So we do a runtime call 2624 // by hand. 
2625 // 2626 __ vzeroupper(); 2627 save_native_result(masm, ret_type, stack_slots); 2628 __ mov(c_rarg0, r15_thread); 2629 __ mov(r12, rsp); // remember sp 2630 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2631 __ andptr(rsp, -16); // align stack as required by ABI 2632 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); 2633 __ mov(rsp, r12); // restore sp 2634 __ reinit_heapbase(); 2635 // Restore any method result value 2636 restore_native_result(masm, ret_type, stack_slots); 2637 __ bind(Continue); 2638 } 2639 2640 // change thread state 2641 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); 2642 __ bind(after_transition); 2643 2644 Label reguard; 2645 Label reguard_done; 2646 __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); 2647 __ jcc(Assembler::equal, reguard); 2648 __ bind(reguard_done); 2649 2650 // native result if any is live 2651 2652 // Unlock 2653 Label slow_path_unlock; 2654 Label unlock_done; 2655 if (method->is_synchronized()) { 2656 2657 Label fast_done; 2658 2659 // Get locked oop from the handle we passed to jni 2660 __ movptr(obj_reg, Address(oop_handle_reg, 0)); 2661 2662 if (LockingMode == LM_LEGACY) { 2663 Label not_recur; 2664 // Simple recursive lock? 2665 __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), NULL_WORD); 2666 __ jcc(Assembler::notEqual, not_recur); 2667 __ dec_held_monitor_count(); 2668 __ jmpb(fast_done); 2669 __ bind(not_recur); 2670 } 2671 2672 // Must save rax if it is live now because cmpxchg must use it 2673 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2674 save_native_result(masm, ret_type, stack_slots); 2675 } 2676 2677 if (LockingMode == LM_MONITOR) { 2678 __ jmp(slow_path_unlock); 2679 } else if (LockingMode == LM_LEGACY) { 2680 // get address of the stack lock 2681 __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2682 // get old displaced header 2683 __ movptr(old_hdr, Address(rax, 0)); 2684 2685 // Atomic swap old header if oop still contains the stack lock 2686 __ lock(); 2687 __ cmpxchgptr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2688 __ jcc(Assembler::notEqual, slow_path_unlock); 2689 __ dec_held_monitor_count(); 2690 } else { 2691 assert(LockingMode == LM_LIGHTWEIGHT, "must be"); 2692 __ lightweight_unlock(obj_reg, swap_reg, r15_thread, lock_reg, slow_path_unlock); 2693 __ dec_held_monitor_count(); 2694 } 2695 2696 // slow path re-enters here 2697 __ bind(unlock_done); 2698 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2699 restore_native_result(masm, ret_type, stack_slots); 2700 } 2701 2702 __ bind(fast_done); 2703 } 2704 if (DTraceMethodProbes) { 2705 save_native_result(masm, ret_type, stack_slots); 2706 __ mov_metadata(c_rarg1, method()); 2707 __ call_VM_leaf( 2708 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 2709 r15_thread, c_rarg1); 2710 restore_native_result(masm, ret_type, stack_slots); 2711 } 2712 2713 __ reset_last_Java_frame(false); 2714 2715 // Unbox oop result, e.g. JNIHandles::resolve value. 
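// (For clarity: the native method returned a JNI handle, i.e. a pointer to a slot
// holding the oop, or null. resolve_jobject below loads the oop out of that slot,
// treating a null handle as a null result, so rax holds a real oop afterwards.)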
2716 if (is_reference_type(ret_type)) { 2717 __ resolve_jobject(rax /* value */, 2718 r15_thread /* thread */, 2719 rcx /* tmp */); 2720 } 2721 2722 if (CheckJNICalls) { 2723 // clear_pending_jni_exception_check 2724 __ movptr(Address(r15_thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD); 2725 } 2726 2727 // reset handle block 2728 __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset())); 2729 __ movl(Address(rcx, JNIHandleBlock::top_offset()), NULL_WORD); 2730 2731 // pop our frame 2732 2733 __ leave(); 2734 2735 // Any exception pending? 2736 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2737 __ jcc(Assembler::notEqual, exception_pending); 2738 2739 // Return 2740 2741 __ ret(0); 2742 2743 // Unexpected paths are out of line and go here 2744 2745 // forward the exception 2746 __ bind(exception_pending); 2747 2748 // and forward the exception 2749 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 2750 2751 // Slow path locking & unlocking 2752 if (method->is_synchronized()) { 2753 2754 // BEGIN Slow path lock 2755 __ bind(slow_path_lock); 2756 2757 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM 2758 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2759 2760 // protect the args we've loaded 2761 save_args(masm, total_c_args, c_arg, out_regs); 2762 2763 __ mov(c_rarg0, obj_reg); 2764 __ mov(c_rarg1, lock_reg); 2765 __ mov(c_rarg2, r15_thread); 2766 2767 // Not a leaf but we have last_Java_frame setup as we want 2768 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); 2769 restore_args(masm, total_c_args, c_arg, out_regs); 2770 2771 #ifdef ASSERT 2772 { Label L; 2773 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2774 __ jcc(Assembler::equal, L); 2775 __ stop("no pending exception allowed on exit from monitorenter"); 2776 __ bind(L); 2777 } 2778 #endif 2779 __ jmp(lock_done); 2780 2781 // END Slow path lock 2782 2783 // BEGIN Slow path unlock 2784 __ bind(slow_path_unlock); 2785 2786 // If we haven't already saved the native result we must save it now as xmm registers 2787 // are still exposed. 
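// (The vzeroupper below clears the upper YMM/ZMM state before we call back into the
// VM; presumably this is to avoid AVX-to-SSE transition penalties in the C++ code we
// are about to run, matching the other runtime-call sites in this wrapper.)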
2788 __ vzeroupper(); 2789 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2790 save_native_result(masm, ret_type, stack_slots); 2791 } 2792 2793 __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2794 2795 __ mov(c_rarg0, obj_reg); 2796 __ mov(c_rarg2, r15_thread); 2797 __ mov(r12, rsp); // remember sp 2798 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2799 __ andptr(rsp, -16); // align stack as required by ABI 2800 2801 // Save pending exception around call to VM (which contains an EXCEPTION_MARK) 2802 // NOTE that obj_reg == rbx currently 2803 __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset()))); 2804 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2805 2806 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2807 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); 2808 __ mov(rsp, r12); // restore sp 2809 __ reinit_heapbase(); 2810 #ifdef ASSERT 2811 { 2812 Label L; 2813 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 2814 __ jcc(Assembler::equal, L); 2815 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); 2816 __ bind(L); 2817 } 2818 #endif /* ASSERT */ 2819 2820 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx); 2821 2822 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2823 restore_native_result(masm, ret_type, stack_slots); 2824 } 2825 __ jmp(unlock_done); 2826 2827 // END Slow path unlock 2828 2829 } // synchronized 2830 2831 // SLOW PATH Reguard the stack if needed 2832 2833 __ bind(reguard); 2834 __ vzeroupper(); 2835 save_native_result(masm, ret_type, stack_slots); 2836 __ mov(r12, rsp); // remember sp 2837 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 2838 __ andptr(rsp, -16); // align stack as required by ABI 2839 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); 2840 __ mov(rsp, r12); // restore sp 2841 __ reinit_heapbase(); 2842 restore_native_result(masm, ret_type, stack_slots); 2843 // and continue 2844 __ jmp(reguard_done); 2845 2846 2847 2848 __ flush(); 2849 2850 nmethod *nm = nmethod::new_native_nmethod(method, 2851 compile_id, 2852 masm->code(), 2853 vep_offset, 2854 frame_complete, 2855 stack_slots / VMRegImpl::slots_per_word, 2856 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), 2857 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), 2858 oop_maps); 2859 2860 return nm; 2861 } 2862 2863 // this function returns the adjust size (in number of words) to a c2i adapter 2864 // activation for use during deoptimization 2865 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) { 2866 return (callee_locals - callee_parameters) * Interpreter::stackElementWords; 2867 } 2868 2869 2870 uint SharedRuntime::out_preserve_stack_slots() { 2871 return 0; 2872 } 2873 2874 2875 // Number of stack slots between incoming argument block and the start of 2876 // a new frame. The PROLOG must add this many slots to the stack. The 2877 // EPILOG must remove this many slots. amd64 needs two slots for 2878 // return address. 
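// (Hedged note: VMRegImpl stack slots are 4 bytes, so the 4 slots returned below cover
// 16 bytes: the 8-byte return address plus, presumably, the 8-byte saved rbp; the
// "+ 2 * VerifyStackAtCalls" term reserves one extra word when that debug flag is set.)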
2879 uint SharedRuntime::in_preserve_stack_slots() { 2880 return 4 + 2 * VerifyStackAtCalls; 2881 } 2882 2883 //------------------------------generate_deopt_blob---------------------------- 2884 void SharedRuntime::generate_deopt_blob() { 2885 // Allocate space for the code 2886 ResourceMark rm; 2887 // Setup code generation tools 2888 int pad = 0; 2889 if (UseAVX > 2) { 2890 pad += 1024; 2891 } 2892 if (UseAPX) { 2893 pad += 1024; 2894 } 2895 #if INCLUDE_JVMCI 2896 if (EnableJVMCI) { 2897 pad += 512; // Increase the buffer size when compiling for JVMCI 2898 } 2899 #endif 2900 const char* name = SharedRuntime::stub_name(SharedStubId::deopt_id); 2901 CodeBuffer buffer(name, 2560+pad, 1024); 2902 MacroAssembler* masm = new MacroAssembler(&buffer); 2903 int frame_size_in_words; 2904 OopMap* map = nullptr; 2905 OopMapSet *oop_maps = new OopMapSet(); 2906 2907 // ------------- 2908 // This code enters when returning to a de-optimized nmethod. A return 2909 // address has been pushed on the stack, and return values are in 2910 // registers. 2911 // If we are doing a normal deopt then we were called from the patched 2912 // nmethod from the point we returned to the nmethod. So the return 2913 // address on the stack is wrong by NativeCall::instruction_size 2914 // We will adjust the value so it looks like we have the original return 2915 // address on the stack (like when we eagerly deoptimized). 2916 // In the case of an exception pending when deoptimizing, we enter 2917 // with a return address on the stack that points after the call we patched 2918 // into the exception handler. We have the following register state from, 2919 // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). 2920 // rax: exception oop 2921 // rbx: exception handler 2922 // rdx: throwing pc 2923 // So in this case we simply jam rdx into the useless return address and 2924 // the stack looks just like we want. 2925 // 2926 // At this point we need to de-opt. We save the argument return 2927 // registers. We call the first C routine, fetch_unroll_info(). This 2928 // routine captures the return values and returns a structure which 2929 // describes the current frame size and the sizes of all replacement frames. 2930 // The current frame is compiled code and may contain many inlined 2931 // functions, each with their own JVM state. We pop the current frame, then 2932 // push all the new frames. Then we call the C routine unpack_frames() to 2933 // populate these frames. Finally unpack_frames() returns us the new target 2934 // address. Notice that callee-save registers are BLOWN here; they have 2935 // already been captured in the vframeArray at the time the return PC was 2936 // patched. 2937 address start = __ pc(); 2938 Label cont; 2939 2940 // Prolog for non exception case! 2941 2942 // Save everything in sight. 2943 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2944 2945 // Normal deoptimization. Save exec mode for unpack_frames. 
2946 __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved 2947 __ jmp(cont); 2948 2949 int reexecute_offset = __ pc() - start; 2950 #if INCLUDE_JVMCI && !defined(COMPILER1) 2951 if (UseJVMCICompiler) { 2952 // JVMCI does not use this kind of deoptimization 2953 __ should_not_reach_here(); 2954 } 2955 #endif 2956 2957 // Reexecute case 2958 // the return address is the pc that describes what bci to re-execute at 2959 2960 // No need to update map as each call to save_live_registers will produce identical oopmap 2961 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2962 2963 __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved 2964 __ jmp(cont); 2965 2966 #if INCLUDE_JVMCI 2967 Label after_fetch_unroll_info_call; 2968 int implicit_exception_uncommon_trap_offset = 0; 2969 int uncommon_trap_offset = 0; 2970 2971 if (EnableJVMCI) { 2972 implicit_exception_uncommon_trap_offset = __ pc() - start; 2973 2974 __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); 2975 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), NULL_WORD); 2976 2977 uncommon_trap_offset = __ pc() - start; 2978 2979 // Save everything in sight. 2980 RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 2981 // fetch_unroll_info needs to call last_java_frame() 2982 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 2983 2984 __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset()))); 2985 __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1); 2986 2987 __ movl(r14, Deoptimization::Unpack_reexecute); 2988 __ mov(c_rarg0, r15_thread); 2989 __ movl(c_rarg2, r14); // exec mode 2990 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); 2991 oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); 2992 2993 __ reset_last_Java_frame(false); 2994 2995 __ jmp(after_fetch_unroll_info_call); 2996 } // EnableJVMCI 2997 #endif // INCLUDE_JVMCI 2998 2999 int exception_offset = __ pc() - start; 3000 3001 // Prolog for exception case 3002 3003 // all registers are dead at this entry point, except for rax, and 3004 // rdx which contain the exception oop and exception pc 3005 // respectively. Set them in TLS and fall thru to the 3006 // unpack_with_exception_in_tls entry point. 3007 3008 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx); 3009 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax); 3010 3011 int exception_in_tls_offset = __ pc() - start; 3012 3013 // new implementation because exception oop is now passed in JavaThread 3014 3015 // Prolog for exception case 3016 // All registers must be preserved because they might be used by LinearScan 3017 // Exception oop and throwing PC are passed in JavaThread 3018 // tos: stack at point of call to method that threw the exception (i.e. only 3019 // args are on the stack, no return address) 3020 3021 // make room on stack for the return address 3022 // It will be patched later with the throwing pc. The correct value is not 3023 // available now because loading it from memory would destroy registers. 3024 __ push(0); 3025 3026 // Save everything in sight. 3027 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true); 3028 3029 // Now it is safe to overwrite any register 3030 3031 // Deopt during an exception.
Save exec mode for unpack_frames. 3032 __ movl(r14, Deoptimization::Unpack_exception); // callee-saved 3033 3034 // load throwing pc from JavaThread and patch it as the return address 3035 // of the current frame. Then clear the field in JavaThread 3036 3037 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 3038 __ movptr(Address(rbp, wordSize), rdx); 3039 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD); 3040 3041 #ifdef ASSERT 3042 // verify that there is really an exception oop in JavaThread 3043 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 3044 __ verify_oop(rax); 3045 3046 // verify that there is no pending exception 3047 Label no_pending_exception; 3048 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 3049 __ testptr(rax, rax); 3050 __ jcc(Assembler::zero, no_pending_exception); 3051 __ stop("must not have pending exception here"); 3052 __ bind(no_pending_exception); 3053 #endif 3054 3055 __ bind(cont); 3056 3057 // Call C code. Need thread and this frame, but NOT official VM entry 3058 // crud. We cannot block on this call, no GC can happen. 3059 // 3060 // UnrollBlock* fetch_unroll_info(JavaThread* thread) 3061 3062 // fetch_unroll_info needs to call last_java_frame(). 3063 3064 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 3065 #ifdef ASSERT 3066 { Label L; 3067 __ cmpptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 3068 __ jcc(Assembler::equal, L); 3069 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); 3070 __ bind(L); 3071 } 3072 #endif // ASSERT 3073 __ mov(c_rarg0, r15_thread); 3074 __ movl(c_rarg1, r14); // exec_mode 3075 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); 3076 3077 // Need to have an oopmap that tells fetch_unroll_info where to 3078 // find any register it might need. 3079 oop_maps->add_gc_map(__ pc() - start, map); 3080 3081 __ reset_last_Java_frame(false); 3082 3083 #if INCLUDE_JVMCI 3084 if (EnableJVMCI) { 3085 __ bind(after_fetch_unroll_info_call); 3086 } 3087 #endif 3088 3089 // Load UnrollBlock* into rdi 3090 __ mov(rdi, rax); 3091 3092 __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset())); 3093 Label noException; 3094 __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending? 3095 __ jcc(Assembler::notEqual, noException); 3096 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 3097 // QQQ this is useless it was null above 3098 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 3099 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD); 3100 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD); 3101 3102 __ verify_oop(rax); 3103 3104 // Overwrite the result registers with the exception results. 3105 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3106 // I think this is useless 3107 __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx); 3108 3109 __ bind(noException); 3110 3111 // Only register save data is on the stack. 3112 // Now restore the result registers. Everything else is either dead 3113 // or captured in the vframeArray. 3114 RegisterSaver::restore_result_registers(masm); 3115 3116 // All of the register save area has been popped of the stack. Only the 3117 // return address remains. 3118 3119 // Pop all the frames we must move/replace. 
3120 // 3121 // Frame picture (youngest to oldest) 3122 // 1: self-frame (no frame link) 3123 // 2: deopting frame (no frame link) 3124 // 3: caller of deopting frame (could be compiled/interpreted). 3125 // 3126 // Note: by leaving the return address of self-frame on the stack 3127 // and using the size of frame 2 to adjust the stack 3128 // when we are done the return to frame 3 will still be on the stack. 3129 3130 // Pop deoptimized frame 3131 __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset())); 3132 __ addptr(rsp, rcx); 3133 3134 // rsp should be pointing at the return address to the caller (3) 3135 3136 // Pick up the initial fp we should save 3137 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) 3138 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset())); 3139 3140 #ifdef ASSERT 3141 // Compilers generate code that bang the stack by as much as the 3142 // interpreter would need. So this stack banging should never 3143 // trigger a fault. Verify that it does not on non product builds. 3144 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset())); 3145 __ bang_stack_size(rbx, rcx); 3146 #endif 3147 3148 // Load address of array of frame pcs into rcx 3149 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset())); 3150 3151 // Trash the old pc 3152 __ addptr(rsp, wordSize); 3153 3154 // Load address of array of frame sizes into rsi 3155 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset())); 3156 3157 // Load counter into rdx 3158 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset())); 3159 3160 // Now adjust the caller's stack to make up for the extra locals 3161 // but record the original sp so that we can save it in the skeletal interpreter 3162 // frame and the stack walking of interpreter_sender will get the unextended sp 3163 // value and not the "real" sp value. 3164 3165 const Register sender_sp = r8; 3166 3167 __ mov(sender_sp, rsp); 3168 __ movl(rbx, Address(rdi, 3169 Deoptimization::UnrollBlock:: 3170 caller_adjustment_offset())); 3171 __ subptr(rsp, rbx); 3172 3173 // Push interpreter frames in a loop 3174 Label loop; 3175 __ bind(loop); 3176 __ movptr(rbx, Address(rsi, 0)); // Load frame size 3177 __ subptr(rbx, 2*wordSize); // We'll push pc and ebp by hand 3178 __ pushptr(Address(rcx, 0)); // Save return address 3179 __ enter(); // Save old & set new ebp 3180 __ subptr(rsp, rbx); // Prolog 3181 // This value is corrected by layout_activation_impl 3182 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); 3183 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable 3184 __ mov(sender_sp, rsp); // Pass sender_sp to next frame 3185 __ addptr(rsi, wordSize); // Bump array pointer (sizes) 3186 __ addptr(rcx, wordSize); // Bump array pointer (pcs) 3187 __ decrementl(rdx); // Decrement counter 3188 __ jcc(Assembler::notZero, loop); 3189 __ pushptr(Address(rcx, 0)); // Save final return address 3190 3191 // Re-push self-frame 3192 __ enter(); // Save old & set new ebp 3193 3194 // Allocate a full sized register save area. 3195 // Return address and rbp are in place, so we allocate two less words. 
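  // For example: the pushptr/enter sequence above already placed the final
  // return address and the old rbp on the stack, so those two words of the
  // frame_size_in_words-word save area are accounted for and only
  // (frame_size_in_words - 2) words are reserved below.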
3196 __ subptr(rsp, (frame_size_in_words - 2) * wordSize); 3197 3198 // Restore frame locals after moving the frame 3199 __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0); 3200 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3201 3202 // Call C code. Need thread but NOT official VM entry 3203 // crud. We cannot block on this call, no GC can happen. Call should 3204 // restore return values to their stack-slots with the new SP. 3205 // 3206 // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) 3207 3208 // Use rbp because the frames look interpreted now 3209 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 3210 // Don't need the precise return PC here, just precise enough to point into this code blob. 3211 address the_pc = __ pc(); 3212 __ set_last_Java_frame(noreg, rbp, the_pc, rscratch1); 3213 3214 __ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by ABI 3215 __ mov(c_rarg0, r15_thread); 3216 __ movl(c_rarg1, r14); // second arg: exec_mode 3217 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 3218 // Revert SP alignment after call since we're going to do some SP relative addressing below 3219 __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset())); 3220 3221 // Set an oopmap for the call site 3222 // Use the same PC we used for the last java frame 3223 oop_maps->add_gc_map(the_pc - start, 3224 new OopMap( frame_size_in_words, 0 )); 3225 3226 // Clear fp AND pc 3227 __ reset_last_Java_frame(true); 3228 3229 // Collect return values 3230 __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes())); 3231 __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes())); 3232 // I think this is useless (throwing pc?) 3233 __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes())); 3234 3235 // Pop self-frame. 3236 __ leave(); // Epilog 3237 3238 // Jump to interpreter 3239 __ ret(0); 3240 3241 // Make sure all code is generated 3242 masm->flush(); 3243 3244 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); 3245 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); 3246 #if INCLUDE_JVMCI 3247 if (EnableJVMCI) { 3248 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); 3249 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); 3250 } 3251 #endif 3252 } 3253 3254 //------------------------------generate_handler_blob------ 3255 // 3256 // Generate a special Compile2Runtime blob that saves all registers, 3257 // and setup oopmap. 3258 // 3259 SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address call_ptr) { 3260 assert(StubRoutines::forward_exception_entry() != nullptr, 3261 "must be generated before"); 3262 assert(is_polling_page_id(id), "expected a polling page stub id"); 3263 3264 ResourceMark rm; 3265 OopMapSet *oop_maps = new OopMapSet(); 3266 OopMap* map; 3267 3268 // Allocate space for the code. Setup code generation tools. 
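  // The stub id selects the behavior generated below: cause_return is set
  // for the return-handler variant (the poll happened on method return, so
  // the stub does not patch the return slot with the saved exception pc),
  // and save_wide_vectors is set for the vectors variant (wide vector
  // registers may be live across the poll and are saved/restored as well).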
3269 const char* name = SharedRuntime::stub_name(id); 3270 CodeBuffer buffer(name, 2348, 1024); 3271 MacroAssembler* masm = new MacroAssembler(&buffer); 3272 3273 address start = __ pc(); 3274 address call_pc = nullptr; 3275 int frame_size_in_words; 3276 bool cause_return = (id == SharedStubId::polling_page_return_handler_id); 3277 bool save_wide_vectors = (id == SharedStubId::polling_page_vectors_safepoint_handler_id); 3278 3279 // Make room for return address (or push it again) 3280 if (!cause_return) { 3281 __ push(rbx); 3282 } 3283 3284 // Save registers, fpu state, and flags 3285 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_wide_vectors); 3286 3287 // The following is basically a call_VM. However, we need the precise 3288 // address of the call in order to generate an oopmap. Hence, we do all the 3289 // work ourselves. 3290 3291 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); // JavaFrameAnchor::capture_last_Java_pc() will get the pc from the return address, which we store next: 3292 3293 // The return address must always be correct so that frame constructor never 3294 // sees an invalid pc. 3295 3296 if (!cause_return) { 3297 // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack. 3298 // Additionally, rbx is a callee saved register and we can look at it later to determine 3299 // if someone changed the return address for us! 3300 __ movptr(rbx, Address(r15_thread, JavaThread::saved_exception_pc_offset())); 3301 __ movptr(Address(rbp, wordSize), rbx); 3302 } 3303 3304 // Do the call 3305 __ mov(c_rarg0, r15_thread); 3306 __ call(RuntimeAddress(call_ptr)); 3307 3308 // Set an oopmap for the call site. This oopmap will map all 3309 // oop-registers and debug-info registers as callee-saved. This 3310 // will allow deoptimization at this safepoint to find all possible 3311 // debug-info recordings, as well as let GC find all oops. 3312 3313 oop_maps->add_gc_map( __ pc() - start, map); 3314 3315 Label noException; 3316 3317 __ reset_last_Java_frame(false); 3318 3319 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 3320 __ jcc(Assembler::equal, noException); 3321 3322 // Exception pending 3323 3324 RegisterSaver::restore_live_registers(masm, save_wide_vectors); 3325 3326 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3327 3328 // No exception case 3329 __ bind(noException); 3330 3331 Label no_adjust; 3332 #ifdef ASSERT 3333 Label bail; 3334 #endif 3335 if (!cause_return) { 3336 Label no_prefix, not_special; 3337 3338 // If our stashed return pc was modified by the runtime we avoid touching it 3339 __ cmpptr(rbx, Address(rbp, wordSize)); 3340 __ jccb(Assembler::notEqual, no_adjust); 3341 3342 // Skip over the poll instruction. 
    // See NativeInstruction::is_safepoint_poll()
    // Possible encodings:
    // 85 00       test   %eax,(%rax)
    // 85 01       test   %eax,(%rcx)
    // 85 02       test   %eax,(%rdx)
    // 85 03       test   %eax,(%rbx)
    // 85 06       test   %eax,(%rsi)
    // 85 07       test   %eax,(%rdi)
    //
    // 41 85 00    test   %eax,(%r8)
    // 41 85 01    test   %eax,(%r9)
    // 41 85 02    test   %eax,(%r10)
    // 41 85 03    test   %eax,(%r11)
    // 41 85 06    test   %eax,(%r14)
    // 41 85 07    test   %eax,(%r15)
    //
    // 85 04 24    test   %eax,(%rsp)
    // 41 85 04 24 test   %eax,(%r12)
    // 85 45 00    test   %eax,0x0(%rbp)
    // 41 85 45 00 test   %eax,0x0(%r13)

    __ cmpb(Address(rbx, 0), NativeTstRegMem::instruction_rex_b_prefix);
    __ jcc(Assembler::notEqual, no_prefix);
    __ addptr(rbx, 1);
    __ bind(no_prefix);
#ifdef ASSERT
    __ movptr(rax, rbx); // remember where 0x85 should be, for verification below
#endif
    // r12/r13/rsp/rbp base encoding takes 3 bytes with the following register values:
    // r12/rsp 0x04
    // r13/rbp 0x05
    __ movzbq(rcx, Address(rbx, 1));
    __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05
    __ subptr(rcx, 4);    // looking for 0x00 .. 0x01
    __ cmpptr(rcx, 1);
    __ jcc(Assembler::above, not_special);
    __ addptr(rbx, 1);
    __ bind(not_special);
#ifdef ASSERT
    // Verify the correct encoding of the poll we're about to skip.
    __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl);
    __ jcc(Assembler::notEqual, bail);
    // Mask out the modrm bits
    __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask);
    // rax encodes to 0, so if the bits are nonzero it's incorrect
    __ jcc(Assembler::notZero, bail);
#endif
    // Adjust return pc forward to step over the safepoint poll instruction
    __ addptr(rbx, 2);
    __ movptr(Address(rbp, wordSize), rbx);
  }

  __ bind(no_adjust);
  // Normal exit, restore registers and exit.
  RegisterSaver::restore_live_registers(masm, save_wide_vectors);
  __ ret(0);

#ifdef ASSERT
  __ bind(bail);
  __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
#endif

  // Make sure all code is generated
  masm->flush();

  // Fill-out other meta info
  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
}

//
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into the VM to find out the proper destination
// of a Java call. All the argument registers are live at this point, but
// since this is generic code we don't know what they are and the caller must
// do any GC of the args.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(SharedStubId id, address destination) {
  assert(StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
  assert(is_resolve_id(id), "expected a resolve stub id");

  // allocate space for the code
  ResourceMark rm;

  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 1552, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  int frame_size_in_words;

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = nullptr;

  int start = __ offset();

  // No need to save vector registers since they are caller-saved anyway.
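  // Shape of this stub: save the (possibly live) argument registers, call
  // 'destination' with the current thread as the only argument, stash the
  // returned Method* (rbx) and the new code entry point (rax) back into the
  // register save area, restore everything and jump to rax. If an exception
  // is pending after the call, the registers are restored and control goes
  // to the forward_exception stub instead.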
3439 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ false); 3440 3441 int frame_complete = __ offset(); 3442 3443 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); 3444 3445 __ mov(c_rarg0, r15_thread); 3446 3447 __ call(RuntimeAddress(destination)); 3448 3449 3450 // Set an oopmap for the call site. 3451 // We need this not only for callee-saved registers, but also for volatile 3452 // registers that the compiler might be keeping live across a safepoint. 3453 3454 oop_maps->add_gc_map( __ offset() - start, map); 3455 3456 // rax contains the address we are going to jump to assuming no exception got installed 3457 3458 // clear last_Java_sp 3459 __ reset_last_Java_frame(false); 3460 // check for pending exceptions 3461 Label pending; 3462 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 3463 __ jcc(Assembler::notEqual, pending); 3464 3465 // get the returned Method* 3466 __ get_vm_result_2(rbx, r15_thread); 3467 __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx); 3468 3469 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 3470 3471 RegisterSaver::restore_live_registers(masm); 3472 3473 // We are back to the original state on entry and ready to go. 3474 3475 __ jmp(rax); 3476 3477 // Pending exception after the safepoint 3478 3479 __ bind(pending); 3480 3481 RegisterSaver::restore_live_registers(masm); 3482 3483 // exception pending => remove activation and forward to exception handler 3484 3485 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD); 3486 3487 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 3488 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3489 3490 // ------------- 3491 // make sure all code is generated 3492 masm->flush(); 3493 3494 // return the blob 3495 // frame_size_words or bytes?? 3496 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); 3497 } 3498 3499 // Continuation point for throwing of implicit exceptions that are 3500 // not handled in the current activation. Fabricates an exception 3501 // oop and initiates normal exception dispatching in this 3502 // frame. Since we need to preserve callee-saved values (currently 3503 // only for C2, but done for C1 as well) we need a callee-saved oop 3504 // map and therefore have to make these stubs into RuntimeStubs 3505 // rather than BufferBlobs. If the compiler needs all registers to 3506 // be preserved between the fault point and the exception handler 3507 // then it must assume responsibility for that in 3508 // AbstractCompiler::continuation_for_implicit_null_exception or 3509 // continuation_for_implicit_division_by_zero_exception. All other 3510 // implicit exceptions (e.g., NullPointerException or 3511 // AbstractMethodError on entry) are either at call sites or 3512 // otherwise assume that stack unwinding will be initiated, so 3513 // caller saved registers were assumed volatile in the compiler. 3514 RuntimeStub* SharedRuntime::generate_throw_exception(SharedStubId id, address runtime_entry) { 3515 assert(is_throw_id(id), "expected a throw stub id"); 3516 3517 const char* name = SharedRuntime::stub_name(id); 3518 3519 // Information about frame layout at time of blocking runtime call. 
3520 // Note that we only have to preserve callee-saved registers since 3521 // the compilers are responsible for supplying a continuation point 3522 // if they expect all registers to be preserved. 3523 enum layout { 3524 rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt, 3525 rbp_off2, 3526 return_off, 3527 return_off2, 3528 framesize // inclusive of return address 3529 }; 3530 3531 int insts_size = 512; 3532 int locs_size = 64; 3533 3534 ResourceMark rm; 3535 const char* timer_msg = "SharedRuntime generate_throw_exception"; 3536 TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime)); 3537 3538 CodeBuffer code(name, insts_size, locs_size); 3539 OopMapSet* oop_maps = new OopMapSet(); 3540 MacroAssembler* masm = new MacroAssembler(&code); 3541 3542 address start = __ pc(); 3543 3544 // This is an inlined and slightly modified version of call_VM 3545 // which has the ability to fetch the return PC out of 3546 // thread-local storage and also sets up last_Java_sp slightly 3547 // differently than the real call_VM 3548 3549 __ enter(); // required for proper stackwalking of RuntimeStub frame 3550 3551 assert(is_even(framesize/2), "sp not 16-byte aligned"); 3552 3553 // return address and rbp are already in place 3554 __ subptr(rsp, (framesize-4) << LogBytesPerInt); // prolog 3555 3556 int frame_complete = __ pc() - start; 3557 3558 // Set up last_Java_sp and last_Java_fp 3559 address the_pc = __ pc(); 3560 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1); 3561 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack 3562 3563 // Call runtime 3564 __ movptr(c_rarg0, r15_thread); 3565 BLOCK_COMMENT("call runtime_entry"); 3566 __ call(RuntimeAddress(runtime_entry)); 3567 3568 // Generate oop map 3569 OopMap* map = new OopMap(framesize, 0); 3570 3571 oop_maps->add_gc_map(the_pc - start, map); 3572 3573 __ reset_last_Java_frame(true); 3574 3575 __ leave(); // required for proper stackwalking of RuntimeStub frame 3576 3577 // check for pending exceptions 3578 #ifdef ASSERT 3579 Label L; 3580 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD); 3581 __ jcc(Assembler::notEqual, L); 3582 __ should_not_reach_here(); 3583 __ bind(L); 3584 #endif // ASSERT 3585 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 3586 3587 3588 // codeBlob framesize is in words (not VMRegImpl::slot_size) 3589 RuntimeStub* stub = 3590 RuntimeStub::new_runtime_stub(name, 3591 &code, 3592 frame_complete, 3593 (framesize >> (LogBytesPerWord - LogBytesPerInt)), 3594 oop_maps, false); 3595 return stub; 3596 } 3597 3598 //------------------------------Montgomery multiplication------------------------ 3599 // 3600 3601 #ifndef _WINDOWS 3602 3603 // Subtract 0:b from carry:a. Return carry. 3604 static julong 3605 sub(julong a[], julong b[], julong carry, long len) { 3606 long long i = 0, cnt = len; 3607 julong tmp; 3608 asm volatile("clc; " 3609 "0: ; " 3610 "mov (%[b], %[i], 8), %[tmp]; " 3611 "sbb %[tmp], (%[a], %[i], 8); " 3612 "inc %[i]; dec %[cnt]; " 3613 "jne 0b; " 3614 "mov %[carry], %[tmp]; sbb $0, %[tmp]; " 3615 : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp) 3616 : [a]"r"(a), [b]"r"(b), [carry]"r"(carry) 3617 : "memory"); 3618 return tmp; 3619 } 3620 3621 // Multiply (unsigned) Long A by Long B, accumulating the double- 3622 // length result into the accumulator formed of T0, T1, and T2. 
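// In effect (illustration only): the full 128-bit product A*B is added into
// the 192-bit accumulator T2:T1:T0, with the carry out of T0 propagating
// into T1 and then into T2, i.e. roughly
//   unsigned __int128 p = (unsigned __int128)A * B;
//   (T2:T1:T0) += p;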
3623 #define MACC(A, B, T0, T1, T2) \ 3624 do { \ 3625 unsigned long hi, lo; \ 3626 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 3627 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 3628 : "r"(A), "a"(B) : "cc"); \ 3629 } while(0) 3630 3631 // As above, but add twice the double-length result into the 3632 // accumulator. 3633 #define MACC2(A, B, T0, T1, T2) \ 3634 do { \ 3635 unsigned long hi, lo; \ 3636 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \ 3637 "add %%rax, %2; adc %%rdx, %3; adc $0, %4" \ 3638 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \ 3639 : "r"(A), "a"(B) : "cc"); \ 3640 } while(0) 3641 3642 #else //_WINDOWS 3643 3644 static julong 3645 sub(julong a[], julong b[], julong carry, long len) { 3646 long i; 3647 julong tmp; 3648 unsigned char c = 1; 3649 for (i = 0; i < len; i++) { 3650 c = _addcarry_u64(c, a[i], ~b[i], &tmp); 3651 a[i] = tmp; 3652 } 3653 c = _addcarry_u64(c, carry, ~0, &tmp); 3654 return tmp; 3655 } 3656 3657 // Multiply (unsigned) Long A by Long B, accumulating the double- 3658 // length result into the accumulator formed of T0, T1, and T2. 3659 #define MACC(A, B, T0, T1, T2) \ 3660 do { \ 3661 julong hi, lo; \ 3662 lo = _umul128(A, B, &hi); \ 3663 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \ 3664 c = _addcarry_u64(c, hi, T1, &T1); \ 3665 _addcarry_u64(c, T2, 0, &T2); \ 3666 } while(0) 3667 3668 // As above, but add twice the double-length result into the 3669 // accumulator. 3670 #define MACC2(A, B, T0, T1, T2) \ 3671 do { \ 3672 julong hi, lo; \ 3673 lo = _umul128(A, B, &hi); \ 3674 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \ 3675 c = _addcarry_u64(c, hi, T1, &T1); \ 3676 _addcarry_u64(c, T2, 0, &T2); \ 3677 c = _addcarry_u64(0, lo, T0, &T0); \ 3678 c = _addcarry_u64(c, hi, T1, &T1); \ 3679 _addcarry_u64(c, T2, 0, &T2); \ 3680 } while(0) 3681 3682 #endif //_WINDOWS 3683 3684 // Fast Montgomery multiplication. The derivation of the algorithm is 3685 // in A Cryptographic Library for the Motorola DSP56000, 3686 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 3687 3688 static void NOINLINE 3689 montgomery_multiply(julong a[], julong b[], julong n[], 3690 julong m[], julong inv, int len) { 3691 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 3692 int i; 3693 3694 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery multiply"); 3695 3696 for (i = 0; i < len; i++) { 3697 int j; 3698 for (j = 0; j < i; j++) { 3699 MACC(a[j], b[i-j], t0, t1, t2); 3700 MACC(m[j], n[i-j], t0, t1, t2); 3701 } 3702 MACC(a[i], b[0], t0, t1, t2); 3703 m[i] = t0 * inv; 3704 MACC(m[i], n[0], t0, t1, t2); 3705 3706 assert(t0 == 0, "broken Montgomery multiply"); 3707 3708 t0 = t1; t1 = t2; t2 = 0; 3709 } 3710 3711 for (i = len; i < 2*len; i++) { 3712 int j; 3713 for (j = i-len+1; j < len; j++) { 3714 MACC(a[j], b[i-j], t0, t1, t2); 3715 MACC(m[j], n[i-j], t0, t1, t2); 3716 } 3717 m[i-len] = t0; 3718 t0 = t1; t1 = t2; t2 = 0; 3719 } 3720 3721 while (t0) 3722 t0 = sub(m, n, t0, len); 3723 } 3724 3725 // Fast Montgomery squaring. This uses asymptotically 25% fewer 3726 // multiplies so it should be up to 25% faster than Montgomery 3727 // multiplication. However, its loop control is more complex and it 3728 // may actually run slower on some machines. 
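// The saving comes from symmetry: in a square the cross products a[j]*a[i-j]
// and a[i-j]*a[j] are equal, so each such pair is computed once and added
// twice via MACC2, while the diagonal term a[j]*a[j] (present only when i is
// even) is added once via MACC.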

static void NOINLINE
montgomery_square(julong a[], julong n[],
                  julong m[], julong inv, int len) {
  julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery square");

  for (i = 0; i < len; i++) {
    int j;
    int end = (i+1)/2;
    for (j = 0; j < end; j++) {
      MACC2(a[j], a[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a[j], a[j], t0, t1, t2);
    }
    for (; j < i; j++) {
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    m[i] = t0 * inv;
    MACC(m[i], n[0], t0, t1, t2);

    assert(t0 == 0, "broken Montgomery square");

    t0 = t1; t1 = t2; t2 = 0;
  }

  for (i = len; i < 2*len; i++) {
    int start = i-len+1;
    int end = start + (len - start)/2;
    int j;
    for (j = start; j < end; j++) {
      MACC2(a[j], a[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a[j], a[j], t0, t1, t2);
    }
    for (; j < len; j++) {
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  while (t0)
    t0 = sub(m, n, t0, len);
}

// Swap words in a longword.
static julong swap(julong x) {
  return (x << 32) | (x >> 32);
}

// Copy len longwords from s to d, word-swapping as we go. The
// destination array is reversed.
static void reverse_words(julong *s, julong *d, int len) {
  d += len;
  while (len-- > 0) {
    d--;
    *d = swap(*s);
    s++;
  }
}

// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
#define MONTGOMERY_SQUARING_THRESHOLD 64

void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
                                        jint len, jlong inv,
                                        jint *m_ints) {
  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints corresponds to a 16384-bit integer and will use
  // a total of 8k bytes of stack space here.
  int divisor = sizeof(julong) * 4;
  guarantee(longwords <= 8192 / divisor, "must be");
  int total_allocation = longwords * sizeof (julong) * 4;
  julong *scratch = (julong *)alloca(total_allocation);

  // Local scratch arrays
  julong
    *a = scratch + 0 * longwords,
    *b = scratch + 1 * longwords,
    *n = scratch + 2 * longwords,
    *m = scratch + 3 * longwords;

  reverse_words((julong *)a_ints, a, longwords);
  reverse_words((julong *)b_ints, b, longwords);
  reverse_words((julong *)n_ints, n, longwords);

  ::montgomery_multiply(a, b, n, m, (julong)inv, longwords);

  reverse_words(m, (julong *)m_ints, longwords);
}

void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
                                      jint len, jlong inv,
                                      jint *m_ints) {
  assert(len % 2 == 0, "array length in montgomery_square must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints corresponds to a 16384-bit integer and will use
  // a total of 6k bytes of stack space here.
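  // For example, len == 512 jints gives longwords == 256, and the three
  // scratch arrays below then occupy 3 * 256 * sizeof(julong) == 6144 bytes.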
3840 int divisor = sizeof(julong) * 3; 3841 guarantee(longwords <= (8192 / divisor), "must be"); 3842 int total_allocation = longwords * sizeof (julong) * 3; 3843 julong *scratch = (julong *)alloca(total_allocation); 3844 3845 // Local scratch arrays 3846 julong 3847 *a = scratch + 0 * longwords, 3848 *n = scratch + 1 * longwords, 3849 *m = scratch + 2 * longwords; 3850 3851 reverse_words((julong *)a_ints, a, longwords); 3852 reverse_words((julong *)n_ints, n, longwords); 3853 3854 if (len >= MONTGOMERY_SQUARING_THRESHOLD) { 3855 ::montgomery_square(a, n, m, (julong)inv, longwords); 3856 } else { 3857 ::montgomery_multiply(a, a, n, m, (julong)inv, longwords); 3858 } 3859 3860 reverse_words(m, (julong *)m_ints, longwords); 3861 } 3862 3863 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) { 3864 BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K); 3865 CodeBuffer buffer(buf); 3866 short buffer_locs[20]; 3867 buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, 3868 sizeof(buffer_locs)/sizeof(relocInfo)); 3869 3870 MacroAssembler* masm = new MacroAssembler(&buffer); 3871 3872 const Array<SigEntry>* sig_vk = vk->extended_sig(); 3873 const Array<VMRegPair>* regs = vk->return_regs(); 3874 3875 int pack_fields_jobject_off = __ offset(); 3876 // Resolve pre-allocated buffer from JNI handle. 3877 // We cannot do this in generate_call_stub() because it requires GC code to be initialized. 3878 __ movptr(rax, Address(r13, 0)); 3879 __ resolve_jobject(rax /* value */, 3880 r15_thread /* thread */, 3881 r12 /* tmp */); 3882 __ movptr(Address(r13, 0), rax); 3883 3884 int pack_fields_off = __ offset(); 3885 3886 int j = 1; 3887 for (int i = 0; i < sig_vk->length(); i++) { 3888 BasicType bt = sig_vk->at(i)._bt; 3889 if (bt == T_METADATA) { 3890 continue; 3891 } 3892 if (bt == T_VOID) { 3893 if (sig_vk->at(i-1)._bt == T_LONG || 3894 sig_vk->at(i-1)._bt == T_DOUBLE) { 3895 j++; 3896 } 3897 continue; 3898 } 3899 int off = sig_vk->at(i)._offset; 3900 assert(off > 0, "offset in object should be positive"); 3901 VMRegPair pair = regs->at(j); 3902 VMReg r_1 = pair.first(); 3903 VMReg r_2 = pair.second(); 3904 Address to(rax, off); 3905 if (bt == T_FLOAT) { 3906 __ movflt(to, r_1->as_XMMRegister()); 3907 } else if (bt == T_DOUBLE) { 3908 __ movdbl(to, r_1->as_XMMRegister()); 3909 } else { 3910 Register val = r_1->as_Register(); 3911 assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1); 3912 if (is_reference_type(bt)) { 3913 __ store_heap_oop(to, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); 3914 } else { 3915 __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt)); 3916 } 3917 } 3918 j++; 3919 } 3920 assert(j == regs->length(), "missed a field?"); 3921 3922 __ ret(0); 3923 3924 int unpack_fields_off = __ offset(); 3925 3926 Label skip; 3927 __ testptr(rax, rax); 3928 __ jcc(Assembler::zero, skip); 3929 3930 j = 1; 3931 for (int i = 0; i < sig_vk->length(); i++) { 3932 BasicType bt = sig_vk->at(i)._bt; 3933 if (bt == T_METADATA) { 3934 continue; 3935 } 3936 if (bt == T_VOID) { 3937 if (sig_vk->at(i-1)._bt == T_LONG || 3938 sig_vk->at(i-1)._bt == T_DOUBLE) { 3939 j++; 3940 } 3941 continue; 3942 } 3943 int off = sig_vk->at(i)._offset; 3944 assert(off > 0, "offset in object should be positive"); 3945 VMRegPair pair = regs->at(j); 3946 VMReg r_1 = pair.first(); 3947 VMReg r_2 = pair.second(); 3948 Address from(rax, off); 3949 if (bt == T_FLOAT) { 3950 __ movflt(r_1->as_XMMRegister(), 
from); 3951 } else if (bt == T_DOUBLE) { 3952 __ movdbl(r_1->as_XMMRegister(), from); 3953 } else if (bt == T_OBJECT || bt == T_ARRAY) { 3954 assert_different_registers(rax, r_1->as_Register()); 3955 __ load_heap_oop(r_1->as_Register(), from); 3956 } else { 3957 assert(is_java_primitive(bt), "unexpected basic type"); 3958 assert_different_registers(rax, r_1->as_Register()); 3959 size_t size_in_bytes = type2aelembytes(bt); 3960 __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); 3961 } 3962 j++; 3963 } 3964 assert(j == regs->length(), "missed a field?"); 3965 3966 __ bind(skip); 3967 __ ret(0); 3968 3969 __ flush(); 3970 3971 return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off); 3972 } 3973 3974 #if INCLUDE_JFR 3975 3976 // For c2: c_rarg0 is junk, call to runtime to write a checkpoint. 3977 // It returns a jobject handle to the event writer. 3978 // The handle is dereferenced and the return value is the event writer oop. 3979 RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() { 3980 enum layout { 3981 rbp_off, 3982 rbpH_off, 3983 return_off, 3984 return_off2, 3985 framesize // inclusive of return address 3986 }; 3987 3988 const char* name = SharedRuntime::stub_name(SharedStubId::jfr_write_checkpoint_id); 3989 CodeBuffer code(name, 1024, 64); 3990 MacroAssembler* masm = new MacroAssembler(&code); 3991 address start = __ pc(); 3992 3993 __ enter(); 3994 address the_pc = __ pc(); 3995 3996 int frame_complete = the_pc - start; 3997 3998 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1); 3999 __ movptr(c_rarg0, r15_thread); 4000 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), 1); 4001 __ reset_last_Java_frame(true); 4002 4003 // rax is jobject handle result, unpack and process it through a barrier. 4004 __ resolve_global_jobject(rax, r15_thread, c_rarg0); 4005 4006 __ leave(); 4007 __ ret(0); 4008 4009 OopMapSet* oop_maps = new OopMapSet(); 4010 OopMap* map = new OopMap(framesize, 1); 4011 oop_maps->add_gc_map(frame_complete, map); 4012 4013 RuntimeStub* stub = 4014 RuntimeStub::new_runtime_stub(name, 4015 &code, 4016 frame_complete, 4017 (framesize >> (LogBytesPerWord - LogBytesPerInt)), 4018 oop_maps, 4019 false); 4020 return stub; 4021 } 4022 4023 // For c2: call to return a leased buffer. 4024 RuntimeStub* SharedRuntime::generate_jfr_return_lease() { 4025 enum layout { 4026 rbp_off, 4027 rbpH_off, 4028 return_off, 4029 return_off2, 4030 framesize // inclusive of return address 4031 }; 4032 4033 const char* name = SharedRuntime::stub_name(SharedStubId::jfr_return_lease_id); 4034 CodeBuffer code(name, 1024, 64); 4035 MacroAssembler* masm = new MacroAssembler(&code); 4036 address start = __ pc(); 4037 4038 __ enter(); 4039 address the_pc = __ pc(); 4040 4041 int frame_complete = the_pc - start; 4042 4043 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch2); 4044 __ movptr(c_rarg0, r15_thread); 4045 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), 1); 4046 __ reset_last_Java_frame(true); 4047 4048 __ leave(); 4049 __ ret(0); 4050 4051 OopMapSet* oop_maps = new OopMapSet(); 4052 OopMap* map = new OopMap(framesize, 1); 4053 oop_maps->add_gc_map(frame_complete, map); 4054 4055 RuntimeStub* stub = 4056 RuntimeStub::new_runtime_stub(name, 4057 &code, 4058 frame_complete, 4059 (framesize >> (LogBytesPerWord - LogBytesPerInt)), 4060 oop_maps, 4061 false); 4062 return stub; 4063 } 4064 4065 #endif // INCLUDE_JFR