1 /* 2 * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. 4 * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 * 25 */ 26 27 #include "precompiled.hpp" 28 #include "asm/macroAssembler.hpp" 29 #include "asm/macroAssembler.inline.hpp" 30 #include "classfile/symbolTable.hpp" 31 #include "code/codeCache.hpp" 32 #include "code/debugInfoRec.hpp" 33 #include "code/icBuffer.hpp" 34 #include "code/vtableStubs.hpp" 35 #include "compiler/oopMap.hpp" 36 #include "gc/shared/barrierSetAssembler.hpp" 37 #include "interpreter/interpreter.hpp" 38 #include "interpreter/interp_masm.hpp" 39 #include "logging/log.hpp" 40 #include "memory/resourceArea.hpp" 41 #include "nativeInst_aarch64.hpp" 42 #include "oops/compiledICHolder.hpp" 43 #include "oops/klass.inline.hpp" 44 #include "prims/methodHandles.hpp" 45 #include "runtime/jniHandles.hpp" 46 #include "runtime/safepointMechanism.hpp" 47 #include "runtime/sharedRuntime.hpp" 48 #include "runtime/signature.hpp" 49 #include "runtime/stubRoutines.hpp" 50 #include "runtime/vframeArray.hpp" 51 #include "utilities/align.hpp" 52 #include "utilities/formatBuffer.hpp" 53 #include "vmreg_aarch64.inline.hpp" 54 #ifdef COMPILER1 55 #include "c1/c1_Runtime1.hpp" 56 #endif 57 #ifdef COMPILER2 58 #include "adfiles/ad_aarch64.hpp" 59 #include "opto/runtime.hpp" 60 #endif 61 #if INCLUDE_JVMCI 62 #include "jvmci/jvmciJavaClasses.hpp" 63 #endif 64 65 #define __ masm-> 66 67 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; 68 69 class SimpleRuntimeFrame { 70 71 public: 72 73 // Most of the runtime stubs have this simple frame layout. 74 // This class exists to make the layout shared in one place. 75 // Offsets are for compiler stack slots, which are jints. 76 enum layout { 77 // The frame sender code expects that rbp will be in the "natural" place and 78 // will override any oopMap setting for it. We must therefore force the layout 79 // so that it agrees with the frame sender code. 
80 // we don't expect any arg reg save area so aarch64 asserts that 81 // frame::arg_reg_save_area_bytes == 0 82 rbp_off = 0, 83 rbp_off2, 84 return_off, return_off2, 85 framesize 86 }; 87 }; 88 89 // FIXME -- this is used by C1 90 class RegisterSaver { 91 const bool _save_vectors; 92 public: 93 RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {} 94 95 OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); 96 void restore_live_registers(MacroAssembler* masm); 97 98 // Offsets into the register save area 99 // Used by deoptimization when it is managing result register 100 // values on its own 101 102 int reg_offset_in_bytes(Register r); 103 int r0_offset_in_bytes() { return reg_offset_in_bytes(r0); } 104 int rscratch1_offset_in_bytes() { return reg_offset_in_bytes(rscratch1); } 105 int v0_offset_in_bytes(); 106 107 // Total stack size in bytes for saving sve predicate registers. 108 int total_sve_predicate_in_bytes(); 109 110 // Capture info about frame layout 111 // Note this is only correct when not saving full vectors. 112 enum layout { 113 fpu_state_off = 0, 114 fpu_state_end = fpu_state_off + FPUStateSizeInWords - 1, 115 // The frame sender code expects that rfp will be in 116 // the "natural" place and will override any oopMap 117 // setting for it. We must therefore force the layout 118 // so that it agrees with the frame sender code. 119 r0_off = fpu_state_off + FPUStateSizeInWords, 120 rfp_off = r0_off + (RegisterImpl::number_of_registers - 2) * RegisterImpl::max_slots_per_register, 121 return_off = rfp_off + RegisterImpl::max_slots_per_register, // slot for return address 122 reg_save_size = return_off + RegisterImpl::max_slots_per_register}; 123 124 }; 125 126 int RegisterSaver::reg_offset_in_bytes(Register r) { 127 // The integer registers are located above the floating point 128 // registers in the stack frame pushed by save_live_registers() so the 129 // offset depends on whether we are saving full vectors, and whether 130 // those vectors are NEON or SVE. 131 132 int slots_per_vect = FloatRegisterImpl::save_slots_per_register; 133 134 #if COMPILER2_OR_JVMCI 135 if (_save_vectors) { 136 slots_per_vect = FloatRegisterImpl::slots_per_neon_register; 137 138 #ifdef COMPILER2 139 if (Matcher::supports_scalable_vector()) { 140 slots_per_vect = Matcher::scalable_vector_reg_size(T_FLOAT); 141 } 142 #endif 143 } 144 #endif 145 146 int r0_offset = v0_offset_in_bytes() + (slots_per_vect * FloatRegisterImpl::number_of_registers) * BytesPerInt; 147 return r0_offset + r->encoding() * wordSize; 148 } 149 150 int RegisterSaver::v0_offset_in_bytes() { 151 // The floating point registers are located above the predicate registers if 152 // they are present in the stack frame pushed by save_live_registers(). So the 153 // offset depends on the saved total predicate vectors in the stack frame. 154 return (total_sve_predicate_in_bytes() / VMRegImpl::stack_slot_size) * BytesPerInt; 155 } 156 157 int RegisterSaver::total_sve_predicate_in_bytes() { 158 #ifdef COMPILER2 159 if (_save_vectors && Matcher::supports_scalable_vector()) { 160 // The number of total predicate bytes is unlikely to be a multiple 161 // of 16 bytes so we manually align it up. 
162 return align_up(Matcher::scalable_predicate_reg_slots() * 163 VMRegImpl::stack_slot_size * 164 PRegisterImpl::number_of_saved_registers, 16); 165 } 166 #endif 167 return 0; 168 } 169 170 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { 171 bool use_sve = false; 172 int sve_vector_size_in_bytes = 0; 173 int sve_vector_size_in_slots = 0; 174 int sve_predicate_size_in_slots = 0; 175 int total_predicate_in_bytes = total_sve_predicate_in_bytes(); 176 int total_predicate_in_slots = total_predicate_in_bytes / VMRegImpl::stack_slot_size; 177 178 #ifdef COMPILER2 179 use_sve = Matcher::supports_scalable_vector(); 180 if (use_sve) { 181 sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 182 sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT); 183 sve_predicate_size_in_slots = Matcher::scalable_predicate_reg_slots(); 184 } 185 #endif 186 187 #if COMPILER2_OR_JVMCI 188 if (_save_vectors) { 189 int extra_save_slots_per_register = 0; 190 // Save upper half of vector registers 191 if (use_sve) { 192 extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register; 193 } else { 194 extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register; 195 } 196 int extra_vector_bytes = extra_save_slots_per_register * 197 VMRegImpl::stack_slot_size * 198 FloatRegisterImpl::number_of_registers; 199 additional_frame_words += ((extra_vector_bytes + total_predicate_in_bytes) / wordSize); 200 } 201 #else 202 assert(!_save_vectors, "vectors are generated only by C2 and JVMCI"); 203 #endif 204 205 int frame_size_in_bytes = align_up(additional_frame_words * wordSize + 206 reg_save_size * BytesPerInt, 16); 207 // OopMap frame size is in compiler stack slots (jint's) not bytes or words 208 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; 209 // The caller will allocate additional_frame_words 210 int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; 211 // CodeBlob frame size is in words. 212 int frame_size_in_words = frame_size_in_bytes / wordSize; 213 *total_frame_words = frame_size_in_words; 214 215 // Save Integer and Float registers. 216 __ enter(); 217 __ push_CPU_state(_save_vectors, use_sve, sve_vector_size_in_bytes, total_predicate_in_bytes); 218 219 // Set an oopmap for the call site. This oopmap will map all 220 // oop-registers and debug-info registers as callee-saved. This 221 // will allow deoptimization at this safepoint to find all possible 222 // debug-info recordings, as well as let GC find all oops. 223 224 OopMapSet *oop_maps = new OopMapSet(); 225 OopMap* oop_map = new OopMap(frame_size_in_slots, 0); 226 227 for (int i = 0; i < RegisterImpl::number_of_registers; i++) { 228 Register r = as_Register(i); 229 if (r <= rfp && r != rscratch1 && r != rscratch2) { 230 // SP offsets are in 4-byte words. 231 // Register slots are 8 bytes wide, 32 floating-point registers. 232 int sp_offset = RegisterImpl::max_slots_per_register * i + 233 FloatRegisterImpl::save_slots_per_register * FloatRegisterImpl::number_of_registers; 234 oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), r->as_VMReg()); 235 } 236 } 237 238 for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { 239 FloatRegister r = as_FloatRegister(i); 240 int sp_offset = 0; 241 if (_save_vectors) { 242 sp_offset = use_sve ? 
                              (total_predicate_in_slots + sve_vector_size_in_slots * i) :
                              (FloatRegisterImpl::slots_per_neon_register * i);
    } else {
      sp_offset = FloatRegisterImpl::save_slots_per_register * i;
    }
    oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg());
  }

  if (_save_vectors && use_sve) {
    for (int i = 0; i < PRegisterImpl::number_of_saved_registers; i++) {
      PRegister r = as_PRegister(i);
      int sp_offset = sve_predicate_size_in_slots * i;
      oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg());
    }
  }

  return oop_map;
}

void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
#ifdef COMPILER2
  __ pop_CPU_state(_save_vectors, Matcher::supports_scalable_vector(),
                   Matcher::scalable_vector_reg_size(T_BYTE), total_sve_predicate_in_bytes());
#else
#if !INCLUDE_JVMCI
  assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
#endif
  __ pop_CPU_state(_save_vectors);
#endif
  __ leave();
}

// Is the vector's size (in bytes) bigger than the size saved by default?
// 8-byte vector registers are saved by default on AArch64.
// The minimum supported SVE vector size is 8 bytes, and we need to save
// predicate registers when the vector size is 8 bytes as well.
bool SharedRuntime::is_wide_vector(int size) {
  return size > 8 || (UseSVE > 0 && size >= 8);
}

// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
static int reg2offset_in(VMReg r) {
  // Account for saved rfp and lr
  // This should really be in_preserve_stack_slots
  return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size;
}

static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off of the stack
// pointer as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp), and VMRegImpl::stack0+1
// refers to the memory word 4 bytes higher. Registers 0 up to
// RegisterImpl::number_of_registers are the 64-bit integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words,
// which are 64-bit. The OUTPUTS are in 32-bit units.

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static JNI
// methods with small numbers of arguments without having to shuffle
// the arguments at all. Since we control the Java ABI we ought to at
// least get some advantage out of it.

int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed) {

  // Create the mapping between argument positions and
  // registers.
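  //
  // For example, for the signature (int, long, Object, double) the loop below
  // assigns:
  //   int    -> j_rarg0   (set1; one 32-bit slot)
  //   long   -> j_rarg1   (set2; the trailing T_VOID half is set_bad())
  //   Object -> j_rarg2   (set2)
  //   double -> j_farg0   (set2; the trailing T_VOID half is set_bad())
  // and it spills to stack slots, two at a time, only once the eight integer
  // or eight float argument registers are exhausted.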
323 static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { 324 j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7 325 }; 326 static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { 327 j_farg0, j_farg1, j_farg2, j_farg3, 328 j_farg4, j_farg5, j_farg6, j_farg7 329 }; 330 331 332 uint int_args = 0; 333 uint fp_args = 0; 334 uint stk_args = 0; // inc by 2 each time 335 336 for (int i = 0; i < total_args_passed; i++) { 337 switch (sig_bt[i]) { 338 case T_BOOLEAN: 339 case T_CHAR: 340 case T_BYTE: 341 case T_SHORT: 342 case T_INT: 343 if (int_args < Argument::n_int_register_parameters_j) { 344 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); 345 } else { 346 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 347 stk_args += 2; 348 } 349 break; 350 case T_VOID: 351 // halves of T_LONG or T_DOUBLE 352 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 353 regs[i].set_bad(); 354 break; 355 case T_LONG: 356 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 357 // fall through 358 case T_OBJECT: 359 case T_ARRAY: 360 case T_ADDRESS: 361 case T_PRIMITIVE_OBJECT: 362 if (int_args < Argument::n_int_register_parameters_j) { 363 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); 364 } else { 365 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 366 stk_args += 2; 367 } 368 break; 369 case T_FLOAT: 370 if (fp_args < Argument::n_float_register_parameters_j) { 371 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); 372 } else { 373 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 374 stk_args += 2; 375 } 376 break; 377 case T_DOUBLE: 378 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 379 if (fp_args < Argument::n_float_register_parameters_j) { 380 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); 381 } else { 382 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 383 stk_args += 2; 384 } 385 break; 386 default: 387 ShouldNotReachHere(); 388 break; 389 } 390 } 391 392 return align_up(stk_args, 2); 393 } 394 395 396 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j; 397 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; 398 399 int SharedRuntime::java_return_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed) { 400 401 // Create the mapping between argument positions and registers. 
402 403 static const Register INT_ArgReg[java_return_convention_max_int] = { 404 r0 /* j_rarg7 */, j_rarg6, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0 405 }; 406 407 static const FloatRegister FP_ArgReg[java_return_convention_max_float] = { 408 j_farg0, j_farg1, j_farg2, j_farg3, j_farg4, j_farg5, j_farg6, j_farg7 409 }; 410 411 uint int_args = 0; 412 uint fp_args = 0; 413 414 for (int i = 0; i < total_args_passed; i++) { 415 switch (sig_bt[i]) { 416 case T_BOOLEAN: 417 case T_CHAR: 418 case T_BYTE: 419 case T_SHORT: 420 case T_INT: 421 if (int_args < SharedRuntime::java_return_convention_max_int) { 422 regs[i].set1(INT_ArgReg[int_args]->as_VMReg()); 423 int_args ++; 424 } else { 425 return -1; 426 } 427 break; 428 case T_VOID: 429 // halves of T_LONG or T_DOUBLE 430 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 431 regs[i].set_bad(); 432 break; 433 case T_LONG: 434 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 435 // fall through 436 case T_OBJECT: 437 case T_ARRAY: 438 case T_ADDRESS: 439 // Should T_METADATA be added to java_calling_convention as well ? 440 case T_METADATA: 441 case T_PRIMITIVE_OBJECT: 442 if (int_args < SharedRuntime::java_return_convention_max_int) { 443 regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); 444 int_args ++; 445 } else { 446 return -1; 447 } 448 break; 449 case T_FLOAT: 450 if (fp_args < SharedRuntime::java_return_convention_max_float) { 451 regs[i].set1(FP_ArgReg[fp_args]->as_VMReg()); 452 fp_args ++; 453 } else { 454 return -1; 455 } 456 break; 457 case T_DOUBLE: 458 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 459 if (fp_args < SharedRuntime::java_return_convention_max_float) { 460 regs[i].set2(FP_ArgReg[fp_args]->as_VMReg()); 461 fp_args ++; 462 } else { 463 return -1; 464 } 465 break; 466 default: 467 ShouldNotReachHere(); 468 break; 469 } 470 } 471 472 return int_args + fp_args; 473 } 474 475 // Patch the callers callsite with entry to compiled code if it exists. 476 static void patch_callers_callsite(MacroAssembler *masm) { 477 Label L; 478 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset()))); 479 __ cbz(rscratch1, L); 480 481 __ enter(); 482 __ push_CPU_state(); 483 484 // VM needs caller's callsite 485 // VM needs target method 486 // This needs to be a long call since we will relocate this adapter to 487 // the codeBuffer and it may not reach 488 489 #ifndef PRODUCT 490 assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); 491 #endif 492 493 __ mov(c_rarg0, rmethod); 494 __ mov(c_rarg1, lr); 495 __ authenticate_return_address(c_rarg1, rscratch1); 496 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); 497 __ blr(rscratch1); 498 499 // Explicit isb required because fixup_callers_callsite may change the code 500 // stream. 501 __ safepoint_isb(); 502 503 __ pop_CPU_state(); 504 // restore sp 505 __ leave(); 506 __ bind(L); 507 } 508 509 // For each inline type argument, sig includes the list of fields of 510 // the inline type. This utility function computes the number of 511 // arguments for the call if inline types are passed by reference (the 512 // calling convention the interpreter expects). 
static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
  int total_args_passed = 0;
  if (InlineTypePassFieldsAsArgs) {
    for (int i = 0; i < sig_extended->length(); i++) {
      BasicType bt = sig_extended->at(i)._bt;
      if (bt == T_PRIMITIVE_OBJECT) {
        // In sig_extended, an inline type argument starts with:
        // T_PRIMITIVE_OBJECT, followed by the types of the fields of the
        // inline type and T_VOID to mark the end of the inline
        // type. Inline types are flattened so, for instance, in the
        // case of an inline type with an int field and an inline type
        // field that itself has 2 fields, an int and a long:
        // T_PRIMITIVE_OBJECT T_INT T_PRIMITIVE_OBJECT T_INT T_LONG T_VOID (second
        // slot for the T_LONG) T_VOID (inner T_PRIMITIVE_OBJECT) T_VOID
        // (outer T_PRIMITIVE_OBJECT)
        total_args_passed++;
        int vt = 1;
        do {
          i++;
          BasicType bt = sig_extended->at(i)._bt;
          BasicType prev_bt = sig_extended->at(i-1)._bt;
          if (bt == T_PRIMITIVE_OBJECT) {
            vt++;
          } else if (bt == T_VOID &&
                     prev_bt != T_LONG &&
                     prev_bt != T_DOUBLE) {
            vt--;
          }
        } while (vt != 0);
      } else {
        total_args_passed++;
      }
    }
  } else {
    total_args_passed = sig_extended->length();
  }

  return total_args_passed;
}


static void gen_c2i_adapter_helper(MacroAssembler* masm,
                                   BasicType bt,
                                   BasicType prev_bt,
                                   size_t size_in_bytes,
                                   const VMRegPair& reg_pair,
                                   const Address& to,
                                   Register tmp1,
                                   Register tmp2,
                                   Register tmp3,
                                   int extraspace,
                                   bool is_oop) {
  assert(bt != T_PRIMITIVE_OBJECT || !InlineTypePassFieldsAsArgs, "no inline type here");
  if (bt == T_VOID) {
    assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
    return;
  }

  // Say 4 args:
  // i   st_off
  // 0   32 T_LONG
  // 1   24 T_VOID
  // 2   16 T_OBJECT
  // 3    8 T_BOOL
  // -    0 return address
  //
  // However, to make things extra confusing: because we can fit a Java long/double
  // in a single slot on a 64-bit VM and it would be silly to break it up, the
  // interpreter leaves one slot empty and only stores to a single slot. In this
  // case the slot that is occupied is the T_VOID slot. See, I said it was confusing.
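  //
  // For the example above this means the T_LONG at i == 0 is actually written
  // at st_off 24 (the slot listed for its T_VOID half), while the slot at 32
  // is left unused (the caller fills it with known junk under ASSERT).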
583 584 bool wide = (size_in_bytes == wordSize); 585 VMReg r_1 = reg_pair.first(); 586 VMReg r_2 = reg_pair.second(); 587 assert(r_2->is_valid() == wide, "invalid size"); 588 if (!r_1->is_valid()) { 589 assert(!r_2->is_valid(), ""); 590 return; 591 } 592 593 if (!r_1->is_FloatRegister()) { 594 Register val = tmp3; 595 if (r_1->is_stack()) { 596 // memory to memory use tmp3 (scratch registers are used by store_heap_oop) 597 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; 598 __ load_sized_value(val, Address(sp, ld_off), size_in_bytes, /* is_signed */ false); 599 } else { 600 val = r_1->as_Register(); 601 } 602 assert_different_registers(to.base(), val, rscratch2, tmp1, tmp2); 603 if (is_oop) { 604 __ store_heap_oop(to, val, rscratch2, tmp1, tmp2, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); 605 } else { 606 __ store_sized_value(to, val, size_in_bytes); 607 } 608 } else { 609 if (wide) { 610 __ strd(r_1->as_FloatRegister(), to); 611 } else { 612 // only a float use just part of the slot 613 __ strs(r_1->as_FloatRegister(), to); 614 } 615 } 616 } 617 618 static void gen_c2i_adapter(MacroAssembler *masm, 619 const GrowableArray<SigEntry>* sig_extended, 620 const VMRegPair *regs, 621 Label& skip_fixup, 622 address start, 623 OopMapSet* oop_maps, 624 int& frame_complete, 625 int& frame_size_in_words, 626 bool alloc_inline_receiver) { 627 628 // Before we get into the guts of the C2I adapter, see if we should be here 629 // at all. We've come from compiled code and are attempting to jump to the 630 // interpreter, which means the caller made a static call to get here 631 // (vcalls always get a compiled target if there is one). Check for a 632 // compiled target. If there is one, we need to patch the caller's call. 633 patch_callers_callsite(masm); 634 635 __ bind(skip_fixup); 636 637 // Name some registers to be used in the following code. We can use 638 // anything except r0-r7 which are arguments in the Java calling 639 // convention, rmethod (r12), and r13 which holds the outgoing sender 640 // SP for the interpreter. 641 Register buf_array = r10; // Array of buffered inline types 642 Register buf_oop = r11; // Buffered inline type oop 643 Register tmp1 = r15; 644 Register tmp2 = r16; 645 Register tmp3 = r17; 646 647 if (InlineTypePassFieldsAsArgs) { 648 // Is there an inline type argument? 649 bool has_inline_argument = false; 650 for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) { 651 has_inline_argument = (sig_extended->at(i)._bt == T_PRIMITIVE_OBJECT); 652 } 653 if (has_inline_argument) { 654 // There is at least an inline type argument: we're coming from 655 // compiled code so we have no buffers to back the inline types 656 // Allocate the buffers here with a runtime call. 
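      // The call returns the buffer array via the thread's vm_result and may
      // clobber the argument registers that still hold the caller's
      // compiled-convention arguments, so all live registers are saved across
      // it and restored before the arguments are copied into the buffers.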
657 RegisterSaver reg_save(false /* save_vectors */); 658 OopMap* map = reg_save.save_live_registers(masm, 0, &frame_size_in_words); 659 660 frame_complete = __ offset(); 661 address the_pc = __ pc(); 662 663 Label retaddr; 664 __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); 665 666 __ mov(c_rarg0, rthread); 667 __ mov(c_rarg1, rmethod); 668 __ mov(c_rarg2, (int64_t)alloc_inline_receiver); 669 670 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types))); 671 __ blr(rscratch1); 672 __ bind(retaddr); 673 674 oop_maps->add_gc_map(__ pc() - start, map); 675 __ reset_last_Java_frame(false); 676 677 reg_save.restore_live_registers(masm); 678 679 Label no_exception; 680 __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); 681 __ cbz(rscratch1, no_exception); 682 683 __ str(zr, Address(rthread, JavaThread::vm_result_offset())); 684 __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); 685 __ b(RuntimeAddress(StubRoutines::forward_exception_entry())); 686 687 __ bind(no_exception); 688 689 // We get an array of objects from the runtime call 690 __ get_vm_result(buf_array, rthread); 691 __ get_vm_result_2(rmethod, rthread); // TODO: required to keep the callee Method live? 692 } 693 } 694 695 // Since all args are passed on the stack, total_args_passed * 696 // Interpreter::stackElementSize is the space we need. 697 698 int total_args_passed = compute_total_args_passed_int(sig_extended); 699 int extraspace = total_args_passed * Interpreter::stackElementSize; 700 701 // stack is aligned, keep it that way 702 extraspace = align_up(extraspace, StackAlignmentInBytes); 703 704 // set senderSP value 705 __ mov(r13, sp); 706 707 __ sub(sp, sp, extraspace); 708 709 // Now write the args into the outgoing interpreter space 710 711 // next_arg_comp is the next argument from the compiler point of 712 // view (inline type fields are passed in registers/on the stack). In 713 // sig_extended, an inline type argument starts with: T_PRIMITIVE_OBJECT, 714 // followed by the types of the fields of the inline type and T_VOID 715 // to mark the end of the inline type. ignored counts the number of 716 // T_PRIMITIVE_OBJECT/T_VOID. next_vt_arg is the next inline type argument: 717 // used to get the buffer for that argument from the pool of buffers 718 // we allocated above and want to pass to the 719 // interpreter. next_arg_int is the next argument from the 720 // interpreter point of view (inline types are passed by reference). 721 for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0; 722 next_arg_comp < sig_extended->length(); next_arg_comp++) { 723 assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments"); 724 assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?"); 725 BasicType bt = sig_extended->at(next_arg_comp)._bt; 726 int st_off = (total_args_passed - next_arg_int - 1) * Interpreter::stackElementSize; 727 if (!InlineTypePassFieldsAsArgs || bt != T_PRIMITIVE_OBJECT) { 728 int next_off = st_off - Interpreter::stackElementSize; 729 const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; 730 const VMRegPair reg_pair = regs[next_arg_comp-ignored]; 731 size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4; 732 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? 
sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, 733 size_in_bytes, reg_pair, Address(sp, offset), tmp1, tmp2, tmp3, extraspace, false); 734 next_arg_int++; 735 #ifdef ASSERT 736 if (bt == T_LONG || bt == T_DOUBLE) { 737 // Overwrite the unused slot with known junk 738 __ mov(rscratch1, CONST64(0xdeadffffdeadaaaa)); 739 __ str(rscratch1, Address(sp, st_off)); 740 } 741 #endif /* ASSERT */ 742 } else { 743 ignored++; 744 // get the buffer from the just allocated pool of buffers 745 int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_PRIMITIVE_OBJECT); 746 __ load_heap_oop(buf_oop, Address(buf_array, index)); 747 next_vt_arg++; next_arg_int++; 748 int vt = 1; 749 // write fields we get from compiled code in registers/stack 750 // slots to the buffer: we know we are done with that inline type 751 // argument when we hit the T_VOID that acts as an end of inline 752 // type delimiter for this inline type. Inline types are flattened 753 // so we might encounter embedded inline types. Each entry in 754 // sig_extended contains a field offset in the buffer. 755 Label L_null; 756 do { 757 next_arg_comp++; 758 BasicType bt = sig_extended->at(next_arg_comp)._bt; 759 BasicType prev_bt = sig_extended->at(next_arg_comp - 1)._bt; 760 if (bt == T_PRIMITIVE_OBJECT) { 761 vt++; 762 ignored++; 763 } else if (bt == T_VOID && prev_bt != T_LONG && prev_bt != T_DOUBLE) { 764 vt--; 765 ignored++; 766 } else { 767 int off = sig_extended->at(next_arg_comp)._offset; 768 if (off == -1) { 769 // Nullable inline type argument, emit null check 770 VMReg reg = regs[next_arg_comp-ignored].first(); 771 Label L_notNull; 772 if (reg->is_stack()) { 773 int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace; 774 __ ldr(tmp1, Address(sp, ld_off)); 775 __ cbnz(tmp1, L_notNull); 776 } else { 777 __ cbnz(reg->as_Register(), L_notNull); 778 } 779 __ str(zr, Address(sp, st_off)); 780 __ b(L_null); 781 __ bind(L_notNull); 782 continue; 783 } 784 assert(off > 0, "offset in object should be positive"); 785 size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize; 786 bool is_oop = is_reference_type(bt); 787 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, 788 size_in_bytes, regs[next_arg_comp-ignored], Address(buf_oop, off), tmp1, tmp2, tmp3, extraspace, is_oop); 789 } 790 } while (vt != 0); 791 // pass the buffer to the interpreter 792 __ str(buf_oop, Address(sp, st_off)); 793 __ bind(L_null); 794 } 795 } 796 797 __ mov(esp, sp); // Interp expects args on caller's expression stack 798 799 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::interpreter_entry_offset()))); 800 __ br(rscratch1); 801 } 802 803 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, int comp_args_on_stack, const GrowableArray<SigEntry>* sig, const VMRegPair *regs) { 804 805 806 // Note: r13 contains the senderSP on entry. We must preserve it since 807 // we may do a i2c -> c2i transition if we lose a race where compiled 808 // code goes non-entrant while we get args ready. 809 810 // In addition we use r13 to locate all the interpreter args because 811 // we must align the stack to 16 bytes. 812 813 // Adapters are frameless. 814 815 // An i2c adapter is frameless because the *caller* frame, which is 816 // interpreted, routinely repairs its own esp (from 817 // interpreter_frame_last_sp), even if a callee has modified the 818 // stack pointer. It also recalculates and aligns sp. 
819 820 // A c2i adapter is frameless because the *callee* frame, which is 821 // interpreted, routinely repairs its caller's sp (from sender_sp, 822 // which is set up via the senderSP register). 823 824 // In other words, if *either* the caller or callee is interpreted, we can 825 // get the stack pointer repaired after a call. 826 827 // This is why c2i and i2c adapters cannot be indefinitely composed. 828 // In particular, if a c2i adapter were to somehow call an i2c adapter, 829 // both caller and callee would be compiled methods, and neither would 830 // clean up the stack pointer changes performed by the two adapters. 831 // If this happens, control eventually transfers back to the compiled 832 // caller, but with an uncorrected stack, causing delayed havoc. 833 834 if (VerifyAdapterCalls && 835 (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) { 836 #if 0 837 // So, let's test for cascading c2i/i2c adapters right now. 838 // assert(Interpreter::contains($return_addr) || 839 // StubRoutines::contains($return_addr), 840 // "i2c adapter must return to an interpreter frame"); 841 __ block_comment("verify_i2c { "); 842 Label L_ok; 843 if (Interpreter::code() != NULL) 844 range_check(masm, rax, r11, 845 Interpreter::code()->code_start(), Interpreter::code()->code_end(), 846 L_ok); 847 if (StubRoutines::code1() != NULL) 848 range_check(masm, rax, r11, 849 StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(), 850 L_ok); 851 if (StubRoutines::code2() != NULL) 852 range_check(masm, rax, r11, 853 StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(), 854 L_ok); 855 const char* msg = "i2c adapter must return to an interpreter frame"; 856 __ block_comment(msg); 857 __ stop(msg); 858 __ bind(L_ok); 859 __ block_comment("} verify_i2ce "); 860 #endif 861 } 862 863 // Cut-out for having no stack args. 864 int comp_words_on_stack = 0; 865 if (comp_args_on_stack) { 866 comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; 867 __ sub(rscratch1, sp, comp_words_on_stack * wordSize); 868 __ andr(sp, rscratch1, -16); 869 } 870 871 // Will jump to the compiled code just as if compiled code was doing it. 872 // Pre-load the register-jump target early, to schedule it better. 873 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_inline_offset()))); 874 875 #if INCLUDE_JVMCI 876 if (EnableJVMCI) { 877 // check if this call should be routed towards a specific entry point 878 __ ldr(rscratch2, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); 879 Label no_alternative_target; 880 __ cbz(rscratch2, no_alternative_target); 881 __ mov(rscratch1, rscratch2); 882 __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); 883 __ bind(no_alternative_target); 884 } 885 #endif // INCLUDE_JVMCI 886 887 int total_args_passed = sig->length(); 888 889 // Now generate the shuffle code. 890 for (int i = 0; i < total_args_passed; i++) { 891 BasicType bt = sig->at(i)._bt; 892 893 assert(bt != T_PRIMITIVE_OBJECT, "i2c adapter doesn't unpack inline typ args"); 894 if (bt == T_VOID) { 895 assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half"); 896 continue; 897 } 898 899 // Pick up 0, 1 or 2 words from SP+offset. 900 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); 901 902 // Load in argument order going down. 
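    // The interpreter lays out outgoing arguments towards lower addresses, so
    // argument i is found at esp + (total_args_passed - i - 1) * Interpreter::stackElementSize;
    // a long/double occupies two such slots with its value held in the
    // lower-addressed one (next_off below).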
    int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;
    //
    //
    //
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to account for return address)
      int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size;
      if (!r_2->is_valid()) {
        // sign extend???
        __ ldrsw(rscratch2, Address(esp, ld_off));
        __ str(rscratch2, Address(sp, st_off));
      } else {
        //
        // We are using two optoregs. This can be either T_OBJECT,
        // T_ADDRESS, T_LONG, or T_DOUBLE; the interpreter allocates
        // two slots but only uses one for the T_LONG or T_DOUBLE case,
        // so we must adjust where to pick up the data to match the
        // interpreter.
        //
        // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
        // are accessed at negative offsets so the LSW is at the LOW address.

        // ld_off is MSW so get LSW
        const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off;
        __ ldr(rscratch2, Address(esp, offset));
        // st_off is LSW (i.e. reg.first())
        __ str(rscratch2, Address(sp, st_off));
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      if (r_2->is_valid()) {
        //
        // We are using two VMRegs. This can be either T_OBJECT,
        // T_ADDRESS, T_LONG, or T_DOUBLE; the interpreter allocates
        // two slots but only uses one for the T_LONG or T_DOUBLE case,
        // so we must adjust where to pick up the data to match the
        // interpreter.

        const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off;

        // this can be a misaligned move
        __ ldr(r, Address(esp, offset));
      } else {
        // sign extend and use a full word?
        __ ldrw(r, Address(esp, ld_off));
      }
    } else {
      if (!r_2->is_valid()) {
        __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off));
      } else {
        __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off));
      }
    }
  }

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race through here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the VM will find it there should this case occur.

  __ str(rmethod, Address(rthread, JavaThread::callee_target_offset()));
  __ br(rscratch1);
}

static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {

  Label ok;

  Register holder = rscratch2;
  Register receiver = j_rarg0;
  Register tmp = r10;  // A call-clobbered register not used for arg passing

  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know rmethod holds the Method* during calls
  // to the interpreter. The args start out packed in the compiled layout. They
  // need to be unpacked into the interpreter layout. This will almost always
  // require some stack space.
We grow the current (compiled) stack, then repack 994 // the args. We finally end in a jump to the generic interpreter entry point. 995 // On exit from the interpreter, the interpreter will restore our SP (lest the 996 // compiled code, which relys solely on SP and not FP, get sick). 997 998 { 999 __ block_comment("c2i_unverified_entry {"); 1000 __ load_klass(rscratch1, receiver); 1001 __ ldr(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); 1002 __ cmp(rscratch1, tmp); 1003 __ ldr(rmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); 1004 __ br(Assembler::EQ, ok); 1005 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1006 1007 __ bind(ok); 1008 // Method might have been compiled since the call site was patched to 1009 // interpreted; if that is the case treat it as a miss so we can get 1010 // the call site corrected. 1011 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset()))); 1012 __ cbz(rscratch1, skip_fixup); 1013 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1014 __ block_comment("} c2i_unverified_entry"); 1015 } 1016 } 1017 1018 1019 // --------------------------------------------------------------- 1020 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm, 1021 int comp_args_on_stack, 1022 const GrowableArray<SigEntry>* sig, 1023 const VMRegPair* regs, 1024 const GrowableArray<SigEntry>* sig_cc, 1025 const VMRegPair* regs_cc, 1026 const GrowableArray<SigEntry>* sig_cc_ro, 1027 const VMRegPair* regs_cc_ro, 1028 AdapterFingerPrint* fingerprint, 1029 AdapterBlob*& new_adapter, 1030 bool allocate_code_blob) { 1031 1032 address i2c_entry = __ pc(); 1033 gen_i2c_adapter(masm, comp_args_on_stack, sig, regs); 1034 1035 address c2i_unverified_entry = __ pc(); 1036 Label skip_fixup; 1037 1038 gen_inline_cache_check(masm, skip_fixup); 1039 1040 OopMapSet* oop_maps = new OopMapSet(); 1041 int frame_complete = CodeOffsets::frame_never_safe; 1042 int frame_size_in_words = 0; 1043 1044 // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver) 1045 address c2i_inline_ro_entry = __ pc(); 1046 if (regs_cc != regs_cc_ro) { 1047 gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false); 1048 skip_fixup.reset(); 1049 } 1050 1051 // Scalarized c2i adapter 1052 address c2i_entry = __ pc(); 1053 1054 // Class initialization barrier for static methods 1055 address c2i_no_clinit_check_entry = NULL; 1056 if (VM_Version::supports_fast_class_init_checks()) { 1057 Label L_skip_barrier; 1058 1059 { // Bypass the barrier for non-static methods 1060 __ ldrw(rscratch1, Address(rmethod, Method::access_flags_offset())); 1061 __ andsw(zr, rscratch1, JVM_ACC_STATIC); 1062 __ br(Assembler::EQ, L_skip_barrier); // non-static 1063 } 1064 1065 __ load_method_holder(rscratch2, rmethod); 1066 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier); 1067 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); 1068 1069 __ bind(L_skip_barrier); 1070 c2i_no_clinit_check_entry = __ pc(); 1071 } 1072 1073 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 1074 bs->c2i_entry_barrier(masm); 1075 1076 gen_c2i_adapter(masm, sig_cc, regs_cc, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, true); 1077 1078 address c2i_unverified_inline_entry = c2i_unverified_entry; 1079 1080 // Non-scalarized c2i adapter 1081 address c2i_inline_entry = c2i_entry; 1082 if (regs != regs_cc) { 
1083 Label inline_entry_skip_fixup; 1084 c2i_unverified_inline_entry = __ pc(); 1085 gen_inline_cache_check(masm, inline_entry_skip_fixup); 1086 1087 c2i_inline_entry = __ pc(); 1088 gen_c2i_adapter(masm, sig, regs, inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false); 1089 } 1090 1091 __ flush(); 1092 1093 // The c2i adapter might safepoint and trigger a GC. The caller must make sure that 1094 // the GC knows about the location of oop argument locations passed to the c2i adapter. 1095 if (allocate_code_blob) { 1096 bool caller_must_gc_arguments = (regs != regs_cc); 1097 new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments); 1098 } 1099 1100 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, c2i_unverified_inline_entry, c2i_no_clinit_check_entry); 1101 } 1102 1103 static int c_calling_convention_priv(const BasicType *sig_bt, 1104 VMRegPair *regs, 1105 VMRegPair *regs2, 1106 int total_args_passed) { 1107 assert(regs2 == NULL, "not needed on AArch64"); 1108 1109 // We return the amount of VMRegImpl stack slots we need to reserve for all 1110 // the arguments NOT counting out_preserve_stack_slots. 1111 1112 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { 1113 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 1114 }; 1115 static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { 1116 c_farg0, c_farg1, c_farg2, c_farg3, 1117 c_farg4, c_farg5, c_farg6, c_farg7 1118 }; 1119 1120 uint int_args = 0; 1121 uint fp_args = 0; 1122 uint stk_args = 0; // inc by 2 each time 1123 1124 for (int i = 0; i < total_args_passed; i++) { 1125 switch (sig_bt[i]) { 1126 case T_BOOLEAN: 1127 case T_CHAR: 1128 case T_BYTE: 1129 case T_SHORT: 1130 case T_INT: 1131 if (int_args < Argument::n_int_register_parameters_c) { 1132 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); 1133 } else { 1134 #ifdef __APPLE__ 1135 // Less-than word types are stored one after another. 1136 // The code is unable to handle this so bailout. 1137 return -1; 1138 #endif 1139 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1140 stk_args += 2; 1141 } 1142 break; 1143 case T_LONG: 1144 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 1145 // fall through 1146 case T_OBJECT: 1147 case T_ARRAY: 1148 case T_PRIMITIVE_OBJECT: 1149 case T_ADDRESS: 1150 case T_METADATA: 1151 if (int_args < Argument::n_int_register_parameters_c) { 1152 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); 1153 } else { 1154 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1155 stk_args += 2; 1156 } 1157 break; 1158 case T_FLOAT: 1159 if (fp_args < Argument::n_float_register_parameters_c) { 1160 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); 1161 } else { 1162 #ifdef __APPLE__ 1163 // Less-than word types are stored one after another. 1164 // The code is unable to handle this so bailout. 
1165 return -1; 1166 #endif 1167 regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1168 stk_args += 2; 1169 } 1170 break; 1171 case T_DOUBLE: 1172 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); 1173 if (fp_args < Argument::n_float_register_parameters_c) { 1174 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); 1175 } else { 1176 regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1177 stk_args += 2; 1178 } 1179 break; 1180 case T_VOID: // Halves of longs and doubles 1181 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 1182 regs[i].set_bad(); 1183 break; 1184 default: 1185 ShouldNotReachHere(); 1186 break; 1187 } 1188 } 1189 1190 return stk_args; 1191 } 1192 1193 int SharedRuntime::vector_calling_convention(VMRegPair *regs, 1194 uint num_bits, 1195 uint total_args_passed) { 1196 Unimplemented(); 1197 return 0; 1198 } 1199 1200 int SharedRuntime::c_calling_convention(const BasicType *sig_bt, 1201 VMRegPair *regs, 1202 VMRegPair *regs2, 1203 int total_args_passed) 1204 { 1205 int result = c_calling_convention_priv(sig_bt, regs, regs2, total_args_passed); 1206 guarantee(result >= 0, "Unsupported arguments configuration"); 1207 return result; 1208 } 1209 1210 // On 64 bit we will store integer like items to the stack as 1211 // 64 bits items (Aarch64 abi) even though java would only store 1212 // 32bits for a parameter. On 32bit it will simply be 32 bits 1213 // So this routine will do 32->32 on 32bit and 32->64 on 64bit 1214 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1215 if (src.first()->is_stack()) { 1216 if (dst.first()->is_stack()) { 1217 // stack to stack 1218 __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first()))); 1219 __ str(rscratch1, Address(sp, reg2offset_out(dst.first()))); 1220 } else { 1221 // stack to reg 1222 __ ldrsw(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first()))); 1223 } 1224 } else if (dst.first()->is_stack()) { 1225 // reg to stack 1226 // Do we really have to sign extend??? 1227 // __ movslq(src.first()->as_Register(), src.first()->as_Register()); 1228 __ str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 1229 } else { 1230 if (dst.first() != src.first()) { 1231 __ sxtw(dst.first()->as_Register(), src.first()->as_Register()); 1232 } 1233 } 1234 } 1235 1236 // An oop arg. Must pass a handle not the oop itself 1237 static void object_move(MacroAssembler* masm, 1238 OopMap* map, 1239 int oop_handle_offset, 1240 int framesize_in_slots, 1241 VMRegPair src, 1242 VMRegPair dst, 1243 bool is_receiver, 1244 int* receiver_offset) { 1245 1246 // must pass a handle. First figure out the location we use as a handle 1247 1248 Register rHandle = dst.first()->is_stack() ? 
rscratch2 : dst.first()->as_Register(); 1249 1250 // See if oop is NULL if it is we need no handle 1251 1252 if (src.first()->is_stack()) { 1253 1254 // Oop is already on the stack as an argument 1255 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); 1256 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); 1257 if (is_receiver) { 1258 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; 1259 } 1260 1261 __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first()))); 1262 __ lea(rHandle, Address(rfp, reg2offset_in(src.first()))); 1263 // conditionally move a NULL 1264 __ cmp(rscratch1, zr); 1265 __ csel(rHandle, zr, rHandle, Assembler::EQ); 1266 } else { 1267 1268 // Oop is in an a register we must store it to the space we reserve 1269 // on the stack for oop_handles and pass a handle if oop is non-NULL 1270 1271 const Register rOop = src.first()->as_Register(); 1272 int oop_slot; 1273 if (rOop == j_rarg0) 1274 oop_slot = 0; 1275 else if (rOop == j_rarg1) 1276 oop_slot = 1; 1277 else if (rOop == j_rarg2) 1278 oop_slot = 2; 1279 else if (rOop == j_rarg3) 1280 oop_slot = 3; 1281 else if (rOop == j_rarg4) 1282 oop_slot = 4; 1283 else if (rOop == j_rarg5) 1284 oop_slot = 5; 1285 else if (rOop == j_rarg6) 1286 oop_slot = 6; 1287 else { 1288 assert(rOop == j_rarg7, "wrong register"); 1289 oop_slot = 7; 1290 } 1291 1292 oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; 1293 int offset = oop_slot*VMRegImpl::stack_slot_size; 1294 1295 map->set_oop(VMRegImpl::stack2reg(oop_slot)); 1296 // Store oop in handle area, may be NULL 1297 __ str(rOop, Address(sp, offset)); 1298 if (is_receiver) { 1299 *receiver_offset = offset; 1300 } 1301 1302 __ cmp(rOop, zr); 1303 __ lea(rHandle, Address(sp, offset)); 1304 // conditionally move a NULL 1305 __ csel(rHandle, zr, rHandle, Assembler::EQ); 1306 } 1307 1308 // If arg is on the stack then place it otherwise it is already in correct reg. 1309 if (dst.first()->is_stack()) { 1310 __ str(rHandle, Address(sp, reg2offset_out(dst.first()))); 1311 } 1312 } 1313 1314 // A float arg may have to do float reg int reg conversion 1315 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1316 assert(src.first()->is_stack() && dst.first()->is_stack() || 1317 src.first()->is_reg() && dst.first()->is_reg(), "Unexpected error"); 1318 if (src.first()->is_stack()) { 1319 if (dst.first()->is_stack()) { 1320 __ ldrw(rscratch1, Address(rfp, reg2offset_in(src.first()))); 1321 __ strw(rscratch1, Address(sp, reg2offset_out(dst.first()))); 1322 } else { 1323 ShouldNotReachHere(); 1324 } 1325 } else if (src.first() != dst.first()) { 1326 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) 1327 __ fmovs(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 1328 else 1329 ShouldNotReachHere(); 1330 } 1331 } 1332 1333 // A long move 1334 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1335 if (src.first()->is_stack()) { 1336 if (dst.first()->is_stack()) { 1337 // stack to stack 1338 __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first()))); 1339 __ str(rscratch1, Address(sp, reg2offset_out(dst.first()))); 1340 } else { 1341 // stack to reg 1342 __ ldr(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first()))); 1343 } 1344 } else if (dst.first()->is_stack()) { 1345 // reg to stack 1346 // Do we really have to sign extend??? 
1347 // __ movslq(src.first()->as_Register(), src.first()->as_Register()); 1348 __ str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 1349 } else { 1350 if (dst.first() != src.first()) { 1351 __ mov(dst.first()->as_Register(), src.first()->as_Register()); 1352 } 1353 } 1354 } 1355 1356 1357 // A double move 1358 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1359 assert(src.first()->is_stack() && dst.first()->is_stack() || 1360 src.first()->is_reg() && dst.first()->is_reg(), "Unexpected error"); 1361 if (src.first()->is_stack()) { 1362 if (dst.first()->is_stack()) { 1363 __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first()))); 1364 __ str(rscratch1, Address(sp, reg2offset_out(dst.first()))); 1365 } else { 1366 ShouldNotReachHere(); 1367 } 1368 } else if (src.first() != dst.first()) { 1369 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) 1370 __ fmovd(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 1371 else 1372 ShouldNotReachHere(); 1373 } 1374 } 1375 1376 1377 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { 1378 // We always ignore the frame_slots arg and just use the space just below frame pointer 1379 // which by this time is free to use 1380 switch (ret_type) { 1381 case T_FLOAT: 1382 __ strs(v0, Address(rfp, -wordSize)); 1383 break; 1384 case T_DOUBLE: 1385 __ strd(v0, Address(rfp, -wordSize)); 1386 break; 1387 case T_VOID: break; 1388 default: { 1389 __ str(r0, Address(rfp, -wordSize)); 1390 } 1391 } 1392 } 1393 1394 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { 1395 // We always ignore the frame_slots arg and just use the space just below frame pointer 1396 // which by this time is free to use 1397 switch (ret_type) { 1398 case T_FLOAT: 1399 __ ldrs(v0, Address(rfp, -wordSize)); 1400 break; 1401 case T_DOUBLE: 1402 __ ldrd(v0, Address(rfp, -wordSize)); 1403 break; 1404 case T_VOID: break; 1405 default: { 1406 __ ldr(r0, Address(rfp, -wordSize)); 1407 } 1408 } 1409 } 1410 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { 1411 RegSet x; 1412 for ( int i = first_arg ; i < arg_count ; i++ ) { 1413 if (args[i].first()->is_Register()) { 1414 x = x + args[i].first()->as_Register(); 1415 } else if (args[i].first()->is_FloatRegister()) { 1416 __ strd(args[i].first()->as_FloatRegister(), Address(__ pre(sp, -2 * wordSize))); 1417 } 1418 } 1419 __ push(x, sp); 1420 } 1421 1422 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { 1423 RegSet x; 1424 for ( int i = first_arg ; i < arg_count ; i++ ) { 1425 if (args[i].first()->is_Register()) { 1426 x = x + args[i].first()->as_Register(); 1427 } else { 1428 ; 1429 } 1430 } 1431 __ pop(x, sp); 1432 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { 1433 if (args[i].first()->is_Register()) { 1434 ; 1435 } else if (args[i].first()->is_FloatRegister()) { 1436 __ ldrd(args[i].first()->as_FloatRegister(), Address(__ post(sp, 2 * wordSize))); 1437 } 1438 } 1439 } 1440 1441 static void rt_call(MacroAssembler* masm, address dest) { 1442 CodeBlob *cb = CodeCache::find_blob(dest); 1443 if (cb) { 1444 __ far_call(RuntimeAddress(dest)); 1445 } else { 1446 __ lea(rscratch1, RuntimeAddress(dest)); 1447 __ blr(rscratch1); 1448 } 1449 } 1450 1451 static void verify_oop_args(MacroAssembler* masm, 1452 const methodHandle& method, 1453 const BasicType* sig_bt, 1454 const VMRegPair* regs) 
{ 1455 Register temp_reg = r19; // not part of any compiled calling seq 1456 if (VerifyOops) { 1457 for (int i = 0; i < method->size_of_parameters(); i++) { 1458 if (sig_bt[i] == T_OBJECT || 1459 sig_bt[i] == T_ARRAY) { 1460 VMReg r = regs[i].first(); 1461 assert(r->is_valid(), "bad oop arg"); 1462 if (r->is_stack()) { 1463 __ ldr(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); 1464 __ verify_oop(temp_reg); 1465 } else { 1466 __ verify_oop(r->as_Register()); 1467 } 1468 } 1469 } 1470 } 1471 } 1472 1473 static void gen_special_dispatch(MacroAssembler* masm, 1474 const methodHandle& method, 1475 const BasicType* sig_bt, 1476 const VMRegPair* regs) { 1477 verify_oop_args(masm, method, sig_bt, regs); 1478 vmIntrinsics::ID iid = method->intrinsic_id(); 1479 1480 // Now write the args into the outgoing interpreter space 1481 bool has_receiver = false; 1482 Register receiver_reg = noreg; 1483 int member_arg_pos = -1; 1484 Register member_reg = noreg; 1485 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1486 if (ref_kind != 0) { 1487 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 1488 member_reg = r19; // known to be free at this point 1489 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 1490 } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { 1491 has_receiver = true; 1492 } else { 1493 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); 1494 } 1495 1496 if (member_reg != noreg) { 1497 // Load the member_arg into register, if necessary. 1498 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 1499 VMReg r = regs[member_arg_pos].first(); 1500 if (r->is_stack()) { 1501 __ ldr(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); 1502 } else { 1503 // no data motion is needed 1504 member_reg = r->as_Register(); 1505 } 1506 } 1507 1508 if (has_receiver) { 1509 // Make sure the receiver is loaded into a register. 1510 assert(method->size_of_parameters() > 0, "oob"); 1511 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 1512 VMReg r = regs[0].first(); 1513 assert(r->is_valid(), "bad receiver arg"); 1514 if (r->is_stack()) { 1515 // Porting note: This assumes that compiled calling conventions always 1516 // pass the receiver oop in a register. If this is not true on some 1517 // platform, pick a temp and load the receiver from stack. 1518 fatal("receiver always in a register"); 1519 receiver_reg = r2; // known to be free at this point 1520 __ ldr(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); 1521 } else { 1522 // no data motion is needed 1523 receiver_reg = r->as_Register(); 1524 } 1525 } 1526 1527 // Figure out which address we are really jumping to: 1528 MethodHandles::generate_method_handle_dispatch(masm, iid, 1529 receiver_reg, member_reg, /*for_compiler_entry:*/ true); 1530 } 1531 1532 // --------------------------------------------------------------------------- 1533 // Generate a native wrapper for a given method. The method takes arguments 1534 // in the Java compiled code convention, marshals them to the native 1535 // convention (handlizes oops, etc), transitions to native, makes the call, 1536 // returns to java state (possibly blocking), unhandlizes any result and 1537 // returns. 1538 // 1539 // Critical native functions are a shorthand for the use of 1540 // GetPrimtiveArrayCritical and disallow the use of any other JNI 1541 // functions. 
The wrapper is expected to unpack the arguments before 1542 // passing them to the callee. Critical native functions leave the state _in_Java, 1543 // since they block out GC. 1544 // Some other parts of JNI setup are skipped like the tear down of the JNI handle 1545 // block and the check for pending exceptions it's impossible for them 1546 // to be thrown. 1547 // 1548 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, 1549 const methodHandle& method, 1550 int compile_id, 1551 BasicType* in_sig_bt, 1552 VMRegPair* in_regs, 1553 BasicType ret_type) { 1554 if (method->is_method_handle_intrinsic()) { 1555 vmIntrinsics::ID iid = method->intrinsic_id(); 1556 intptr_t start = (intptr_t)__ pc(); 1557 int vep_offset = ((intptr_t)__ pc()) - start; 1558 1559 // First instruction must be a nop as it may need to be patched on deoptimisation 1560 __ nop(); 1561 gen_special_dispatch(masm, 1562 method, 1563 in_sig_bt, 1564 in_regs); 1565 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period 1566 __ flush(); 1567 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually 1568 return nmethod::new_native_nmethod(method, 1569 compile_id, 1570 masm->code(), 1571 vep_offset, 1572 frame_complete, 1573 stack_slots / VMRegImpl::slots_per_word, 1574 in_ByteSize(-1), 1575 in_ByteSize(-1), 1576 (OopMapSet*)NULL); 1577 } 1578 address native_func = method->native_function(); 1579 assert(native_func != NULL, "must have function"); 1580 1581 // An OopMap for lock (and class if static) 1582 OopMapSet *oop_maps = new OopMapSet(); 1583 intptr_t start = (intptr_t)__ pc(); 1584 1585 // We have received a description of where all the java arg are located 1586 // on entry to the wrapper. We need to convert these args to where 1587 // the jni function will expect them. To figure out where they go 1588 // we convert the java signature to a C signature by inserting 1589 // the hidden arguments as arg[0] and possibly arg[1] (static method) 1590 1591 const int total_in_args = method->size_of_parameters(); 1592 int total_c_args = total_in_args + (method->is_static() ? 2 : 1); 1593 1594 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 1595 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 1596 BasicType* in_elem_bt = NULL; 1597 1598 int argc = 0; 1599 out_sig_bt[argc++] = T_ADDRESS; 1600 if (method->is_static()) { 1601 out_sig_bt[argc++] = T_OBJECT; 1602 } 1603 1604 for (int i = 0; i < total_in_args ; i++ ) { 1605 out_sig_bt[argc++] = in_sig_bt[i]; 1606 } 1607 1608 // Now figure out where the args must be stored and how much stack space 1609 // they require. 1610 int out_arg_slots; 1611 out_arg_slots = c_calling_convention_priv(out_sig_bt, out_regs, NULL, total_c_args); 1612 1613 if (out_arg_slots < 0) { 1614 return NULL; 1615 } 1616 1617 // Compute framesize for the wrapper. We need to handlize all oops in 1618 // incoming registers 1619 1620 // Calculate the total number of stack slots we will need. 
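  // As a concrete illustration of the signature rewrite performed above (a
  // sketch only; the method name and types are hypothetical): for
  //   static native int m(long l, Object o);
  // the incoming Java signature is
  //   in_sig_bt  = { T_LONG, T_VOID, T_OBJECT }   // T_VOID marks the upper half of the long
  // and, because the wrapper prepends the JNIEnv* and (for a static method)
  // the class mirror, the outgoing C signature becomes
  //   out_sig_bt = { T_ADDRESS, T_OBJECT, T_LONG, T_VOID, T_OBJECT }
  // c_calling_convention_priv() then assigns each of these C arguments a
  // register or a stack slot, and out_arg_slots is the number of stack slots
  // that assignment consumed.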
1621 1622 // First count the abi requirement plus all of the outgoing args 1623 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 1624 1625 // Now the space for the inbound oop handle area 1626 int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers 1627 1628 int oop_handle_offset = stack_slots; 1629 stack_slots += total_save_slots; 1630 1631 // Now any space we need for handlizing a klass if static method 1632 1633 int klass_slot_offset = 0; 1634 int klass_offset = -1; 1635 int lock_slot_offset = 0; 1636 bool is_static = false; 1637 1638 if (method->is_static()) { 1639 klass_slot_offset = stack_slots; 1640 stack_slots += VMRegImpl::slots_per_word; 1641 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 1642 is_static = true; 1643 } 1644 1645 // Plus a lock if needed 1646 1647 if (method->is_synchronized()) { 1648 lock_slot_offset = stack_slots; 1649 stack_slots += VMRegImpl::slots_per_word; 1650 } 1651 1652 // Now a place (+2) to save return values or temp during shuffling 1653 // + 4 for return address (which we own) and saved rfp 1654 stack_slots += 6; 1655 1656 // Ok The space we have allocated will look like: 1657 // 1658 // 1659 // FP-> | | 1660 // |---------------------| 1661 // | 2 slots for moves | 1662 // |---------------------| 1663 // | lock box (if sync) | 1664 // |---------------------| <- lock_slot_offset 1665 // | klass (if static) | 1666 // |---------------------| <- klass_slot_offset 1667 // | oopHandle area | 1668 // |---------------------| <- oop_handle_offset (8 java arg registers) 1669 // | outbound memory | 1670 // | based arguments | 1671 // | | 1672 // |---------------------| 1673 // | | 1674 // SP-> | out_preserved_slots | 1675 // 1676 // 1677 1678 1679 // Now compute actual number of stack words we need rounding to make 1680 // stack properly aligned. 1681 stack_slots = align_up(stack_slots, StackAlignmentInSlots); 1682 1683 int stack_size = stack_slots * VMRegImpl::stack_slot_size; 1684 1685 // First thing make an ic check to see if we should even be here 1686 1687 // We are free to use all registers as temps without saving them and 1688 // restoring them except rfp. rfp is the only callee save register 1689 // as far as the interpreter and the compiler(s) are concerned. 1690 1691 1692 const Register ic_reg = rscratch2; 1693 const Register receiver = j_rarg0; 1694 1695 Label hit; 1696 Label exception_pending; 1697 1698 assert_different_registers(ic_reg, receiver, rscratch1); 1699 __ verify_oop(receiver); 1700 __ cmp_klass(receiver, ic_reg, rscratch1); 1701 __ br(Assembler::EQ, hit); 1702 1703 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1704 1705 // Verified entry point must be aligned 1706 __ align(8); 1707 1708 __ bind(hit); 1709 1710 int vep_offset = ((intptr_t)__ pc()) - start; 1711 1712 // If we have to make this method not-entrant we'll overwrite its 1713 // first instruction with a jump. For this action to be legal we 1714 // must ensure that this first instruction is a B, BL, NOP, BKPT, 1715 // SVC, HVC, or SMC. Make it a NOP. 
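  // Why the first instruction must be from that small set: making the method
  // not-entrant patches the entry while other threads may be concurrently
  // executing it, and the AArch64 concurrent modification and execution rules
  // only permit that when both the old and the new instruction belong to this
  // set. A minimal stand-alone sketch of the patching idea (not the VM's
  // actual patching code; the function name is illustrative):
  //
  //   #include <cstdint>
  //
  //   // Replace the 4-byte instruction at 'entry' with 'branch_encoding'.
  //   // A single aligned 32-bit store cannot be observed half-written by a
  //   // concurrent instruction fetch.
  //   void patch_first_instruction(uint32_t* entry, uint32_t branch_encoding) {
  //     __atomic_store_n(entry, branch_encoding, __ATOMIC_RELAXED); // GCC/Clang builtin
  //     // a real patcher must also flush/invalidate the instruction cache here
  //   }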
1716 __ nop(); 1717 1718 if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { 1719 Label L_skip_barrier; 1720 __ mov_metadata(rscratch2, method->method_holder()); // InstanceKlass* 1721 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier); 1722 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); 1723 1724 __ bind(L_skip_barrier); 1725 } 1726 1727 // Generate stack overflow check 1728 __ bang_stack_with_offset(checked_cast<int>(StackOverflow::stack_shadow_zone_size())); 1729 1730 // Generate a new frame for the wrapper. 1731 __ enter(); 1732 // -2 because return address is already present and so is saved rfp 1733 __ sub(sp, sp, stack_size - 2*wordSize); 1734 1735 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 1736 bs->nmethod_entry_barrier(masm); 1737 1738 // Frame is now completed as far as size and linkage. 1739 int frame_complete = ((intptr_t)__ pc()) - start; 1740 1741 // We use r20 as the oop handle for the receiver/klass 1742 // It is callee-saved so it survives the call to native 1743 1744 const Register oop_handle_reg = r20; 1745 1746 // 1747 // We immediately shuffle the arguments so that for any vm call we have to 1748 // make from here on out (sync slow path, jvmti, etc.) we will have 1749 // captured the oops from our caller and have a valid oopMap for 1750 // them. 1751 1752 // ----------------- 1753 // The Grand Shuffle 1754 1755 // The Java calling convention is either equal (linux) or denser (win64) than the 1756 // c calling convention. However, because of the jni_env argument, the C calling 1757 // convention always has at least one more (and two for static) arguments than Java. 1758 // Therefore if we move the args from java -> c backwards then we will never have 1759 // a register->register conflict and we don't have to build a dependency graph 1760 // and figure out how to break any cycles. 1761 // 1762 1763 // Record esp-based slot for receiver on stack for non-static methods 1764 int receiver_offset = -1; 1765 1766 // This is a trick. We double the stack slots so we can claim 1767 // the oops in the caller's frame. Since we are sure to have 1768 // more args than the caller, doubling is enough to make 1769 // sure we can capture all the incoming oop args from the 1770 // caller. 1771 // 1772 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1773 1774 // Mark location of rfp (someday) 1775 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rfp)); 1776 1777 1778 int float_args = 0; 1779 int int_args = 0; 1780 1781 #ifdef ASSERT 1782 bool reg_destroyed[RegisterImpl::number_of_registers]; 1783 bool freg_destroyed[FloatRegisterImpl::number_of_registers]; 1784 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { 1785 reg_destroyed[r] = false; 1786 } 1787 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { 1788 freg_destroyed[f] = false; 1789 } 1790 1791 #endif /* ASSERT */ 1792 1793 // For JNI natives the incoming and outgoing registers are offset upwards.
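  // A small worked example of why moving backwards is safe (the register
  // names are illustrative, not the actual j_rarg/c_rarg assignment): suppose
  // three incoming args live in r1, r2, r3 and, because of the prepended
  // JNIEnv*, their C homes are r2, r3, r4. Moving forwards, r1 -> r2 would
  // clobber the second incoming arg before it has been read. Moving backwards,
  //   r3 -> r4
  //   r2 -> r3
  //   r1 -> r2
  // every destination is either unused or has already been copied out, so no
  // temporary register or cycle-breaking is needed.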
1794 GrowableArray<int> arg_order(2 * total_in_args); 1795 VMRegPair tmp_vmreg; 1796 tmp_vmreg.set2(r19->as_VMReg()); 1797 1798 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { 1799 arg_order.push(i); 1800 arg_order.push(c_arg); 1801 } 1802 1803 int temploc = -1; 1804 for (int ai = 0; ai < arg_order.length(); ai += 2) { 1805 int i = arg_order.at(ai); 1806 int c_arg = arg_order.at(ai + 1); 1807 __ block_comment(err_msg("move %d -> %d", i, c_arg)); 1808 assert(c_arg != -1 && i != -1, "wrong order"); 1809 #ifdef ASSERT 1810 if (in_regs[i].first()->is_Register()) { 1811 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); 1812 } else if (in_regs[i].first()->is_FloatRegister()) { 1813 assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); 1814 } 1815 if (out_regs[c_arg].first()->is_Register()) { 1816 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 1817 } else if (out_regs[c_arg].first()->is_FloatRegister()) { 1818 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; 1819 } 1820 #endif /* ASSERT */ 1821 switch (in_sig_bt[i]) { 1822 case T_ARRAY: 1823 case T_PRIMITIVE_OBJECT: 1824 case T_OBJECT: 1825 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 1826 ((i == 0) && (!is_static)), 1827 &receiver_offset); 1828 int_args++; 1829 break; 1830 case T_VOID: 1831 break; 1832 1833 case T_FLOAT: 1834 float_move(masm, in_regs[i], out_regs[c_arg]); 1835 float_args++; 1836 break; 1837 1838 case T_DOUBLE: 1839 assert( i + 1 < total_in_args && 1840 in_sig_bt[i + 1] == T_VOID && 1841 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 1842 double_move(masm, in_regs[i], out_regs[c_arg]); 1843 float_args++; 1844 break; 1845 1846 case T_LONG : 1847 long_move(masm, in_regs[i], out_regs[c_arg]); 1848 int_args++; 1849 break; 1850 1851 case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 1852 1853 default: 1854 move32_64(masm, in_regs[i], out_regs[c_arg]); 1855 int_args++; 1856 } 1857 } 1858 1859 // point c_arg at the first arg that is already loaded in case we 1860 // need to spill before we call out 1861 int c_arg = total_c_args - total_in_args; 1862 1863 // Pre-load a static method's oop into c_rarg1. 1864 if (method->is_static()) { 1865 1866 // load oop into a register 1867 __ movoop(c_rarg1, 1868 JNIHandles::make_local(method->method_holder()->java_mirror()), 1869 /*immediate*/true); 1870 1871 // Now handlize the static class mirror it's known not-null. 1872 __ str(c_rarg1, Address(sp, klass_offset)); 1873 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 1874 1875 // Now get the handle 1876 __ lea(c_rarg1, Address(sp, klass_offset)); 1877 // and protect the arg if we must spill 1878 c_arg--; 1879 } 1880 1881 // Change state to native (we save the return address in the thread, since it might not 1882 // be pushed on the stack when we do a stack traversal). 
1883 // We use the same pc/oopMap repeatedly when we call out 1884 1885 Label native_return; 1886 __ set_last_Java_frame(sp, noreg, native_return, rscratch1); 1887 1888 Label dtrace_method_entry, dtrace_method_entry_done; 1889 { 1890 uint64_t offset; 1891 __ adrp(rscratch1, ExternalAddress((address)&DTraceMethodProbes), offset); 1892 __ ldrb(rscratch1, Address(rscratch1, offset)); 1893 __ cbnzw(rscratch1, dtrace_method_entry); 1894 __ bind(dtrace_method_entry_done); 1895 } 1896 1897 // RedefineClasses() tracing support for obsolete method entry 1898 if (log_is_enabled(Trace, redefine, class, obsolete)) { 1899 // protect the args we've loaded 1900 save_args(masm, total_c_args, c_arg, out_regs); 1901 __ mov_metadata(c_rarg1, method()); 1902 __ call_VM_leaf( 1903 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), 1904 rthread, c_rarg1); 1905 restore_args(masm, total_c_args, c_arg, out_regs); 1906 } 1907 1908 // Lock a synchronized method 1909 1910 // Register definitions used by locking and unlocking 1911 1912 const Register swap_reg = r0; 1913 const Register obj_reg = r19; // Will contain the oop 1914 const Register lock_reg = r13; // Address of compiler lock object (BasicLock) 1915 const Register old_hdr = r13; // value of old header at unlock time 1916 const Register tmp = lr; 1917 1918 Label slow_path_lock; 1919 Label lock_done; 1920 1921 if (method->is_synchronized()) { 1922 1923 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); 1924 1925 // Get the handle (the 2nd argument) 1926 __ mov(oop_handle_reg, c_rarg1); 1927 1928 // Get address of the box 1929 1930 __ lea(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); 1931 1932 // Load the oop from the handle 1933 __ ldr(obj_reg, Address(oop_handle_reg, 0)); 1934 1935 if (!UseHeavyMonitors) { 1936 // Load (object->mark() | 1) into swap_reg %r0 1937 __ ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 1938 __ orr(swap_reg, rscratch1, 1); 1939 if (EnableValhalla) { 1940 // Mask inline_type bit such that we go to the slow path if object is an inline type 1941 __ andr(swap_reg, swap_reg, ~((int) markWord::inline_type_bit_in_place)); 1942 } 1943 1944 // Save (object->mark() | 1) into BasicLock's displaced header 1945 __ str(swap_reg, Address(lock_reg, mark_word_offset)); 1946 1947 // src -> dest iff dest == r0 else r0 <- dest 1948 { Label here; 1949 __ cmpxchg_obj_header(r0, lock_reg, obj_reg, rscratch1, lock_done, /*fallthrough*/NULL); 1950 } 1951 1952 // Hmm should this move to the slow path code area??? 1953 1954 // Test if the oopMark is an obvious stack pointer, i.e., 1955 // 1) (mark & 3) == 0, and 1956 // 2) sp <= mark < mark + os::pagesize() 1957 // These 3 tests can be done by evaluating the following 1958 // expression: ((mark - sp) & (3 - os::vm_page_size())), 1959 // assuming both stack pointer and pagesize have their 1960 // least significant 2 bits clear. 
1961 // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg 1962 1963 __ sub(swap_reg, sp, swap_reg); 1964 __ neg(swap_reg, swap_reg); 1965 __ ands(swap_reg, swap_reg, 3 - os::vm_page_size()); 1966 1967 // Save the test result, for recursive case, the result is zero 1968 __ str(swap_reg, Address(lock_reg, mark_word_offset)); 1969 __ br(Assembler::NE, slow_path_lock); 1970 } else { 1971 __ b(slow_path_lock); 1972 } 1973 1974 // Slow path will re-enter here 1975 __ bind(lock_done); 1976 } 1977 1978 1979 // Finally just about ready to make the JNI call 1980 1981 // get JNIEnv* which is first argument to native 1982 __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset()))); 1983 1984 // Now set thread in native 1985 __ mov(rscratch1, _thread_in_native); 1986 __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); 1987 __ stlrw(rscratch1, rscratch2); 1988 1989 rt_call(masm, native_func); 1990 1991 __ bind(native_return); 1992 1993 intptr_t return_pc = (intptr_t) __ pc(); 1994 oop_maps->add_gc_map(return_pc - start, map); 1995 1996 // Unpack native results. 1997 switch (ret_type) { 1998 case T_BOOLEAN: __ c2bool(r0); break; 1999 case T_CHAR : __ ubfx(r0, r0, 0, 16); break; 2000 case T_BYTE : __ sbfx(r0, r0, 0, 8); break; 2001 case T_SHORT : __ sbfx(r0, r0, 0, 16); break; 2002 case T_INT : __ sbfx(r0, r0, 0, 32); break; 2003 case T_DOUBLE : 2004 case T_FLOAT : 2005 // Result is in v0 we'll save as needed 2006 break; 2007 case T_ARRAY: // Really a handle 2008 case T_PRIMITIVE_OBJECT: // Really a handle 2009 case T_OBJECT: // Really a handle 2010 break; // can't de-handlize until after safepoint check 2011 case T_VOID: break; 2012 case T_LONG: break; 2013 default : ShouldNotReachHere(); 2014 } 2015 2016 Label safepoint_in_progress, safepoint_in_progress_done; 2017 Label after_transition; 2018 2019 // Switch thread to "native transition" state before reading the synchronization state. 2020 // This additional state is necessary because reading and testing the synchronization 2021 // state is not atomic w.r.t. GC, as this scenario demonstrates: 2022 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. 2023 // VM thread changes sync state to synchronizing and suspends threads for GC. 2024 // Thread A is resumed to finish this native method, but doesn't block here since it 2025 // didn't see any synchronization is progress, and escapes. 2026 __ mov(rscratch1, _thread_in_native_trans); 2027 2028 __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset())); 2029 2030 // Force this write out before the read below 2031 __ dmb(Assembler::ISH); 2032 2033 __ verify_sve_vector_length(); 2034 2035 // Check for safepoint operation in progress and/or pending suspend requests. 2036 { 2037 // We need an acquire here to ensure that any subsequent load of the 2038 // global SafepointSynchronize::_state flag is ordered after this load 2039 // of the thread-local polling word. We don't want this poll to 2040 // return false (i.e. not safepointing) and a later poll of the global 2041 // SafepointSynchronize::_state spuriously to return true. 2042 // 2043 // This is to avoid a race when we're in a native->Java transition 2044 // racing the code which wakes up from a safepoint. 
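    // A stand-alone sketch of the ordering requirement described above, using
    // std::atomic rather than the VM's polling machinery (all names below are
    // illustrative, not HotSpot's):
    //
    //   #include <atomic>
    //   #include <cstdint>
    //
    //   enum { NOT_SYNCHRONIZING = 0, SYNCHRONIZING = 1 };
    //   std::atomic<uintptr_t> thread_poll_word;   // per-thread polling word
    //   std::atomic<int>       global_state;       // global safepoint state
    //
    //   bool safepoint_pending() {
    //     // The acquire keeps the later load of global_state from being
    //     // reordered before this load of the thread-local word, so a
    //     // disarmed poll cannot be followed by a stale "synchronizing"
    //     // answer from the global flag.
    //     if (thread_poll_word.load(std::memory_order_acquire) != 0) {
    //       return true;                            // armed: take the slow path
    //     }
    //     return global_state.load(std::memory_order_relaxed) == SYNCHRONIZING;
    //   }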
2045 2046 __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); 2047 __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset())); 2048 __ cbnzw(rscratch1, safepoint_in_progress); 2049 __ bind(safepoint_in_progress_done); 2050 } 2051 2052 // change thread state 2053 __ mov(rscratch1, _thread_in_Java); 2054 __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); 2055 __ stlrw(rscratch1, rscratch2); 2056 __ bind(after_transition); 2057 2058 Label reguard; 2059 Label reguard_done; 2060 __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset())); 2061 __ cmpw(rscratch1, StackOverflow::stack_guard_yellow_reserved_disabled); 2062 __ br(Assembler::EQ, reguard); 2063 __ bind(reguard_done); 2064 2065 // native result if any is live 2066 2067 // Unlock 2068 Label unlock_done; 2069 Label slow_path_unlock; 2070 if (method->is_synchronized()) { 2071 2072 // Get locked oop from the handle we passed to jni 2073 __ ldr(obj_reg, Address(oop_handle_reg, 0)); 2074 2075 Label done; 2076 2077 if (!UseHeavyMonitors) { 2078 // Simple recursive lock? 2079 __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2080 __ cbz(rscratch1, done); 2081 } 2082 2083 // Must save r0 if if it is live now because cmpxchg must use it 2084 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2085 save_native_result(masm, ret_type, stack_slots); 2086 } 2087 2088 if (!UseHeavyMonitors) { 2089 // get address of the stack lock 2090 __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2091 // get old displaced header 2092 __ ldr(old_hdr, Address(r0, 0)); 2093 2094 // Atomic swap old header if oop still contains the stack lock 2095 Label succeed; 2096 __ cmpxchg_obj_header(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock); 2097 __ bind(succeed); 2098 } else { 2099 __ b(slow_path_unlock); 2100 } 2101 2102 // slow path re-enters here 2103 __ bind(unlock_done); 2104 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 2105 restore_native_result(masm, ret_type, stack_slots); 2106 } 2107 2108 __ bind(done); 2109 } 2110 2111 Label dtrace_method_exit, dtrace_method_exit_done; 2112 { 2113 uint64_t offset; 2114 __ adrp(rscratch1, ExternalAddress((address)&DTraceMethodProbes), offset); 2115 __ ldrb(rscratch1, Address(rscratch1, offset)); 2116 __ cbnzw(rscratch1, dtrace_method_exit); 2117 __ bind(dtrace_method_exit_done); 2118 } 2119 2120 __ reset_last_Java_frame(false); 2121 2122 // Unbox oop result, e.g. JNIHandles::resolve result. 2123 if (is_reference_type(ret_type)) { 2124 __ resolve_jobject(r0, rthread, rscratch2); 2125 } 2126 2127 if (CheckJNICalls) { 2128 // clear_pending_jni_exception_check 2129 __ str(zr, Address(rthread, JavaThread::pending_jni_exception_check_fn_offset())); 2130 } 2131 2132 // reset handle block 2133 __ ldr(r2, Address(rthread, JavaThread::active_handles_offset())); 2134 __ str(zr, Address(r2, JNIHandleBlock::top_offset_in_bytes())); 2135 2136 __ leave(); 2137 2138 // Any exception pending? 
2139 __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); 2140 __ cbnz(rscratch1, exception_pending); 2141 2142 // We're done 2143 __ ret(lr); 2144 2145 // Unexpected paths are out of line and go here 2146 2147 // forward the exception 2148 __ bind(exception_pending); 2149 2150 // and forward the exception 2151 __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 2152 2153 // Slow path locking & unlocking 2154 if (method->is_synchronized()) { 2155 2156 __ block_comment("Slow path lock {"); 2157 __ bind(slow_path_lock); 2158 2159 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM 2160 // args are (oop obj, BasicLock* lock, JavaThread* thread) 2161 2162 // protect the args we've loaded 2163 save_args(masm, total_c_args, c_arg, out_regs); 2164 2165 __ mov(c_rarg0, obj_reg); 2166 __ mov(c_rarg1, lock_reg); 2167 __ mov(c_rarg2, rthread); 2168 2169 // Not a leaf but we have last_Java_frame setup as we want 2170 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); 2171 restore_args(masm, total_c_args, c_arg, out_regs); 2172 2173 #ifdef ASSERT 2174 { Label L; 2175 __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); 2176 __ cbz(rscratch1, L); 2177 __ stop("no pending exception allowed on exit from monitorenter"); 2178 __ bind(L); 2179 } 2180 #endif 2181 __ b(lock_done); 2182 2183 __ block_comment("} Slow path lock"); 2184 2185 __ block_comment("Slow path unlock {"); 2186 __ bind(slow_path_unlock); 2187 2188 // If we haven't already saved the native result we must save it now as xmm registers 2189 // are still exposed. 2190 2191 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2192 save_native_result(masm, ret_type, stack_slots); 2193 } 2194 2195 __ mov(c_rarg2, rthread); 2196 __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); 2197 __ mov(c_rarg0, obj_reg); 2198 2199 // Save pending exception around call to VM (which contains an EXCEPTION_MARK) 2200 // NOTE that obj_reg == r19 currently 2201 __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset()))); 2202 __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset()))); 2203 2204 rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); 2205 2206 #ifdef ASSERT 2207 { 2208 Label L; 2209 __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); 2210 __ cbz(rscratch1, L); 2211 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); 2212 __ bind(L); 2213 } 2214 #endif /* ASSERT */ 2215 2216 __ str(r19, Address(rthread, in_bytes(Thread::pending_exception_offset()))); 2217 2218 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 2219 restore_native_result(masm, ret_type, stack_slots); 2220 } 2221 __ b(unlock_done); 2222 2223 __ block_comment("} Slow path unlock"); 2224 2225 } // synchronized 2226 2227 // SLOW PATH Reguard the stack if needed 2228 2229 __ bind(reguard); 2230 save_native_result(masm, ret_type, stack_slots); 2231 rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); 2232 restore_native_result(masm, ret_type, stack_slots); 2233 // and continue 2234 __ b(reguard_done); 2235 2236 // SLOW PATH safepoint 2237 { 2238 __ block_comment("safepoint {"); 2239 __ bind(safepoint_in_progress); 2240 2241 // Don't use call_VM as it will see a possible pending exception and forward it 2242 // and never return here preventing us from clearing _last_native_pc down 
below. 2243 // 2244 save_native_result(masm, ret_type, stack_slots); 2245 __ mov(c_rarg0, rthread); 2246 #ifndef PRODUCT 2247 assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); 2248 #endif 2249 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); 2250 __ blr(rscratch1); 2251 2252 // Restore any method result value 2253 restore_native_result(masm, ret_type, stack_slots); 2254 2255 __ b(safepoint_in_progress_done); 2256 __ block_comment("} safepoint"); 2257 } 2258 2259 // SLOW PATH dtrace support 2260 { 2261 __ block_comment("dtrace entry {"); 2262 __ bind(dtrace_method_entry); 2263 2264 // We have all of the arguments setup at this point. We must not touch any register 2265 // argument registers at this point (what if we save/restore them there are no oop? 2266 2267 save_args(masm, total_c_args, c_arg, out_regs); 2268 __ mov_metadata(c_rarg1, method()); 2269 __ call_VM_leaf( 2270 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 2271 rthread, c_rarg1); 2272 restore_args(masm, total_c_args, c_arg, out_regs); 2273 __ b(dtrace_method_entry_done); 2274 __ block_comment("} dtrace entry"); 2275 } 2276 2277 { 2278 __ block_comment("dtrace exit {"); 2279 __ bind(dtrace_method_exit); 2280 save_native_result(masm, ret_type, stack_slots); 2281 __ mov_metadata(c_rarg1, method()); 2282 __ call_VM_leaf( 2283 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 2284 rthread, c_rarg1); 2285 restore_native_result(masm, ret_type, stack_slots); 2286 __ b(dtrace_method_exit_done); 2287 __ block_comment("} dtrace exit"); 2288 } 2289 2290 2291 __ flush(); 2292 2293 nmethod *nm = nmethod::new_native_nmethod(method, 2294 compile_id, 2295 masm->code(), 2296 vep_offset, 2297 frame_complete, 2298 stack_slots / VMRegImpl::slots_per_word, 2299 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), 2300 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), 2301 oop_maps); 2302 2303 return nm; 2304 } 2305 2306 // this function returns the adjust size (in number of words) to a c2i adapter 2307 // activation for use during deoptimization 2308 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { 2309 assert(callee_locals >= callee_parameters, 2310 "test and remove; got more parms than locals"); 2311 if (callee_locals < callee_parameters) 2312 return 0; // No adjustment for negative locals 2313 int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; 2314 // diff is counted in stack words 2315 return align_up(diff, 2); 2316 } 2317 2318 2319 //------------------------------generate_deopt_blob---------------------------- 2320 void SharedRuntime::generate_deopt_blob() { 2321 // Allocate space for the code 2322 ResourceMark rm; 2323 // Setup code generation tools 2324 int pad = 0; 2325 #if INCLUDE_JVMCI 2326 if (EnableJVMCI) { 2327 pad += 512; // Increase the buffer size when compiling for JVMCI 2328 } 2329 #endif 2330 CodeBuffer buffer("deopt_blob", 2048+pad, 1024); 2331 MacroAssembler* masm = new MacroAssembler(&buffer); 2332 int frame_size_in_words; 2333 OopMap* map = NULL; 2334 OopMapSet *oop_maps = new OopMapSet(); 2335 RegisterSaver reg_save(COMPILER2_OR_JVMCI != 0); 2336 2337 // ------------- 2338 // This code enters when returning to a de-optimized nmethod. A return 2339 // address has been pushed on the the stack, and return values are in 2340 // registers. 
2341 // If we are doing a normal deopt then we were called from the patched 2342 // nmethod from the point we returned to the nmethod. So the return 2343 // address on the stack is wrong by NativeCall::instruction_size 2344 // We will adjust the value so it looks like we have the original return 2345 // address on the stack (like when we eagerly deoptimized). 2346 // In the case of an exception pending when deoptimizing, we enter 2347 // with a return address on the stack that points after the call we patched 2348 // into the exception handler. We have the following register state from, 2349 // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). 2350 // r0: exception oop 2351 // r19: exception handler 2352 // r3: throwing pc 2353 // So in this case we simply jam r3 into the useless return address and 2354 // the stack looks just like we want. 2355 // 2356 // At this point we need to de-opt. We save the argument return 2357 // registers. We call the first C routine, fetch_unroll_info(). This 2358 // routine captures the return values and returns a structure which 2359 // describes the current frame size and the sizes of all replacement frames. 2360 // The current frame is compiled code and may contain many inlined 2361 // functions, each with their own JVM state. We pop the current frame, then 2362 // push all the new frames. Then we call the C routine unpack_frames() to 2363 // populate these frames. Finally unpack_frames() returns us the new target 2364 // address. Notice that callee-save registers are BLOWN here; they have 2365 // already been captured in the vframeArray at the time the return PC was 2366 // patched. 2367 address start = __ pc(); 2368 Label cont; 2369 2370 // Prolog for non exception case! 2371 2372 // Save everything in sight. 2373 map = reg_save.save_live_registers(masm, 0, &frame_size_in_words); 2374 2375 // Normal deoptimization. Save exec mode for unpack_frames. 2376 __ movw(rcpool, Deoptimization::Unpack_deopt); // callee-saved 2377 __ b(cont); 2378 2379 int reexecute_offset = __ pc() - start; 2380 #if INCLUDE_JVMCI && !defined(COMPILER1) 2381 if (EnableJVMCI && UseJVMCICompiler) { 2382 // JVMCI does not use this kind of deoptimization 2383 __ should_not_reach_here(); 2384 } 2385 #endif 2386 2387 // Reexecute case 2388 // return address is the pc describes what bci to do re-execute at 2389 2390 // No need to update map as each call to save_live_registers will produce identical oopmap 2391 (void) reg_save.save_live_registers(masm, 0, &frame_size_in_words); 2392 2393 __ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved 2394 __ b(cont); 2395 2396 #if INCLUDE_JVMCI 2397 Label after_fetch_unroll_info_call; 2398 int implicit_exception_uncommon_trap_offset = 0; 2399 int uncommon_trap_offset = 0; 2400 2401 if (EnableJVMCI) { 2402 implicit_exception_uncommon_trap_offset = __ pc() - start; 2403 2404 __ ldr(lr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); 2405 __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); 2406 2407 uncommon_trap_offset = __ pc() - start; 2408 2409 // Save everything in sight. 
2410 reg_save.save_live_registers(masm, 0, &frame_size_in_words); 2411 // fetch_unroll_info needs to call last_java_frame() 2412 Label retaddr; 2413 __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); 2414 2415 __ ldrw(c_rarg1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset()))); 2416 __ movw(rscratch1, -1); 2417 __ strw(rscratch1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset()))); 2418 2419 __ movw(rcpool, (int32_t)Deoptimization::Unpack_reexecute); 2420 __ mov(c_rarg0, rthread); 2421 __ movw(c_rarg2, rcpool); // exec mode 2422 __ lea(rscratch1, 2423 RuntimeAddress(CAST_FROM_FN_PTR(address, 2424 Deoptimization::uncommon_trap))); 2425 __ blr(rscratch1); 2426 __ bind(retaddr); 2427 oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); 2428 2429 __ reset_last_Java_frame(false); 2430 2431 __ b(after_fetch_unroll_info_call); 2432 } // EnableJVMCI 2433 #endif // INCLUDE_JVMCI 2434 2435 int exception_offset = __ pc() - start; 2436 2437 // Prolog for exception case 2438 2439 // all registers are dead at this entry point, except for r0, and 2440 // r3 which contain the exception oop and exception pc 2441 // respectively. Set them in TLS and fall thru to the 2442 // unpack_with_exception_in_tls entry point. 2443 2444 __ str(r3, Address(rthread, JavaThread::exception_pc_offset())); 2445 __ str(r0, Address(rthread, JavaThread::exception_oop_offset())); 2446 2447 int exception_in_tls_offset = __ pc() - start; 2448 2449 // new implementation because exception oop is now passed in JavaThread 2450 2451 // Prolog for exception case 2452 // All registers must be preserved because they might be used by LinearScan 2453 // Exceptiop oop and throwing PC are passed in JavaThread 2454 // tos: stack at point of call to method that threw the exception (i.e. only 2455 // args are on the stack, no return address) 2456 2457 // The return address pushed by save_live_registers will be patched 2458 // later with the throwing pc. The correct value is not available 2459 // now because loading it from memory would destroy registers. 2460 2461 // NB: The SP at this point must be the SP of the method that is 2462 // being deoptimized. Deoptimization assumes that the frame created 2463 // here by save_live_registers is immediately below the method's SP. 2464 // This is a somewhat fragile mechanism. 2465 2466 // Save everything in sight. 2467 map = reg_save.save_live_registers(masm, 0, &frame_size_in_words); 2468 2469 // Now it is safe to overwrite any register 2470 2471 // Deopt during an exception. Save exec mode for unpack_frames. 2472 __ mov(rcpool, Deoptimization::Unpack_exception); // callee-saved 2473 2474 // load throwing pc from JavaThread and patch it as the return address 2475 // of the current frame. 
Then clear the field in JavaThread 2476 __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset())); 2477 __ protect_return_address(r3, rscratch1); 2478 __ str(r3, Address(rfp, wordSize)); 2479 __ str(zr, Address(rthread, JavaThread::exception_pc_offset())); 2480 2481 #ifdef ASSERT 2482 // verify that there is really an exception oop in JavaThread 2483 __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset())); 2484 __ verify_oop(r0); 2485 2486 // verify that there is no pending exception 2487 Label no_pending_exception; 2488 __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); 2489 __ cbz(rscratch1, no_pending_exception); 2490 __ stop("must not have pending exception here"); 2491 __ bind(no_pending_exception); 2492 #endif 2493 2494 __ bind(cont); 2495 2496 // Call C code. Need thread and this frame, but NOT official VM entry 2497 // crud. We cannot block on this call, no GC can happen. 2498 // 2499 // UnrollBlock* fetch_unroll_info(JavaThread* thread) 2500 2501 // fetch_unroll_info needs to call last_java_frame(). 2502 2503 Label retaddr; 2504 __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); 2505 #ifdef ASSERT0 2506 { Label L; 2507 __ ldr(rscratch1, Address(rthread, 2508 JavaThread::last_Java_fp_offset())); 2509 __ cbz(rscratch1, L); 2510 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); 2511 __ bind(L); 2512 } 2513 #endif // ASSERT 2514 __ mov(c_rarg0, rthread); 2515 __ mov(c_rarg1, rcpool); 2516 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); 2517 __ blr(rscratch1); 2518 __ bind(retaddr); 2519 2520 // Need to have an oopmap that tells fetch_unroll_info where to 2521 // find any register it might need. 2522 oop_maps->add_gc_map(__ pc() - start, map); 2523 2524 __ reset_last_Java_frame(false); 2525 2526 #if INCLUDE_JVMCI 2527 if (EnableJVMCI) { 2528 __ bind(after_fetch_unroll_info_call); 2529 } 2530 #endif 2531 2532 // Load UnrollBlock* into r5 2533 __ mov(r5, r0); 2534 2535 __ ldrw(rcpool, Address(r5, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); 2536 Label noException; 2537 __ cmpw(rcpool, Deoptimization::Unpack_exception); // Was exception pending? 2538 __ br(Assembler::NE, noException); 2539 __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset())); 2540 // QQQ this is useless it was NULL above 2541 __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset())); 2542 __ str(zr, Address(rthread, JavaThread::exception_oop_offset())); 2543 __ str(zr, Address(rthread, JavaThread::exception_pc_offset())); 2544 2545 __ verify_oop(r0); 2546 2547 // Overwrite the result registers with the exception results. 2548 __ str(r0, Address(sp, reg_save.r0_offset_in_bytes())); 2549 // I think this is useless 2550 // __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes())); 2551 2552 __ bind(noException); 2553 2554 // Only register save data is on the stack. 2555 // Now restore the result registers. Everything else is either dead 2556 // or captured in the vframeArray. 2557 2558 // Restore fp result register 2559 __ ldrd(v0, Address(sp, reg_save.v0_offset_in_bytes())); 2560 // Restore integer result register 2561 __ ldr(r0, Address(sp, reg_save.r0_offset_in_bytes())); 2562 2563 // Pop all of the register save area off the stack 2564 __ add(sp, sp, frame_size_in_words * wordSize); 2565 2566 // All of the register save area has been popped of the stack. Only the 2567 // return address remains. 2568 2569 // Pop all the frames we must move/replace. 
2570 // 2571 // Frame picture (youngest to oldest) 2572 // 1: self-frame (no frame link) 2573 // 2: deopting frame (no frame link) 2574 // 3: caller of deopting frame (could be compiled/interpreted). 2575 // 2576 // Note: by leaving the return address of self-frame on the stack 2577 // and using the size of frame 2 to adjust the stack 2578 // when we are done the return to frame 3 will still be on the stack. 2579 2580 // Pop deoptimized frame 2581 __ ldrw(r2, Address(r5, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); 2582 __ sub(r2, r2, 2 * wordSize); 2583 __ add(sp, sp, r2); 2584 __ ldp(rfp, lr, __ post(sp, 2 * wordSize)); 2585 __ authenticate_return_address(); 2586 // LR should now be the return address to the caller (3) 2587 2588 #ifdef ASSERT 2589 // Compilers generate code that bang the stack by as much as the 2590 // interpreter would need. So this stack banging should never 2591 // trigger a fault. Verify that it does not on non product builds. 2592 __ ldrw(r19, Address(r5, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); 2593 __ bang_stack_size(r19, r2); 2594 #endif 2595 // Load address of array of frame pcs into r2 2596 __ ldr(r2, Address(r5, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); 2597 2598 // Trash the old pc 2599 // __ addptr(sp, wordSize); FIXME ???? 2600 2601 // Load address of array of frame sizes into r4 2602 __ ldr(r4, Address(r5, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); 2603 2604 // Load counter into r3 2605 __ ldrw(r3, Address(r5, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); 2606 2607 // Now adjust the caller's stack to make up for the extra locals 2608 // but record the original sp so that we can save it in the skeletal interpreter 2609 // frame and the stack walking of interpreter_sender will get the unextended sp 2610 // value and not the "real" sp value. 2611 2612 const Register sender_sp = r6; 2613 2614 __ mov(sender_sp, sp); 2615 __ ldrw(r19, Address(r5, 2616 Deoptimization::UnrollBlock:: 2617 caller_adjustment_offset_in_bytes())); 2618 __ sub(sp, sp, r19); 2619 2620 // Push interpreter frames in a loop 2621 __ mov(rscratch1, (uint64_t)0xDEADDEAD); // Make a recognizable pattern 2622 __ mov(rscratch2, rscratch1); 2623 Label loop; 2624 __ bind(loop); 2625 __ ldr(r19, Address(__ post(r4, wordSize))); // Load frame size 2626 __ sub(r19, r19, 2*wordSize); // We'll push pc and fp by hand 2627 __ ldr(lr, Address(__ post(r2, wordSize))); // Load pc 2628 __ enter(); // Save old & set new fp 2629 __ sub(sp, sp, r19); // Prolog 2630 // This value is corrected by layout_activation_impl 2631 __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); 2632 __ str(sender_sp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable 2633 __ mov(sender_sp, sp); // Pass sender_sp to next frame 2634 __ sub(r3, r3, 1); // Decrement counter 2635 __ cbnz(r3, loop); 2636 2637 // Re-push self-frame 2638 __ ldr(lr, Address(r2)); 2639 __ enter(); 2640 2641 // Allocate a full sized register save area. We subtract 2 because 2642 // enter() just pushed 2 words 2643 __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); 2644 2645 // Restore frame locals after moving the frame 2646 __ strd(v0, Address(sp, reg_save.v0_offset_in_bytes())); 2647 __ str(r0, Address(sp, reg_save.r0_offset_in_bytes())); 2648 2649 // Call C code. Need thread but NOT official VM entry 2650 // crud. We cannot block on this call, no GC can happen. 
Call should 2651 // restore return values to their stack-slots with the new SP. 2652 // 2653 // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) 2654 2655 // Use rfp because the frames look interpreted now 2656 // Don't need the precise return PC here, just precise enough to point into this code blob. 2657 address the_pc = __ pc(); 2658 __ set_last_Java_frame(sp, rfp, the_pc, rscratch1); 2659 2660 __ mov(c_rarg0, rthread); 2661 __ movw(c_rarg1, rcpool); // second arg: exec_mode 2662 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 2663 __ blr(rscratch1); 2664 2665 // Set an oopmap for the call site 2666 // Use the same PC we used for the last java frame 2667 oop_maps->add_gc_map(the_pc - start, 2668 new OopMap( frame_size_in_words, 0 )); 2669 2670 // Clear fp AND pc 2671 __ reset_last_Java_frame(true); 2672 2673 // Collect return values 2674 __ ldrd(v0, Address(sp, reg_save.v0_offset_in_bytes())); 2675 __ ldr(r0, Address(sp, reg_save.r0_offset_in_bytes())); 2676 // I think this is useless (throwing pc?) 2677 // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes())); 2678 2679 // Pop self-frame. 2680 __ leave(); // Epilog 2681 2682 // Jump to interpreter 2683 __ ret(lr); 2684 2685 // Make sure all code is generated 2686 masm->flush(); 2687 2688 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); 2689 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); 2690 #if INCLUDE_JVMCI 2691 if (EnableJVMCI) { 2692 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); 2693 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); 2694 } 2695 #endif 2696 } 2697 2698 // Number of stack slots between incoming argument block and the start of 2699 // a new frame. The PROLOG must add this many slots to the stack. The 2700 // EPILOG must remove this many slots. aarch64 needs two slots for 2701 // return address and fp. 2702 // TODO think this is correct but check 2703 uint SharedRuntime::in_preserve_stack_slots() { 2704 return 4; 2705 } 2706 2707 uint SharedRuntime::out_preserve_stack_slots() { 2708 return 0; 2709 } 2710 2711 #ifdef COMPILER2 2712 //------------------------------generate_uncommon_trap_blob-------------------- 2713 void SharedRuntime::generate_uncommon_trap_blob() { 2714 // Allocate space for the code 2715 ResourceMark rm; 2716 // Setup code generation tools 2717 CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); 2718 MacroAssembler* masm = new MacroAssembler(&buffer); 2719 2720 assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); 2721 2722 address start = __ pc(); 2723 2724 // Push self-frame. We get here with a return address in LR 2725 // and sp should be 16 byte aligned 2726 // push rfp and retaddr by hand 2727 __ protect_return_address(); 2728 __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize))); 2729 // we don't expect an arg reg save area 2730 #ifndef PRODUCT 2731 assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); 2732 #endif 2733 // compiler left unloaded_class_index in j_rarg0 move to where the 2734 // runtime expects it. 2735 if (c_rarg1 != j_rarg0) { 2736 __ movw(c_rarg1, j_rarg0); 2737 } 2738 2739 // we need to set the past SP to the stack pointer of the stub frame 2740 // and the pc to the address where this runtime call will return 2741 // although actually any pc in this code blob will do). 
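  // Conceptually, set_last_Java_frame() publishes a small anchor in the
  // current thread so the stack walker can find the youngest Java frame while
  // we are off in C code. A simplified sketch of that idea (field and type
  // names are illustrative, not the VM's exact layout):
  //
  //   struct FrameAnchor {
  //     intptr_t* last_sp;   // SP of this stub frame at the transition
  //     void*     last_pc;   // some PC inside this blob, used to locate its
  //   };                     // CodeBlob and oopmap
  //
  //   void set_last_frame(FrameAnchor* a, intptr_t* sp, void* pc) {
  //     a->last_pc = pc;     // publish the pc before the sp so a walker never
  //     a->last_sp = sp;     // sees a valid sp paired with a stale pc
  //   }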
2742 Label retaddr; 2743 __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); 2744 2745 // Call C code. Need thread but NOT official VM entry 2746 // crud. We cannot block on this call, no GC can happen. Call should 2747 // capture callee-saved registers as well as return values. 2748 // Thread is in rdi already. 2749 // 2750 // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); 2751 // 2752 // n.b. 2 gp args, 0 fp args, integral return type 2753 2754 __ mov(c_rarg0, rthread); 2755 __ movw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); 2756 __ lea(rscratch1, 2757 RuntimeAddress(CAST_FROM_FN_PTR(address, 2758 Deoptimization::uncommon_trap))); 2759 __ blr(rscratch1); 2760 __ bind(retaddr); 2761 2762 // Set an oopmap for the call site 2763 OopMapSet* oop_maps = new OopMapSet(); 2764 OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); 2765 2766 // location of rfp is known implicitly by the frame sender code 2767 2768 oop_maps->add_gc_map(__ pc() - start, map); 2769 2770 __ reset_last_Java_frame(false); 2771 2772 // move UnrollBlock* into r4 2773 __ mov(r4, r0); 2774 2775 #ifdef ASSERT 2776 { Label L; 2777 __ ldrw(rscratch1, Address(r4, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); 2778 __ cmpw(rscratch1, (unsigned)Deoptimization::Unpack_uncommon_trap); 2779 __ br(Assembler::EQ, L); 2780 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); 2781 __ bind(L); 2782 } 2783 #endif 2784 2785 // Pop all the frames we must move/replace. 2786 // 2787 // Frame picture (youngest to oldest) 2788 // 1: self-frame (no frame link) 2789 // 2: deopting frame (no frame link) 2790 // 3: caller of deopting frame (could be compiled/interpreted). 2791 2792 // Pop self-frame. We have no frame, and must rely only on r0 and sp. 2793 __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! 2794 2795 // Pop deoptimized frame (int) 2796 __ ldrw(r2, Address(r4, 2797 Deoptimization::UnrollBlock:: 2798 size_of_deoptimized_frame_offset_in_bytes())); 2799 __ sub(r2, r2, 2 * wordSize); 2800 __ add(sp, sp, r2); 2801 __ ldp(rfp, lr, __ post(sp, 2 * wordSize)); 2802 __ authenticate_return_address(); 2803 // LR should now be the return address to the caller (3) frame 2804 2805 #ifdef ASSERT 2806 // Compilers generate code that bang the stack by as much as the 2807 // interpreter would need. So this stack banging should never 2808 // trigger a fault. Verify that it does not on non product builds. 2809 __ ldrw(r1, Address(r4, 2810 Deoptimization::UnrollBlock:: 2811 total_frame_sizes_offset_in_bytes())); 2812 __ bang_stack_size(r1, r2); 2813 #endif 2814 2815 // Load address of array of frame pcs into r2 (address*) 2816 __ ldr(r2, Address(r4, 2817 Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); 2818 2819 // Load address of array of frame sizes into r5 (intptr_t*) 2820 __ ldr(r5, Address(r4, 2821 Deoptimization::UnrollBlock:: 2822 frame_sizes_offset_in_bytes())); 2823 2824 // Counter 2825 __ ldrw(r3, Address(r4, 2826 Deoptimization::UnrollBlock:: 2827 number_of_frames_offset_in_bytes())); // (int) 2828 2829 // Now adjust the caller's stack to make up for the extra locals but 2830 // record the original sp so that we can save it in the skeletal 2831 // interpreter frame and the stack walking of interpreter_sender 2832 // will get the unextended sp value and not the "real" sp value. 
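  // What the frame-pushing loop below builds, sketched in C-like pseudo-code
  // (this mirrors the assembly that follows; it is not a callable VM function
  // and the offset names are shorthand):
  //
  //   intptr_t* unextended_sp = sp;            // remembered as sender_sp
  //   sp -= caller_adjustment;                 // room for the callee's extra locals
  //   for (int k = 0; k < number_of_frames; k++) {
  //     push(frame_pcs[k]);                    // return pc of the skeletal frame
  //     push(fp); fp = sp;                     // enter(): link the new frame
  //     sp -= frame_sizes[k] - 2 * wordSize;   // pc and fp were pushed by hand
  //     fp[sender_sp_offset] = unextended_sp;  // walker sees the caller's
  //                                            // unextended sp, not the real one
  //     fp[last_sp_offset]   = 0;              // fixed up by layout_activation_impl
  //     unextended_sp = sp;                    // sender_sp of the next frame
  //   }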
2833 2834 const Register sender_sp = r8; 2835 2836 __ mov(sender_sp, sp); 2837 __ ldrw(r1, Address(r4, 2838 Deoptimization::UnrollBlock:: 2839 caller_adjustment_offset_in_bytes())); // (int) 2840 __ sub(sp, sp, r1); 2841 2842 // Push interpreter frames in a loop 2843 Label loop; 2844 __ bind(loop); 2845 __ ldr(r1, Address(r5, 0)); // Load frame size 2846 __ sub(r1, r1, 2 * wordSize); // We'll push pc and rfp by hand 2847 __ ldr(lr, Address(r2, 0)); // Save return address 2848 __ enter(); // and old rfp & set new rfp 2849 __ sub(sp, sp, r1); // Prolog 2850 __ str(sender_sp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable 2851 // This value is corrected by layout_activation_impl 2852 __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); 2853 __ mov(sender_sp, sp); // Pass sender_sp to next frame 2854 __ add(r5, r5, wordSize); // Bump array pointer (sizes) 2855 __ add(r2, r2, wordSize); // Bump array pointer (pcs) 2856 __ subsw(r3, r3, 1); // Decrement counter 2857 __ br(Assembler::GT, loop); 2858 __ ldr(lr, Address(r2, 0)); // save final return address 2859 // Re-push self-frame 2860 __ enter(); // & old rfp & set new rfp 2861 2862 // Use rfp because the frames look interpreted now 2863 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 2864 // Don't need the precise return PC here, just precise enough to point into this code blob. 2865 address the_pc = __ pc(); 2866 __ set_last_Java_frame(sp, rfp, the_pc, rscratch1); 2867 2868 // Call C code. Need thread but NOT official VM entry 2869 // crud. We cannot block on this call, no GC can happen. Call should 2870 // restore return values to their stack-slots with the new SP. 2871 // Thread is in rdi already. 2872 // 2873 // BasicType unpack_frames(JavaThread* thread, int exec_mode); 2874 // 2875 // n.b. 2 gp args, 0 fp args, integral return type 2876 2877 // sp should already be aligned 2878 __ mov(c_rarg0, rthread); 2879 __ movw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); 2880 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 2881 __ blr(rscratch1); 2882 2883 // Set an oopmap for the call site 2884 // Use the same PC we used for the last java frame 2885 oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); 2886 2887 // Clear fp AND pc 2888 __ reset_last_Java_frame(true); 2889 2890 // Pop self-frame. 2891 __ leave(); // Epilog 2892 2893 // Jump to interpreter 2894 __ ret(lr); 2895 2896 // Make sure all code is generated 2897 masm->flush(); 2898 2899 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, 2900 SimpleRuntimeFrame::framesize >> 1); 2901 } 2902 #endif // COMPILER2 2903 2904 2905 //------------------------------generate_handler_blob------ 2906 // 2907 // Generate a special Compile2Runtime blob that saves all registers, 2908 // and setup oopmap. 2909 // 2910 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { 2911 ResourceMark rm; 2912 OopMapSet *oop_maps = new OopMapSet(); 2913 OopMap* map; 2914 2915 // Allocate space for the code. Setup code generation tools. 
2916 CodeBuffer buffer("handler_blob", 2048, 1024); 2917 MacroAssembler* masm = new MacroAssembler(&buffer); 2918 2919 address start = __ pc(); 2920 address call_pc = NULL; 2921 int frame_size_in_words; 2922 bool cause_return = (poll_type == POLL_AT_RETURN); 2923 RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); 2924 2925 // When the signal occurred, the LR was either signed and stored on the stack (in which 2926 // case it will be restored from the stack before being used) or unsigned and not stored 2927 // on the stack. Stripping ensures we get the right value. 2928 __ strip_return_address(); 2929 2930 // Save Integer and Float registers. 2931 map = reg_save.save_live_registers(masm, 0, &frame_size_in_words); 2932 2933 // The following is basically a call_VM. However, we need the precise 2934 // address of the call in order to generate an oopmap. Hence, we do all the 2935 // work ourselves. 2936 2937 Label retaddr; 2938 __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); 2939 2940 // The return address must always be correct so that the frame constructor never 2941 // sees an invalid pc. 2942 2943 if (!cause_return) { 2944 // overwrite the return address pushed by save_live_registers 2945 // Additionally, r20 is a callee-saved register so we can look at 2946 // it later to determine if someone changed the return address for 2947 // us! 2948 __ ldr(r20, Address(rthread, JavaThread::saved_exception_pc_offset())); 2949 __ protect_return_address(r20, rscratch1); 2950 __ str(r20, Address(rfp, wordSize)); 2951 } 2952 2953 // Do the call 2954 __ mov(c_rarg0, rthread); 2955 __ lea(rscratch1, RuntimeAddress(call_ptr)); 2956 __ blr(rscratch1); 2957 __ bind(retaddr); 2958 2959 // Set an oopmap for the call site. This oopmap will map all 2960 // oop-registers and debug-info registers as callee-saved. This 2961 // will allow deoptimization at this safepoint to find all possible 2962 // debug-info recordings, as well as let GC find all oops. 2963 2964 oop_maps->add_gc_map( __ pc() - start, map); 2965 2966 Label noException; 2967 2968 __ reset_last_Java_frame(false); 2969 2970 __ membar(Assembler::LoadLoad | Assembler::LoadStore); 2971 2972 __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); 2973 __ cbz(rscratch1, noException); 2974 2975 // Exception pending 2976 2977 reg_save.restore_live_registers(masm); 2978 2979 __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 2980 2981 // No exception case 2982 __ bind(noException); 2983 2984 Label no_adjust, bail; 2985 if (!cause_return) { 2986 // If our stashed return pc was modified by the runtime we avoid touching it 2987 __ ldr(rscratch1, Address(rfp, wordSize)); 2988 __ cmp(r20, rscratch1); 2989 __ br(Assembler::NE, no_adjust); 2990 __ authenticate_return_address(r20, rscratch1); 2991 2992 #ifdef ASSERT 2993 // Verify the correct encoding of the poll we're about to skip.
2994 // See NativeInstruction::is_ldrw_to_zr() 2995 __ ldrw(rscratch1, Address(r20)); 2996 __ ubfx(rscratch2, rscratch1, 22, 10); 2997 __ cmpw(rscratch2, 0b1011100101); 2998 __ br(Assembler::NE, bail); 2999 __ ubfx(rscratch2, rscratch1, 0, 5); 3000 __ cmpw(rscratch2, 0b11111); 3001 __ br(Assembler::NE, bail); 3002 #endif 3003 // Adjust return pc forward to step over the safepoint poll instruction 3004 __ add(r20, r20, NativeInstruction::instruction_size); 3005 __ protect_return_address(r20, rscratch1); 3006 __ str(r20, Address(rfp, wordSize)); 3007 } 3008 3009 __ bind(no_adjust); 3010 // Normal exit, restore registers and exit. 3011 reg_save.restore_live_registers(masm); 3012 3013 __ ret(lr); 3014 3015 #ifdef ASSERT 3016 __ bind(bail); 3017 __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); 3018 #endif 3019 3020 // Make sure all code is generated 3021 masm->flush(); 3022 3023 // Fill-out other meta info 3024 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); 3025 } 3026 3027 // 3028 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss 3029 // 3030 // Generate a stub that calls into vm to find out the proper destination 3031 // of a java call. All the argument registers are live at this point 3032 // but since this is generic code we don't know what they are and the caller 3033 // must do any gc of the args. 3034 // 3035 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { 3036 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); 3037 3038 // allocate space for the code 3039 ResourceMark rm; 3040 3041 CodeBuffer buffer(name, 1000, 512); 3042 MacroAssembler* masm = new MacroAssembler(&buffer); 3043 3044 int frame_size_in_words; 3045 RegisterSaver reg_save(false /* save_vectors */); 3046 3047 OopMapSet *oop_maps = new OopMapSet(); 3048 OopMap* map = NULL; 3049 3050 int start = __ offset(); 3051 3052 map = reg_save.save_live_registers(masm, 0, &frame_size_in_words); 3053 3054 int frame_complete = __ offset(); 3055 3056 { 3057 Label retaddr; 3058 __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); 3059 3060 __ mov(c_rarg0, rthread); 3061 __ lea(rscratch1, RuntimeAddress(destination)); 3062 3063 __ blr(rscratch1); 3064 __ bind(retaddr); 3065 } 3066 3067 // Set an oopmap for the call site. 3068 // We need this not only for callee-saved registers, but also for volatile 3069 // registers that the compiler might be keeping live across a safepoint. 3070 3071 oop_maps->add_gc_map( __ offset() - start, map); 3072 3073 // r0 contains the address we are going to jump to assuming no exception got installed 3074 3075 // clear last_Java_sp 3076 __ reset_last_Java_frame(false); 3077 // check for pending exceptions 3078 Label pending; 3079 __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); 3080 __ cbnz(rscratch1, pending); 3081 3082 // get the returned Method* 3083 __ get_vm_result_2(rmethod, rthread); 3084 __ str(rmethod, Address(sp, reg_save.reg_offset_in_bytes(rmethod))); 3085 3086 // r0 is where we want to jump, overwrite rscratch1 which is saved and scratch 3087 __ str(r0, Address(sp, reg_save.rscratch1_offset_in_bytes())); 3088 reg_save.restore_live_registers(masm); 3089 3090 // We are back the the original state on entry and ready to go. 
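  // The branch below relies on a small trick performed above: the runtime's
  // result (the code address to jump to) was stored into rscratch1's slot in
  // the register save area, so restore_live_registers() reloads every argument
  // register to its original value and, as a side effect, materializes the
  // jump target in rscratch1, a scratch register that never carries an
  // argument. Sketched in C-like pseudo-code (illustrative only):
  //
  //   save_area[rmethod_slot]   = returned_Method;  // picked up by the callee
  //   save_area[rscratch1_slot] = r0;               // destination address
  //   restore_all_saved_registers();                // args restored, rscratch1 = destination
  //   goto *rscratch1;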

  __ br(rscratch1);

  // Pending exception after the safepoint

  __ bind(pending);

  reg_save.restore_live_registers(masm);

  // exception pending => remove activation and forward to exception handler

  __ str(zr, Address(rthread, JavaThread::vm_result_offset()));

  __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
  __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

  // -------------
  // make sure all code is generated
  masm->flush();

  // return the blob
  // frame_size_words or bytes??
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
}

#ifdef COMPILER2
// This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
//
//------------------------------generate_exception_blob---------------------------
// creates exception blob at the end
// Using the exception blob, this code is jumped to from a compiled method.
// (see emit_exception_handler in aarch64.ad file)
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
// (i.e. callee save registers), unwind the frame, and jump to the
// exception handler for the nmethod if there is no Java level handler
// for the nmethod.
//
// This code is entered with a jmp.
//
// Arguments:
//   r0: exception oop
//   r3: exception pc
//
// Results:
//   r0: exception oop
//   r3: exception pc in caller or ???
//   destination: exception handler of caller
//
// Note: the exception pc MUST be at a call (precise debug information)
//       Registers r0, r3, r2, r4, r5, r8-r11 are not callee saved.
//

void OptoRuntime::generate_exception_blob() {
  assert(!OptoRuntime::is_callee_saved_register(R3_num), "");
  assert(!OptoRuntime::is_callee_saved_register(R0_num), "");
  assert(!OptoRuntime::is_callee_saved_register(R2_num), "");

  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");

  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  CodeBuffer buffer("exception_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  // TODO check various assumptions made here
  //
  // make sure we do so before running this

  address start = __ pc();

  // push rfp and retaddr by hand
  // Exception pc is 'return address' for stack walker
  __ protect_return_address();
  __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize)));
  // there are no callee save registers and we don't expect an
  // arg reg save area
#ifndef PRODUCT
  assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
  // Store exception in Thread object. We cannot pass any arguments to the
  // handle_exception call, since we do not want to make any assumption
  // about the size of the frame where the exception happened in.
  __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
  __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));

  // This call does all the hard work. It checks if an exception handler
  // exists in the method.
  // If so, it returns the handler address.
  // If not, it prepares for stack-unwinding, restoring the callee-save
  // registers of the frame being removed.
  //
  // address OptoRuntime::handle_exception_C(JavaThread* thread)
  //
  // n.b. 1 gp arg, 0 fp args, integral return type

  // the stack should always be aligned
  address the_pc = __ pc();
  __ set_last_Java_frame(sp, noreg, the_pc, rscratch1);
  __ mov(c_rarg0, rthread);
  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)));
  __ blr(rscratch1);
  // handle_exception_C is a special VM call which does not require an explicit
  // instruction sync afterwards.

  // May jump to SVE compiled code
  __ reinitialize_ptrue();

  // Set an oopmap for the call site. This oopmap will only be used if we
  // are unwinding the stack. Hence, all locations will be dead.
  // Callee-saved registers will be the same as the frame above (i.e.,
  // handle_exception_stub), since they were restored when we got the
  // exception.

  OopMapSet* oop_maps = new OopMapSet();

  oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));

  __ reset_last_Java_frame(false);

  // Restore callee-saved registers

  // rfp is an implicitly saved callee saved register (i.e. the calling
  // convention will save/restore it in the prolog/epilog). Other than that
  // there are no callee save registers now that adapter frames are gone,
  // and we don't expect an arg reg save area.
  __ ldp(rfp, r3, Address(__ post(sp, 2 * wordSize)));
  __ authenticate_return_address(r3);

  // r0: exception handler

  // We have a handler in r0 (could be deopt blob).
  __ mov(r8, r0);

  // Get the exception oop
  __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
  // Get the exception pc in case we are deoptimized
  __ ldr(r4, Address(rthread, JavaThread::exception_pc_offset()));
#ifdef ASSERT
  __ str(zr, Address(rthread, JavaThread::exception_handler_pc_offset()));
  __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
#endif
  // Clear the exception oop so GC no longer processes it as a root.
  __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));

  // r0: exception oop
  // r8: exception handler
  // r4: exception pc
  // Jump to handler

  __ br(r8);

  // Make sure all code is generated
  masm->flush();

  // Set exception blob
  _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
}
#endif // COMPILER2

BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
  BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K);
  CodeBuffer buffer(buf);
  short buffer_locs[20];
  buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs,
                                         sizeof(buffer_locs)/sizeof(relocInfo));

  MacroAssembler _masm(&buffer);
  MacroAssembler* masm = &_masm;

  const Array<SigEntry>* sig_vk = vk->extended_sig();
  const Array<VMRegPair>* regs = vk->return_regs();

  int pack_fields_jobject_off = __ offset();
  // Resolve pre-allocated buffer from JNI handle.
  // We cannot do this in generate_call_stub() because it requires GC code to be initialized.
  Register Rresult = r14; // See StubGenerator::generate_call_stub().
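  // Rresult points at the caller's result slot, which currently holds a JNI handle
  // to the pre-allocated buffer. Load the handle, resolve it to the raw oop, and
  // write the oop back so the pack code below can store fields relative to r0.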
  __ ldr(r0, Address(Rresult));
  __ resolve_jobject(r0 /* value */,
                     rthread /* thread */,
                     r12 /* tmp */);
  __ str(r0, Address(Rresult));

  int pack_fields_off = __ offset();

  int j = 1;
  for (int i = 0; i < sig_vk->length(); i++) {
    BasicType bt = sig_vk->at(i)._bt;
    if (bt == T_PRIMITIVE_OBJECT) {
      continue;
    }
    if (bt == T_VOID) {
      if (sig_vk->at(i-1)._bt == T_LONG ||
          sig_vk->at(i-1)._bt == T_DOUBLE) {
        j++;
      }
      continue;
    }
    int off = sig_vk->at(i)._offset;
    VMRegPair pair = regs->at(j);
    VMReg r_1 = pair.first();
    VMReg r_2 = pair.second();
    Address to(r0, off);
    if (bt == T_FLOAT) {
      __ strs(r_1->as_FloatRegister(), to);
    } else if (bt == T_DOUBLE) {
      __ strd(r_1->as_FloatRegister(), to);
    } else if (bt == T_OBJECT || bt == T_ARRAY) {
      Register val = r_1->as_Register();
      assert_different_registers(r0, val);
      // We don't need barriers because the destination is a newly allocated object.
      // Also, we cannot use store_heap_oop(to, val) because it uses r8 as tmp.
      if (UseCompressedOops) {
        __ encode_heap_oop(val);
        __ str(val, to);
      } else {
        __ str(val, to);
      }
    } else {
      assert(is_java_primitive(bt), "unexpected basic type");
      assert_different_registers(r0, r_1->as_Register());
      size_t size_in_bytes = type2aelembytes(bt);
      __ store_sized_value(to, r_1->as_Register(), size_in_bytes);
    }
    j++;
  }
  assert(j == regs->length(), "missed a field?");

  __ ret(lr);

  int unpack_fields_off = __ offset();

  Label skip;
  __ cbz(r0, skip);

  j = 1;
  for (int i = 0; i < sig_vk->length(); i++) {
    BasicType bt = sig_vk->at(i)._bt;
    if (bt == T_PRIMITIVE_OBJECT) {
      continue;
    }
    if (bt == T_VOID) {
      if (sig_vk->at(i-1)._bt == T_LONG ||
          sig_vk->at(i-1)._bt == T_DOUBLE) {
        j++;
      }
      continue;
    }
    int off = sig_vk->at(i)._offset;
    assert(off > 0, "offset in object should be positive");
    VMRegPair pair = regs->at(j);
    VMReg r_1 = pair.first();
    VMReg r_2 = pair.second();
    Address from(r0, off);
    if (bt == T_FLOAT) {
      __ ldrs(r_1->as_FloatRegister(), from);
    } else if (bt == T_DOUBLE) {
      __ ldrd(r_1->as_FloatRegister(), from);
    } else if (bt == T_OBJECT || bt == T_ARRAY) {
      assert_different_registers(r0, r_1->as_Register());
      __ load_heap_oop(r_1->as_Register(), from);
    } else {
      assert(is_java_primitive(bt), "unexpected basic type");
      assert_different_registers(r0, r_1->as_Register());

      size_t size_in_bytes = type2aelembytes(bt);
      __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN);
    }
    j++;
  }
  assert(j == regs->length(), "missed a field?");

  __ bind(skip);

  __ ret(lr);

  __ flush();

  return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off);
}

// ---------------------------------------------------------------

class NativeInvokerGenerator : public StubCodeGenerator {
  address _call_target;
  int _shadow_space_bytes;

  const GrowableArray<VMReg>& _input_registers;
  const GrowableArray<VMReg>& _output_registers;

  int _frame_complete;
  int _framesize;
  OopMapSet* _oop_maps;
public:
  NativeInvokerGenerator(CodeBuffer* buffer,
                         address call_target,
                         int shadow_space_bytes,
                         const GrowableArray<VMReg>& input_registers,
                         const GrowableArray<VMReg>& output_registers)
   : StubCodeGenerator(buffer, PrintMethodHandleStubs),
     _call_target(call_target),
     _shadow_space_bytes(shadow_space_bytes),
     _input_registers(input_registers),
     _output_registers(output_registers),
     _frame_complete(0),
     _framesize(0),
     _oop_maps(NULL) {
    assert(_output_registers.length() <= 1
           || (_output_registers.length() == 2 && !_output_registers.at(1)->is_valid()), "no multi-reg returns");
  }

  void generate();

  int spill_size_in_bytes() const {
    if (_output_registers.length() == 0) {
      return 0;
    }
    VMReg reg = _output_registers.at(0);
    assert(reg->is_reg(), "must be a register");
    if (reg->is_Register()) {
      return 8;
    } else if (reg->is_FloatRegister()) {
      bool use_sve = Matcher::supports_scalable_vector();
      if (use_sve) {
        return Matcher::scalable_vector_reg_size(T_BYTE);
      }
      return 16;
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  void spill_output_registers() {
    if (_output_registers.length() == 0) {
      return;
    }
    VMReg reg = _output_registers.at(0);
    assert(reg->is_reg(), "must be a register");
    MacroAssembler* masm = _masm;
    if (reg->is_Register()) {
      __ spill(reg->as_Register(), true, 0);
    } else if (reg->is_FloatRegister()) {
      bool use_sve = Matcher::supports_scalable_vector();
      if (use_sve) {
        __ spill_sve_vector(reg->as_FloatRegister(), 0, Matcher::scalable_vector_reg_size(T_BYTE));
      } else {
        __ spill(reg->as_FloatRegister(), __ Q, 0);
      }
    } else {
      ShouldNotReachHere();
    }
  }

  void fill_output_registers() {
    if (_output_registers.length() == 0) {
      return;
    }
    VMReg reg = _output_registers.at(0);
    assert(reg->is_reg(), "must be a register");
    MacroAssembler* masm = _masm;
    if (reg->is_Register()) {
      __ unspill(reg->as_Register(), true, 0);
    } else if (reg->is_FloatRegister()) {
      bool use_sve = Matcher::supports_scalable_vector();
      if (use_sve) {
        __ unspill_sve_vector(reg->as_FloatRegister(), 0, Matcher::scalable_vector_reg_size(T_BYTE));
      } else {
        __ unspill(reg->as_FloatRegister(), __ Q, 0);
      }
    } else {
      ShouldNotReachHere();
    }
  }

  int frame_complete() const {
    return _frame_complete;
  }

  int framesize() const {
    return (_framesize >> (LogBytesPerWord - LogBytesPerInt));
  }

  OopMapSet* oop_maps() const {
    return _oop_maps;
  }

private:
#ifdef ASSERT
  bool target_uses_register(VMReg reg) {
    return _input_registers.contains(reg) || _output_registers.contains(reg);
  }
#endif
};

static const int native_invoker_code_size = 1024;

RuntimeStub* SharedRuntime::make_native_invoker(address call_target,
                                                int shadow_space_bytes,
                                                const GrowableArray<VMReg>& input_registers,
                                                const GrowableArray<VMReg>& output_registers) {
  int locs_size = 64;
  CodeBuffer code("nep_invoker_blob", native_invoker_code_size, locs_size);
  NativeInvokerGenerator g(&code, call_target, shadow_space_bytes, input_registers, output_registers);
  g.generate();
  code.log_section_sizes("nep_invoker_blob");

  RuntimeStub* stub =
    RuntimeStub::new_runtime_stub("nep_invoker_blob",
                                  &code,
                                  g.frame_complete(),
                                  g.framesize(),
                                  g.oop_maps(), false);
  return stub;
}

void NativeInvokerGenerator::generate() {
  assert(!(target_uses_register(rscratch1->as_VMReg())
           || target_uses_register(rscratch2->as_VMReg())
           || target_uses_register(rthread->as_VMReg())),
         "Register conflict");

  enum layout {
    rbp_off,
    rbp_off2,
    return_off,
    return_off2,
    framesize // inclusive of return address
  };

  assert(_shadow_space_bytes == 0, "not expecting shadow space on AArch64");
  _framesize = align_up(framesize + (spill_size_in_bytes() >> LogBytesPerInt), 4);
  assert(is_even(_framesize/2), "sp not 16-byte aligned");

  _oop_maps = new OopMapSet();
  MacroAssembler* masm = _masm;

  address start = __ pc();

  __ enter();

  // lr and fp are already in place
  __ sub(sp, rfp, ((unsigned)_framesize-4) << LogBytesPerInt); // prolog

  _frame_complete = __ pc() - start;

  address the_pc = __ pc();
  __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
  OopMap* map = new OopMap(_framesize, 0);
  _oop_maps->add_gc_map(the_pc - start, map);

  // State transition
  __ mov(rscratch1, _thread_in_native);
  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
  __ stlrw(rscratch1, rscratch2);

  rt_call(masm, _call_target);

  __ mov(rscratch1, _thread_in_native_trans);
  __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));

  // Force this write out before the read below
  __ membar(Assembler::LoadLoad | Assembler::LoadStore |
            Assembler::StoreLoad | Assembler::StoreStore);

  __ verify_sve_vector_length();

  Label L_after_safepoint_poll;
  Label L_safepoint_poll_slow_path;

  __ safepoint_poll(L_safepoint_poll_slow_path, true /* at_return */, true /* acquire */, false /* in_nmethod */);

  __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
  __ cbnzw(rscratch1, L_safepoint_poll_slow_path);

  __ bind(L_after_safepoint_poll);

  // change thread state
  __ mov(rscratch1, _thread_in_Java);
  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
  __ stlrw(rscratch1, rscratch2);

  __ block_comment("reguard stack check");
  Label L_reguard;
  Label L_after_reguard;
  __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
  __ cmpw(rscratch1, StackOverflow::stack_guard_yellow_reserved_disabled);
  __ br(Assembler::EQ, L_reguard);
  __ bind(L_after_reguard);

  __ reset_last_Java_frame(true);

  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(lr);

  //////////////////////////////////////////////////////////////////////////////

  __ block_comment("{ L_safepoint_poll_slow_path");
  __ bind(L_safepoint_poll_slow_path);

  // Need to save the native result registers around any runtime calls.
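  // The VM call below may clobber the return registers (r0 or v0), so park the
  // native result in the spill slot reserved in this frame and reload it after.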
  spill_output_registers();

  __ mov(c_rarg0, rthread);
  assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
  __ blr(rscratch1);

  fill_output_registers();

  __ b(L_after_safepoint_poll);
  __ block_comment("} L_safepoint_poll_slow_path");

  //////////////////////////////////////////////////////////////////////////////

  __ block_comment("{ L_reguard");
  __ bind(L_reguard);

  spill_output_registers();

  rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));

  fill_output_registers();

  __ b(L_after_reguard);

  __ block_comment("} L_reguard");

  //////////////////////////////////////////////////////////////////////////////

  __ flush();
}