9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "asm/macroAssembler.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "code/aotCodeCache.hpp"
30 #include "code/codeCache.hpp"
31 #include "code/compiledIC.hpp"
32 #include "code/debugInfoRec.hpp"
33 #include "code/vtableStubs.hpp"
34 #include "compiler/oopMap.hpp"
35 #include "gc/shared/barrierSetAssembler.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "interpreter/interp_masm.hpp"
38 #include "logging/log.hpp"
39 #include "memory/resourceArea.hpp"
40 #include "nativeInst_aarch64.hpp"
41 #include "oops/klass.inline.hpp"
42 #include "oops/method.inline.hpp"
43 #include "prims/methodHandles.hpp"
44 #include "runtime/continuation.hpp"
45 #include "runtime/continuationEntry.inline.hpp"
46 #include "runtime/globals.hpp"
47 #include "runtime/jniHandles.hpp"
48 #include "runtime/safepointMechanism.hpp"
184
185 int frame_size_in_bytes = align_up(additional_frame_words * wordSize +
186 reg_save_size * BytesPerInt, 16);
187 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
188 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
189 // The caller will allocate additional_frame_words
190 int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
191 // CodeBlob frame size is in words.
192 int frame_size_in_words = frame_size_in_bytes / wordSize;
193 *total_frame_words = frame_size_in_words;
194
195 // Save Integer and Float registers.
196 __ enter();
197 __ push_CPU_state(_save_vectors, use_sve, sve_vector_size_in_bytes, total_predicate_in_bytes);
198
199 // Set an oopmap for the call site. This oopmap will map all
200 // oop-registers and debug-info registers as callee-saved. This
201 // will allow deoptimization at this safepoint to find all possible
202 // debug-info recordings, as well as let GC find all oops.
203
204 OopMapSet *oop_maps = new OopMapSet();
205 OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
206
207 for (int i = 0; i < Register::number_of_registers; i++) {
208 Register r = as_Register(i);
209 if (i <= rfp->encoding() && r != rscratch1 && r != rscratch2) {
210 // SP offsets are in 4-byte words.
211 // Register slots are 8 bytes wide, 32 floating-point registers.
212 int sp_offset = Register::max_slots_per_register * i +
213 FloatRegister::save_slots_per_register * FloatRegister::number_of_registers;
214 oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), r->as_VMReg());
215 }
216 }
217
218 for (int i = 0; i < FloatRegister::number_of_registers; i++) {
219 FloatRegister r = as_FloatRegister(i);
220 int sp_offset = 0;
221 if (_save_vectors) {
222 sp_offset = use_sve ? (total_predicate_in_slots + sve_vector_size_in_slots * i) :
223 (FloatRegister::slots_per_neon_register * i);
224 } else {
334 break;
335 case T_DOUBLE:
336 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
337 if (fp_args < Argument::n_float_register_parameters_j) {
338 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
339 } else {
340 stk_args = align_up(stk_args, 2);
341 regs[i].set2(VMRegImpl::stack2reg(stk_args));
342 stk_args += 2;
343 }
344 break;
345 default:
346 ShouldNotReachHere();
347 break;
348 }
349 }
350
351 return stk_args;
352 }
353
354 // Patch the callers callsite with entry to compiled code if it exists.
355 static void patch_callers_callsite(MacroAssembler *masm) {
356 Label L;
357 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
358 __ cbz(rscratch1, L);
359
360 __ enter();
361 __ push_CPU_state();
362
363 // VM needs caller's callsite
364 // VM needs target method
365 // This needs to be a long call since we will relocate this adapter to
366 // the codeBuffer and it may not reach
367
368 #ifndef PRODUCT
369 assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
370 #endif
371
372 __ mov(c_rarg0, rmethod);
373 __ mov(c_rarg1, lr);
374 __ authenticate_return_address(c_rarg1);
375 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
376 __ blr(rscratch1);
377
378 // Explicit isb required because fixup_callers_callsite may change the code
379 // stream.
380 __ safepoint_isb();
381
382 __ pop_CPU_state();
383 // restore sp
384 __ leave();
385 __ bind(L);
386 }
387
388 static void gen_c2i_adapter(MacroAssembler *masm,
389 int total_args_passed,
390 int comp_args_on_stack,
391 const BasicType *sig_bt,
392 const VMRegPair *regs,
393 Label& skip_fixup) {
394 // Before we get into the guts of the C2I adapter, see if we should be here
395 // at all. We've come from compiled code and are attempting to jump to the
396 // interpreter, which means the caller made a static call to get here
397 // (vcalls always get a compiled target if there is one). Check for a
398 // compiled target. If there is one, we need to patch the caller's call.
399 patch_callers_callsite(masm);
400
401 __ bind(skip_fixup);
402
403 int words_pushed = 0;
404
405 // Since all args are passed on the stack, total_args_passed *
406 // Interpreter::stackElementSize is the space we need.
407
408 int extraspace = total_args_passed * Interpreter::stackElementSize;
409
410 __ mov(r19_sender_sp, sp);
411
412 // stack is aligned, keep it that way
413 extraspace = align_up(extraspace, 2*wordSize);
414
415 if (extraspace)
416 __ sub(sp, sp, extraspace);
417
418 // Now write the args into the outgoing interpreter space
419 for (int i = 0; i < total_args_passed; i++) {
420 if (sig_bt[i] == T_VOID) {
421 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
422 continue;
423 }
424
425 // offset to start parameters
426 int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize;
427 int next_off = st_off - Interpreter::stackElementSize;
428
429 // Say 4 args:
430 // i st_off
431 // 0 32 T_LONG
432 // 1 24 T_VOID
433 // 2 16 T_OBJECT
434 // 3 8 T_BOOL
435 // - 0 return address
436 //
437 // However to make thing extra confusing. Because we can fit a Java long/double in
438 // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter
439 // leaves one slot empty and only stores to a single slot. In this case the
440 // slot that is occupied is the T_VOID slot. See I said it was confusing.
441
442 VMReg r_1 = regs[i].first();
443 VMReg r_2 = regs[i].second();
444 if (!r_1->is_valid()) {
445 assert(!r_2->is_valid(), "");
446 continue;
447 }
448 if (r_1->is_stack()) {
449 // memory to memory use rscratch1
450 int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size
451 + extraspace
452 + words_pushed * wordSize);
453 if (!r_2->is_valid()) {
454 // sign extend??
455 __ ldrw(rscratch1, Address(sp, ld_off));
456 __ str(rscratch1, Address(sp, st_off));
457
458 } else {
459
460 __ ldr(rscratch1, Address(sp, ld_off));
461
462 // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
463 // T_DOUBLE and T_LONG use two slots in the interpreter
464 if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
465 // ld_off == LSW, ld_off+wordSize == MSW
466 // st_off == MSW, next_off == LSW
467 __ str(rscratch1, Address(sp, next_off));
468 #ifdef ASSERT
469 // Overwrite the unused slot with known junk
470 __ mov(rscratch1, (uint64_t)0xdeadffffdeadaaaaull);
471 __ str(rscratch1, Address(sp, st_off));
472 #endif /* ASSERT */
473 } else {
474 __ str(rscratch1, Address(sp, st_off));
475 }
476 }
477 } else if (r_1->is_Register()) {
478 Register r = r_1->as_Register();
479 if (!r_2->is_valid()) {
480 // must be only an int (or less ) so move only 32bits to slot
481 // why not sign extend??
482 __ str(r, Address(sp, st_off));
483 } else {
484 // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
485 // T_DOUBLE and T_LONG use two slots in the interpreter
486 if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
487 // jlong/double in gpr
488 #ifdef ASSERT
489 // Overwrite the unused slot with known junk
490 __ mov(rscratch1, (uint64_t)0xdeadffffdeadaaabull);
491 __ str(rscratch1, Address(sp, st_off));
492 #endif /* ASSERT */
493 __ str(r, Address(sp, next_off));
494 } else {
495 __ str(r, Address(sp, st_off));
496 }
497 }
498 } else {
499 assert(r_1->is_FloatRegister(), "");
500 if (!r_2->is_valid()) {
501 // only a float use just part of the slot
502 __ strs(r_1->as_FloatRegister(), Address(sp, st_off));
503 } else {
504 #ifdef ASSERT
505 // Overwrite the unused slot with known junk
506 __ mov(rscratch1, (uint64_t)0xdeadffffdeadaaacull);
507 __ str(rscratch1, Address(sp, st_off));
508 #endif /* ASSERT */
509 __ strd(r_1->as_FloatRegister(), Address(sp, next_off));
510 }
511 }
512 }
513
514 __ mov(esp, sp); // Interp expects args on caller's expression stack
515
516 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::interpreter_entry_offset())));
517 __ br(rscratch1);
518 }
519
520
521 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
522 int total_args_passed,
523 int comp_args_on_stack,
524 const BasicType *sig_bt,
525 const VMRegPair *regs) {
526
527 // Note: r19_sender_sp contains the senderSP on entry. We must
528 // preserve it since we may do a i2c -> c2i transition if we lose a
529 // race where compiled code goes non-entrant while we get args
530 // ready.
531
532 // Adapters are frameless.
533
534 // An i2c adapter is frameless because the *caller* frame, which is
535 // interpreted, routinely repairs its own esp (from
536 // interpreter_frame_last_sp), even if a callee has modified the
537 // stack pointer. It also recalculates and aligns sp.
538
539 // A c2i adapter is frameless because the *callee* frame, which is
540 // interpreted, routinely repairs its caller's sp (from sender_sp,
541 // which is set up via the senderSP register).
542
543 // In other words, if *either* the caller or callee is interpreted, we can
544 // get the stack pointer repaired after a call.
545
546 // This is why c2i and i2c adapters cannot be indefinitely composed.
547 // In particular, if a c2i adapter were to somehow call an i2c adapter,
548 // both caller and callee would be compiled methods, and neither would
549 // clean up the stack pointer changes performed by the two adapters.
550 // If this happens, control eventually transfers back to the compiled
551 // caller, but with an uncorrected stack, causing delayed havoc.
552
553 // Cut-out for having no stack args.
554 int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
555 if (comp_args_on_stack) {
556 __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
557 __ andr(sp, rscratch1, -16);
558 }
559
560 // Will jump to the compiled code just as if compiled code was doing it.
561 // Pre-load the register-jump target early, to schedule it better.
562 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
563
564 // Now generate the shuffle code.
565 for (int i = 0; i < total_args_passed; i++) {
566 if (sig_bt[i] == T_VOID) {
567 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
568 continue;
569 }
570
571 // Pick up 0, 1 or 2 words from SP+offset.
572
573 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
574 "scrambled load targets?");
575 // Load in argument order going down.
576 int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize;
577 // Point to interpreter value (vs. tag)
578 int next_off = ld_off - Interpreter::stackElementSize;
579 //
580 //
581 //
582 VMReg r_1 = regs[i].first();
583 VMReg r_2 = regs[i].second();
584 if (!r_1->is_valid()) {
585 assert(!r_2->is_valid(), "");
586 continue;
587 }
588 if (r_1->is_stack()) {
589 // Convert stack slot to an SP offset (+ wordSize to account for return address )
590 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size;
591 if (!r_2->is_valid()) {
592 // sign extend???
593 __ ldrsw(rscratch2, Address(esp, ld_off));
594 __ str(rscratch2, Address(sp, st_off));
595 } else {
596 //
597 // We are using two optoregs. This can be either T_OBJECT,
598 // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates
599 // two slots but only uses one for thr T_LONG or T_DOUBLE case
600 // So we must adjust where to pick up the data to match the
601 // interpreter.
602 //
603 // Interpreter local[n] == MSW, local[n+1] == LSW however locals
604 // are accessed as negative so LSW is at LOW address
605
606 // ld_off is MSW so get LSW
607 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
608 next_off : ld_off;
609 __ ldr(rscratch2, Address(esp, offset));
610 // st_off is LSW (i.e. reg.first())
611 __ str(rscratch2, Address(sp, st_off));
612 }
613 } else if (r_1->is_Register()) { // Register argument
614 Register r = r_1->as_Register();
615 if (r_2->is_valid()) {
616 //
617 // We are using two VMRegs. This can be either T_OBJECT,
618 // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates
619 // two slots but only uses one for thr T_LONG or T_DOUBLE case
620 // So we must adjust where to pick up the data to match the
621 // interpreter.
622
623 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
624 next_off : ld_off;
625
626 // this can be a misaligned move
627 __ ldr(r, Address(esp, offset));
628 } else {
629 // sign extend and use a full word?
630 __ ldrw(r, Address(esp, ld_off));
631 }
632 } else {
633 if (!r_2->is_valid()) {
634 __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off));
635 } else {
636 __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off));
637 }
638 }
639 }
640
641 __ mov(rscratch2, rscratch1);
642 __ push_cont_fastpath(rthread); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about; kills rscratch1
643 __ mov(rscratch1, rscratch2);
644
645 // 6243940 We might end up in handle_wrong_method if
646 // the callee is deoptimized as we race thru here. If that
647 // happens we don't want to take a safepoint because the
648 // caller frame will look interpreted and arguments are now
649 // "compiled" so it is much better to make this transition
650 // invisible to the stack walking code. Unfortunately if
651 // we try and find the callee by normal means a safepoint
652 // is possible. So we stash the desired callee in the thread
653 // and the vm will find there should this case occur.
654
655 __ str(rmethod, Address(rthread, JavaThread::callee_target_offset()));
656
657 __ br(rscratch1);
658 }
659
660 // ---------------------------------------------------------------
661 void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
662 int total_args_passed,
663 int comp_args_on_stack,
664 const BasicType *sig_bt,
665 const VMRegPair *regs,
666 address entry_address[AdapterBlob::ENTRY_COUNT]) {
667 entry_address[AdapterBlob::I2C] = __ pc();
668
669 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
670
671 entry_address[AdapterBlob::C2I_Unverified] = __ pc();
672 Label skip_fixup;
673
674 Register data = rscratch2;
675 Register receiver = j_rarg0;
676 Register tmp = r10; // A call-clobbered register not used for arg passing
677
678 // -------------------------------------------------------------------------
679 // Generate a C2I adapter. On entry we know rmethod holds the Method* during calls
680 // to the interpreter. The args start out packed in the compiled layout. They
681 // need to be unpacked into the interpreter layout. This will almost always
682 // require some stack space. We grow the current (compiled) stack, then repack
683 // the args. We finally end in a jump to the generic interpreter entry point.
684 // On exit from the interpreter, the interpreter will restore our SP (lest the
685 // compiled code, which relies solely on SP and not FP, get sick).
686
687 {
688 __ block_comment("c2i_unverified_entry {");
689 // Method might have been compiled since the call site was patched to
690 // interpreted; if that is the case treat it as a miss so we can get
691 // the call site corrected.
692 __ ic_check(1 /* end_alignment */);
693 __ ldr(rmethod, Address(data, CompiledICData::speculated_method_offset()));
694
695 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
696 __ cbz(rscratch1, skip_fixup);
697 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
698 __ block_comment("} c2i_unverified_entry");
699 }
700
701 entry_address[AdapterBlob::C2I] = __ pc();
702
703 // Class initialization barrier for static methods
704 entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
705 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
706 Label L_skip_barrier;
707
708 // Bypass the barrier for non-static methods
709 __ ldrh(rscratch1, Address(rmethod, Method::access_flags_offset()));
710 __ andsw(zr, rscratch1, JVM_ACC_STATIC);
711 __ br(Assembler::EQ, L_skip_barrier); // non-static
712
713 __ load_method_holder(rscratch2, rmethod);
714 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
715 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
716
717 __ bind(L_skip_barrier);
718 entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc();
719
720 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
721 bs->c2i_entry_barrier(masm);
722
723 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
724 return;
725 }
726
727 static int c_calling_convention_priv(const BasicType *sig_bt,
728 VMRegPair *regs,
729 int total_args_passed) {
730
731 // We return the amount of VMRegImpl stack slots we need to reserve for all
732 // the arguments NOT counting out_preserve_stack_slots.
733
734 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
735 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7
736 };
737 static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
738 c_farg0, c_farg1, c_farg2, c_farg3,
739 c_farg4, c_farg5, c_farg6, c_farg7
740 };
741
742 uint int_args = 0;
743 uint fp_args = 0;
744 uint stk_args = 0; // inc by 2 each time
2603
2604 // exception pending => remove activation and forward to exception handler
2605
2606 __ str(zr, Address(rthread, JavaThread::vm_result_oop_offset()));
2607
2608 __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
2609 __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2610
2611 // -------------
2612 // make sure all code is generated
2613 masm->flush();
2614
2615 // return the blob
2616 // frame_size_words or bytes??
2617 RuntimeStub* rs_blob = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
2618
2619 AOTCodeCache::store_code_blob(*rs_blob, AOTCodeEntry::SharedBlob, StubInfo::blob(id));
2620 return rs_blob;
2621 }
2622
2623 // Continuation point for throwing of implicit exceptions that are
2624 // not handled in the current activation. Fabricates an exception
2625 // oop and initiates normal exception dispatching in this
2626 // frame. Since we need to preserve callee-saved values (currently
2627 // only for C2, but done for C1 as well) we need a callee-saved oop
2628 // map and therefore have to make these stubs into RuntimeStubs
2629 // rather than BufferBlobs. If the compiler needs all registers to
2630 // be preserved between the fault point and the exception handler
2631 // then it must assume responsibility for that in
2632 // AbstractCompiler::continuation_for_implicit_null_exception or
2633 // continuation_for_implicit_division_by_zero_exception. All other
2634 // implicit exceptions (e.g., NullPointerException or
2635 // AbstractMethodError on entry) are either at call sites or
2636 // otherwise assume that stack unwinding will be initiated, so
2637 // caller saved registers were assumed volatile in the compiler.
2638
2639 RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) {
2640 assert(is_throw_id(id), "expected a throw stub id");
2641
2642 const char* name = SharedRuntime::stub_name(id);
|
9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "asm/macroAssembler.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "classfile/symbolTable.hpp"
30 #include "code/aotCodeCache.hpp"
31 #include "code/codeCache.hpp"
32 #include "code/compiledIC.hpp"
33 #include "code/debugInfoRec.hpp"
34 #include "code/vtableStubs.hpp"
35 #include "compiler/oopMap.hpp"
36 #include "gc/shared/barrierSetAssembler.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "interpreter/interp_masm.hpp"
39 #include "logging/log.hpp"
40 #include "memory/resourceArea.hpp"
41 #include "nativeInst_aarch64.hpp"
42 #include "oops/klass.inline.hpp"
43 #include "oops/method.inline.hpp"
44 #include "prims/methodHandles.hpp"
45 #include "runtime/continuation.hpp"
46 #include "runtime/continuationEntry.inline.hpp"
47 #include "runtime/globals.hpp"
48 #include "runtime/jniHandles.hpp"
49 #include "runtime/safepointMechanism.hpp"
185
186 int frame_size_in_bytes = align_up(additional_frame_words * wordSize +
187 reg_save_size * BytesPerInt, 16);
188 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
189 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
190 // The caller will allocate additional_frame_words
191 int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
192 // CodeBlob frame size is in words.
193 int frame_size_in_words = frame_size_in_bytes / wordSize;
194 *total_frame_words = frame_size_in_words;
195
196 // Save Integer and Float registers.
197 __ enter();
198 __ push_CPU_state(_save_vectors, use_sve, sve_vector_size_in_bytes, total_predicate_in_bytes);
199
200 // Set an oopmap for the call site. This oopmap will map all
201 // oop-registers and debug-info registers as callee-saved. This
202 // will allow deoptimization at this safepoint to find all possible
203 // debug-info recordings, as well as let GC find all oops.
204
205 OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
206
207 for (int i = 0; i < Register::number_of_registers; i++) {
208 Register r = as_Register(i);
209 if (i <= rfp->encoding() && r != rscratch1 && r != rscratch2) {
210 // SP offsets are in 4-byte words.
211 // Register slots are 8 bytes wide, 32 floating-point registers.
212 int sp_offset = Register::max_slots_per_register * i +
213 FloatRegister::save_slots_per_register * FloatRegister::number_of_registers;
214 oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), r->as_VMReg());
215 }
216 }
217
218 for (int i = 0; i < FloatRegister::number_of_registers; i++) {
219 FloatRegister r = as_FloatRegister(i);
220 int sp_offset = 0;
221 if (_save_vectors) {
222 sp_offset = use_sve ? (total_predicate_in_slots + sve_vector_size_in_slots * i) :
223 (FloatRegister::slots_per_neon_register * i);
224 } else {
334 break;
335 case T_DOUBLE:
336 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
337 if (fp_args < Argument::n_float_register_parameters_j) {
338 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
339 } else {
340 stk_args = align_up(stk_args, 2);
341 regs[i].set2(VMRegImpl::stack2reg(stk_args));
342 stk_args += 2;
343 }
344 break;
345 default:
346 ShouldNotReachHere();
347 break;
348 }
349 }
350
351 return stk_args;
352 }
353
354
355 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j;
356 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
357
358 int SharedRuntime::java_return_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed) {
359
360 // Create the mapping between argument positions and registers.
361
362 static const Register INT_ArgReg[java_return_convention_max_int] = {
363 r0 /* j_rarg7 */, j_rarg6, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
364 };
365
366 static const FloatRegister FP_ArgReg[java_return_convention_max_float] = {
367 j_farg0, j_farg1, j_farg2, j_farg3, j_farg4, j_farg5, j_farg6, j_farg7
368 };
369
370 uint int_args = 0;
371 uint fp_args = 0;
372
373 for (int i = 0; i < total_args_passed; i++) {
374 switch (sig_bt[i]) {
375 case T_BOOLEAN:
376 case T_CHAR:
377 case T_BYTE:
378 case T_SHORT:
379 case T_INT:
380 if (int_args < SharedRuntime::java_return_convention_max_int) {
381 regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
382 int_args ++;
383 } else {
384 return -1;
385 }
386 break;
387 case T_VOID:
388 // halves of T_LONG or T_DOUBLE
389 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
390 regs[i].set_bad();
391 break;
392 case T_LONG:
393 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
394 // fall through
395 case T_OBJECT:
396 case T_ARRAY:
397 case T_ADDRESS:
398 // Should T_METADATA be added to java_calling_convention as well ?
399 case T_METADATA:
400 if (int_args < SharedRuntime::java_return_convention_max_int) {
401 regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
402 int_args ++;
403 } else {
404 return -1;
405 }
406 break;
407 case T_FLOAT:
408 if (fp_args < SharedRuntime::java_return_convention_max_float) {
409 regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
410 fp_args ++;
411 } else {
412 return -1;
413 }
414 break;
415 case T_DOUBLE:
416 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
417 if (fp_args < SharedRuntime::java_return_convention_max_float) {
418 regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
419 fp_args ++;
420 } else {
421 return -1;
422 }
423 break;
424 default:
425 ShouldNotReachHere();
426 break;
427 }
428 }
429
430 return int_args + fp_args;
431 }
432
433 // Patch the callers callsite with entry to compiled code if it exists.
434 static void patch_callers_callsite(MacroAssembler *masm) {
435 Label L;
436 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
437 __ cbz(rscratch1, L);
438
439 __ enter();
440 __ push_CPU_state();
441
442 // VM needs caller's callsite
443 // VM needs target method
444 // This needs to be a long call since we will relocate this adapter to
445 // the codeBuffer and it may not reach
446
447 #ifndef PRODUCT
448 assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
449 #endif
450
451 __ mov(c_rarg0, rmethod);
452 __ mov(c_rarg1, lr);
453 __ authenticate_return_address(c_rarg1);
454 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
455 __ blr(rscratch1);
456
457 // Explicit isb required because fixup_callers_callsite may change the code
458 // stream.
459 __ safepoint_isb();
460
461 __ pop_CPU_state();
462 // restore sp
463 __ leave();
464 __ bind(L);
465 }
466
467 // For each inline type argument, sig includes the list of fields of
468 // the inline type. This utility function computes the number of
469 // arguments for the call if inline types are passed by reference (the
470 // calling convention the interpreter expects).
471 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
472 int total_args_passed = 0;
473 if (InlineTypePassFieldsAsArgs) {
474 for (int i = 0; i < sig_extended->length(); i++) {
475 BasicType bt = sig_extended->at(i)._bt;
476 if (bt == T_METADATA) {
477 // In sig_extended, an inline type argument starts with:
478 // T_METADATA, followed by the types of the fields of the
479 // inline type and T_VOID to mark the end of the value
480 // type. Inline types are flattened so, for instance, in the
481 // case of an inline type with an int field and an inline type
482 // field that itself has 2 fields, an int and a long:
483 // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second
484 // slot for the T_LONG) T_VOID (inner inline type) T_VOID
485 // (outer inline type)
486 total_args_passed++;
487 int vt = 1;
488 do {
489 i++;
490 BasicType bt = sig_extended->at(i)._bt;
491 BasicType prev_bt = sig_extended->at(i-1)._bt;
492 if (bt == T_METADATA) {
493 vt++;
494 } else if (bt == T_VOID &&
495 prev_bt != T_LONG &&
496 prev_bt != T_DOUBLE) {
497 vt--;
498 }
499 } while (vt != 0);
500 } else {
501 total_args_passed++;
502 }
503 }
504 } else {
505 total_args_passed = sig_extended->length();
506 }
507 return total_args_passed;
508 }
509
510
511 static void gen_c2i_adapter_helper(MacroAssembler* masm,
512 BasicType bt,
513 BasicType prev_bt,
514 size_t size_in_bytes,
515 const VMRegPair& reg_pair,
516 const Address& to,
517 Register tmp1,
518 Register tmp2,
519 Register tmp3,
520 int extraspace,
521 bool is_oop) {
522 if (bt == T_VOID) {
523 assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
524 return;
525 }
526
527 // Say 4 args:
528 // i st_off
529 // 0 32 T_LONG
530 // 1 24 T_VOID
531 // 2 16 T_OBJECT
532 // 3 8 T_BOOL
533 // - 0 return address
534 //
535 // However to make thing extra confusing. Because we can fit a Java long/double in
536 // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter
537 // leaves one slot empty and only stores to a single slot. In this case the
538 // slot that is occupied is the T_VOID slot. See I said it was confusing.
539
540 bool wide = (size_in_bytes == wordSize);
541 VMReg r_1 = reg_pair.first();
542 VMReg r_2 = reg_pair.second();
543 assert(r_2->is_valid() == wide, "invalid size");
544 if (!r_1->is_valid()) {
545 assert(!r_2->is_valid(), "");
546 return;
547 }
548
549 if (!r_1->is_FloatRegister()) {
550 Register val = r25;
551 if (r_1->is_stack()) {
552 // memory to memory use r25 (scratch registers is used by store_heap_oop)
553 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
554 __ load_sized_value(val, Address(sp, ld_off), size_in_bytes, /* is_signed */ false);
555 } else {
556 val = r_1->as_Register();
557 }
558 assert_different_registers(to.base(), val, tmp1, tmp2, tmp3);
559 if (is_oop) {
560 // store_heap_oop transitively calls oop_store_at which corrupts to.base(). We need to keep it valid.
561 __ push(to.base(), sp);
562 __ store_heap_oop(to, val, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
563 __ pop(to.base(), sp);
564 } else {
565 __ store_sized_value(to, val, size_in_bytes);
566 }
567 } else {
568 if (wide) {
569 __ strd(r_1->as_FloatRegister(), to);
570 } else {
571 // only a float use just part of the slot
572 __ strs(r_1->as_FloatRegister(), to);
573 }
574 }
575 }
576
// Emits one compiled-to-interpreter (c2i) adapter into masm.
//
// The emitted code optionally runs a class-initialization barrier, runs the
// GC c2i entry barrier and the caller call-site patching code, reserves
// space below sp for the interpreter argument area, copies every argument
// from its compiled location (regs) into that area, and finally branches to
// the address loaded from Method::interpreter_entry_offset().
//
// Parameters:
//   sig_extended              - extended signature. When InlineTypePassFieldsAsArgs
//                               is set, a T_METADATA entry opens a scalarized
//                               inline type argument and a T_VOID entry (not
//                               following T_LONG/T_DOUBLE) closes it.
//   regs                      - compiled location of each argument; indexed by
//                               the signature index minus the number of
//                               T_METADATA/T_VOID delimiters seen so far.
//   requires_clinit_barrier   - when true, the barrier prologue is emitted and
//                               c2i_no_clinit_check_entry is set to the pc that
//                               follows it; otherwise that reference is untouched.
//   skip_fixup                - bound right after the call-site patching code.
//   start                     - base address subtracted from the pc when the GC
//                               map of the buffer allocation call is registered.
//   oop_maps                  - receives that GC map (only if the call is emitted).
//   frame_complete            - set to the code offset after the register save
//                               (only if the buffer allocation call is emitted).
//   frame_size_in_words       - handed to RegisterSaver::save_live_registers
//                               (presumably filled in there - TODO confirm).
//   alloc_inline_receiver     - forwarded as the third argument of
//                               SharedRuntime::allocate_inline_types.
577 static void gen_c2i_adapter(MacroAssembler *masm,
578 const GrowableArray<SigEntry>* sig_extended,
579 const VMRegPair *regs,
580 bool requires_clinit_barrier,
581 address& c2i_no_clinit_check_entry,
582 Label& skip_fixup,
583 address start,
584 OopMapSet* oop_maps,
585 int& frame_complete,
586 int& frame_size_in_words,
587 bool alloc_inline_receiver) {
588 if (requires_clinit_barrier) {
589 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
590 Label L_skip_barrier;
591
592 { // Bypass the barrier for non-static methods
593 __ ldrh(rscratch1, Address(rmethod, Method::access_flags_offset()));
594 __ andsw(zr, rscratch1, JVM_ACC_STATIC);
595 __ br(Assembler::EQ, L_skip_barrier); // non-static
596 }
597
// Static method: either the barrier lets us through (branch to
// L_skip_barrier) or we fall into the jump to the handle_wrong_method stub.
598 __ load_method_holder(rscratch2, rmethod);
599 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
600 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
601
602 __ bind(L_skip_barrier);
// Entry point that starts after the class-initialization barrier.
603 c2i_no_clinit_check_entry = __ pc();
604 }
605
606 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
607 bs->c2i_entry_barrier(masm);
608
609 // Before we get into the guts of the C2I adapter, see if we should be here
610 // at all. We've come from compiled code and are attempting to jump to the
611 // interpreter, which means the caller made a static call to get here
612 // (vcalls always get a compiled target if there is one). Check for a
613 // compiled target. If there is one, we need to patch the caller's call.
614 patch_callers_callsite(masm);
615
616 __ bind(skip_fixup);
617
618 // Name some registers to be used in the following code. We can use
619 // anything except r0-r7 which are arguments in the Java calling
620 // convention, rmethod (r12), and r19 which holds the outgoing sender
621 // SP for the interpreter.
622 Register buf_array = r10; // Array of buffered inline types
623 Register buf_oop = r11; // Buffered inline type oop
624 Register tmp1 = r15;
625 Register tmp2 = r16;
626 Register tmp3 = r17;
627
// Debug-only check that every register named above is in the set returned by
// MacroAssembler::call_clobbered_gp_registers().
628 #ifdef ASSERT
629 RegSet clobbered_gp_regs = MacroAssembler::call_clobbered_gp_registers();
630 assert(clobbered_gp_regs.contains(buf_array), "buf_array must be saved explicitly if it's not a clobber");
631 assert(clobbered_gp_regs.contains(buf_oop), "buf_oop must be saved explicitly if it's not a clobber");
632 assert(clobbered_gp_regs.contains(tmp1), "tmp1 must be saved explicitly if it's not a clobber");
633 assert(clobbered_gp_regs.contains(tmp2), "tmp2 must be saved explicitly if it's not a clobber");
634 assert(clobbered_gp_regs.contains(tmp3), "tmp3 must be saved explicitly if it's not a clobber");
635 #endif
636
637 if (InlineTypePassFieldsAsArgs) {
638 // Is there an inline type argument?
639 bool has_inline_argument = false;
640 for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
641 has_inline_argument = (sig_extended->at(i)._bt == T_METADATA);
642 }
643 if (has_inline_argument) {
644 // There is at least a value type argument: we're coming from
645 // compiled code so we may not have buffers to back the value
646 // objects. Allocate the buffers here with a runtime call for
647 // the value arguments that needs a buffer.
648 RegisterSaver reg_save(true /* save_vectors */);
649 OopMap* map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
650
651 frame_complete = __ offset();
652 address the_pc = __ pc();
653
654 Label retaddr;
655 __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
656
// Arguments of the runtime call: (thread, method, alloc_inline_receiver).
657 __ mov(c_rarg0, rthread);
658 __ mov(c_rarg1, rmethod);
659 __ mov(c_rarg2, (int64_t)alloc_inline_receiver);
660
661 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types)));
662 __ blr(rscratch1);
663 __ bind(retaddr);
664
// The GC map is keyed by the return pc of the call, relative to start.
665 oop_maps->add_gc_map(__ pc() - start, map);
666 __ reset_last_Java_frame(false);
667
668 reg_save.restore_live_registers(masm);
669
// If the runtime call left a pending exception, clear vm_result_oop and
// forward the exception (passed in r0) instead of continuing.
670 Label no_exception;
671 __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
672 __ cbz(rscratch1, no_exception);
673
674 __ str(zr, Address(rthread, JavaThread::vm_result_oop_offset()));
675 __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
676 __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
677
678 __ bind(no_exception);
679
680 // We get an array of objects from the runtime call
681 __ get_vm_result_oop(buf_array, rthread);
682 }
683 }
684
685 // Since all args are passed on the stack, total_args_passed *
686 // Interpreter::stackElementSize is the space we need.
687
688 int total_args_passed = compute_total_args_passed_int(sig_extended);
689 int extraspace = total_args_passed * Interpreter::stackElementSize;
690
691 // stack is aligned, keep it that way
692 extraspace = align_up(extraspace, StackAlignmentInBytes);
693
694 // set senderSP value
695 __ mov(r19_sender_sp, sp);
696
// From here on, incoming stack arguments live at their original offset plus
// extraspace relative to the new sp (see the ld_off computations below).
697 __ sub(sp, sp, extraspace);
698
699 // Now write the args into the outgoing interpreter space
700
701 // next_arg_comp is the next argument from the compiler point of
702 // view (inline type fields are passed in registers/on the stack). In
703 // sig_extended, an inline type argument starts with: T_METADATA,
704 // followed by the types of the fields of the inline type and T_VOID
705 // to mark the end of the inline type. ignored counts the number of
706 // T_METADATA/T_VOID. next_vt_arg is the next inline type argument:
707 // used to get the buffer for that argument from the pool of buffers
708 // we allocated above and want to pass to the
709 // interpreter. next_arg_int is the next argument from the
710 // interpreter point of view (inline types are passed by reference).
711 for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
712 next_arg_comp < sig_extended->length(); next_arg_comp++) {
713 assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
714 assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?");
715 BasicType bt = sig_extended->at(next_arg_comp)._bt;
// Interpreter slot of this argument; arguments are laid out so that the
// first one ends up at the highest offset.
716 int st_off = (total_args_passed - next_arg_int - 1) * Interpreter::stackElementSize;
717 if (!InlineTypePassFieldsAsArgs || bt != T_METADATA) {
// Plain (non-scalarized) argument. T_LONG/T_DOUBLE are stored in the
// lower of their two slots (next_off).
718 int next_off = st_off - Interpreter::stackElementSize;
719 const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
720 const VMRegPair reg_pair = regs[next_arg_comp-ignored];
721 size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
722 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
723 size_in_bytes, reg_pair, Address(sp, offset), tmp1, tmp2, tmp3, extraspace, false);
724 next_arg_int++;
725 #ifdef ASSERT
726 if (bt == T_LONG || bt == T_DOUBLE) {
727 // Overwrite the unused slot with known junk
728 __ mov(rscratch1, CONST64(0xdeadffffdeadaaaa));
729 __ str(rscratch1, Address(sp, st_off));
730 }
731 #endif /* ASSERT */
732 } else {
// Scalarized inline type argument: it occupies a single interpreter slot
// (a reference), so next_arg_int advances once for the whole group.
733 ignored++;
734 next_arg_int++;
// Nesting depth of T_METADATA ... T_VOID groups; the do-while below ends
// when the outermost group has been closed.
735 int vt = 1;
736 // write fields we get from compiled code in registers/stack
737 // slots to the buffer: we know we are done with that inline type
738 // argument when we hit the T_VOID that acts as an end of inline
739 // type delimiter for this inline type. Inline types are flattened
740 // so we might encounter embedded inline types. Each entry in
741 // sig_extended contains a field offset in the buffer.
742 Label L_null;
743 Label not_null_buffer;
744 do {
745 next_arg_comp++;
746 BasicType bt = sig_extended->at(next_arg_comp)._bt;
747 BasicType prev_bt = sig_extended->at(next_arg_comp - 1)._bt;
748 if (bt == T_METADATA) {
749 vt++;
750 ignored++;
751 } else if (bt == T_VOID && prev_bt != T_LONG && prev_bt != T_DOUBLE) {
752 vt--;
753 ignored++;
754 } else if (sig_extended->at(next_arg_comp)._vt_oop) {
// This entry carries the (possibly null) buffer oop supplied by the caller.
755 VMReg buffer = regs[next_arg_comp-ignored].first();
756 if (buffer->is_stack()) {
757 int ld_off = buffer->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
758 __ ldr(buf_oop, Address(sp, ld_off));
759 } else {
760 __ mov(buf_oop, buffer->as_Register());
761 }
// A non-null incoming buffer is passed on as is: the branch skips all
// remaining field stores emitted for this argument.
762 __ cbnz(buf_oop, not_null_buffer);
763 // get the buffer from the just allocated pool of buffers
764 int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_OBJECT);
765 __ load_heap_oop(buf_oop, Address(buf_array, index), rscratch1, tmp2);
766 next_vt_arg++;
767 } else {
768 int off = sig_extended->at(next_arg_comp)._offset;
769 if (off == -1) {
770 // Nullable inline type argument, emit null check
771 VMReg reg = regs[next_arg_comp-ignored].first();
772 Label L_notNull;
773 if (reg->is_stack()) {
774 int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
775 __ ldrb(tmp1, Address(sp, ld_off));
776 __ cbnz(tmp1, L_notNull);
777 } else {
778 __ cbnz(reg->as_Register(), L_notNull);
779 }
// Marker is zero: store null into the interpreter slot and skip the rest
// of the code emitted for this argument.
780 __ str(zr, Address(sp, st_off));
781 __ b(L_null);
782 __ bind(L_notNull);
// 'continue' re-tests the do-while condition; vt was not modified in this
// iteration, so generation proceeds with the next signature entry.
783 continue;
784 }
785 assert(off > 0, "offset in object should be positive");
// Regular field: store it into the buffer at its field offset.
786 size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
787 bool is_oop = is_reference_type(bt);
788 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
789 size_in_bytes, regs[next_arg_comp-ignored], Address(buf_oop, off), tmp1, tmp2, tmp3, extraspace, is_oop);
790 }
791 } while (vt != 0);
792 // pass the buffer to the interpreter
793 __ bind(not_null_buffer);
794 __ str(buf_oop, Address(sp, st_off));
// Target of the null-marker path above, which has already stored null.
795 __ bind(L_null);
796 }
797 }
798
799 __ mov(esp, sp); // Interp expects args on caller's expression stack
800
801 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::interpreter_entry_offset())));
802 __ br(rscratch1);
803 }
804
// Emits the interpreter-to-compiled (i2c) adapter into masm.
//
// The emitted code optionally lowers and 16-byte aligns sp to make room for
// compiled stack arguments, loads every argument from the interpreter
// expression stack (esp-relative) into the register or stack slot given by
// regs, records rmethod in JavaThread::callee_target, and branches to the
// address loaded from Method::from_compiled_inline_offset().
//
// Parameters:
//   comp_args_on_stack - number of VMReg stack slots used by the compiled
//                        convention; 0 skips the sp adjustment.
//   sig                - signature entries; a T_VOID entry must follow a
//                        T_LONG/T_DOUBLE entry and is skipped.
//   regs               - destination of each signature entry (same indexing
//                        as sig).
805 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, int comp_args_on_stack, const GrowableArray<SigEntry>* sig, const VMRegPair *regs) {
806
807
808 // Note: r19_sender_sp contains the senderSP on entry. We must
809 // preserve it since we may do a i2c -> c2i transition if we lose a
810 // race where compiled code goes non-entrant while we get args
811 // ready.
812
813 // Adapters are frameless.
814
815 // An i2c adapter is frameless because the *caller* frame, which is
816 // interpreted, routinely repairs its own esp (from
817 // interpreter_frame_last_sp), even if a callee has modified the
818 // stack pointer. It also recalculates and aligns sp.
819
820 // A c2i adapter is frameless because the *callee* frame, which is
821 // interpreted, routinely repairs its caller's sp (from sender_sp,
822 // which is set up via the senderSP register).
823
824 // In other words, if *either* the caller or callee is interpreted, we can
825 // get the stack pointer repaired after a call.
826
827 // This is why c2i and i2c adapters cannot be indefinitely composed.
828 // In particular, if a c2i adapter were to somehow call an i2c adapter,
829 // both caller and callee would be compiled methods, and neither would
830 // clean up the stack pointer changes performed by the two adapters.
831 // If this happens, control eventually transfers back to the compiled
832 // caller, but with an uncorrected stack, causing delayed havoc.
833
834 // Cut-out for having no stack args.
835 int comp_words_on_stack = 0;
836 if (comp_args_on_stack) {
// Convert the slot count to whole words, reserve that many bytes below sp
// and round the result down to a 16-byte boundary.
837 comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord;
838 __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
839 __ andr(sp, rscratch1, -16);
840 }
841
842 // Will jump to the compiled code just as if compiled code was doing it.
843 // Pre-load the register-jump target early, to schedule it better.
844 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_inline_offset())));
845
846 int total_args_passed = sig->length();
847
848 // Now generate the shuffle code.
849 for (int i = 0; i < total_args_passed; i++) {
850 BasicType bt = sig->at(i)._bt;
851 if (bt == T_VOID) {
852 assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half");
853 continue;
854 }
855
856 // Pick up 0, 1 or 2 words from SP+offset.
857 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?");
858
859 // Load in argument order going down.
860 int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize;
861 // Point to interpreter value (vs. tag)
862 int next_off = ld_off - Interpreter::stackElementSize;
863 //
864 //
865 //
866 VMReg r_1 = regs[i].first();
867 VMReg r_2 = regs[i].second();
// An argument without a valid first VMReg has no destination; emit nothing.
868 if (!r_1->is_valid()) {
869 assert(!r_2->is_valid(), "");
870 continue;
871 }
872 if (r_1->is_stack()) {
873 // Convert stack slot to an SP offset; note that no return-address adjustment is added below
874 int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size;
875 if (!r_2->is_valid()) {
876 // sign extend???
877 __ ldrsw(rscratch2, Address(esp, ld_off));
878 __ str(rscratch2, Address(sp, st_off));
879 } else {
880 //
881 // We are using two optoregs. This can be either T_OBJECT,
882 // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates
883 // two slots but only uses one for the T_LONG or T_DOUBLE case
884 // So we must adjust where to pick up the data to match the
885 // interpreter.
886 //
887 // Interpreter local[n] == MSW, local[n+1] == LSW however locals
888 // are accessed as negative so LSW is at LOW address
889
890 // ld_off is MSW so get LSW
891 const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off;
892 __ ldr(rscratch2, Address(esp, offset));
893 // st_off is LSW (i.e. reg.first())
894 __ str(rscratch2, Address(sp, st_off));
895 }
896 } else if (r_1->is_Register()) { // Register argument
897 Register r = r_1->as_Register();
898 if (r_2->is_valid()) {
899 //
900 // We are using two VMRegs. This can be either T_OBJECT,
901 // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates
902 // two slots but only uses one for the T_LONG or T_DOUBLE case
903 // So we must adjust where to pick up the data to match the
904 // interpreter.
905
906 const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off;
907
908 // this can be a misaligned move
909 __ ldr(r, Address(esp, offset));
910 } else {
911 // sign extend and use a full word?
912 __ ldrw(r, Address(esp, ld_off));
913 }
914 } else {
// Neither a stack slot nor a general register: the destination is used as
// a FloatRegister. One valid half selects ldrs from ld_off, two halves
// select ldrd from next_off.
915 if (!r_2->is_valid()) {
916 __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off));
917 } else {
918 __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off));
919 }
920 }
921 }
922
// push_cont_fastpath kills rscratch1 (which holds the jump target), so the
// target is parked in rscratch2 around it.
923 __ mov(rscratch2, rscratch1);
924 __ push_cont_fastpath(rthread); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about; kills rscratch1
925 __ mov(rscratch1, rscratch2);
926
927 // 6243940 We might end up in handle_wrong_method if
928 // the callee is deoptimized as we race thru here. If that
929 // happens we don't want to take a safepoint because the
930 // caller frame will look interpreted and arguments are now
931 // "compiled" so it is much better to make this transition
932 // invisible to the stack walking code. Unfortunately if
933 // we try and find the callee by normal means a safepoint
934 // is possible. So we stash the desired callee in the thread
935 // and the vm will find there should this case occur.
936
937 __ str(rmethod, Address(rthread, JavaThread::callee_target_offset()));
938
939 __ br(rscratch1);
940 }
941
// Emits the inline-cache check that precedes an unverified c2i entry.
// After ic_check, rmethod is loaded from the speculated-method field of the
// CompiledICData addressed by rscratch2 (presumably set up by the caller's
// inline cache - TODO confirm). If the method's code field is null the
// emitted code branches to skip_fixup; otherwise it jumps to the IC miss stub.
942 static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
943 Register data = rscratch2;
944 __ ic_check(1 /* end_alignment */);
945 __ ldr(rmethod, Address(data, CompiledICData::speculated_method_offset()));
946
947 // Method might have been compiled since the call site was patched to
948 // interpreted; if that is the case treat it as a miss so we can get
949 // the call site corrected.
950 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
// Null code field: continue in the adapter at skip_fixup.
951 __ cbz(rscratch1, skip_fixup);
952 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
953 }
954
955 // ---------------------------------------------------------------
// Emits the i2c adapter followed by the c2i adapters for one signature and
// records the start pc of every entry point in entry_address.
//
// Entry points recorded, in emission order:
//   I2C                     - start of the i2c adapter
//   C2I_Unverified,
//   C2I_Unverified_Inline   - start of the first inline-cache check
//   C2I_Inline_RO           - adapter built from sig_cc_ro/regs_cc_ro; code is
//                             only emitted for it when regs_cc != regs_cc_ro,
//                             otherwise it shares the pc of C2I
//   C2I, C2I_Inline         - adapter built from sig_cc/regs_cc
//   C2I_No_Clinit_Check     - reset to nullptr, then set by gen_c2i_adapter
//                             calls that request the clinit barrier
// When regs != regs_cc, a second inline-cache check plus an adapter built
// from sig/regs is emitted, and C2I_Unverified_Inline / C2I_Inline are
// re-pointed at them.
//
// new_adapter is assigned only when allocate_code_blob is true.
956 void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
957 int comp_args_on_stack,
958 const GrowableArray<SigEntry>* sig,
959 const VMRegPair* regs,
960 const GrowableArray<SigEntry>* sig_cc,
961 const VMRegPair* regs_cc,
962 const GrowableArray<SigEntry>* sig_cc_ro,
963 const VMRegPair* regs_cc_ro,
964 address entry_address[AdapterBlob::ENTRY_COUNT],
965 AdapterBlob*& new_adapter,
966 bool allocate_code_blob) {
967
968 entry_address[AdapterBlob::I2C] = __ pc();
969 gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);
970
971 // -------------------------------------------------------------------------
972 // Generate a C2I adapter. On entry we know rmethod holds the Method* during calls
973 // to the interpreter. The args start out packed in the compiled layout. They
974 // need to be unpacked into the interpreter layout. This will almost always
975 // require some stack space. We grow the current (compiled) stack, then repack
976 // the args. We finally end in a jump to the generic interpreter entry point.
977 // On exit from the interpreter, the interpreter will restore our SP (lest the
978 // compiled code, which relies solely on SP and not FP, get sick).
979
980 entry_address[AdapterBlob::C2I_Unverified] = __ pc();
981 entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc();
982 Label skip_fixup;
983
984 gen_inline_cache_check(masm, skip_fixup);
985
// Shared by all gen_c2i_adapter calls below; each call may add a GC map and
// overwrite frame_complete / frame_size_in_words.
986 OopMapSet* oop_maps = new OopMapSet();
987 int frame_complete = CodeOffsets::frame_never_safe;
988 int frame_size_in_words = 0;
989
990 // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
991 entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
992 entry_address[AdapterBlob::C2I_Inline_RO] = __ pc();
993 if (regs_cc != regs_cc_ro) {
994 // No class init barrier needed because method is guaranteed to be non-static
995 gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, entry_address[AdapterBlob::C2I_No_Clinit_Check],
996 skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
// skip_fixup was bound by the call above; reset it so the next adapter can
// bind it again.
997 skip_fixup.reset();
998 }
999
1000 // Scalarized c2i adapter
1001 entry_address[AdapterBlob::C2I] = __ pc();
1002 entry_address[AdapterBlob::C2I_Inline] = __ pc();
1003 gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check],
1004 skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true);
1005
1006 // Non-scalarized c2i adapter
1007 if (regs != regs_cc) {
// This adapter gets its own inline-cache check and its own skip label.
1008 entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc();
1009 Label inline_entry_skip_fixup;
1010 gen_inline_cache_check(masm, inline_entry_skip_fixup);
1011
1012 entry_address[AdapterBlob::C2I_Inline] = __ pc();
1013 gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check],
1014 inline_entry_skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
1015 }
1016
1017 // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
1018 // the GC knows about the location of oop argument locations passed to the c2i adapter.
1019 if (allocate_code_blob) {
1020 bool caller_must_gc_arguments = (regs != regs_cc);
// Convert the recorded entry addresses to offsets before creating the blob.
1021 int entry_offset[AdapterHandlerEntry::ENTRIES_COUNT];
1022 assert(AdapterHandlerEntry::ENTRIES_COUNT == 7, "sanity");
1023 AdapterHandlerLibrary::address_to_offset(entry_address, entry_offset);
1024 new_adapter = AdapterBlob::create(masm->code(), entry_offset, frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
1025 }
1026 }
1027
1028 static int c_calling_convention_priv(const BasicType *sig_bt,
1029 VMRegPair *regs,
1030 int total_args_passed) {
1031
1032 // We return the amount of VMRegImpl stack slots we need to reserve for all
1033 // the arguments NOT counting out_preserve_stack_slots.
1034
1035 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1036 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7
1037 };
1038 static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1039 c_farg0, c_farg1, c_farg2, c_farg3,
1040 c_farg4, c_farg5, c_farg6, c_farg7
1041 };
1042
1043 uint int_args = 0;
1044 uint fp_args = 0;
1045 uint stk_args = 0; // inc by 2 each time
2904
2905 // exception pending => remove activation and forward to exception handler
2906
2907 __ str(zr, Address(rthread, JavaThread::vm_result_oop_offset()));
2908
2909 __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
2910 __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2911
2912 // -------------
2913 // make sure all code is generated
2914 masm->flush();
2915
2916 // return the blob
2917 // frame_size_words or bytes??
2918 RuntimeStub* rs_blob = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
2919
2920 AOTCodeCache::store_code_blob(*rs_blob, AOTCodeEntry::SharedBlob, StubInfo::blob(id));
2921 return rs_blob;
2922 }
2923
// Generates the pack/unpack stub for the inline klass vk and returns it as a
// BufferedInlineTypeBlob, or nullptr if the 16K BufferBlob cannot be created.
//
// Three code offsets are recorded and passed to BufferedInlineTypeBlob::create:
//   pack_fields_jobject_off - loads a JNI handle from [r14], resolves it into
//                             r0, stores the result back to [r14], then falls
//                             through into the pack code.
//   pack_fields_off         - stores each field from its return register
//                             (vk->return_regs()) into the buffer addressed
//                             by r0, at the field offsets of
//                             vk->extended_sig(), then returns.
//   unpack_fields_off       - if r0 is null, zeroes every field register;
//                             otherwise loads every field register from the
//                             buffer addressed by r0; then returns.
//
// In all three loops j indexes regs and starts at 1, i.e. regs->at(0) is
// never used here (presumably it describes r0 itself - TODO confirm).
// T_METADATA entries are skipped without advancing j; a T_VOID entry advances
// j only when it follows T_LONG/T_DOUBLE.
2924 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
2925 BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K);
2926 if (buf == nullptr) {
2927 return nullptr;
2928 }
2929 CodeBuffer buffer(buf);
// Stack-allocated storage handed to the code buffer for relocation records.
2930 short buffer_locs[20];
2931 buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs,
2932 sizeof(buffer_locs)/sizeof(relocInfo));
2933
2934 MacroAssembler _masm(&buffer);
2935 MacroAssembler* masm = &_masm;
2936
2937 const Array<SigEntry>* sig_vk = vk->extended_sig();
2938 const Array<VMRegPair>* regs = vk->return_regs();
2939
2940 int pack_fields_jobject_off = __ offset();
2941 // Resolve pre-allocated buffer from JNI handle.
2942 // We cannot do this in generate_call_stub() because it requires GC code to be initialized.
2943 Register Rresult = r14; // See StubGenerator::generate_call_stub().
2944 __ ldr(r0, Address(Rresult));
2945 __ resolve_jobject(r0 /* value */,
2946 rthread /* thread */,
2947 r12 /* tmp */);
2948 __ str(r0, Address(Rresult));
2949
// No branch is emitted here: the jobject entry continues straight into the
// pack code below.
2950 int pack_fields_off = __ offset();
2951
2952 int j = 1;
2953 for (int i = 0; i < sig_vk->length(); i++) {
2954 BasicType bt = sig_vk->at(i)._bt;
2955 if (bt == T_METADATA) {
2956 continue;
2957 }
2958 if (bt == T_VOID) {
2959 if (sig_vk->at(i-1)._bt == T_LONG ||
2960 sig_vk->at(i-1)._bt == T_DOUBLE) {
2961 j++;
2962 }
2963 continue;
2964 }
2965 int off = sig_vk->at(i)._offset;
2966 VMRegPair pair = regs->at(j);
2967 VMReg r_1 = pair.first();
2968 VMReg r_2 = pair.second();
2969 Address to(r0, off);
2970 if (bt == T_FLOAT) {
2971 __ strs(r_1->as_FloatRegister(), to);
2972 } else if (bt == T_DOUBLE) {
2973 __ strd(r_1->as_FloatRegister(), to);
2974 } else {
2975 Register val = r_1->as_Register();
2976 assert_different_registers(to.base(), val, r15, r16, r17);
2977 if (is_reference_type(bt)) {
2978 // store_heap_oop transitively calls oop_store_at which corrupts to.base(). We need to keep r0 valid.
// The store therefore goes through a copy of r0 held in r17.
2979 __ mov(r17, r0);
2980 Address to_with_r17(r17, off);
2981 __ store_heap_oop(to_with_r17, val, r15, r16, r17, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
2982 } else {
2983 __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt));
2984 }
2985 }
2986 j++;
2987 }
2988 assert(j == regs->length(), "missed a field?");
2989 if (vk->supports_nullable_layouts()) {
2990 // Zero the null marker (setting it to 1 would be better but would require an additional register)
2991 __ strb(zr, Address(r0, vk->null_marker_offset()));
2992 }
2993 __ ret(lr);
2994
2995 int unpack_fields_off = __ offset();
2996
2997 Label skip;
2998 Label not_null;
2999 __ cbnz(r0, not_null);
3000
3001 // Return value is null. Zero all registers because the runtime requires a canonical
3002 // representation of a flat null.
3003 j = 1;
3004 for (int i = 0; i < sig_vk->length(); i++) {
3005 BasicType bt = sig_vk->at(i)._bt;
3006 if (bt == T_METADATA) {
3007 continue;
3008 }
3009 if (bt == T_VOID) {
3010 if (sig_vk->at(i-1)._bt == T_LONG ||
3011 sig_vk->at(i-1)._bt == T_DOUBLE) {
3012 j++;
3013 }
3014 continue;
3015 }
3016
3017 VMRegPair pair = regs->at(j);
3018 VMReg r_1 = pair.first();
3019 if (r_1->is_FloatRegister()) {
3020 __ mov(r_1->as_FloatRegister(), Assembler::T2S, 0);
3021 } else {
3022 __ mov(r_1->as_Register(), zr);
3023 }
3024 j++;
3025 }
3026 __ b(skip);
3027 __ bind(not_null);
3028
// Non-null buffer: load every field from the buffer into its register.
3029 j = 1;
3030 for (int i = 0; i < sig_vk->length(); i++) {
3031 BasicType bt = sig_vk->at(i)._bt;
3032 if (bt == T_METADATA) {
3033 continue;
3034 }
3035 if (bt == T_VOID) {
3036 if (sig_vk->at(i-1)._bt == T_LONG ||
3037 sig_vk->at(i-1)._bt == T_DOUBLE) {
3038 j++;
3039 }
3040 continue;
3041 }
3042 int off = sig_vk->at(i)._offset;
3043 assert(off > 0, "offset in object should be positive");
3044 VMRegPair pair = regs->at(j);
3045 VMReg r_1 = pair.first();
3046 VMReg r_2 = pair.second();
3047 Address from(r0, off);
3048 if (bt == T_FLOAT) {
3049 __ ldrs(r_1->as_FloatRegister(), from);
3050 } else if (bt == T_DOUBLE) {
3051 __ ldrd(r_1->as_FloatRegister(), from);
3052 } else if (bt == T_OBJECT || bt == T_ARRAY) {
// r0 is the base of every load in this loop, so no field may be loaded into it.
3053 assert_different_registers(r0, r_1->as_Register());
3054 __ load_heap_oop(r_1->as_Register(), from, rscratch1, rscratch2);
3055 } else {
3056 assert(is_java_primitive(bt), "unexpected basic type");
3057 assert_different_registers(r0, r_1->as_Register());
3058 size_t size_in_bytes = type2aelembytes(bt);
// The last argument is the is_signed flag: false for T_CHAR and T_BOOLEAN,
// true for all other primitive types.
3059 __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN);
3060 }
3061 j++;
3062 }
3063 assert(j == regs->length(), "missed a field?");
3064
3065 __ bind(skip);
3066
3067 __ ret(lr);
3068
3069 __ flush();
3070
3071 return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off);
3072 }
3073
3074 // Continuation point for throwing of implicit exceptions that are
3075 // not handled in the current activation. Fabricates an exception
3076 // oop and initiates normal exception dispatching in this
3077 // frame. Since we need to preserve callee-saved values (currently
3078 // only for C2, but done for C1 as well) we need a callee-saved oop
3079 // map and therefore have to make these stubs into RuntimeStubs
3080 // rather than BufferBlobs. If the compiler needs all registers to
3081 // be preserved between the fault point and the exception handler
3082 // then it must assume responsibility for that in
3083 // AbstractCompiler::continuation_for_implicit_null_exception or
3084 // continuation_for_implicit_division_by_zero_exception. All other
3085 // implicit exceptions (e.g., NullPointerException or
3086 // AbstractMethodError on entry) are either at call sites or
3087 // otherwise assume that stack unwinding will be initiated, so
3088 // caller saved registers were assumed volatile in the compiler.
3089
3090 RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) {
3091 assert(is_throw_id(id), "expected a throw stub id");
3092
3093 const char* name = SharedRuntime::stub_name(id);
|