11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #ifndef _WINDOWS
27 #include "alloca.h"
28 #endif
29 #include "asm/macroAssembler.hpp"
30 #include "asm/macroAssembler.inline.hpp"
31 #include "code/compiledIC.hpp"
32 #include "code/debugInfoRec.hpp"
33 #include "code/nativeInst.hpp"
34 #include "code/vtableStubs.hpp"
35 #include "compiler/oopMap.hpp"
36 #include "gc/shared/collectedHeap.hpp"
37 #include "gc/shared/gcLocker.hpp"
38 #include "gc/shared/barrierSet.hpp"
39 #include "gc/shared/barrierSetAssembler.hpp"
40 #include "interpreter/interpreter.hpp"
41 #include "logging/log.hpp"
42 #include "memory/resourceArea.hpp"
43 #include "memory/universe.hpp"
44 #include "oops/klass.inline.hpp"
45 #include "oops/method.inline.hpp"
46 #include "prims/methodHandles.hpp"
47 #include "runtime/continuation.hpp"
48 #include "runtime/continuationEntry.inline.hpp"
49 #include "runtime/globals.hpp"
50 #include "runtime/jniHandles.hpp"
543 break;
544 case T_DOUBLE:
545 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
546 if (fp_args < Argument::n_float_register_parameters_j) {
547 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
548 } else {
549 stk_args = align_up(stk_args, 2);
550 regs[i].set2(VMRegImpl::stack2reg(stk_args));
551 stk_args += 2;
552 }
553 break;
554 default:
555 ShouldNotReachHere();
556 break;
557 }
558 }
559
560 return stk_args;
561 }
562
563 // Patch the caller's callsite with entry to compiled code if it exists.
564 static void patch_callers_callsite(MacroAssembler *masm) {
565 Label L;
566 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
567 __ jcc(Assembler::equal, L);
568
569 // Save the current stack pointer
570 __ mov(r13, rsp);
571 // Schedule the branch target address early.
572 // Call into the VM to patch the caller, then jump to compiled callee
573 // rax isn't live so capture return address while we easily can
574 __ movptr(rax, Address(rsp, 0));
575
576 // align stack so push_CPU_state doesn't fault
577 __ andptr(rsp, -(StackAlignmentInBytes));
578 __ push_CPU_state();
579 __ vzeroupper();
580 // VM needs caller's callsite
581 // VM needs target method
582   // This needs to be a long call since we will relocate this adapter to
583   // the codeBuffer and it may not reach
584
585 // Allocate argument register save area
586 if (frame::arg_reg_save_area_bytes != 0) {
587 __ subptr(rsp, frame::arg_reg_save_area_bytes);
588 }
589 __ mov(c_rarg0, rbx);
590 __ mov(c_rarg1, rax);
591 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
592
593 // De-allocate argument register save area
594 if (frame::arg_reg_save_area_bytes != 0) {
595 __ addptr(rsp, frame::arg_reg_save_area_bytes);
596 }
597
598 __ vzeroupper();
599 __ pop_CPU_state();
600 // restore sp
601 __ mov(rsp, r13);
602 __ bind(L);
603 }
604
605
606 static void gen_c2i_adapter(MacroAssembler *masm,
607 int total_args_passed,
608 int comp_args_on_stack,
609 const BasicType *sig_bt,
610 const VMRegPair *regs,
611 Label& skip_fixup) {
612 // Before we get into the guts of the C2I adapter, see if we should be here
613 // at all. We've come from compiled code and are attempting to jump to the
614 // interpreter, which means the caller made a static call to get here
615 // (vcalls always get a compiled target if there is one). Check for a
616 // compiled target. If there is one, we need to patch the caller's call.
617 patch_callers_callsite(masm);
618
619 __ bind(skip_fixup);
620
621 // Since all args are passed on the stack, total_args_passed *
622 // Interpreter::stackElementSize is the space we need.
623
624 assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed);
625
626 int extraspace = (total_args_passed * Interpreter::stackElementSize);
627
628 // stack is aligned, keep it that way
629 // This is not currently needed or enforced by the interpreter, but
630 // we might as well conform to the ABI.
631 extraspace = align_up(extraspace, 2*wordSize);
632
633 // set senderSP value
634 __ lea(r13, Address(rsp, wordSize));
635
636 #ifdef ASSERT
637 __ check_stack_alignment(r13, "sender stack not aligned");
638 #endif
639 if (extraspace > 0) {
640 // Pop the return address
641 __ pop(rax);
642
643 __ subptr(rsp, extraspace);
644
645 // Push the return address
646 __ push(rax);
647
648 // Account for the return address location since we store it first rather
649 // than hold it in a register across all the shuffling
650 extraspace += wordSize;
651 }
652
653 #ifdef ASSERT
654 __ check_stack_alignment(rsp, "callee stack not aligned", wordSize, rax);
655 #endif
656
657 // Now write the args into the outgoing interpreter space
658 for (int i = 0; i < total_args_passed; i++) {
659 if (sig_bt[i] == T_VOID) {
660 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
661 continue;
662 }
663
664 // offset to start parameters
665 int st_off = (total_args_passed - i) * Interpreter::stackElementSize;
666 int next_off = st_off - Interpreter::stackElementSize;
667
668 // Say 4 args:
669 // i st_off
670 // 0 32 T_LONG
671 // 1 24 T_VOID
672 // 2 16 T_OBJECT
673 // 3 8 T_BOOL
674 // - 0 return address
675 //
676     // However, to make things extra confusing: because we can fit a long/double in
677     // a single slot on a 64-bit VM, and it would be silly to break them up, the interpreter
678     // leaves one slot empty and only stores to a single slot. In this case the
679     // slot that is occupied is the T_VOID slot. See, I said it was confusing.
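    // A worked example, assuming Interpreter::stackElementSize == 8 (its 64-bit value):
    // for the 4-arg layout above, the loop below computes
    //   i == 0 (T_LONG):   st_off = (4 - 0) * 8 = 32, next_off = 24
    //   i == 2 (T_OBJECT): st_off = (4 - 2) * 8 = 16
    //   i == 3 (T_BOOL):   st_off = (4 - 3) * 8 = 8
    // so the long's value lands in the T_VOID slot at offset 24, while offset 32 is only
    // poisoned with junk in debug builds.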
680
681 VMReg r_1 = regs[i].first();
682 VMReg r_2 = regs[i].second();
683 if (!r_1->is_valid()) {
684 assert(!r_2->is_valid(), "");
685 continue;
686 }
687 if (r_1->is_stack()) {
688       // memory to memory; use rax
689 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
690 if (!r_2->is_valid()) {
691 // sign extend??
692 __ movl(rax, Address(rsp, ld_off));
693 __ movptr(Address(rsp, st_off), rax);
694
695 } else {
696
697 __ movq(rax, Address(rsp, ld_off));
698
699         // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
700 // T_DOUBLE and T_LONG use two slots in the interpreter
701 if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
702 // ld_off == LSW, ld_off+wordSize == MSW
703 // st_off == MSW, next_off == LSW
704 __ movq(Address(rsp, next_off), rax);
705 #ifdef ASSERT
706 // Overwrite the unused slot with known junk
707 __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
708 __ movptr(Address(rsp, st_off), rax);
709 #endif /* ASSERT */
710 } else {
711 __ movq(Address(rsp, st_off), rax);
712 }
713 }
714 } else if (r_1->is_Register()) {
715 Register r = r_1->as_Register();
716 if (!r_2->is_valid()) {
717         // must be only an int (or less) so move only 32 bits to slot
718 // why not sign extend??
719 __ movl(Address(rsp, st_off), r);
720 } else {
721         // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
722 // T_DOUBLE and T_LONG use two slots in the interpreter
723 if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
724 // long/double in gpr
725 #ifdef ASSERT
726 // Overwrite the unused slot with known junk
727 __ mov64(rax, CONST64(0xdeadffffdeadaaab));
728 __ movptr(Address(rsp, st_off), rax);
729 #endif /* ASSERT */
730 __ movq(Address(rsp, next_off), r);
731 } else {
732 __ movptr(Address(rsp, st_off), r);
733 }
734 }
735 } else {
736 assert(r_1->is_XMMRegister(), "");
737 if (!r_2->is_valid()) {
738         // only a float; use just part of the slot
739 __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
740 } else {
741 #ifdef ASSERT
742 // Overwrite the unused slot with known junk
743 __ mov64(rax, CONST64(0xdeadffffdeadaaac));
744 __ movptr(Address(rsp, st_off), rax);
745 #endif /* ASSERT */
746 __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
747 }
748 }
749 }
750
751 // Schedule the branch target address early.
752 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
753 __ jmp(rcx);
754 }
755
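// Note on the helper below: it branches to L_ok only when code_start < pc_reg < code_end
// (unsigned compares); otherwise control falls through at L_fail, so the caller's code
// following the range_check() call handles the out-of-range PC.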
756 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
757 address code_start, address code_end,
758 Label& L_ok) {
759 Label L_fail;
760 __ lea(temp_reg, ExternalAddress(code_start));
761 __ cmpptr(pc_reg, temp_reg);
762 __ jcc(Assembler::belowEqual, L_fail);
763 __ lea(temp_reg, ExternalAddress(code_end));
764 __ cmpptr(pc_reg, temp_reg);
765 __ jcc(Assembler::below, L_ok);
766 __ bind(L_fail);
767 }
768
769 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
770 int total_args_passed,
771 int comp_args_on_stack,
772 const BasicType *sig_bt,
773 const VMRegPair *regs) {
774
775 // Note: r13 contains the senderSP on entry. We must preserve it since
776 // we may do a i2c -> c2i transition if we lose a race where compiled
777 // code goes non-entrant while we get args ready.
778 // In addition we use r13 to locate all the interpreter args as
779 // we must align the stack to 16 bytes on an i2c entry else we
780 // lose alignment we expect in all compiled code and register
781 // save code can segv when fxsave instructions find improperly
782 // aligned stack pointer.
783
784 // Adapters can be frameless because they do not require the caller
785 // to perform additional cleanup work, such as correcting the stack pointer.
786 // An i2c adapter is frameless because the *caller* frame, which is interpreted,
787 // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
788 // even if a callee has modified the stack pointer.
789 // A c2i adapter is frameless because the *callee* frame, which is interpreted,
790 // routinely repairs its caller's stack pointer (from sender_sp, which is set
791 // up via the senderSP register).
792 // In other words, if *either* the caller or callee is interpreted, we can
843 // Convert 4-byte c2 stack slots to words.
844 int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
845
846 if (comp_args_on_stack) {
847 __ subptr(rsp, comp_words_on_stack * wordSize);
848 }
849
850 // Ensure compiled code always sees stack at proper alignment
851 __ andptr(rsp, -16);
852
853   // push the return address and misalign the stack so that the youngest frame always sees
854   // the same layout it would right after a call instruction
855 __ push(rax);
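  // Worked example, assuming comp_args_on_stack == 5: comp_words_on_stack =
  // align_up(5 * 4, 8) >> 3 = 3, so 24 bytes are reserved; rsp is then rounded down to a
  // 16-byte boundary, and pushing the return address leaves rsp off by one word, exactly
  // the state compiled code expects right after a call instruction.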
856
857 // Put saved SP in another register
858 const Register saved_sp = rax;
859 __ movptr(saved_sp, r11);
860
861 // Will jump to the compiled code just as if compiled code was doing it.
862 // Pre-load the register-jump target early, to schedule it better.
863 __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
864
865 #if INCLUDE_JVMCI
866 if (EnableJVMCI) {
867 // check if this call should be routed towards a specific entry point
868 __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
869 Label no_alternative_target;
870 __ jcc(Assembler::equal, no_alternative_target);
871 __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
872 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
873 __ bind(no_alternative_target);
874 }
875 #endif // INCLUDE_JVMCI
876
877 // Now generate the shuffle code. Pick up all register args and move the
878 // rest through the floating point stack top.
879 for (int i = 0; i < total_args_passed; i++) {
880 if (sig_bt[i] == T_VOID) {
881 // Longs and doubles are passed in native word order, but misaligned
882 // in the 32-bit build.
883 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
884 continue;
885 }
886
887 // Pick up 0, 1 or 2 words from SP+offset.
888
889 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
890 "scrambled load targets?");
891 // Load in argument order going down.
892 int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
893 // Point to interpreter value (vs. tag)
894 int next_off = ld_off - Interpreter::stackElementSize;
895 //
896 //
897 //
898 VMReg r_1 = regs[i].first();
899 VMReg r_2 = regs[i].second();
900 if (!r_1->is_valid()) {
901 assert(!r_2->is_valid(), "");
902 continue;
903 }
904     if (r_1->is_stack()) {
905       // Convert stack slot to an SP offset (+ wordSize to account for return address)
906 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
907
908 // We can use r13 as a temp here because compiled code doesn't need r13 as an input
909     // and if we end up going thru a c2i because of a miss, a reasonable value of r13
910 // will be generated.
911 if (!r_2->is_valid()) {
912 // sign extend???
913 __ movl(r13, Address(saved_sp, ld_off));
914 __ movptr(Address(rsp, st_off), r13);
915 } else {
916 //
917 // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
918       // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case.
919 // So we must adjust where to pick up the data to match the interpreter.
920 //
921 // Interpreter local[n] == MSW, local[n+1] == LSW however locals
922 // are accessed as negative so LSW is at LOW address
923
924 // ld_off is MSW so get LSW
925 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
926 next_off : ld_off;
927 __ movq(r13, Address(saved_sp, offset));
928 // st_off is LSW (i.e. reg.first())
929 __ movq(Address(rsp, st_off), r13);
930 }
931 } else if (r_1->is_Register()) { // Register argument
932 Register r = r_1->as_Register();
933 assert(r != rax, "must be different");
934 if (r_2->is_valid()) {
935 //
936 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
937       // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case.
938 // So we must adjust where to pick up the data to match the interpreter.
939
940 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
941 next_off : ld_off;
942
943 // this can be a misaligned move
944 __ movq(r, Address(saved_sp, offset));
945 } else {
946 // sign extend and use a full word?
947 __ movl(r, Address(saved_sp, ld_off));
948 }
949 } else {
950 if (!r_2->is_valid()) {
951 __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
952 } else {
953 __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
954 }
955 }
956 }
957
958 __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about
959
960 // 6243940 We might end up in handle_wrong_method if
961 // the callee is deoptimized as we race thru here. If that
962 // happens we don't want to take a safepoint because the
963 // caller frame will look interpreted and arguments are now
964 // "compiled" so it is much better to make this transition
965   // invisible to the stack walking code. Unfortunately, if
966   // we try to find the callee by normal means, a safepoint
967   // is possible. So we stash the desired callee in the thread
968   // and the VM will find it there should this case occur.
969
970 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
971
972   // put Method* where a c2i would expect it, should we end up there;
973   // only needed because c2's resolve stubs return Method* as a result in
974   // rax
975 __ mov(rax, rbx);
976 __ jmp(r11);
977 }
978
979 // ---------------------------------------------------------------
980 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
981 int total_args_passed,
982 int comp_args_on_stack,
983 const BasicType *sig_bt,
984 const VMRegPair *regs,
985 AdapterFingerPrint* fingerprint) {
986 address i2c_entry = __ pc();
987
988 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
989
990 // -------------------------------------------------------------------------
991 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
992 // to the interpreter. The args start out packed in the compiled layout. They
993 // need to be unpacked into the interpreter layout. This will almost always
994 // require some stack space. We grow the current (compiled) stack, then repack
995 // the args. We finally end in a jump to the generic interpreter entry point.
996 // On exit from the interpreter, the interpreter will restore our SP (lest the
997 // compiled code, which relies solely on SP and not RBP, get sick).
998
999 address c2i_unverified_entry = __ pc();
1000 Label skip_fixup;
1001
1002 Register data = rax;
1003 Register receiver = j_rarg0;
1004 Register temp = rbx;
1005
1006 {
1007 __ ic_check(1 /* end_alignment */);
1008 __ movptr(rbx, Address(data, CompiledICData::speculated_method_offset()));
1009     // Method might have been compiled since the call site was patched to
1010     // interpreted; if that is the case treat it as a miss so we can get
1011     // the call site corrected.
1012 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
1013 __ jcc(Assembler::equal, skip_fixup);
1014 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1015 }
1016
1017 address c2i_entry = __ pc();
1018
1019 // Class initialization barrier for static methods
1020 address c2i_no_clinit_check_entry = nullptr;
1021 if (VM_Version::supports_fast_class_init_checks()) {
1022 Label L_skip_barrier;
1023 Register method = rbx;
1024
1025 { // Bypass the barrier for non-static methods
1026 Register flags = rscratch1;
1027 __ movl(flags, Address(method, Method::access_flags_offset()));
1028 __ testl(flags, JVM_ACC_STATIC);
1029 __ jcc(Assembler::zero, L_skip_barrier); // non-static
1030 }
1031
1032 Register klass = rscratch1;
1033 __ load_method_holder(klass, method);
1034 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
1035
1036 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1037
1038 __ bind(L_skip_barrier);
1039 c2i_no_clinit_check_entry = __ pc();
1040 }
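  // Descriptive note: clinit_barrier() above branches to L_skip_barrier when the holder
  // class is fully initialized (or is being initialized by the current thread); otherwise
  // control reaches the jump to the handle_wrong_method stub, so the call is re-resolved
  // once initialization completes.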
1041
1042 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1043 bs->c2i_entry_barrier(masm);
1044
1045 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
1046
1047 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);
1048 }
1049
1050 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1051 VMRegPair *regs,
1052 int total_args_passed) {
1053
1054 // We return the number of VMRegImpl stack slots we need to reserve for all
1055 // the arguments NOT counting out_preserve_stack_slots.
1056
1057 // NOTE: These arrays will have to change when c1 is ported
1058 #ifdef _WIN64
1059 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1060 c_rarg0, c_rarg1, c_rarg2, c_rarg3
1061 };
1062 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1063 c_farg0, c_farg1, c_farg2, c_farg3
1064 };
1065 #else
1066 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1067 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5
2162 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
2163
2164 // Get the handle (the 2nd argument)
2165 __ mov(oop_handle_reg, c_rarg1);
2166
2167 // Get address of the box
2168
2169 __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
2170
2171 // Load the oop from the handle
2172 __ movptr(obj_reg, Address(oop_handle_reg, 0));
2173
2174 if (LockingMode == LM_MONITOR) {
2175 __ jmp(slow_path_lock);
2176 } else if (LockingMode == LM_LEGACY) {
2177 // Load immediate 1 into swap_reg %rax
2178 __ movl(swap_reg, 1);
2179
2180 // Load (object->mark() | 1) into swap_reg %rax
2181 __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2182
2183 // Save (object->mark() | 1) into BasicLock's displaced header
2184 __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
2185
2186 // src -> dest iff dest == rax else rax <- dest
2187 __ lock();
2188 __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2189 __ jcc(Assembler::equal, count_mon);
2190
2191 // Hmm should this move to the slow path code area???
2192
2193 // Test if the oopMark is an obvious stack pointer, i.e.,
2194 // 1) (mark & 3) == 0, and
2195     //  2) rsp <= mark < rsp + os::pagesize()
2196 // These 3 tests can be done by evaluating the following
2197 // expression: ((mark - rsp) & (3 - os::vm_page_size())),
2198 // assuming both stack pointer and pagesize have their
2199 // least significant 2 bits clear.
2200 // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
2201
3718 __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), NULL_WORD);
3719 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD);
3720 #endif
3721 // Clear the exception oop so GC no longer processes it as a root.
3722 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD);
3723
3724 // rax: exception oop
3725 // r8: exception handler
3726 // rdx: exception pc
3727 // Jump to handler
3728
3729 __ jmp(r8);
3730
3731 // Make sure all code is generated
3732 masm->flush();
3733
3734 // Set exception blob
3735 _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
3736 }
3737 #endif // COMPILER2
|
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #ifndef _WINDOWS
27 #include "alloca.h"
28 #endif
29 #include "asm/macroAssembler.hpp"
30 #include "asm/macroAssembler.inline.hpp"
31 #include "classfile/symbolTable.hpp"
32 #include "code/compiledIC.hpp"
33 #include "code/debugInfoRec.hpp"
34 #include "code/nativeInst.hpp"
35 #include "code/vtableStubs.hpp"
36 #include "compiler/oopMap.hpp"
37 #include "gc/shared/collectedHeap.hpp"
38 #include "gc/shared/gcLocker.hpp"
39 #include "gc/shared/barrierSet.hpp"
40 #include "gc/shared/barrierSetAssembler.hpp"
41 #include "interpreter/interpreter.hpp"
42 #include "logging/log.hpp"
43 #include "memory/resourceArea.hpp"
44 #include "memory/universe.hpp"
45 #include "oops/klass.inline.hpp"
46 #include "oops/method.inline.hpp"
47 #include "prims/methodHandles.hpp"
48 #include "runtime/continuation.hpp"
49 #include "runtime/continuationEntry.inline.hpp"
50 #include "runtime/globals.hpp"
51 #include "runtime/jniHandles.hpp"
544 break;
545 case T_DOUBLE:
546 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
547 if (fp_args < Argument::n_float_register_parameters_j) {
548 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
549 } else {
550 stk_args = align_up(stk_args, 2);
551 regs[i].set2(VMRegImpl::stack2reg(stk_args));
552 stk_args += 2;
553 }
554 break;
555 default:
556 ShouldNotReachHere();
557 break;
558 }
559 }
560
561 return stk_args;
562 }
563
564 // Same as java_calling_convention() but for multiple return
565 // values. There's no way to store them on the stack so if we don't
566 // have enough registers, multiple values can't be returned.
567 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
568 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
569 int SharedRuntime::java_return_convention(const BasicType *sig_bt,
570 VMRegPair *regs,
571 int total_args_passed) {
572 // Create the mapping between argument positions and
573 // registers.
574 static const Register INT_ArgReg[java_return_convention_max_int] = {
575 rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
576 };
577 static const XMMRegister FP_ArgReg[java_return_convention_max_float] = {
578 j_farg0, j_farg1, j_farg2, j_farg3,
579 j_farg4, j_farg5, j_farg6, j_farg7
580 };
581
582
583 uint int_args = 0;
584 uint fp_args = 0;
585
586 for (int i = 0; i < total_args_passed; i++) {
587 switch (sig_bt[i]) {
588 case T_BOOLEAN:
589 case T_CHAR:
590 case T_BYTE:
591 case T_SHORT:
592 case T_INT:
593 if (int_args < Argument::n_int_register_parameters_j+1) {
594 regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
595 int_args++;
596 } else {
597 return -1;
598 }
599 break;
600 case T_VOID:
601 // halves of T_LONG or T_DOUBLE
602 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
603 regs[i].set_bad();
604 break;
605 case T_LONG:
606 assert(sig_bt[i + 1] == T_VOID, "expecting half");
607 // fall through
608 case T_OBJECT:
609 case T_ARRAY:
610 case T_ADDRESS:
611 case T_METADATA:
612 if (int_args < Argument::n_int_register_parameters_j+1) {
613 regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
614 int_args++;
615 } else {
616 return -1;
617 }
618 break;
619 case T_FLOAT:
620 if (fp_args < Argument::n_float_register_parameters_j) {
621 regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
622 fp_args++;
623 } else {
624 return -1;
625 }
626 break;
627 case T_DOUBLE:
628 assert(sig_bt[i + 1] == T_VOID, "expecting half");
629 if (fp_args < Argument::n_float_register_parameters_j) {
630 regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
631 fp_args++;
632 } else {
633 return -1;
634 }
635 break;
636 default:
637 ShouldNotReachHere();
638 break;
639 }
640 }
641
642 return int_args + fp_args;
643 }
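// Usage sketch (hypothetical caller): for a method returning an (int, double) pair,
//   BasicType sig[]  = { T_INT, T_DOUBLE, T_VOID };
//   VMRegPair regs[3];
//   int n = SharedRuntime::java_return_convention(sig, regs, 3);
// yields n == 2 with regs[0] holding rax, regs[1] holding j_farg0 as a two-slot pair,
// and regs[2] marked bad (the T_VOID half of the double).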
644
645 // Patch the caller's callsite with entry to compiled code if it exists.
646 static void patch_callers_callsite(MacroAssembler *masm) {
647 Label L;
648 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
649 __ jcc(Assembler::equal, L);
650
651 // Save the current stack pointer
652 __ mov(r13, rsp);
653 // Schedule the branch target address early.
654 // Call into the VM to patch the caller, then jump to compiled callee
655 // rax isn't live so capture return address while we easily can
656 __ movptr(rax, Address(rsp, 0));
657
658 // align stack so push_CPU_state doesn't fault
659 __ andptr(rsp, -(StackAlignmentInBytes));
660 __ push_CPU_state();
661 __ vzeroupper();
662 // VM needs caller's callsite
663 // VM needs target method
664   // This needs to be a long call since we will relocate this adapter to
665   // the codeBuffer and it may not reach
666
667 // Allocate argument register save area
668 if (frame::arg_reg_save_area_bytes != 0) {
669 __ subptr(rsp, frame::arg_reg_save_area_bytes);
670 }
671 __ mov(c_rarg0, rbx);
672 __ mov(c_rarg1, rax);
673 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
674
675 // De-allocate argument register save area
676 if (frame::arg_reg_save_area_bytes != 0) {
677 __ addptr(rsp, frame::arg_reg_save_area_bytes);
678 }
679
680 __ vzeroupper();
681 __ pop_CPU_state();
682 // restore sp
683 __ mov(rsp, r13);
684 __ bind(L);
685 }
686
687 // For each inline type argument, sig includes the list of fields of
688 // the inline type. This utility function computes the number of
689 // arguments for the call if inline types are passed by reference (the
690 // calling convention the interpreter expects).
691 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
692 int total_args_passed = 0;
693 if (InlineTypePassFieldsAsArgs) {
694 for (int i = 0; i < sig_extended->length(); i++) {
695 BasicType bt = sig_extended->at(i)._bt;
696 if (bt == T_METADATA) {
697 // In sig_extended, an inline type argument starts with:
698 // T_METADATA, followed by the types of the fields of the
699       // inline type and T_VOID to mark the end of the inline
700       // type. Inline types are flattened so, for instance, in the
701 // case of an inline type with an int field and an inline type
702 // field that itself has 2 fields, an int and a long:
703 // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second
704 // slot for the T_LONG) T_VOID (inner inline type) T_VOID
705 // (outer inline type)
706 total_args_passed++;
707 int vt = 1;
708 do {
709 i++;
710 BasicType bt = sig_extended->at(i)._bt;
711 BasicType prev_bt = sig_extended->at(i-1)._bt;
712 if (bt == T_METADATA) {
713 vt++;
714 } else if (bt == T_VOID &&
715 prev_bt != T_LONG &&
716 prev_bt != T_DOUBLE) {
717 vt--;
718 }
719 } while (vt != 0);
720 } else {
721 total_args_passed++;
722 }
723 }
724 } else {
725 total_args_passed = sig_extended->length();
726 }
727 return total_args_passed;
728 }
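// Worked example, using the encoding described in the comment above: for
// T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID T_VOID T_VOID (an inline type holding
// an int and a nested inline type with an int and a long), the inner do/while consumes
// everything after the first T_METADATA, so the function returns 1: the interpreter sees
// the whole inline type as a single reference argument.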
729
730
731 static void gen_c2i_adapter_helper(MacroAssembler* masm,
732 BasicType bt,
733 BasicType prev_bt,
734 size_t size_in_bytes,
735 const VMRegPair& reg_pair,
736 const Address& to,
737 int extraspace,
738 bool is_oop) {
739 if (bt == T_VOID) {
740 assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
741 return;
742 }
743
744 // Say 4 args:
745 // i st_off
746 // 0 32 T_LONG
747 // 1 24 T_VOID
748 // 2 16 T_OBJECT
749 // 3 8 T_BOOL
750 // - 0 return address
751 //
752   // However, to make things extra confusing: because we can fit a long/double in
753   // a single slot on a 64-bit VM, and it would be silly to break them up, the interpreter
754   // leaves one slot empty and only stores to a single slot. In this case the
755   // slot that is occupied is the T_VOID slot. See, I said it was confusing.
756
757 bool wide = (size_in_bytes == wordSize);
758 VMReg r_1 = reg_pair.first();
759 VMReg r_2 = reg_pair.second();
760 assert(r_2->is_valid() == wide, "invalid size");
761 if (!r_1->is_valid()) {
762 assert(!r_2->is_valid(), "must be invalid");
763 return;
764 }
765
766 if (!r_1->is_XMMRegister()) {
767 Register val = rax;
768 if (r_1->is_stack()) {
769 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
770 __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
771 } else {
772 val = r_1->as_Register();
773 }
774 assert_different_registers(to.base(), val, rscratch1);
775 if (is_oop) {
776 __ push(r13);
777 __ push(rbx);
778 __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
779 __ pop(rbx);
780 __ pop(r13);
781 } else {
782 __ store_sized_value(to, val, size_in_bytes);
783 }
784 } else {
785 if (wide) {
786 __ movdbl(to, r_1->as_XMMRegister());
787 } else {
788 __ movflt(to, r_1->as_XMMRegister());
789 }
790 }
791 }
792
793 static void gen_c2i_adapter(MacroAssembler *masm,
794 const GrowableArray<SigEntry>* sig_extended,
795 const VMRegPair *regs,
796 bool requires_clinit_barrier,
797 address& c2i_no_clinit_check_entry,
798 Label& skip_fixup,
799 address start,
800 OopMapSet* oop_maps,
801 int& frame_complete,
802 int& frame_size_in_words,
803 bool alloc_inline_receiver) {
804 if (requires_clinit_barrier && VM_Version::supports_fast_class_init_checks()) {
805 Label L_skip_barrier;
806 Register method = rbx;
807
808 { // Bypass the barrier for non-static methods
809 Register flags = rscratch1;
810 __ movl(flags, Address(method, Method::access_flags_offset()));
811 __ testl(flags, JVM_ACC_STATIC);
812 __ jcc(Assembler::zero, L_skip_barrier); // non-static
813 }
814
815 Register klass = rscratch1;
816 __ load_method_holder(klass, method);
817 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
818
819 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
820
821 __ bind(L_skip_barrier);
822 c2i_no_clinit_check_entry = __ pc();
823 }
824
825 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
826 bs->c2i_entry_barrier(masm);
827
828 // Before we get into the guts of the C2I adapter, see if we should be here
829 // at all. We've come from compiled code and are attempting to jump to the
830 // interpreter, which means the caller made a static call to get here
831 // (vcalls always get a compiled target if there is one). Check for a
832 // compiled target. If there is one, we need to patch the caller's call.
833 patch_callers_callsite(masm);
834
835 __ bind(skip_fixup);
836
837 if (InlineTypePassFieldsAsArgs) {
838 // Is there an inline type argument?
839 bool has_inline_argument = false;
840 for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
841 has_inline_argument = (sig_extended->at(i)._bt == T_METADATA);
842 }
843 if (has_inline_argument) {
844 // There is at least an inline type argument: we're coming from
845 // compiled code so we have no buffers to back the inline types.
846 // Allocate the buffers here with a runtime call.
847 OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false);
848
849 frame_complete = __ offset();
850
851 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);
852
853 __ mov(c_rarg0, r15_thread);
854 __ mov(c_rarg1, rbx);
855 __ mov64(c_rarg2, (int64_t)alloc_inline_receiver);
856 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types)));
857
858 oop_maps->add_gc_map((int)(__ pc() - start), map);
859 __ reset_last_Java_frame(false);
860
861 RegisterSaver::restore_live_registers(masm);
862
863 Label no_exception;
864 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
865 __ jcc(Assembler::equal, no_exception);
866
867 __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD);
868 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
869 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
870
871 __ bind(no_exception);
872
873 // We get an array of objects from the runtime call
874 __ get_vm_result(rscratch2, r15_thread); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr()
875 __ get_vm_result_2(rbx, r15_thread); // TODO: required to keep the callee Method live?
876 }
877 }
878
879 // Since all args are passed on the stack, total_args_passed *
880 // Interpreter::stackElementSize is the space we need.
881 int total_args_passed = compute_total_args_passed_int(sig_extended);
882 assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed);
883
884 int extraspace = (total_args_passed * Interpreter::stackElementSize);
885
886 // stack is aligned, keep it that way
887 // This is not currently needed or enforced by the interpreter, but
888 // we might as well conform to the ABI.
889 extraspace = align_up(extraspace, 2*wordSize);
890
891 // set senderSP value
892 __ lea(r13, Address(rsp, wordSize));
893
894 #ifdef ASSERT
895 __ check_stack_alignment(r13, "sender stack not aligned");
896 #endif
897 if (extraspace > 0) {
898 // Pop the return address
899 __ pop(rax);
900
901 __ subptr(rsp, extraspace);
902
903 // Push the return address
904 __ push(rax);
905
906 // Account for the return address location since we store it first rather
907 // than hold it in a register across all the shuffling
908 extraspace += wordSize;
909 }
910
911 #ifdef ASSERT
912 __ check_stack_alignment(rsp, "callee stack not aligned", wordSize, rax);
913 #endif
914
915 // Now write the args into the outgoing interpreter space
916
917 // next_arg_comp is the next argument from the compiler point of
918 // view (inline type fields are passed in registers/on the stack). In
919 // sig_extended, an inline type argument starts with: T_METADATA,
920 // followed by the types of the fields of the inline type and T_VOID
921 // to mark the end of the inline type. ignored counts the number of
922 // T_METADATA/T_VOID. next_vt_arg is the next inline type argument:
923 // used to get the buffer for that argument from the pool of buffers
924 // we allocated above and want to pass to the
925 // interpreter. next_arg_int is the next argument from the
926 // interpreter point of view (inline types are passed by reference).
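  // Example of one iteration, assuming a single inline type argument MyValue { int x }
  // (a hypothetical class) encoded as T_METADATA T_INT T_VOID: the loop below loads the
  // pre-allocated buffer into r14, the inner do/while copies the int field from regs[0]
  // into the buffer at its field offset, and the buffer reference itself is what ends up
  // in the interpreter's outgoing stack slot.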
927 for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
928 next_arg_comp < sig_extended->length(); next_arg_comp++) {
929 assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
930 assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?");
931 BasicType bt = sig_extended->at(next_arg_comp)._bt;
932 int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
933 if (!InlineTypePassFieldsAsArgs || bt != T_METADATA) {
934 int next_off = st_off - Interpreter::stackElementSize;
935 const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
936 const VMRegPair reg_pair = regs[next_arg_comp-ignored];
937 size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
938 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
939 size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false);
940 next_arg_int++;
941 #ifdef ASSERT
942 if (bt == T_LONG || bt == T_DOUBLE) {
943 // Overwrite the unused slot with known junk
944 __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
945 __ movptr(Address(rsp, st_off), rax);
946 }
947 #endif /* ASSERT */
948 } else {
949 ignored++;
950 // get the buffer from the just allocated pool of buffers
951 int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_OBJECT);
952 __ load_heap_oop(r14, Address(rscratch2, index));
953 next_vt_arg++; next_arg_int++;
954 int vt = 1;
955 // write fields we get from compiled code in registers/stack
956 // slots to the buffer: we know we are done with that inline type
957 // argument when we hit the T_VOID that acts as an end of inline
958 // type delimiter for this inline type. Inline types are flattened
959 // so we might encounter embedded inline types. Each entry in
960 // sig_extended contains a field offset in the buffer.
961 Label L_null;
962 do {
963 next_arg_comp++;
964 BasicType bt = sig_extended->at(next_arg_comp)._bt;
965 BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt;
966 if (bt == T_METADATA) {
967 vt++;
968 ignored++;
969 } else if (bt == T_VOID &&
970 prev_bt != T_LONG &&
971 prev_bt != T_DOUBLE) {
972 vt--;
973 ignored++;
974 } else {
975 int off = sig_extended->at(next_arg_comp)._offset;
976 if (off == -1) {
977 // Nullable inline type argument, emit null check
978 VMReg reg = regs[next_arg_comp-ignored].first();
979 Label L_notNull;
980 if (reg->is_stack()) {
981 int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
982 __ testb(Address(rsp, ld_off), 1);
983 } else {
984 __ testb(reg->as_Register(), 1);
985 }
986 __ jcc(Assembler::notZero, L_notNull);
987 __ movptr(Address(rsp, st_off), 0);
988 __ jmp(L_null);
989 __ bind(L_notNull);
990 continue;
991 }
992 assert(off > 0, "offset in object should be positive");
993 size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
994 bool is_oop = is_reference_type(bt);
995 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
996 size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop);
997 }
998 } while (vt != 0);
999 // pass the buffer to the interpreter
1000 __ movptr(Address(rsp, st_off), r14);
1001 __ bind(L_null);
1002 }
1003 }
1004
1005 // Schedule the branch target address early.
1006 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
1007 __ jmp(rcx);
1008 }
1009
1010 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
1011 address code_start, address code_end,
1012 Label& L_ok) {
1013 Label L_fail;
1014 __ lea(temp_reg, ExternalAddress(code_start));
1015 __ cmpptr(pc_reg, temp_reg);
1016 __ jcc(Assembler::belowEqual, L_fail);
1017 __ lea(temp_reg, ExternalAddress(code_end));
1018 __ cmpptr(pc_reg, temp_reg);
1019 __ jcc(Assembler::below, L_ok);
1020 __ bind(L_fail);
1021 }
1022
1023 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
1024 int comp_args_on_stack,
1025 const GrowableArray<SigEntry>* sig,
1026 const VMRegPair *regs) {
1027
1028 // Note: r13 contains the senderSP on entry. We must preserve it since
1029 // we may do a i2c -> c2i transition if we lose a race where compiled
1030 // code goes non-entrant while we get args ready.
1031 // In addition we use r13 to locate all the interpreter args as
1032 // we must align the stack to 16 bytes on an i2c entry else we
1033 // lose alignment we expect in all compiled code and register
1034 // save code can segv when fxsave instructions find improperly
1035 // aligned stack pointer.
1036
1037 // Adapters can be frameless because they do not require the caller
1038 // to perform additional cleanup work, such as correcting the stack pointer.
1039 // An i2c adapter is frameless because the *caller* frame, which is interpreted,
1040 // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
1041 // even if a callee has modified the stack pointer.
1042 // A c2i adapter is frameless because the *callee* frame, which is interpreted,
1043 // routinely repairs its caller's stack pointer (from sender_sp, which is set
1044 // up via the senderSP register).
1045 // In other words, if *either* the caller or callee is interpreted, we can
1096 // Convert 4-byte c2 stack slots to words.
1097 int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
1098
1099 if (comp_args_on_stack) {
1100 __ subptr(rsp, comp_words_on_stack * wordSize);
1101 }
1102
1103 // Ensure compiled code always sees stack at proper alignment
1104 __ andptr(rsp, -16);
1105
1107   // push the return address and misalign the stack so that the youngest frame always sees
1108   // the same layout it would right after a call instruction
1108 __ push(rax);
1109
1110 // Put saved SP in another register
1111 const Register saved_sp = rax;
1112 __ movptr(saved_sp, r11);
1113
1114 // Will jump to the compiled code just as if compiled code was doing it.
1115 // Pre-load the register-jump target early, to schedule it better.
1116 __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset())));
1117
1118 #if INCLUDE_JVMCI
1119 if (EnableJVMCI) {
1120 // check if this call should be routed towards a specific entry point
1121 __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
1122 Label no_alternative_target;
1123 __ jcc(Assembler::equal, no_alternative_target);
1124 __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
1125 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
1126 __ bind(no_alternative_target);
1127 }
1128 #endif // INCLUDE_JVMCI
1129
1130 int total_args_passed = sig->length();
1131
1132 // Now generate the shuffle code. Pick up all register args and move the
1133 // rest through the floating point stack top.
1134 for (int i = 0; i < total_args_passed; i++) {
1135 BasicType bt = sig->at(i)._bt;
1136 if (bt == T_VOID) {
1137 // Longs and doubles are passed in native word order, but misaligned
1138 // in the 32-bit build.
1139 BasicType prev_bt = (i > 0) ? sig->at(i-1)._bt : T_ILLEGAL;
1140 assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half");
1141 continue;
1142 }
1143
1144 // Pick up 0, 1 or 2 words from SP+offset.
1145
1146 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
1147 "scrambled load targets?");
1148 // Load in argument order going down.
1149 int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
1150 // Point to interpreter value (vs. tag)
1151 int next_off = ld_off - Interpreter::stackElementSize;
1152 //
1153 //
1154 //
1155 VMReg r_1 = regs[i].first();
1156 VMReg r_2 = regs[i].second();
1157 if (!r_1->is_valid()) {
1158 assert(!r_2->is_valid(), "");
1159 continue;
1160 }
1161     if (r_1->is_stack()) {
1162       // Convert stack slot to an SP offset (+ wordSize to account for return address)
1163 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
1164
1165 // We can use r13 as a temp here because compiled code doesn't need r13 as an input
1166     // and if we end up going thru a c2i because of a miss, a reasonable value of r13
1167 // will be generated.
1168 if (!r_2->is_valid()) {
1169 // sign extend???
1170 __ movl(r13, Address(saved_sp, ld_off));
1171 __ movptr(Address(rsp, st_off), r13);
1172 } else {
1173 //
1174 // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
1175       // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case.
1176 // So we must adjust where to pick up the data to match the interpreter.
1177 //
1178 // Interpreter local[n] == MSW, local[n+1] == LSW however locals
1179 // are accessed as negative so LSW is at LOW address
1180
1181 // ld_off is MSW so get LSW
1182 const int offset = (bt==T_LONG||bt==T_DOUBLE)?
1183 next_off : ld_off;
1184 __ movq(r13, Address(saved_sp, offset));
1185 // st_off is LSW (i.e. reg.first())
1186 __ movq(Address(rsp, st_off), r13);
1187 }
1188 } else if (r_1->is_Register()) { // Register argument
1189 Register r = r_1->as_Register();
1190 assert(r != rax, "must be different");
1191 if (r_2->is_valid()) {
1192 //
1193 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
1194       // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case.
1195 // So we must adjust where to pick up the data to match the interpreter.
1196
1197 const int offset = (bt==T_LONG||bt==T_DOUBLE)?
1198 next_off : ld_off;
1199
1200 // this can be a misaligned move
1201 __ movq(r, Address(saved_sp, offset));
1202 } else {
1203 // sign extend and use a full word?
1204 __ movl(r, Address(saved_sp, ld_off));
1205 }
1206 } else {
1207 if (!r_2->is_valid()) {
1208 __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
1209 } else {
1210 __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
1211 }
1212 }
1213 }
1214
1215 __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about
1216
1217 // 6243940 We might end up in handle_wrong_method if
1218 // the callee is deoptimized as we race thru here. If that
1219 // happens we don't want to take a safepoint because the
1220 // caller frame will look interpreted and arguments are now
1221 // "compiled" so it is much better to make this transition
1222   // invisible to the stack walking code. Unfortunately, if
1223   // we try to find the callee by normal means, a safepoint
1224   // is possible. So we stash the desired callee in the thread
1225   // and the VM will find it there should this case occur.
1226
1227 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
1228
1229   // put Method* where a c2i would expect it, should we end up there;
1230   // only needed because c2's resolve stubs return Method* as a result in
1231   // rax
1232 __ mov(rax, rbx);
1233 __ jmp(r11);
1234 }
1235
1236 static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
1237 Register data = rax;
1238 __ ic_check(1 /* end_alignment */);
1239 __ movptr(rbx, Address(data, CompiledICData::speculated_method_offset()));
1240
1241   // Method might have been compiled since the call site was patched to
1242   // interpreted; if that is the case treat it as a miss so we can get
1243   // the call site corrected.
1244 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
1245 __ jcc(Assembler::equal, skip_fixup);
1246 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1247 }
1248
1249 // ---------------------------------------------------------------
1250 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
1251 int comp_args_on_stack,
1252 const GrowableArray<SigEntry>* sig,
1253 const VMRegPair* regs,
1254 const GrowableArray<SigEntry>* sig_cc,
1255 const VMRegPair* regs_cc,
1256 const GrowableArray<SigEntry>* sig_cc_ro,
1257 const VMRegPair* regs_cc_ro,
1258 AdapterFingerPrint* fingerprint,
1259 AdapterBlob*& new_adapter,
1260 bool allocate_code_blob) {
1261 address i2c_entry = __ pc();
1262 gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);
1263
1264 // -------------------------------------------------------------------------
1265 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
1266 // to the interpreter. The args start out packed in the compiled layout. They
1267 // need to be unpacked into the interpreter layout. This will almost always
1268 // require some stack space. We grow the current (compiled) stack, then repack
1269 // the args. We finally end in a jump to the generic interpreter entry point.
1270 // On exit from the interpreter, the interpreter will restore our SP (lest the
1271 // compiled code, which relies solely on SP and not RBP, get sick).
1272
1273 address c2i_unverified_entry = __ pc();
1274 address c2i_unverified_inline_entry = __ pc();
1275 Label skip_fixup;
1276
1277 gen_inline_cache_check(masm, skip_fixup);
1278
1279 OopMapSet* oop_maps = new OopMapSet();
1280 int frame_complete = CodeOffsets::frame_never_safe;
1281 int frame_size_in_words = 0;
1282
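  // Up to three c2i variants are generated below: a scalarized adapter that leaves the
  // receiver non-scalarized (c2i_inline_ro_entry, only emitted when regs_cc != regs_cc_ro),
  // the fully scalarized adapter (c2i_entry / c2i_inline_entry), and, when the scalarized
  // and non-scalarized conventions differ, a separate non-scalarized adapter with its own
  // inline cache check.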
1283 // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
1284 address c2i_no_clinit_check_entry = nullptr;
1285 address c2i_inline_ro_entry = __ pc();
1286 if (regs_cc != regs_cc_ro) {
1287 // No class init barrier needed because method is guaranteed to be non-static
1288 gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, c2i_no_clinit_check_entry,
1289 skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
1290 skip_fixup.reset();
1291 }
1292
1293 // Scalarized c2i adapter
1294 address c2i_entry = __ pc();
1295 address c2i_inline_entry = __ pc();
1296 gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry,
1297 skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true);
1298
1299 // Non-scalarized c2i adapter
1300 if (regs != regs_cc) {
1301 c2i_unverified_inline_entry = __ pc();
1302 Label inline_entry_skip_fixup;
1303 gen_inline_cache_check(masm, inline_entry_skip_fixup);
1304
1305 c2i_inline_entry = __ pc();
1306 gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry,
1307 inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
1308 }
1309
1310 // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
1311 // the GC knows about the location of oop argument locations passed to the c2i adapter.
1312 if (allocate_code_blob) {
1313 bool caller_must_gc_arguments = (regs != regs_cc);
1314 new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
1315 }
1316
1317 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, c2i_unverified_inline_entry, c2i_no_clinit_check_entry);
1318 }
1319
1320 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1321 VMRegPair *regs,
1322 int total_args_passed) {
1323
1324 // We return the number of VMRegImpl stack slots we need to reserve for all
1325 // the arguments NOT counting out_preserve_stack_slots.
1326
1327 // NOTE: These arrays will have to change when c1 is ported
1328 #ifdef _WIN64
1329 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1330 c_rarg0, c_rarg1, c_rarg2, c_rarg3
1331 };
1332 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1333 c_farg0, c_farg1, c_farg2, c_farg3
1334 };
1335 #else
1336 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1337 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5
2432 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
2433
2434 // Get the handle (the 2nd argument)
2435 __ mov(oop_handle_reg, c_rarg1);
2436
2437 // Get address of the box
2438
2439 __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
2440
2441 // Load the oop from the handle
2442 __ movptr(obj_reg, Address(oop_handle_reg, 0));
2443
2444 if (LockingMode == LM_MONITOR) {
2445 __ jmp(slow_path_lock);
2446 } else if (LockingMode == LM_LEGACY) {
2447 // Load immediate 1 into swap_reg %rax
2448 __ movl(swap_reg, 1);
2449
2450 // Load (object->mark() | 1) into swap_reg %rax
2451 __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2452 if (EnableValhalla) {
2453 // Mask inline_type bit such that we go to the slow path if object is an inline type
2454 __ andptr(swap_reg, ~((int) markWord::inline_type_bit_in_place));
2455 }
2456
2457 // Save (object->mark() | 1) into BasicLock's displaced header
2458 __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
2459
2460 // src -> dest iff dest == rax else rax <- dest
2461 __ lock();
2462 __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2463 __ jcc(Assembler::equal, count_mon);
2464
2465 // Hmm should this move to the slow path code area???
2466
2467 // Test if the oopMark is an obvious stack pointer, i.e.,
2468 // 1) (mark & 3) == 0, and
2469     //  2) rsp <= mark < rsp + os::pagesize()
2470 // These 3 tests can be done by evaluating the following
2471 // expression: ((mark - rsp) & (3 - os::vm_page_size())),
2472 // assuming both stack pointer and pagesize have their
2473 // least significant 2 bits clear.
2474 // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
2475
3992 __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), NULL_WORD);
3993 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD);
3994 #endif
3995 // Clear the exception oop so GC no longer processes it as a root.
3996 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD);
3997
3998 // rax: exception oop
3999 // r8: exception handler
4000 // rdx: exception pc
4001 // Jump to handler
4002
4003 __ jmp(r8);
4004
4005 // Make sure all code is generated
4006 masm->flush();
4007
4008 // Set exception blob
4009 _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
4010 }
4011 #endif // COMPILER2
4012
4013 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
4014 BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K);
4015 CodeBuffer buffer(buf);
4016 short buffer_locs[20];
4017 buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs,
4018 sizeof(buffer_locs)/sizeof(relocInfo));
4019
4020 MacroAssembler* masm = new MacroAssembler(&buffer);
4021
4022 const Array<SigEntry>* sig_vk = vk->extended_sig();
4023 const Array<VMRegPair>* regs = vk->return_regs();
4024
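  // Descriptive note: this blob has two halves. The pack stub (pack_fields_off) stores the
  // scalarized field values from the return registers described by vk->return_regs() into
  // the buffer oop held in rax (the pack_fields_jobject entry first resolves that buffer
  // from the JNI handle at (r13) and falls through to the same code), while the unpack stub
  // (unpack_fields_off) loads each field of the buffered value in rax back into its return
  // register, skipping the loads when rax is null.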
4025 int pack_fields_jobject_off = __ offset();
4026 // Resolve pre-allocated buffer from JNI handle.
4027 // We cannot do this in generate_call_stub() because it requires GC code to be initialized.
4028 __ movptr(rax, Address(r13, 0));
4029 __ resolve_jobject(rax /* value */,
4030 r15_thread /* thread */,
4031 r12 /* tmp */);
4032 __ movptr(Address(r13, 0), rax);
4033
4034 int pack_fields_off = __ offset();
4035
4036 int j = 1;
4037 for (int i = 0; i < sig_vk->length(); i++) {
4038 BasicType bt = sig_vk->at(i)._bt;
4039 if (bt == T_METADATA) {
4040 continue;
4041 }
4042 if (bt == T_VOID) {
4043 if (sig_vk->at(i-1)._bt == T_LONG ||
4044 sig_vk->at(i-1)._bt == T_DOUBLE) {
4045 j++;
4046 }
4047 continue;
4048 }
4049 int off = sig_vk->at(i)._offset;
4050 assert(off > 0, "offset in object should be positive");
4051 VMRegPair pair = regs->at(j);
4052 VMReg r_1 = pair.first();
4053 VMReg r_2 = pair.second();
4054 Address to(rax, off);
4055 if (bt == T_FLOAT) {
4056 __ movflt(to, r_1->as_XMMRegister());
4057 } else if (bt == T_DOUBLE) {
4058 __ movdbl(to, r_1->as_XMMRegister());
4059 } else {
4060 Register val = r_1->as_Register();
4061 assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1);
4062 if (is_reference_type(bt)) {
4063 __ store_heap_oop(to, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
4064 } else {
4065 __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt));
4066 }
4067 }
4068 j++;
4069 }
4070 assert(j == regs->length(), "missed a field?");
4071
4072 __ ret(0);
4073
4074 int unpack_fields_off = __ offset();
4075
4076 Label skip;
4077 __ testptr(rax, rax);
4078 __ jcc(Assembler::zero, skip);
4079
4080 j = 1;
4081 for (int i = 0; i < sig_vk->length(); i++) {
4082 BasicType bt = sig_vk->at(i)._bt;
4083 if (bt == T_METADATA) {
4084 continue;
4085 }
4086 if (bt == T_VOID) {
4087 if (sig_vk->at(i-1)._bt == T_LONG ||
4088 sig_vk->at(i-1)._bt == T_DOUBLE) {
4089 j++;
4090 }
4091 continue;
4092 }
4093 int off = sig_vk->at(i)._offset;
4094 assert(off > 0, "offset in object should be positive");
4095 VMRegPair pair = regs->at(j);
4096 VMReg r_1 = pair.first();
4097 VMReg r_2 = pair.second();
4098 Address from(rax, off);
4099 if (bt == T_FLOAT) {
4100 __ movflt(r_1->as_XMMRegister(), from);
4101 } else if (bt == T_DOUBLE) {
4102 __ movdbl(r_1->as_XMMRegister(), from);
4103 } else if (bt == T_OBJECT || bt == T_ARRAY) {
4104 assert_different_registers(rax, r_1->as_Register());
4105 __ load_heap_oop(r_1->as_Register(), from);
4106 } else {
4107 assert(is_java_primitive(bt), "unexpected basic type");
4108 assert_different_registers(rax, r_1->as_Register());
4109 size_t size_in_bytes = type2aelembytes(bt);
4110 __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN);
4111 }
4112 j++;
4113 }
4114 assert(j == regs->length(), "missed a field?");
4115
4116 __ bind(skip);
4117 __ ret(0);
4118
4119 __ flush();
4120
4121 return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off);
4122 }