
src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp

  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #ifndef _WINDOWS
  27 #include "alloca.h"
  28 #endif
  29 #include "asm/macroAssembler.hpp"
  30 #include "asm/macroAssembler.inline.hpp"

  31 #include "code/debugInfoRec.hpp"
  32 #include "code/icBuffer.hpp"
  33 #include "code/nativeInst.hpp"
  34 #include "code/vtableStubs.hpp"
  35 #include "compiler/oopMap.hpp"
  36 #include "gc/shared/collectedHeap.hpp"
  37 #include "gc/shared/gcLocker.hpp"
  38 #include "gc/shared/barrierSet.hpp"
  39 #include "gc/shared/barrierSetAssembler.hpp"
  40 #include "interpreter/interpreter.hpp"
  41 #include "logging/log.hpp"
  42 #include "memory/resourceArea.hpp"
  43 #include "memory/universe.hpp"
  44 #include "oops/compiledICHolder.hpp"
  45 #include "oops/klass.inline.hpp"
  46 #include "prims/methodHandles.hpp"
  47 #include "runtime/jniHandles.hpp"
  48 #include "runtime/safepointMechanism.hpp"
  49 #include "runtime/sharedRuntime.hpp"
  50 #include "runtime/signature.hpp"

 511     case T_SHORT:
 512     case T_INT:
 513       if (int_args < Argument::n_int_register_parameters_j) {
 514         regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
 515       } else {
 516         regs[i].set1(VMRegImpl::stack2reg(stk_args));
 517         stk_args += 2;
 518       }
 519       break;
 520     case T_VOID:
 521       // halves of T_LONG or T_DOUBLE
 522       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 523       regs[i].set_bad();
 524       break;
 525     case T_LONG:
 526       assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 527       // fall through
 528     case T_OBJECT:
 529     case T_ARRAY:
 530     case T_ADDRESS:

 531       if (int_args < Argument::n_int_register_parameters_j) {
 532         regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
 533       } else {
 534         regs[i].set2(VMRegImpl::stack2reg(stk_args));
 535         stk_args += 2;
 536       }
 537       break;
 538     case T_FLOAT:
 539       if (fp_args < Argument::n_float_register_parameters_j) {
 540         regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
 541       } else {
 542         regs[i].set1(VMRegImpl::stack2reg(stk_args));
 543         stk_args += 2;
 544       }
 545       break;
 546     case T_DOUBLE:
 547       assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 548       if (fp_args < Argument::n_float_register_parameters_j) {
 549         regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
 550       } else {
 551         regs[i].set2(VMRegImpl::stack2reg(stk_args));
 552         stk_args += 2;
 553       }
 554       break;
 555     default:
 556       ShouldNotReachHere();
 557       break;
 558     }
 559   }
 560 
 561   return align_up(stk_args, 2);
 562 }
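// ---------------------------------------------------------------------------
// Editor's illustration (not part of this webrev): a sketch of how the
// convention above would lay out the Java signature (int, long, Object,
// double), assuming the usual j_rarg*/j_farg* argument register aliases of
// this file and that no argument spills to the stack:
//
//   sig_bt[i]          assignment made above           counters afterwards
//   T_INT           -> regs[0].set1(j_rarg0)           int_args = 1
//   T_LONG          -> regs[1].set2(j_rarg1)           int_args = 2
//   T_VOID (half)   -> regs[2].set_bad()
//   T_OBJECT        -> regs[3].set2(j_rarg2)           int_args = 3
//   T_DOUBLE        -> regs[4].set2(j_farg0)           fp_args  = 1
//   T_VOID (half)   -> regs[5].set_bad()
//
// Only when the register supply runs out does an argument get
// VMRegImpl::stack2reg(stk_args) with stk_args += 2, and the function then
// returns align_up(stk_args, 2) as the number of stack slots to reserve.
// ---------------------------------------------------------------------------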
 563 
 564 // Patch the caller's callsite with an entry to compiled code if one exists.
 565 static void patch_callers_callsite(MacroAssembler *masm) {
 566   Label L;
 567   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
 568   __ jcc(Assembler::equal, L);
 569 
 570   // Save the current stack pointer
 571   __ mov(r13, rsp);
 572   // Schedule the branch target address early.
 573   // Call into the VM to patch the caller, then jump to compiled callee
 574   // rax isn't live so capture return address while we easily can
 575   __ movptr(rax, Address(rsp, 0));
 576 
 577   // align stack so push_CPU_state doesn't fault
 578   __ andptr(rsp, -(StackAlignmentInBytes));
 579   __ push_CPU_state();
 580   __ vzeroupper();
 581   // VM needs caller's callsite
 582   // VM needs target method
 583   // This needs to be a long call since we will relocate this adapter to

 586   // Allocate argument register save area
 587   if (frame::arg_reg_save_area_bytes != 0) {
 588     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 589   }
 590   __ mov(c_rarg0, rbx);
 591   __ mov(c_rarg1, rax);
 592   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
 593 
 594   // De-allocate argument register save area
 595   if (frame::arg_reg_save_area_bytes != 0) {
 596     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 597   }
 598 
 599   __ vzeroupper();
 600   __ pop_CPU_state();
 601   // restore sp
 602   __ mov(rsp, r13);
 603   __ bind(L);
 604 }
 605 
 606 
 607 static void gen_c2i_adapter(MacroAssembler *masm,
 608                             int total_args_passed,
 609                             int comp_args_on_stack,
 610                             const BasicType *sig_bt,
 611                             const VMRegPair *regs,
 612                             Label& skip_fixup) {





 613   // Before we get into the guts of the C2I adapter, see if we should be here
 614   // at all.  We've come from compiled code and are attempting to jump to the
 615   // interpreter, which means the caller made a static call to get here
 616   // (vcalls always get a compiled target if there is one).  Check for a
 617   // compiled target.  If there is one, we need to patch the caller's call.
 618   patch_callers_callsite(masm);
 619 
 620   __ bind(skip_fixup);
 621 
 622   // Since all args are passed on the stack, total_args_passed *
 623   // Interpreter::stackElementSize is the space we need. Plus one word because
 624   // we also account for the return address location since
 625   // we store it first rather than hold it in rax across all the shuffling
 626 
 627   int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;
 628 
 629   // stack is aligned, keep it that way
 630   extraspace = align_up(extraspace, 2*wordSize);
 631 
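  // Editor's note (illustration only): assuming the 64-bit sizes used here
  // (wordSize == 8 and Interpreter::stackElementSize == 8), the four-slot
  // example below gives extraspace = 4 * 8 + 8 = 40 bytes, which
  // align_up(extraspace, 2*wordSize) rounds to 48 so the stack stays
  // 16-byte aligned.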
 632   // Get return address
 633   __ pop(rax);
 634 
 635   // set senderSP value
 636   __ mov(r13, rsp);
 637 
 638   __ subptr(rsp, extraspace);
 639 
 640   // Store the return address in the expected location
 641   __ movptr(Address(rsp, 0), rax);
 642 
 643   // Now write the args into the outgoing interpreter space
 644   for (int i = 0; i < total_args_passed; i++) {
 645     if (sig_bt[i] == T_VOID) {
 646       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 647       continue;
 648     }
 649 
 650     // offset to start parameters
 651     int st_off   = (total_args_passed - i) * Interpreter::stackElementSize;
 652     int next_off = st_off - Interpreter::stackElementSize;
 653 
 654     // Say 4 args:
 655     // i   st_off
 656     // 0   32 T_LONG
 657     // 1   24 T_VOID
 658     // 2   16 T_OBJECT
 659     // 3    8 T_BOOL
 660     // -    0 return address
 661     //
 662     // However, to make things extra confusing: because we can fit a long/double in
 663     // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
 664     // leaves one slot empty and only stores to a single slot. In this case the
 665     // slot that is occupied is the T_VOID slot. See, I said it was confusing.
 666 
 667     VMReg r_1 = regs[i].first();
 668     VMReg r_2 = regs[i].second();
 669     if (!r_1->is_valid()) {
 670       assert(!r_2->is_valid(), "");
 671       continue;
 672     }
 673     if (r_1->is_stack()) {
 674       // memory to memory use rax
 675       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 676       if (!r_2->is_valid()) {
 677         // sign extend??
 678         __ movl(rax, Address(rsp, ld_off));
 679         __ movptr(Address(rsp, st_off), rax);
 680 
 681       } else {
 682 
 683         __ movq(rax, Address(rsp, ld_off));
 684 
 685         // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
 686         // T_DOUBLE and T_LONG use two slots in the interpreter
 687         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
 688           // ld_off == LSW, ld_off+wordSize == MSW
 689           // st_off == MSW, next_off == LSW
 690           __ movq(Address(rsp, next_off), rax);
 691 #ifdef ASSERT
 692           // Overwrite the unused slot with known junk
 693           __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
 694           __ movptr(Address(rsp, st_off), rax);
 695 #endif /* ASSERT */
 696         } else {
 697           __ movq(Address(rsp, st_off), rax);
 698         }
 699       }
 700     } else if (r_1->is_Register()) {
 701       Register r = r_1->as_Register();
 702       if (!r_2->is_valid()) {
 703         // must be only an int (or less) so move only 32 bits to the slot
 704         // why not sign extend??
 705         __ movl(Address(rsp, st_off), r);
 706       } else {
 707         // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
 708         // T_DOUBLE and T_LONG use two slots in the interpreter
 709         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
 710           // long/double in gpr
 711 #ifdef ASSERT
 712           // Overwrite the unused slot with known junk
 713           __ mov64(rax, CONST64(0xdeadffffdeadaaab));
 714           __ movptr(Address(rsp, st_off), rax);
 715 #endif /* ASSERT */
 716           __ movq(Address(rsp, next_off), r);
 717         } else {
 718           __ movptr(Address(rsp, st_off), r);
 719         }
 720       }
 721     } else {
 722       assert(r_1->is_XMMRegister(), "");
 723       if (!r_2->is_valid()) {
 724         // only a float, so use just part of the slot
 725         __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
 726       } else {
 727 #ifdef ASSERT
 728         // Overwrite the unused slot with known junk
 729         __ mov64(rax, CONST64(0xdeadffffdeadaaac));
 730         __ movptr(Address(rsp, st_off), rax);
 731 #endif /* ASSERT */
 732         __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
 733       }
 734     }
 735   }
 736 
 737   // Schedule the branch target address early.
 738   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
 739   __ jmp(rcx);
 740 }
 741 
 742 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
 743                         address code_start, address code_end,
 744                         Label& L_ok) {
 745   Label L_fail;
 746   __ lea(temp_reg, ExternalAddress(code_start));
 747   __ cmpptr(pc_reg, temp_reg);
 748   __ jcc(Assembler::belowEqual, L_fail);
 749   __ lea(temp_reg, ExternalAddress(code_end));
 750   __ cmpptr(pc_reg, temp_reg);
 751   __ jcc(Assembler::below, L_ok);
 752   __ bind(L_fail);
 753 }
 754 
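// Editor's note (illustration only): the compare/branch sequence above is
// the unsigned range test
//
//   if (code_start < pc_reg && pc_reg < code_end) goto L_ok;
//   // otherwise fall through at L_fail
//
// so control reaches L_ok only for a pc strictly between code_start and
// code_end, and falls through past L_fail for anything outside that range.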
 755 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 756                                     int total_args_passed,
 757                                     int comp_args_on_stack,
 758                                     const BasicType *sig_bt,
 759                                     const VMRegPair *regs) {
 760 
 761   // Note: r13 contains the senderSP on entry. We must preserve it since
 762   // we may do an i2c -> c2i transition if we lose a race where compiled
 763   // code goes non-entrant while we get args ready.
 764   // In addition we use r13 to locate all the interpreter args as
 765   // we must align the stack to 16 bytes on an i2c entry else we
 766   // lose alignment we expect in all compiled code and register
 767   // save code can segv when fxsave instructions find improperly
 768   // aligned stack pointer.
 769 
 770   // Adapters can be frameless because they do not require the caller
 771   // to perform additional cleanup work, such as correcting the stack pointer.
 772   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 773   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 774   // even if a callee has modified the stack pointer.
 775   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
 776   // routinely repairs its caller's stack pointer (from sender_sp, which is set
 777   // up via the senderSP register).
 778   // In other words, if *either* the caller or callee is interpreted, we can

 830     comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
 831     // Round up to minimum stack alignment, in wordSize
 832     comp_words_on_stack = align_up(comp_words_on_stack, 2);
 833     __ subptr(rsp, comp_words_on_stack * wordSize);
 834   }
 835 
 836 
 837   // Ensure compiled code always sees stack at proper alignment
 838   __ andptr(rsp, -16);
 839 
 840   // push the return address and misalign the stack so that the youngest frame sees
 841   // the stack exactly as it would just after a call instruction
 842   __ push(rax);
 843 
 844   // Put saved SP in another register
 845   const Register saved_sp = rax;
 846   __ movptr(saved_sp, r11);
 847 
 848   // Will jump to the compiled code just as if compiled code was doing it.
 849   // Pre-load the register-jump target early, to schedule it better.
 850   __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
 851 
 852 #if INCLUDE_JVMCI
 853   if (EnableJVMCI) {
 854     // check if this call should be routed towards a specific entry point
 855     __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
 856     Label no_alternative_target;
 857     __ jcc(Assembler::equal, no_alternative_target);
 858     __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
 859     __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
 860     __ bind(no_alternative_target);
 861   }
 862 #endif // INCLUDE_JVMCI
 863 


 864   // Now generate the shuffle code.  Pick up all register args and move the
 865   // rest through the floating point stack top.
 866   for (int i = 0; i < total_args_passed; i++) {
 867     if (sig_bt[i] == T_VOID) {


 868       // Longs and doubles are passed in native word order, but misaligned
 869       // in the 32-bit build.
 870       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");

 871       continue;
 872     }
 873 
 874     // Pick up 0, 1 or 2 words from SP+offset.
 875 
 876     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 877             "scrambled load targets?");
 878     // Load in argument order going down.
 879     int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
 880     // Point to interpreter value (vs. tag)
 881     int next_off = ld_off - Interpreter::stackElementSize;
 882     //
 883     //
 884     //
 885     VMReg r_1 = regs[i].first();
 886     VMReg r_2 = regs[i].second();
 887     if (!r_1->is_valid()) {
 888       assert(!r_2->is_valid(), "");
 889       continue;
 890     }

 892       // Convert stack slot to an SP offset (+ wordSize to account for return address )
 893       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
 894 
 895       // We can use r13 as a temp here because compiled code doesn't need r13 as an input
 896       // and if we end up going thru a c2i because of a miss a reasonable value of r13
 897       // will be generated.
 898       if (!r_2->is_valid()) {
 899         // sign extend???
 900         __ movl(r13, Address(saved_sp, ld_off));
 901         __ movptr(Address(rsp, st_off), r13);
 902       } else {
 903         //
 904         // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 905         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
 906         // So we must adjust where to pick up the data to match the interpreter.
 907         //
 908         // Interpreter local[n] == MSW, local[n+1] == LSW however locals
 909         // are accessed as negative so LSW is at LOW address
 910 
 911         // ld_off is MSW so get LSW
 912         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 913                            next_off : ld_off;
 914         __ movq(r13, Address(saved_sp, offset));
 915         // st_off is LSW (i.e. reg.first())
 916         __ movq(Address(rsp, st_off), r13);
 917       }
 918     } else if (r_1->is_Register()) {  // Register argument
 919       Register r = r_1->as_Register();
 920       assert(r != rax, "must be different");
 921       if (r_2->is_valid()) {
 922         //
 923         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 924         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
 925         // So we must adjust where to pick up the data to match the interpreter.
 926 
 927         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 928                            next_off : ld_off;
 929 
 930         // this can be a misaligned move
 931         __ movq(r, Address(saved_sp, offset));
 932       } else {
 933         // sign extend and use a full word?
 934         __ movl(r, Address(saved_sp, ld_off));
 935       }
 936     } else {
 937       if (!r_2->is_valid()) {
 938         __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
 939       } else {
 940         __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
 941       }
 942     }
 943   }
 944 
 945   // 6243940 We might end up in handle_wrong_method if
 946   // the callee is deoptimized as we race thru here. If that
 947   // happens we don't want to take a safepoint because the
 948   // caller frame will look interpreted and arguments are now
 949   // "compiled" so it is much better to make this transition
 950   // invisible to the stack walking code. Unfortunately if
 951   // we try and find the callee by normal means a safepoint
 952   // is possible. So we stash the desired callee in the thread
 953   // and the VM will find it there should this case occur.
 954 
 955   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
 956 
 957   // put Method* where a c2i would expect it should we end up there
 958   // only needed because c2 resolve stubs return Method* as a result in
 959   // rax
 960   __ mov(rax, rbx);
 961   __ jmp(r11);
 962 }
 963 
 964 // ---------------------------------------------------------------
 965 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 966                                                             int total_args_passed,
 967                                                             int comp_args_on_stack,
 968                                                             const BasicType *sig_bt,
 969                                                             const VMRegPair *regs,
 970                                                             AdapterFingerPrint* fingerprint) {






 971   address i2c_entry = __ pc();
 972 
 973   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 974 
 975   // -------------------------------------------------------------------------
 976   // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
 977   // to the interpreter.  The args start out packed in the compiled layout.  They
 978   // need to be unpacked into the interpreter layout.  This will almost always
 979   // require some stack space.  We grow the current (compiled) stack, then repack
 980   // the args.  We  finally end in a jump to the generic interpreter entry point.
 981   // On exit from the interpreter, the interpreter will restore our SP (lest the
 982   // compiled code, which relies solely on SP and not RBP, get sick).
 983 
 984   address c2i_unverified_entry = __ pc();
 985   Label skip_fixup;
 986   Label ok;
 987 
 988   Register holder = rax;
 989   Register receiver = j_rarg0;
 990   Register temp = rbx;
 991 
 992   {
 993     __ load_klass(temp, receiver, rscratch1);
 994     __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
 995     __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
 996     __ jcc(Assembler::equal, ok);
 997     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 998 
 999     __ bind(ok);
1000     // Method might have been compiled since the call site was patched to
1001     // interpreted; if that is the case, treat it as a miss so we can get
1002     // the call site corrected.
1003     __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
1004     __ jcc(Assembler::equal, skip_fixup);
1005     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));


1006   }
1007 

1008   address c2i_entry = __ pc();
1009 
1010   // Class initialization barrier for static methods
1011   address c2i_no_clinit_check_entry = NULL;
1012   if (VM_Version::supports_fast_class_init_checks()) {
1013     Label L_skip_barrier;
1014     Register method = rbx;
1015 
1016     { // Bypass the barrier for non-static methods
1017       Register flags  = rscratch1;
1018       __ movl(flags, Address(method, Method::access_flags_offset()));
1019       __ testl(flags, JVM_ACC_STATIC);
1020       __ jcc(Assembler::zero, L_skip_barrier); // non-static
1021     }
1022 
1023     Register klass = rscratch1;
1024     __ load_method_holder(klass, method);
1025     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
1026 
1027     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1028 
1029     __ bind(L_skip_barrier);
1030     c2i_no_clinit_check_entry = __ pc();
1031   }
1032 
1033   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1034   bs->c2i_entry_barrier(masm);
1035 
 1036   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
1037 
1038   __ flush();
1039   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);








1040 }
1041 
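// Editor's note: the four entry points passed to
// AdapterHandlerLibrary::new_entry() above cover the different call shapes:
// i2c_entry (interpreted caller into compiled code), c2i_unverified_entry
// (performs the inline-cache klass check before falling into the c2i path),
// c2i_entry (compiled caller into the interpreter), and
// c2i_no_clinit_check_entry (a c2i entry placed after the class
// initialization barrier, for callers known not to need that check).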
1042 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1043                                          VMRegPair *regs,
1044                                          VMRegPair *regs2,
1045                                          int total_args_passed) {
1046   assert(regs2 == NULL, "not needed on x86");
1047 // We return the amount of VMRegImpl stack slots we need to reserve for all
1048 // the arguments NOT counting out_preserve_stack_slots.
1049 
1050 // NOTE: These arrays will have to change when c1 is ported
1051 #ifdef _WIN64
1052     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1053       c_rarg0, c_rarg1, c_rarg2, c_rarg3
1054     };
1055     static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1056       c_farg0, c_farg1, c_farg2, c_farg3
1057     };
1058 #else
1059     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {

1077       case T_BYTE:
1078       case T_SHORT:
1079       case T_INT:
1080         if (int_args < Argument::n_int_register_parameters_c) {
1081           regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
1082 #ifdef _WIN64
1083           fp_args++;
 1084           // Allocate slots for the callee to stuff register args on the stack.
1085           stk_args += 2;
1086 #endif
1087         } else {
1088           regs[i].set1(VMRegImpl::stack2reg(stk_args));
1089           stk_args += 2;
1090         }
1091         break;
1092       case T_LONG:
1093         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
1094         // fall through
1095       case T_OBJECT:
1096       case T_ARRAY:

1097       case T_ADDRESS:
1098       case T_METADATA:
1099         if (int_args < Argument::n_int_register_parameters_c) {
1100           regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
1101 #ifdef _WIN64
1102           fp_args++;
1103           stk_args += 2;
1104 #endif
1105         } else {
1106           regs[i].set2(VMRegImpl::stack2reg(stk_args));
1107           stk_args += 2;
1108         }
1109         break;
1110       case T_FLOAT:
1111         if (fp_args < Argument::n_float_register_parameters_c) {
1112           regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
1113 #ifdef _WIN64
1114           int_args++;
 1115           // Allocate slots for the callee to stuff register args on the stack.
1116           stk_args += 2;

1784 
1785   int temploc = -1;
1786   for (int ai = 0; ai < arg_order.length(); ai += 2) {
1787     int i = arg_order.at(ai);
1788     int c_arg = arg_order.at(ai + 1);
1789     __ block_comment(err_msg("move %d -> %d", i, c_arg));
1790 #ifdef ASSERT
1791     if (in_regs[i].first()->is_Register()) {
1792       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1793     } else if (in_regs[i].first()->is_XMMRegister()) {
1794       assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!");
1795     }
1796     if (out_regs[c_arg].first()->is_Register()) {
1797       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1798     } else if (out_regs[c_arg].first()->is_XMMRegister()) {
1799       freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
1800     }
1801 #endif /* ASSERT */
1802     switch (in_sig_bt[i]) {
1803       case T_ARRAY:

1804       case T_OBJECT:
1805         __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1806                     ((i == 0) && (!is_static)),
1807                     &receiver_offset);
1808         break;
1809       case T_VOID:
1810         break;
1811 
1812       case T_FLOAT:
1813         __ float_move(in_regs[i], out_regs[c_arg]);
1814           break;
1815 
1816       case T_DOUBLE:
1817         assert( i + 1 < total_in_args &&
1818                 in_sig_bt[i + 1] == T_VOID &&
1819                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1820         __ double_move(in_regs[i], out_regs[c_arg]);
1821         break;
1822 
1823       case T_LONG :

1907   if (method->is_synchronized()) {
1908 
1909     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
1910 
1911     // Get the handle (the 2nd argument)
1912     __ mov(oop_handle_reg, c_rarg1);
1913 
1914     // Get address of the box
1915 
1916     __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
1917 
1918     // Load the oop from the handle
1919     __ movptr(obj_reg, Address(oop_handle_reg, 0));
1920 
1921     if (!UseHeavyMonitors) {
1922       // Load immediate 1 into swap_reg %rax
1923       __ movl(swap_reg, 1);
1924 
1925       // Load (object->mark() | 1) into swap_reg %rax
1926       __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));




1927 
1928       // Save (object->mark() | 1) into BasicLock's displaced header
1929       __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
1930 
1931       // src -> dest iff dest == rax else rax <- dest
1932       __ lock();
1933       __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1934       __ jcc(Assembler::equal, lock_done);
1935 
1936       // Hmm should this move to the slow path code area???
1937 
1938       // Test if the oopMark is an obvious stack pointer, i.e.,
1939       //  1) (mark & 3) == 0, and
1940       //  2) rsp <= mark < mark + os::pagesize()
1941       // These 3 tests can be done by evaluating the following
1942       // expression: ((mark - rsp) & (3 - os::vm_page_size())),
1943       // assuming both stack pointer and pagesize have their
1944       // least significant 2 bits clear.
1945       // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
1946 
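      // Editor's note (illustration only): with a 4096-byte page,
      // 3 - os::vm_page_size() is the mask 0x...fffff003, so
      //   ((mark - rsp) & (3 - os::vm_page_size())) == 0
      // holds exactly when mark has its low two bits clear (rsp's are
      // already clear) and 0 <= mark - rsp < os::vm_page_size(), i.e. the
      // mark word points at most one page above rsp -- the recursive
      // stack-lock case.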

1967   // Now set thread in native
1968   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
1969 
1970   __ call(RuntimeAddress(native_func));
1971 
1972   // Verify or restore cpu control state after JNI call
1973   __ restore_cpu_control_state_after_jni();
1974 
1975   // Unpack native results.
1976   switch (ret_type) {
1977   case T_BOOLEAN: __ c2bool(rax);            break;
1978   case T_CHAR   : __ movzwl(rax, rax);      break;
1979   case T_BYTE   : __ sign_extend_byte (rax); break;
1980   case T_SHORT  : __ sign_extend_short(rax); break;
1981   case T_INT    : /* nothing to do */        break;
1982   case T_DOUBLE :
1983   case T_FLOAT  :
1984     // Result is in xmm0 we'll save as needed
1985     break;
1986   case T_ARRAY:                 // Really a handle

1987   case T_OBJECT:                // Really a handle
1988       break; // can't de-handlize until after safepoint check
1989   case T_VOID: break;
1990   case T_LONG: break;
1991   default       : ShouldNotReachHere();
1992   }
1993 
1994   Label after_transition;
1995 
1996   // Switch thread to "native transition" state before reading the synchronization state.
1997   // This additional state is necessary because reading and testing the synchronization
1998   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1999   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2000   //     VM thread changes sync state to synchronizing and suspends threads for GC.
2001   //     Thread A is resumed to finish this native method, but doesn't block here since it
 2002   //     didn't see any synchronization in progress, and escapes.
2003   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2004 
2005   // Force this write out before the read below
2006   __ membar(Assembler::Membar_mask_bits(

3712   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
3713 #endif
3714   // Clear the exception oop so GC no longer processes it as a root.
3715   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);
3716 
3717   // rax: exception oop
3718   // r8:  exception handler
3719   // rdx: exception pc
3720   // Jump to handler
3721 
3722   __ jmp(r8);
3723 
3724   // Make sure all code is generated
3725   masm->flush();
3726 
3727   // Set exception blob
3728   _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
3729 }
3730 #endif // COMPILER2
 3731 
3732 void SharedRuntime::compute_move_order(const BasicType* in_sig_bt,
3733                                        int total_in_args, const VMRegPair* in_regs,
3734                                        int total_out_args, VMRegPair* out_regs,
3735                                        GrowableArray<int>& arg_order,
3736                                        VMRegPair tmp_vmreg) {
3737   ComputeMoveOrder order(total_in_args, in_regs,
3738                          total_out_args, out_regs,
3739                          in_sig_bt, arg_order, tmp_vmreg);
3740 }
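// Editor's note: arg_order receives (source, destination) index pairs that
// the argument-move loop earlier in this file consumes two at a time
// (arg_order.at(ai) and arg_order.at(ai + 1)); ComputeMoveOrder picks an
// ordering, with tmp_vmreg available as scratch, intended to keep a register
// from being overwritten before its old value has been moved out.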

  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #ifndef _WINDOWS
  27 #include "alloca.h"
  28 #endif
  29 #include "asm/macroAssembler.hpp"
  30 #include "asm/macroAssembler.inline.hpp"
  31 #include "classfile/symbolTable.hpp"
  32 #include "code/debugInfoRec.hpp"
  33 #include "code/icBuffer.hpp"
  34 #include "code/nativeInst.hpp"
  35 #include "code/vtableStubs.hpp"
  36 #include "compiler/oopMap.hpp"
  37 #include "gc/shared/collectedHeap.hpp"
  38 #include "gc/shared/gcLocker.hpp"
  39 #include "gc/shared/barrierSet.hpp"
  40 #include "gc/shared/barrierSetAssembler.hpp"
  41 #include "interpreter/interpreter.hpp"
  42 #include "logging/log.hpp"
  43 #include "memory/resourceArea.hpp"
  44 #include "memory/universe.hpp"
  45 #include "oops/compiledICHolder.hpp"
  46 #include "oops/klass.inline.hpp"
  47 #include "prims/methodHandles.hpp"
  48 #include "runtime/jniHandles.hpp"
  49 #include "runtime/safepointMechanism.hpp"
  50 #include "runtime/sharedRuntime.hpp"
  51 #include "runtime/signature.hpp"

 512     case T_SHORT:
 513     case T_INT:
 514       if (int_args < Argument::n_int_register_parameters_j) {
 515         regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
 516       } else {
 517         regs[i].set1(VMRegImpl::stack2reg(stk_args));
 518         stk_args += 2;
 519       }
 520       break;
 521     case T_VOID:
 522       // halves of T_LONG or T_DOUBLE
 523       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 524       regs[i].set_bad();
 525       break;
 526     case T_LONG:
 527       assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 528       // fall through
 529     case T_OBJECT:
 530     case T_ARRAY:
 531     case T_ADDRESS:
 532     case T_PRIMITIVE_OBJECT:
 533       if (int_args < Argument::n_int_register_parameters_j) {
 534         regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
 535       } else {
 536         regs[i].set2(VMRegImpl::stack2reg(stk_args));
 537         stk_args += 2;
 538       }
 539       break;
 540     case T_FLOAT:
 541       if (fp_args < Argument::n_float_register_parameters_j) {
 542         regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
 543       } else {
 544         regs[i].set1(VMRegImpl::stack2reg(stk_args));
 545         stk_args += 2;
 546       }
 547       break;
 548     case T_DOUBLE:
 549       assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 550       if (fp_args < Argument::n_float_register_parameters_j) {
 551         regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
 552       } else {
 553         regs[i].set2(VMRegImpl::stack2reg(stk_args));
 554         stk_args += 2;
 555       }
 556       break;
 557     default:
 558       ShouldNotReachHere();
 559       break;
 560     }
 561   }
 562 
 563   return align_up(stk_args, 2);
 564 }
 565 
 566 // Same as java_calling_convention() but for multiple return
 567 // values. There's no way to store them on the stack so if we don't
 568 // have enough registers, multiple values can't be returned.
 569 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
 570 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
 571 int SharedRuntime::java_return_convention(const BasicType *sig_bt,
 572                                           VMRegPair *regs,
 573                                           int total_args_passed) {
 574   // Create the mapping between argument positions and
 575   // registers.
 576   static const Register INT_ArgReg[java_return_convention_max_int] = {
 577     rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
 578   };
 579   static const XMMRegister FP_ArgReg[java_return_convention_max_float] = {
 580     j_farg0, j_farg1, j_farg2, j_farg3,
 581     j_farg4, j_farg5, j_farg6, j_farg7
 582   };
 583 
 584 
 585   uint int_args = 0;
 586   uint fp_args = 0;
 587 
 588   for (int i = 0; i < total_args_passed; i++) {
 589     switch (sig_bt[i]) {
 590     case T_BOOLEAN:
 591     case T_CHAR:
 592     case T_BYTE:
 593     case T_SHORT:
 594     case T_INT:
 595       if (int_args < Argument::n_int_register_parameters_j+1) {
 596         regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
 597         int_args++;
 598       } else {
 599         return -1;
 600       }
 601       break;
 602     case T_VOID:
 603       // halves of T_LONG or T_DOUBLE
 604       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 605       regs[i].set_bad();
 606       break;
 607     case T_LONG:
 608       assert(sig_bt[i + 1] == T_VOID, "expecting half");
 609       // fall through
 610     case T_OBJECT:
 611     case T_PRIMITIVE_OBJECT:
 612     case T_ARRAY:
 613     case T_ADDRESS:
 614     case T_METADATA:
 615       if (int_args < Argument::n_int_register_parameters_j+1) {
 616         regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
 617         int_args++;
 618       } else {
 619         return -1;
 620       }
 621       break;
 622     case T_FLOAT:
 623       if (fp_args < Argument::n_float_register_parameters_j) {
 624         regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
 625         fp_args++;
 626       } else {
 627         return -1;
 628       }
 629       break;
 630     case T_DOUBLE:
 631       assert(sig_bt[i + 1] == T_VOID, "expecting half");
 632       if (fp_args < Argument::n_float_register_parameters_j) {
 633         regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
 634         fp_args++;
 635       } else {
 636         return -1;
 637       }
 638       break;
 639     default:
 640       ShouldNotReachHere();
 641       break;
 642     }
 643   }
 644 
 645   return int_args + fp_args;
 646 }
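// Editor's note (illustration only): unlike java_calling_convention() there
// is no stack fallback here. Three T_INT return values, for example, would
// come back in rax, j_rarg5 and j_rarg4 (the INT_ArgReg order above) with a
// result of 3; as soon as the register supply is exhausted the function
// returns -1 and the multi-value return convention cannot be used.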
 647 
 648 // Patch the caller's callsite with an entry to compiled code if one exists.
 649 static void patch_callers_callsite(MacroAssembler *masm) {
 650   Label L;
 651   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
 652   __ jcc(Assembler::equal, L);
 653 
 654   // Save the current stack pointer
 655   __ mov(r13, rsp);
 656   // Schedule the branch target address early.
 657   // Call into the VM to patch the caller, then jump to compiled callee
 658   // rax isn't live so capture return address while we easily can
 659   __ movptr(rax, Address(rsp, 0));
 660 
 661   // align stack so push_CPU_state doesn't fault
 662   __ andptr(rsp, -(StackAlignmentInBytes));
 663   __ push_CPU_state();
 664   __ vzeroupper();
 665   // VM needs caller's callsite
 666   // VM needs target method
 667   // This needs to be a long call since we will relocate this adapter to

 670   // Allocate argument register save area
 671   if (frame::arg_reg_save_area_bytes != 0) {
 672     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 673   }
 674   __ mov(c_rarg0, rbx);
 675   __ mov(c_rarg1, rax);
 676   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
 677 
 678   // De-allocate argument register save area
 679   if (frame::arg_reg_save_area_bytes != 0) {
 680     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 681   }
 682 
 683   __ vzeroupper();
 684   __ pop_CPU_state();
 685   // restore sp
 686   __ mov(rsp, r13);
 687   __ bind(L);
 688 }
 689 
 690 // For each inline type argument, sig includes the list of fields of
 691 // the inline type. This utility function computes the number of
 692 // arguments for the call if inline types are passed by reference (the
 693 // calling convention the interpreter expects).
 694 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
 695   int total_args_passed = 0;
 696   if (InlineTypePassFieldsAsArgs) {
 697     for (int i = 0; i < sig_extended->length(); i++) {
 698       BasicType bt = sig_extended->at(i)._bt;
 699       if (bt == T_PRIMITIVE_OBJECT) {
 700         // In sig_extended, an inline type argument starts with:
 701         // T_PRIMITIVE_OBJECT, followed by the types of the fields of the
 702         // inline type and T_VOID to mark the end of the inline
 703         // type. Inline types are flattened so, for instance, in the
 704         // case of an inline type with an int field and an inline type
 705         // field that itself has 2 fields, an int and a long:
 706         // T_PRIMITIVE_OBJECT T_INT T_PRIMITIVE_OBJECT T_INT T_LONG T_VOID (second
 707         // slot for the T_LONG) T_VOID (inner T_PRIMITIVE_OBJECT) T_VOID
 708         // (outer T_PRIMITIVE_OBJECT)
 709         total_args_passed++;
 710         int vt = 1;
 711         do {
 712           i++;
 713           BasicType bt = sig_extended->at(i)._bt;
 714           BasicType prev_bt = sig_extended->at(i-1)._bt;
 715           if (bt == T_PRIMITIVE_OBJECT) {
 716             vt++;
 717           } else if (bt == T_VOID &&
 718                      prev_bt != T_LONG &&
 719                      prev_bt != T_DOUBLE) {
 720             vt--;
 721           }
 722         } while (vt != 0);
 723       } else {
 724         total_args_passed++;
 725       }
 726     }
 727   } else {
 728     total_args_passed = sig_extended->length();
 729   }
 730   return total_args_passed;
 731 }
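// Editor's note (illustration only): for the flattened signature used as an
// example in the comment above,
//   T_PRIMITIVE_OBJECT T_INT T_PRIMITIVE_OBJECT T_INT T_LONG T_VOID T_VOID T_VOID
// the inner do/while walk consumes the whole run (vt goes 1 -> 2 -> 1 -> 0;
// the first T_VOID is skipped because it is the T_LONG's second slot) and
// total_args_passed is bumped only once: the interpreter receives that
// entire inline type as a single reference argument.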
 732 
 733 
 734 static void gen_c2i_adapter_helper(MacroAssembler* masm,
 735                                    BasicType bt,
 736                                    BasicType prev_bt,
 737                                    size_t size_in_bytes,
 738                                    const VMRegPair& reg_pair,
 739                                    const Address& to,
 740                                    int extraspace,
 741                                    bool is_oop) {
 742   assert(bt != T_PRIMITIVE_OBJECT || !InlineTypePassFieldsAsArgs, "no inline type here");
 743   if (bt == T_VOID) {
 744     assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
 745     return;
 746   }
 747 
 748   // Say 4 args:
 749   // i   st_off
 750   // 0   32 T_LONG
 751   // 1   24 T_VOID
 752   // 2   16 T_OBJECT
 753   // 3    8 T_BOOL
 754   // -    0 return address
 755   //
 756   // However, to make things extra confusing: because we can fit a long/double in
 757   // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
 758   // leaves one slot empty and only stores to a single slot. In this case the
 759   // slot that is occupied is the T_VOID slot. See, I said it was confusing.
 760 
 761   bool wide = (size_in_bytes == wordSize);
 762   VMReg r_1 = reg_pair.first();
 763   VMReg r_2 = reg_pair.second();
 764   assert(r_2->is_valid() == wide, "invalid size");
 765   if (!r_1->is_valid()) {
 766     assert(!r_2->is_valid(), "must be invalid");
 767     return;
 768   }
 769 
 770   if (!r_1->is_XMMRegister()) {
 771     Register val = rax;
 772     if (r_1->is_stack()) {
 773       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 774       __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
 775     } else {
 776       val = r_1->as_Register();
 777     }
 778     assert_different_registers(to.base(), val, rscratch1);
 779     if (is_oop) {
 780       __ push(r13);
 781       __ push(rbx);
 782       __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
 783       __ pop(rbx);
 784       __ pop(r13);
 785     } else {
 786       __ store_sized_value(to, val, size_in_bytes);
 787     }
 788   } else {
 789     if (wide) {
 790       __ movdbl(to, r_1->as_XMMRegister());
 791     } else {
 792       __ movflt(to, r_1->as_XMMRegister());
 793     }
 794   }
 795 }
 796 
 797 static void gen_c2i_adapter(MacroAssembler *masm,
 798                             const GrowableArray<SigEntry>* sig_extended,


 799                             const VMRegPair *regs,
 800                             Label& skip_fixup,
 801                             address start,
 802                             OopMapSet* oop_maps,
 803                             int& frame_complete,
 804                             int& frame_size_in_words,
 805                             bool alloc_inline_receiver) {
 806   // Before we get into the guts of the C2I adapter, see if we should be here
 807   // at all.  We've come from compiled code and are attempting to jump to the
 808   // interpreter, which means the caller made a static call to get here
 809   // (vcalls always get a compiled target if there is one).  Check for a
 810   // compiled target.  If there is one, we need to patch the caller's call.
 811   patch_callers_callsite(masm);
 812 
 813   __ bind(skip_fixup);
 814 
 815   if (InlineTypePassFieldsAsArgs) {
 816     // Is there an inline type argument?
 817     bool has_inline_argument = false;
 818     for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
 819       has_inline_argument = (sig_extended->at(i)._bt == T_PRIMITIVE_OBJECT);
 820     }
 821     if (has_inline_argument) {
 822       // There is at least an inline type argument: we're coming from
 823       // compiled code so we have no buffers to back the inline types.
 824       // Allocate the buffers here with a runtime call.
 825       OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false);
 826 
 827       frame_complete = __ offset();
 828 
 829       __ set_last_Java_frame(noreg, noreg, NULL);
 830 
 831       __ mov(c_rarg0, r15_thread);
 832       __ mov(c_rarg1, rbx);
 833       __ mov64(c_rarg2, (int64_t)alloc_inline_receiver);
 834       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types)));
 835 
 836       oop_maps->add_gc_map((int)(__ pc() - start), map);
 837       __ reset_last_Java_frame(false);
 838 
 839       RegisterSaver::restore_live_registers(masm);
 840 
 841       Label no_exception;
 842       __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
 843       __ jcc(Assembler::equal, no_exception);
 844 
 845       __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
 846       __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
 847       __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
 848 
 849       __ bind(no_exception);
 850 
 851       // We get an array of objects from the runtime call
 852       __ get_vm_result(rscratch2, r15_thread); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr()
 853       __ get_vm_result_2(rbx, r15_thread); // TODO: required to keep the callee Method live?
 854     }
 855   }
 856 
 857   // Since all args are passed on the stack, total_args_passed *
 858   // Interpreter::stackElementSize is the space we need. Plus one word because
 859   // we also account for the return address location since
 860   // we store it first rather than hold it in rax across all the shuffling
 861   int total_args_passed = compute_total_args_passed_int(sig_extended);
 862   int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;
 863 
 864   // stack is aligned, keep it that way
 865   extraspace = align_up(extraspace, 2*wordSize);
 866 
 867   // Get return address
 868   __ pop(rax);
 869 
 870   // set senderSP value
 871   __ mov(r13, rsp);
 872 
 873   __ subptr(rsp, extraspace);
 874 
 875   // Store the return address in the expected location
 876   __ movptr(Address(rsp, 0), rax);
 877 
 878   // Now write the args into the outgoing interpreter space
 879 
 880   // next_arg_comp is the next argument from the compiler point of
 881   // view (inline type fields are passed in registers/on the stack). In
 882   // sig_extended, an inline type argument starts with: T_PRIMITIVE_OBJECT,
 883   // followed by the types of the fields of the inline type and T_VOID
 884   // to mark the end of the inline type. ignored counts the number of
 885   // T_PRIMITIVE_OBJECT/T_VOID. next_vt_arg is the next inline type argument:
 886   // used to get the buffer for that argument from the pool of buffers
 887   // we allocated above and want to pass to the
 888   // interpreter. next_arg_int is the next argument from the
 889   // interpreter point of view (inline types are passed by reference).
 890   for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
 891        next_arg_comp < sig_extended->length(); next_arg_comp++) {
 892     assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
 893     assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?");
 894     BasicType bt = sig_extended->at(next_arg_comp)._bt;
 895     int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
 896     if (!InlineTypePassFieldsAsArgs || bt != T_PRIMITIVE_OBJECT) {
 897       int next_off = st_off - Interpreter::stackElementSize;
 898       const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
 899       const VMRegPair reg_pair = regs[next_arg_comp-ignored];
 900       size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
 901       gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
 902                              size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false);
 903       next_arg_int++;
 904 #ifdef ASSERT
 905       if (bt == T_LONG || bt == T_DOUBLE) {
 906         // Overwrite the unused slot with known junk
 907         __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
 908         __ movptr(Address(rsp, st_off), rax);
 909       }
 910 #endif /* ASSERT */
 911     } else {
 912       ignored++;
 913       // get the buffer from the just allocated pool of buffers
 914       int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_PRIMITIVE_OBJECT);
 915       __ load_heap_oop(r14, Address(rscratch2, index));
 916       next_vt_arg++; next_arg_int++;
 917       int vt = 1;
 918       // write fields we get from compiled code in registers/stack
 919       // slots to the buffer: we know we are done with that inline type
 920       // argument when we hit the T_VOID that acts as an end of inline
 921       // type delimiter for this inline type. Inline types are flattened
 922       // so we might encounter embedded inline types. Each entry in
 923       // sig_extended contains a field offset in the buffer.
 924       Label L_null;
 925       do {
 926         next_arg_comp++;
 927         BasicType bt = sig_extended->at(next_arg_comp)._bt;
 928         BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt;
 929         if (bt == T_PRIMITIVE_OBJECT) {
 930           vt++;
 931           ignored++;
 932         } else if (bt == T_VOID &&
 933                    prev_bt != T_LONG &&
 934                    prev_bt != T_DOUBLE) {
 935           vt--;
 936           ignored++;
 937         } else {
 938           int off = sig_extended->at(next_arg_comp)._offset;
 939           if (off == -1) {
 940             // Nullable inline type argument, emit null check
 941             VMReg reg = regs[next_arg_comp-ignored].first();
 942             Label L_notNull;
 943             if (reg->is_stack()) {
 944               int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 945               __ testb(Address(rsp, ld_off), 1);
 946             } else {
 947               __ testb(reg->as_Register(), 1);
 948             }
 949             __ jcc(Assembler::notZero, L_notNull);
 950             __ movptr(Address(rsp, st_off), 0);
 951             __ jmp(L_null);
 952             __ bind(L_notNull);
 953             continue;
 954           }
 955           assert(off > 0, "offset in object should be positive");
 956           size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
 957           bool is_oop = is_reference_type(bt);
 958           gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
 959                                  size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop);
 960         }
 961       } while (vt != 0);
 962       // pass the buffer to the interpreter
 963       __ movptr(Address(rsp, st_off), r14);
 964       __ bind(L_null);
 965     }
 966   }
 967 
 968   // Schedule the branch target address early.
 969   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
 970   __ jmp(rcx);
 971 }
 972 
 973 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
 974                         address code_start, address code_end,
 975                         Label& L_ok) {
 976   Label L_fail;
 977   __ lea(temp_reg, ExternalAddress(code_start));
 978   __ cmpptr(pc_reg, temp_reg);
 979   __ jcc(Assembler::belowEqual, L_fail);
 980   __ lea(temp_reg, ExternalAddress(code_end));
 981   __ cmpptr(pc_reg, temp_reg);
 982   __ jcc(Assembler::below, L_ok);
 983   __ bind(L_fail);
 984 }
 985 
 986 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,

 987                                     int comp_args_on_stack,
 988                                     const GrowableArray<SigEntry>* sig,
 989                                     const VMRegPair *regs) {
 990 
 991   // Note: r13 contains the senderSP on entry. We must preserve it since
 992   // we may do an i2c -> c2i transition if we lose a race where compiled
 993   // code goes non-entrant while we get args ready.
 994   // In addition we use r13 to locate all the interpreter args as
 995   // we must align the stack to 16 bytes on an i2c entry else we
 996   // lose alignment we expect in all compiled code and register
 997   // save code can segv when fxsave instructions find improperly
 998   // aligned stack pointer.
 999 
1000   // Adapters can be frameless because they do not require the caller
1001   // to perform additional cleanup work, such as correcting the stack pointer.
1002   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
1003   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
1004   // even if a callee has modified the stack pointer.
1005   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
1006   // routinely repairs its caller's stack pointer (from sender_sp, which is set
1007   // up via the senderSP register).
1008   // In other words, if *either* the caller or callee is interpreted, we can

1060     comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
 1061     // Round up to minimum stack alignment, in wordSize
1062     comp_words_on_stack = align_up(comp_words_on_stack, 2);
1063     __ subptr(rsp, comp_words_on_stack * wordSize);
1064   }
1065 
1066 
1067   // Ensure compiled code always sees stack at proper alignment
1068   __ andptr(rsp, -16);
1069 
 1070   // push the return address and misalign the stack so that the youngest frame sees
 1071   // the stack exactly as it would just after a call instruction
1072   __ push(rax);
1073 
1074   // Put saved SP in another register
1075   const Register saved_sp = rax;
1076   __ movptr(saved_sp, r11);
1077 
1078   // Will jump to the compiled code just as if compiled code was doing it.
1079   // Pre-load the register-jump target early, to schedule it better.
1080   __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset())));
1081 
1082 #if INCLUDE_JVMCI
1083   if (EnableJVMCI) {
1084     // check if this call should be routed towards a specific entry point
1085     __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
1086     Label no_alternative_target;
1087     __ jcc(Assembler::equal, no_alternative_target);
1088     __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
1089     __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
1090     __ bind(no_alternative_target);
1091   }
1092 #endif // INCLUDE_JVMCI
1093 
1094   int total_args_passed = sig->length();
1095 
1096   // Now generate the shuffle code.  Pick up all register args and move the
1097   // rest through the floating point stack top.
1098   for (int i = 0; i < total_args_passed; i++) {
1099     BasicType bt = sig->at(i)._bt;
1100     assert(bt != T_PRIMITIVE_OBJECT, "i2c adapter doesn't unpack inline type args");
1101     if (bt == T_VOID) {
1102       // Longs and doubles are passed in native word order, but misaligned
1103       // in the 32-bit build.
1104       BasicType prev_bt = (i > 0) ? sig->at(i-1)._bt : T_ILLEGAL;
1105       assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half");
1106       continue;
1107     }
1108 
1109     // Pick up 0, 1 or 2 words from SP+offset.
1110 
1111     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
1112             "scrambled load targets?");
1113     // Load in argument order going down.
1114     int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
1115     // Point to interpreter value (vs. tag)
1116     int next_off = ld_off - Interpreter::stackElementSize;
1117     //
1118     //
1119     //
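    // Worked example (illustrative only): with total_args_passed == 2,
    // Interpreter::stackElementSize == 8 and a T_LONG in slot i == 0:
    //   ld_off   = (2 - 0) * 8 = 16   // MSW slot of the interpreter pair
    //   next_off = 16 - 8      = 8    // LSW slot, where the 64-bit value lives
    // so two-slot values are loaded from next_off and one-slot values from ld_off.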
1120     VMReg r_1 = regs[i].first();
1121     VMReg r_2 = regs[i].second();
1122     if (!r_1->is_valid()) {
1123       assert(!r_2->is_valid(), "");
1124       continue;
1125     }
1126     if (r_1->is_stack()) {
1127       // Convert stack slot to an SP offset (+ wordSize to account for return address )
1128       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
1129 
1130       // We can use r13 as a temp here because compiled code doesn't need r13 as an input
1131       // and if we end up going through a c2i because of a miss, a reasonable value of r13
1132       // will be generated.
1133       if (!r_2->is_valid()) {
1134         // sign extend???
1135         __ movl(r13, Address(saved_sp, ld_off));
1136         __ movptr(Address(rsp, st_off), r13);
1137       } else {
1138         //
1139         // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
1140         // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
1141         // so we must adjust where to pick up the data to match the interpreter.
1142         //
1143         // Interpreter local[n] == MSW, local[n+1] == LSW however locals
1144         // are accessed at negative offsets so the LSW is at the LOW address
1145 
1146         // ld_off is MSW so get LSW
1147         const int offset = (bt==T_LONG||bt==T_DOUBLE)?
1148                            next_off : ld_off;
1149         __ movq(r13, Address(saved_sp, offset));
1150         // st_off is LSW (i.e. reg.first())
1151         __ movq(Address(rsp, st_off), r13);
1152       }
1153     } else if (r_1->is_Register()) {  // Register argument
1154       Register r = r_1->as_Register();
1155       assert(r != rax, "must be different");
1156       if (r_2->is_valid()) {
1157         //
1158         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
1159         // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
1160         // so we must adjust where to pick up the data to match the interpreter.
1161 
1162         const int offset = (bt==T_LONG||bt==T_DOUBLE)?
1163                            next_off : ld_off;
1164 
1165         // this can be a misaligned move
1166         __ movq(r, Address(saved_sp, offset));
1167       } else {
1168         // sign extend and use a full word?
1169         __ movl(r, Address(saved_sp, ld_off));
1170       }
1171     } else {
1172       if (!r_2->is_valid()) {
1173         __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
1174       } else {
1175         __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
1176       }
1177     }
1178   }
1179 
1180   // 6243940 We might end up in handle_wrong_method if
1181   // the callee is deoptimized as we race through here. If that
1182   // happens we don't want to take a safepoint because the
1183   // caller frame will look interpreted and arguments are now
1184   // "compiled" so it is much better to make this transition
1185   // invisible to the stack walking code. Unfortunately if
1186   // we try and find the callee by normal means a safepoint
1187   // is possible. So we stash the desired callee in the thread
1188   // and the VM will find it there should this case occur.
1189 
1190   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
1191 
1192   // put Method* where a c2i would expect it should we end up there.
1193   // Only needed because c2 resolve stubs return the Method* as a result in
1194   // rax.
1195   __ mov(rax, rbx);
1196   __ jmp(r11);
1197 }
1198 
1199 static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
1200   Label ok;
1201 
1202   Register holder = rax;
1203   Register receiver = j_rarg0;
1204   Register temp = rbx;
1205 
1206   __ load_klass(temp, receiver, rscratch1);
1207   __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
1208   __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
1209   __ jcc(Assembler::equal, ok);
1210   __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1211 
1212   __ bind(ok);
1213   // Method might have been compiled since the call site was patched to
1214   // interpreted; if that is the case treat it as a miss so we can get
1215   // the call site corrected.
1216   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
1217   __ jcc(Assembler::equal, skip_fixup);
1218   __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1219 }
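// Illustrative pseudo-code for the check above (not generated code), with the
// CompiledICHolder arriving in rax and the receiver in j_rarg0:
//
//   if (receiver->klass() != holder->holder_klass())  goto ic_miss_stub;
//   rbx = holder->holder_metadata();                  // the callee Method*
//   if (rbx->code() != NULL)                          goto ic_miss_stub;
//   goto skip_fixup;   // still interpreted: fall into the c2i adapter
//
// The second miss path covers a method that got compiled after the call site
// was patched to interpreted, so the call site can be corrected.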
1220 
1221 // ---------------------------------------------------------------
1222 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,

1223                                                             int comp_args_on_stack,
1224                                                             const GrowableArray<SigEntry>* sig,
1225                                                             const VMRegPair* regs,
1226                                                             const GrowableArray<SigEntry>* sig_cc,
1227                                                             const VMRegPair* regs_cc,
1228                                                             const GrowableArray<SigEntry>* sig_cc_ro,
1229                                                             const VMRegPair* regs_cc_ro,
1230                                                             AdapterFingerPrint* fingerprint,
1231                                                             AdapterBlob*& new_adapter,
1232                                                             bool allocate_code_blob) {
1233   address i2c_entry = __ pc();
1234   gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);

1235 
1236   // -------------------------------------------------------------------------
1237   // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
1238   // to the interpreter.  The args start out packed in the compiled layout.  They
1239   // need to be unpacked into the interpreter layout.  This will almost always
1240   // require some stack space.  We grow the current (compiled) stack, then repack
1241   // the args.  We  finally end in a jump to the generic interpreter entry point.
1242   // On exit from the interpreter, the interpreter will restore our SP (lest the
1243   // compiled code, which relies solely on SP and not RBP, get sick).
1244 
1245   address c2i_unverified_entry = __ pc();
1246   Label skip_fixup;
1247 
1248   gen_inline_cache_check(masm, skip_fixup);
1249 
1250   OopMapSet* oop_maps = new OopMapSet();
1251   int frame_complete = CodeOffsets::frame_never_safe;
1252   int frame_size_in_words = 0;
1253 
1254   // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
1255   address c2i_inline_ro_entry = __ pc();
1256   if (regs_cc != regs_cc_ro) {
1257     gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
1258     skip_fixup.reset();
1259   }
1260 
1261   // Scalarized c2i adapter
1262   address c2i_entry = __ pc();
1263 
1264   // Class initialization barrier for static methods
1265   address c2i_no_clinit_check_entry = NULL;
1266   if (VM_Version::supports_fast_class_init_checks()) {
1267     Label L_skip_barrier;
1268     Register method = rbx;
1269 
1270     { // Bypass the barrier for non-static methods
1271       Register flags  = rscratch1;
1272       __ movl(flags, Address(method, Method::access_flags_offset()));
1273       __ testl(flags, JVM_ACC_STATIC);
1274       __ jcc(Assembler::zero, L_skip_barrier); // non-static
1275     }
1276 
1277     Register klass = rscratch1;
1278     __ load_method_holder(klass, method);
1279     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
1280 
1281     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1282 
1283     __ bind(L_skip_barrier);
1284     c2i_no_clinit_check_entry = __ pc();
1285   }
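  // Rough sketch of the barrier above (not generated code); the accessor names
  // are indicative only:
  //
  //   if (klass->is_initialized() ||
  //       (klass->is_being_initialized() && klass->init_thread() == current)) {
  //     // fast path: fall through at L_skip_barrier
  //   } else {
  //     goto handle_wrong_method_stub;   // re-resolve; <clinit> not done yet
  //   }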
1286 
1287   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1288   bs->c2i_entry_barrier(masm);
1289 
1290   gen_c2i_adapter(masm, sig_cc, regs_cc, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, true);
1291 
1292   address c2i_unverified_inline_entry = c2i_unverified_entry;
1293 
1294   // Non-scalarized c2i adapter
1295   address c2i_inline_entry = c2i_entry;
1296   if (regs != regs_cc) {
1297     Label inline_entry_skip_fixup;
1298     c2i_unverified_inline_entry = __ pc();
1299     gen_inline_cache_check(masm, inline_entry_skip_fixup);
1300 
1301     c2i_inline_entry = __ pc();
1302     gen_c2i_adapter(masm, sig, regs, inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
1303   }
1304 
1305   __ flush();
1306 
1307   // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
1308   // the GC knows about the locations of oop arguments passed to the c2i adapter.
1309   if (allocate_code_blob) {
1310     bool caller_must_gc_arguments = (regs != regs_cc);
1311     new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
1312   }
1313 
1314   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, c2i_unverified_inline_entry, c2i_no_clinit_check_entry);
1315 }
1316 
1317 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1318                                          VMRegPair *regs,
1319                                          VMRegPair *regs2,
1320                                          int total_args_passed) {
1321   assert(regs2 == NULL, "not needed on x86");
1322 // We return the number of VMRegImpl stack slots we need to reserve for all
1323 // the arguments NOT counting out_preserve_stack_slots.
1324 
1325 // NOTE: These arrays will have to change when c1 is ported
1326 #ifdef _WIN64
1327     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1328       c_rarg0, c_rarg1, c_rarg2, c_rarg3
1329     };
1330     static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1331       c_farg0, c_farg1, c_farg2, c_farg3
1332     };
1333 #else
1334     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {

1352       case T_BYTE:
1353       case T_SHORT:
1354       case T_INT:
1355         if (int_args < Argument::n_int_register_parameters_c) {
1356           regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
1357 #ifdef _WIN64
1358           fp_args++;
1359           // Allocate slots for callee to stuff register args on the stack.
1360           stk_args += 2;
1361 #endif
1362         } else {
1363           regs[i].set1(VMRegImpl::stack2reg(stk_args));
1364           stk_args += 2;
1365         }
1366         break;
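      // Note (illustrative, Win64 only): the Windows x64 ABI shares the four
      // argument positions between GPRs and XMM registers, so an integer arg
      // in position n also burns XMMn -- hence the fp_args++ above. The
      // stk_args += 2 reserves this argument's 8-byte slot in the 32-byte
      // home (shadow) area the caller must allocate for register arguments.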
1367       case T_LONG:
1368         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
1369         // fall through
1370       case T_OBJECT:
1371       case T_ARRAY:
1372       case T_PRIMITIVE_OBJECT:
1373       case T_ADDRESS:
1374       case T_METADATA:
1375         if (int_args < Argument::n_int_register_parameters_c) {
1376           regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
1377 #ifdef _WIN64
1378           fp_args++;
1379           stk_args += 2;
1380 #endif
1381         } else {
1382           regs[i].set2(VMRegImpl::stack2reg(stk_args));
1383           stk_args += 2;
1384         }
1385         break;
1386       case T_FLOAT:
1387         if (fp_args < Argument::n_float_register_parameters_c) {
1388           regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
1389 #ifdef _WIN64
1390           int_args++;
1391           // Allocate slots for callee to stuff register args on the stack.
1392           stk_args += 2;

2060 
2061   int temploc = -1;
2062   for (int ai = 0; ai < arg_order.length(); ai += 2) {
2063     int i = arg_order.at(ai);
2064     int c_arg = arg_order.at(ai + 1);
2065     __ block_comment(err_msg("move %d -> %d", i, c_arg));
2066 #ifdef ASSERT
2067     if (in_regs[i].first()->is_Register()) {
2068       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
2069     } else if (in_regs[i].first()->is_XMMRegister()) {
2070       assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!");
2071     }
2072     if (out_regs[c_arg].first()->is_Register()) {
2073       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
2074     } else if (out_regs[c_arg].first()->is_XMMRegister()) {
2075       freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
2076     }
2077 #endif /* ASSERT */
2078     switch (in_sig_bt[i]) {
2079       case T_ARRAY:
2080       case T_PRIMITIVE_OBJECT:
2081       case T_OBJECT:
2082         __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
2083                     ((i == 0) && (!is_static)),
2084                     &receiver_offset);
2085         break;
2086       case T_VOID:
2087         break;
2088 
2089       case T_FLOAT:
2090         __ float_move(in_regs[i], out_regs[c_arg]);
2091         break;
2092 
2093       case T_DOUBLE:
2094         assert( i + 1 < total_in_args &&
2095                 in_sig_bt[i + 1] == T_VOID &&
2096                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
2097         __ double_move(in_regs[i], out_regs[c_arg]);
2098         break;
2099 
2100       case T_LONG :

2184   if (method->is_synchronized()) {
2185 
2186     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
2187 
2188     // Get the handle (the 2nd argument)
2189     __ mov(oop_handle_reg, c_rarg1);
2190 
2191     // Get address of the box
2192 
2193     __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
2194 
2195     // Load the oop from the handle
2196     __ movptr(obj_reg, Address(oop_handle_reg, 0));
2197 
2198     if (!UseHeavyMonitors) {
2199       // Load immediate 1 into swap_reg %rax
2200       __ movl(swap_reg, 1);
2201 
2202       // Load (object->mark() | 1) into swap_reg %rax
2203       __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2204       if (EnableValhalla) {
2205         // Mask inline_type bit such that we go to the slow path if object is an inline type
2206         // Mask inline_type bit such that we go to the slow path if the object is an inline type
2207       }
2208 
2209       // Save (object->mark() | 1) into BasicLock's displaced header
2210       __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
2211 
2212       // src -> dest iff dest == rax else rax <- dest
2213       __ lock();
2214       __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2215       __ jcc(Assembler::equal, lock_done);
2216 
2217       // Hmm should this move to the slow path code area???
2218 
2219       // Test if the oopMark is an obvious stack pointer, i.e.,
2220       //  1) (mark & 3) == 0, and
2221       //  2) rsp <= mark < mark + os::pagesize()
2222       // These 3 tests can be done by evaluating the following
2223       // expression: ((mark - rsp) & (3 - os::vm_page_size())),
2224       // assuming both stack pointer and pagesize have their
2225       // least significant 2 bits clear.
2226       // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
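      // Worked example (illustrative): with os::vm_page_size() == 4096,
      // 3 - 4096 == -4093 == 0x...fffff003, so
      //   ((mark - rsp) & 0x...fffff003) == 0
      // holds exactly when mark >= rsp, mark - rsp < 4096 and the low two bits
      // of the difference are clear -- e.g. rsp == 0x7f0000001000 and
      // mark == 0x7f0000001040 give (0x40 & 0x...f003) == 0, i.e. the mark is
      // a pointer into the current thread's stack.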
2227 

2248   // Now set thread in native
2249   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
2250 
2251   __ call(RuntimeAddress(native_func));
2252 
2253   // Verify or restore cpu control state after JNI call
2254   __ restore_cpu_control_state_after_jni();
2255 
2256   // Unpack native results.
2257   switch (ret_type) {
2258   case T_BOOLEAN: __ c2bool(rax);            break;
2259   case T_CHAR   : __ movzwl(rax, rax);      break;
2260   case T_BYTE   : __ sign_extend_byte (rax); break;
2261   case T_SHORT  : __ sign_extend_short(rax); break;
2262   case T_INT    : /* nothing to do */        break;
2263   case T_DOUBLE :
2264   case T_FLOAT  :
2265     // Result is in xmm0; we'll save as needed
2266     break;
2267   case T_ARRAY:                 // Really a handle
2268   case T_PRIMITIVE_OBJECT:           // Really a handle
2269   case T_OBJECT:                // Really a handle
2270       break; // can't de-handlize until after safepoint check
2271   case T_VOID: break;
2272   case T_LONG: break;
2273   default       : ShouldNotReachHere();
2274   }
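  // Note (illustrative): the T_BOOLEAN case normalizes the raw native result,
  // since a native function may return any byte value for a jboolean;
  // conceptually
  //
  //   rax = ((rax & 0xFF) != 0) ? 1 : 0;
  //
  // Object and array results are still JNI handles at this point and are only
  // unwrapped after the safepoint transition below.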
2275 
2276   Label after_transition;
2277 
2278   // Switch thread to "native transition" state before reading the synchronization state.
2279   // This additional state is necessary because reading and testing the synchronization
2280   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2281   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2282   //     VM thread changes sync state to synchronizing and suspends threads for GC.
2283   //     Thread A is resumed to finish this native method, but doesn't block here since it
2284   //     didn't see any synchronization in progress, and escapes.
2285   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2286 
2287   // Force this write out before the read below
2288   __ membar(Assembler::Membar_mask_bits(

3994   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
3995 #endif
3996   // Clear the exception oop so GC no longer processes it as a root.
3997   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);
3998 
3999   // rax: exception oop
4000   // r8:  exception handler
4001   // rdx: exception pc
4002   // Jump to handler
4003 
4004   __ jmp(r8);
4005 
4006   // Make sure all code is generated
4007   masm->flush();
4008 
4009   // Set exception blob
4010   _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
4011 }
4012 #endif // COMPILER2
4013 
4014 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
4015   BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K);
4016   CodeBuffer buffer(buf);
4017   short buffer_locs[20];
4018   buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs,
4019                                          sizeof(buffer_locs)/sizeof(relocInfo));
4020 
4021   MacroAssembler* masm = new MacroAssembler(&buffer);
4022 
4023   const Array<SigEntry>* sig_vk = vk->extended_sig();
4024   const Array<VMRegPair>* regs = vk->return_regs();
4025 
4026   int pack_fields_jobject_off = __ offset();
4027   // Resolve pre-allocated buffer from JNI handle.
4028   // We cannot do this in generate_call_stub() because it requires GC code to be initialized.
4029   __ movptr(rax, Address(r13, 0));
4030   __ resolve_jobject(rax /* value */,
4031                      r15_thread /* thread */,
4032                      r12 /* tmp */);
4033   __ movptr(Address(r13, 0), rax);
4034 
4035   int pack_fields_off = __ offset();
4036 
4037   int j = 1;
4038   for (int i = 0; i < sig_vk->length(); i++) {
4039     BasicType bt = sig_vk->at(i)._bt;
4040     if (bt == T_PRIMITIVE_OBJECT) {
4041       continue;
4042     }
4043     if (bt == T_VOID) {
4044       if (sig_vk->at(i-1)._bt == T_LONG ||
4045           sig_vk->at(i-1)._bt == T_DOUBLE) {
4046         j++;
4047       }
4048       continue;
4049     }
4050     int off = sig_vk->at(i)._offset;
4051     assert(off > 0, "offset in object should be positive");
4052     VMRegPair pair = regs->at(j);
4053     VMReg r_1 = pair.first();
4054     VMReg r_2 = pair.second();
4055     Address to(rax, off);
4056     if (bt == T_FLOAT) {
4057       __ movflt(to, r_1->as_XMMRegister());
4058     } else if (bt == T_DOUBLE) {
4059       __ movdbl(to, r_1->as_XMMRegister());
4060     } else {
4061       Register val = r_1->as_Register();
4062       assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1);
4063       if (is_reference_type(bt)) {
4064         __ store_heap_oop(to, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
4065       } else {
4066         __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt));
4067       }
4068     }
4069     j++;
4070   }
4071   assert(j == regs->length(), "missed a field?");
4072 
4073   __ ret(0);
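  // Illustrative sketch of the pack path above (not generated code): each
  // scalarized field is copied from its return-convention register into the
  // pre-allocated buffer held in rax, roughly
  //
  //   for each field f of the inline type:
  //     *(buffer + f.offset) = register_assigned_to(f);   // sized store
  //
  // with store_heap_oop used for reference fields so GC barriers are applied.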
4074 
4075   int unpack_fields_off = __ offset();
4076 
4077   Label skip;
4078   __ testptr(rax, rax);
4079   __ jcc(Assembler::zero, skip);
4080 
4081   j = 1;
4082   for (int i = 0; i < sig_vk->length(); i++) {
4083     BasicType bt = sig_vk->at(i)._bt;
4084     if (bt == T_PRIMITIVE_OBJECT) {
4085       continue;
4086     }
4087     if (bt == T_VOID) {
4088       if (sig_vk->at(i-1)._bt == T_LONG ||
4089           sig_vk->at(i-1)._bt == T_DOUBLE) {
4090         j++;
4091       }
4092       continue;
4093     }
4094     int off = sig_vk->at(i)._offset;
4095     assert(off > 0, "offset in object should be positive");
4096     VMRegPair pair = regs->at(j);
4097     VMReg r_1 = pair.first();
4098     VMReg r_2 = pair.second();
4099     Address from(rax, off);
4100     if (bt == T_FLOAT) {
4101       __ movflt(r_1->as_XMMRegister(), from);
4102     } else if (bt == T_DOUBLE) {
4103       __ movdbl(r_1->as_XMMRegister(), from);
4104     } else if (bt == T_OBJECT || bt == T_ARRAY) {
4105       assert_different_registers(rax, r_1->as_Register());
4106       __ load_heap_oop(r_1->as_Register(), from);
4107     } else {
4108       assert(is_java_primitive(bt), "unexpected basic type");
4109       assert_different_registers(rax, r_1->as_Register());
4110       size_t size_in_bytes = type2aelembytes(bt);
4111       __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN);
4112     }
4113     j++;
4114   }
4115   assert(j == regs->length(), "missed a field?");
4116 
4117   __ bind(skip);
4118   __ ret(0);
4119 
4120   __ flush();
4121 
4122   return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off);
4123 }
4124 
4125 void SharedRuntime::compute_move_order(const BasicType* in_sig_bt,
4126                                        int total_in_args, const VMRegPair* in_regs,
4127                                        int total_out_args, VMRegPair* out_regs,
4128                                        GrowableArray<int>& arg_order,
4129                                        VMRegPair tmp_vmreg) {
4130   ComputeMoveOrder order(total_in_args, in_regs,
4131                          total_out_args, out_regs,
4132                          in_sig_bt, arg_order, tmp_vmreg);
4133 }
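// Illustrative example (hypothetical registers, not from this file): the point
// of computing a move order is to avoid clobbering a source before it is read.
// With
//
//   in_regs  = { rsi, rdi }   // sources
//   out_regs = { rdi, rsi }   // destinations
//
// doing move 0 first (rsi -> rdi) destroys the source of move 1. The computed
// order breaks such cycles through tmp_vmreg, e.g. tmp <- rsi, rsi <- rdi,
// rdi <- tmp.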