
src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp


  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #ifndef _WINDOWS
  27 #include "alloca.h"
  28 #endif
  29 #include "asm/macroAssembler.hpp"
  30 #include "asm/macroAssembler.inline.hpp"

  31 #include "code/debugInfoRec.hpp"
  32 #include "code/icBuffer.hpp"
  33 #include "code/nativeInst.hpp"
  34 #include "code/vtableStubs.hpp"
  35 #include "compiler/oopMap.hpp"
  36 #include "gc/shared/collectedHeap.hpp"
  37 #include "gc/shared/gcLocker.hpp"
  38 #include "gc/shared/barrierSet.hpp"
  39 #include "gc/shared/barrierSetAssembler.hpp"
  40 #include "interpreter/interpreter.hpp"
  41 #include "logging/log.hpp"
  42 #include "memory/resourceArea.hpp"
  43 #include "memory/universe.hpp"
  44 #include "oops/compiledICHolder.hpp"
  45 #include "oops/klass.inline.hpp"
  46 #include "prims/methodHandles.hpp"
  47 #include "runtime/jniHandles.hpp"
  48 #include "runtime/safepointMechanism.hpp"
  49 #include "runtime/sharedRuntime.hpp"
  50 #include "runtime/signature.hpp"

 511     case T_SHORT:
 512     case T_INT:
 513       if (int_args < Argument::n_int_register_parameters_j) {
 514         regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
 515       } else {
 516         regs[i].set1(VMRegImpl::stack2reg(stk_args));
 517         stk_args += 2;
 518       }
 519       break;
 520     case T_VOID:
 521       // halves of T_LONG or T_DOUBLE
 522       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 523       regs[i].set_bad();
 524       break;
 525     case T_LONG:
 526       assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 527       // fall through
 528     case T_OBJECT:
 529     case T_ARRAY:
 530     case T_ADDRESS:

 531       if (int_args < Argument::n_int_register_parameters_j) {
 532         regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
 533       } else {
 534         regs[i].set2(VMRegImpl::stack2reg(stk_args));
 535         stk_args += 2;
 536       }
 537       break;
 538     case T_FLOAT:
 539       if (fp_args < Argument::n_float_register_parameters_j) {
 540         regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
 541       } else {
 542         regs[i].set1(VMRegImpl::stack2reg(stk_args));
 543         stk_args += 2;
 544       }
 545       break;
 546     case T_DOUBLE:
 547       assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 548       if (fp_args < Argument::n_float_register_parameters_j) {
 549         regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
 550       } else {
 551         regs[i].set2(VMRegImpl::stack2reg(stk_args));
 552         stk_args += 2;
 553       }
 554       break;
 555     default:
 556       ShouldNotReachHere();
 557       break;
 558     }
 559   }
 560 
 561   return align_up(stk_args, 2);
 562 }
 563 
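The mapping above can be checked with a small standalone model. The sketch below is an editor's illustration only (plain ints instead of VMRegs; register counts mirror INT_ArgReg/FP_ArgReg), not code from this change:

// Editor's sketch: models the slot assignment done by java_calling_convention() above.
#include <cstdio>

enum Bt { BT_INT, BT_LONG, BT_HALF, BT_FLOAT, BT_DOUBLE, BT_OBJECT };

static int sketch_java_calling_convention(const Bt* sig, int n) {
  const int n_int_regs = 6, n_fp_regs = 8;      // Argument::n_*_register_parameters_j
  int int_args = 0, fp_args = 0, stk_args = 0;  // stack slots are 32-bit, hence += 2
  for (int i = 0; i < n; i++) {
    switch (sig[i]) {
    case BT_INT: case BT_LONG: case BT_OBJECT:
      if (int_args < n_int_regs) std::printf("arg %d -> int reg %d\n", i, int_args++);
      else { std::printf("arg %d -> stack slot %d\n", i, stk_args); stk_args += 2; }
      break;
    case BT_FLOAT: case BT_DOUBLE:
      if (fp_args < n_fp_regs) std::printf("arg %d -> xmm reg %d\n", i, fp_args++);
      else { std::printf("arg %d -> stack slot %d\n", i, stk_args); stk_args += 2; }
      break;
    case BT_HALF:            // the T_VOID half of a long/double
      break;                 // gets no location (set_bad())
    }
  }
  return (stk_args + 1) & ~1;  // align_up(stk_args, 2)
}

int main() {
  // (ILjava/lang/Object;D)V flattens to: T_INT, T_OBJECT, T_DOUBLE, T_VOID
  const Bt sig[] = { BT_INT, BT_OBJECT, BT_DOUBLE, BT_HALF };
  std::printf("stack slots reserved: %d\n", sketch_java_calling_convention(sig, 4));
  return 0;
}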
 564 // Patch the caller's callsite with entry to compiled code if it exists.
 565 static void patch_callers_callsite(MacroAssembler *masm) {
 566   Label L;
 567   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
 568   __ jcc(Assembler::equal, L);
 569 
 570   // Save the current stack pointer
 571   __ mov(r13, rsp);
 572   // Schedule the branch target address early.
 573   // Call into the VM to patch the caller, then jump to compiled callee
 574   // rax isn't live so capture return address while we easily can
 575   __ movptr(rax, Address(rsp, 0));
 576 
 577   // align stack so push_CPU_state doesn't fault
 578   __ andptr(rsp, -(StackAlignmentInBytes));
 579   __ push_CPU_state();
 580   __ vzeroupper();
 581   // VM needs caller's callsite
 582   // VM needs target method
 583   // This needs to be a long call since we will relocate this adapter to

 586   // Allocate argument register save area
 587   if (frame::arg_reg_save_area_bytes != 0) {
 588     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 589   }
 590   __ mov(c_rarg0, rbx);
 591   __ mov(c_rarg1, rax);
 592   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
 593 
 594   // De-allocate argument register save area
 595   if (frame::arg_reg_save_area_bytes != 0) {
 596     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 597   }
 598 
 599   __ vzeroupper();
 600   __ pop_CPU_state();
 601   // restore sp
 602   __ mov(rsp, r13);
 603   __ bind(L);
 604 }
 605 
 606 
 607 static void gen_c2i_adapter(MacroAssembler *masm,
 608                             int total_args_passed,
 609                             int comp_args_on_stack,
 610                             const BasicType *sig_bt,
 611                             const VMRegPair *regs,
 612                             Label& skip_fixup) {
 613   // Before we get into the guts of the C2I adapter, see if we should be here
 614   // at all.  We've come from compiled code and are attempting to jump to the
 615   // interpreter, which means the caller made a static call to get here
 616   // (vcalls always get a compiled target if there is one).  Check for a
 617   // compiled target.  If there is one, we need to patch the caller's call.
 618   patch_callers_callsite(masm);
 619 
 620   __ bind(skip_fixup);
 621 
 622   // Since all args are passed on the stack, total_args_passed *
 623   // Interpreter::stackElementSize is the space we need. Plus 1 because
 624   // we also account for the return address location since
 625   // we store it first rather than hold it in rax across all the shuffling
 626 
 627   int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;
 628 
 629   // stack is aligned, keep it that way
 630   extraspace = align_up(extraspace, 2*wordSize);
 631 
 632   // Get return address
 633   __ pop(rax);
 634 
 635   // set senderSP value
 636   __ mov(r13, rsp);
 637 
 638   __ subptr(rsp, extraspace);
 639 
 640   // Store the return address in the expected location
 641   __ movptr(Address(rsp, 0), rax);
 642 
 643   // Now write the args into the outgoing interpreter space
 644   for (int i = 0; i < total_args_passed; i++) {
 645     if (sig_bt[i] == T_VOID) {
 646       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 647       continue;
 648     }
 649 
 650     // offset to start parameters
 651     int st_off   = (total_args_passed - i) * Interpreter::stackElementSize;
 652     int next_off = st_off - Interpreter::stackElementSize;
 653 
 654     // Say 4 args:
 655     // i   st_off
 656     // 0   32 T_LONG
 657     // 1   24 T_VOID
 658     // 2   16 T_OBJECT
 659     // 3    8 T_BOOL
 660     // -    0 return address
 661     //
 662     // However, to make things extra confusing: because we can fit a long/double in
 663     // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
 664     // leaves one slot empty and only stores to a single slot. In this case the
 665     // slot that is occupied is the T_VOID slot. See, I said it was confusing.
 666 
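    // [Editor's worked example, not from this change] Using the 4-arg layout above
    // with Interpreter::stackElementSize == 8:
    //   i = 0 (T_LONG):   st_off = (4 - 0) * 8 = 32, but the value is written at
    //                     next_off = 24 (the T_VOID slot); slot 32 only receives
    //                     debug junk under ASSERT
    //   i = 2 (T_OBJECT): st_off = 16, value written at 16
    //   i = 3 (T_BOOL):   st_off =  8, value written at 8
    // extraspace is (4 * 8) + 8 = 40 bytes, rounded up to 48 to keep rsp 16-byte aligned.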
 667     VMReg r_1 = regs[i].first();
 668     VMReg r_2 = regs[i].second();
 669     if (!r_1->is_valid()) {
 670       assert(!r_2->is_valid(), "");
 671       continue;
 672     }
 673     if (r_1->is_stack()) {
 674       // memory to memory use rax
 675       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 676       if (!r_2->is_valid()) {
 677         // sign extend??
 678         __ movl(rax, Address(rsp, ld_off));
 679         __ movptr(Address(rsp, st_off), rax);
 680 
 681       } else {
 682 
 683         __ movq(rax, Address(rsp, ld_off));
 684 
 685         // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
 686         // T_DOUBLE and T_LONG use two slots in the interpreter
 687         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
 688           // ld_off == LSW, ld_off+wordSize == MSW
 689           // st_off == MSW, next_off == LSW
 690           __ movq(Address(rsp, next_off), rax);
 691 #ifdef ASSERT
 692           // Overwrite the unused slot with known junk
 693           __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
 694           __ movptr(Address(rsp, st_off), rax);
 695 #endif /* ASSERT */
 696         } else {
 697           __ movq(Address(rsp, st_off), rax);
 698         }
 699       }
 700     } else if (r_1->is_Register()) {
 701       Register r = r_1->as_Register();
 702       if (!r_2->is_valid()) {
 703         // must be only an int (or less) so move only 32 bits to the slot
 704         // why not sign extend??
 705         __ movl(Address(rsp, st_off), r);
 706       } else {
 707         // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
 708         // T_DOUBLE and T_LONG use two slots in the interpreter
 709         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
 710           // long/double in gpr
 711 #ifdef ASSERT
 712           // Overwrite the unused slot with known junk
 713           __ mov64(rax, CONST64(0xdeadffffdeadaaab));
 714           __ movptr(Address(rsp, st_off), rax);
 715 #endif /* ASSERT */
 716           __ movq(Address(rsp, next_off), r);
 717         } else {
 718           __ movptr(Address(rsp, st_off), r);
 719         }
 720       }
 721     } else {
 722       assert(r_1->is_XMMRegister(), "");
 723       if (!r_2->is_valid()) {
 724         // only a float, use just part of the slot
 725         __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
 726       } else {
 727 #ifdef ASSERT
 728         // Overwrite the unused slot with known junk
 729         __ mov64(rax, CONST64(0xdeadffffdeadaaac));
 730         __ movptr(Address(rsp, st_off), rax);
 731 #endif /* ASSERT */
 732         __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
 733       }
 734     }
 735   }
 736 
 737   // Schedule the branch target address early.
 738   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
 739   __ jmp(rcx);
 740 }
 741 
 742 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
 743                         address code_start, address code_end,
 744                         Label& L_ok) {
 745   Label L_fail;
 746   __ lea(temp_reg, ExternalAddress(code_start));
 747   __ cmpptr(pc_reg, temp_reg);
 748   __ jcc(Assembler::belowEqual, L_fail);
 749   __ lea(temp_reg, ExternalAddress(code_end));
 750   __ cmpptr(pc_reg, temp_reg);
 751   __ jcc(Assembler::below, L_ok);
 752   __ bind(L_fail);
 753 }
 754 
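For reference, a standalone C++ equivalent of the predicate that range_check() encodes with lea/cmpptr/jcc; the helper name below is the editor's own, not HotSpot's:

#include <cstdint>

// Editor's sketch: range_check() falls through to L_fail unless the return pc
// lies strictly between the blob bounds (belowEqual -> fail, below -> ok).
static inline bool pc_in_range(std::uintptr_t pc,
                               std::uintptr_t code_start,
                               std::uintptr_t code_end) {
  return pc > code_start && pc < code_end;
}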
 755 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 756                                     int total_args_passed,
 757                                     int comp_args_on_stack,
 758                                     const BasicType *sig_bt,
 759                                     const VMRegPair *regs) {
 760 
 761   // Note: r13 contains the senderSP on entry. We must preserve it since
 762   // we may do an i2c -> c2i transition if we lose a race where compiled
 763   // code goes non-entrant while we get args ready.
 764   // In addition we use r13 to locate all the interpreter args as
 765   // we must align the stack to 16 bytes on an i2c entry else we
 766   // lose alignment we expect in all compiled code and register
 767   // save code can segv when fxsave instructions find an improperly
 768   // aligned stack pointer.
 769 
 770   // Adapters can be frameless because they do not require the caller
 771   // to perform additional cleanup work, such as correcting the stack pointer.
 772   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 773   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 774   // even if a callee has modified the stack pointer.
 775   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
 776   // routinely repairs its caller's stack pointer (from sender_sp, which is set
 777   // up via the senderSP register).
 778   // In other words, if *either* the caller or callee is interpreted, we can

 830     comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
 831     // Round up to minimum stack alignment, in wordSize
 832     comp_words_on_stack = align_up(comp_words_on_stack, 2);
 833     __ subptr(rsp, comp_words_on_stack * wordSize);
 834   }
 835 
 836 
 837   // Ensure compiled code always sees stack at proper alignment
 838   __ andptr(rsp, -16);
 839 
 840   // push the return address; this misaligns the stack so that the youngest frame
 841   // sees it exactly as it would right after a call instruction
 842   __ push(rax);
 843 
 844   // Put saved SP in another register
 845   const Register saved_sp = rax;
 846   __ movptr(saved_sp, r11);
 847 
 848   // Will jump to the compiled code just as if compiled code was doing it.
 849   // Pre-load the register-jump target early, to schedule it better.
 850   __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
 851 
 852 #if INCLUDE_JVMCI
 853   if (EnableJVMCI) {
 854     // check if this call should be routed towards a specific entry point
 855     __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
 856     Label no_alternative_target;
 857     __ jcc(Assembler::equal, no_alternative_target);
 858     __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
 859     __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
 860     __ bind(no_alternative_target);
 861   }
 862 #endif // INCLUDE_JVMCI
 863 


 864   // Now generate the shuffle code.  Pick up all register args and move the
 865   // rest through the floating point stack top.
 866   for (int i = 0; i < total_args_passed; i++) {
 867     if (sig_bt[i] == T_VOID) {


 868       // Longs and doubles are passed in native word order, but misaligned
 869       // in the 32-bit build.
 870       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");

 871       continue;
 872     }
 873 
 874     // Pick up 0, 1 or 2 words from SP+offset.
 875 
 876     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 877             "scrambled load targets?");
 878     // Load in argument order going down.
 879     int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
 880     // Point to interpreter value (vs. tag)
 881     int next_off = ld_off - Interpreter::stackElementSize;
 882     //
 883     //
 884     //
 885     VMReg r_1 = regs[i].first();
 886     VMReg r_2 = regs[i].second();
 887     if (!r_1->is_valid()) {
 888       assert(!r_2->is_valid(), "");
 889       continue;
 890     }

 892       // Convert stack slot to an SP offset (+ wordSize to account for return address )
 893       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
 894 
 895       // We can use r13 as a temp here because compiled code doesn't need r13 as an input
 896       // and if we end up going thru a c2i because of a miss a reasonable value of r13
 897       // will be generated.
 898       if (!r_2->is_valid()) {
 899         // sign extend???
 900         __ movl(r13, Address(saved_sp, ld_off));
 901         __ movptr(Address(rsp, st_off), r13);
 902       } else {
 903         //
 904         // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 905         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
 906         // So we must adjust where to pick up the data to match the interpreter.
 907         //
 908         // Interpreter local[n] == MSW, local[n+1] == LSW however locals
 909         // are accessed as negative so LSW is at LOW address
 910 
 911         // ld_off is MSW so get LSW
 912         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 913                            next_off : ld_off;
 914         __ movq(r13, Address(saved_sp, offset));
 915         // st_off is LSW (i.e. reg.first())
 916         __ movq(Address(rsp, st_off), r13);
 917       }
 918     } else if (r_1->is_Register()) {  // Register argument
 919       Register r = r_1->as_Register();
 920       assert(r != rax, "must be different");
 921       if (r_2->is_valid()) {
 922         //
 923         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
 924         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
 925         // So we must adjust where to pick up the data to match the interpreter.
 926 
 927         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 928                            next_off : ld_off;
 929 
 930         // this can be a misaligned move
 931         __ movq(r, Address(saved_sp, offset));
 932       } else {
 933         // sign extend and use a full word?
 934         __ movl(r, Address(saved_sp, ld_off));
 935       }
 936     } else {
 937       if (!r_2->is_valid()) {
 938         __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
 939       } else {
 940         __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
 941       }
 942     }
 943   }
 944 
 945   // 6243940 We might end up in handle_wrong_method if
 946   // the callee is deoptimized as we race thru here. If that
 947   // happens we don't want to take a safepoint because the
 948   // caller frame will look interpreted and arguments are now
 949   // "compiled" so it is much better to make this transition
 950   // invisible to the stack walking code. Unfortunately if
 951   // we try and find the callee by normal means a safepoint
 952   // is possible. So we stash the desired callee in the thread
 953   // and the VM will find it there should this case occur.
 954 
 955   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
 956 
 957   // put Method* where a c2i would expect it, should we end up there
 958   // only needed because c2 resolve stubs return Method* as a result in
 959   // rax
 960   __ mov(rax, rbx);
 961   __ jmp(r11);
 962 }
 963 
 964 // ---------------------------------------------------------------
 965 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 966                                                             int total_args_passed,
 967                                                             int comp_args_on_stack,
 968                                                             const BasicType *sig_bt,
 969                                                             const VMRegPair *regs,
 970                                                             AdapterFingerPrint* fingerprint) {
 971   address i2c_entry = __ pc();
 972 
 973   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 974 
 975   // -------------------------------------------------------------------------
 976   // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
 977   // to the interpreter.  The args start out packed in the compiled layout.  They
 978   // need to be unpacked into the interpreter layout.  This will almost always
 979   // require some stack space.  We grow the current (compiled) stack, then repack
 980   // the args.  We  finally end in a jump to the generic interpreter entry point.
 981   // On exit from the interpreter, the interpreter will restore our SP (lest the
 982   // compiled code, which relies solely on SP and not RBP, get sick).
 983 
 984   address c2i_unverified_entry = __ pc();
 985   Label skip_fixup;
 986   Label ok;
 987 
 988   Register holder = rax;
 989   Register receiver = j_rarg0;
 990   Register temp = rbx;
 991 
 992   {
 993     __ load_klass(temp, receiver, rscratch1);
 994     __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
 995     __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
 996     __ jcc(Assembler::equal, ok);
 997     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 998 
 999     __ bind(ok);
1000     // Method might have been compiled since the call site was patched to
1001     // interpreted; if that is the case, treat it as a miss so we can get
1002     // the call site corrected.
1003     __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
1004     __ jcc(Assembler::equal, skip_fixup);
1005     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));


1006   }
1007 

1008   address c2i_entry = __ pc();
1009 
1010   // Class initialization barrier for static methods
1011   address c2i_no_clinit_check_entry = NULL;
1012   if (VM_Version::supports_fast_class_init_checks()) {
1013     Label L_skip_barrier;
1014     Register method = rbx;
1015 
1016     { // Bypass the barrier for non-static methods
1017       Register flags  = rscratch1;
1018       __ movl(flags, Address(method, Method::access_flags_offset()));
1019       __ testl(flags, JVM_ACC_STATIC);
1020       __ jcc(Assembler::zero, L_skip_barrier); // non-static
1021     }
1022 
1023     Register klass = rscratch1;
1024     __ load_method_holder(klass, method);
1025     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
1026 
1027     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1028 
1029     __ bind(L_skip_barrier);
1030     c2i_no_clinit_check_entry = __ pc();
1031   }
1032 
1033   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1034   bs->c2i_entry_barrier(masm);
1035 
1036   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
1037 
1038   __ flush();
1039   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);
1040 }
1041 
1042 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1043                                          VMRegPair *regs,
1044                                          VMRegPair *regs2,
1045                                          int total_args_passed) {
1046   assert(regs2 == NULL, "not needed on x86");
1047 // We return the amount of VMRegImpl stack slots we need to reserve for all
1048 // the arguments NOT counting out_preserve_stack_slots.
1049 
1050 // NOTE: These arrays will have to change when c1 is ported
1051 #ifdef _WIN64
1052     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1053       c_rarg0, c_rarg1, c_rarg2, c_rarg3
1054     };
1055     static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1056       c_farg0, c_farg1, c_farg2, c_farg3
1057     };
1058 #else
1059     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {

1077       case T_BYTE:
1078       case T_SHORT:
1079       case T_INT:
1080         if (int_args < Argument::n_int_register_parameters_c) {
1081           regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
1082 #ifdef _WIN64
1083           fp_args++;
1084           // Allocate slots for callee to stuff register args on the stack.
1085           stk_args += 2;
1086 #endif
1087         } else {
1088           regs[i].set1(VMRegImpl::stack2reg(stk_args));
1089           stk_args += 2;
1090         }
1091         break;
1092       case T_LONG:
1093         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
1094         // fall through
1095       case T_OBJECT:
1096       case T_ARRAY:

1097       case T_ADDRESS:
1098       case T_METADATA:
1099         if (int_args < Argument::n_int_register_parameters_c) {
1100           regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
1101 #ifdef _WIN64
1102           fp_args++;
1103           stk_args += 2;
1104 #endif
1105         } else {
1106           regs[i].set2(VMRegImpl::stack2reg(stk_args));
1107           stk_args += 2;
1108         }
1109         break;
1110       case T_FLOAT:
1111         if (fp_args < Argument::n_float_register_parameters_c) {
1112           regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
1113 #ifdef _WIN64
1114           int_args++;
1115           // Allocate slots for callee to stuff register args on the stack.
1116           stk_args += 2;

1784 
1785   int temploc = -1;
1786   for (int ai = 0; ai < arg_order.length(); ai += 2) {
1787     int i = arg_order.at(ai);
1788     int c_arg = arg_order.at(ai + 1);
1789     __ block_comment(err_msg("move %d -> %d", i, c_arg));
1790 #ifdef ASSERT
1791     if (in_regs[i].first()->is_Register()) {
1792       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1793     } else if (in_regs[i].first()->is_XMMRegister()) {
1794       assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!");
1795     }
1796     if (out_regs[c_arg].first()->is_Register()) {
1797       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1798     } else if (out_regs[c_arg].first()->is_XMMRegister()) {
1799       freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
1800     }
1801 #endif /* ASSERT */
1802     switch (in_sig_bt[i]) {
1803       case T_ARRAY:

1804       case T_OBJECT:
1805         __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1806                     ((i == 0) && (!is_static)),
1807                     &receiver_offset);
1808         break;
1809       case T_VOID:
1810         break;
1811 
1812       case T_FLOAT:
1813         __ float_move(in_regs[i], out_regs[c_arg]);
1814           break;
1815 
1816       case T_DOUBLE:
1817         assert( i + 1 < total_in_args &&
1818                 in_sig_bt[i + 1] == T_VOID &&
1819                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1820         __ double_move(in_regs[i], out_regs[c_arg]);
1821         break;
1822 
1823       case T_LONG :

1906 
1907   if (method->is_synchronized()) {
1908 
1909     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
1910 
1911     // Get the handle (the 2nd argument)
1912     __ mov(oop_handle_reg, c_rarg1);
1913 
1914     // Get address of the box
1915 
1916     __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
1917 
1918     // Load the oop from the handle
1919     __ movptr(obj_reg, Address(oop_handle_reg, 0));
1920 
1921     // Load immediate 1 into swap_reg %rax
1922     __ movl(swap_reg, 1);
1923 
1924     // Load (object->mark() | 1) into swap_reg %rax
1925     __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1926 
1927     // Save (object->mark() | 1) into BasicLock's displaced header
1928     __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
1929 
1930     // src -> dest iff dest == rax else rax <- dest
1931     __ lock();
1932     __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1933     __ jcc(Assembler::equal, lock_done);
1934 
1935     // Hmm should this move to the slow path code area???
1936 
1937     // Test if the oopMark is an obvious stack pointer, i.e.,
1938     //  1) (mark & 3) == 0, and
1939     //  2) rsp <= mark < rsp + os::pagesize()
1940     // These 3 tests can be done by evaluating the following
1941     // expression: ((mark - rsp) & (3 - os::vm_page_size())),
1942     // assuming both stack pointer and pagesize have their
1943     // least significant 2 bits clear.
1944     // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
1945 
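    // [Editor's worked example, not from this change] With a 4096-byte page,
    // 3 - os::vm_page_size() == -4093 == 0x...fffff003 as a mask; it keeps bits 0-1
    // and bits 12 and up of (mark - rsp). The masked value is therefore zero exactly
    // when (mark - rsp) & 3 == 0 and 0 <= mark - rsp < 4096, i.e. when the mark is a
    // 4-byte-aligned pointer into the page at rsp (the recursive stack-lock case).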

1963   // Now set thread in native
1964   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
1965 
1966   __ call(RuntimeAddress(native_func));
1967 
1968   // Verify or restore cpu control state after JNI call
1969   __ restore_cpu_control_state_after_jni();
1970 
1971   // Unpack native results.
1972   switch (ret_type) {
1973   case T_BOOLEAN: __ c2bool(rax);            break;
1974   case T_CHAR   : __ movzwl(rax, rax);      break;
1975   case T_BYTE   : __ sign_extend_byte (rax); break;
1976   case T_SHORT  : __ sign_extend_short(rax); break;
1977   case T_INT    : /* nothing to do */        break;
1978   case T_DOUBLE :
1979   case T_FLOAT  :
1980     // Result is in xmm0 we'll save as needed
1981     break;
1982   case T_ARRAY:                 // Really a handle

1983   case T_OBJECT:                // Really a handle
1984       break; // can't de-handlize until after safepoint check
1985   case T_VOID: break;
1986   case T_LONG: break;
1987   default       : ShouldNotReachHere();
1988   }
1989 
1990   Label after_transition;
1991 
1992   // Switch thread to "native transition" state before reading the synchronization state.
1993   // This additional state is necessary because reading and testing the synchronization
1994   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1995   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1996   //     VM thread changes sync state to synchronizing and suspends threads for GC.
1997   //     Thread A is resumed to finish this native method, but doesn't block here since it
1998   //     didn't see any synchronization in progress, and escapes.
1999   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2000 
2001   // Force this write out before the read below
2002   __ membar(Assembler::Membar_mask_bits(

3700   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
3701 #endif
3702   // Clear the exception oop so GC no longer processes it as a root.
3703   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);
3704 
3705   // rax: exception oop
3706   // r8:  exception handler
3707   // rdx: exception pc
3708   // Jump to handler
3709 
3710   __ jmp(r8);
3711 
3712   // Make sure all code is generated
3713   masm->flush();
3714 
3715   // Set exception blob
3716   _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
3717 }
3718 #endif // COMPILER2
3719 
3720 void SharedRuntime::compute_move_order(const BasicType* in_sig_bt,
3721                                        int total_in_args, const VMRegPair* in_regs,
3722                                        int total_out_args, VMRegPair* out_regs,
3723                                        GrowableArray<int>& arg_order,
3724                                        VMRegPair tmp_vmreg) {
3725   ComputeMoveOrder order(total_in_args, in_regs,
3726                          total_out_args, out_regs,
3727                          in_sig_bt, arg_order, tmp_vmreg);
3728 }

  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #ifndef _WINDOWS
  27 #include "alloca.h"
  28 #endif
  29 #include "asm/macroAssembler.hpp"
  30 #include "asm/macroAssembler.inline.hpp"
  31 #include "classfile/symbolTable.hpp"
  32 #include "code/debugInfoRec.hpp"
  33 #include "code/icBuffer.hpp"
  34 #include "code/nativeInst.hpp"
  35 #include "code/vtableStubs.hpp"
  36 #include "compiler/oopMap.hpp"
  37 #include "gc/shared/collectedHeap.hpp"
  38 #include "gc/shared/gcLocker.hpp"
  39 #include "gc/shared/barrierSet.hpp"
  40 #include "gc/shared/barrierSetAssembler.hpp"
  41 #include "interpreter/interpreter.hpp"
  42 #include "logging/log.hpp"
  43 #include "memory/resourceArea.hpp"
  44 #include "memory/universe.hpp"
  45 #include "oops/compiledICHolder.hpp"
  46 #include "oops/klass.inline.hpp"
  47 #include "prims/methodHandles.hpp"
  48 #include "runtime/jniHandles.hpp"
  49 #include "runtime/safepointMechanism.hpp"
  50 #include "runtime/sharedRuntime.hpp"
  51 #include "runtime/signature.hpp"

 512     case T_SHORT:
 513     case T_INT:
 514       if (int_args < Argument::n_int_register_parameters_j) {
 515         regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
 516       } else {
 517         regs[i].set1(VMRegImpl::stack2reg(stk_args));
 518         stk_args += 2;
 519       }
 520       break;
 521     case T_VOID:
 522       // halves of T_LONG or T_DOUBLE
 523       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 524       regs[i].set_bad();
 525       break;
 526     case T_LONG:
 527       assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 528       // fall through
 529     case T_OBJECT:
 530     case T_ARRAY:
 531     case T_ADDRESS:
 532     case T_INLINE_TYPE:
 533       if (int_args < Argument::n_int_register_parameters_j) {
 534         regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
 535       } else {
 536         regs[i].set2(VMRegImpl::stack2reg(stk_args));
 537         stk_args += 2;
 538       }
 539       break;
 540     case T_FLOAT:
 541       if (fp_args < Argument::n_float_register_parameters_j) {
 542         regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
 543       } else {
 544         regs[i].set1(VMRegImpl::stack2reg(stk_args));
 545         stk_args += 2;
 546       }
 547       break;
 548     case T_DOUBLE:
 549       assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 550       if (fp_args < Argument::n_float_register_parameters_j) {
 551         regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
 552       } else {
 553         regs[i].set2(VMRegImpl::stack2reg(stk_args));
 554         stk_args += 2;
 555       }
 556       break;
 557     default:
 558       ShouldNotReachHere();
 559       break;
 560     }
 561   }
 562 
 563   return align_up(stk_args, 2);
 564 }
 565 
 566 // Same as java_calling_convention() but for multiple return
 567 // values. There's no way to store them on the stack so if we don't
 568 // have enough registers, multiple values can't be returned.
 569 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
 570 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
 571 int SharedRuntime::java_return_convention(const BasicType *sig_bt,
 572                                           VMRegPair *regs,
 573                                           int total_args_passed) {
 574   // Create the mapping between argument positions and
 575   // registers.
 576   static const Register INT_ArgReg[java_return_convention_max_int] = {
 577     rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
 578   };
 579   static const XMMRegister FP_ArgReg[java_return_convention_max_float] = {
 580     j_farg0, j_farg1, j_farg2, j_farg3,
 581     j_farg4, j_farg5, j_farg6, j_farg7
 582   };
 583 
 584 
 585   uint int_args = 0;
 586   uint fp_args = 0;
 587 
 588   for (int i = 0; i < total_args_passed; i++) {
 589     switch (sig_bt[i]) {
 590     case T_BOOLEAN:
 591     case T_CHAR:
 592     case T_BYTE:
 593     case T_SHORT:
 594     case T_INT:
 595       if (int_args < Argument::n_int_register_parameters_j+1) {
 596         regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
 597         int_args++;
 598       } else {
 599         return -1;
 600       }
 601       break;
 602     case T_VOID:
 603       // halves of T_LONG or T_DOUBLE
 604       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 605       regs[i].set_bad();
 606       break;
 607     case T_LONG:
 608       assert(sig_bt[i + 1] == T_VOID, "expecting half");
 609       // fall through
 610     case T_OBJECT:
 611     case T_INLINE_TYPE:
 612     case T_ARRAY:
 613     case T_ADDRESS:
 614     case T_METADATA:
 615       if (int_args < Argument::n_int_register_parameters_j+1) {
 616         regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
 617         int_args++;
 618       } else {
 619         return -1;
 620       }
 621       break;
 622     case T_FLOAT:
 623       if (fp_args < Argument::n_float_register_parameters_j) {
 624         regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
 625         fp_args++;
 626       } else {
 627         return -1;
 628       }
 629       break;
 630     case T_DOUBLE:
 631       assert(sig_bt[i + 1] == T_VOID, "expecting half");
 632       if (fp_args < Argument::n_float_register_parameters_j) {
 633         regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
 634         fp_args++;
 635       } else {
 636         return -1;
 637       }
 638       break;
 639     default:
 640       ShouldNotReachHere();
 641       break;
 642     }
 643   }
 644 
 645   return int_args + fp_args;
 646 }
 647 
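A short worked example; the flattened return signature here is the editor's own, hypothetical:

// For a return signature of T_INT, T_LONG, T_VOID, T_DOUBLE, T_VOID,
// java_return_convention() hands out:
//   T_INT    -> rax      (INT_ArgReg[0])
//   T_LONG   -> j_rarg5  (INT_ArgReg[1]); its T_VOID half gets no register
//   T_DOUBLE -> j_farg0  (FP_ArgReg[0]);  its T_VOID half gets no register
// and returns int_args + fp_args = 2 + 1 = 3. It returns -1 only when a register
// pool runs out, since multiple return values cannot spill to the stack.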
 648 // Patch the caller's callsite with entry to compiled code if it exists.
 649 static void patch_callers_callsite(MacroAssembler *masm) {
 650   Label L;
 651   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
 652   __ jcc(Assembler::equal, L);
 653 
 654   // Save the current stack pointer
 655   __ mov(r13, rsp);
 656   // Schedule the branch target address early.
 657   // Call into the VM to patch the caller, then jump to compiled callee
 658   // rax isn't live so capture return address while we easily can
 659   __ movptr(rax, Address(rsp, 0));
 660 
 661   // align stack so push_CPU_state doesn't fault
 662   __ andptr(rsp, -(StackAlignmentInBytes));
 663   __ push_CPU_state();
 664   __ vzeroupper();
 665   // VM needs caller's callsite
 666   // VM needs target method
 667   // This needs to be a long call since we will relocate this adapter to

 670   // Allocate argument register save area
 671   if (frame::arg_reg_save_area_bytes != 0) {
 672     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 673   }
 674   __ mov(c_rarg0, rbx);
 675   __ mov(c_rarg1, rax);
 676   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
 677 
 678   // De-allocate argument register save area
 679   if (frame::arg_reg_save_area_bytes != 0) {
 680     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 681   }
 682 
 683   __ vzeroupper();
 684   __ pop_CPU_state();
 685   // restore sp
 686   __ mov(rsp, r13);
 687   __ bind(L);
 688 }
 689 
 690 // For each inline type argument, sig includes the list of fields of
 691 // the inline type. This utility function computes the number of
 692 // arguments for the call if inline types are passed by reference (the
 693 // calling convention the interpreter expects).
 694 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
 695   int total_args_passed = 0;
 696   if (InlineTypePassFieldsAsArgs) {
 697     for (int i = 0; i < sig_extended->length(); i++) {
 698       BasicType bt = sig_extended->at(i)._bt;
 699       if (bt == T_INLINE_TYPE) {
 700         // In sig_extended, an inline type argument starts with:
 701         // T_INLINE_TYPE, followed by the types of the fields of the
 702         // inline type and T_VOID to mark the end of the value
 703         // type. Inline types are flattened so, for instance, in the
 704         // case of an inline type with an int field and an inline type
 705         // field that itself has 2 fields, an int and a long:
 706         // T_INLINE_TYPE T_INT T_INLINE_TYPE T_INT T_LONG T_VOID (second
 707         // slot for the T_LONG) T_VOID (inner T_INLINE_TYPE) T_VOID
 708         // (outer T_INLINE_TYPE)
 709         total_args_passed++;
 710         int vt = 1;
 711         do {
 712           i++;
 713           BasicType bt = sig_extended->at(i)._bt;
 714           BasicType prev_bt = sig_extended->at(i-1)._bt;
 715           if (bt == T_INLINE_TYPE) {
 716             vt++;
 717           } else if (bt == T_VOID &&
 718                      prev_bt != T_LONG &&
 719                      prev_bt != T_DOUBLE) {
 720             vt--;
 721           }
 722         } while (vt != 0);
 723       } else {
 724         total_args_passed++;
 725       }
 726     }
 727   } else {
 728     total_args_passed = sig_extended->length();
 729   }
 730   return total_args_passed;
 731 }
 732 
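To make the counting concrete, an editor's trace (illustration only) of compute_total_args_passed_int() on the nested example from the comment above, with InlineTypePassFieldsAsArgs enabled:

//   sig_extended:  T_INLINE_TYPE  T_INT  T_INLINE_TYPE  T_INT  T_LONG  T_VOID  T_VOID  T_VOID
//   vt after each:       1          1          2          2       2       2       1       0
// The first T_VOID follows T_LONG, so it is only the long's second slot; the last two
// T_VOIDs close the inner and then the outer inline type. The whole sequence counts as
// one interpreter argument, so total_args_passed increases by 1.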
 733 
 734 static void gen_c2i_adapter_helper(MacroAssembler* masm,
 735                                    BasicType bt,
 736                                    BasicType prev_bt,
 737                                    size_t size_in_bytes,
 738                                    const VMRegPair& reg_pair,
 739                                    const Address& to,
 740                                    int extraspace,
 741                                    bool is_oop) {
 742   assert(bt != T_INLINE_TYPE || !InlineTypePassFieldsAsArgs, "no inline type here");
 743   if (bt == T_VOID) {
 744     assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
 745     return;
 746   }
 747 
 748   // Say 4 args:
 749   // i   st_off
 750   // 0   32 T_LONG
 751   // 1   24 T_VOID
 752   // 2   16 T_OBJECT
 753   // 3    8 T_BOOL
 754   // -    0 return address
 755   //
 756   // However, to make things extra confusing: because we can fit a long/double in
 757   // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
 758   // leaves one slot empty and only stores to a single slot. In this case the
 759   // slot that is occupied is the T_VOID slot. See, I said it was confusing.
 760 
 761   bool wide = (size_in_bytes == wordSize);
 762   VMReg r_1 = reg_pair.first();
 763   VMReg r_2 = reg_pair.second();
 764   assert(r_2->is_valid() == wide, "invalid size");
 765   if (!r_1->is_valid()) {
 766     assert(!r_2->is_valid(), "must be invalid");
 767     return;
 768   }
 769 
 770   if (!r_1->is_XMMRegister()) {
 771     Register val = rax;
 772     if (r_1->is_stack()) {
 773       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 774       __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
 775     } else {
 776       val = r_1->as_Register();
 777     }
 778     assert_different_registers(to.base(), val, rscratch1);
 779     if (is_oop) {
 780       __ push(r13);
 781       __ push(rbx);
 782       __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
 783       __ pop(rbx);
 784       __ pop(r13);
 785     } else {
 786       __ store_sized_value(to, val, size_in_bytes);
 787     }
 788   } else {
 789     if (wide) {
 790       __ movdbl(to, r_1->as_XMMRegister());
 791     } else {
 792       __ movflt(to, r_1->as_XMMRegister());
 793     }
 794   }
 795 }
 796 
 797 static void gen_c2i_adapter(MacroAssembler *masm,
 798                             const GrowableArray<SigEntry>* sig_extended,


 799                             const VMRegPair *regs,
 800                             Label& skip_fixup,
 801                             address start,
 802                             OopMapSet* oop_maps,
 803                             int& frame_complete,
 804                             int& frame_size_in_words,
 805                             bool alloc_inline_receiver) {
 806   // Before we get into the guts of the C2I adapter, see if we should be here
 807   // at all.  We've come from compiled code and are attempting to jump to the
 808   // interpreter, which means the caller made a static call to get here
 809   // (vcalls always get a compiled target if there is one).  Check for a
 810   // compiled target.  If there is one, we need to patch the caller's call.
 811   patch_callers_callsite(masm);
 812 
 813   __ bind(skip_fixup);
 814 
 815   if (InlineTypePassFieldsAsArgs) {
 816     // Is there an inline type argument?
 817     bool has_inline_argument = false;
 818     for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
 819       has_inline_argument = (sig_extended->at(i)._bt == T_INLINE_TYPE);
 820     }
 821     if (has_inline_argument) {
 822       // There is at least one inline type argument: we're coming from
 823       // compiled code so we have no buffers to back the inline types.
 824       // Allocate the buffers here with a runtime call.
 825       OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false);
 826 
 827       frame_complete = __ offset();
 828 
 829       __ set_last_Java_frame(noreg, noreg, NULL);
 830 
 831       __ mov(c_rarg0, r15_thread);
 832       __ mov(c_rarg1, rbx);
 833       __ mov64(c_rarg2, (int64_t)alloc_inline_receiver);
 834       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types)));
 835 
 836       oop_maps->add_gc_map((int)(__ pc() - start), map);
 837       __ reset_last_Java_frame(false);
 838 
 839       RegisterSaver::restore_live_registers(masm);
 840 
 841       Label no_exception;
 842       __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
 843       __ jcc(Assembler::equal, no_exception);
 844 
 845       __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
 846       __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
 847       __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
 848 
 849       __ bind(no_exception);
 850 
 851       // We get an array of objects from the runtime call
 852       __ get_vm_result(rscratch2, r15_thread); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr()
 853       __ get_vm_result_2(rbx, r15_thread); // TODO: required to keep the callee Method live?
 854     }
 855   }
 856 
 857   // Since all args are passed on the stack, total_args_passed *
 858   // Interpreter::stackElementSize is the space we need. Plus 1 because
 859   // we also account for the return address location since
 860   // we store it first rather than hold it in rax across all the shuffling
 861   int total_args_passed = compute_total_args_passed_int(sig_extended);
 862   int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;
 863 
 864   // stack is aligned, keep it that way
 865   extraspace = align_up(extraspace, 2*wordSize);
 866 
 867   // Get return address
 868   __ pop(rax);
 869 
 870   // set senderSP value
 871   __ mov(r13, rsp);
 872 
 873   __ subptr(rsp, extraspace);
 874 
 875   // Store the return address in the expected location
 876   __ movptr(Address(rsp, 0), rax);
 877 
 878   // Now write the args into the outgoing interpreter space
 879 
 880   // next_arg_comp is the next argument from the compiler point of
 881   // view (inline type fields are passed in registers/on the stack). In
 882   // sig_extended, an inline type argument starts with: T_INLINE_TYPE,
 883   // followed by the types of the fields of the inline type and T_VOID
 884   // to mark the end of the inline type. ignored counts the number of
 885   // T_INLINE_TYPE/T_VOID. next_vt_arg is the next inline type argument:
 886   // used to get the buffer for that argument from the pool of buffers
 887   // we allocated above and want to pass to the
 888   // interpreter. next_arg_int is the next argument from the
 889   // interpreter point of view (inline types are passed by reference).
 890   for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
 891        next_arg_comp < sig_extended->length(); next_arg_comp++) {
 892     assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
 893     assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?");
 894     BasicType bt = sig_extended->at(next_arg_comp)._bt;
 895     int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
 896     if (!InlineTypePassFieldsAsArgs || bt != T_INLINE_TYPE) {
 897       int next_off = st_off - Interpreter::stackElementSize;
 898       const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
 899       const VMRegPair reg_pair = regs[next_arg_comp-ignored];
 900       size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
 901       gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
 902                              size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false);
 903       next_arg_int++;
 904 #ifdef ASSERT
 905       if (bt == T_LONG || bt == T_DOUBLE) {
 906         // Overwrite the unused slot with known junk
 907         __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
 908         __ movptr(Address(rsp, st_off), rax);
 909       }
 910 #endif /* ASSERT */
 911     } else {
 912       ignored++;
 913       // get the buffer from the just allocated pool of buffers
 914       int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_INLINE_TYPE);
 915       __ load_heap_oop(r14, Address(rscratch2, index));
 916       next_vt_arg++; next_arg_int++;
 917       int vt = 1;
 918       // write fields we get from compiled code in registers/stack
 919       // slots to the buffer: we know we are done with that inline type
 920       // argument when we hit the T_VOID that acts as an end of inline
 921       // type delimiter for this inline type. Inline types are flattened
 922       // so we might encounter embedded inline types. Each entry in
 923       // sig_extended contains a field offset in the buffer.
 924       do {
 925         next_arg_comp++;
 926         BasicType bt = sig_extended->at(next_arg_comp)._bt;
 927         BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt;
 928         if (bt == T_INLINE_TYPE) {
 929           vt++;
 930           ignored++;
 931         } else if (bt == T_VOID &&
 932                    prev_bt != T_LONG &&
 933                    prev_bt != T_DOUBLE) {
 934           vt--;
 935           ignored++;
 936         } else {
 937           int off = sig_extended->at(next_arg_comp)._offset;
 938           assert(off > 0, "offset in object should be positive");
 939           size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
 940           bool is_oop = is_reference_type(bt);
 941           gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
 942                                  size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop);
 943         }
 944       } while (vt != 0);
 945       // pass the buffer to the interpreter
 946       __ movptr(Address(rsp, st_off), r14);
 947     }
 948   }
 949 
 950   // Schedule the branch target address early.
 951   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
 952   __ jmp(rcx);
 953 }
 954 
 955 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
 956                         address code_start, address code_end,
 957                         Label& L_ok) {
 958   Label L_fail;
 959   __ lea(temp_reg, ExternalAddress(code_start));
 960   __ cmpptr(pc_reg, temp_reg);
 961   __ jcc(Assembler::belowEqual, L_fail);
 962   __ lea(temp_reg, ExternalAddress(code_end));
 963   __ cmpptr(pc_reg, temp_reg);
 964   __ jcc(Assembler::below, L_ok);
 965   __ bind(L_fail);
 966 }
 967 
 968 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,

 969                                     int comp_args_on_stack,
 970                                     const GrowableArray<SigEntry>* sig,
 971                                     const VMRegPair *regs) {
 972 
 973   // Note: r13 contains the senderSP on entry. We must preserve it since
 974   // we may do an i2c -> c2i transition if we lose a race where compiled
 975   // code goes non-entrant while we get args ready.
 976   // In addition we use r13 to locate all the interpreter args as
 977   // we must align the stack to 16 bytes on an i2c entry else we
 978   // lose alignment we expect in all compiled code and register
 979   // save code can segv when fxsave instructions find an improperly
 980   // aligned stack pointer.
 981 
 982   // Adapters can be frameless because they do not require the caller
 983   // to perform additional cleanup work, such as correcting the stack pointer.
 984   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 985   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 986   // even if a callee has modified the stack pointer.
 987   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
 988   // routinely repairs its caller's stack pointer (from sender_sp, which is set
 989   // up via the senderSP register).
 990   // In other words, if *either* the caller or callee is interpreted, we can

1042     comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
1043     // Round up to minimum stack alignment, in wordSize
1044     comp_words_on_stack = align_up(comp_words_on_stack, 2);
1045     __ subptr(rsp, comp_words_on_stack * wordSize);
1046   }
1047 
1048 
1049   // Ensure compiled code always sees stack at proper alignment
1050   __ andptr(rsp, -16);
1051 
1052   // push the return address; this misaligns the stack so that the youngest frame
1053   // sees it exactly as it would right after a call instruction
1054   __ push(rax);
1055 
1056   // Put saved SP in another register
1057   const Register saved_sp = rax;
1058   __ movptr(saved_sp, r11);
1059 
1060   // Will jump to the compiled code just as if compiled code was doing it.
1061   // Pre-load the register-jump target early, to schedule it better.
1062   __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset())));
1063 
1064 #if INCLUDE_JVMCI
1065   if (EnableJVMCI) {
1066     // check if this call should be routed towards a specific entry point
1067     __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
1068     Label no_alternative_target;
1069     __ jcc(Assembler::equal, no_alternative_target);
1070     __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
1071     __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
1072     __ bind(no_alternative_target);
1073   }
1074 #endif // INCLUDE_JVMCI
1075 
1076   int total_args_passed = sig->length();
1077 
1078   // Now generate the shuffle code.  Pick up all register args and move the
1079   // rest through the floating point stack top.
1080   for (int i = 0; i < total_args_passed; i++) {
1081     BasicType bt = sig->at(i)._bt;
1082     assert(bt != T_INLINE_TYPE, "i2c adapter doesn't unpack inline type args");
1083     if (bt == T_VOID) {
1084       // Longs and doubles are passed in native word order, but misaligned
1085       // in the 32-bit build.
1086       BasicType prev_bt = (i > 0) ? sig->at(i-1)._bt : T_ILLEGAL;
1087       assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half");
1088       continue;
1089     }
1090 
1091     // Pick up 0, 1 or 2 words from SP+offset.
1092 
1093     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
1094             "scrambled load targets?");
1095     // Load in argument order going down.
1096     int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
1097     // Point to interpreter value (vs. tag)
1098     int next_off = ld_off - Interpreter::stackElementSize;
1099     //
1100     //
1101     //
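         // ld_off addresses the first (higher-address) of argument i's two interpreter
         // stack slots, relative to saved_sp; next_off addresses the slot one stack
         // element below it. For T_LONG/T_DOUBLE the interpreter keeps the value in
         // that lower slot, which is why the loads below switch to next_off for those types.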
1102     VMReg r_1 = regs[i].first();
1103     VMReg r_2 = regs[i].second();
1104     if (!r_1->is_valid()) {
1105       assert(!r_2->is_valid(), "");
1106       continue;
1107     }

1109       // Convert stack slot to an SP offset (+ wordSize to account for return address )
1110       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
1111 
1112       // We can use r13 as a temp here because compiled code doesn't need r13 as an input,
1113       // and if we end up going through a c2i because of a miss, a reasonable value of r13
1114       // will be generated.
1115       if (!r_2->is_valid()) {
1116         // sign extend???
1117         __ movl(r13, Address(saved_sp, ld_off));
1118         __ movptr(Address(rsp, st_off), r13);
1119       } else {
1120         //
1121         // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE;
1122         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
1123         // so we must adjust where to pick up the data to match the interpreter.
1124         //
1125         // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
1126         // are accessed with negative offsets, so the LSW is at the LOW address.
1127 
1128         // ld_off is MSW so get LSW
1129         const int offset = (bt==T_LONG||bt==T_DOUBLE)?
1130                            next_off : ld_off;
1131         __ movq(r13, Address(saved_sp, offset));
1132         // st_off is LSW (i.e. reg.first())
1133         __ movq(Address(rsp, st_off), r13);
1134       }
1135     } else if (r_1->is_Register()) {  // Register argument
1136       Register r = r_1->as_Register();
1137       assert(r != rax, "must be different");
1138       if (r_2->is_valid()) {
1139         //
1140         // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE;
1141         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
1142         // so we must adjust where to pick up the data to match the interpreter.
1143 
1144         const int offset = (bt==T_LONG||bt==T_DOUBLE)?
1145                            next_off : ld_off;
1146 
1147         // this can be a misaligned move
1148         __ movq(r, Address(saved_sp, offset));
1149       } else {
1150         // sign extend and use a full word?
1151         __ movl(r, Address(saved_sp, ld_off));
1152       }
1153     } else {
1154       if (!r_2->is_valid()) {
1155         __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
1156       } else {
1157         __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
1158       }
1159     }
1160   }
1161 
1162   // 6243940 We might end up in handle_wrong_method if
1163   // the callee is deoptimized as we race through here. If that
1164   // happens we don't want to take a safepoint because the
1165   // caller frame will look interpreted and arguments are now
1166   // "compiled" so it is much better to make this transition
1167   // invisible to the stack walking code. Unfortunately if
1168   // we try and find the callee by normal means a safepoint
1169   // is possible. So we stash the desired callee in the thread
1170   // and the VM will find it there should this case occur.
1171 
1172   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
1173 
1174   // Put the Method* where a c2i would expect it, should we end up there;
1175   // only needed because C2 resolve stubs return the Method* as a result in
1176   // rax.
1177   __ mov(rax, rbx);
1178   __ jmp(r11);
1179 }
1180 
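     // Inline-cache check used by the unverified c2i entries: compare the receiver's
     // klass against the klass cached in the CompiledICHolder (passed in rax) and load
     // the target Method* into rbx. On a klass mismatch, or if the method has been
     // compiled in the meantime (Method::code() != NULL), jump to the IC-miss stub so
     // the call site can be re-resolved; otherwise continue at skip_fixup, which the
     // caller binds in the c2i adapter that follows.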
1181 static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
1182   Label ok;
1183 
1184   Register holder = rax;
1185   Register receiver = j_rarg0;
1186   Register temp = rbx;
1187 
1188   __ load_klass(temp, receiver, rscratch1);
1189   __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
1190   __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
1191   __ jcc(Assembler::equal, ok);
1192   __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1193 
1194   __ bind(ok);
1195   // The method might have been compiled since the call site was patched to
1196   // interpreted; if that is the case, treat it as a miss so we can get
1197   // the call site corrected.
1198   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
1199   __ jcc(Assembler::equal, skip_fixup);
1200   __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1201 }
1202 
1203 // ---------------------------------------------------------------
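     // Generates, into one blob, the full set of adapter entry points for this signature:
     // an i2c entry, the scalarized c2i entry (c2i_entry, using sig_cc/regs_cc), a
     // scalarized c2i entry that does not pack the receiver (c2i_inline_ro_entry, using
     // sig_cc_ro/regs_cc_ro), a non-scalarized c2i entry (c2i_inline_entry, using
     // sig/regs), the corresponding unverified entries that perform the inline-cache
     // check first, and an optional entry that skips the class initialization barrier.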
1204 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,

1205                                                             int comp_args_on_stack,
1206                                                             const GrowableArray<SigEntry>* sig,
1207                                                             const VMRegPair* regs,
1208                                                             const GrowableArray<SigEntry>* sig_cc,
1209                                                             const VMRegPair* regs_cc,
1210                                                             const GrowableArray<SigEntry>* sig_cc_ro,
1211                                                             const VMRegPair* regs_cc_ro,
1212                                                             AdapterFingerPrint* fingerprint,
1213                                                             AdapterBlob*& new_adapter,
1214                                                             bool allocate_code_blob) {
1215   address i2c_entry = __ pc();
1216   gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);

1217 
1218   // -------------------------------------------------------------------------
1219   // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
1220   // to the interpreter.  The args start out packed in the compiled layout.  They
1221   // need to be unpacked into the interpreter layout.  This will almost always
1222   // require some stack space.  We grow the current (compiled) stack, then repack
1223   // the args.  We  finally end in a jump to the generic interpreter entry point.
1224   // On exit from the interpreter, the interpreter will restore our SP (lest the
1225   // compiled code, which relies solely on SP and not RBP, get sick).
1226 
1227   address c2i_unverified_entry = __ pc();
1228   Label skip_fixup;





1229 
1230   gen_inline_cache_check(masm, skip_fixup);





1231 
1232   OopMapSet* oop_maps = new OopMapSet();
1233   int frame_complete = CodeOffsets::frame_never_safe;
1234   int frame_size_in_words = 0;
1235 
1236   // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
1237   address c2i_inline_ro_entry = __ pc();
1238   if (regs_cc != regs_cc_ro) {
1239     gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
1240     skip_fixup.reset();
1241   }
1242 
1243   // Scalarized c2i adapter
1244   address c2i_entry = __ pc();
1245 
1246   // Class initialization barrier for static methods
1247   address c2i_no_clinit_check_entry = NULL;
1248   if (VM_Version::supports_fast_class_init_checks()) {
1249     Label L_skip_barrier;
1250     Register method = rbx;
1251 
1252     { // Bypass the barrier for non-static methods
1253       Register flags  = rscratch1;
1254       __ movl(flags, Address(method, Method::access_flags_offset()));
1255       __ testl(flags, JVM_ACC_STATIC);
1256       __ jcc(Assembler::zero, L_skip_barrier); // non-static
1257     }
1258 
1259     Register klass = rscratch1;
1260     __ load_method_holder(klass, method);
1261     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
1262 
1263     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1264 
1265     __ bind(L_skip_barrier);
1266     c2i_no_clinit_check_entry = __ pc();
1267   }
1268 
1269   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1270   bs->c2i_entry_barrier(masm);
1271 
1272   gen_c2i_adapter(masm, sig_cc, regs_cc, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, true);
1273 
1274   address c2i_unverified_inline_entry = c2i_unverified_entry;
1275 
1276   // Non-scalarized c2i adapter
1277   address c2i_inline_entry = c2i_entry;
1278   if (regs != regs_cc) {
1279     Label inline_entry_skip_fixup;
1280     c2i_unverified_inline_entry = __ pc();
1281     gen_inline_cache_check(masm, inline_entry_skip_fixup);
1282 
1283     c2i_inline_entry = __ pc();
1284     gen_c2i_adapter(masm, sig, regs, inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
1285   }
1286 
1287   __ flush();
1288 
1289   // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
1290   // the GC knows about the locations of the oop arguments passed to the c2i adapter.
1291   if (allocate_code_blob) {
1292     bool caller_must_gc_arguments = (regs != regs_cc);
1293     new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
1294   }
1295 
1296   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, c2i_unverified_inline_entry, c2i_no_clinit_check_entry);
1297 }
1298 
1299 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1300                                          VMRegPair *regs,
1301                                          VMRegPair *regs2,
1302                                          int total_args_passed) {
1303   assert(regs2 == NULL, "not needed on x86");
1304 // We return the number of VMRegImpl stack slots we need to reserve for all
1305 // the arguments, NOT counting out_preserve_stack_slots.
1306 
1307 // NOTE: These arrays will have to change when c1 is ported
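     // For reference: on Windows the first four arguments share positions between the
     // integer and XMM register files (hence the counters below bump both int_args and
     // fp_args), and stack slots are always reserved so the callee can home register
     // arguments; on the System V (Linux/BSD/macOS) side there are six integer argument
     // registers and eight XMM argument registers, and no such shared home slots.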
1308 #ifdef _WIN64
1309     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1310       c_rarg0, c_rarg1, c_rarg2, c_rarg3
1311     };
1312     static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1313       c_farg0, c_farg1, c_farg2, c_farg3
1314     };
1315 #else
1316     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {

1334       case T_BYTE:
1335       case T_SHORT:
1336       case T_INT:
1337         if (int_args < Argument::n_int_register_parameters_c) {
1338           regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
1339 #ifdef _WIN64
1340           fp_args++;
1341           // Allocate slots for the callee to stuff register args on the stack.
1342           stk_args += 2;
1343 #endif
1344         } else {
1345           regs[i].set1(VMRegImpl::stack2reg(stk_args));
1346           stk_args += 2;
1347         }
1348         break;
1349       case T_LONG:
1350         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
1351         // fall through
1352       case T_OBJECT:
1353       case T_ARRAY:
1354       case T_INLINE_TYPE:
1355       case T_ADDRESS:
1356       case T_METADATA:
1357         if (int_args < Argument::n_int_register_parameters_c) {
1358           regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
1359 #ifdef _WIN64
1360           fp_args++;
1361           stk_args += 2;
1362 #endif
1363         } else {
1364           regs[i].set2(VMRegImpl::stack2reg(stk_args));
1365           stk_args += 2;
1366         }
1367         break;
1368       case T_FLOAT:
1369         if (fp_args < Argument::n_float_register_parameters_c) {
1370           regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
1371 #ifdef _WIN64
1372           int_args++;
1373           // Allocate slots for the callee to stuff register args on the stack.
1374           stk_args += 2;

2042 
2043   int temploc = -1;
2044   for (int ai = 0; ai < arg_order.length(); ai += 2) {
2045     int i = arg_order.at(ai);
2046     int c_arg = arg_order.at(ai + 1);
2047     __ block_comment(err_msg("move %d -> %d", i, c_arg));
2048 #ifdef ASSERT
2049     if (in_regs[i].first()->is_Register()) {
2050       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
2051     } else if (in_regs[i].first()->is_XMMRegister()) {
2052       assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!");
2053     }
2054     if (out_regs[c_arg].first()->is_Register()) {
2055       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
2056     } else if (out_regs[c_arg].first()->is_XMMRegister()) {
2057       freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
2058     }
2059 #endif /* ASSERT */
2060     switch (in_sig_bt[i]) {
2061       case T_ARRAY:
2062       case T_INLINE_TYPE:
2063       case T_OBJECT:
2064         __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
2065                     ((i == 0) && (!is_static)),
2066                     &receiver_offset);
2067         break;
2068       case T_VOID:
2069         break;
2070 
2071       case T_FLOAT:
2072         __ float_move(in_regs[i], out_regs[c_arg]);
2073         break;
2074 
2075       case T_DOUBLE:
2076         assert( i + 1 < total_in_args &&
2077                 in_sig_bt[i + 1] == T_VOID &&
2078                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
2079         __ double_move(in_regs[i], out_regs[c_arg]);
2080         break;
2081 
2082       case T_LONG :

2165 
2166   if (method->is_synchronized()) {
2167 
2168     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
2169 
2170     // Get the handle (the 2nd argument)
2171     __ mov(oop_handle_reg, c_rarg1);
2172 
2173     // Get address of the box
2174 
2175     __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
2176 
2177     // Load the oop from the handle
2178     __ movptr(obj_reg, Address(oop_handle_reg, 0));
2179 
2180     // Load immediate 1 into swap_reg %rax
2181     __ movl(swap_reg, 1);
2182 
2183     // Load (object->mark() | 1) into swap_reg %rax
2184     __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2185     if (EnableValhalla) {
2186       // Mask inline_type bit such that we go to the slow path if object is an inline type
2187       __ andptr(swap_reg, ~((int) markWord::inline_type_bit_in_place));
2188     }
2189 
2190 
2191     // Save (object->mark() | 1) into BasicLock's displaced header
2192     __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
2193 
2194     // src -> dest iff dest == rax else rax <- dest
2195     __ lock();
2196     __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2197     __ jcc(Assembler::equal, lock_done);
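         // If the CAS succeeded, the object header now points at our BasicLock and we
         // own the stack lock; otherwise rax holds the current mark word and we fall
         // into the recursive stack-lock test below.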
2198 
2199     // Hmm should this move to the slow path code area???
2200 
2201     // Test if the oopMark is an obvious stack pointer, i.e.,
2202     //  1) (mark & 3) == 0, and
2203     //  2) rsp <= mark < mark + os::pagesize()
2204     // These 3 tests can be done by evaluating the following
2205     // expression: ((mark - rsp) & (3 - os::vm_page_size())),
2206     // assuming both stack pointer and pagesize have their
2207     // least significant 2 bits clear.
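         // Worked example, assuming a 4K page: 3 - 4096 == 0x...F003, so the AND is zero
         // only when bits 0-1 of (mark - rsp) are clear (the mark is 4-byte aligned,
         // given rsp's low bits are clear) and bits 12 and up are clear
         // (0 <= mark - rsp < 4096, i.e. rsp <= mark < rsp + page size).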
2208     // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
2209 

2227   // Now set thread in native
2228   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
2229 
2230   __ call(RuntimeAddress(native_func));
2231 
2232   // Verify or restore cpu control state after JNI call
2233   __ restore_cpu_control_state_after_jni();
2234 
2235   // Unpack native results.
2236   switch (ret_type) {
2237   case T_BOOLEAN: __ c2bool(rax);            break;
2238   case T_CHAR   : __ movzwl(rax, rax);      break;
2239   case T_BYTE   : __ sign_extend_byte (rax); break;
2240   case T_SHORT  : __ sign_extend_short(rax); break;
2241   case T_INT    : /* nothing to do */        break;
2242   case T_DOUBLE :
2243   case T_FLOAT  :
2244     // Result is in xmm0; we'll save as needed
2245     break;
2246   case T_ARRAY:                 // Really a handle
2247   case T_INLINE_TYPE:           // Really a handle
2248   case T_OBJECT:                // Really a handle
2249       break; // can't de-handlize until after safepoint check
2250   case T_VOID: break;
2251   case T_LONG: break;
2252   default       : ShouldNotReachHere();
2253   }
2254 
2255   Label after_transition;
2256 
2257   // Switch thread to "native transition" state before reading the synchronization state.
2258   // This additional state is necessary because reading and testing the synchronization
2259   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2260   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2261   //     VM thread changes sync state to synchronizing and suspends threads for GC.
2262   //     Thread A is resumed to finish this native method, but doesn't block here since it
2263   //     didn't see any synchronization in progress, and escapes.
2264   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2265 
2266   // Force this write out before the read below
2267   __ membar(Assembler::Membar_mask_bits(

3965   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
3966 #endif
3967   // Clear the exception oop so GC no longer processes it as a root.
3968   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);
3969 
3970   // rax: exception oop
3971   // r8:  exception handler
3972   // rdx: exception pc
3973   // Jump to handler
3974 
3975   __ jmp(r8);
3976 
3977   // Make sure all code is generated
3978   masm->flush();
3979 
3980   // Set exception blob
3981   _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
3982 }
3983 #endif // COMPILER2
3984 
3985 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
3986   BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K);
3987   CodeBuffer buffer(buf);
3988   short buffer_locs[20];
3989   buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs,
3990                                          sizeof(buffer_locs)/sizeof(relocInfo));
3991 
3992   MacroAssembler* masm = new MacroAssembler(&buffer);
3993 
3994   const Array<SigEntry>* sig_vk = vk->extended_sig();
3995   const Array<VMRegPair>* regs = vk->return_regs();
3996 
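       // The blob below has three entry points, whose offsets are recorded and handed to
       // BufferedInlineTypeBlob::create() at the end: pack_fields_jobject (resolve the
       // pre-allocated buffer from a JNI handle, then fall through), pack_fields (store
       // the field values currently held in the return registers into the buffer
       // addressed by rax), and unpack_fields (load the field values from the buffer in
       // rax back into the return registers).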
3997   int pack_fields_jobject_off = __ offset();
3998   // Resolve pre-allocated buffer from JNI handle.
3999   // We cannot do this in generate_call_stub() because it requires GC code to be initialized.
4000   __ movptr(rax, Address(r13, 0));
4001   __ resolve_jobject(rax /* value */,
4002                      r15_thread /* thread */,
4003                      r12 /* tmp */);
4004   __ movptr(Address(r13, 0), rax);
4005 
4006   int pack_fields_off = __ offset();
4007 
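       // j indexes the VMRegPair for each field; it starts at 1 because regs->at(0) is
       // not a field slot (presumably it describes the buffered oop itself), and the
       // assert after each loop checks that every field register was consumed.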
4008   int j = 1;
4009   for (int i = 0; i < sig_vk->length(); i++) {
4010     BasicType bt = sig_vk->at(i)._bt;
4011     if (bt == T_INLINE_TYPE) {
4012       continue;
4013     }
4014     if (bt == T_VOID) {
4015       if (sig_vk->at(i-1)._bt == T_LONG ||
4016           sig_vk->at(i-1)._bt == T_DOUBLE) {
4017         j++;
4018       }
4019       continue;
4020     }
4021     int off = sig_vk->at(i)._offset;
4022     assert(off > 0, "offset in object should be positive");
4023     VMRegPair pair = regs->at(j);
4024     VMReg r_1 = pair.first();
4025     VMReg r_2 = pair.second();
4026     Address to(rax, off);
4027     if (bt == T_FLOAT) {
4028       __ movflt(to, r_1->as_XMMRegister());
4029     } else if (bt == T_DOUBLE) {
4030       __ movdbl(to, r_1->as_XMMRegister());
4031     } else {
4032       Register val = r_1->as_Register();
4033       assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1);
4034       if (is_reference_type(bt)) {
4035         __ store_heap_oop(to, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
4036       } else {
4037         __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt));
4038       }
4039     }
4040     j++;
4041   }
4042   assert(j == regs->length(), "missed a field?");
4043 
4044   __ ret(0);
4045 
4046   int unpack_fields_off = __ offset();
4047 
4048   j = 1;
4049   for (int i = 0; i < sig_vk->length(); i++) {
4050     BasicType bt = sig_vk->at(i)._bt;
4051     if (bt == T_INLINE_TYPE) {
4052       continue;
4053     }
4054     if (bt == T_VOID) {
4055       if (sig_vk->at(i-1)._bt == T_LONG ||
4056           sig_vk->at(i-1)._bt == T_DOUBLE) {
4057         j++;
4058       }
4059       continue;
4060     }
4061     int off = sig_vk->at(i)._offset;
4062     assert(off > 0, "offset in object should be positive");
4063     VMRegPair pair = regs->at(j);
4064     VMReg r_1 = pair.first();
4065     VMReg r_2 = pair.second();
4066     Address from(rax, off);
4067     if (bt == T_FLOAT) {
4068       __ movflt(r_1->as_XMMRegister(), from);
4069     } else if (bt == T_DOUBLE) {
4070       __ movdbl(r_1->as_XMMRegister(), from);
4071     } else if (bt == T_OBJECT || bt == T_ARRAY) {
4072       assert_different_registers(rax, r_1->as_Register());
4073       __ load_heap_oop(r_1->as_Register(), from);
4074     } else {
4075       assert(is_java_primitive(bt), "unexpected basic type");
4076       assert_different_registers(rax, r_1->as_Register());
4077       size_t size_in_bytes = type2aelembytes(bt);
4078       __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN);
4079     }
4080     j++;
4081   }
4082   assert(j == regs->length(), "missed a field?");
4083 
4084   if (StressInlineTypeReturnedAsFields) {
4085     __ load_klass(rax, rax, rscratch1);
4086     __ orptr(rax, 1);
4087   }
4088 
4089   __ ret(0);
4090 
4091   __ flush();
4092 
4093   return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off);
4094 }
4095 
4096 void SharedRuntime::compute_move_order(const BasicType* in_sig_bt,
4097                                        int total_in_args, const VMRegPair* in_regs,
4098                                        int total_out_args, VMRegPair* out_regs,
4099                                        GrowableArray<int>& arg_order,
4100                                        VMRegPair tmp_vmreg) {
4101   ComputeMoveOrder order(total_in_args, in_regs,
4102                          total_out_args, out_regs,
4103                          in_sig_bt, arg_order, tmp_vmreg);
4104 }
< prev index next >