
src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp


 465       else if( freg_arg1 == fltarg_flt_dbl ) freg_arg1 = i;
 466       else // Else double is passed low on the stack to be aligned.
 467         stack += 2;
 468     } else if( sig_bt[i] == T_LONG ) {
 469       stack += 2;
 470     }
 471   }
 472   int dstack = 0;             // Separate counter for placing doubles
 473 
 474   // Now pick where all else goes.
 475   for( i = 0; i < total_args_passed; i++) {
 476     // From the type and the argument number (count) compute the location
 477     switch( sig_bt[i] ) {
 478     case T_SHORT:
 479     case T_CHAR:
 480     case T_BYTE:
 481     case T_BOOLEAN:
 482     case T_INT:
 483     case T_ARRAY:
 484     case T_OBJECT:

 485     case T_ADDRESS:
 486       if( reg_arg0 == 9999 )  {
 487         reg_arg0 = i;
 488         regs[i].set1(rcx->as_VMReg());
 489       } else if( reg_arg1 == 9999 )  {
 490         reg_arg1 = i;
 491         regs[i].set1(rdx->as_VMReg());
 492       } else {
 493         regs[i].set1(VMRegImpl::stack2reg(stack++));
 494       }
 495       break;
 496     case T_FLOAT:
 497       if( freg_arg0 == fltarg_flt_dbl || freg_arg0 == fltarg_float_only ) {
 498         freg_arg0 = i;
 499         regs[i].set1(xmm0->as_VMReg());
 500       } else if( freg_arg1 == fltarg_flt_dbl || freg_arg1 == fltarg_float_only ) {
 501         freg_arg1 = i;
 502         regs[i].set1(xmm1->as_VMReg());
 503       } else {
 504         regs[i].set1(VMRegImpl::stack2reg(stack++));

 515         regs[i].set2(xmm0->as_VMReg());
 516       } else if( freg_arg1 == (uint)i ) {
 517         regs[i].set2(xmm1->as_VMReg());
 518       } else {
 519         regs[i].set2(VMRegImpl::stack2reg(dstack));
 520         dstack += 2;
 521       }
 522       break;
 523     case T_VOID: regs[i].set_bad(); break;
 525     default:
 526       ShouldNotReachHere();
 527       break;
 528     }
 529   }
 530 
 531   // The return value can be an odd number of VMRegImpl stack slots; make it a multiple of 2.
 532   return align_up(stack, 2);
 533 }
 534 
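As a rough illustration of the convention computed above (a sketch only, assuming UseSSE >= 2 and that the elided first pass left stack == 0 because the signature has no longs or doubles), a hypothetical signature (int, int, int, float, float) would come out as:

    //   sig_bt[0] T_INT   -> rcx
    //   sig_bt[1] T_INT   -> rdx
    //   sig_bt[2] T_INT   -> VMRegImpl::stack2reg(0)
    //   sig_bt[3] T_FLOAT -> xmm0
    //   sig_bt[4] T_FLOAT -> xmm1
    // java_calling_convention returns align_up(1, 2) == 2 outgoing VMRegImpl slots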









 535 // Patch the callers callsite with entry to compiled code if it exists.
 536 static void patch_callers_callsite(MacroAssembler *masm) {
 537   Label L;
 538   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
 539   __ jcc(Assembler::equal, L);
 540   // Schedule the branch target address early.
 541   // Call into the VM to patch the caller, then jump to compiled callee
 542   // rax, isn't live so capture return address while we easily can
 543   __ movptr(rax, Address(rsp, 0));
 544   __ pusha();
 545   __ pushf();
 546 
 547   if (UseSSE == 1) {
 548     __ subptr(rsp, 2*wordSize);
 549     __ movflt(Address(rsp, 0), xmm0);
 550     __ movflt(Address(rsp, wordSize), xmm1);
 551   }
 552   if (UseSSE >= 2) {
 553     __ subptr(rsp, 4*wordSize);
 554     __ movdbl(Address(rsp, 0), xmm0);

 576     __ addptr(rsp, 2*wordSize);
 577   }
 578   if (UseSSE >= 2) {
 579     __ movdbl(xmm0, Address(rsp, 0));
 580     __ movdbl(xmm1, Address(rsp, 2*wordSize));
 581     __ addptr(rsp, 4*wordSize);
 582   }
 583 
 584   __ popf();
 585   __ popa();
 586   __ bind(L);
 587 }
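A note on the UseSSE spill blocks above: xmm0/xmm1 are saved and restored around the (elided) runtime call presumably because they may hold the outgoing float/double arguments of the Java call being patched, and the patching call is free to clobber them.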
 588 
 589 
 590 static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
 591   int next_off = st_off - Interpreter::stackElementSize;
 592   __ movdbl(Address(rsp, next_off), r);
 593 }
 594 
 595 static void gen_c2i_adapter(MacroAssembler *masm,
 596                             int total_args_passed,
 597                             int comp_args_on_stack,
 598                             const BasicType *sig_bt,
 599                             const VMRegPair *regs,
 600                             Label& skip_fixup) {




 601   // Before we get into the guts of the C2I adapter, see if we should be here
 602   // at all.  We've come from compiled code and are attempting to jump to the
 603   // interpreter, which means the caller made a static call to get here
 604   // (vcalls always get a compiled target if there is one).  Check for a
 605   // compiled target.  If there is one, we need to patch the caller's call.
 606   patch_callers_callsite(masm);
 607 
 608   __ bind(skip_fixup);
 609 
 610 #ifdef COMPILER2
 611   // C2 may leave the stack dirty if not in SSE2+ mode
 612   if (UseSSE >= 2) {
 613     __ verify_FPU(0, "c2i transition should have clean FPU stack");
 614   } else {
 615     __ empty_FPU_stack();
 616   }
 617 #endif /* COMPILER2 */
 618 
 619   // Since all args are passed on the stack,
 620   // total_args_passed * Interpreter::stackElementSize
 621   // is the space we need.
 622   int extraspace = total_args_passed * Interpreter::stackElementSize;
 623 
 624   // Get return address
 625   __ pop(rax);
 626 
 627   // set senderSP value
 628   __ movptr(rsi, rsp);
 629 
 630   __ subptr(rsp, extraspace);
 631 
 632   // Now write the args into the outgoing interpreter space
 633   for (int i = 0; i < total_args_passed; i++) {
 634     if (sig_bt[i] == T_VOID) {
 635       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 636       continue;
 637     }
 638 
 639     // st_off points to lowest address on stack.
 640     int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
 641     int next_off = st_off - Interpreter::stackElementSize;
 642 
 643     // Say 4 args:
 644     // i   st_off
 645     // 0   12 T_LONG
 646     // 1    8 T_VOID
 647     // 2    4 T_OBJECT
 648     // 3    0 T_BOOL
 649     VMReg r_1 = regs[i].first();
 650     VMReg r_2 = regs[i].second();
 651     if (!r_1->is_valid()) {
 652       assert(!r_2->is_valid(), "");
 653       continue;
 654     }
 655 
 656     if (r_1->is_stack()) {
 657       // memory to memory use fpu stack top
 658       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 659 
 660       if (!r_2->is_valid()) {

 666         // st_off == MSW, st_off-wordSize == LSW
 667 
 668         __ movptr(rdi, Address(rsp, ld_off));
 669         __ movptr(Address(rsp, next_off), rdi);
 670         __ movptr(rdi, Address(rsp, ld_off + wordSize));
 671         __ movptr(Address(rsp, st_off), rdi);
 672       }
 673     } else if (r_1->is_Register()) {
 674       Register r = r_1->as_Register();
 675       if (!r_2->is_valid()) {
 676         __ movl(Address(rsp, st_off), r);
 677       } else {
 678         // long/double in gpr
 679         ShouldNotReachHere();
 680       }
 681     } else {
 682       assert(r_1->is_XMMRegister(), "");
 683       if (!r_2->is_valid()) {
 684         __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
 685       } else {
 686         assert(sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG, "wrong type");
 687         move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
 688       }
 689     }
 690   }
 691 
 692   // Schedule the branch target address early.
 693   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
 694   // And repush original return address
 695   __ push(rax);
 696   __ jmp(rcx);
 697 }
 698 
 699 
 700 static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
 701   int next_val_off = ld_off - Interpreter::stackElementSize;
 702   __ movdbl(r, Address(saved_sp, next_val_off));
 703 }
 704 
 705 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
 706                         address code_start, address code_end,
 707                         Label& L_ok) {
 708   Label L_fail;
 709   __ lea(temp_reg, ExternalAddress(code_start));
 710   __ cmpptr(pc_reg, temp_reg);
 711   __ jcc(Assembler::belowEqual, L_fail);
 712   __ lea(temp_reg, ExternalAddress(code_end));
 713   __ cmpptr(pc_reg, temp_reg);
 714   __ jcc(Assembler::below, L_ok);
 715   __ bind(L_fail);
 716 }
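Reading the branches in range_check: control reaches L_ok only when code_start < pc_reg < code_end (unsigned compares); otherwise execution falls through past the locally bound L_fail, so the caller can emit whatever failure handling it wants immediately after the call.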
 717 
 718 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 719                                     int total_args_passed,
 720                                     int comp_args_on_stack,
 721                                     const BasicType *sig_bt,
 722                                     const VMRegPair *regs) {

 723   // Note: rsi contains the senderSP on entry. We must preserve it since
 724   // we may do an i2c -> c2i transition if we lose a race where compiled
 725   // code goes non-entrant while we get args ready.
 726 
 727   // Adapters can be frameless because they do not require the caller
 728   // to perform additional cleanup work, such as correcting the stack pointer.
 729   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 730   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 731   // even if a callee has modified the stack pointer.
 732   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
 733   // routinely repairs its caller's stack pointer (from sender_sp, which is set
 734   // up via the senderSP register).
 735   // In other words, if *either* the caller or callee is interpreted, we can
 736   // get the stack pointer repaired after a call.
 737   // This is why c2i and i2c adapters cannot be indefinitely composed.
 738   // In particular, if a c2i adapter were to somehow call an i2c adapter,
 739   // both caller and callee would be compiled methods, and neither would
 740   // clean up the stack pointer changes performed by the two adapters.
 741   // If this happens, control eventually transfers back to the compiled
 742   // caller, but with an uncorrected stack, causing delayed havoc.

 791   }
 792 
 793   // Align the outgoing SP
 794   __ andptr(rsp, -(StackAlignmentInBytes));
 795 
 796   // push the return address on the stack (note that pushing, rather
 797   // than storing it, yields the correct frame alignment for the callee)
 798   __ push(rax);
 799 
 800   // Put saved SP in another register
 801   const Register saved_sp = rax;
 802   __ movptr(saved_sp, rdi);
 803 
 804 
 805   // Will jump to the compiled code just as if compiled code was doing it.
 806   // Pre-load the register-jump target early, to schedule it better.
 807   __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset())));
 808 
 809   // Now generate the shuffle code.  Pick up all register args and move the
 810   // rest through the floating point stack top.
 811   for (int i = 0; i < total_args_passed; i++) {
 812     if (sig_bt[i] == T_VOID) {
 813       // Longs and doubles are passed in native word order, but misaligned
 814       // in the 32-bit build.
 815       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 816       continue;
 817     }
 818 
 819     // Pick up 0, 1 or 2 words from SP+offset.
 820 
 821     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 822             "scrambled load targets?");
 823     // Load in argument order going down.
 824     int ld_off = (total_args_passed - i) * Interpreter::stackElementSize;
 825     // Point to interpreter value (vs. tag)
 826     int next_off = ld_off - Interpreter::stackElementSize;
 827     //
 828     //
 829     //
 830     VMReg r_1 = regs[i].first();
 831     VMReg r_2 = regs[i].second();
 832     if (!r_1->is_valid()) {
 833       assert(!r_2->is_valid(), "");
 834       continue;
 835     }
 836     if (r_1->is_stack()) {
 837       // Convert stack slot to an SP offset (+ wordSize to account for return address )
 838       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
 839 
 840       // We can use rsi as a temp here because compiled code doesn't need rsi as an input,
 841       // and if we end up going through a c2i because of a miss, a reasonable value of rsi
 842       // will be generated.
 843       if (!r_2->is_valid()) {
 844         // __ fld_s(Address(saved_sp, ld_off));

 902   // "compiled" so it is much better to make this transition
 903   // invisible to the stack walking code. Unfortunately if
 904   // we try and find the callee by normal means a safepoint
 905   // is possible. So we stash the desired callee in the thread
 906   // and the vm will find it there should this case occur.
 907 
 908   __ get_thread(rax);
 909   __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
 910 
 911   // move Method* to rax, in case we end up in a c2i adapter.
 912   // the c2i adapters expect Method* in rax, (c2) because c2's
 913   // resolve stubs return the result (the method) in rax,.
 914   // I'd love to fix this.
 915   __ mov(rax, rbx);
 916 
 917   __ jmp(rdi);
 918 }
 919 
 920 // ---------------------------------------------------------------
 921 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 922                                                             int total_args_passed,
 923                                                             int comp_args_on_stack,
 924                                                             const BasicType *sig_bt,
 925                                                             const VMRegPair *regs,
 926                                                             AdapterFingerPrint* fingerprint) {

 927   address i2c_entry = __ pc();
 928 
 929   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 930 
 931   // -------------------------------------------------------------------------
 932   // Generate a C2I adapter.  On entry we know rbx, holds the Method* during calls
 933   // to the interpreter.  The args start out packed in the compiled layout.  They
 934   // need to be unpacked into the interpreter layout.  This will almost always
 935   // require some stack space.  We grow the current (compiled) stack, then repack
 936   // the args.  We  finally end in a jump to the generic interpreter entry point.
 937   // On exit from the interpreter, the interpreter will restore our SP (lest the
 938   // compiled code, which relies solely on SP and not EBP, get sick).
 939 
 940   address c2i_unverified_entry = __ pc();
 941   Label skip_fixup;
 942 
 943   Register holder = rax;
 944   Register receiver = rcx;
 945   Register temp = rbx;
 946 
 947   {
 948 
 949     Label missed;
 950     __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
 951     __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
 952     __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
 953     __ jcc(Assembler::notEqual, missed);
 954     // Method might have been compiled since the call site was patched to
 955     // interpreted; if that is the case, treat it as a miss so we can get
 956     // the call site corrected.
 957     __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
 958     __ jcc(Assembler::equal, skip_fixup);
 959 
 960     __ bind(missed);
 961     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 962   }
 963 
 964   address c2i_entry = __ pc();
 965 
 966   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 967   bs->c2i_entry_barrier(masm);
 968 
 969   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);



 970 
 971   __ flush();

 972   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
 973 }
 974 
 975 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 976                                          VMRegPair *regs,
 977                                          VMRegPair *regs2,
 978                                          int total_args_passed) {
 979   assert(regs2 == NULL, "not needed on x86");
 980 // We return the amount of VMRegImpl stack slots we need to reserve for all
 981 // the arguments NOT counting out_preserve_stack_slots.
 982 
 983   uint    stack = 0;        // All arguments on stack
 984 
 985   for( int i = 0; i < total_args_passed; i++) {
 986     // From the type and the argument number (count) compute the location
 987     switch( sig_bt[i] ) {
 988     case T_BOOLEAN:
 989     case T_CHAR:
 990     case T_FLOAT:
 991     case T_BYTE:
 992     case T_SHORT:
 993     case T_INT:
 994     case T_OBJECT:

 995     case T_ARRAY:
 996     case T_ADDRESS:
 997     case T_METADATA:
 998       regs[i].set1(VMRegImpl::stack2reg(stack++));
 999       break;
1000     case T_LONG:
1001     case T_DOUBLE: // The stack numbering is reversed from Java
1002       // Since C arguments do not get reversed, the ordering for
1003       // doubles on the stack must be opposite the Java convention
1004       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
1005       regs[i].set2(VMRegImpl::stack2reg(stack));
1006       stack += 2;
1007       break;
1008     case T_VOID: regs[i].set_bad(); break;
1009     default:
1010       ShouldNotReachHere();
1011       break;
1012     }
1013   }
1014   return stack;
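Everything in this C convention lands on the stack, one 32-bit VMRegImpl slot per word and two for longs/doubles. A small sketch for a hypothetical native signature (jint, jlong, jdouble), i.e. sig_bt = { T_INT, T_LONG, T_VOID, T_DOUBLE, T_VOID }:

    //   T_INT    -> VMRegImpl::stack2reg(0)
    //   T_LONG   -> stack slots 1-2
    //   T_DOUBLE -> stack slots 3-4
    // c_calling_convention returns 5 (not counting out_preserve_stack_slots)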

1558   int receiver_offset = -1;
1559 
1560   // This is a trick. We double the stack slots so we can claim
1561   // the oops in the caller's frame. Since we are sure to have
1562   // more args than the caller, doubling is enough to make
1563   // sure we can capture all the incoming oop args from the
1564   // caller.
1565   //
1566   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1567 
1568   // Mark location of rbp,
1569   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1570 
1571   // We know that we only have args in at most two integer registers (rcx, rdx), so rax and rbx
1572   // are free to use as temporaries if we have to do stack to stack moves.
1573   // All inbound args are referenced based on rbp, and all outbound args via rsp.
1574 
1575   for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
1576     switch (in_sig_bt[i]) {
1577       case T_ARRAY:

1578       case T_OBJECT:
1579         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1580                     ((i == 0) && (!is_static)),
1581                     &receiver_offset);
1582         break;
1583       case T_VOID:
1584         break;
1585 
1586       case T_FLOAT:
1587         float_move(masm, in_regs[i], out_regs[c_arg]);
1588           break;
1589 
1590       case T_DOUBLE:
1591         assert( i + 1 < total_in_args &&
1592                 in_sig_bt[i + 1] == T_VOID &&
1593                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1594         double_move(masm, in_regs[i], out_regs[c_arg]);
1595         break;
1596 
1597       case T_LONG :

1732   // Verify or restore cpu control state after JNI call
1733   __ restore_cpu_control_state_after_jni(noreg);
1734 
1735   // WARNING - on Windows Java Natives use pascal calling convention and pop the
1736   // arguments off of the stack. We could just re-adjust the stack pointer here
1737   // and continue to do SP relative addressing but we instead switch to FP
1738   // relative addressing.
1739 
1740   // Unpack native results.
1741   switch (ret_type) {
1742   case T_BOOLEAN: __ c2bool(rax);            break;
1743   case T_CHAR   : __ andptr(rax, 0xFFFF);    break;
1744   case T_BYTE   : __ sign_extend_byte (rax); break;
1745   case T_SHORT  : __ sign_extend_short(rax); break;
1746   case T_INT    : /* nothing to do */        break;
1747   case T_DOUBLE :
1748   case T_FLOAT  :
1749     // Result is in st0 we'll save as needed
1750     break;
1751   case T_ARRAY:                 // Really a handle

1752   case T_OBJECT:                // Really a handle
1753       break; // can't de-handlize until after safepoint check
1754   case T_VOID: break;
1755   case T_LONG: break;
1756   default       : ShouldNotReachHere();
1757   }
1758 
1759   Label after_transition;
1760 
1761   // Switch thread to "native transition" state before reading the synchronization state.
1762   // This additional state is necessary because reading and testing the synchronization
1763   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1764   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1765   //     VM thread changes sync state to synchronizing and suspends threads for GC.
1766   //     Thread A is resumed to finish this native method, but doesn't block here since it
 1767   //     didn't see any synchronization in progress, and escapes.
1768   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
1769 
1770   // Force this write out before the read below
1771   __ membar(Assembler::Membar_mask_bits(

2819 
2820   __ bind(pending);
2821 
2822   RegisterSaver::restore_live_registers(masm);
2823 
2824   // exception pending => remove activation and forward to exception handler
2825 
2826   __ get_thread(thread);
2827   __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
2828   __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
2829   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2830 
2831   // -------------
2832   // make sure all code is generated
2833   masm->flush();
2834 
2835   // return the  blob
2836   // frame_size_words or bytes??
2837   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
2838 }






 465       else if( freg_arg1 == fltarg_flt_dbl ) freg_arg1 = i;
 466       else // Else double is passed low on the stack to be aligned.
 467         stack += 2;
 468     } else if( sig_bt[i] == T_LONG ) {
 469       stack += 2;
 470     }
 471   }
 472   int dstack = 0;             // Separate counter for placing doubles
 473 
 474   // Now pick where all else goes.
 475   for( i = 0; i < total_args_passed; i++) {
 476     // From the type and the argument number (count) compute the location
 477     switch( sig_bt[i] ) {
 478     case T_SHORT:
 479     case T_CHAR:
 480     case T_BYTE:
 481     case T_BOOLEAN:
 482     case T_INT:
 483     case T_ARRAY:
 484     case T_OBJECT:
 485     case T_PRIMITIVE_OBJECT:
 486     case T_ADDRESS:
 487       if( reg_arg0 == 9999 )  {
 488         reg_arg0 = i;
 489         regs[i].set1(rcx->as_VMReg());
 490       } else if( reg_arg1 == 9999 )  {
 491         reg_arg1 = i;
 492         regs[i].set1(rdx->as_VMReg());
 493       } else {
 494         regs[i].set1(VMRegImpl::stack2reg(stack++));
 495       }
 496       break;
 497     case T_FLOAT:
 498       if( freg_arg0 == fltarg_flt_dbl || freg_arg0 == fltarg_float_only ) {
 499         freg_arg0 = i;
 500         regs[i].set1(xmm0->as_VMReg());
 501       } else if( freg_arg1 == fltarg_flt_dbl || freg_arg1 == fltarg_float_only ) {
 502         freg_arg1 = i;
 503         regs[i].set1(xmm1->as_VMReg());
 504       } else {
 505         regs[i].set1(VMRegImpl::stack2reg(stack++));

 516         regs[i].set2(xmm0->as_VMReg());
 517       } else if( freg_arg1 == (uint)i ) {
 518         regs[i].set2(xmm1->as_VMReg());
 519       } else {
 520         regs[i].set2(VMRegImpl::stack2reg(dstack));
 521         dstack += 2;
 522       }
 523       break;
 524     case T_VOID: regs[i].set_bad(); break;
 526     default:
 527       ShouldNotReachHere();
 528       break;
 529     }
 530   }
 531 
 532   // The return value can be an odd number of VMRegImpl stack slots; make it a multiple of 2.
 533   return align_up(stack, 2);
 534 }
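Because the (elided) first pass lets the first two doubles claim freg_arg0/freg_arg1 before any float is placed, a hypothetical signature (double, float, float), i.e. sig_bt = { T_DOUBLE, T_VOID, T_FLOAT, T_FLOAT }, would plausibly come out as (again assuming UseSSE >= 2):

    //   T_DOUBLE -> xmm0 (claimed in the first pass)
    //   T_FLOAT  -> xmm1
    //   T_FLOAT  -> VMRegImpl::stack2reg(0)
    // java_calling_convention returns align_up(1, 2) == 2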
 535 
 536 const uint SharedRuntime::java_return_convention_max_int = 1;
 537 const uint SharedRuntime::java_return_convention_max_float = 1;
 538 int SharedRuntime::java_return_convention(const BasicType *sig_bt,
 539                                           VMRegPair *regs,
 540                                           int total_args_passed) {
 541   Unimplemented();
 542   return 0;
 543 }
 544 
 545 // Patch the callers callsite with entry to compiled code if it exists.
 546 static void patch_callers_callsite(MacroAssembler *masm) {
 547   Label L;
 548   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
 549   __ jcc(Assembler::equal, L);
 550   // Schedule the branch target address early.
 551   // Call into the VM to patch the caller, then jump to compiled callee
 552   // rax, isn't live so capture return address while we easily can
 553   __ movptr(rax, Address(rsp, 0));
 554   __ pusha();
 555   __ pushf();
 556 
 557   if (UseSSE == 1) {
 558     __ subptr(rsp, 2*wordSize);
 559     __ movflt(Address(rsp, 0), xmm0);
 560     __ movflt(Address(rsp, wordSize), xmm1);
 561   }
 562   if (UseSSE >= 2) {
 563     __ subptr(rsp, 4*wordSize);
 564     __ movdbl(Address(rsp, 0), xmm0);

 586     __ addptr(rsp, 2*wordSize);
 587   }
 588   if (UseSSE >= 2) {
 589     __ movdbl(xmm0, Address(rsp, 0));
 590     __ movdbl(xmm1, Address(rsp, 2*wordSize));
 591     __ addptr(rsp, 4*wordSize);
 592   }
 593 
 594   __ popf();
 595   __ popa();
 596   __ bind(L);
 597 }
 598 
 599 
 600 static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
 601   int next_off = st_off - Interpreter::stackElementSize;
 602   __ movdbl(Address(rsp, next_off), r);
 603 }
 604 
 605 static void gen_c2i_adapter(MacroAssembler *masm,
 606                             const GrowableArray<SigEntry>& sig_extended,


 607                             const VMRegPair *regs,
 608                             Label& skip_fixup,
 609                             address start,
 610                             OopMapSet*& oop_maps,
 611                             int& frame_complete,
 612                             int& frame_size_in_words) {
 613   // Before we get into the guts of the C2I adapter, see if we should be here
 614   // at all.  We've come from compiled code and are attempting to jump to the
 615   // interpreter, which means the caller made a static call to get here
 616   // (vcalls always get a compiled target if there is one).  Check for a
 617   // compiled target.  If there is one, we need to patch the caller's call.
 618   patch_callers_callsite(masm);
 619 
 620   __ bind(skip_fixup);
 621 
 622 #ifdef COMPILER2
 623   // C2 may leave the stack dirty if not in SSE2+ mode
 624   if (UseSSE >= 2) {
 625     __ verify_FPU(0, "c2i transition should have clean FPU stack");
 626   } else {
 627     __ empty_FPU_stack();
 628   }
 629 #endif /* COMPILER2 */
 630 
 631   // Since all args are passed on the stack,
 632   // sig_extended.length() * Interpreter::stackElementSize
 633   // is the space we need.
 634   int extraspace = sig_extended.length() * Interpreter::stackElementSize;
 635 
 636   // Get return address
 637   __ pop(rax);
 638 
 639   // set senderSP value
 640   __ movptr(rsi, rsp);
 641 
 642   __ subptr(rsp, extraspace);
 643 
 644   // Now write the args into the outgoing interpreter space
 645   for (int i = 0; i < sig_extended.length(); i++) {
 646     if (sig_extended.at(i)._bt == T_VOID) {
 647       assert(i > 0 && (sig_extended.at(i-1)._bt == T_LONG || sig_extended.at(i-1)._bt == T_DOUBLE), "missing half");
 648       continue;
 649     }
 650 
 651     // st_off points to lowest address on stack.
 652     int st_off = ((sig_extended.length() - 1) - i) * Interpreter::stackElementSize;
 653     int next_off = st_off - Interpreter::stackElementSize;
 654 
 655     // Say 4 args:
 656     // i   st_off
 657     // 0   12 T_LONG
 658     // 1    8 T_VOID
 659     // 2    4 T_OBJECT
 660     // 3    0 T_BOOL
 661     VMReg r_1 = regs[i].first();
 662     VMReg r_2 = regs[i].second();
 663     if (!r_1->is_valid()) {
 664       assert(!r_2->is_valid(), "");
 665       continue;
 666     }
 667 
 668     if (r_1->is_stack()) {
 669       // memory to memory use fpu stack top
 670       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 671 
 672       if (!r_2->is_valid()) {

 678         // st_off == MSW, st_off-wordSize == LSW
 679 
 680         __ movptr(rdi, Address(rsp, ld_off));
 681         __ movptr(Address(rsp, next_off), rdi);
 682         __ movptr(rdi, Address(rsp, ld_off + wordSize));
 683         __ movptr(Address(rsp, st_off), rdi);
 684       }
 685     } else if (r_1->is_Register()) {
 686       Register r = r_1->as_Register();
 687       if (!r_2->is_valid()) {
 688         __ movl(Address(rsp, st_off), r);
 689       } else {
 690         // long/double in gpr
 691         ShouldNotReachHere();
 692       }
 693     } else {
 694       assert(r_1->is_XMMRegister(), "");
 695       if (!r_2->is_valid()) {
 696         __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
 697       } else {
 698         assert(sig_extended.at(i)._bt == T_DOUBLE || sig_extended.at(i)._bt == T_LONG, "wrong type");
 699         move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
 700       }
 701     }
 702   }
 703 
 704   // Schedule the branch target address early.
 705   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
 706   // And repush original return address
 707   __ push(rax);
 708   __ jmp(rcx);
 709 }
 710 
 711 
 712 static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
 713   int next_val_off = ld_off - Interpreter::stackElementSize;
 714   __ movdbl(r, Address(saved_sp, next_val_off));
 715 }
 716 
 717 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
 718                         address code_start, address code_end,
 719                         Label& L_ok) {
 720   Label L_fail;
 721   __ lea(temp_reg, ExternalAddress(code_start));
 722   __ cmpptr(pc_reg, temp_reg);
 723   __ jcc(Assembler::belowEqual, L_fail);
 724   __ lea(temp_reg, ExternalAddress(code_end));
 725   __ cmpptr(pc_reg, temp_reg);
 726   __ jcc(Assembler::below, L_ok);
 727   __ bind(L_fail);
 728 }
 729 
 730 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,

 731                                     int comp_args_on_stack,
 732                                     const GrowableArray<SigEntry>& sig_extended,
 733                                     const VMRegPair *regs) {
 734 
 735   // Note: rsi contains the senderSP on entry. We must preserve it since
 736   // we may do an i2c -> c2i transition if we lose a race where compiled
 737   // code goes non-entrant while we get args ready.
 738 
 739   // Adapters can be frameless because they do not require the caller
 740   // to perform additional cleanup work, such as correcting the stack pointer.
 741   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 742   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 743   // even if a callee has modified the stack pointer.
 744   // A c2i adapter is frameless because the *callee* frame, which is interpreted,
 745   // routinely repairs its caller's stack pointer (from sender_sp, which is set
 746   // up via the senderSP register).
 747   // In other words, if *either* the caller or callee is interpreted, we can
 748   // get the stack pointer repaired after a call.
 749   // This is why c2i and i2c adapters cannot be indefinitely composed.
 750   // In particular, if a c2i adapter were to somehow call an i2c adapter,
 751   // both caller and callee would be compiled methods, and neither would
 752   // clean up the stack pointer changes performed by the two adapters.
 753   // If this happens, control eventually transfers back to the compiled
 754   // caller, but with an uncorrected stack, causing delayed havoc.

 803   }
 804 
 805   // Align the outgoing SP
 806   __ andptr(rsp, -(StackAlignmentInBytes));
 807 
 808   // push the return address on the stack (note that pushing, rather
 809   // than storing it, yields the correct frame alignment for the callee)
 810   __ push(rax);
 811 
 812   // Put saved SP in another register
 813   const Register saved_sp = rax;
 814   __ movptr(saved_sp, rdi);
 815 
 816 
 817   // Will jump to the compiled code just as if compiled code was doing it.
 818   // Pre-load the register-jump target early, to schedule it better.
 819   __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset())));
 820 
 821   // Now generate the shuffle code.  Pick up all register args and move the
 822   // rest through the floating point stack top.
 823   for (int i = 0; i < sig_extended.length(); i++) {
 824     if (sig_extended.at(i)._bt == T_VOID) {
 825       // Longs and doubles are passed in native word order, but misaligned
 826       // in the 32-bit build.
 827       assert(i > 0 && (sig_extended.at(i-1)._bt == T_LONG || sig_extended.at(i-1)._bt == T_DOUBLE), "missing half");
 828       continue;
 829     }
 830 
 831     // Pick up 0, 1 or 2 words from SP+offset.
 832 
 833     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 834             "scrambled load targets?");
 835     // Load in argument order going down.
 836     int ld_off = (sig_extended.length() - i) * Interpreter::stackElementSize;
 837     // Point to interpreter value (vs. tag)
 838     int next_off = ld_off - Interpreter::stackElementSize;
 839     //
 840     //
 841     //
 842     VMReg r_1 = regs[i].first();
 843     VMReg r_2 = regs[i].second();
 844     if (!r_1->is_valid()) {
 845       assert(!r_2->is_valid(), "");
 846       continue;
 847     }
 848     if (r_1->is_stack()) {
 849       // Convert stack slot to an SP offset (+ wordSize to account for return address )
 850       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
 851 
 852       // We can use rsi as a temp here because compiled code doesn't need rsi as an input,
 853       // and if we end up going through a c2i because of a miss, a reasonable value of rsi
 854       // will be generated.
 855       if (!r_2->is_valid()) {
 856         // __ fld_s(Address(saved_sp, ld_off));

 914   // "compiled" so it is much better to make this transition
 915   // invisible to the stack walking code. Unfortunately if
 916   // we try and find the callee by normal means a safepoint
 917   // is possible. So we stash the desired callee in the thread
 918   // and the vm will find it there should this case occur.
 919 
 920   __ get_thread(rax);
 921   __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
 922 
 923   // move Method* to rax, in case we end up in a c2i adapter.
 924   // the c2i adapters expect Method* in rax, (c2) because c2's
 925   // resolve stubs return the result (the method) in rax,.
 926   // I'd love to fix this.
 927   __ mov(rax, rbx);
 928 
 929   __ jmp(rdi);
 930 }
 931 
 932 // ---------------------------------------------------------------
 933 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,

 934                                                             int comp_args_on_stack,
 935                                                             const GrowableArray<SigEntry>& sig_extended,
 936                                                             const VMRegPair *regs,
 937                                                             AdapterFingerPrint* fingerprint,
 938                                                             AdapterBlob*& new_adapter) {
 939   address i2c_entry = __ pc();
 940 
 941   gen_i2c_adapter(masm, comp_args_on_stack, sig_extended, regs);
 942 
 943   // -------------------------------------------------------------------------
 944   // Generate a C2I adapter.  On entry we know rbx, holds the Method* during calls
 945   // to the interpreter.  The args start out packed in the compiled layout.  They
 946   // need to be unpacked into the interpreter layout.  This will almost always
 947   // require some stack space.  We grow the current (compiled) stack, then repack
 948   // the args.  We  finally end in a jump to the generic interpreter entry point.
 949   // On exit from the interpreter, the interpreter will restore our SP (lest the
 950   // compiled code, which relies solely on SP and not EBP, get sick).
 951 
 952   address c2i_unverified_entry = __ pc();
 953   Label skip_fixup;
 954 
 955   Register holder = rax;
 956   Register receiver = rcx;
 957   Register temp = rbx;
 958 
 959   {
 960 
 961     Label missed;
 962     __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
 963     __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
 964     __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
 965     __ jcc(Assembler::notEqual, missed);
 966     // Method might have been compiled since the call site was patched to
 967     // interpreted; if that is the case, treat it as a miss so we can get
 968     // the call site corrected.
 969     __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
 970     __ jcc(Assembler::equal, skip_fixup);
 971 
 972     __ bind(missed);
 973     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 974   }
 975 
 976   address c2i_entry = __ pc();
 977 
 978   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 979   bs->c2i_entry_barrier(masm);
 980 
 981   OopMapSet* oop_maps = NULL;
 982   int frame_complete = CodeOffsets::frame_never_safe;
 983   int frame_size_in_words = 0;
 984   gen_c2i_adapter(masm, sig_extended, regs, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words);
 985 
 986   __ flush();
 987   new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps);
 988   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
 989 }
 990 
 991 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 992                                          VMRegPair *regs,
 993                                          VMRegPair *regs2,
 994                                          int total_args_passed) {
 995   assert(regs2 == NULL, "not needed on x86");
 996 // We return the amount of VMRegImpl stack slots we need to reserve for all
 997 // the arguments NOT counting out_preserve_stack_slots.
 998 
 999   uint    stack = 0;        // All arguments on stack
1000 
1001   for( int i = 0; i < total_args_passed; i++) {
1002     // From the type and the argument number (count) compute the location
1003     switch( sig_bt[i] ) {
1004     case T_BOOLEAN:
1005     case T_CHAR:
1006     case T_FLOAT:
1007     case T_BYTE:
1008     case T_SHORT:
1009     case T_INT:
1010     case T_OBJECT:
1011     case T_PRIMITIVE_OBJECT:
1012     case T_ARRAY:
1013     case T_ADDRESS:
1014     case T_METADATA:
1015       regs[i].set1(VMRegImpl::stack2reg(stack++));
1016       break;
1017     case T_LONG:
1018     case T_DOUBLE: // The stack numbering is reversed from Java
1019       // Since C arguments do not get reversed, the ordering for
1020       // doubles on the stack must be opposite the Java convention
1021       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
1022       regs[i].set2(VMRegImpl::stack2reg(stack));
1023       stack += 2;
1024       break;
1025     case T_VOID: regs[i].set_bad(); break;
1026     default:
1027       ShouldNotReachHere();
1028       break;
1029     }
1030   }
1031   return stack;

1575   int receiver_offset = -1;
1576 
1577   // This is a trick. We double the stack slots so we can claim
1578   // the oops in the caller's frame. Since we are sure to have
1579   // more args than the caller, doubling is enough to make
1580   // sure we can capture all the incoming oop args from the
1581   // caller.
1582   //
1583   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1584 
1585   // Mark location of rbp,
1586   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1587 
1588   // We know that we only have args in at most two integer registers (rcx, rdx), so rax and rbx
1589   // are free to use as temporaries if we have to do stack to stack moves.
1590   // All inbound args are referenced based on rbp, and all outbound args via rsp.
1591 
1592   for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
1593     switch (in_sig_bt[i]) {
1594       case T_ARRAY:
1595       case T_PRIMITIVE_OBJECT:
1596       case T_OBJECT:
1597         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1598                     ((i == 0) && (!is_static)),
1599                     &receiver_offset);
1600         break;
1601       case T_VOID:
1602         break;
1603 
1604       case T_FLOAT:
1605         float_move(masm, in_regs[i], out_regs[c_arg]);
1606           break;
1607 
1608       case T_DOUBLE:
1609         assert( i + 1 < total_in_args &&
1610                 in_sig_bt[i + 1] == T_VOID &&
1611                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1612         double_move(masm, in_regs[i], out_regs[c_arg]);
1613         break;
1614 
1615       case T_LONG :

1750   // Verify or restore cpu control state after JNI call
1751   __ restore_cpu_control_state_after_jni(noreg);
1752 
1753   // WARNING - on Windows Java Natives use pascal calling convention and pop the
1754   // arguments off of the stack. We could just re-adjust the stack pointer here
1755   // and continue to do SP relative addressing but we instead switch to FP
1756   // relative addressing.
1757 
1758   // Unpack native results.
1759   switch (ret_type) {
1760   case T_BOOLEAN: __ c2bool(rax);            break;
1761   case T_CHAR   : __ andptr(rax, 0xFFFF);    break;
1762   case T_BYTE   : __ sign_extend_byte (rax); break;
1763   case T_SHORT  : __ sign_extend_short(rax); break;
1764   case T_INT    : /* nothing to do */        break;
1765   case T_DOUBLE :
1766   case T_FLOAT  :
1767     // Result is in st0 we'll save as needed
1768     break;
1769   case T_ARRAY:                 // Really a handle
1770   case T_PRIMITIVE_OBJECT:           // Really a handle
1771   case T_OBJECT:                // Really a handle
1772       break; // can't de-handlize until after safepoint check
1773   case T_VOID: break;
1774   case T_LONG: break;
1775   default       : ShouldNotReachHere();
1776   }
1777 
1778   Label after_transition;
1779 
1780   // Switch thread to "native transition" state before reading the synchronization state.
1781   // This additional state is necessary because reading and testing the synchronization
1782   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1783   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1784   //     VM thread changes sync state to synchronizing and suspends threads for GC.
1785   //     Thread A is resumed to finish this native method, but doesn't block here since it
 1786   //     didn't see any synchronization in progress, and escapes.
1787   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
1788 
1789   // Force this write out before the read below
1790   __ membar(Assembler::Membar_mask_bits(

2838 
2839   __ bind(pending);
2840 
2841   RegisterSaver::restore_live_registers(masm);
2842 
2843   // exception pending => remove activation and forward to exception handler
2844 
2845   __ get_thread(thread);
2846   __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
2847   __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
2848   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2849 
2850   // -------------
2851   // make sure all code is generated
2852   masm->flush();
2853 
2854   // return the  blob
2855   // frame_size_words or bytes??
2856   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
2857 }
2858 
2859 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
2860   Unimplemented();
2861   return NULL;
2862 }