465 else if( freg_arg1 == fltarg_flt_dbl ) freg_arg1 = i;
466 else // Else double is passed low on the stack to be aligned.
467 stack += 2;
468 } else if( sig_bt[i] == T_LONG ) {
469 stack += 2;
470 }
471 }
472 int dstack = 0; // Separate counter for placing doubles
473
474 // Now pick where all else goes.
475 for( i = 0; i < total_args_passed; i++) {
476 // From the type and the argument number (count) compute the location
477 switch( sig_bt[i] ) {
478 case T_SHORT:
479 case T_CHAR:
480 case T_BYTE:
481 case T_BOOLEAN:
482 case T_INT:
483 case T_ARRAY:
484 case T_OBJECT:
485 case T_ADDRESS:
486 if( reg_arg0 == 9999 ) {
487 reg_arg0 = i;
488 regs[i].set1(rcx->as_VMReg());
489 } else if( reg_arg1 == 9999 ) {
490 reg_arg1 = i;
491 regs[i].set1(rdx->as_VMReg());
492 } else {
493 regs[i].set1(VMRegImpl::stack2reg(stack++));
494 }
495 break;
496 case T_FLOAT:
497 if( freg_arg0 == fltarg_flt_dbl || freg_arg0 == fltarg_float_only ) {
498 freg_arg0 = i;
499 regs[i].set1(xmm0->as_VMReg());
500 } else if( freg_arg1 == fltarg_flt_dbl || freg_arg1 == fltarg_float_only ) {
501 freg_arg1 = i;
502 regs[i].set1(xmm1->as_VMReg());
503 } else {
504 regs[i].set1(VMRegImpl::stack2reg(stack++));
515 regs[i].set2(xmm0->as_VMReg());
516 } else if( freg_arg1 == (uint)i ) {
517 regs[i].set2(xmm1->as_VMReg());
518 } else {
519 regs[i].set2(VMRegImpl::stack2reg(dstack));
520 dstack += 2;
521 }
522 break;
523 case T_VOID: regs[i].set_bad(); break;
525 default:
526 ShouldNotReachHere();
527 break;
528 }
529 }
530
531 // The return value can be an odd number of VMRegImpl stack slots; round it up to a multiple of 2.
532 return align_up(stack, 2);
533 }
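// For example (a sketch of the convention encoded above, not an exhaustive
// spec; the signature is chosen only for illustration): for a Java signature
// (int a, Object b, int c, float f, float g, float h) the loops above assign
//   a -> rcx, b -> rdx, c -> a stack slot,
//   f -> xmm0, g -> xmm1, h -> a stack slot,
// while longs always take two stack slots, doubles take either xmm0/xmm1 or
// two aligned stack slots (dstack), and the total slot count is rounded up
// to an even number on return.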
534
535 // Patch the callers callsite with entry to compiled code if it exists.
536 static void patch_callers_callsite(MacroAssembler *masm) {
537 Label L;
538 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
539 __ jcc(Assembler::equal, L);
540 // Schedule the branch target address early.
541 // Call into the VM to patch the caller, then jump to compiled callee
542 // rax, isn't live so capture return address while we easily can
543 __ movptr(rax, Address(rsp, 0));
544 __ pusha();
545 __ pushf();
546
547 if (UseSSE == 1) {
548 __ subptr(rsp, 2*wordSize);
549 __ movflt(Address(rsp, 0), xmm0);
550 __ movflt(Address(rsp, wordSize), xmm1);
551 }
552 if (UseSSE >= 2) {
553 __ subptr(rsp, 4*wordSize);
554 __ movdbl(Address(rsp, 0), xmm0);
576 __ addptr(rsp, 2*wordSize);
577 }
578 if (UseSSE >= 2) {
579 __ movdbl(xmm0, Address(rsp, 0));
580 __ movdbl(xmm1, Address(rsp, 2*wordSize));
581 __ addptr(rsp, 4*wordSize);
582 }
583
584 __ popf();
585 __ popa();
586 __ bind(L);
587 }
588
589
590 static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
591 int next_off = st_off - Interpreter::stackElementSize;
592 __ movdbl(Address(rsp, next_off), r);
593 }
594
595 static void gen_c2i_adapter(MacroAssembler *masm,
596 int total_args_passed,
597 int comp_args_on_stack,
598 const BasicType *sig_bt,
599 const VMRegPair *regs,
600 Label& skip_fixup) {
601 // Before we get into the guts of the C2I adapter, see if we should be here
602 // at all. We've come from compiled code and are attempting to jump to the
603 // interpreter, which means the caller made a static call to get here
604 // (vcalls always get a compiled target if there is one). Check for a
605 // compiled target. If there is one, we need to patch the caller's call.
606 patch_callers_callsite(masm);
607
608 __ bind(skip_fixup);
609
610 #ifdef COMPILER2
611 // C2 may leave the stack dirty if not in SSE2+ mode
612 if (UseSSE >= 2) {
613 __ verify_FPU(0, "c2i transition should have clean FPU stack");
614 } else {
615 __ empty_FPU_stack();
616 }
617 #endif /* COMPILER2 */
618
619 // Since all args are passed on the stack,
620 // total_args_passed * Interpreter::stackElementSize
621 // is the space we need.
622 int extraspace = total_args_passed * Interpreter::stackElementSize;
623
624 // Get return address
625 __ pop(rax);
626
627 // set senderSP value
628 __ movptr(rsi, rsp);
629
630 __ subptr(rsp, extraspace);
631
632 // Now write the args into the outgoing interpreter space
633 for (int i = 0; i < total_args_passed; i++) {
634 if (sig_bt[i] == T_VOID) {
635 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
636 continue;
637 }
638
639 // st_off points to lowest address on stack.
640 int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
641 int next_off = st_off - Interpreter::stackElementSize;
642
643 // Say 4 args:
644 // i st_off
645 // 0 12 T_LONG
646 // 1 8 T_VOID
647 // 2 4 T_OBJECT
648 // 3 0 T_BOOL
649 VMReg r_1 = regs[i].first();
650 VMReg r_2 = regs[i].second();
651 if (!r_1->is_valid()) {
652 assert(!r_2->is_valid(), "");
653 continue;
654 }
655
656 if (r_1->is_stack()) {
657 // memory to memory use fpu stack top
658 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
659
660 if (!r_2->is_valid()) {
666 // st_off == MSW, st_off-wordSize == LSW
667
668 __ movptr(rdi, Address(rsp, ld_off));
669 __ movptr(Address(rsp, next_off), rdi);
670 __ movptr(rdi, Address(rsp, ld_off + wordSize));
671 __ movptr(Address(rsp, st_off), rdi);
672 }
673 } else if (r_1->is_Register()) {
674 Register r = r_1->as_Register();
675 if (!r_2->is_valid()) {
676 __ movl(Address(rsp, st_off), r);
677 } else {
678 // long/double in gpr
679 ShouldNotReachHere();
680 }
681 } else {
682 assert(r_1->is_XMMRegister(), "");
683 if (!r_2->is_valid()) {
684 __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
685 } else {
686 assert(sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG, "wrong type");
687 move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
688 }
689 }
690 }
691
692 // Schedule the branch target address early.
693 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
694 // And repush original return address
695 __ push(rax);
696 __ jmp(rcx);
697 }
698
699
700 static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
701 int next_val_off = ld_off - Interpreter::stackElementSize;
702 __ movdbl(r, Address(saved_sp, next_val_off));
703 }
704
705 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
706 address code_start, address code_end,
707 Label& L_ok) {
708 Label L_fail;
709 __ lea(temp_reg, ExternalAddress(code_start));
710 __ cmpptr(pc_reg, temp_reg);
711 __ jcc(Assembler::belowEqual, L_fail);
712 __ lea(temp_reg, ExternalAddress(code_end));
713 __ cmpptr(pc_reg, temp_reg);
714 __ jcc(Assembler::below, L_ok);
715 __ bind(L_fail);
716 }
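// Note: on failure range_check() simply falls through at L_fail, so callers
// are expected to emit the failure handling right after the call and bind
// L_ok past it. A hedged usage sketch (the bounds and message are
// hypothetical, not taken from this file):
//   range_check(masm, rax, rdi, blob->code_begin(), blob->code_end(), L_ok);
//   __ stop("caller pc is not inside the expected code range");
//   __ bind(L_ok);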
717
718 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
719 int total_args_passed,
720 int comp_args_on_stack,
721 const BasicType *sig_bt,
722 const VMRegPair *regs) {
723 // Note: rsi contains the senderSP on entry. We must preserve it since
724 // we may do a i2c -> c2i transition if we lose a race where compiled
725 // code goes non-entrant while we get args ready.
726
727 // Adapters can be frameless because they do not require the caller
728 // to perform additional cleanup work, such as correcting the stack pointer.
729 // An i2c adapter is frameless because the *caller* frame, which is interpreted,
730 // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
731 // even if a callee has modified the stack pointer.
732 // A c2i adapter is frameless because the *callee* frame, which is interpreted,
733 // routinely repairs its caller's stack pointer (from sender_sp, which is set
734 // up via the senderSP register).
735 // In other words, if *either* the caller or callee is interpreted, we can
736 // get the stack pointer repaired after a call.
737 // This is why c2i and i2c adapters cannot be indefinitely composed.
738 // In particular, if a c2i adapter were to somehow call an i2c adapter,
739 // both caller and callee would be compiled methods, and neither would
740 // clean up the stack pointer changes performed by the two adapters.
741 // If this happens, control eventually transfers back to the compiled
742 // caller, but with an uncorrected stack, causing delayed havoc.
791 }
792
793 // Align the outgoing SP
794 __ andptr(rsp, -(StackAlignmentInBytes));
795
796 // push the return address on the stack (note that pushing, rather
797 // than storing it, yields the correct frame alignment for the callee)
798 __ push(rax);
799
800 // Put saved SP in another register
801 const Register saved_sp = rax;
802 __ movptr(saved_sp, rdi);
803
804
805 // Will jump to the compiled code just as if compiled code was doing it.
806 // Pre-load the register-jump target early, to schedule it better.
807 __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset())));
808
809 // Now generate the shuffle code. Pick up all register args and move the
810 // rest through the floating point stack top.
811 for (int i = 0; i < total_args_passed; i++) {
812 if (sig_bt[i] == T_VOID) {
813 // Longs and doubles are passed in native word order, but misaligned
814 // in the 32-bit build.
815 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
816 continue;
817 }
818
819 // Pick up 0, 1 or 2 words from SP+offset.
820
821 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
822 "scrambled load targets?");
823 // Load in argument order going down.
824 int ld_off = (total_args_passed - i) * Interpreter::stackElementSize;
825 // Point to interpreter value (vs. tag)
826 int next_off = ld_off - Interpreter::stackElementSize;
827 //
828 //
829 //
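// Worked example (a sketch, assuming the 4-byte Interpreter::stackElementSize
// of the 32-bit build): with total_args_passed == 4 the offsets computed above are
//   i == 0 -> ld_off 16, next_off 12
//   i == 1 -> ld_off 12, next_off  8
//   i == 2 -> ld_off  8, next_off  4
//   i == 3 -> ld_off  4, next_off  0
// A two-slot value (long/double) for argument i is read starting at next_off,
// cf. move_i2c_double() above.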
830 VMReg r_1 = regs[i].first();
831 VMReg r_2 = regs[i].second();
832 if (!r_1->is_valid()) {
833 assert(!r_2->is_valid(), "");
834 continue;
835 }
836 if (r_1->is_stack()) {
837 // Convert stack slot to an SP offset (+ wordSize to account for return address )
838 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
839
840 // We can use rsi as a temp here because compiled code doesn't need rsi as an input
841 // and if we end up going through a c2i because of a miss, a reasonable value of rsi
842 // will be generated.
843 if (!r_2->is_valid()) {
844 // __ fld_s(Address(saved_sp, ld_off));
902 // "compiled" so it is much better to make this transition
903 // invisible to the stack walking code. Unfortunately, if
904 // we try to find the callee by normal means, a safepoint
905 // is possible. So we stash the desired callee in the thread,
906 // and the VM will find it there should this case occur.
907
908 __ get_thread(rax);
909 __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
910
911 // move Method* to rax in case we end up in a c2i adapter.
912 // The c2i adapters expect the Method* in rax (c2) because c2's
913 // resolve stubs return the result (the method) in rax.
914 // I'd love to fix this.
915 __ mov(rax, rbx);
916
917 __ jmp(rdi);
918 }
919
920 // ---------------------------------------------------------------
921 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
922 int total_args_passed,
923 int comp_args_on_stack,
924 const BasicType *sig_bt,
925 const VMRegPair *regs,
926 AdapterFingerPrint* fingerprint) {
927 address i2c_entry = __ pc();
928
929 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
930
931 // -------------------------------------------------------------------------
932 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
933 // to the interpreter. The args start out packed in the compiled layout. They
934 // need to be unpacked into the interpreter layout. This will almost always
935 // require some stack space. We grow the current (compiled) stack, then repack
936 // the args. We finally end in a jump to the generic interpreter entry point.
937 // On exit from the interpreter, the interpreter will restore our SP (lest the
938 // compiled code, which relies solely on SP and not EBP, get sick).
939
940 address c2i_unverified_entry = __ pc();
941 Label skip_fixup;
942
943 Register holder = rax;
944 Register receiver = rcx;
945 Register temp = rbx;
946
947 {
948
949 Label missed;
950 __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
951 __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
952 __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
953 __ jcc(Assembler::notEqual, missed);
954 // Method might have been compiled since the call site was patched to
955 // interpreted if that is the case treat it as a miss so we can get
956 // the call site corrected.
957 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
958 __ jcc(Assembler::equal, skip_fixup);
959
960 __ bind(missed);
961 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
962 }
963
964 address c2i_entry = __ pc();
965
966 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
967 bs->c2i_entry_barrier(masm);
968
969 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
970
971 __ flush();
972 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
973 }
974
975 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
976 VMRegPair *regs,
977 VMRegPair *regs2,
978 int total_args_passed) {
979 assert(regs2 == NULL, "not needed on x86");
980 // We return the amount of VMRegImpl stack slots we need to reserve for all
981 // the arguments NOT counting out_preserve_stack_slots.
982
983 uint stack = 0; // All arguments on stack
984
985 for( int i = 0; i < total_args_passed; i++) {
986 // From the type and the argument number (count) compute the location
987 switch( sig_bt[i] ) {
988 case T_BOOLEAN:
989 case T_CHAR:
990 case T_FLOAT:
991 case T_BYTE:
992 case T_SHORT:
993 case T_INT:
994 case T_OBJECT:
995 case T_ARRAY:
996 case T_ADDRESS:
997 case T_METADATA:
998 regs[i].set1(VMRegImpl::stack2reg(stack++));
999 break;
1000 case T_LONG:
1001 case T_DOUBLE: // The stack numbering is reversed from Java
1002 // Since C arguments do not get reversed, the ordering for
1003 // doubles on the stack must be opposite the Java convention
1004 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
1005 regs[i].set2(VMRegImpl::stack2reg(stack));
1006 stack += 2;
1007 break;
1008 case T_VOID: regs[i].set_bad(); break;
1009 default:
1010 ShouldNotReachHere();
1011 break;
1012 }
1013 }
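// Worked example (a sketch): for a C signature expanded to
// {T_INT, T_LONG, T_VOID, T_FLOAT} the loop above assigns
//   T_INT   -> stack slot 0        (stack becomes 1)
//   T_LONG  -> stack slots 1..2    (stack becomes 3)
//   T_VOID  -> set_bad()           (the long's second half)
//   T_FLOAT -> stack slot 3        (stack becomes 4)
// and the function returns 4 VMRegImpl slots.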
1014 return stack;
1558 int receiver_offset = -1;
1559
1560 // This is a trick. We double the stack slots so we can claim
1561 // the oops in the caller's frame. Since we are sure to have
1562 // more args than the caller, doubling is enough to make
1563 // sure we can capture all the incoming oop args from the
1564 // caller.
1565 //
1566 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1567
1568 // Mark location of rbp,
1569 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1570
1571 // We know that we only have args in at most two integer registers (rcx, rdx), so rax and rbx
1572 // are free to use as temporaries if we have to do stack to stack moves.
1573 // All inbound args are referenced based on rbp, and all outbound args via rsp.
1574
1575 for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
1576 switch (in_sig_bt[i]) {
1577 case T_ARRAY:
1578 case T_OBJECT:
1579 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1580 ((i == 0) && (!is_static)),
1581 &receiver_offset);
1582 break;
1583 case T_VOID:
1584 break;
1585
1586 case T_FLOAT:
1587 float_move(masm, in_regs[i], out_regs[c_arg]);
1588 break;
1589
1590 case T_DOUBLE:
1591 assert( i + 1 < total_in_args &&
1592 in_sig_bt[i + 1] == T_VOID &&
1593 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1594 double_move(masm, in_regs[i], out_regs[c_arg]);
1595 break;
1596
1597 case T_LONG :
1732 // Verify or restore cpu control state after JNI call
1733 __ restore_cpu_control_state_after_jni(noreg);
1734
1735 // WARNING - on Windows, Java natives use the Pascal calling convention and pop the
1736 // arguments off the stack. We could just re-adjust the stack pointer here
1737 // and continue to do SP relative addressing but we instead switch to FP
1738 // relative addressing.
1739
1740 // Unpack native results.
1741 switch (ret_type) {
1742 case T_BOOLEAN: __ c2bool(rax); break;
1743 case T_CHAR : __ andptr(rax, 0xFFFF); break;
1744 case T_BYTE : __ sign_extend_byte (rax); break;
1745 case T_SHORT : __ sign_extend_short(rax); break;
1746 case T_INT : /* nothing to do */ break;
1747 case T_DOUBLE :
1748 case T_FLOAT :
1749 // Result is in st0; we'll save it as needed
1750 break;
1751 case T_ARRAY: // Really a handle
1752 case T_OBJECT: // Really a handle
1753 break; // can't de-handlize until after safepoint check
1754 case T_VOID: break;
1755 case T_LONG: break;
1756 default : ShouldNotReachHere();
1757 }
1758
1759 Label after_transition;
1760
1761 // Switch thread to "native transition" state before reading the synchronization state.
1762 // This additional state is necessary because reading and testing the synchronization
1763 // state is not atomic w.r.t. GC, as this scenario demonstrates:
1764 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1765 // VM thread changes sync state to synchronizing and suspends threads for GC.
1766 // Thread A is resumed to finish this native method, but doesn't block here since it
1767 // didn't see any synchronization in progress, and escapes.
1768 __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
1769
1770 // Force this write out before the read below
1771 __ membar(Assembler::Membar_mask_bits(
2819
2820 __ bind(pending);
2821
2822 RegisterSaver::restore_live_registers(masm);
2823
2824 // exception pending => remove activation and forward to exception handler
2825
2826 __ get_thread(thread);
2827 __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
2828 __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
2829 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2830
2831 // -------------
2832 // make sure all code is generated
2833 masm->flush();
2834
2835 // return the blob
2836 // frame_size_words or bytes??
2837 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
2838 }
|
465 else if( freg_arg1 == fltarg_flt_dbl ) freg_arg1 = i;
466 else // Else double is passed low on the stack to be aligned.
467 stack += 2;
468 } else if( sig_bt[i] == T_LONG ) {
469 stack += 2;
470 }
471 }
472 int dstack = 0; // Separate counter for placing doubles
473
474 // Now pick where all else goes.
475 for( i = 0; i < total_args_passed; i++) {
476 // From the type and the argument number (count) compute the location
477 switch( sig_bt[i] ) {
478 case T_SHORT:
479 case T_CHAR:
480 case T_BYTE:
481 case T_BOOLEAN:
482 case T_INT:
483 case T_ARRAY:
484 case T_OBJECT:
485 case T_PRIMITIVE_OBJECT:
486 case T_ADDRESS:
487 if( reg_arg0 == 9999 ) {
488 reg_arg0 = i;
489 regs[i].set1(rcx->as_VMReg());
490 } else if( reg_arg1 == 9999 ) {
491 reg_arg1 = i;
492 regs[i].set1(rdx->as_VMReg());
493 } else {
494 regs[i].set1(VMRegImpl::stack2reg(stack++));
495 }
496 break;
497 case T_FLOAT:
498 if( freg_arg0 == fltarg_flt_dbl || freg_arg0 == fltarg_float_only ) {
499 freg_arg0 = i;
500 regs[i].set1(xmm0->as_VMReg());
501 } else if( freg_arg1 == fltarg_flt_dbl || freg_arg1 == fltarg_float_only ) {
502 freg_arg1 = i;
503 regs[i].set1(xmm1->as_VMReg());
504 } else {
505 regs[i].set1(VMRegImpl::stack2reg(stack++));
516 regs[i].set2(xmm0->as_VMReg());
517 } else if( freg_arg1 == (uint)i ) {
518 regs[i].set2(xmm1->as_VMReg());
519 } else {
520 regs[i].set2(VMRegImpl::stack2reg(dstack));
521 dstack += 2;
522 }
523 break;
524 case T_VOID: regs[i].set_bad(); break;
526 default:
527 ShouldNotReachHere();
528 break;
529 }
530 }
531
532 // The return value can be an odd number of VMRegImpl stack slots; round it up to a multiple of 2.
533 return align_up(stack, 2);
534 }
535
536 const uint SharedRuntime::java_return_convention_max_int = 1;
537 const uint SharedRuntime::java_return_convention_max_float = 1;
538 int SharedRuntime::java_return_convention(const BasicType *sig_bt,
539 VMRegPair *regs,
540 int total_args_passed) {
541 Unimplemented();
542 return 0;
543 }
544
545 // Patch the callers callsite with entry to compiled code if it exists.
546 static void patch_callers_callsite(MacroAssembler *masm) {
547 Label L;
548 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
549 __ jcc(Assembler::equal, L);
550 // Schedule the branch target address early.
551 // Call into the VM to patch the caller, then jump to compiled callee
552 // rax, isn't live so capture return address while we easily can
553 __ movptr(rax, Address(rsp, 0));
554 __ pusha();
555 __ pushf();
556
557 if (UseSSE == 1) {
558 __ subptr(rsp, 2*wordSize);
559 __ movflt(Address(rsp, 0), xmm0);
560 __ movflt(Address(rsp, wordSize), xmm1);
561 }
562 if (UseSSE >= 2) {
563 __ subptr(rsp, 4*wordSize);
564 __ movdbl(Address(rsp, 0), xmm0);
586 __ addptr(rsp, 2*wordSize);
587 }
588 if (UseSSE >= 2) {
589 __ movdbl(xmm0, Address(rsp, 0));
590 __ movdbl(xmm1, Address(rsp, 2*wordSize));
591 __ addptr(rsp, 4*wordSize);
592 }
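// Save-area arithmetic for the XMM scratch registers handled above (a note,
// not new behavior): with UseSSE == 1, two floats need 2 words (xmm0 at
// offset 0, xmm1 at offset wordSize); with UseSSE >= 2, two doubles need
// 4 words (xmm0 at offset 0, xmm1 at offset 2*wordSize). The restores above
// release exactly what the corresponding spills reserved.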
593
594 __ popf();
595 __ popa();
596 __ bind(L);
597 }
598
599
600 static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
601 int next_off = st_off - Interpreter::stackElementSize;
602 __ movdbl(Address(rsp, next_off), r);
603 }
604
605 static void gen_c2i_adapter(MacroAssembler *masm,
606 const GrowableArray<SigEntry>& sig_extended,
607 const VMRegPair *regs,
608 Label& skip_fixup,
609 address start,
610 OopMapSet*& oop_maps,
611 int& frame_complete,
612 int& frame_size_in_words) {
613 // Before we get into the guts of the C2I adapter, see if we should be here
614 // at all. We've come from compiled code and are attempting to jump to the
615 // interpreter, which means the caller made a static call to get here
616 // (vcalls always get a compiled target if there is one). Check for a
617 // compiled target. If there is one, we need to patch the caller's call.
618 patch_callers_callsite(masm);
619
620 __ bind(skip_fixup);
621
622 #ifdef COMPILER2
623 // C2 may leave the stack dirty if not in SSE2+ mode
624 if (UseSSE >= 2) {
625 __ verify_FPU(0, "c2i transition should have clean FPU stack");
626 } else {
627 __ empty_FPU_stack();
628 }
629 #endif /* COMPILER2 */
630
631 // Since all args are passed on the stack,
632 // sig_extended.length() * Interpreter::stackElementSize
633 // is the space we need.
634 int extraspace = sig_extended.length() * Interpreter::stackElementSize;
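// For example (a sketch, assuming the 4-byte Interpreter::stackElementSize of
// the 32-bit build): a (long, int) Java signature is extended to
// {T_LONG, T_VOID, T_INT}, so extraspace = 3 * 4 = 12 bytes of interpreter
// argument space is reserved below.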
635
636 // Get return address
637 __ pop(rax);
638
639 // set senderSP value
640 __ movptr(rsi, rsp);
641
642 __ subptr(rsp, extraspace);
643
644 // Now write the args into the outgoing interpreter space
645 for (int i = 0; i < sig_extended.length(); i++) {
646 if (sig_extended.at(i)._bt == T_VOID) {
647 assert(i > 0 && (sig_extended.at(i-1)._bt == T_LONG || sig_extended.at(i-1)._bt == T_DOUBLE), "missing half");
648 continue;
649 }
650
651 // st_off points to lowest address on stack.
652 int st_off = ((sig_extended.length() - 1) - i) * Interpreter::stackElementSize;
653 int next_off = st_off - Interpreter::stackElementSize;
654
655 // Say 4 args:
656 // i st_off
657 // 0 12 T_LONG
658 // 1 8 T_VOID
659 // 2 4 T_OBJECT
660 // 3 0 T_BOOL
661 VMReg r_1 = regs[i].first();
662 VMReg r_2 = regs[i].second();
663 if (!r_1->is_valid()) {
664 assert(!r_2->is_valid(), "");
665 continue;
666 }
667
668 if (r_1->is_stack()) {
669 // memory to memory use fpu stack top
670 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
671
672 if (!r_2->is_valid()) {
678 // st_off == MSW, st_off-wordSize == LSW
679
680 __ movptr(rdi, Address(rsp, ld_off));
681 __ movptr(Address(rsp, next_off), rdi);
682 __ movptr(rdi, Address(rsp, ld_off + wordSize));
683 __ movptr(Address(rsp, st_off), rdi);
684 }
685 } else if (r_1->is_Register()) {
686 Register r = r_1->as_Register();
687 if (!r_2->is_valid()) {
688 __ movl(Address(rsp, st_off), r);
689 } else {
690 // long/double in gpr
691 ShouldNotReachHere();
692 }
693 } else {
694 assert(r_1->is_XMMRegister(), "");
695 if (!r_2->is_valid()) {
696 __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
697 } else {
698 assert(sig_extended.at(i)._bt == T_DOUBLE || sig_extended.at(i)._bt == T_LONG, "wrong type");
699 move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
700 }
701 }
702 }
703
704 // Schedule the branch target address early.
705 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
706 // And repush original return address
707 __ push(rax);
708 __ jmp(rcx);
709 }
710
711
712 static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
713 int next_val_off = ld_off - Interpreter::stackElementSize;
714 __ movdbl(r, Address(saved_sp, next_val_off));
715 }
716
717 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
718 address code_start, address code_end,
719 Label& L_ok) {
720 Label L_fail;
721 __ lea(temp_reg, ExternalAddress(code_start));
722 __ cmpptr(pc_reg, temp_reg);
723 __ jcc(Assembler::belowEqual, L_fail);
724 __ lea(temp_reg, ExternalAddress(code_end));
725 __ cmpptr(pc_reg, temp_reg);
726 __ jcc(Assembler::below, L_ok);
727 __ bind(L_fail);
728 }
729
730 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
731 int comp_args_on_stack,
732 const GrowableArray<SigEntry>& sig_extended,
733 const VMRegPair *regs) {
734
735 // Note: rsi contains the senderSP on entry. We must preserve it since
736 // we may do a i2c -> c2i transition if we lose a race where compiled
737 // code goes non-entrant while we get args ready.
738
739 // Adapters can be frameless because they do not require the caller
740 // to perform additional cleanup work, such as correcting the stack pointer.
741 // An i2c adapter is frameless because the *caller* frame, which is interpreted,
742 // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
743 // even if a callee has modified the stack pointer.
744 // A c2i adapter is frameless because the *callee* frame, which is interpreted,
745 // routinely repairs its caller's stack pointer (from sender_sp, which is set
746 // up via the senderSP register).
747 // In other words, if *either* the caller or callee is interpreted, we can
748 // get the stack pointer repaired after a call.
749 // This is why c2i and i2c adapters cannot be indefinitely composed.
750 // In particular, if a c2i adapter were to somehow call an i2c adapter,
751 // both caller and callee would be compiled methods, and neither would
752 // clean up the stack pointer changes performed by the two adapters.
753 // If this happens, control eventually transfers back to the compiled
754 // caller, but with an uncorrected stack, causing delayed havoc.
803 }
804
805 // Align the outgoing SP
806 __ andptr(rsp, -(StackAlignmentInBytes));
807
808 // push the return address on the stack (note that pushing, rather
809 // than storing it, yields the correct frame alignment for the callee)
810 __ push(rax);
811
812 // Put saved SP in another register
813 const Register saved_sp = rax;
814 __ movptr(saved_sp, rdi);
815
816
817 // Will jump to the compiled code just as if compiled code was doing it.
818 // Pre-load the register-jump target early, to schedule it better.
819 __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset())));
820
821 // Now generate the shuffle code. Pick up all register args and move the
822 // rest through the floating point stack top.
823 for (int i = 0; i < sig_extended.length(); i++) {
824 if (sig_extended.at(i)._bt == T_VOID) {
825 // Longs and doubles are passed in native word order, but misaligned
826 // in the 32-bit build.
827 assert(i > 0 && (sig_extended.at(i-1)._bt == T_LONG || sig_extended.at(i-1)._bt == T_DOUBLE), "missing half");
828 continue;
829 }
830
831 // Pick up 0, 1 or 2 words from SP+offset.
832
833 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
834 "scrambled load targets?");
835 // Load in argument order going down.
836 int ld_off = (sig_extended.length() - i) * Interpreter::stackElementSize;
837 // Point to interpreter value (vs. tag)
838 int next_off = ld_off - Interpreter::stackElementSize;
839 //
840 //
841 //
842 VMReg r_1 = regs[i].first();
843 VMReg r_2 = regs[i].second();
844 if (!r_1->is_valid()) {
845 assert(!r_2->is_valid(), "");
846 continue;
847 }
848 if (r_1->is_stack()) {
849 // Convert stack slot to an SP offset (+ wordSize to account for return address )
850 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
851
852 // We can use rsi as a temp here because compiled code doesn't need rsi as an input
853 // and if we end up going through a c2i because of a miss, a reasonable value of rsi
854 // will be generated.
855 if (!r_2->is_valid()) {
856 // __ fld_s(Address(saved_sp, ld_off));
914 // "compiled" so it is much better to make this transition
915 // invisible to the stack walking code. Unfortunately, if
916 // we try to find the callee by normal means, a safepoint
917 // is possible. So we stash the desired callee in the thread,
918 // and the VM will find it there should this case occur.
919
920 __ get_thread(rax);
921 __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
922
923 // move Method* to rax in case we end up in a c2i adapter.
924 // The c2i adapters expect the Method* in rax (c2) because c2's
925 // resolve stubs return the result (the method) in rax.
926 // I'd love to fix this.
927 __ mov(rax, rbx);
928
929 __ jmp(rdi);
930 }
931
932 // ---------------------------------------------------------------
933 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
934 int comp_args_on_stack,
935 const GrowableArray<SigEntry>& sig_extended,
936 const VMRegPair *regs,
937 AdapterFingerPrint* fingerprint,
938 AdapterBlob*& new_adapter) {
939 address i2c_entry = __ pc();
940
941 gen_i2c_adapter(masm, comp_args_on_stack, sig_extended, regs);
942
943 // -------------------------------------------------------------------------
944 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
945 // to the interpreter. The args start out packed in the compiled layout. They
946 // need to be unpacked into the interpreter layout. This will almost always
947 // require some stack space. We grow the current (compiled) stack, then repack
948 // the args. We finally end in a jump to the generic interpreter entry point.
949 // On exit from the interpreter, the interpreter will restore our SP (lest the
950 // compiled code, which relies solely on SP and not EBP, get sick).
951
952 address c2i_unverified_entry = __ pc();
953 Label skip_fixup;
954
955 Register holder = rax;
956 Register receiver = rcx;
957 Register temp = rbx;
958
959 {
960
961 Label missed;
962 __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
963 __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
964 __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
965 __ jcc(Assembler::notEqual, missed);
966 // Method might have been compiled since the call site was patched to
967 // interpreted; if that is the case, treat it as a miss so we can get
968 // the call site corrected.
969 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
970 __ jcc(Assembler::equal, skip_fixup);
971
972 __ bind(missed);
973 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
974 }
975
976 address c2i_entry = __ pc();
977
978 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
979 bs->c2i_entry_barrier(masm);
980
981 OopMapSet* oop_maps = NULL;
982 int frame_complete = CodeOffsets::frame_never_safe;
983 int frame_size_in_words = 0;
984 gen_c2i_adapter(masm, sig_extended, regs, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words);
985
986 __ flush();
987 new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps);
988 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
989 }
990
991 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
992 VMRegPair *regs,
993 VMRegPair *regs2,
994 int total_args_passed) {
995 assert(regs2 == NULL, "not needed on x86");
996 // We return the amount of VMRegImpl stack slots we need to reserve for all
997 // the arguments NOT counting out_preserve_stack_slots.
998
999 uint stack = 0; // All arguments on stack
1000
1001 for( int i = 0; i < total_args_passed; i++) {
1002 // From the type and the argument number (count) compute the location
1003 switch( sig_bt[i] ) {
1004 case T_BOOLEAN:
1005 case T_CHAR:
1006 case T_FLOAT:
1007 case T_BYTE:
1008 case T_SHORT:
1009 case T_INT:
1010 case T_OBJECT:
1011 case T_PRIMITIVE_OBJECT:
1012 case T_ARRAY:
1013 case T_ADDRESS:
1014 case T_METADATA:
1015 regs[i].set1(VMRegImpl::stack2reg(stack++));
1016 break;
1017 case T_LONG:
1018 case T_DOUBLE: // The stack numbering is reversed from Java
1019 // Since C arguments do not get reversed, the ordering for
1020 // doubles on the stack must be opposite the Java convention
1021 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
1022 regs[i].set2(VMRegImpl::stack2reg(stack));
1023 stack += 2;
1024 break;
1025 case T_VOID: regs[i].set_bad(); break;
1026 default:
1027 ShouldNotReachHere();
1028 break;
1029 }
1030 }
1031 return stack;
1575 int receiver_offset = -1;
1576
1577 // This is a trick. We double the stack slots so we can claim
1578 // the oops in the caller's frame. Since we are sure to have
1579 // more args than the caller, doubling is enough to make
1580 // sure we can capture all the incoming oop args from the
1581 // caller.
1582 //
1583 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1584
1585 // Mark location of rbp,
1586 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1587
1588 // We know that we only have args in at most two integer registers (rcx, rdx), so rax and rbx
1589 // are free to use as temporaries if we have to do stack to stack moves.
1590 // All inbound args are referenced based on rbp, and all outbound args via rsp.
1591
1592 for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
1593 switch (in_sig_bt[i]) {
1594 case T_ARRAY:
1595 case T_PRIMITIVE_OBJECT:
1596 case T_OBJECT:
1597 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1598 ((i == 0) && (!is_static)),
1599 &receiver_offset);
1600 break;
1601 case T_VOID:
1602 break;
1603
1604 case T_FLOAT:
1605 float_move(masm, in_regs[i], out_regs[c_arg]);
1606 break;
1607
1608 case T_DOUBLE:
1609 assert( i + 1 < total_in_args &&
1610 in_sig_bt[i + 1] == T_VOID &&
1611 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1612 double_move(masm, in_regs[i], out_regs[c_arg]);
1613 break;
1614
1615 case T_LONG :
1750 // Verify or restore cpu control state after JNI call
1751 __ restore_cpu_control_state_after_jni(noreg);
1752
1753 // WARNING - on Windows, Java natives use the Pascal calling convention and pop the
1754 // arguments off the stack. We could just re-adjust the stack pointer here
1755 // and continue to do SP relative addressing but we instead switch to FP
1756 // relative addressing.
1757
1758 // Unpack native results.
1759 switch (ret_type) {
1760 case T_BOOLEAN: __ c2bool(rax); break;
1761 case T_CHAR : __ andptr(rax, 0xFFFF); break;
1762 case T_BYTE : __ sign_extend_byte (rax); break;
1763 case T_SHORT : __ sign_extend_short(rax); break;
1764 case T_INT : /* nothing to do */ break;
1765 case T_DOUBLE :
1766 case T_FLOAT :
1767 // Result is in st0; we'll save it as needed
1768 break;
1769 case T_ARRAY: // Really a handle
1770 case T_PRIMITIVE_OBJECT: // Really a handle
1771 case T_OBJECT: // Really a handle
1772 break; // can't de-handlize until after safepoint check
1773 case T_VOID: break;
1774 case T_LONG: break;
1775 default : ShouldNotReachHere();
1776 }
1777
1778 Label after_transition;
1779
1780 // Switch thread to "native transition" state before reading the synchronization state.
1781 // This additional state is necessary because reading and testing the synchronization
1782 // state is not atomic w.r.t. GC, as this scenario demonstrates:
1783 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1784 // VM thread changes sync state to synchronizing and suspends threads for GC.
1785 // Thread A is resumed to finish this native method, but doesn't block here since it
1786 // didn't see any synchronization in progress, and escapes.
1787 __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
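// Ordering sketch of the transition implied by the comment above:
//   1. store thread_state = _thread_in_native_trans   (the movl above)
//   2. StoreLoad fence                                 (the membar below)
//   3. read the synchronization state and block if a safepoint is in progress
// Without the fence, the read in step 3 could be reordered ahead of the store
// in step 1, re-creating the race described above.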
1788
1789 // Force this write out before the read below
1790 __ membar(Assembler::Membar_mask_bits(
2838
2839 __ bind(pending);
2840
2841 RegisterSaver::restore_live_registers(masm);
2842
2843 // exception pending => remove activation and forward to exception handler
2844
2845 __ get_thread(thread);
2846 __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
2847 __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
2848 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2849
2850 // -------------
2851 // make sure all code is generated
2852 masm->flush();
2853
2854 // return the blob
2855 // frame_size_words or bytes??
2856 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
2857 }
2858
2859 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
2860 Unimplemented();
2861 return NULL;
2862 }
|