src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

@@ -35,10 +35,11 @@
 #include "oops/instanceOop.hpp"
 #include "oops/method.hpp"
 #include "oops/objArrayKlass.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/methodHandles.hpp"
+#include "runtime/continuation.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "runtime/stubRoutines.hpp"

@@ -5692,10 +5693,381 @@
 
     return start;
 
   }
 
+void push_FrameInfo(MacroAssembler* _masm, Register fi, Register sp, Register fp, address pc) {
+  if (!sp->is_valid()) { __ push(0); } else {
+    if (sp == rsp) {
+      __ movptr(fi, rsp);
+      __ push(fi);
+    } else {
+      __ push(sp);
+    }
+  }
+
+  if (!fp->is_valid()) __ push(0); else __ push(fp);
+
+  __ lea(fi, ExternalAddress(pc));
+  __ push(fi);
+
+  __ movptr(fi, rsp); // make fi point to the beginning of the FrameInfo
+}
+
+void push_FrameInfo(MacroAssembler* _masm, Register fi, Register sp, Register fp, Register pc) {
+  if (!sp->is_valid()) { __ push(0); } else {
+    if (sp == rsp) {
+      __ movptr(fi, rsp);
+      __ push(fi);
+    } else {
+      __ push(sp);
+    }
+  }
+
+  if (!fp->is_valid()) __ push(0); else __ push(fp);
+
+  if (!pc->is_valid()) __ push(0); else __ push(pc);
+
+  __ movptr(fi, rsp); // make fi point to the beginning of the FrameInfo
+}
+
+void pop_FrameInfo(MacroAssembler* _masm, Register sp, Register fp, Register pc) {
+  if (!pc->is_valid()) __ lea(rsp, Address(rsp, wordSize)); else __ pop(pc);
+  if (!fp->is_valid()) __ lea(rsp, Address(rsp, wordSize)); else __ pop(fp);
+  if (!sp->is_valid()) __ lea(rsp, Address(rsp, wordSize)); else __ pop(sp);
+}
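+
+// A sketch of the layout these helpers establish on the stack and that fi
+// points to afterwards (inferred from the push order above; the runtime-side
+// FrameInfo declaration is not shown here and the field names are illustrative):
+//
+//   struct FrameInfo {
+//     address   pc;   // Address(fi, 0*wordSize) -- pushed last, lowest address
+//     intptr_t* fp;   // Address(fi, 1*wordSize)
+//     intptr_t* sp;   // Address(fi, 2*wordSize) -- pushed first
+//   };
+//
+// pop_FrameInfo unwinds the same three words in reverse push order (pc, fp,
+// sp), discarding any slot whose destination register is noreg.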
+
+static Register get_thread() {
+#ifdef _LP64
+  return r15_thread;
+#else
+  get_thread(rdi);
+  return rdi;
+#endif // LP64
+}
+
+static void setup_freeze_invocation(MacroAssembler* _masm, address pc) {
+  Register thread = get_thread();
+  NOT_LP64(__ push(thread));
+  LP64_ONLY(__ movptr(c_rarg0, thread));
+  __ set_last_Java_frame(rsp, rbp, pc);
+}
+
+static void teardown_freeze_invocation(MacroAssembler* _masm) {
+  __ reset_last_Java_frame(true);
+  NOT_LP64(__ pop(rdi));
+}
+
+// c_rarg1 is from interpreter
+RuntimeStub* generate_cont_doYield() {
+    const char *name = "cont_doYield";
+
+    enum layout {
+      frameinfo_11,
+      frameinfo_12,
+      frameinfo_21,
+      frameinfo_22,
+      frameinfo_31,
+      frameinfo_32,
+      rbp_off,
+      rbpH_off,
+      return_off,
+      return_off2,
+      framesize // inclusive of return address
+    };
+    // assert(is_even(framesize/2), "sp not 16-byte aligned");
+    int insts_size = 512;
+    int locs_size  = 64;
+    CodeBuffer code(name, insts_size, locs_size);
+    OopMapSet* oop_maps  = new OopMapSet();
+    MacroAssembler* masm = new MacroAssembler(&code);
+    MacroAssembler* _masm = masm;
+
+    // MacroAssembler* masm = _masm;
+    // StubCodeMark mark(this, "StubRoutines", name);
+
+    // second argument is the FrameInfo
+    Register fi = c_rarg1;
+
+    address start = __ pc();
+
+    __ movl(c_rarg2, c_rarg1);          // save from interpreter
+    __ movptr(rax, Address(rsp, 0));    // use return address as the frame pc // __ lea(rax, InternalAddress(pcxxxx));
+    __ lea(fi, Address(rsp, wordSize)); // skip return address
+    __ movptr(c_rarg3, rbp);
+
+    // __ stop("FFFFF");
+    __ enter();
+
+    // // return address and rbp are already in place
+    // __ subptr(rsp, (framesize-4) << LogBytesPerInt); // prolog
+
+    push_FrameInfo(masm, fi, fi, c_rarg3, rax);
+
+    int frame_complete = __ pc() - start;
+    address the_pc = __ pc();
+
+    __ post_call_nop(); // this nop must come immediately after the pc value pushed into the FrameInfo; we use it for fast CodeBlob lookup
+
+    if (ContPerfTest > 5) {
+      setup_freeze_invocation(_masm, the_pc);
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, Continuation::freeze), 3);
+      teardown_freeze_invocation(_masm);
+
+      // if (from_java) {
+      //__ set_last_Java_frame(rsp, rbp, the_pc); // may be unnecessary. also, consider MacroAssembler::call_VM_leaf_base
+      //__ call_VM(noreg, CAST_FROM_FN_PTR(address, Continuation::freeze), fi, false); // do NOT check exceptions; they'll get forwarded to the caller
+      // } else {
+      //   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Continuation::freeze_C), fi);
+      // }
+    }
+
+    Label pinned;
+    __ pop(c_rarg2); // read the pc from the FrameInfo
+    if (ContPerfTest <= 5) { __ xorq(c_rarg2, c_rarg2); __ xorq(rax, rax); }
+    __ testq(c_rarg2, c_rarg2);
+    __ jcc(Assembler::zero, pinned);
+
+    __ pop(rbp); // not pinned -- jump to Continuation.run (the entry frame)
+    __ movptr(rbp, Address(rbp, 0)); // frame_info->fp has an indirection here. See Continuation::freeze for an explanation.
+    __ pop(fi);
+    __ movptr(rsp, fi);
+    __ jmp(c_rarg2);
+
+    __ bind(pinned); // pinned -- return to caller
+    __ lea(rsp, Address(rsp, wordSize*2)); // "pop" the rest of the FrameInfo struct
+
+    __ leave();
+    __ ret(0);
+
+    // return start;
+
+    OopMap* map = new OopMap(framesize, 1);
+    // map->set_callee_saved(VMRegImpl::stack2reg(rbp_off), rbp->as_VMReg());
+    oop_maps->add_gc_map(the_pc - start, map);
+
+    RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
+    RuntimeStub::new_runtime_stub(name,
+                                  &code,
+                                  frame_complete,
+                                  (framesize >> (LogBytesPerWord - LogBytesPerInt)),
+                                  oop_maps, false);
+    return stub;
+  }
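+
+  // Control flow of the stub above, as a rough sketch (assumptions: on success
+  // Continuation::freeze rewrites the FrameInfo to describe the Continuation.run
+  // entry frame, and leaves fi.pc == 0 when the continuation is pinned):
+  //
+  //   freeze(thread, &fi);                 // rax <- freeze's result
+  //   if (fi.pc == 0) return;              // pinned: plain return to the caller, result in rax
+  //   rbp = *fi.fp;                        // fi.fp holds an extra indirection (see Continuation::freeze)
+  //   rsp = fi.sp;
+  //   goto fi.pc;                          // jump straight to the entry frame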
+
+  address generate_cont_jump_from_safepoint() {
+    StubCodeMark mark(this, "StubRoutines","Continuation jump from safepoint");
+
+    Register fi = rbx;
+
+    address start = __ pc();
+
+    __ get_thread(r15_thread);
+    __ reset_last_Java_frame(true); // false would likely suffice as well
+
+    __ lea(fi, Address(r15_thread, JavaThread::cont_frame_offset()));
+    __ movptr(rdx, Address(fi, wordSize*0)); // pc
+    __ movptr(rbp, Address(fi, wordSize*1)); // fp
+    __ movptr(rbp, Address(rbp, 0)); // fp is indirect. See Continuation::freeze for an explanation.
+    __ movptr(rsp, Address(fi, wordSize*2)); // sp
+
+    __ xorq(rax, rax);
+    __ movptr(Address(fi, wordSize*0), rax); // pc
+    __ movptr(Address(fi, wordSize*1), rax); // fp
+    __ movptr(Address(fi, wordSize*2), rax); // sp
+    __ movb(Address(r15_thread, JavaThread::cont_preempt_offset()), 0);
+
+    __ jmp(rdx);
+
+    return start;
+  }
+
+  // c_rarg1 - sp
+  // c_rarg2 - fp
+  // c_rarg3 - pc
+  address generate_cont_jump() {
+    StubCodeMark mark(this, "StubRoutines","Continuation Jump");
+    address start = __ pc();
+
+    __ movptr(rbp, c_rarg2);
+    __ movptr(rbp, Address(rbp, 0)); // rbp is indirect. See Continuation::freeze for an explanation.
+    __ movptr(rsp, c_rarg1);
+    __ jmp(c_rarg3);
+
+    return start;
+  }
+
+  address generate_cont_thaw(bool return_barrier, bool exception) {
+    assert (return_barrier || !exception, "must be");
+
+    address start = __ pc();
+
+    // TODO: Handle Valhalla return types. May require generating different return barriers.
+
+    Register fi = r11;
+
+    if (!return_barrier) {
+      __ pop(c_rarg3); // pop the return address; otherwise the bottom-most frozen frame drifts and keeps growing
+      // alternatively: __ lea(rsp, Address(rsp, wordSize));
+      // write sp to thread->_cont_frame.sp
+      __ lea(fi, Address(r15_thread, JavaThread::cont_frame_offset()));
+      __ movptr(Address(fi, wordSize*2), rsp); // sp
+    } else {
+      Label no_saved_sp;
+      __ lea(fi, Address(r15_thread, JavaThread::cont_frame_offset()));
+      __ movptr(fi, Address(fi, wordSize*2)); // sp
+      __ testq(fi, fi);
+      __ jcc(Assembler::zero, no_saved_sp);
+      __ movptr(rsp, fi);
+      __ bind(no_saved_sp);
+    }
+
+    Label thaw_success;
+    __ movptr(fi, rsp);
+    if (return_barrier) {
+      __ push(rax); __ push_d(xmm0); // preserve possible return value from a method returning to the return barrier
+    }
+    __ movl(c_rarg1, return_barrier);
+    push_FrameInfo(_masm, fi, fi, rbp, c_rarg3);
+    if (ContPerfTest > 105) {
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, Continuation::prepare_thaw), fi, c_rarg1);
+    } else {
+      __ xorq(rax, rax);
+    }
+    __ testq(rax, rax);           // rax contains the size of the frames to thaw, 0 if overflow or no more frames
+    __ jcc(Assembler::notZero, thaw_success);
+
+    pop_FrameInfo(_masm, fi, rbp, rbx);
+    if (return_barrier) {
+      __ pop_d(xmm0); __ pop(rax); // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK)
+    }
+    __ movptr(rsp, fi); // we're now on the yield frame (which sits at a higher address than us because rsp has been pushed down)
+    __ jmp(rbx); // a jump to StubRoutines::throw_StackOverflowError_entry
+
+    __ bind(thaw_success);
+
+    pop_FrameInfo(_masm, fi, rbp, c_rarg3); // c_rarg3 would still be our return address
+    if (return_barrier) {
+      __ pop_d(xmm0); __ pop(rdx);   // TEMPORARILY restore return value (we're going to push it again, but rsp is about to move)
+    }
+
+    __ subq(rsp, rax);             // make room for the thawed frames
+    __ subptr(rsp, wordSize);      // make room for return address
+    __ andptr(rsp, -16); // align
+    if (return_barrier) {
+      __ push(rdx); __ push_d(xmm0); // save original return value -- again
+    }
+    push_FrameInfo(_masm, fi, fi, rbp, c_rarg3);
+    __ movl(c_rarg1, return_barrier);
+    __ movl(c_rarg2, exception);
+    if (ContPerfTest > 112) {
+      if (!return_barrier && JvmtiExport::can_support_continuations()) {
+        __ call_VM(noreg, CAST_FROM_FN_PTR(address, Continuation::thaw), fi, c_rarg1, c_rarg2);
+      } else {
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, Continuation::thaw_leaf), fi, c_rarg1, c_rarg2);
+      }
+    }
+    if (exception) {
+      __ movptr(rdx, rax); // rdx must contain the original pc in the case of exception
+    }
+    pop_FrameInfo(_masm, fi, rbp, rbx);
+    if (return_barrier) {
+      __ pop_d(xmm0); __ pop(rax); // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK)
+    }
+
+    __ movptr(rsp, fi); // we're now on the yield frame (which sits at a higher address than us because rsp has been pushed down)
+
+    if (!return_barrier) {
+      // This is necessary for forced yields, as the return address (in rbx) is captured inside a call_VM and skips the restoration of rbcp and the locals register
+      // ... but it does no harm even for ordinary yields
+      // TODO: use InterpreterMacroAssembler
+      static const Register _locals_register = LP64_ONLY(r14) NOT_LP64(rdi);
+      static const Register _bcp_register    = LP64_ONLY(r13) NOT_LP64(rsi);
+
+      Label not_interpreter;
+      __ testq(rax, rax); // rax is non-zero iff we're jumping into the interpreter
+      __ jcc(Assembler::zero, not_interpreter);
+
+      // see InterpreterMacroAssembler::restore_bcp/restore_locals
+      __ movptr(_bcp_register,    Address(rbp, frame::interpreter_frame_bcp_offset    * wordSize));
+      __ movptr(_locals_register, Address(rbp, frame::interpreter_frame_locals_offset * wordSize));
+      // __ reinit_heapbase();
+
+      __ bind(not_interpreter);
+
+      __ movl(rax, 0); // return 0 (success) from doYield
+    }
+
+    __ jmp(rbx);
+
+    return start;
+  }
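+
+  // The thaw path above, in outline (a sketch only; the division of work
+  // between Continuation::prepare_thaw and Continuation::thaw is assumed
+  // from the calls made above):
+  //
+  //   size = prepare_thaw(&fi, return_barrier);   // rax <- bytes needed for the thawed frames
+  //   if (size == 0) { rsp = fi.sp; goto fi.pc; } // overflow or nothing to thaw; fi.pc is presumably
+  //                                               // throw_StackOverflowError_entry (see comment above)
+  //   rsp -= size + wordSize; align(rsp, 16);     // make room for the frames plus a return address
+  //   thaw(&fi, return_barrier, exception);       // copies the frozen frames back onto the stack
+  //   rsp = fi.sp; goto fi.pc;                    // resume in the top thawed frame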
+
+  address generate_cont_thaw() {
+    StubCodeMark mark(this, "StubRoutines", "Cont thaw");
+    address start = __ pc();
+    generate_cont_thaw(false, false);
+    return start;
+  }
+
+  address generate_cont_returnBarrier() {
+    // TODO: will probably need multiple return barriers depending on return type
+    StubCodeMark mark(this, "StubRoutines", "cont return barrier");
+    address start = __ pc();
+
+    if (CONT_FULL_STACK)
+      __ stop("RETURN BARRIER -- UNREACHABLE 0");
+
+    generate_cont_thaw(true, false);
+
+    return start;
+  }
+
+  address generate_cont_returnBarrier_exception() {
+    StubCodeMark mark(this, "StubRoutines", "cont return barrier exception handler");
+    address start = __ pc();
+
+    if (CONT_FULL_STACK)
+      __ stop("RETURN BARRIER -- UNREACHABLE 0");
+
+    generate_cont_thaw(true, true);
+
+    return start;
+  }
+
+  address generate_cont_getPC() {
+    StubCodeMark mark(this, "StubRoutines", "GetPC");
+    address start = __ pc();
+
+    __ movptr(rax, Address(rsp, 0));
+    __ ret(0);
+
+    return start;
+  }
+
+  address generate_cont_getSP() { // used by C2
+    StubCodeMark mark(this, "StubRoutines", "getSP");
+    address start = __ pc();
+
+    __ set_cont_fastpath(get_thread(), 1);
+    __ lea(rax, Address(rsp, wordSize));
+    __ ret(0);
+
+    return start;
+  }
+
+  address generate_cont_getFP() {
+    StubCodeMark mark(this, "StubRoutines", "GetFP");
+    address start = __ pc();
+
+    __ stop("WHAT?");
+    __ lea(rax, Address(rsp, wordSize));
+    __ ret(0);
+
+    return start;
+  }
+
 #undef __
 #define __ masm->
 
   // Continuation point for throwing of implicit exceptions that are
   // not handled in the current activation. Fabricates an exception

@@ -5921,10 +6293,23 @@
         StubRoutines::_dtan = generate_libmTan();
       }
     }
   }
 
+  void generate_phase1() {
+    // Continuation stubs:
+    StubRoutines::_cont_thaw          = generate_cont_thaw();
+    StubRoutines::_cont_returnBarrier = generate_cont_returnBarrier();
+    StubRoutines::_cont_returnBarrierExc = generate_cont_returnBarrier_exception();
+    StubRoutines::_cont_doYield_stub = generate_cont_doYield();
+    StubRoutines::_cont_doYield    = StubRoutines::_cont_doYield_stub->entry_point();
+    StubRoutines::_cont_jump_from_sp = generate_cont_jump_from_safepoint();
+    StubRoutines::_cont_jump       = generate_cont_jump();
+    StubRoutines::_cont_getSP      = generate_cont_getSP();
+    StubRoutines::_cont_getPC      = generate_cont_getPC();
+  }
+
   void generate_all() {
     // Generates all stubs and initializes the entry points
 
     // These entry points require SharedInfo::stack0 to be set up in
     // non-core builds and need to be relocatable, so they each

@@ -6076,21 +6461,23 @@
       StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch();
     }
   }
 
  public:
-  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
-    if (all) {
-      generate_all();
-    } else {
+  StubGenerator(CodeBuffer* code, int phase) : StubCodeGenerator(code) {
+    if (phase == 0) {
       generate_initial();
+    } else if (phase == 1) {
+      generate_phase1();
+    } else {
+      generate_all();
     }
   }
 }; // end class declaration
 
 #define UCM_TABLE_MAX_ENTRIES 16
-void StubGenerator_generate(CodeBuffer* code, bool all) {
+void StubGenerator_generate(CodeBuffer* code, int phase) {
   if (UnsafeCopyMemory::_table == NULL) {
     UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
   }
-  StubGenerator g(code, all);
+  StubGenerator g(code, phase);
 }
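
Note on the StubGenerator constructor change above (the mapping is exactly what the code shows; how each phase is driven from VM init is outside this file):

    phase == 0  ->  generate_initial()
    phase == 1  ->  generate_phase1()   // the new continuation stubs
    otherwise   ->  generate_all()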