
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

*** 46,10 ***
--- 46,11 ---
  #include "runtime/sharedRuntime.hpp"
  #include "runtime/stubCodeGenerator.hpp"
  #include "runtime/stubRoutines.hpp"
  #include "runtime/thread.inline.hpp"
  #include "utilities/align.hpp"
+ #include "utilities/globalDefinitions.hpp"
  #include "utilities/powerOfTwo.hpp"
  #ifdef COMPILER2
  #include "opto/runtime.hpp"
  #endif
  #if INCLUDE_ZGC

*** 70,10 ***
--- 71,14 ---
  #define BLOCK_COMMENT(str) __ block_comment(str)
  #endif
  
  #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  
+ // Continuation-entry helpers, defined at the end of this file.
+ OopMap* continuation_enter_setup(MacroAssembler* masm, int& stack_slots);
+ void fill_continuation_entry(MacroAssembler* masm);
+ void continuation_enter_cleanup(MacroAssembler* masm);
+ 
  // Stub Code definitions
  
  class StubGenerator: public StubCodeGenerator {
   private:
  

*** 346,10 ***
--- 351,12 ---
        __ stop("StubRoutines::call_stub: threads must correspond");
        __ BIND(L);
      }
  #endif
  
+     // leaving Java code: pop the continuation fast-path marker for this thread
+     __ pop_cont_fastpath(rthread);
+ 
      // restore callee-save registers
      __ ldpd(v15, v14,  d15_save);
      __ ldpd(v13, v12,  d13_save);
      __ ldpd(v11, v10,  d11_save);
      __ ldpd(v9,  v8,   d9_save);

*** 6303,10 ***
--- 6310,307 ---
  
      ICache::invalidate_range(first_entry, __ pc() - first_entry);
    }
  #endif // LINUX
  
+   RuntimeStub* generate_cont_doYield() {
+     const char* name = "cont_doYield";
+ 
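+     // framesize is counted in 32-bit VMRegImpl stack slots; the saved rfp and lr each take two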
+     enum layout {
+       rfp_off1,
+       rfp_off2,
+       lr_off,
+       lr_off2,
+       framesize // inclusive of return address
+     };
+     // assert(is_even(framesize/2), "sp not 16-byte aligned");
+     
+     int insts_size = 512;
+     int locs_size  = 64;
+     CodeBuffer code(name, insts_size, locs_size);
+     OopMapSet* oop_maps  = new OopMapSet();
+     MacroAssembler* masm = new MacroAssembler(&code);
+     MacroAssembler* _masm = masm;
+ 
+     address start = __ pc();
+ 
+     __ enter();
+ 
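+     // second argument to Continuation::freeze: the sp of this frame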
+     __ mov(c_rarg1, sp);
+ 
+     int frame_complete = __ pc() - start;
+     address the_pc = __ pc();
+ 
+     __ post_call_nop(); // this must come exactly after the pc value that is pushed into the frame info; we use this nop for fast CodeBlob lookup
+ 
+     __ mov(c_rarg0, rthread);
+     __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
+ 
+     __ call_VM_leaf(CAST_FROM_FN_PTR(address, Continuation::freeze), 2);
+       
+     __ reset_last_Java_frame(true);
+ 
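+     // Continuation::freeze returns 0 on success; a non-zero result means the continuation is pinned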
+     Label pinned;
+ 
+     __ cbnz(r0, pinned);
+ 
+     __ ldr(rscratch1, Address(rthread, JavaThread::cont_entry_offset()));
+     __ mov(sp, rscratch1);
+     continuation_enter_cleanup(masm);
+ 
+     __ bind(pinned); // pinned -- return to caller
+     
+     __ leave();
+     __ ret(lr);
+ 
+     OopMap* map = new OopMap(framesize, 1);
+     // map->set_callee_saved(VMRegImpl::stack2reg(rfp_off), rfp->as_VMReg());
+     oop_maps->add_gc_map(the_pc - start, map);
+ 
+     RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
+     RuntimeStub::new_runtime_stub(name,
+                                   &code,
+                                   frame_complete,
+                                   (framesize >> (LogBytesPerWord - LogBytesPerInt)),
+                                   oop_maps, false);
+     return stub;
+   }
+ 
+   address generate_cont_jump_from_safepoint() {
+     __ align(CodeEntryAlignment);
+     StubCodeMark mark(this, "StubRoutines", "Continuation jump from safepoint");
+ 
+     address start = __ pc();
+ 
+ #ifdef ASSERT
+     { // verify that threads correspond
+       Label L;
+       __ get_thread(rscratch1);
+       __ cmp(rthread, rscratch1);
+       __ br(Assembler::EQ, L);
+       __ stop("StubRoutines::cont_jump_from_safepoint: threads must correspond");
+       __ BIND(L);
+     }
+ #endif
+ 
+     __ reset_last_Java_frame(true); // passing false would also work here
+     __ reinit_heapbase();
+     
+     __ ldr(rscratch1, Address(rthread, JavaThread::cont_entry_offset()));
+     __ mov(sp, rscratch1);
+     continuation_enter_cleanup(_masm);
+     __ leave();
+     __ ret(lr);
+ 
+     return start;
+   }
+ 
+   address generate_cont_thaw(bool return_barrier, bool exception) {
+     assert (return_barrier || !exception, "must be");
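+     // three flavors: plain thaw (from doYield), the return barrier, and the return barrier with a pending exception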
+ 
+     address start = __ pc();
+ 
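+     // when entering through the return barrier, reset sp to the thread's ContinuationEntry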
+     if (return_barrier) {
+       __ ldr(rscratch1, Address(rthread, JavaThread::cont_entry_offset()));
+       __ mov(sp, rscratch1);
+     }
+     assert_asm(_masm, (__ ldr(rscratch1, Address(rthread, JavaThread::cont_entry_offset())), __ cmp(sp, rscratch1)), Assembler::EQ, "incorrect sp");
+ 
+     if (return_barrier) {
+       // preserve possible return value from a method returning to the return barrier
+       __ fmovd(rscratch1, v0);
+       __ stp(rscratch1, r0, Address(__ pre(sp, -2 * wordSize)));
+     }
+ 
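+     // the second argument selects the thaw kind: 0 = plain thaw, 1 = return barrier, 2 = return barrier with exception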
+     __ movw(c_rarg1, (return_barrier ? 1 : 0) + (exception ? 1 : 0));
+     __ call_VM_leaf(CAST_FROM_FN_PTR(address, Continuation::prepare_thaw), rthread, c_rarg1);
+     __ mov(rscratch2, r0); // r0 contains the size of the frames to thaw, 0 if overflow or no more frames
+ 
+     if (return_barrier) {
+       // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK)
+       __ ldp(rscratch1, r0, Address(__ post(sp, 2 * wordSize)));
+       __ fmovd(v0, rscratch1);
+     }
+     assert_asm(_masm, (__ ldr(rscratch1, Address(rthread, JavaThread::cont_entry_offset())), __ cmp(sp, rscratch1)), Assembler::EQ, "incorrect sp");
+ 
+     Label thaw_success;
+     // rscratch2 contains the size of the frames to thaw, 0 if overflow or no more frames
+     __ cbnz(rscratch2, thaw_success);
+     __ lea(rscratch1, ExternalAddress(StubRoutines::throw_StackOverflowError_entry()));
+     __ br(rscratch1);
+     __ bind(thaw_success);
+     
+     // make room for the thawed frames
+     __ sub(rscratch1, sp, rscratch2);
+     __ andr(rscratch1, rscratch1, -16); // align
+     __ mov(sp, rscratch1);
+     
+     if (return_barrier) {
+       // save original return value -- again
+       __ fmovd(rscratch1, v0);
+       __ stp(rscratch1, r0, Address(__ pre(sp, -2 * wordSize)));
+     }
+ 
+     __ movw(c_rarg1, (return_barrier ? 1 : 0) + (exception ? 1 : 0));
+     __ call_VM_leaf(CAST_FROM_FN_PTR(address, Continuation::thaw), rthread, c_rarg1);
+     __ mov(rscratch2, r0); // r0 is the sp of the yielding frame
+ 
+     if (return_barrier) {
+       // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK)
+       __ ldp(rscratch1, r0, Address(__ post(sp, 2 * wordSize)));
+       __ fmovd(v0, rscratch1);
+     } else {
+       __ mov(r0, zr); // return 0 (success) from doYield
+     }
+ 
+     // we're now on the yield frame (which is at a higher address than us, because sp has been moved down)
+     __ sub(sp, rscratch2, 2*wordSize); // now pointing to rfp spill
+     __ mov(rfp, sp);
+ 
+     if (exception) {
+       __ ldr(c_rarg1, Address(rfp, wordSize)); // return address
+       __ verify_oop(r0);
+       __ mov(r19, r0); // save the return value containing the exception oop in callee-saved r19
+ 
+       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rthread, c_rarg1);
+ 
+       // Reinitialize the ptrue predicate register, in case the external runtime call clobbers ptrue reg, as we may return to SVE compiled code.
+       // __ reinitialize_ptrue();
+ 
+       // see OptoRuntime::generate_exception_blob: r0 -- exception oop, r3 -- exception pc
+ 
+       __ mov(r1, r0); // the exception handler
+       __ mov(r0, r19); // restore the return value containing the exception oop
+       __ verify_oop(r0);
+ 
+       __ leave();
+       __ mov(r3, lr);
+       __ br(r1); // the exception handler
+     }
+ 
+     // We're "returning" into the topmost thawed frame; see Thaw::push_return_frame
+     __ leave();
+     __ ret(lr);
+ 
+     return start;
+   }
+ 
+   address generate_cont_thaw() {
+     StubCodeMark mark(this, "StubRoutines", "Cont thaw");
+     address start = __ pc();
+     generate_cont_thaw(false, false);
+     return start;
+   }
+ 
+   address generate_cont_returnBarrier() {
+     // TODO: will probably need multiple return barriers depending on return type
+     StubCodeMark mark(this, "StubRoutines", "cont return barrier");
+     address start = __ pc();
+ 
+     generate_cont_thaw(true, false);
+ 
+     return start;
+   }
+ 
+   address generate_cont_returnBarrier_exception() {
+     StubCodeMark mark(this, "StubRoutines", "cont return barrier exception handler");
+     address start = __ pc();
+ 
+     generate_cont_thaw(true, true);
+ 
+     return start;
+   }
+ 
+   address generate_cont_interpreter_forced_preempt_return() {
+     StubCodeMark mark(this, "StubRoutines", "cont interpreter forced preempt return");
+     address start = __ pc();
+ 
+     // This is necessary for forced yields: the return address (in lr) is captured in a call_VM, which skips
+     // the restoration of rbcp and locals
+ 
+     assert_asm(_masm, __ cmp(sp, rfp), Assembler::EQ, "sp != fp"); // __ mov(rfp, sp);
+     __ leave(); // we're now on the last thawed frame
+ 
+     __ ldr(rbcp,    Address(rfp, frame::interpreter_frame_bcp_offset    * wordSize)); // InterpreterMacroAssembler::restore_bcp()
+     __ ldr(rlocals, Address(rfp, frame::interpreter_frame_locals_offset * wordSize)); // InterpreterMacroAssembler::restore_locals()
+     __ ldr(rcpool,  Address(rfp, frame::interpreter_frame_cache_offset  * wordSize)); // InterpreterMacroAssembler::restore_constant_pool_cache()
+     __ ldr(rmethod, Address(rfp, frame::interpreter_frame_method_offset * wordSize)); // InterpreterMacroAssembler::get_method(rmethod) -- might not be necessary
+     // __ reinit_heapbase();
+ 
+     // Restore the stack bottom in case i2c adjusted the stack, and NULL it as a marker that esp is now tos until the next java call
+     __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+     __ str(zr,  Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+ 
+     __ ret(lr);
+ 
+     return start;
+   }
+ 
+ #if INCLUDE_JFR
+ 
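+   // Record the last Java frame (pc and sp) in the thread's JavaFrameAnchor ahead of the runtime call.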
+   static void jfr_set_last_java_frame(MacroAssembler* _masm, Register thread) {
+     Register last_java_pc = c_rarg0;
+     Register last_java_sp = c_rarg2;
+     __ ldr(last_java_pc, Address(sp, 0));
+     __ lea(last_java_sp, Address(sp, wordSize));
+     // __ vzeroupper();
+     Address anchor_java_pc(thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
+     __ str(last_java_pc, anchor_java_pc);
+     __ str(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
+   }
+ 
+   static void jfr_prologue(MacroAssembler* _masm, Register thread) {
+     jfr_set_last_java_frame(_masm, thread);
+     __ mov(c_rarg0, rthread);
+   }
+ 
+   // The handle is dereferenced here using the correct load constructs.
+   static void jfr_epilogue(MacroAssembler* _masm, Register thread) {
+     __ reset_last_Java_frame(false);
+     Label null_jobject;
+     __ cbz(r0, null_jobject);
+     DecoratorSet decorators = ACCESS_READ | IN_NATIVE;
+     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+     bs->load_at(_masm, decorators, T_OBJECT, r0, Address(r0, 0), rscratch1, rthread);
+     __ bind(null_jobject);
+   }
+ 
+   // For c2: c_rarg0 is junk, c_rarg1 is the thread id. Calls into the runtime to write a checkpoint.
+   // The runtime returns a jobject handle to the event writer. The handle is dereferenced and the return
+   // value is the event writer oop.
+   address generate_jfr_write_checkpoint() {
+     StubCodeMark mark(this, "jfr_write_checkpoint", "JFR C2 support for Virtual Threads");
+     address start = __ pc();
+ 
+     __ enter();
+     jfr_prologue(_masm, rthread);
+     __ call_VM_leaf(CAST_FROM_FN_PTR(address, JFR_WRITE_CHECKPOINT_FUNCTION), 2);
+     jfr_epilogue(_masm, rthread);
+     __ leave();
+     __ ret(lr);
+ 
+     return start;
+   }
+ 
+   // For c1: call the corresponding runtime routine, it returns a jobject handle to the event writer.
+   // The handle is dereferenced and the return value is the event writer oop.
+   address generate_jfr_get_event_writer() {
+     StubCodeMark mark(this, "jfr_get_event_writer", "JFR C1 support for Virtual Threads");
+     address start = __ pc();
+ 
+     __ enter();
+     jfr_prologue(_masm, rthread);
+     __ call_VM_leaf(CAST_FROM_FN_PTR(address, JFR_GET_EVENT_WRITER_FUNCTION), 1);
+     jfr_epilogue(_masm, rthread);
+     __ leave();
+     __ ret(lr);
+ 
+     return start;
+   }
+ 
+ #endif // INCLUDE_JFR
+ 
    // Continuation point for throwing of implicit exceptions that are
    // not handled in the current activation. Fabricates an exception
    // oop and initiates normal exception dispatching in this
    // frame. Since we need to preserve callee-saved values (currently
    // only for C2, but done for C1 as well) we need a callee-saved oop

*** 7295,10 ***
--- 7599,24 ---
      generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                                                         &StubRoutines::_safefetchN_fault_pc,
                                                         &StubRoutines::_safefetchN_continuation_pc);
    }
  
+   void generate_phase1() {
+     // Continuation stubs:
+     StubRoutines::_cont_thaw             = generate_cont_thaw();
+     StubRoutines::_cont_returnBarrier    = generate_cont_returnBarrier();
+     StubRoutines::_cont_returnBarrierExc = generate_cont_returnBarrier_exception();
+     StubRoutines::_cont_doYield_stub     = generate_cont_doYield();
+     StubRoutines::_cont_doYield          = StubRoutines::_cont_doYield_stub->entry_point();
+     StubRoutines::_cont_jump_from_sp     = generate_cont_jump_from_safepoint();
+     StubRoutines::_cont_interpreter_forced_preempt_return = generate_cont_interpreter_forced_preempt_return();
+ 
+     JFR_ONLY(StubRoutines::_jfr_write_checkpoint = generate_jfr_write_checkpoint();)
+     JFR_ONLY(StubRoutines::_jfr_get_event_writer = generate_jfr_get_event_writer();)
+   }
+ 
    void generate_all() {
      // support for verify_oop (must happen after universe_init)
      StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
      StubRoutines::_throw_AbstractMethodError_entry =
        generate_throw_exception("AbstractMethodError throw_exception",

*** 7429,25 ***
  
      StubRoutines::aarch64::set_completed();
    }
  
   public:
!   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
!     if (all) {
-       generate_all();
-     } else {
        generate_initial();
      }
    }
  }; // end class declaration
  
  #define UCM_TABLE_MAX_ENTRIES 8
! void StubGenerator_generate(CodeBuffer* code, bool all) {
    if (UnsafeCopyMemory::_table == NULL) {
      UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
    }
!   StubGenerator g(code, all);
  }
  
  
  #ifdef LINUX
  
--- 7747,27 ---
  
      StubRoutines::aarch64::set_completed();
    }
  
   public:
!   StubGenerator(CodeBuffer* code, int phase) : StubCodeGenerator(code) {
!     if (phase == 0) {
        generate_initial();
+     } else if (phase == 1) {
+       generate_phase1(); // stubs that must be available for the interpreter
+     } else {
+       generate_all();
      }
    }
  }; // end class declaration
  
  #define UCM_TABLE_MAX_ENTRIES 8
! void StubGenerator_generate(CodeBuffer* code, int phase) {
    if (UnsafeCopyMemory::_table == NULL) {
      UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
    }
!   StubGenerator g(code, phase);
  }
  
  
  #ifdef LINUX
  

*** 7478,5 ***
--- 7798,74 ---
  DEFAULT_ATOMIC_OP(cmpxchg, 8, _seq_cst)
  
  #undef DEFAULT_ATOMIC_OP
  
  #endif // LINUX
+ 
+ 
+ #undef __
+ #define __ masm->
+ 
+ // on exit, sp points to the ContinuationEntry
+ OopMap* continuation_enter_setup(MacroAssembler* masm, int& stack_slots) {
+   assert (ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, "");
+   assert (in_bytes(ContinuationEntry::cont_offset())  % VMRegImpl::stack_slot_size == 0, "");
+   assert (in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, "");
+ 
+   stack_slots += (int)ContinuationEntry::size()/wordSize;
+   __ sub(sp, sp, (int)ContinuationEntry::size()); // place Continuation metadata
+ 
+   OopMap* map = new OopMap(((int)ContinuationEntry::size() + wordSize) / VMRegImpl::stack_slot_size, 0 /* arg_slots */);
+   ContinuationEntry::setup_oopmap(map);
+ 
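+   // link the new entry to its parent and publish it as the thread's current ContinuationEntry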
+   __ ldr(rscratch1, Address(rthread, JavaThread::cont_entry_offset()));
+   __ str(rscratch1, Address(sp, ContinuationEntry::parent_offset()));
+   __ mov(rscratch1, sp); // we can't use sp as the source in str
+   __ str(rscratch1, Address(rthread, JavaThread::cont_entry_offset()));
+ 
+   return map;
+ }
+ 
+ // on entry, c_rarg1 points to the continuation
+ //           sp points to the ContinuationEntry
+ void fill_continuation_entry(MacroAssembler* masm) {
+ #ifdef ASSERT
+   __ movw(rscratch1, 0x1234);
+   __ strw(rscratch1, Address(sp, ContinuationEntry::cookie_offset()));
+ #endif
+ 
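+   // initialize the entry: the continuation oop, no chunk yet, argsize 0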
+   __ str(c_rarg1, Address(sp, ContinuationEntry::cont_offset()));
+   __ str(zr, Address(sp, ContinuationEntry::chunk_offset()));
+   __ strw(zr, Address(sp, ContinuationEntry::argsize_offset()));
+ 
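+   // save the parent's cont_fastpath and held monitor count so they can be restored on exit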
+   __ ldr(rscratch1, Address(rthread, JavaThread::cont_fastpath_offset()));
+   __ str(rscratch1, Address(sp, ContinuationEntry::parent_cont_fastpath_offset()));
+   __ ldr(rscratch1, Address(rthread, JavaThread::held_monitor_count_offset()));
+   __ str(rscratch1, Address(sp, ContinuationEntry::parent_held_monitor_count_offset()));
+   
+   __ str(zr, Address(rthread, JavaThread::cont_fastpath_offset()));
+   __ reset_held_monitor_count(rthread);
+ }
+ 
+ // on entry, sp points to the ContinuationEntry
+ // on exit, rfp points to the spilled rfp in the entry frame
+ void continuation_enter_cleanup(MacroAssembler* masm) {
+ #ifndef PRODUCT
+   Label OK;
+   __ ldr(rscratch1, Address(rthread, JavaThread::cont_entry_offset()));
+   __ cmp(sp, rscratch1);
+   __ br(Assembler::EQ, OK);
+   __ stop("incorrect sp1");
+   __ bind(OK);
+ #endif
+   
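+   // restore the parent's cont_fastpath and held monitor count saved by fill_continuation_entry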
+   __ ldr(rscratch1, Address(sp, ContinuationEntry::parent_cont_fastpath_offset()));
+   __ str(rscratch1, Address(rthread, JavaThread::cont_fastpath_offset()));
+   __ ldr(rscratch1, Address(sp, ContinuationEntry::parent_held_monitor_count_offset()));
+   __ str(rscratch1, Address(rthread, JavaThread::held_monitor_count_offset()));
+ 
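+   // pop this entry: its parent becomes the thread's current ContinuationEntry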
+   __ ldr(rscratch2, Address(sp, ContinuationEntry::parent_offset()));
+   __ str(rscratch2, Address(rthread, JavaThread::cont_entry_offset()));
+   __ add(rfp, sp, (int)ContinuationEntry::size());
+ }
+ 
+ #undef __