48 #include "runtime/continuationEntry.inline.hpp"
49 #include "runtime/globals.hpp"
50 #include "runtime/jniHandles.hpp"
51 #include "runtime/safepointMechanism.hpp"
52 #include "runtime/sharedRuntime.hpp"
53 #include "runtime/signature.hpp"
54 #include "runtime/stubRoutines.hpp"
55 #include "runtime/timerTrace.hpp"
56 #include "runtime/vframeArray.hpp"
57 #include "runtime/vm_version.hpp"
58 #include "utilities/align.hpp"
59 #include "utilities/checkedCast.hpp"
60 #include "utilities/formatBuffer.hpp"
61 #include "vmreg_x86.inline.hpp"
62 #ifdef COMPILER1
63 #include "c1/c1_Runtime1.hpp"
64 #endif
65 #ifdef COMPILER2
66 #include "opto/runtime.hpp"
67 #endif
68 #if INCLUDE_JVMCI
69 #include "jvmci/jvmciJavaClasses.hpp"
70 #endif
71
72 #define __ masm->
73
74 #ifdef PRODUCT
75 #define BLOCK_COMMENT(str) /* nothing */
76 #else
77 #define BLOCK_COMMENT(str) __ block_comment(str)
78 #endif // PRODUCT
79
80 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
81
82 class RegisterSaver {
83 // Capture info about frame layout. Layout offsets are in jint
84 // units because compiler frame slots are jints.
85 #define XSAVE_AREA_BEGIN 160
86 #define XSAVE_AREA_YMM_BEGIN 576
87 #define XSAVE_AREA_EGPRS 960
142 r8_off, r8H_off,
143 rdi_off, rdiH_off,
144 rsi_off, rsiH_off,
145 ignore_off, ignoreH_off, // extra copy of rbp
146 rsp_off, rspH_off,
147 rbx_off, rbxH_off,
148 rdx_off, rdxH_off,
149 rcx_off, rcxH_off,
150 rax_off, raxH_off,
151 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
152 align_off, alignH_off,
153 flags_off, flagsH_off,
154 // The frame sender code expects that rbp will be in the "natural" place and
155 // will override any oopMap setting for it. We must therefore force the layout
156 // so that it agrees with the frame sender code.
157 rbp_off, rbpH_off, // copy of rbp we will restore
158 return_off, returnH_off, // slot for return address
159 reg_save_size // size in compiler stack slots
160 };
161
162 public:
163 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors);
164 static void restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors = false);
165
166 // Offsets into the register save area
167 // Used by deoptimization when it is managing result register
168 // values on its own
169
170 static int rax_offset_in_bytes(void) { return BytesPerInt * rax_off; }
171 static int rdx_offset_in_bytes(void) { return BytesPerInt * rdx_off; }
172 static int rbx_offset_in_bytes(void) { return BytesPerInt * rbx_off; }
173 static int r15_offset_in_bytes(void) { return BytesPerInt * r15_off; }
174 static int xmm0_offset_in_bytes(void) { return BytesPerInt * xmm0_off; }
175 static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
176
177 // During deoptimization only the result registers need to be restored,
178 // all the other values have already been extracted.
179 static void restore_result_registers(MacroAssembler* masm);
180 };
181
182 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) {
183 int off = 0;
184 int num_xmm_regs = XMMRegister::available_xmm_registers();
185 #if COMPILER2_OR_JVMCI
186 if (save_wide_vectors && UseAVX == 0) {
187 save_wide_vectors = false; // vectors larger than 16 byte long are supported only with AVX
188 }
189 assert(!save_wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
190 #else
191 save_wide_vectors = false; // vectors are generated only by C2 and JVMCI
192 #endif
193
194 // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
195 int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
196 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
197 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
198 // CodeBlob frame size is in words.
199 int frame_size_in_words = frame_size_in_bytes / wordSize;
200 *total_frame_words = frame_size_in_words;
201
202 // Save registers, fpu state, and flags.
203 // We assume caller has already pushed the return address onto the
204 // stack, so rsp is 8-byte aligned here.
  // We push rbp twice in this sequence because we want the real rbp
206 // to be under the return like a normal enter.
207
208 __ enter(); // rsp becomes 16-byte aligned here
209 __ pushf();
210 // Make sure rsp stays 16-byte aligned
211 __ subq(rsp, 8);
212 // Push CPU state in multiple of 16 bytes
414 off = zmm16H_off;
415 delta = zmm17H_off - off;
416 for (int n = 16; n < num_xmm_regs; n++) {
417 XMMRegister zmm_name = as_XMMRegister(n);
418 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
419 off += delta;
420 }
421 }
422 }
423
424 return map;
425 }
426
427 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors) {
428 int num_xmm_regs = XMMRegister::available_xmm_registers();
429 if (frame::arg_reg_save_area_bytes != 0) {
430 // Pop arg register save area
431 __ addptr(rsp, frame::arg_reg_save_area_bytes);
432 }
433
434 #if COMPILER2_OR_JVMCI
435 if (restore_wide_vectors) {
436 assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
437 assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
438 }
439 #else
440 assert(!restore_wide_vectors, "vectors are generated only by C2");
441 #endif
442
443 __ vzeroupper();
444
445 // On EVEX enabled targets everything is handled in pop fpu state
446 if (restore_wide_vectors) {
447 // Restore upper half of YMM registers (0..15)
448 int base_addr = XSAVE_AREA_YMM_BEGIN;
449 for (int n = 0; n < 16; n++) {
450 __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
451 }
452 if (VM_Version::supports_evex()) {
453 // Restore upper half of ZMM registers (0..15)
454 base_addr = XSAVE_AREA_ZMM_BEGIN;
455 for (int n = 0; n < 16; n++) {
456 __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
457 }
458 // Restore full ZMM registers(16..num_xmm_regs)
459 base_addr = XSAVE_AREA_UPPERBANK;
460 int vector_len = Assembler::AVX_512bit;
461 int off = 0;
3584 __ reset_last_Java_frame(true);
3585
3586 __ leave();
3587 __ ret(0);
3588
3589 OopMapSet* oop_maps = new OopMapSet();
3590 OopMap* map = new OopMap(framesize, 1);
3591 oop_maps->add_gc_map(frame_complete, map);
3592
3593 RuntimeStub* stub =
3594 RuntimeStub::new_runtime_stub(name,
3595 &code,
3596 frame_complete,
3597 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3598 oop_maps,
3599 false);
3600 return stub;
3601 }
3602
3603 #endif // INCLUDE_JFR
|
48 #include "runtime/continuationEntry.inline.hpp"
49 #include "runtime/globals.hpp"
50 #include "runtime/jniHandles.hpp"
51 #include "runtime/safepointMechanism.hpp"
52 #include "runtime/sharedRuntime.hpp"
53 #include "runtime/signature.hpp"
54 #include "runtime/stubRoutines.hpp"
55 #include "runtime/timerTrace.hpp"
56 #include "runtime/vframeArray.hpp"
57 #include "runtime/vm_version.hpp"
58 #include "utilities/align.hpp"
59 #include "utilities/checkedCast.hpp"
60 #include "utilities/formatBuffer.hpp"
61 #include "vmreg_x86.inline.hpp"
62 #ifdef COMPILER1
63 #include "c1/c1_Runtime1.hpp"
64 #endif
65 #ifdef COMPILER2
66 #include "opto/runtime.hpp"
67 #endif
68 #if INCLUDE_SHENANDOAHGC
69 #include "gc/shenandoah/shenandoahRuntime.hpp"
70 #endif
71 #if INCLUDE_JVMCI
72 #include "jvmci/jvmciJavaClasses.hpp"
73 #endif
74
75 #define __ masm->
76
77 #ifdef PRODUCT
78 #define BLOCK_COMMENT(str) /* nothing */
79 #else
80 #define BLOCK_COMMENT(str) __ block_comment(str)
81 #endif // PRODUCT
82
83 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
84
85 class RegisterSaver {
86 // Capture info about frame layout. Layout offsets are in jint
87 // units because compiler frame slots are jints.
88 #define XSAVE_AREA_BEGIN 160
89 #define XSAVE_AREA_YMM_BEGIN 576
90 #define XSAVE_AREA_EGPRS 960
145 r8_off, r8H_off,
146 rdi_off, rdiH_off,
147 rsi_off, rsiH_off,
148 ignore_off, ignoreH_off, // extra copy of rbp
149 rsp_off, rspH_off,
150 rbx_off, rbxH_off,
151 rdx_off, rdxH_off,
152 rcx_off, rcxH_off,
153 rax_off, raxH_off,
154 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
155 align_off, alignH_off,
156 flags_off, flagsH_off,
157 // The frame sender code expects that rbp will be in the "natural" place and
158 // will override any oopMap setting for it. We must therefore force the layout
159 // so that it agrees with the frame sender code.
160 rbp_off, rbpH_off, // copy of rbp we will restore
161 return_off, returnH_off, // slot for return address
162 reg_save_size // size in compiler stack slots
163 };
164
165 static void adjust_wide_vectors_support(bool& wide_vectors);
166
167 public:
168 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors);
169 static void restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors = false);
170
171 // Offsets into the register save area
172 // Used by deoptimization when it is managing result register
173 // values on its own
174
175 static int rax_offset_in_bytes(void) { return BytesPerInt * rax_off; }
176 static int rdx_offset_in_bytes(void) { return BytesPerInt * rdx_off; }
177 static int rbx_offset_in_bytes(void) { return BytesPerInt * rbx_off; }
178 static int r15_offset_in_bytes(void) { return BytesPerInt * r15_off; }
179 static int xmm0_offset_in_bytes(void) { return BytesPerInt * xmm0_off; }
180 static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
181
182 // During deoptimization only the result registers need to be restored,
183 // all the other values have already been extracted.
184 static void restore_result_registers(MacroAssembler* masm);
185 };
186
187 // TODO: Should be upstreamed separately.
188 void RegisterSaver::adjust_wide_vectors_support(bool& wide_vectors) {
189 #if COMPILER2_OR_JVMCI
190 if (wide_vectors && UseAVX == 0) {
191 wide_vectors = false; // vectors larger than 16 byte long are supported only with AVX
192 }
193 assert(!wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
194 #else
195 wide_vectors = false; // vectors are generated only by C2 and JVMCI
196 #endif
197 }
198
199 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) {
200 int off = 0;
201 int num_xmm_regs = XMMRegister::available_xmm_registers();
202
203 adjust_wide_vectors_support(save_wide_vectors);
204
205 // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
206 int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
207 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
208 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
209 // CodeBlob frame size is in words.
210 int frame_size_in_words = frame_size_in_bytes / wordSize;
211 *total_frame_words = frame_size_in_words;
212
213 // Save registers, fpu state, and flags.
214 // We assume caller has already pushed the return address onto the
215 // stack, so rsp is 8-byte aligned here.
  // We push rbp twice in this sequence because we want the real rbp
217 // to be under the return like a normal enter.
218
219 __ enter(); // rsp becomes 16-byte aligned here
220 __ pushf();
221 // Make sure rsp stays 16-byte aligned
222 __ subq(rsp, 8);
223 // Push CPU state in multiple of 16 bytes
425 off = zmm16H_off;
426 delta = zmm17H_off - off;
427 for (int n = 16; n < num_xmm_regs; n++) {
428 XMMRegister zmm_name = as_XMMRegister(n);
429 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
430 off += delta;
431 }
432 }
433 }
434
435 return map;
436 }
437
438 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors) {
439 int num_xmm_regs = XMMRegister::available_xmm_registers();
440 if (frame::arg_reg_save_area_bytes != 0) {
441 // Pop arg register save area
442 __ addptr(rsp, frame::arg_reg_save_area_bytes);
443 }
444
445 adjust_wide_vectors_support(restore_wide_vectors);
446
447 __ vzeroupper();
448
449 // On EVEX enabled targets everything is handled in pop fpu state
450 if (restore_wide_vectors) {
451 // Restore upper half of YMM registers (0..15)
452 int base_addr = XSAVE_AREA_YMM_BEGIN;
453 for (int n = 0; n < 16; n++) {
454 __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
455 }
456 if (VM_Version::supports_evex()) {
457 // Restore upper half of ZMM registers (0..15)
458 base_addr = XSAVE_AREA_ZMM_BEGIN;
459 for (int n = 0; n < 16; n++) {
460 __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
461 }
462 // Restore full ZMM registers(16..num_xmm_regs)
463 base_addr = XSAVE_AREA_UPPERBANK;
464 int vector_len = Assembler::AVX_512bit;
465 int off = 0;
3588 __ reset_last_Java_frame(true);
3589
3590 __ leave();
3591 __ ret(0);
3592
3593 OopMapSet* oop_maps = new OopMapSet();
3594 OopMap* map = new OopMap(framesize, 1);
3595 oop_maps->add_gc_map(frame_complete, map);
3596
3597 RuntimeStub* stub =
3598 RuntimeStub::new_runtime_stub(name,
3599 &code,
3600 frame_complete,
3601 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3602 oop_maps,
3603 false);
3604 return stub;
3605 }
3606
3607 #endif // INCLUDE_JFR
3608
// Generate a runtime stub for the Shenandoah barrier entry selected by
// stub_id: the stub saves the full live register state (including wide
// vectors), calls the corresponding ShenandoahRuntime C entry, and restores
// the registers before returning.
//
// stub_id selects which ShenandoahRuntime entry point the stub calls:
//   - keepalive:                    SATB pre-write barrier (no return value)
//   - lrb_{strong,weak,phantom}[_narrow]: load-reference barriers; these
//     produce the canonical oop in rax, which is carried out of the stub.
// Returns the freshly created RuntimeStub.
RuntimeStub* SharedRuntime::generate_shenandoah_stub(StubId stub_id) {
  assert(UseShenandoahGC, "Only generate when Shenandoah is enabled");

  const char* name = SharedRuntime::stub_name(stub_id);
  address stub_addr = nullptr;
  // All entries except the keepalive (pre-write) barrier return an object.
  bool returns_obj = true;

  switch (stub_id) {
    case StubId::shared_shenandoah_keepalive_id: {
      stub_addr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre);
      returns_obj = false;
      break;
    }
    case StubId::shared_shenandoah_lrb_strong_id: {
      stub_addr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
      break;
    }
    case StubId::shared_shenandoah_lrb_weak_id: {
      stub_addr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
      break;
    }
    case StubId::shared_shenandoah_lrb_phantom_id: {
      stub_addr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
      break;
    }
    case StubId::shared_shenandoah_lrb_strong_narrow_id: {
      stub_addr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
      break;
    }
    case StubId::shared_shenandoah_lrb_weak_narrow_id: {
      stub_addr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
      break;
    }
    case StubId::shared_shenandoah_lrb_phantom_narrow_id: {
      stub_addr = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom_narrow);
      break;
    }
    default:
      ShouldNotReachHere();
  }

  CodeBuffer code(name, 2048, 64);
  MacroAssembler* masm = new MacroAssembler(&code);
  address start = __ pc();

  int frame_size_in_words;
  // Save the full register state so the C call may clobber anything;
  // 'map' records where the saved oops live for the GC.
  OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, true);
  address frame_complete_pc = __ pc();

  address post_call_pc;

  // Call the runtime. This is what MacroAssembler::call_VM_leaf does,
  // but we also want to have the exact post-call PC for the oop map location.
  {
    Label L_stack_aligned, L_end;

#ifdef _WIN64
    // Windows always allocates shadow space for its register args
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
#endif

    // The ABI requires rsp to be 16-byte aligned at the call site; test the
    // low bits and, when misaligned, pad with one 8-byte slot around the call.
    __ testptr(rsp, 15);
    __ jccb(Assembler::zero, L_stack_aligned);
    __ subptr(rsp, 8);
    __ call(RuntimeAddress(stub_addr));
    post_call_pc = __ pc();
    __ addptr(rsp, 8);
    __ jmpb(L_end);
    __ bind(L_stack_aligned);
    __ call(RuntimeAddress(stub_addr));
    // NOTE(review): post_call_pc is overwritten here at assembly time, so the
    // single oop map registered below covers only this (aligned-path) call's
    // return PC, not the padded path's call above — confirm the runtime
    // entries cannot stop for GC at the other return PC, or register an oop
    // map for both call sites.
    post_call_pc = __ pc();
    __ bind(L_end);

#ifdef _WIN64
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
#endif
  }

  if (returns_obj) {
    // RegisterSaver would clobber the call result when restoring.
    // Carry the result out of this stub by overwriting the saved rax slot,
    // so the restore below reloads the barrier's result into rax.
    __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
  }

  OopMapSet* oop_maps = new OopMapSet();
  oop_maps->add_gc_map(post_call_pc - start, map);

  RegisterSaver::restore_live_registers(masm, true);
  __ ret(0);

  // Final 'true' presumably marks caller_must_gc_arguments (the JFR stub
  // above passes false) — TODO confirm against RuntimeStub::new_runtime_stub.
  return RuntimeStub::new_runtime_stub(name,
                                       &code,
                                       frame_complete_pc - start,
                                       frame_size_in_words,
                                       oop_maps,
                                       true);
}
|