48 #include "runtime/continuationEntry.inline.hpp"
49 #include "runtime/globals.hpp"
50 #include "runtime/jniHandles.hpp"
51 #include "runtime/safepointMechanism.hpp"
52 #include "runtime/sharedRuntime.hpp"
53 #include "runtime/signature.hpp"
54 #include "runtime/stubRoutines.hpp"
55 #include "runtime/timerTrace.hpp"
56 #include "runtime/vframeArray.hpp"
57 #include "runtime/vm_version.hpp"
58 #include "utilities/align.hpp"
59 #include "utilities/checkedCast.hpp"
60 #include "utilities/formatBuffer.hpp"
61 #include "vmreg_x86.inline.hpp"
62 #ifdef COMPILER1
63 #include "c1/c1_Runtime1.hpp"
64 #endif
65 #ifdef COMPILER2
66 #include "opto/runtime.hpp"
67 #endif
68 #if INCLUDE_JVMCI
69 #include "jvmci/jvmciJavaClasses.hpp"
70 #endif
71
72 #define __ masm->
73
74 #ifdef PRODUCT
75 #define BLOCK_COMMENT(str) /* nothing */
76 #else
77 #define BLOCK_COMMENT(str) __ block_comment(str)
78 #endif // PRODUCT
79
// Required stack alignment expressed in compiler stack slots
// (VMRegImpl::stack_slot_size bytes each) rather than bytes.
const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
81
82 class RegisterSaver {
83 // Capture info about frame layout. Layout offsets are in jint
84 // units because compiler frame slots are jints.
85 #define XSAVE_AREA_BEGIN 160
86 #define XSAVE_AREA_YMM_BEGIN 576
87 #define XSAVE_AREA_EGPRS 960
142 r8_off, r8H_off,
143 rdi_off, rdiH_off,
144 rsi_off, rsiH_off,
145 ignore_off, ignoreH_off, // extra copy of rbp
146 rsp_off, rspH_off,
147 rbx_off, rbxH_off,
148 rdx_off, rdxH_off,
149 rcx_off, rcxH_off,
150 rax_off, raxH_off,
151 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
152 align_off, alignH_off,
153 flags_off, flagsH_off,
154 // The frame sender code expects that rbp will be in the "natural" place and
155 // will override any oopMap setting for it. We must therefore force the layout
156 // so that it agrees with the frame sender code.
157 rbp_off, rbpH_off, // copy of rbp we will restore
158 return_off, returnH_off, // slot for return address
159 reg_save_size // size in compiler stack slots
160 };
161
162 public:
163 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors);
164 static void restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors = false);
165
166 // Offsets into the register save area
167 // Used by deoptimization when it is managing result register
168 // values on its own
169
170 static int rax_offset_in_bytes(void) { return BytesPerInt * rax_off; }
171 static int rdx_offset_in_bytes(void) { return BytesPerInt * rdx_off; }
172 static int rbx_offset_in_bytes(void) { return BytesPerInt * rbx_off; }
173 static int r15_offset_in_bytes(void) { return BytesPerInt * r15_off; }
174 static int xmm0_offset_in_bytes(void) { return BytesPerInt * xmm0_off; }
175 static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
176
177 // During deoptimization only the result registers need to be restored,
178 // all the other values have already been extracted.
179 static void restore_result_registers(MacroAssembler* masm);
180 };
181
182 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) {
183 int off = 0;
184 int num_xmm_regs = XMMRegister::available_xmm_registers();
185 #if COMPILER2_OR_JVMCI
186 if (save_wide_vectors && UseAVX == 0) {
187 save_wide_vectors = false; // vectors larger than 16 byte long are supported only with AVX
188 }
189 assert(!save_wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
190 #else
191 save_wide_vectors = false; // vectors are generated only by C2 and JVMCI
192 #endif
193
194 // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
195 int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
196 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
197 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
198 // CodeBlob frame size is in words.
199 int frame_size_in_words = frame_size_in_bytes / wordSize;
200 *total_frame_words = frame_size_in_words;
201
202 // Save registers, fpu state, and flags.
203 // We assume caller has already pushed the return address onto the
204 // stack, so rsp is 8-byte aligned here.
// We push rbp twice in this sequence because we want the real rbp
206 // to be under the return like a normal enter.
207
208 __ enter(); // rsp becomes 16-byte aligned here
209 __ pushf();
210 // Make sure rsp stays 16-byte aligned
211 __ subq(rsp, 8);
212 // Push CPU state in multiple of 16 bytes
414 off = zmm16H_off;
415 delta = zmm17H_off - off;
416 for (int n = 16; n < num_xmm_regs; n++) {
417 XMMRegister zmm_name = as_XMMRegister(n);
418 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
419 off += delta;
420 }
421 }
422 }
423
424 return map;
425 }
426
427 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors) {
428 int num_xmm_regs = XMMRegister::available_xmm_registers();
429 if (frame::arg_reg_save_area_bytes != 0) {
430 // Pop arg register save area
431 __ addptr(rsp, frame::arg_reg_save_area_bytes);
432 }
433
434 #if COMPILER2_OR_JVMCI
435 if (restore_wide_vectors) {
436 assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
437 assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
438 }
439 #else
440 assert(!restore_wide_vectors, "vectors are generated only by C2");
441 #endif
442
443 __ vzeroupper();
444
445 // On EVEX enabled targets everything is handled in pop fpu state
446 if (restore_wide_vectors) {
447 // Restore upper half of YMM registers (0..15)
448 int base_addr = XSAVE_AREA_YMM_BEGIN;
449 for (int n = 0; n < 16; n++) {
450 __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
451 }
452 if (VM_Version::supports_evex()) {
453 // Restore upper half of ZMM registers (0..15)
454 base_addr = XSAVE_AREA_ZMM_BEGIN;
455 for (int n = 0; n < 16; n++) {
456 __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
457 }
458 // Restore full ZMM registers(16..num_xmm_regs)
459 base_addr = XSAVE_AREA_UPPERBANK;
460 int vector_len = Assembler::AVX_512bit;
461 int off = 0;
3584 __ reset_last_Java_frame(true);
3585
3586 __ leave();
3587 __ ret(0);
3588
3589 OopMapSet* oop_maps = new OopMapSet();
3590 OopMap* map = new OopMap(framesize, 1);
3591 oop_maps->add_gc_map(frame_complete, map);
3592
3593 RuntimeStub* stub =
3594 RuntimeStub::new_runtime_stub(name,
3595 &code,
3596 frame_complete,
3597 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3598 oop_maps,
3599 false);
3600 return stub;
3601 }
3602
3603 #endif // INCLUDE_JFR
|
48 #include "runtime/continuationEntry.inline.hpp"
49 #include "runtime/globals.hpp"
50 #include "runtime/jniHandles.hpp"
51 #include "runtime/safepointMechanism.hpp"
52 #include "runtime/sharedRuntime.hpp"
53 #include "runtime/signature.hpp"
54 #include "runtime/stubRoutines.hpp"
55 #include "runtime/timerTrace.hpp"
56 #include "runtime/vframeArray.hpp"
57 #include "runtime/vm_version.hpp"
58 #include "utilities/align.hpp"
59 #include "utilities/checkedCast.hpp"
60 #include "utilities/formatBuffer.hpp"
61 #include "vmreg_x86.inline.hpp"
62 #ifdef COMPILER1
63 #include "c1/c1_Runtime1.hpp"
64 #endif
65 #ifdef COMPILER2
66 #include "opto/runtime.hpp"
67 #endif
68 #if INCLUDE_SHENANDOAHGC
69 #include "gc/shenandoah/shenandoahRuntime.hpp"
70 #endif
71 #if INCLUDE_JVMCI
72 #include "jvmci/jvmciJavaClasses.hpp"
73 #endif
74
75 #define __ masm->
76
77 #ifdef PRODUCT
78 #define BLOCK_COMMENT(str) /* nothing */
79 #else
80 #define BLOCK_COMMENT(str) __ block_comment(str)
81 #endif // PRODUCT
82
// Required stack alignment expressed in compiler stack slots
// (VMRegImpl::stack_slot_size bytes each) rather than bytes.
const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
84
85 class RegisterSaver {
86 // Capture info about frame layout. Layout offsets are in jint
87 // units because compiler frame slots are jints.
88 #define XSAVE_AREA_BEGIN 160
89 #define XSAVE_AREA_YMM_BEGIN 576
90 #define XSAVE_AREA_EGPRS 960
145 r8_off, r8H_off,
146 rdi_off, rdiH_off,
147 rsi_off, rsiH_off,
148 ignore_off, ignoreH_off, // extra copy of rbp
149 rsp_off, rspH_off,
150 rbx_off, rbxH_off,
151 rdx_off, rdxH_off,
152 rcx_off, rcxH_off,
153 rax_off, raxH_off,
154 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
155 align_off, alignH_off,
156 flags_off, flagsH_off,
157 // The frame sender code expects that rbp will be in the "natural" place and
158 // will override any oopMap setting for it. We must therefore force the layout
159 // so that it agrees with the frame sender code.
160 rbp_off, rbpH_off, // copy of rbp we will restore
161 return_off, returnH_off, // slot for return address
162 reg_save_size // size in compiler stack slots
163 };
164
165 static void adjust_wide_vectors_support(bool& wide_vectors);
166
167 public:
168 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors);
169 static void restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors = false);
170
171 // Offsets into the register save area
172 // Used by deoptimization when it is managing result register
173 // values on its own
174
175 static int rax_offset_in_bytes(void) { return BytesPerInt * rax_off; }
176 static int rdx_offset_in_bytes(void) { return BytesPerInt * rdx_off; }
177 static int rbx_offset_in_bytes(void) { return BytesPerInt * rbx_off; }
178 static int r15_offset_in_bytes(void) { return BytesPerInt * r15_off; }
179 static int xmm0_offset_in_bytes(void) { return BytesPerInt * xmm0_off; }
180 static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
181
182 // During deoptimization only the result registers need to be restored,
183 // all the other values have already been extracted.
184 static void restore_result_registers(MacroAssembler* masm);
185 };
186
187 // TODO: Should be upstreamed separately.
188 void RegisterSaver::adjust_wide_vectors_support(bool& wide_vectors) {
189 #if COMPILER2_OR_JVMCI
190 if (wide_vectors && UseAVX == 0) {
191 wide_vectors = false; // vectors larger than 16 byte long are supported only with AVX
192 }
193 assert(!wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
194 #else
195 wide_vectors = false; // vectors are generated only by C2 and JVMCI
196 #endif
197 }
198
199 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) {
200 int off = 0;
201 int num_xmm_regs = XMMRegister::available_xmm_registers();
202
203 adjust_wide_vectors_support(save_wide_vectors);
204
205 // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
206 int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
207 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
208 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
209 // CodeBlob frame size is in words.
210 int frame_size_in_words = frame_size_in_bytes / wordSize;
211 *total_frame_words = frame_size_in_words;
212
213 // Save registers, fpu state, and flags.
214 // We assume caller has already pushed the return address onto the
215 // stack, so rsp is 8-byte aligned here.
// We push rbp twice in this sequence because we want the real rbp
217 // to be under the return like a normal enter.
218
219 __ enter(); // rsp becomes 16-byte aligned here
220 __ pushf();
221 // Make sure rsp stays 16-byte aligned
222 __ subq(rsp, 8);
223 // Push CPU state in multiple of 16 bytes
425 off = zmm16H_off;
426 delta = zmm17H_off - off;
427 for (int n = 16; n < num_xmm_regs; n++) {
428 XMMRegister zmm_name = as_XMMRegister(n);
429 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
430 off += delta;
431 }
432 }
433 }
434
435 return map;
436 }
437
438 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors) {
439 int num_xmm_regs = XMMRegister::available_xmm_registers();
440 if (frame::arg_reg_save_area_bytes != 0) {
441 // Pop arg register save area
442 __ addptr(rsp, frame::arg_reg_save_area_bytes);
443 }
444
445 adjust_wide_vectors_support(restore_wide_vectors);
446
447 __ vzeroupper();
448
449 // On EVEX enabled targets everything is handled in pop fpu state
450 if (restore_wide_vectors) {
451 // Restore upper half of YMM registers (0..15)
452 int base_addr = XSAVE_AREA_YMM_BEGIN;
453 for (int n = 0; n < 16; n++) {
454 __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
455 }
456 if (VM_Version::supports_evex()) {
457 // Restore upper half of ZMM registers (0..15)
458 base_addr = XSAVE_AREA_ZMM_BEGIN;
459 for (int n = 0; n < 16; n++) {
460 __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
461 }
462 // Restore full ZMM registers(16..num_xmm_regs)
463 base_addr = XSAVE_AREA_UPPERBANK;
464 int vector_len = Assembler::AVX_512bit;
465 int off = 0;
3588 __ reset_last_Java_frame(true);
3589
3590 __ leave();
3591 __ ret(0);
3592
3593 OopMapSet* oop_maps = new OopMapSet();
3594 OopMap* map = new OopMap(framesize, 1);
3595 oop_maps->add_gc_map(frame_complete, map);
3596
3597 RuntimeStub* stub =
3598 RuntimeStub::new_runtime_stub(name,
3599 &code,
3600 frame_complete,
3601 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3602 oop_maps,
3603 false);
3604 return stub;
3605 }
3606
3607 #endif // INCLUDE_JFR
3608
// Generate a runtime stub that calls a GC slow-path C entry point
// (stub_addr), optionally saving and restoring all live registers around the
// call and registering an oop map at the (post-)call PC.
//
//   stub_id        - identifies the stub; its name is derived via stub_name()
//   stub_addr      - address of the C function to call
//   has_return     - true if the callee leaves a result in rax that must
//                    survive register restoration
//   save_registers - true to save/restore live registers around the call
//   save_vectors   - passed through to RegisterSaver to also save/restore
//                    wide vector state
//
// Returns the newly created RuntimeStub.
RuntimeStub* SharedRuntime::generate_gc_slow_call_blob(StubId stub_id, address stub_addr, bool has_return, bool save_registers, bool save_vectors) {
  const char* name = SharedRuntime::stub_name(stub_id);

  CodeBuffer code(name, 2048, 64);
  MacroAssembler* masm = new MacroAssembler(&code);
  address start = __ pc();

  int frame_size_in_words = 0;
  OopMap* map;
  if (save_registers) {
    map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
  } else {
    // No registers saved: empty oop map over a zero-sized frame
    // (frame_size_in_words is still 0 here).
    map = new OopMap(frame_size_in_words, 0); // FIXME: Correct?
  }
  address frame_complete_pc = __ pc();

  address post_call_pc;

  // Call the runtime. This is what MacroAssembler::call_VM_leaf does,
  // but we also want to have exact post-call PC for oop map location.
  {
    Label L_stack_aligned, L_end;

#ifdef _WIN64
    // Windows always allocates space for its register args
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
#endif

    // The call must be made with rsp 16-byte aligned; test the low bits and,
    // if misaligned, pad with one extra 8-byte slot before calling.
    __ testptr(rsp, 15);
    __ jccb(Assembler::zero, L_stack_aligned);
    __ subptr(rsp, 8);
    __ call(RuntimeAddress(stub_addr));
    post_call_pc = __ pc();
    __ addptr(rsp, 8);
    __ jmpb(L_end);
    __ bind(L_stack_aligned);
    __ call(RuntimeAddress(stub_addr));
    // NOTE(review): this overwrites the post_call_pc captured on the
    // misaligned path above, so only the aligned-path call offset is recorded
    // in the oop map below — confirm the misaligned-path call site never
    // needs an oop map lookup.
    post_call_pc = __ pc();
    __ bind(L_end);

#ifdef _WIN64
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
#endif
  }

  if (save_registers && has_return) {
    // RegisterSaver would clobber the call result when restoring.
    // Carry the result out of this stub by overwriting saved register.
    __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
  }

  OopMapSet* oop_maps = new OopMapSet();
  oop_maps->add_gc_map(post_call_pc - start, map);

  if (save_registers) {
    RegisterSaver::restore_live_registers(masm, save_vectors);
  }
  __ ret(0);

  // caller_must_gc_arguments == true for this stub.
  return RuntimeStub::new_runtime_stub(name,
                                       &code,
                                       frame_complete_pc - start,
                                       frame_size_in_words,
                                       oop_maps,
                                       true);
}
|