1 /*
2 * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef _WINDOWS
26 #include "alloca.h"
27 #endif
28 #include "asm/macroAssembler.hpp"
29 #include "asm/macroAssembler.inline.hpp"
30 #include "classfile/symbolTable.hpp"
31 #include "code/aotCodeCache.hpp"
32 #include "code/compiledIC.hpp"
33 #include "code/debugInfoRec.hpp"
34 #include "code/nativeInst.hpp"
35 #include "code/vtableStubs.hpp"
36 #include "compiler/oopMap.hpp"
37 #include "gc/shared/collectedHeap.hpp"
38 #include "gc/shared/gcLocker.hpp"
39 #include "gc/shared/barrierSet.hpp"
40 #include "gc/shared/barrierSetAssembler.hpp"
41 #include "interpreter/interpreter.hpp"
42 #include "logging/log.hpp"
43 #include "memory/resourceArea.hpp"
44 #include "memory/universe.hpp"
45 #include "oops/klass.inline.hpp"
46 #include "oops/method.inline.hpp"
47 #include "prims/methodHandles.hpp"
48 #include "runtime/continuation.hpp"
49 #include "runtime/continuationEntry.inline.hpp"
50 #include "runtime/globals.hpp"
51 #include "runtime/jniHandles.hpp"
52 #include "runtime/safepointMechanism.hpp"
53 #include "runtime/sharedRuntime.hpp"
54 #include "runtime/signature.hpp"
55 #include "runtime/stubRoutines.hpp"
56 #include "runtime/timerTrace.hpp"
57 #include "runtime/vframeArray.hpp"
58 #include "runtime/vm_version.hpp"
59 #include "utilities/align.hpp"
60 #include "utilities/checkedCast.hpp"
61 #include "utilities/formatBuffer.hpp"
62 #include "vmreg_x86.inline.hpp"
63 #ifdef COMPILER1
64 #include "c1/c1_Runtime1.hpp"
65 #endif
66 #ifdef COMPILER2
67 #include "opto/runtime.hpp"
68 #endif
69
70 #define __ masm->
71
72 #ifdef PRODUCT
73 #define BLOCK_COMMENT(str) /* nothing */
74 #else
75 #define BLOCK_COMMENT(str) __ block_comment(str)
76 #endif // PRODUCT
77
78 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
79
80 class RegisterSaver {
81 // Capture info about frame layout. Layout offsets are in jint
82 // units because compiler frame slots are jints.
83 #define XSAVE_AREA_BEGIN 160
84 #define XSAVE_AREA_YMM_BEGIN 576
85 #define XSAVE_AREA_EGPRS 960
86 #define XSAVE_AREA_OPMASK_BEGIN 1088
87 #define XSAVE_AREA_ZMM_BEGIN 1152
88 #define XSAVE_AREA_UPPERBANK 1664
89 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
90 #define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
91 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum)*32/BytesPerInt, zmm ## regnum ## H_off
92 #define DEF_OPMASK_OFFS(regnum) opmask ## regnum ## _off = opmask_off + (regnum)*8/BytesPerInt, opmask ## regnum ## H_off
93 #define DEF_ZMM_UPPER_OFFS(regnum) zmm ## regnum ## _off = zmm_upper_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
94 enum layout {
95 fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
96 xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area
97 DEF_XMM_OFFS(0),
98 DEF_XMM_OFFS(1),
99 // 2..15 are implied in range usage
100 ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
101 DEF_YMM_OFFS(0),
102 DEF_YMM_OFFS(1),
103 r16_off = xmm_off + (XSAVE_AREA_EGPRS - XSAVE_AREA_BEGIN)/BytesPerInt,
104 r16H_off,
105 r17_off, r17H_off,
106 r18_off, r18H_off,
107 r19_off, r19H_off,
108 r20_off, r20H_off,
109 r21_off, r21H_off,
110 r22_off, r22H_off,
111 r23_off, r23H_off,
112 r24_off, r24H_off,
113 r25_off, r25H_off,
114 r26_off, r26H_off,
115 r27_off, r27H_off,
116 r28_off, r28H_off,
117 r29_off, r29H_off,
118 r30_off, r30H_off,
119 r31_off, r31H_off,
120 opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
121 DEF_OPMASK_OFFS(0),
122 DEF_OPMASK_OFFS(1),
123 // 2..7 are implied in range usage
124 zmm_off = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
125 DEF_ZMM_OFFS(0),
126 DEF_ZMM_OFFS(1),
127 zmm_upper_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
128 DEF_ZMM_UPPER_OFFS(16),
129 DEF_ZMM_UPPER_OFFS(17),
130 // 18..31 are implied in range usage
131 fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
132 fpu_stateH_end,
133 r15_off, r15H_off,
134 r14_off, r14H_off,
135 r13_off, r13H_off,
136 r12_off, r12H_off,
137 r11_off, r11H_off,
138 r10_off, r10H_off,
139 r9_off, r9H_off,
140 r8_off, r8H_off,
141 rdi_off, rdiH_off,
142 rsi_off, rsiH_off,
143 ignore_off, ignoreH_off, // extra copy of rbp
144 rsp_off, rspH_off,
145 rbx_off, rbxH_off,
146 rdx_off, rdxH_off,
147 rcx_off, rcxH_off,
148 rax_off, raxH_off,
149 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
150 align_off, alignH_off,
151 flags_off, flagsH_off,
152 // The frame sender code expects that rbp will be in the "natural" place and
153 // will override any oopMap setting for it. We must therefore force the layout
154 // so that it agrees with the frame sender code.
155 rbp_off, rbpH_off, // copy of rbp we will restore
156 return_off, returnH_off, // slot for return address
157 reg_save_size // size in compiler stack slots
158 };
159
160 public:
161 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors);
162 static void restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors = false);
163
164 // Offsets into the register save area
165 // Used by deoptimization when it is managing result register
166 // values on its own
167
168 static int rax_offset_in_bytes(void) { return BytesPerInt * rax_off; }
169 static int rdx_offset_in_bytes(void) { return BytesPerInt * rdx_off; }
170 static int rbx_offset_in_bytes(void) { return BytesPerInt * rbx_off; }
171 static int r15_offset_in_bytes(void) { return BytesPerInt * r15_off; }
172 static int xmm0_offset_in_bytes(void) { return BytesPerInt * xmm0_off; }
173 static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
174
175 // During deoptimization only the result registers need to be restored,
176 // all the other values have already been extracted.
177 static void restore_result_registers(MacroAssembler* masm);
178 };
179
180 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) {
181 int off = 0;
182 int num_xmm_regs = XMMRegister::available_xmm_registers();
183 #ifdef COMPILER2
184 if (save_wide_vectors && UseAVX == 0) {
185 save_wide_vectors = false; // vectors larger than 16 byte long are supported only with AVX
186 }
187 assert(!save_wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
188 #else
189 save_wide_vectors = false; // vectors are generated only by C2
190 #endif // COMPILER2
191
192 // Always make the frame size 16-byte aligned, both vector and non vector stacks are always allocated
193 int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
194 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
195 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
196 // CodeBlob frame size is in words.
197 int frame_size_in_words = frame_size_in_bytes / wordSize;
198 *total_frame_words = frame_size_in_words;
199
200 // Save registers, fpu state, and flags.
201 // We assume caller has already pushed the return address onto the
202 // stack, so rsp is 8-byte aligned here.
203 // We push rpb twice in this sequence because we want the real rbp
204 // to be under the return like a normal enter.
205
206 __ enter(); // rsp becomes 16-byte aligned here
207 __ pushf();
208 // Make sure rsp stays 16-byte aligned
209 __ subq(rsp, 8);
210 // Push CPU state in multiple of 16 bytes
211 __ save_legacy_gprs();
212 __ push_FPU_state();
213
214
215 // push cpu state handles this on EVEX enabled targets
216 if (save_wide_vectors) {
217 // Save upper half of YMM registers(0..15)
218 int base_addr = XSAVE_AREA_YMM_BEGIN;
219 for (int n = 0; n < 16; n++) {
220 __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n));
221 }
222 if (VM_Version::supports_evex()) {
223 // Save upper half of ZMM registers(0..15)
224 base_addr = XSAVE_AREA_ZMM_BEGIN;
225 for (int n = 0; n < 16; n++) {
226 __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
227 }
228 // Save full ZMM registers(16..num_xmm_regs)
229 base_addr = XSAVE_AREA_UPPERBANK;
230 off = 0;
231 int vector_len = Assembler::AVX_512bit;
232 for (int n = 16; n < num_xmm_regs; n++) {
233 __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
234 }
235 #ifdef COMPILER2
236 base_addr = XSAVE_AREA_OPMASK_BEGIN;
237 off = 0;
238 for(int n = 0; n < KRegister::number_of_registers; n++) {
239 __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
240 }
241 #endif // COMPILER2
242 }
243 } else {
244 if (VM_Version::supports_evex()) {
245 // Save upper bank of XMM registers(16..31) for scalar or 16-byte vector usage
246 int base_addr = XSAVE_AREA_UPPERBANK;
247 off = 0;
248 int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit;
249 for (int n = 16; n < num_xmm_regs; n++) {
250 __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
251 }
252 #ifdef COMPILER2
253 base_addr = XSAVE_AREA_OPMASK_BEGIN;
254 off = 0;
255 for(int n = 0; n < KRegister::number_of_registers; n++) {
256 __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
257 }
258 #endif // COMPILER2
259 }
260 }
261
262 #ifdef COMPILER2
263 if (UseAPX) {
264 int base_addr = XSAVE_AREA_EGPRS;
265 off = 0;
266 for (int n = 16; n < Register::number_of_registers; n++) {
267 __ movq(Address(rsp, base_addr+(off++*8)), as_Register(n));
268 }
269 }
270 #endif // COMPILER2
271
272 __ vzeroupper();
273 if (frame::arg_reg_save_area_bytes != 0) {
274 // Allocate argument register save area
275 __ subptr(rsp, frame::arg_reg_save_area_bytes);
276 }
277
278 // Set an oopmap for the call site. This oopmap will map all
279 // oop-registers and debug-info registers as callee-saved. This
280 // will allow deoptimization at this safepoint to find all possible
281 // debug-info recordings, as well as let GC find all oops.
282
283 OopMapSet *oop_maps = new OopMapSet();
284 OopMap* map = new OopMap(frame_size_in_slots, 0);
285
286 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x))
287
288 map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
289 map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
290 map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
291 map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
292 // rbp location is known implicitly by the frame sender code, needs no oopmap
293 // and the location where rbp was saved by is ignored
294 map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
295 map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
296 map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg());
297 map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg());
298 map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
299 map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
300 map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
301 map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
302 map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
303 map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
304
305 if (UseAPX) {
306 map->set_callee_saved(STACK_OFFSET( r16_off ), r16->as_VMReg());
307 map->set_callee_saved(STACK_OFFSET( r17_off ), r17->as_VMReg());
308 map->set_callee_saved(STACK_OFFSET( r18_off ), r18->as_VMReg());
309 map->set_callee_saved(STACK_OFFSET( r19_off ), r19->as_VMReg());
310 map->set_callee_saved(STACK_OFFSET( r20_off ), r20->as_VMReg());
311 map->set_callee_saved(STACK_OFFSET( r21_off ), r21->as_VMReg());
312 map->set_callee_saved(STACK_OFFSET( r22_off ), r22->as_VMReg());
313 map->set_callee_saved(STACK_OFFSET( r23_off ), r23->as_VMReg());
314 map->set_callee_saved(STACK_OFFSET( r24_off ), r24->as_VMReg());
315 map->set_callee_saved(STACK_OFFSET( r25_off ), r25->as_VMReg());
316 map->set_callee_saved(STACK_OFFSET( r26_off ), r26->as_VMReg());
317 map->set_callee_saved(STACK_OFFSET( r27_off ), r27->as_VMReg());
318 map->set_callee_saved(STACK_OFFSET( r28_off ), r28->as_VMReg());
319 map->set_callee_saved(STACK_OFFSET( r29_off ), r29->as_VMReg());
320 map->set_callee_saved(STACK_OFFSET( r30_off ), r30->as_VMReg());
321 map->set_callee_saved(STACK_OFFSET( r31_off ), r31->as_VMReg());
322 }
323 // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
324 // on EVEX enabled targets, we get it included in the xsave area
325 off = xmm0_off;
326 int delta = xmm1_off - off;
327 for (int n = 0; n < 16; n++) {
328 XMMRegister xmm_name = as_XMMRegister(n);
329 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
330 off += delta;
331 }
332 if (UseAVX > 2) {
333 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
334 off = zmm16_off;
335 delta = zmm17_off - off;
336 for (int n = 16; n < num_xmm_regs; n++) {
337 XMMRegister zmm_name = as_XMMRegister(n);
338 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
339 off += delta;
340 }
341 }
342
343 #ifdef COMPILER2
344 if (save_wide_vectors) {
345 // Save upper half of YMM registers(0..15)
346 off = ymm0_off;
347 delta = ymm1_off - ymm0_off;
348 for (int n = 0; n < 16; n++) {
349 XMMRegister ymm_name = as_XMMRegister(n);
350 map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
351 off += delta;
352 }
353 if (VM_Version::supports_evex()) {
354 // Save upper half of ZMM registers(0..15)
355 off = zmm0_off;
356 delta = zmm1_off - zmm0_off;
357 for (int n = 0; n < 16; n++) {
358 XMMRegister zmm_name = as_XMMRegister(n);
359 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next(8));
360 off += delta;
361 }
362 }
363 }
364 #endif // COMPILER2
365
366 // %%% These should all be a waste but we'll keep things as they were for now
367 if (true) {
368 map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
369 map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
370 map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
371 map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
372 // rbp location is known implicitly by the frame sender code, needs no oopmap
373 map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
374 map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
375 map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next());
376 map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next());
377 map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
378 map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
379 map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
380 map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
381 map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
382 map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
383 if (UseAPX) {
384 map->set_callee_saved(STACK_OFFSET( r16H_off ), r16->as_VMReg()->next());
385 map->set_callee_saved(STACK_OFFSET( r17H_off ), r17->as_VMReg()->next());
386 map->set_callee_saved(STACK_OFFSET( r18H_off ), r18->as_VMReg()->next());
387 map->set_callee_saved(STACK_OFFSET( r19H_off ), r19->as_VMReg()->next());
388 map->set_callee_saved(STACK_OFFSET( r20H_off ), r20->as_VMReg()->next());
389 map->set_callee_saved(STACK_OFFSET( r21H_off ), r21->as_VMReg()->next());
390 map->set_callee_saved(STACK_OFFSET( r22H_off ), r22->as_VMReg()->next());
391 map->set_callee_saved(STACK_OFFSET( r23H_off ), r23->as_VMReg()->next());
392 map->set_callee_saved(STACK_OFFSET( r24H_off ), r24->as_VMReg()->next());
393 map->set_callee_saved(STACK_OFFSET( r25H_off ), r25->as_VMReg()->next());
394 map->set_callee_saved(STACK_OFFSET( r26H_off ), r26->as_VMReg()->next());
395 map->set_callee_saved(STACK_OFFSET( r27H_off ), r27->as_VMReg()->next());
396 map->set_callee_saved(STACK_OFFSET( r28H_off ), r28->as_VMReg()->next());
397 map->set_callee_saved(STACK_OFFSET( r29H_off ), r29->as_VMReg()->next());
398 map->set_callee_saved(STACK_OFFSET( r30H_off ), r30->as_VMReg()->next());
399 map->set_callee_saved(STACK_OFFSET( r31H_off ), r31->as_VMReg()->next());
400 }
401 // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
402 // on EVEX enabled targets, we get it included in the xsave area
403 off = xmm0H_off;
404 delta = xmm1H_off - off;
405 for (int n = 0; n < 16; n++) {
406 XMMRegister xmm_name = as_XMMRegister(n);
407 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
408 off += delta;
409 }
410 if (UseAVX > 2) {
411 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
412 off = zmm16H_off;
413 delta = zmm17H_off - off;
414 for (int n = 16; n < num_xmm_regs; n++) {
415 XMMRegister zmm_name = as_XMMRegister(n);
416 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
417 off += delta;
418 }
419 }
420 }
421
422 return map;
423 }
424
425 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors) {
426 int num_xmm_regs = XMMRegister::available_xmm_registers();
427 if (frame::arg_reg_save_area_bytes != 0) {
428 // Pop arg register save area
429 __ addptr(rsp, frame::arg_reg_save_area_bytes);
430 }
431
432 #ifdef COMPILER2
433 if (restore_wide_vectors) {
434 assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
435 assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
436 }
437 #else
438 assert(!restore_wide_vectors, "vectors are generated only by C2");
439 #endif // COMPILER2
440
441 __ vzeroupper();
442
443 // On EVEX enabled targets everything is handled in pop fpu state
444 if (restore_wide_vectors) {
445 // Restore upper half of YMM registers (0..15)
446 int base_addr = XSAVE_AREA_YMM_BEGIN;
447 for (int n = 0; n < 16; n++) {
448 __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
449 }
450 if (VM_Version::supports_evex()) {
451 // Restore upper half of ZMM registers (0..15)
452 base_addr = XSAVE_AREA_ZMM_BEGIN;
453 for (int n = 0; n < 16; n++) {
454 __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
455 }
456 // Restore full ZMM registers(16..num_xmm_regs)
457 base_addr = XSAVE_AREA_UPPERBANK;
458 int vector_len = Assembler::AVX_512bit;
459 int off = 0;
460 for (int n = 16; n < num_xmm_regs; n++) {
461 __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
462 }
463 #ifdef COMPILER2
464 base_addr = XSAVE_AREA_OPMASK_BEGIN;
465 off = 0;
466 for (int n = 0; n < KRegister::number_of_registers; n++) {
467 __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
468 }
469 #endif // COMPILER2
470 }
471 } else {
472 if (VM_Version::supports_evex()) {
473 // Restore upper bank of XMM registers(16..31) for scalar or 16-byte vector usage
474 int base_addr = XSAVE_AREA_UPPERBANK;
475 int off = 0;
476 int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit;
477 for (int n = 16; n < num_xmm_regs; n++) {
478 __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
479 }
480 #ifdef COMPILER2
481 base_addr = XSAVE_AREA_OPMASK_BEGIN;
482 off = 0;
483 for (int n = 0; n < KRegister::number_of_registers; n++) {
484 __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
485 }
486 #endif // COMPILER2
487 }
488 }
489
490 #ifdef COMPILER2
491 if (UseAPX) {
492 int base_addr = XSAVE_AREA_EGPRS;
493 int off = 0;
494 for (int n = 16; n < Register::number_of_registers; n++) {
495 __ movq(as_Register(n), Address(rsp, base_addr+(off++*8)));
496 }
497 }
498 #endif // COMPILER2
499
500 // Recover CPU state
501 __ pop_FPU_state();
502 __ restore_legacy_gprs();
503 __ addq(rsp, 8);
504 __ popf();
505 // Get the rbp described implicitly by the calling convention (no oopMap)
506 __ pop(rbp);
507 }
508
509 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
510
511 // Just restore result register. Only used by deoptimization. By
512 // now any callee save register that needs to be restored to a c2
513 // caller of the deoptee has been extracted into the vframeArray
514 // and will be stuffed into the c2i adapter we create for later
515 // restoration so only result registers need to be restored here.
516
517 // Restore fp result register
518 __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
519 // Restore integer result register
520 __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
521 __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));
522
523 // Pop all of the register save are off the stack except the return address
524 __ addptr(rsp, return_offset_in_bytes());
525 }
526
527 // Is vector's size (in bytes) bigger than a size saved by default?
528 // 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions.
529 bool SharedRuntime::is_wide_vector(int size) {
530 return size > 16;
531 }
532
533 // ---------------------------------------------------------------------------
534 // Read the array of BasicTypes from a signature, and compute where the
535 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
536 // quantities. Values less than VMRegImpl::stack0 are registers, those above
537 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
538 // as framesizes are fixed.
539 // VMRegImpl::stack0 refers to the first slot 0(sp).
540 // and VMRegImpl::stack0+1 refers to the memory word 4-byes higher.
541 // Register up to Register::number_of_registers are the 64-bit
542 // integer registers.
543
544 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
545 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
546 // units regardless of build. Of course for i486 there is no 64 bit build
547
548 // The Java calling convention is a "shifted" version of the C ABI.
549 // By skipping the first C ABI register we can call non-static jni methods
550 // with small numbers of arguments without having to shuffle the arguments
551 // at all. Since we control the java ABI we ought to at least get some
552 // advantage out of it.
553
554 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
555 VMRegPair *regs,
556 int total_args_passed) {
557
558 // Create the mapping between argument positions and
559 // registers.
560 static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
561 j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5
562 };
563 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
564 j_farg0, j_farg1, j_farg2, j_farg3,
565 j_farg4, j_farg5, j_farg6, j_farg7
566 };
567
568
569 uint int_args = 0;
570 uint fp_args = 0;
571 uint stk_args = 0;
572
573 for (int i = 0; i < total_args_passed; i++) {
574 switch (sig_bt[i]) {
575 case T_BOOLEAN:
576 case T_CHAR:
577 case T_BYTE:
578 case T_SHORT:
579 case T_INT:
580 if (int_args < Argument::n_int_register_parameters_j) {
581 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
582 } else {
583 stk_args = align_up(stk_args, 2);
584 regs[i].set1(VMRegImpl::stack2reg(stk_args));
585 stk_args += 1;
586 }
587 break;
588 case T_VOID:
589 // halves of T_LONG or T_DOUBLE
590 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
591 regs[i].set_bad();
592 break;
593 case T_LONG:
594 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
595 // fall through
596 case T_OBJECT:
597 case T_ARRAY:
598 case T_ADDRESS:
599 if (int_args < Argument::n_int_register_parameters_j) {
600 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
601 } else {
602 stk_args = align_up(stk_args, 2);
603 regs[i].set2(VMRegImpl::stack2reg(stk_args));
604 stk_args += 2;
605 }
606 break;
607 case T_FLOAT:
608 if (fp_args < Argument::n_float_register_parameters_j) {
609 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
610 } else {
611 stk_args = align_up(stk_args, 2);
612 regs[i].set1(VMRegImpl::stack2reg(stk_args));
613 stk_args += 1;
614 }
615 break;
616 case T_DOUBLE:
617 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
618 if (fp_args < Argument::n_float_register_parameters_j) {
619 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
620 } else {
621 stk_args = align_up(stk_args, 2);
622 regs[i].set2(VMRegImpl::stack2reg(stk_args));
623 stk_args += 2;
624 }
625 break;
626 default:
627 ShouldNotReachHere();
628 break;
629 }
630 }
631
632 return stk_args;
633 }
634
635 // Same as java_calling_convention() but for multiple return
636 // values. There's no way to store them on the stack so if we don't
637 // have enough registers, multiple values can't be returned.
638 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
639 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
640 int SharedRuntime::java_return_convention(const BasicType *sig_bt,
641 VMRegPair *regs,
642 int total_args_passed) {
643 // Create the mapping between argument positions and
644 // registers.
645 static const Register INT_ArgReg[java_return_convention_max_int] = {
646 rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
647 };
648 static const XMMRegister FP_ArgReg[java_return_convention_max_float] = {
649 j_farg0, j_farg1, j_farg2, j_farg3,
650 j_farg4, j_farg5, j_farg6, j_farg7
651 };
652
653
654 uint int_args = 0;
655 uint fp_args = 0;
656
657 for (int i = 0; i < total_args_passed; i++) {
658 switch (sig_bt[i]) {
659 case T_BOOLEAN:
660 case T_CHAR:
661 case T_BYTE:
662 case T_SHORT:
663 case T_INT:
664 if (int_args < Argument::n_int_register_parameters_j+1) {
665 regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
666 int_args++;
667 } else {
668 return -1;
669 }
670 break;
671 case T_VOID:
672 // halves of T_LONG or T_DOUBLE
673 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
674 regs[i].set_bad();
675 break;
676 case T_LONG:
677 assert(sig_bt[i + 1] == T_VOID, "expecting half");
678 // fall through
679 case T_OBJECT:
680 case T_ARRAY:
681 case T_ADDRESS:
682 case T_METADATA:
683 if (int_args < Argument::n_int_register_parameters_j+1) {
684 regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
685 int_args++;
686 } else {
687 return -1;
688 }
689 break;
690 case T_FLOAT:
691 if (fp_args < Argument::n_float_register_parameters_j) {
692 regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
693 fp_args++;
694 } else {
695 return -1;
696 }
697 break;
698 case T_DOUBLE:
699 assert(sig_bt[i + 1] == T_VOID, "expecting half");
700 if (fp_args < Argument::n_float_register_parameters_j) {
701 regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
702 fp_args++;
703 } else {
704 return -1;
705 }
706 break;
707 default:
708 ShouldNotReachHere();
709 break;
710 }
711 }
712
713 return int_args + fp_args;
714 }
715
716 // Patch the callers callsite with entry to compiled code if it exists.
717 static void patch_callers_callsite(MacroAssembler *masm) {
718 Label L;
719 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
720 __ jcc(Assembler::equal, L);
721
722 // Save the current stack pointer
723 __ mov(r13, rsp);
724 // Schedule the branch target address early.
725 // Call into the VM to patch the caller, then jump to compiled callee
726 // rax isn't live so capture return address while we easily can
727 __ movptr(rax, Address(rsp, 0));
728
729 // align stack so push_CPU_state doesn't fault
730 __ andptr(rsp, -(StackAlignmentInBytes));
731 __ push_CPU_state();
732 __ vzeroupper();
733 // VM needs caller's callsite
734 // VM needs target method
735 // This needs to be a long call since we will relocate this adapter to
736 // the codeBuffer and it may not reach
737
738 // Allocate argument register save area
739 if (frame::arg_reg_save_area_bytes != 0) {
740 __ subptr(rsp, frame::arg_reg_save_area_bytes);
741 }
742 __ mov(c_rarg0, rbx);
743 __ mov(c_rarg1, rax);
744 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
745
746 // De-allocate argument register save area
747 if (frame::arg_reg_save_area_bytes != 0) {
748 __ addptr(rsp, frame::arg_reg_save_area_bytes);
749 }
750
751 __ vzeroupper();
752 __ pop_CPU_state();
753 // restore sp
754 __ mov(rsp, r13);
755 __ bind(L);
756 }
757
758 // For each inline type argument, sig includes the list of fields of
759 // the inline type. This utility function computes the number of
760 // arguments for the call if inline types are passed by reference (the
761 // calling convention the interpreter expects).
762 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
763 int total_args_passed = 0;
764 if (InlineTypePassFieldsAsArgs) {
765 for (int i = 0; i < sig_extended->length(); i++) {
766 BasicType bt = sig_extended->at(i)._bt;
767 if (bt == T_METADATA) {
768 // In sig_extended, an inline type argument starts with:
769 // T_METADATA, followed by the types of the fields of the
770 // inline type and T_VOID to mark the end of the value
771 // type. Inline types are flattened so, for instance, in the
772 // case of an inline type with an int field and an inline type
773 // field that itself has 2 fields, an int and a long:
774 // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second
775 // slot for the T_LONG) T_VOID (inner inline type) T_VOID
776 // (outer inline type)
777 total_args_passed++;
778 int vt = 1;
779 do {
780 i++;
781 BasicType bt = sig_extended->at(i)._bt;
782 BasicType prev_bt = sig_extended->at(i-1)._bt;
783 if (bt == T_METADATA) {
784 vt++;
785 } else if (bt == T_VOID &&
786 prev_bt != T_LONG &&
787 prev_bt != T_DOUBLE) {
788 vt--;
789 }
790 } while (vt != 0);
791 } else {
792 total_args_passed++;
793 }
794 }
795 } else {
796 total_args_passed = sig_extended->length();
797 }
798 return total_args_passed;
799 }
800
801
// Emits the code that copies one compiled-convention argument (or one field
// of a scalarized inline type argument) to the memory location 'to'.
//
//   bt, prev_bt   - type of the current and of the preceding signature entry.
//                   A T_VOID entry emits nothing; it is only checked (by
//                   assert) to follow a T_LONG or T_DOUBLE.
//   size_in_bytes - number of bytes to load/store. A value equal to wordSize
//                   selects the double-precision move for XMM sources.
//   reg_pair      - source location: invalid (nothing is emitted), a stack
//                   slot, a general purpose register or an XMM register.
//   to            - destination address of the store.
//   extraspace    - byte offset added to the stack-slot offset when the
//                   source is read from the stack relative to rsp.
//   is_oop        - store through store_heap_oop() instead of a plain sized
//                   store.
//
// A stack source is first loaded into rax.
static void gen_c2i_adapter_helper(MacroAssembler* masm,
                                   BasicType bt,
                                   BasicType prev_bt,
                                   size_t size_in_bytes,
                                   const VMRegPair& reg_pair,
                                   const Address& to,
                                   int extraspace,
                                   bool is_oop) {
  if (bt == T_VOID) {
    assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
    return;
  }

  // Say 4 args:
  // i   st_off
  // 0   32 T_LONG
  // 1   24 T_VOID
  // 2   16 T_OBJECT
  // 3    8 T_BOOL
  // -    0 return address
  //
  // However, to make things extra confusing: because we can fit a long/double
  // in a single slot on a 64 bit VM and it would be silly to break them up,
  // the interpreter leaves one slot empty and only stores to a single slot.
  // In this case the slot that is occupied is the T_VOID slot. See, I said it
  // was confusing.

  bool wide = (size_in_bytes == wordSize);
  VMReg r_1 = reg_pair.first();
  VMReg r_2 = reg_pair.second();
  assert(r_2->is_valid() == wide, "invalid size");
  if (!r_1->is_valid()) {
    // Nothing to copy for an invalid location.
    assert(!r_2->is_valid(), "must be invalid");
    return;
  }

  if (!r_1->is_XMMRegister()) {
    // Source is a stack slot or a general purpose register.
    Register val = rax;
    if (r_1->is_stack()) {
      // Stack sources are addressed relative to rsp and staged in rax.
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
      __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
    } else {
      val = r_1->as_Register();
    }
    assert_different_registers(to.base(), val, rscratch1);
    if (is_oop) {
      // r13 and rbx are handed to store_heap_oop as temporaries, so their
      // values are saved and restored around the store.
      __ push(r13);
      __ push(rbx);
      // store_heap_oop transitively calls oop_store_at which corrupts to.base(). We need to keep it valid.
      __ push(to.base());
      __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
      __ pop(to.base());
      __ pop(rbx);
      __ pop(r13);
    } else {
      __ store_sized_value(to, val, size_in_bytes);
    }
  } else {
    // Source is an XMM register: pick the move matching the value width.
    if (wide) {
      __ movdbl(to, r_1->as_XMMRegister());
    } else {
      __ movflt(to, r_1->as_XMMRegister());
    }
  }
}
866
// Generates one c2i (compiled-to-interpreter) adapter for the signature
// 'sig_extended', whose compiled-convention argument locations are in 'regs'.
//
// The emitted code
//   - optionally starts with a class initialization barrier for static
//     methods (requires_clinit_barrier); the pc right after the barrier is
//     returned through 'c2i_no_clinit_check_entry',
//   - runs the barrier set's c2i entry barrier and patch_callers_callsite(),
//   - binds 'skip_fixup',
//   - if InlineTypePassFieldsAsArgs is set and the signature contains a
//     T_METADATA entry, calls SharedRuntime::allocate_inline_types() with
//     'alloc_inline_receiver' as its third argument. For that call a GC map
//     is added to 'oop_maps' (at the call's return pc relative to 'start'),
//     and 'frame_complete' / 'frame_size_in_words' are set,
//   - grows the stack, writes all arguments in the interpreter's layout and
//     jumps to the interpreter entry of the Method* held in rbx.
static void gen_c2i_adapter(MacroAssembler *masm,
                            const GrowableArray<SigEntry>* sig_extended,
                            const VMRegPair *regs,
                            bool requires_clinit_barrier,
                            address& c2i_no_clinit_check_entry,
                            Label& skip_fixup,
                            address start,
                            OopMapSet* oop_maps,
                            int& frame_complete,
                            int& frame_size_in_words,
                            bool alloc_inline_receiver) {
  if (requires_clinit_barrier) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    Label L_skip_barrier;
    Register method = rbx;

    { // Bypass the barrier for non-static methods
      Register flags = rscratch1;
      __ load_unsigned_short(flags, Address(method, Method::access_flags_offset()));
      __ testl(flags, JVM_ACC_STATIC);
      __ jcc(Assembler::zero, L_skip_barrier); // non-static
    }

    Register klass = rscratch1;
    __ load_method_holder(klass, method);
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
    // Entry point for callers that do not need the class initialization check.
    c2i_no_clinit_check_entry = __ pc();
  }

  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bs->c2i_entry_barrier(masm);

  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

  __ bind(skip_fixup);

  if (InlineTypePassFieldsAsArgs) {
    // Is there an inline type argument?
    bool has_inline_argument = false;
    for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
      has_inline_argument = (sig_extended->at(i)._bt == T_METADATA);
    }
    if (has_inline_argument) {
      // There is at least one inline type argument: we're coming from
      // compiled code so we may not have buffers to back the inline type
      // objects. Allocate the buffers here with a runtime call for
      // the inline type arguments that need a buffer.
      OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ false);

      frame_complete = __ offset();

      __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);

      // Arguments of the runtime call: thread, Method* (rbx), alloc_inline_receiver.
      __ mov(c_rarg0, r15_thread);
      __ mov(c_rarg1, rbx);
      __ mov64(c_rarg2, (int64_t)alloc_inline_receiver);
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types)));

      oop_maps->add_gc_map((int)(__ pc() - start), map);
      __ reset_last_Java_frame(false);

      RegisterSaver::restore_live_registers(masm);

      Label no_exception;
      __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
      __ jcc(Assembler::equal, no_exception);

      // An exception is pending: clear the vm result and forward the exception.
      __ movptr(Address(r15_thread, JavaThread::vm_result_oop_offset()), NULL_WORD);
      __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
      __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

      __ bind(no_exception);

      // We get an array of objects from the runtime call
      __ get_vm_result_oop(rscratch2); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr()
    }
  }

  // Since all args are passed on the stack, total_args_passed *
  // Interpreter::stackElementSize is the space we need.
  int total_args_passed = compute_total_args_passed_int(sig_extended);
  assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed);

  int extraspace = (total_args_passed * Interpreter::stackElementSize);

  // stack is aligned, keep it that way
  // This is not currently needed or enforced by the interpreter, but
  // we might as well conform to the ABI.
  extraspace = align_up(extraspace, 2*wordSize);

  // set senderSP value
  __ lea(r13, Address(rsp, wordSize));

#ifdef ASSERT
  __ check_stack_alignment(r13, "sender stack not aligned");
#endif
  if (extraspace > 0) {
    // Pop the return address
    __ pop(rax);

    __ subptr(rsp, extraspace);

    // Push the return address
    __ push(rax);

    // Account for the return address location since we store it first rather
    // than hold it in a register across all the shuffling
    extraspace += wordSize;
  }

#ifdef ASSERT
  __ check_stack_alignment(rsp, "callee stack not aligned", wordSize, rax);
#endif

  // Now write the args into the outgoing interpreter space

  // next_arg_comp is the next argument from the compiler point of
  // view (inline type fields are passed in registers/on the stack). In
  // sig_extended, an inline type argument starts with: T_METADATA,
  // followed by the types of the fields of the inline type and T_VOID
  // to mark the end of the inline type. ignored counts the number of
  // T_METADATA/T_VOID. next_vt_arg is the next inline type argument:
  // used to get the buffer for that argument from the pool of buffers
  // we allocated above and want to pass to the
  // interpreter. next_arg_int is the next argument from the
  // interpreter point of view (inline types are passed by reference).
  for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
       next_arg_comp < sig_extended->length(); next_arg_comp++) {
    assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
    assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?");
    BasicType bt = sig_extended->at(next_arg_comp)._bt;
    // Offset from rsp of the interpreter slot for this argument.
    int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
    if (!InlineTypePassFieldsAsArgs || bt != T_METADATA) {
      // Ordinary argument: copy it straight into its interpreter slot.
      // Longs and doubles are stored at the next lower slot (next_off).
      int next_off = st_off - Interpreter::stackElementSize;
      const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
      // 'regs' has no entries for the ignored T_METADATA/T_VOID delimiters.
      const VMRegPair reg_pair = regs[next_arg_comp-ignored];
      size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
      gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
                             size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false);
      next_arg_int++;
#ifdef ASSERT
      if (bt == T_LONG || bt == T_DOUBLE) {
        // Overwrite the unused slot with known junk
        __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
        __ movptr(Address(rsp, st_off), rax);
      }
#endif /* ASSERT */
    } else {
      // Scalarized inline type argument: the opening T_METADATA is skipped
      // and the whole argument occupies one interpreter slot.
      ignored++;
      next_arg_int++;
      int vt = 1;
      // write fields we get from compiled code in registers/stack
      // slots to the buffer: we know we are done with that inline type
      // argument when we hit the T_VOID that acts as an end of inline
      // type delimiter for this inline type. Inline types are flattened
      // so we might encounter embedded inline types. Each entry in
      // sig_extended contains a field offset in the buffer.
      Label L_null;
      Label not_null_buffer;
      do {
        next_arg_comp++;
        BasicType bt = sig_extended->at(next_arg_comp)._bt;
        BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt;
        if (bt == T_METADATA) {
          // Start of an embedded inline type.
          vt++;
          ignored++;
        } else if (bt == T_VOID &&
                   prev_bt != T_LONG &&
                   prev_bt != T_DOUBLE) {
          // End-of-inline-type delimiter (not the second half of a long/double).
          vt--;
          ignored++;
        } else if (sig_extended->at(next_arg_comp)._vt_oop) {
          // buffer argument: use if non null
          VMReg buffer = regs[next_arg_comp-ignored].first();
          if (buffer->is_stack()) {
            int ld_off = buffer->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
            __ movptr(r14, Address(rsp, ld_off));
          } else {
            __ movptr(r14, buffer->as_Register());
          }
          __ testptr(r14, r14);
          __ jcc(Assembler::notEqual, not_null_buffer);
          // otherwise get the buffer from the just allocated pool of buffers
          int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_OBJECT);
          __ load_heap_oop(r14, Address(rscratch2, index));
          next_vt_arg++;
        } else {
          int off = sig_extended->at(next_arg_comp)._offset;
          if (off == -1) {
            // Nullable inline type argument, emit null check
            VMReg reg = regs[next_arg_comp-ignored].first();
            Label L_notNull;
            if (reg->is_stack()) {
              int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
              __ testb(Address(rsp, ld_off), 1);
            } else {
              __ testb(reg->as_Register(), 1);
            }
            __ jcc(Assembler::notZero, L_notNull);
            // Low bit clear: pass null to the interpreter and skip the field stores.
            __ movptr(Address(rsp, st_off), 0);
            __ jmp(L_null);
            __ bind(L_notNull);
            // 'continue' proceeds with the 'while (vt != 0)' test below.
            continue;
          }
          assert(off > 0, "offset in object should be positive");
          size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
          bool is_oop = is_reference_type(bt);
          // Store the field into the buffer (r14) at its field offset.
          gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
                                 size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop);
        }
      } while (vt != 0);
      // pass the buffer to the interpreter
      __ bind(not_null_buffer);
      __ movptr(Address(rsp, st_off), r14);
      __ bind(L_null);
    }
  }

  // Schedule the branch target address early.
  __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
  __ jmp(rcx);
}
1098
// Generates the i2c (interpreter-to-compiled) adapter: the emitted code
// aligns the stack, moves every argument described by 'sig' from the
// interpreter's stack layout into the compiled-convention location given in
// 'regs', and finally jumps to the address loaded from
// Method::from_compiled_inline_offset() of the Method* in rbx.
// 'comp_args_on_stack' is the number of VMRegImpl stack slots the compiled
// convention needs for stack-passed arguments.
void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int comp_args_on_stack,
                                    const GrowableArray<SigEntry>* sig,
                                    const VMRegPair *regs) {

  // Note: r13 contains the senderSP on entry. We must preserve it since
  // we may do a i2c -> c2i transition if we lose a race where compiled
  // code goes non-entrant while we get args ready.
  // In addition we use r13 to locate all the interpreter args as
  // we must align the stack to 16 bytes on an i2c entry else we
  // lose alignment we expect in all compiled code and register
  // save code can segv when fxsave instructions find improperly
  // aligned stack pointer.

  // Adapters can be frameless because they do not require the caller
  // to perform additional cleanup work, such as correcting the stack pointer.
  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
  // even if a callee has modified the stack pointer.
  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
  // routinely repairs its caller's stack pointer (from sender_sp, which is set
  // up via the senderSP register).
  // In other words, if *either* the caller or callee is interpreted, we can
  // get the stack pointer repaired after a call.
  // This is why c2i and i2c adapters cannot be indefinitely composed.
  // In particular, if a c2i adapter were to somehow call an i2c adapter,
  // both caller and callee would be compiled methods, and neither would
  // clean up the stack pointer changes performed by the two adapters.
  // If this happens, control eventually transfers back to the compiled
  // caller, but with an uncorrected stack, causing delayed havoc.

  // Must preserve original SP for loading incoming arguments because
  // we need to align the outgoing SP for compiled code.
  __ movptr(r11, rsp);

  // Pick up the return address
  __ pop(rax);

  // Convert 4-byte c2 stack slots to words.
  int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;

  if (comp_args_on_stack) {
    __ subptr(rsp, comp_words_on_stack * wordSize);
  }

  // Ensure compiled code always sees stack at proper alignment
  __ andptr(rsp, -16);

  // push the return address and misalign the stack that youngest frame always sees
  // as far as the placement of the call instruction
  __ push(rax);

  // Put saved SP in another register
  const Register saved_sp = rax;
  __ movptr(saved_sp, r11);

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset())));

  int total_args_passed = sig->length();

  // Now generate the shuffle code.  Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    BasicType bt = sig->at(i)._bt;
    if (bt == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      BasicType prev_bt = (i > 0) ? sig->at(i-1)._bt : T_ILLEGAL;
      assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
            "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      // No location assigned to this entry: nothing to move.
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to account for return address )
      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;

      // We can use r13 as a temp here because compiled code doesn't need r13 as an input
      // and if we end up going thru a c2i because of a miss a reasonable value of r13
      // will be generated.
      if (!r_2->is_valid()) {
        // sign extend???
        __ movl(r13, Address(saved_sp, ld_off));
        __ movptr(Address(rsp, st_off), r13);
      } else {
        //
        // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
        // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
        // So we must adjust where to pick up the data to match the interpreter.
        //
        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        const int offset = (bt==T_LONG||bt==T_DOUBLE)?
                           next_off : ld_off;
        __ movq(r13, Address(saved_sp, offset));
        // st_off is LSW (i.e. reg.first())
        __ movq(Address(rsp, st_off), r13);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      // rax holds saved_sp and must not be overwritten by an argument load.
      assert(r != rax, "must be different");
      if (r_2->is_valid()) {
        //
        // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
        // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
        // So we must adjust where to pick up the data to match the interpreter.

        const int offset = (bt==T_LONG||bt==T_DOUBLE)?
                           next_off : ld_off;

        // this can be a misaligned move
        __ movq(r, Address(saved_sp, offset));
      } else {
        // sign extend and use a full word?
        __ movl(r, Address(saved_sp, ld_off));
      }
    } else {
      // XMM register argument: single-slot values are loaded with movflt,
      // two-slot values with movdbl from the lower interpreter slot.
      if (!r_2->is_valid()) {
        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
      } else {
        __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
      }
    }
  }

  __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.

  __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);

  // put Method* where a c2i would expect should we end up there
  // only needed because of c2 resolve stubs return Method* as a result in
  // rax
  __ mov(rax, rbx);
  __ jmp(r11);
}
1264
1265 static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
1266 Register data = rax;
1267 __ ic_check(1 /* end_alignment */);
1268 __ movptr(rbx, Address(data, CompiledICData::speculated_method_offset()));
1269
1270 // Method might have been compiled since the call site was patched to
1271 // interpreted if that is the case treat it as a miss so we can get
1272 // the call site corrected.
1273 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
1274 __ jcc(Assembler::equal, skip_fixup);
1275 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1276 }
1277
1278 // ---------------------------------------------------------------
// Emits, back to back into 'masm', the i2c adapter followed by the c2i
// adapter variants, and records the start address of each entry point in
// 'entry_address' (indexed by the AdapterBlob entry constants).
//
//   sig / regs             - signature and argument locations used for the
//                            i2c adapter and for the non-scalarized c2i
//                            adapter.
//   sig_cc / regs_cc       - signature and locations for the scalarized c2i
//                            adapter.
//   sig_cc_ro / regs_cc_ro - signature and locations for the scalarized c2i
//                            adapter with a non-scalarized receiver; only
//                            generated when regs_cc != regs_cc_ro.
//   allocate_code_blob     - when true, the generated code is wrapped in a
//                            new AdapterBlob returned through 'new_adapter'.
void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
                                            int comp_args_on_stack,
                                            const GrowableArray<SigEntry>* sig,
                                            const VMRegPair* regs,
                                            const GrowableArray<SigEntry>* sig_cc,
                                            const VMRegPair* regs_cc,
                                            const GrowableArray<SigEntry>* sig_cc_ro,
                                            const VMRegPair* regs_cc_ro,
                                            address entry_address[AdapterBlob::ENTRY_COUNT],
                                            AdapterBlob*& new_adapter,
                                            bool allocate_code_blob) {
  entry_address[AdapterBlob::I2C] = __ pc();
  gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);

  // -------------------------------------------------------------------------
  // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
  // to the interpreter.  The args start out packed in the compiled layout.  They
  // need to be unpacked into the interpreter layout.  This will almost always
  // require some stack space.  We grow the current (compiled) stack, then repack
  // the args.  We  finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not RBP, get sick).

  // Both unverified entries start here; the inline variant is redirected
  // below when a separate non-scalarized adapter is generated.
  entry_address[AdapterBlob::C2I_Unverified] = __ pc();
  entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc();
  Label skip_fixup;

  gen_inline_cache_check(masm, skip_fixup);

  OopMapSet* oop_maps = new OopMapSet();
  int frame_complete = CodeOffsets::frame_never_safe;
  int frame_size_in_words = 0;

  // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
  entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
  entry_address[AdapterBlob::C2I_Inline_RO] = __ pc();
  if (regs_cc != regs_cc_ro) {
    // No class init barrier needed because method is guaranteed to be non-static
    gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, entry_address[AdapterBlob::C2I_No_Clinit_Check],
                    skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
    // The label was bound by the adapter above; reset it so that the next
    // adapter can bind it again.
    skip_fixup.reset();
  }

  // Scalarized c2i adapter
  entry_address[AdapterBlob::C2I]        = __ pc();
  entry_address[AdapterBlob::C2I_Inline] = __ pc();
  gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check],
                  skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true);

  // Non-scalarized c2i adapter
  if (regs != regs_cc) {
    // This adapter gets its own inline cache check and replaces the inline
    // entry points recorded above.
    entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc();
    Label inline_entry_skip_fixup;
    gen_inline_cache_check(masm, inline_entry_skip_fixup);

    entry_address[AdapterBlob::C2I_Inline] = __ pc();
    gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check],
                    inline_entry_skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
  }

  // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
  // the GC knows about the location of oop argument locations passed to the c2i adapter.
  if (allocate_code_blob) {
    bool caller_must_gc_arguments = (regs != regs_cc);
    int entry_offset[AdapterHandlerEntry::ENTRIES_COUNT];
    assert(AdapterHandlerEntry::ENTRIES_COUNT == 7, "sanity");
    // Entry points are handed to the blob as offsets rather than addresses.
    AdapterHandlerLibrary::address_to_offset(entry_address, entry_offset);
    new_adapter = AdapterBlob::create(masm->code(), entry_offset, frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
  }
}
1349
1350 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1351 VMRegPair *regs,
1352 int total_args_passed) {
1353
1354 // We return the amount of VMRegImpl stack slots we need to reserve for all
1355 // the arguments NOT counting out_preserve_stack_slots.
1356
1357 // NOTE: These arrays will have to change when c1 is ported
1358 #ifdef _WIN64
1359 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1360 c_rarg0, c_rarg1, c_rarg2, c_rarg3
1361 };
1362 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1363 c_farg0, c_farg1, c_farg2, c_farg3
1364 };
1365 #else
1366 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1367 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5
1368 };
1369 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1370 c_farg0, c_farg1, c_farg2, c_farg3,
1371 c_farg4, c_farg5, c_farg6, c_farg7
1372 };
1373 #endif // _WIN64
1374
1375
1376 uint int_args = 0;
1377 uint fp_args = 0;
1378 uint stk_args = 0; // inc by 2 each time
1379
1380 for (int i = 0; i < total_args_passed; i++) {
1381 switch (sig_bt[i]) {
1382 case T_BOOLEAN:
1383 case T_CHAR:
1384 case T_BYTE:
1385 case T_SHORT:
1386 case T_INT:
1387 if (int_args < Argument::n_int_register_parameters_c) {
1388 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
1389 #ifdef _WIN64
1390 fp_args++;
1391 // Allocate slots for callee to stuff register args the stack.
1392 stk_args += 2;
1393 #endif
1394 } else {
1395 regs[i].set1(VMRegImpl::stack2reg(stk_args));
1396 stk_args += 2;
1397 }
1398 break;
1399 case T_LONG:
1400 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
1401 // fall through
1402 case T_OBJECT:
1403 case T_ARRAY:
1404 case T_ADDRESS:
1405 case T_METADATA:
1406 if (int_args < Argument::n_int_register_parameters_c) {
1407 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
1408 #ifdef _WIN64
1409 fp_args++;
1410 stk_args += 2;
1411 #endif
1412 } else {
1413 regs[i].set2(VMRegImpl::stack2reg(stk_args));
1414 stk_args += 2;
1415 }
1416 break;
1417 case T_FLOAT:
1418 if (fp_args < Argument::n_float_register_parameters_c) {
1419 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
1420 #ifdef _WIN64
1421 int_args++;
1422 // Allocate slots for callee to stuff register args the stack.
1423 stk_args += 2;
1424 #endif
1425 } else {
1426 regs[i].set1(VMRegImpl::stack2reg(stk_args));
1427 stk_args += 2;
1428 }
1429 break;
1430 case T_DOUBLE:
1431 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
1432 if (fp_args < Argument::n_float_register_parameters_c) {
1433 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
1434 #ifdef _WIN64
1435 int_args++;
1436 // Allocate slots for callee to stuff register args the stack.
1437 stk_args += 2;
1438 #endif
1439 } else {
1440 regs[i].set2(VMRegImpl::stack2reg(stk_args));
1441 stk_args += 2;
1442 }
1443 break;
1444 case T_VOID: // Halves of longs and doubles
1445 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
1446 regs[i].set_bad();
1447 break;
1448 default:
1449 ShouldNotReachHere();
1450 break;
1451 }
1452 }
1453 #ifdef _WIN64
1454 // windows abi requires that we always allocate enough stack space
1455 // for 4 64bit registers to be stored down.
1456 if (stk_args < 8) {
1457 stk_args = 8;
1458 }
1459 #endif // _WIN64
1460
1461 return stk_args;
1462 }
1463
1464 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
1465 uint num_bits,
1466 uint total_args_passed) {
1467 assert(num_bits == 64 || num_bits == 128 || num_bits == 256 || num_bits == 512,
1468 "only certain vector sizes are supported for now");
1469
1470 static const XMMRegister VEC_ArgReg[32] = {
1471 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
1472 xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
1473 xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23,
1474 xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31
1475 };
1476
1477 uint stk_args = 0;
1478 uint fp_args = 0;
1479
1480 for (uint i = 0; i < total_args_passed; i++) {
1481 VMReg vmreg = VEC_ArgReg[fp_args++]->as_VMReg();
1482 int next_val = num_bits == 64 ? 1 : (num_bits == 128 ? 3 : (num_bits == 256 ? 7 : 15));
1483 regs[i].set_pair(vmreg->next(next_val), vmreg);
1484 }
1485
1486 return stk_args;
1487 }
1488
1489 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1490 // We always ignore the frame_slots arg and just use the space just below frame pointer
1491 // which by this time is free to use
1492 switch (ret_type) {
1493 case T_FLOAT:
1494 __ movflt(Address(rbp, -wordSize), xmm0);
1495 break;
1496 case T_DOUBLE:
1497 __ movdbl(Address(rbp, -wordSize), xmm0);
1498 break;
1499 case T_VOID: break;
1500 default: {
1501 __ movptr(Address(rbp, -wordSize), rax);
1502 }
1503 }
1504 }
1505
1506 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1507 // We always ignore the frame_slots arg and just use the space just below frame pointer
1508 // which by this time is free to use
1509 switch (ret_type) {
1510 case T_FLOAT:
1511 __ movflt(xmm0, Address(rbp, -wordSize));
1512 break;
1513 case T_DOUBLE:
1514 __ movdbl(xmm0, Address(rbp, -wordSize));
1515 break;
1516 case T_VOID: break;
1517 default: {
1518 __ movptr(rax, Address(rbp, -wordSize));
1519 }
1520 }
1521 }
1522
1523 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
1524 for ( int i = first_arg ; i < arg_count ; i++ ) {
1525 if (args[i].first()->is_Register()) {
1526 __ push(args[i].first()->as_Register());
1527 } else if (args[i].first()->is_XMMRegister()) {
1528 __ subptr(rsp, 2*wordSize);
1529 __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister());
1530 }
1531 }
1532 }
1533
1534 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
1535 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
1536 if (args[i].first()->is_Register()) {
1537 __ pop(args[i].first()->as_Register());
1538 } else if (args[i].first()->is_XMMRegister()) {
1539 __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0));
1540 __ addptr(rsp, 2*wordSize);
1541 }
1542 }
1543 }
1544
1545 static void verify_oop_args(MacroAssembler* masm,
1546 const methodHandle& method,
1547 const BasicType* sig_bt,
1548 const VMRegPair* regs) {
1549 Register temp_reg = rbx; // not part of any compiled calling seq
1550 if (VerifyOops) {
1551 for (int i = 0; i < method->size_of_parameters(); i++) {
1552 if (is_reference_type(sig_bt[i])) {
1553 VMReg r = regs[i].first();
1554 assert(r->is_valid(), "bad oop arg");
1555 if (r->is_stack()) {
1556 __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1557 __ verify_oop(temp_reg);
1558 } else {
1559 __ verify_oop(r->as_Register());
1560 }
1561 }
1562 }
1563 }
1564 }
1565
1566 static void check_continuation_enter_argument(VMReg actual_vmreg,
1567 Register expected_reg,
1568 const char* name) {
1569 assert(!actual_vmreg->is_stack(), "%s cannot be on stack", name);
1570 assert(actual_vmreg->as_Register() == expected_reg,
1571 "%s is in unexpected register: %s instead of %s",
1572 name, actual_vmreg->as_Register()->name(), expected_reg->name());
1573 }
1574
1575
1576 //---------------------------- continuation_enter_setup ---------------------------
1577 //
1578 // Arguments:
1579 // None.
1580 //
1581 // Results:
1582 // rsp: pointer to blank ContinuationEntry
1583 //
1584 // Kills:
1585 // rax
1586 //
1587 static OopMap* continuation_enter_setup(MacroAssembler* masm, int& stack_slots) {
1588 assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, "");
1589 assert(in_bytes(ContinuationEntry::cont_offset()) % VMRegImpl::stack_slot_size == 0, "");
1590 assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, "");
1591
1592 stack_slots += checked_cast<int>(ContinuationEntry::size()) / wordSize;
1593 __ subptr(rsp, checked_cast<int32_t>(ContinuationEntry::size()));
1594
1595 int frame_size = (checked_cast<int>(ContinuationEntry::size()) + wordSize) / VMRegImpl::stack_slot_size;
1596 OopMap* map = new OopMap(frame_size, 0);
1597
1598 __ movptr(rax, Address(r15_thread, JavaThread::cont_entry_offset()));
1599 __ movptr(Address(rsp, ContinuationEntry::parent_offset()), rax);
1600 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rsp);
1601
1602 return map;
1603 }
1604
//---------------------------- fill_continuation_entry ---------------------------
//
// Emits code that initializes the fields of the ContinuationEntry at rsp:
// the continuation object and flags come from the given registers, while
// chunk, argsize and pin_count are zeroed. The thread's cont_fastpath value
// is saved in the entry and then cleared in the thread.
//
// Arguments:
//   rsp: pointer to blank Continuation entry
//   reg_cont_obj: pointer to the continuation
//   reg_flags: flags
//
// Results:
//   rsp: pointer to filled out ContinuationEntry
//
// Kills:
//   rax
//
static void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Register reg_flags) {
  // rax is used as a temporary below, so it must not carry an input.
  assert_different_registers(rax, reg_cont_obj, reg_flags);
#ifdef ASSERT
  // Debug builds additionally store the cookie value into the entry.
  __ movl(Address(rsp, ContinuationEntry::cookie_offset()), ContinuationEntry::cookie_value());
#endif
  __ movptr(Address(rsp, ContinuationEntry::cont_offset()), reg_cont_obj);
  __ movl  (Address(rsp, ContinuationEntry::flags_offset()), reg_flags);
  __ movptr(Address(rsp, ContinuationEntry::chunk_offset()), 0);
  __ movl(Address(rsp, ContinuationEntry::argsize_offset()), 0);
  __ movl(Address(rsp, ContinuationEntry::pin_count_offset()), 0);

  // Save the thread's current cont_fastpath in the entry ...
  __ movptr(rax, Address(r15_thread, JavaThread::cont_fastpath_offset()));
  __ movptr(Address(rsp, ContinuationEntry::parent_cont_fastpath_offset()), rax);

  // ... and clear it in the thread.
  __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), 0);
}
1634
1635 //---------------------------- continuation_enter_cleanup ---------------------------
1636 //
1637 // Arguments:
1638 // rsp: pointer to the ContinuationEntry
1639 //
1640 // Results:
1641 // rsp: pointer to the spilled rbp in the entry frame
1642 //
1643 // Kills:
1644 // rbx
1645 //
1646 static void continuation_enter_cleanup(MacroAssembler* masm) {
1647 #ifdef ASSERT
1648 Label L_good_sp;
1649 __ cmpptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset()));
1650 __ jcc(Assembler::equal, L_good_sp);
1651 __ stop("Incorrect rsp at continuation_enter_cleanup");
1652 __ bind(L_good_sp);
1653 #endif
1654 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_cont_fastpath_offset()));
1655 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), rbx);
1656 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_offset()));
1657 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rbx);
1658 __ addptr(rsp, checked_cast<int32_t>(ContinuationEntry::size()));
1659 }
1660
1661 static void gen_continuation_enter(MacroAssembler* masm,
1662 const VMRegPair* regs,
1663 int& exception_offset,
1664 OopMapSet* oop_maps,
1665 int& frame_complete,
1666 int& stack_slots,
1667 int& interpreted_entry_offset,
1668 int& compiled_entry_offset) {
1669
1670 // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread)
1671 int pos_cont_obj = 0;
1672 int pos_is_cont = 1;
1673 int pos_is_virtual = 2;
1674
1675 // The platform-specific calling convention may present the arguments in various registers.
1676 // To simplify the rest of the code, we expect the arguments to reside at these known
1677 // registers, and we additionally check the placement here in case calling convention ever
1678 // changes.
1679 Register reg_cont_obj = c_rarg1;
1680 Register reg_is_cont = c_rarg2;
1681 Register reg_is_virtual = c_rarg3;
1682
1683 check_continuation_enter_argument(regs[pos_cont_obj].first(), reg_cont_obj, "Continuation object");
1684 check_continuation_enter_argument(regs[pos_is_cont].first(), reg_is_cont, "isContinue");
1685 check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread");
1686
1687 // Utility methods kill rax, make sure there are no collisions
1688 assert_different_registers(rax, reg_cont_obj, reg_is_cont, reg_is_virtual);
1689
1690 AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(),
1691 relocInfo::static_call_type);
1692
1693 address start = __ pc();
1694
1695 Label L_thaw, L_exit;
1696
1697 // i2i entry used at interp_only_mode only
1698 interpreted_entry_offset = __ pc() - start;
1699 {
1700 #ifdef ASSERT
1701 Label is_interp_only;
1702 __ cmpb(Address(r15_thread, JavaThread::interp_only_mode_offset()), 0);
1703 __ jcc(Assembler::notEqual, is_interp_only);
1704 __ stop("enterSpecial interpreter entry called when not in interp_only_mode");
1705 __ bind(is_interp_only);
1706 #endif
1707
1708 __ pop(rax); // return address
1709 // Read interpreter arguments into registers (this is an ad-hoc i2c adapter)
1710 __ movptr(c_rarg1, Address(rsp, Interpreter::stackElementSize*2));
1711 __ movl(c_rarg2, Address(rsp, Interpreter::stackElementSize*1));
1712 __ movl(c_rarg3, Address(rsp, Interpreter::stackElementSize*0));
1713 __ andptr(rsp, -16); // Ensure compiled code always sees stack at proper alignment
1714 __ push(rax); // return address
1715 __ push_cont_fastpath();
1716
1717 __ enter();
1718
1719 stack_slots = 2; // will be adjusted in setup
1720 OopMap* map = continuation_enter_setup(masm, stack_slots);
1721 // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe,
1722 // but that's okay because at the very worst we'll miss an async sample, but we're in interp_only_mode anyway.
1723
1724 __ verify_oop(reg_cont_obj);
1725
1726 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual);
1727
1728 // If continuation, call to thaw. Otherwise, resolve the call and exit.
1729 __ testptr(reg_is_cont, reg_is_cont);
1730 __ jcc(Assembler::notZero, L_thaw);
1731
1732 // --- Resolve path
1733
1734 // Make sure the call is patchable
1735 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset);
1736 // Emit stub for static call
1737 address stub = CompiledDirectCall::emit_to_interp_stub(masm, __ pc());
1738 if (stub == nullptr) {
1739 fatal("CodeCache is full at gen_continuation_enter");
1740 }
1741 __ call(resolve);
1742 oop_maps->add_gc_map(__ pc() - start, map);
1743 __ post_call_nop();
1744
1745 __ jmp(L_exit);
1746 }
1747
1748 // compiled entry
1749 __ align(CodeEntryAlignment);
1750 compiled_entry_offset = __ pc() - start;
1751 __ enter();
1752
1753 stack_slots = 2; // will be adjusted in setup
1754 OopMap* map = continuation_enter_setup(masm, stack_slots);
1755
1756 // Frame is now completed as far as size and linkage.
1757 frame_complete = __ pc() - start;
1758
1759 __ verify_oop(reg_cont_obj);
1760
1761 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual);
1762
1763 // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue)
1764 __ testptr(reg_is_cont, reg_is_cont);
1765 __ jccb(Assembler::notZero, L_thaw);
1766
1767 // --- call Continuation.enter(Continuation c, boolean isContinue)
1768
1769 // Make sure the call is patchable
1770 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset);
1771
1772 // Emit stub for static call
1773 address stub = CompiledDirectCall::emit_to_interp_stub(masm, __ pc());
1774 if (stub == nullptr) {
1775 fatal("CodeCache is full at gen_continuation_enter");
1776 }
1777
1778 // The call needs to be resolved. There's a special case for this in
1779 // SharedRuntime::find_callee_info_helper() which calls
1780 // LinkResolver::resolve_continuation_enter() which resolves the call to
1781 // Continuation.enter(Continuation c, boolean isContinue).
1782 __ call(resolve);
1783
1784 oop_maps->add_gc_map(__ pc() - start, map);
1785 __ post_call_nop();
1786
1787 __ jmpb(L_exit);
1788
1789 // --- Thawing path
1790
1791 __ bind(L_thaw);
1792
1793 ContinuationEntry::_thaw_call_pc_offset = __ pc() - start;
1794 __ call(RuntimeAddress(StubRoutines::cont_thaw()));
1795
1796 ContinuationEntry::_return_pc_offset = __ pc() - start;
1797 oop_maps->add_gc_map(__ pc() - start, map->deep_copy());
1798 __ post_call_nop();
1799
1800 // --- Normal exit (resolve/thawing)
1801
1802 __ bind(L_exit);
1803 ContinuationEntry::_cleanup_offset = __ pc() - start;
1804 continuation_enter_cleanup(masm);
1805 __ pop(rbp);
1806 __ ret(0);
1807
1808 // --- Exception handling path
1809
1810 exception_offset = __ pc() - start;
1811
1812 continuation_enter_cleanup(masm);
1813 __ pop(rbp);
1814
1815 __ movptr(c_rarg0, r15_thread);
1816 __ movptr(c_rarg1, Address(rsp, 0)); // return address
1817
1818 // rax still holds the original exception oop, save it before the call
1819 __ push(rax);
1820
1821 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), 2);
1822 __ movptr(rbx, rax);
1823
1824 // Continue at exception handler:
1825 // rax: exception oop
1826 // rbx: exception handler
1827 // rdx: exception pc
1828 __ pop(rax);
1829 __ verify_oop(rax);
1830 __ pop(rdx);
1831 __ jmp(rbx);
1832 }
1833
1834 static void gen_continuation_yield(MacroAssembler* masm,
1835 const VMRegPair* regs,
1836 OopMapSet* oop_maps,
1837 int& frame_complete,
1838 int& stack_slots,
1839 int& compiled_entry_offset) {
1840 enum layout {
1841 rbp_off,
1842 rbpH_off,
1843 return_off,
1844 return_off2,
1845 framesize // inclusive of return address
1846 };
1847 stack_slots = framesize / VMRegImpl::slots_per_word;
1848 assert(stack_slots == 2, "recheck layout");
1849
1850 address start = __ pc();
1851 compiled_entry_offset = __ pc() - start;
1852 __ enter();
1853 address the_pc = __ pc();
1854
1855 frame_complete = the_pc - start;
1856
1857 // This nop must be exactly at the PC we push into the frame info.
1858 // We use this nop for fast CodeBlob lookup, associate the OopMap
1859 // with it right away.
1860 __ post_call_nop();
1861 OopMap* map = new OopMap(framesize, 1);
1862 oop_maps->add_gc_map(frame_complete, map);
1863
1864 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1);
1865 __ movptr(c_rarg0, r15_thread);
1866 __ movptr(c_rarg1, rsp);
1867 __ call_VM_leaf(Continuation::freeze_entry(), 2);
1868 __ reset_last_Java_frame(true);
1869
1870 Label L_pinned;
1871
1872 __ testptr(rax, rax);
1873 __ jcc(Assembler::notZero, L_pinned);
1874
1875 __ movptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset()));
1876 continuation_enter_cleanup(masm);
1877 __ pop(rbp);
1878 __ ret(0);
1879
1880 __ bind(L_pinned);
1881
1882 // Pinned, return to caller
1883
1884 // handle pending exception thrown by freeze
1885 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
1886 Label ok;
1887 __ jcc(Assembler::equal, ok);
1888 __ leave();
1889 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
1890 __ bind(ok);
1891
1892 __ leave();
1893 __ ret(0);
1894 }
1895
1896 void SharedRuntime::continuation_enter_cleanup(MacroAssembler* masm) {
1897 ::continuation_enter_cleanup(masm);
1898 }
1899
1900 static void gen_special_dispatch(MacroAssembler* masm,
1901 const methodHandle& method,
1902 const BasicType* sig_bt,
1903 const VMRegPair* regs) {
1904 verify_oop_args(masm, method, sig_bt, regs);
1905 vmIntrinsics::ID iid = method->intrinsic_id();
1906
1907 // Now write the args into the outgoing interpreter space
1908 bool has_receiver = false;
1909 Register receiver_reg = noreg;
1910 int member_arg_pos = -1;
1911 Register member_reg = noreg;
1912 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1913 if (ref_kind != 0) {
1914 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument
1915 member_reg = rbx; // known to be free at this point
1916 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1917 } else if (iid == vmIntrinsics::_invokeBasic) {
1918 has_receiver = true;
1919 } else if (iid == vmIntrinsics::_linkToNative) {
1920 member_arg_pos = method->size_of_parameters() - 1; // trailing NativeEntryPoint argument
1921 member_reg = rbx; // known to be free at this point
1922 } else {
1923 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
1924 }
1925
1926 if (member_reg != noreg) {
1927 // Load the member_arg into register, if necessary.
1928 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1929 VMReg r = regs[member_arg_pos].first();
1930 if (r->is_stack()) {
1931 __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1932 } else {
1933 // no data motion is needed
1934 member_reg = r->as_Register();
1935 }
1936 }
1937
1938 if (has_receiver) {
1939 // Make sure the receiver is loaded into a register.
1940 assert(method->size_of_parameters() > 0, "oob");
1941 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1942 VMReg r = regs[0].first();
1943 assert(r->is_valid(), "bad receiver arg");
1944 if (r->is_stack()) {
1945 // Porting note: This assumes that compiled calling conventions always
1946 // pass the receiver oop in a register. If this is not true on some
1947 // platform, pick a temp and load the receiver from stack.
1948 fatal("receiver always in a register");
1949 receiver_reg = j_rarg0; // known to be free at this point
1950 __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1951 } else {
1952 // no data motion is needed
1953 receiver_reg = r->as_Register();
1954 }
1955 }
1956
1957 // Figure out which address we are really jumping to:
1958 MethodHandles::generate_method_handle_dispatch(masm, iid,
1959 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1960 }
1961
1962 // ---------------------------------------------------------------------------
1963 // Generate a native wrapper for a given method. The method takes arguments
1964 // in the Java compiled code convention, marshals them to the native
1965 // convention (handlizes oops, etc), transitions to native, makes the call,
1966 // returns to java state (possibly blocking), unhandlizes any result and
1967 // returns.
1968 //
// Critical native functions are a shorthand for the use of
// GetPrimitiveArrayCritical and disallow the use of any other JNI
// functions.  The wrapper is expected to unpack the arguments before
// passing them to the callee. Critical native functions leave the state _in_Java,
// since they cannot stop for GC.
// Some other parts of JNI setup are skipped, like the tear down of the JNI handle
// block and the check for pending exceptions, since it's impossible for them
// to be thrown.
1977 //
1978 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1979 const methodHandle& method,
1980 int compile_id,
1981 BasicType* in_sig_bt,
1982 VMRegPair* in_regs,
1983 BasicType ret_type) {
1984 if (method->is_continuation_native_intrinsic()) {
1985 int exception_offset = -1;
1986 OopMapSet* oop_maps = new OopMapSet();
1987 int frame_complete = -1;
1988 int stack_slots = -1;
1989 int interpreted_entry_offset = -1;
1990 int vep_offset = -1;
1991 if (method->is_continuation_enter_intrinsic()) {
1992 gen_continuation_enter(masm,
1993 in_regs,
1994 exception_offset,
1995 oop_maps,
1996 frame_complete,
1997 stack_slots,
1998 interpreted_entry_offset,
1999 vep_offset);
2000 } else if (method->is_continuation_yield_intrinsic()) {
2001 gen_continuation_yield(masm,
2002 in_regs,
2003 oop_maps,
2004 frame_complete,
2005 stack_slots,
2006 vep_offset);
2007 } else {
2008 guarantee(false, "Unknown Continuation native intrinsic");
2009 }
2010
2011 #ifdef ASSERT
2012 if (method->is_continuation_enter_intrinsic()) {
2013 assert(interpreted_entry_offset != -1, "Must be set");
2014 assert(exception_offset != -1, "Must be set");
2015 } else {
2016 assert(interpreted_entry_offset == -1, "Must be unset");
2017 assert(exception_offset == -1, "Must be unset");
2018 }
2019 assert(frame_complete != -1, "Must be set");
2020 assert(stack_slots != -1, "Must be set");
2021 assert(vep_offset != -1, "Must be set");
2022 #endif
2023
2024 __ flush();
2025 nmethod* nm = nmethod::new_native_nmethod(method,
2026 compile_id,
2027 masm->code(),
2028 vep_offset,
2029 frame_complete,
2030 stack_slots,
2031 in_ByteSize(-1),
2032 in_ByteSize(-1),
2033 oop_maps,
2034 exception_offset);
2035 if (nm == nullptr) return nm;
2036 if (method->is_continuation_enter_intrinsic()) {
2037 ContinuationEntry::set_enter_code(nm, interpreted_entry_offset);
2038 } else if (method->is_continuation_yield_intrinsic()) {
2039 _cont_doYield_stub = nm;
2040 }
2041 return nm;
2042 }
2043
2044 if (method->is_method_handle_intrinsic()) {
2045 vmIntrinsics::ID iid = method->intrinsic_id();
2046 intptr_t start = (intptr_t)__ pc();
2047 int vep_offset = ((intptr_t)__ pc()) - start;
2048 gen_special_dispatch(masm,
2049 method,
2050 in_sig_bt,
2051 in_regs);
2052 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period
2053 __ flush();
2054 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
2055 return nmethod::new_native_nmethod(method,
2056 compile_id,
2057 masm->code(),
2058 vep_offset,
2059 frame_complete,
2060 stack_slots / VMRegImpl::slots_per_word,
2061 in_ByteSize(-1),
2062 in_ByteSize(-1),
2063 nullptr);
2064 }
2065 address native_func = method->native_function();
2066 assert(native_func != nullptr, "must have function");
2067
2068 // An OopMap for lock (and class if static)
2069 OopMapSet *oop_maps = new OopMapSet();
2070 intptr_t start = (intptr_t)__ pc();
2071
2072 // We have received a description of where all the java arg are located
2073 // on entry to the wrapper. We need to convert these args to where
2074 // the jni function will expect them. To figure out where they go
2075 // we convert the java signature to a C signature by inserting
2076 // the hidden arguments as arg[0] and possibly arg[1] (static method)
2077
2078 const int total_in_args = method->size_of_parameters();
2079 int total_c_args = total_in_args + (method->is_static() ? 2 : 1);
2080
2081 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
2082 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
2083
2084 int argc = 0;
2085 out_sig_bt[argc++] = T_ADDRESS;
2086 if (method->is_static()) {
2087 out_sig_bt[argc++] = T_OBJECT;
2088 }
2089
2090 for (int i = 0; i < total_in_args ; i++ ) {
2091 out_sig_bt[argc++] = in_sig_bt[i];
2092 }
2093
2094 // Now figure out where the args must be stored and how much stack space
2095 // they require.
2096 int out_arg_slots;
2097 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
2098
2099 // Compute framesize for the wrapper. We need to handlize all oops in
2100 // incoming registers
2101
2102 // Calculate the total number of stack slots we will need.
2103
2104 // First count the abi requirement plus all of the outgoing args
2105 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
2106
2107 // Now the space for the inbound oop handle area
2108 int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers
2109
2110 int oop_handle_offset = stack_slots;
2111 stack_slots += total_save_slots;
2112
2113 // Now any space we need for handlizing a klass if static method
2114
2115 int klass_slot_offset = 0;
2116 int klass_offset = -1;
2117 int lock_slot_offset = 0;
2118 bool is_static = false;
2119
2120 if (method->is_static()) {
2121 klass_slot_offset = stack_slots;
2122 stack_slots += VMRegImpl::slots_per_word;
2123 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
2124 is_static = true;
2125 }
2126
2127 // Plus a lock if needed
2128
2129 if (method->is_synchronized()) {
2130 lock_slot_offset = stack_slots;
2131 stack_slots += VMRegImpl::slots_per_word;
2132 }
2133
2134 // Now a place (+2) to save return values or temp during shuffling
2135 // + 4 for return address (which we own) and saved rbp
2136 stack_slots += 6;
2137
2138 // Ok The space we have allocated will look like:
2139 //
2140 //
2141 // FP-> | |
2142 // |---------------------|
2143 // | 2 slots for moves |
2144 // |---------------------|
2145 // | lock box (if sync) |
2146 // |---------------------| <- lock_slot_offset
2147 // | klass (if static) |
2148 // |---------------------| <- klass_slot_offset
2149 // | oopHandle area |
2150 // |---------------------| <- oop_handle_offset (6 java arg registers)
2151 // | outbound memory |
2152 // | based arguments |
2153 // | |
2154 // |---------------------|
2155 // | |
2156 // SP-> | out_preserved_slots |
2157 //
2158 //
2159
2160
2161 // Now compute actual number of stack words we need rounding to make
2162 // stack properly aligned.
2163 stack_slots = align_up(stack_slots, StackAlignmentInSlots);
2164
2165 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
2166
2167 // First thing make an ic check to see if we should even be here
2168
2169 // We are free to use all registers as temps without saving them and
2170 // restoring them except rbp. rbp is the only callee save register
2171 // as far as the interpreter and the compiler(s) are concerned.
2172
2173 const Register receiver = j_rarg0;
2174
2175 Label exception_pending;
2176
2177 assert_different_registers(receiver, rscratch1, rscratch2);
2178 __ verify_oop(receiver);
2179 __ ic_check(8 /* end_alignment */);
2180
2181 int vep_offset = ((intptr_t)__ pc()) - start;
2182
2183 if (method->needs_clinit_barrier()) {
2184 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
2185 Label L_skip_barrier;
2186 Register klass = r10;
2187 __ mov_metadata(klass, method->method_holder()); // InstanceKlass*
2188 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
2189
2190 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
2191
2192 __ bind(L_skip_barrier);
2193 }
2194
2195 #ifdef COMPILER1
2196 // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available.
2197 if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
2198 inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/);
2199 }
2200 #endif // COMPILER1
2201
2202 // The instruction at the verified entry point must be 5 bytes or longer
2203 // because it can be patched on the fly by make_non_entrant. The stack bang
2204 // instruction fits that requirement.
2205
2206 // Generate stack overflow check
2207 __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size());
2208
2209 // Generate a new frame for the wrapper.
2210 __ enter();
2211 // -2 because return address is already present and so is saved rbp
2212 __ subptr(rsp, stack_size - 2*wordSize);
2213
2214 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2215 // native wrapper is not hot enough to micro optimize the nmethod entry barrier with an out-of-line stub
2216 bs->nmethod_entry_barrier(masm, nullptr /* slow_path */, nullptr /* continuation */);
2217
2218 // Frame is now completed as far as size and linkage.
2219 int frame_complete = ((intptr_t)__ pc()) - start;
2220
2221 #ifdef ASSERT
2222 __ check_stack_alignment(rsp, "improperly aligned stack");
2223 #endif /* ASSERT */
2224
2225
2226 // We use r14 as the oop handle for the receiver/klass
2227 // It is callee save so it survives the call to native
2228
2229 const Register oop_handle_reg = r14;
2230
2231 //
2232 // We immediately shuffle the arguments so that any vm call we have to
2233 // make from here on out (sync slow path, jvmti, etc.) we will have
2234 // captured the oops from our caller and have a valid oopMap for
2235 // them.
2236
2237 // -----------------
2238 // The Grand Shuffle
2239
2240 // The Java calling convention is either equal (linux) or denser (win64) than the
2241 // c calling convention. However the because of the jni_env argument the c calling
2242 // convention always has at least one more (and two for static) arguments than Java.
2243 // Therefore if we move the args from java -> c backwards then we will never have
2244 // a register->register conflict and we don't have to build a dependency graph
2245 // and figure out how to break any cycles.
2246 //
2247
2248 // Record esp-based slot for receiver on stack for non-static methods
2249 int receiver_offset = -1;
2250
2251 // This is a trick. We double the stack slots so we can claim
2252 // the oops in the caller's frame. Since we are sure to have
2253 // more args than the caller doubling is enough to make
2254 // sure we can capture all the incoming oop args from the
2255 // caller.
2256 //
2257 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
2258
2259 // Mark location of rbp (someday)
2260 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
2261
2262 // Use eax, ebx as temporaries during any memory-memory moves we have to do
2263 // All inbound args are referenced based on rbp and all outbound args via rsp.
2264
2265
2266 #ifdef ASSERT
2267 bool reg_destroyed[Register::number_of_registers];
2268 bool freg_destroyed[XMMRegister::number_of_registers];
2269 for ( int r = 0 ; r < Register::number_of_registers ; r++ ) {
2270 reg_destroyed[r] = false;
2271 }
2272 for ( int f = 0 ; f < XMMRegister::number_of_registers ; f++ ) {
2273 freg_destroyed[f] = false;
2274 }
2275
2276 #endif /* ASSERT */
2277
2278 // For JNI natives the incoming and outgoing registers are offset upwards.
2279 GrowableArray<int> arg_order(2 * total_in_args);
2280
2281 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
2282 arg_order.push(i);
2283 arg_order.push(c_arg);
2284 }
2285
2286 for (int ai = 0; ai < arg_order.length(); ai += 2) {
2287 int i = arg_order.at(ai);
2288 int c_arg = arg_order.at(ai + 1);
2289 __ block_comment(err_msg("move %d -> %d", i, c_arg));
2290 #ifdef ASSERT
2291 if (in_regs[i].first()->is_Register()) {
2292 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
2293 } else if (in_regs[i].first()->is_XMMRegister()) {
2294 assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!");
2295 }
2296 if (out_regs[c_arg].first()->is_Register()) {
2297 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
2298 } else if (out_regs[c_arg].first()->is_XMMRegister()) {
2299 freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
2300 }
2301 #endif /* ASSERT */
2302 switch (in_sig_bt[i]) {
2303 case T_ARRAY:
2304 case T_OBJECT:
2305 __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
2306 ((i == 0) && (!is_static)),
2307 &receiver_offset);
2308 break;
2309 case T_VOID:
2310 break;
2311
2312 case T_FLOAT:
2313 __ float_move(in_regs[i], out_regs[c_arg]);
2314 break;
2315
2316 case T_DOUBLE:
2317 assert( i + 1 < total_in_args &&
2318 in_sig_bt[i + 1] == T_VOID &&
2319 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
2320 __ double_move(in_regs[i], out_regs[c_arg]);
2321 break;
2322
2323 case T_LONG :
2324 __ long_move(in_regs[i], out_regs[c_arg]);
2325 break;
2326
2327 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
2328
2329 default:
2330 __ move32_64(in_regs[i], out_regs[c_arg]);
2331 }
2332 }
2333
2334 int c_arg;
2335
2336 // Pre-load a static method's oop into r14. Used both by locking code and
2337 // the normal JNI call code.
2338 // point c_arg at the first arg that is already loaded in case we
2339 // need to spill before we call out
2340 c_arg = total_c_args - total_in_args;
2341
2342 if (method->is_static()) {
2343
2344 // load oop into a register
2345 __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror()));
2346
2347 // Now handlize the static class mirror it's known not-null.
2348 __ movptr(Address(rsp, klass_offset), oop_handle_reg);
2349 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2350
2351 // Now get the handle
2352 __ lea(oop_handle_reg, Address(rsp, klass_offset));
2353 // store the klass handle as second argument
2354 __ movptr(c_rarg1, oop_handle_reg);
2355 // and protect the arg if we must spill
2356 c_arg--;
2357 }
2358
2359 // Change state to native (we save the return address in the thread, since it might not
2360 // be pushed on the stack when we do a stack traversal). It is enough that the pc()
2361 // points into the right code segment. It does not have to be the correct return pc.
2362 // We use the same pc/oopMap repeatedly when we call out
2363
2364 Label native_return;
2365 if (method->is_object_wait0()) {
2366 // For convenience we use the pc we want to resume to in case of preemption on Object.wait.
2367 __ set_last_Java_frame(rsp, noreg, native_return, rscratch1);
2368 } else {
2369 intptr_t the_pc = (intptr_t) __ pc();
2370 oop_maps->add_gc_map(the_pc - start, map);
2371
2372 __ set_last_Java_frame(rsp, noreg, __ pc(), rscratch1);
2373 }
2374
2375 // We have all of the arguments setup at this point. We must not touch any register
2376 // argument registers at this point (what if we save/restore them there are no oop?
2377
2378 if (DTraceMethodProbes) {
2379 // protect the args we've loaded
2380 save_args(masm, total_c_args, c_arg, out_regs);
2381 __ mov_metadata(c_rarg1, method());
2382 __ call_VM_leaf(
2383 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
2384 r15_thread, c_rarg1);
2385 restore_args(masm, total_c_args, c_arg, out_regs);
2386 }
2387
2388 // RedefineClasses() tracing support for obsolete method entry
2389 if (log_is_enabled(Trace, redefine, class, obsolete)) {
2390 // protect the args we've loaded
2391 save_args(masm, total_c_args, c_arg, out_regs);
2392 __ mov_metadata(c_rarg1, method());
2393 __ call_VM_leaf(
2394 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
2395 r15_thread, c_rarg1);
2396 restore_args(masm, total_c_args, c_arg, out_regs);
2397 }
2398
2399 // Lock a synchronized method
2400
2401 // Register definitions used by locking and unlocking
2402
2403 const Register swap_reg = rax; // Must use rax for cmpxchg instruction
2404 const Register obj_reg = rbx; // Will contain the oop
2405 const Register lock_reg = r13; // Address of compiler lock object (BasicLock)
2406
2407 Label slow_path_lock;
2408 Label lock_done;
2409
2410 if (method->is_synchronized()) {
2411 // Get the handle (the 2nd argument)
2412 __ mov(oop_handle_reg, c_rarg1);
2413
2414 // Get address of the box
2415
2416 __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
2417
2418 // Load the oop from the handle
2419 __ movptr(obj_reg, Address(oop_handle_reg, 0));
2420
2421 __ fast_lock(lock_reg, obj_reg, swap_reg, rscratch1, slow_path_lock);
2422
2423 // Slow path will re-enter here
2424 __ bind(lock_done);
2425 }
2426
2427 // Finally just about ready to make the JNI call
2428
2429 // get JNIEnv* which is first argument to native
2430 __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
2431
2432 // Now set thread in native
2433 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
2434
2435 __ call(RuntimeAddress(native_func));
2436
2437 // Verify or restore cpu control state after JNI call
2438 __ restore_cpu_control_state_after_jni(rscratch1);
2439
2440 // Unpack native results.
2441 switch (ret_type) {
2442 case T_BOOLEAN: __ c2bool(rax); break;
2443 case T_CHAR : __ movzwl(rax, rax); break;
2444 case T_BYTE : __ sign_extend_byte (rax); break;
2445 case T_SHORT : __ sign_extend_short(rax); break;
2446 case T_INT : /* nothing to do */ break;
2447 case T_DOUBLE :
2448 case T_FLOAT :
2449 // Result is in xmm0 we'll save as needed
2450 break;
2451 case T_ARRAY: // Really a handle
2452 case T_OBJECT: // Really a handle
2453 break; // can't de-handlize until after safepoint check
2454 case T_VOID: break;
2455 case T_LONG: break;
2456 default : ShouldNotReachHere();
2457 }
2458
2459 // Switch thread to "native transition" state before reading the synchronization state.
2460 // This additional state is necessary because reading and testing the synchronization
2461 // state is not atomic w.r.t. GC, as this scenario demonstrates:
2462 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2463 // VM thread changes sync state to synchronizing and suspends threads for GC.
2464 // Thread A is resumed to finish this native method, but doesn't block here since it
2465 // didn't see any synchronization is progress, and escapes.
2466 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2467
2468 // Force this write out before the read below
2469 if (!UseSystemMemoryBarrier) {
2470 __ membar(Assembler::Membar_mask_bits(
2471 Assembler::LoadLoad | Assembler::LoadStore |
2472 Assembler::StoreLoad | Assembler::StoreStore));
2473 }
2474
2475 // check for safepoint operation in progress and/or pending suspend requests
2476 {
2477 Label Continue;
2478 Label slow_path;
2479
2480 __ safepoint_poll(slow_path, true /* at_return */, false /* in_nmethod */);
2481
2482 __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
2483 __ jcc(Assembler::equal, Continue);
2484 __ bind(slow_path);
2485
2486 // Don't use call_VM as it will see a possible pending exception and forward it
2487 // and never return here preventing us from clearing _last_native_pc down below.
2488 // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
2489 // preserved and correspond to the bcp/locals pointers. So we do a runtime call
2490 // by hand.
2491 //
2492 __ vzeroupper();
2493 save_native_result(masm, ret_type, stack_slots);
2494 __ mov(c_rarg0, r15_thread);
2495 __ mov(r12, rsp); // remember sp
2496 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
2497 __ andptr(rsp, -16); // align stack as required by ABI
2498 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
2499 __ mov(rsp, r12); // restore sp
2500 __ reinit_heapbase();
2501 // Restore any method result value
2502 restore_native_result(masm, ret_type, stack_slots);
2503 __ bind(Continue);
2504 }
2505
2506 // change thread state
2507 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
2508
2509 if (method->is_object_wait0()) {
2510 // Check preemption for Object.wait()
2511 __ movptr(rscratch1, Address(r15_thread, JavaThread::preempt_alternate_return_offset()));
2512 __ cmpptr(rscratch1, NULL_WORD);
2513 __ jccb(Assembler::equal, native_return);
2514 __ movptr(Address(r15_thread, JavaThread::preempt_alternate_return_offset()), NULL_WORD);
2515 __ jmp(rscratch1);
2516 __ bind(native_return);
2517
2518 intptr_t the_pc = (intptr_t) __ pc();
2519 oop_maps->add_gc_map(the_pc - start, map);
2520 }
2521
2522
2523 Label reguard;
2524 Label reguard_done;
2525 __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled);
2526 __ jcc(Assembler::equal, reguard);
2527 __ bind(reguard_done);
2528
2529 // native result if any is live
2530
2531 // Unlock
2532 Label slow_path_unlock;
2533 Label unlock_done;
2534 if (method->is_synchronized()) {
2535
2536 Label fast_done;
2537
2538 // Get locked oop from the handle we passed to jni
2539 __ movptr(obj_reg, Address(oop_handle_reg, 0));
2540
2541 // Must save rax if it is live now because cmpxchg must use it
2542 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
2543 save_native_result(masm, ret_type, stack_slots);
2544 }
2545
2546 __ fast_unlock(obj_reg, swap_reg, lock_reg, slow_path_unlock);
2547
2548 // slow path re-enters here
2549 __ bind(unlock_done);
2550 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
2551 restore_native_result(masm, ret_type, stack_slots);
2552 }
2553
2554 __ bind(fast_done);
2555 }
2556 if (DTraceMethodProbes) {
2557 save_native_result(masm, ret_type, stack_slots);
2558 __ mov_metadata(c_rarg1, method());
2559 __ call_VM_leaf(
2560 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2561 r15_thread, c_rarg1);
2562 restore_native_result(masm, ret_type, stack_slots);
2563 }
2564
2565 __ reset_last_Java_frame(false);
2566
2567 // Unbox oop result, e.g. JNIHandles::resolve value.
2568 if (is_reference_type(ret_type)) {
2569 __ resolve_jobject(rax /* value */,
2570 rcx /* tmp */);
2571 }
2572
2573 if (CheckJNICalls) {
2574 // clear_pending_jni_exception_check
2575 __ movptr(Address(r15_thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD);
2576 }
2577
2578 // reset handle block
2579 __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
2580 __ movl(Address(rcx, JNIHandleBlock::top_offset()), NULL_WORD);
2581
2582 // pop our frame
2583
2584 __ leave();
2585
2586 #if INCLUDE_JFR
2587 // We need to do a poll test after unwind in case the sampler
2588 // managed to sample the native frame after returning to Java.
2589 Label L_return;
2590 address poll_test_pc = __ pc();
2591 __ relocate(relocInfo::poll_return_type);
2592 __ testb(Address(r15_thread, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit());
2593 __ jccb(Assembler::zero, L_return);
2594 __ lea(rscratch1, InternalAddress(poll_test_pc));
2595 __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1);
2596 assert(SharedRuntime::polling_page_return_handler_blob() != nullptr,
2597 "polling page return stub not created yet");
2598 address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
2599 __ jump(RuntimeAddress(stub));
2600 __ bind(L_return);
2601 #endif // INCLUDE_JFR
2602
2603 // Any exception pending?
2604 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
2605 __ jcc(Assembler::notEqual, exception_pending);
2606
2607 // Return
2608
2609 __ ret(0);
2610
2611 // Unexpected paths are out of line and go here
2612
2613 // forward the exception
2614 __ bind(exception_pending);
2615
2616 // and forward the exception
2617 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2618
2619 // Slow path locking & unlocking
2620 if (method->is_synchronized()) {
2621
2622 // BEGIN Slow path lock
2623 __ bind(slow_path_lock);
2624
2625 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
2626 // args are (oop obj, BasicLock* lock, JavaThread* thread)
2627
2628 // protect the args we've loaded
2629 save_args(masm, total_c_args, c_arg, out_regs);
2630
2631 __ mov(c_rarg0, obj_reg);
2632 __ mov(c_rarg1, lock_reg);
2633 __ mov(c_rarg2, r15_thread);
2634
2635 // Not a leaf but we have last_Java_frame setup as we want.
2636 // We don't want to unmount in case of contention since that would complicate preserving
2637 // the arguments that had already been marshalled into the native convention. So we force
2638 // the freeze slow path to find this native wrapper frame (see recurse_freeze_native_frame())
2639 // and pin the vthread. Otherwise the fast path won't find it since we don't walk the stack.
2640 __ push_cont_fastpath();
2641 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
2642 __ pop_cont_fastpath();
2643 restore_args(masm, total_c_args, c_arg, out_regs);
2644
2645 #ifdef ASSERT
2646 { Label L;
2647 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
2648 __ jcc(Assembler::equal, L);
2649 __ stop("no pending exception allowed on exit from monitorenter");
2650 __ bind(L);
2651 }
2652 #endif
2653 __ jmp(lock_done);
2654
2655 // END Slow path lock
2656
2657 // BEGIN Slow path unlock
2658 __ bind(slow_path_unlock);
2659
2660 // If we haven't already saved the native result we must save it now as xmm registers
2661 // are still exposed.
2662 __ vzeroupper();
2663 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2664 save_native_result(masm, ret_type, stack_slots);
2665 }
2666
2667 __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
2668
2669 __ mov(c_rarg0, obj_reg);
2670 __ mov(c_rarg2, r15_thread);
2671 __ mov(r12, rsp); // remember sp
2672 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
2673 __ andptr(rsp, -16); // align stack as required by ABI
2674
2675 // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
2676 // NOTE that obj_reg == rbx currently
2677 __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset())));
2678 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
2679
2680 // args are (oop obj, BasicLock* lock, JavaThread* thread)
2681 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
2682 __ mov(rsp, r12); // restore sp
2683 __ reinit_heapbase();
2684 #ifdef ASSERT
2685 {
2686 Label L;
2687 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
2688 __ jcc(Assembler::equal, L);
2689 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
2690 __ bind(L);
2691 }
2692 #endif /* ASSERT */
2693
2694 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx);
2695
2696 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2697 restore_native_result(masm, ret_type, stack_slots);
2698 }
2699 __ jmp(unlock_done);
2700
2701 // END Slow path unlock
2702
2703 } // synchronized
2704
2705 // SLOW PATH Reguard the stack if needed
2706
2707 __ bind(reguard);
2708 __ vzeroupper();
2709 save_native_result(masm, ret_type, stack_slots);
2710 __ mov(r12, rsp); // remember sp
2711 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
2712 __ andptr(rsp, -16); // align stack as required by ABI
2713 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
2714 __ mov(rsp, r12); // restore sp
2715 __ reinit_heapbase();
2716 restore_native_result(masm, ret_type, stack_slots);
2717 // and continue
2718 __ jmp(reguard_done);
2719
2720
2721
2722 __ flush();
2723
2724 nmethod *nm = nmethod::new_native_nmethod(method,
2725 compile_id,
2726 masm->code(),
2727 vep_offset,
2728 frame_complete,
2729 stack_slots / VMRegImpl::slots_per_word,
2730 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2731 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
2732 oop_maps);
2733
2734 return nm;
2735 }
2736
2737 // this function returns the adjust size (in number of words) to a c2i adapter
2738 // activation for use during deoptimization
2739 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
2740 return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2741 }
2742
2743
2744 uint SharedRuntime::out_preserve_stack_slots() {
2745 return 0;
2746 }
2747
2748
2749 // Number of stack slots between incoming argument block and the start of
2750 // a new frame. The PROLOG must add this many slots to the stack. The
2751 // EPILOG must remove this many slots. amd64 needs two slots for
2752 // return address.
2753 uint SharedRuntime::in_preserve_stack_slots() {
2754 return 4 + 2 * VerifyStackAtCalls;
2755 }
2756
2757 VMReg SharedRuntime::thread_register() {
2758 return r15_thread->as_VMReg();
2759 }
2760
2761 //------------------------------generate_deopt_blob----------------------------
// Generates the deoptimization blob and stores it in _deopt_blob.
//
// If a previously stored blob can be loaded from the AOT code cache it is used
// as-is and no code is generated. Otherwise the blob is assembled here with
// several entry points, each recorded as an offset from the blob start:
//   - normal deopt entry (at the blob start)   -> exec mode Unpack_deopt
//   - reexecute_offset                         -> exec mode Unpack_reexecute
//   - exception_offset (oop in rax, pc in rdx) -> falls through to the next one
//   - exception_in_tls_offset                  -> exec mode Unpack_exception
// All entries converge on the 'cont' label, which calls
// Deoptimization::fetch_unroll_info(), pops the deoptimized frame, pushes the
// skeletal interpreter frames described by the returned UnrollBlock, calls
// Deoptimization::unpack_frames() and finally returns into the interpreter.
// The exec mode is kept in r14 across the calls.
2762 void SharedRuntime::generate_deopt_blob() {
2763 // Allocate space for the code
2764 ResourceMark rm;
2765 // Setup code generation tools
// 'pad' enlarges the code buffer by 1024 bytes for each of UseAVX > 2 and UseAPX
// (presumably because the register save/restore sequences grow -- TODO confirm).
2766 int pad = 0;
2767 if (UseAVX > 2) {
2768 pad += 1024;
2769 }
2770 if (UseAPX) {
2771 pad += 1024;
2772 }
2773 const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id);
// Try the AOT code cache first; on a hit there is nothing left to generate.
2774 CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, BlobId::shared_deopt_id);
2775 if (blob != nullptr) {
2776 _deopt_blob = blob->as_deoptimization_blob();
2777 return;
2778 }
2779
2780 CodeBuffer buffer(name, 2560+pad, 1024);
2781 MacroAssembler* masm = new MacroAssembler(&buffer);
2782 int frame_size_in_words;
2783 OopMap* map = nullptr;
2784 OopMapSet *oop_maps = new OopMapSet();
2785
2786 // -------------
2787 // This code enters when returning to a de-optimized nmethod. A return
2788 // address has been pushed on the stack, and return values are in
2789 // registers.
2790 // If we are doing a normal deopt then we were called from the patched
2791 // nmethod from the point we returned to the nmethod. So the return
2792 // address on the stack is wrong by NativeCall::instruction_size
2793 // We will adjust the value so it looks like we have the original return
2794 // address on the stack (like when we eagerly deoptimized).
2795 // In the case of an exception pending when deoptimizing, we enter
2796 // with a return address on the stack that points after the call we patched
2797 // into the exception handler. We have the following register state from,
2798 // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
2799 // rax: exception oop
2800 // rbx: exception handler
2801 // rdx: throwing pc
2802 // So in this case we simply jam rdx into the useless return address and
2803 // the stack looks just like we want.
2804 //
2805 // At this point we need to de-opt. We save the argument return
2806 // registers. We call the first C routine, fetch_unroll_info(). This
2807 // routine captures the return values and returns a structure which
2808 // describes the current frame size and the sizes of all replacement frames.
2809 // The current frame is compiled code and may contain many inlined
2810 // functions, each with their own JVM state. We pop the current frame, then
2811 // push all the new frames. Then we call the C routine unpack_frames() to
2812 // populate these frames. Finally unpack_frames() returns us the new target
2813 // address. Notice that callee-save registers are BLOWN here; they have
2814 // already been captured in the vframeArray at the time the return PC was
2815 // patched.
2816 address start = __ pc();
2817 Label cont;
2818
2819 // Prolog for non exception case!
2820
2821 // Save everything in sight.
2822 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true);
2823
2824 // Normal deoptimization. Save exec mode for unpack_frames.
2825 __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved
2826 __ jmp(cont);
2827
// Entry point: reexecute. Its offset from 'start' is handed to DeoptimizationBlob::create() below.
2828 int reexecute_offset = __ pc() - start;
2829 // Reexecute case
2830 // the return address is the pc that describes which bci to re-execute at
2831
2832 // No need to update map as each call to save_live_registers will produce identical oopmap
2833 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true);
2834
2835 __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
2836 __ jmp(cont);
2837
// Entry point: exception with oop/pc in registers.
2838 int exception_offset = __ pc() - start;
2839
2840 // Prolog for exception case
2841
2842 // all registers are dead at this entry point, except for rax, and
2843 // rdx which contain the exception oop and exception pc
2844 // respectively. Set them in TLS and fall thru to the
2845 // unpack_with_exception_in_tls entry point.
2846
2847 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
2848 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
2849
// Entry point: exception oop/pc already stored in the JavaThread.
2850 int exception_in_tls_offset = __ pc() - start;
2851
2852 // new implementation because exception oop is now passed in JavaThread
2853
2854 // Prolog for exception case
2855 // All registers must be preserved because they might be used by LinearScan
2856 // Exception oop and throwing PC are passed in JavaThread
2857 // tos: stack at point of call to method that threw the exception (i.e. only
2858 // args are on the stack, no return address)
2859
2860 // make room on stack for the return address
2861 // It will be patched later with the throwing pc. The correct value is not
2862 // available now because loading it from memory would destroy registers.
2863 __ push(0);
2864
2865 // Save everything in sight.
2866 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true);
2867
2868 // Now it is safe to overwrite any register
2869
2870 // Deopt during an exception. Save exec mode for unpack_frames.
2871 __ movl(r14, Deoptimization::Unpack_exception); // callee-saved
2872
2873 // load throwing pc from JavaThread and patch it as the return address
2874 // of the current frame. Then clear the field in JavaThread
2875
2876 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
2877 __ movptr(Address(rbp, wordSize), rdx);
2878 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD);
2879
2880 #ifdef ASSERT
2881 // verify that there is really an exception oop in JavaThread
2882 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
2883 __ verify_oop(rax);
2884
2885 // verify that there is no pending exception
2886 Label no_pending_exception;
2887 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
2888 __ testptr(rax, rax);
2889 __ jcc(Assembler::zero, no_pending_exception);
2890 __ stop("must not have pending exception here");
2891 __ bind(no_pending_exception);
2892 #endif
2893
// Common continuation for all entry points; r14 holds the exec mode.
2894 __ bind(cont);
2895
2896 // Call C code. Need thread and this frame, but NOT official VM entry
2897 // crud. We cannot block on this call, no GC can happen.
2898 //
2899 // UnrollBlock* fetch_unroll_info(JavaThread* thread)
2900
2901 // fetch_unroll_info needs to call last_java_frame().
2902
2903 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);
2904 #ifdef ASSERT
2905 { Label L;
2906 __ cmpptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
2907 __ jcc(Assembler::equal, L);
2908 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2909 __ bind(L);
2910 }
2911 #endif // ASSERT
2912 __ mov(c_rarg0, r15_thread);
2913 __ movl(c_rarg1, r14); // exec_mode
2914 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2915
2916 // Need to have an oopmap that tells fetch_unroll_info where to
2917 // find any register it might need.
2918 oop_maps->add_gc_map(__ pc() - start, map);
2919
2920 __ reset_last_Java_frame(false);
2921
2922 // Load UnrollBlock* into rdi
2923 __ mov(rdi, rax);
2924
// Reload the exec mode from the UnrollBlock; it replaces the value previously held in r14.
2925 __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset()));
2926 Label noException;
2927 __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending?
2928 __ jcc(Assembler::notEqual, noException);
2929 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
2930 // QQQ this is useless it was null above
2931 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
2932 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD);
2933 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD);
2934
2935 __ verify_oop(rax);
2936
2937 // Overwrite the result registers with the exception results.
2938 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
2939 // I think this is useless
2940 __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx);
2941
2942 __ bind(noException);
2943
2944 // Only register save data is on the stack.
2945 // Now restore the result registers. Everything else is either dead
2946 // or captured in the vframeArray.
2947 RegisterSaver::restore_result_registers(masm);
2948
2949 // All of the register save area has been popped off the stack. Only the
2950 // return address remains.
2951
2952 // Pop all the frames we must move/replace.
2953 //
2954 // Frame picture (youngest to oldest)
2955 // 1: self-frame (no frame link)
2956 // 2: deopting frame (no frame link)
2957 // 3: caller of deopting frame (could be compiled/interpreted).
2958 //
2959 // Note: by leaving the return address of self-frame on the stack
2960 // and using the size of frame 2 to adjust the stack
2961 // when we are done the return to frame 3 will still be on the stack.
2962
2963 // Pop deoptimized frame
2964 __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset()));
2965 __ addptr(rsp, rcx);
2966
2967 // rsp should be pointing at the return address to the caller (3)
2968
2969 // Pick up the initial fp we should save
2970 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved)
2971 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset()));
2972
2973 #ifdef ASSERT
2974 // Compilers generate code that bang the stack by as much as the
2975 // interpreter would need. So this stack banging should never
2976 // trigger a fault. Verify that it does not on non product builds.
2977 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset()));
2978 __ bang_stack_size(rbx, rcx);
2979 #endif
2980
2981 // Load address of array of frame pcs into rcx
2982 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset()));
2983
2984 // Trash the old pc
2985 __ addptr(rsp, wordSize);
2986
2987 // Load address of array of frame sizes into rsi
2988 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset()));
2989
2990 // Load counter into rdx
2991 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset()));
2992
2993 // Now adjust the caller's stack to make up for the extra locals
2994 // but record the original sp so that we can save it in the skeletal interpreter
2995 // frame and the stack walking of interpreter_sender will get the unextended sp
2996 // value and not the "real" sp value.
2997
2998 const Register sender_sp = r8;
2999
3000 __ mov(sender_sp, rsp);
3001 __ movl(rbx, Address(rdi,
3002 Deoptimization::UnrollBlock::
3003 caller_adjustment_offset()));
3004 __ subptr(rsp, rbx);
3005
3006 // Push interpreter frames in a loop
// Register roles inside the loop: rsi walks the frame sizes array, rcx walks the
// frame pcs array, rdx counts the remaining frames, rbx is scratch for the current size.
3007 Label loop;
3008 __ bind(loop);
3009 __ movptr(rbx, Address(rsi, 0)); // Load frame size
3010 __ subptr(rbx, 2*wordSize); // We'll push pc and rbp by hand
3011 __ pushptr(Address(rcx, 0)); // Save return address
3012 __ enter(); // Save old & set new rbp
3013 __ subptr(rsp, rbx); // Prolog
3014 // This value is corrected by layout_activation_impl
3015 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
3016 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable
3017 __ mov(sender_sp, rsp); // Pass sender_sp to next frame
3018 __ addptr(rsi, wordSize); // Bump array pointer (sizes)
3019 __ addptr(rcx, wordSize); // Bump array pointer (pcs)
3020 __ decrementl(rdx); // Decrement counter
3021 __ jcc(Assembler::notZero, loop);
3022 __ pushptr(Address(rcx, 0)); // Save final return address
3023
3024 // Re-push self-frame
3025 __ enter(); // Save old & set new rbp
3026
3027 // Allocate a full sized register save area.
3028 // Return address and rbp are in place, so we allocate two less words.
3029 __ subptr(rsp, (frame_size_in_words - 2) * wordSize);
3030
3031 // Restore frame locals after moving the frame
3032 __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0);
3033 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
3034
3035 // Call C code. Need thread but NOT official VM entry
3036 // crud. We cannot block on this call, no GC can happen. Call should
3037 // restore return values to their stack-slots with the new SP.
3038 //
3039 // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
3040
3041 // Use rbp because the frames look interpreted now
3042 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
3043 // Don't need the precise return PC here, just precise enough to point into this code blob.
3044 address the_pc = __ pc();
3045 __ set_last_Java_frame(noreg, rbp, the_pc, rscratch1);
3046
3047 __ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by ABI
3048 __ mov(c_rarg0, r15_thread);
3049 __ movl(c_rarg1, r14); // second arg: exec_mode
3050 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
3051 // Revert SP alignment after call since we're going to do some SP relative addressing below
3052 __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset()));
3053
3054 // Set an oopmap for the call site
3055 // Use the same PC we used for the last java frame
3056 oop_maps->add_gc_map(the_pc - start,
3057 new OopMap( frame_size_in_words, 0 ));
3058
3059 // Clear fp AND pc
3060 __ reset_last_Java_frame(true);
3061
3062 // Collect return values
3063 __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
3064 __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes()));
3065 // I think this is useless (throwing pc?)
3066 __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes()));
3067
3068 // Pop self-frame.
3069 __ leave(); // Epilog
3070
3071 // Jump to interpreter
3072 __ ret(0);
3073
3074 // Make sure all code is generated
3075 masm->flush();
3076
// Create the blob from the entry offsets computed above. The literal 0 is presumably
// the offset of the normal deopt entry -- TODO confirm against DeoptimizationBlob::create().
3077 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
3078 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3079
// Hand the newly generated blob to the AOT code cache under the same id used by the load above.
3080 AOTCodeCache::store_code_blob(*_deopt_blob, AOTCodeEntry::SharedBlob, BlobId::shared_deopt_id);
3081 }
3082
3083 //------------------------------generate_handler_blob------
3084 //
3085 // Generate a special Compile2Runtime blob that saves all registers,
3086 // and setup oopmap.
3087 //
// Parameters:
//   id       - stub id; asserted to be one of the polling page stub ids. It selects
//              whether the stub is the return handler (cause_return) and whether
//              wide vector registers are saved (save_wide_vectors).
//   call_ptr - address of the runtime routine to call; the current thread
//              (r15_thread) is passed as its only argument.
// Returns the SafepointBlob loaded from the AOT code cache if one is available,
// otherwise the newly generated blob (which is also stored into the AOT code cache).
//
// After the runtime call the stub either forwards a pending exception via
// StubRoutines::forward_exception_entry(), or restores all registers and returns.
// For the non-return handlers the return pc is first advanced past the safepoint
// poll instruction, unless the runtime changed the stashed return pc.
3088 SafepointBlob* SharedRuntime::generate_handler_blob(StubId id, address call_ptr) {
3089 assert(StubRoutines::forward_exception_entry() != nullptr,
3090 "must be generated before");
3091 assert(is_polling_page_id(id), "expected a polling page stub id");
3092
3093 // Allocate space for the code. Setup code generation tools.
3094 const char* name = SharedRuntime::stub_name(id);
// Try the AOT code cache first; on a hit there is nothing left to generate.
3095 CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, StubInfo::blob(id));
3096 if (blob != nullptr) {
3097 return blob->as_safepoint_blob();
3098 }
3099
3100 ResourceMark rm;
3101 OopMapSet *oop_maps = new OopMapSet();
3102 OopMap* map;
3103 CodeBuffer buffer(name, 2548, 1024);
3104 MacroAssembler* masm = new MacroAssembler(&buffer);
3105
3106 address start = __ pc();
3107 address call_pc = nullptr;
3108 int frame_size_in_words;
// cause_return: true only for the polling page return handler.
// save_wide_vectors: true only for the vectors safepoint handler.
3109 bool cause_return = (id == StubId::shared_polling_page_return_handler_id);
3110 bool save_wide_vectors = (id == StubId::shared_polling_page_vectors_safepoint_handler_id);
3111
3112 // Make room for return address (or push it again)
3113 if (!cause_return) {
3114 __ push(rbx);
3115 }
3116
3117 // Save registers, fpu state, and flags
3118 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_wide_vectors);
3119
3120 // The following is basically a call_VM. However, we need the precise
3121 // address of the call in order to generate an oopmap. Hence, we do all the
3122 // work ourselves.
3123
3124 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); // JavaFrameAnchor::capture_last_Java_pc() will get the pc from the return address, which we store next:
3125
3126 // The return address must always be correct so that frame constructor never
3127 // sees an invalid pc.
3128
3129 if (!cause_return) {
3130 // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack.
3131 // Additionally, rbx is a callee saved register and we can look at it later to determine
3132 // if someone changed the return address for us!
3133 __ movptr(rbx, Address(r15_thread, JavaThread::saved_exception_pc_offset()));
3134 __ movptr(Address(rbp, wordSize), rbx);
3135 }
3136
3137 // Do the call
3138 __ mov(c_rarg0, r15_thread);
3139 __ call(RuntimeAddress(call_ptr));
3140
3141 // Set an oopmap for the call site. This oopmap will map all
3142 // oop-registers and debug-info registers as callee-saved. This
3143 // will allow deoptimization at this safepoint to find all possible
3144 // debug-info recordings, as well as let GC find all oops.
3145
3146 oop_maps->add_gc_map( __ pc() - start, map);
3147
3148 Label noException;
3149
3150 __ reset_last_Java_frame(false);
3151
3152 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
3153 __ jcc(Assembler::equal, noException);
3154
3155 // Exception pending
3156
3157 RegisterSaver::restore_live_registers(masm, save_wide_vectors);
3158
3159 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3160
3161 // No exception case
3162 __ bind(noException);
3163
3164 Label no_adjust;
3165 #ifdef ASSERT
3166 Label bail;
3167 #endif
3168 if (!cause_return) {
3169 Label no_prefix, not_special, check_rex_prefix;
3170
3171 // If our stashed return pc was modified by the runtime we avoid touching it
3172 __ cmpptr(rbx, Address(rbp, wordSize));
3173 __ jcc(Assembler::notEqual, no_adjust);
3174
3175 // Skip over the poll instruction.
3176 // See NativeInstruction::is_safepoint_poll()
3177 // Possible encodings:
3178 // 85 00 test %eax,(%rax)
3179 // 85 01 test %eax,(%rcx)
3180 // 85 02 test %eax,(%rdx)
3181 // 85 03 test %eax,(%rbx)
3182 // 85 06 test %eax,(%rsi)
3183 // 85 07 test %eax,(%rdi)
3184 //
3185 // 41 85 00 test %eax,(%r8)
3186 // 41 85 01 test %eax,(%r9)
3187 // 41 85 02 test %eax,(%r10)
3188 // 41 85 03 test %eax,(%r11)
3189 // 41 85 06 test %eax,(%r14)
3190 // 41 85 07 test %eax,(%r15)
3191 //
3192 // 85 04 24 test %eax,(%rsp)
3193 // 41 85 04 24 test %eax,(%r12)
3194 // 85 45 00 test %eax,0x0(%rbp)
3195 // 41 85 45 00 test %eax,0x0(%r13)
3196 //
3197 // Notes:
3198 // Format of legacy MAP0 test instruction:-
3199 // [REX/REX2] [OPCODE] [ModRM] [SIB] [DISP] [IMM32]
3200 // o For safepoint polling instruction "test %eax,(%rax)", encoding of first register
3201 // operand and base register of memory operand is in the range [0-8), hence we do not require
3202 // additional REX prefix where REX.B bit stores MSB bit of register encoding, which
3203 // is why two bytes encoding is sufficient here.
3204 // o For safepoint polling instruction like "test %eax,(%r8)", register encoding of BASE
3205 // register of memory operand is 1000, thus we need additional REX prefix in this case,
3206 // thereby adding an additional byte to the instruction encoding.
3207 // o In case BASE register is one of the 32 extended GPR registers available only on targets
3208 // supporting Intel APX extension, then we need to emit two bytes REX2 prefix to hold
3209 // most significant two bits of 5 bit register encoding.
3210
// Step 1: skip a two-byte REX2 prefix, if present (only checked when APX is supported).
3211 if (VM_Version::supports_apx_f()) {
3212 __ cmpb(Address(rbx, 0), Assembler::REX2);
3213 __ jccb(Assembler::notEqual, check_rex_prefix);
3214 __ addptr(rbx, 2);
3215 __ bind(check_rex_prefix);
3216 }
// Step 2: skip a one-byte REX.B prefix, if present.
3217 __ cmpb(Address(rbx, 0), NativeTstRegMem::instruction_rex_b_prefix);
3218 __ jccb(Assembler::notEqual, no_prefix);
3219 __ addptr(rbx, 1);
3220 __ bind(no_prefix);
3221 #ifdef ASSERT
3222 __ movptr(rax, rbx); // remember where 0x85 should be, for verification below
3223 #endif
3224 // r12/r13/rsp/rbp base encoding takes 3 bytes with the following register values:
3225 // r12/rsp 0x04
3226 // r13/rbp 0x05
// Step 3: account for the one extra byte of those encodings. The unsigned 'above'
// comparison after subtracting 4 accepts only low ModRM bits 4 and 5.
3227 __ movzbq(rcx, Address(rbx, 1));
3228 __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05
3229 __ subptr(rcx, 4); // looking for 0x00 .. 0x01
3230 __ cmpptr(rcx, 1);
3231 __ jccb(Assembler::above, not_special);
3232 __ addptr(rbx, 1);
3233 __ bind(not_special);
3234 #ifdef ASSERT
3235 // Verify the correct encoding of the poll we're about to skip.
3236 __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl);
3237 __ jcc(Assembler::notEqual, bail);
3238 // Mask out the modrm bits
3239 __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask);
3240 // rax encodes to 0, so if the bits are nonzero it's incorrect
3241 __ jcc(Assembler::notZero, bail);
3242 #endif
3243 // Adjust return pc forward to step over the safepoint poll instruction
// (the remaining two bytes: opcode and ModRM).
3244 __ addptr(rbx, 2);
3245 __ movptr(Address(rbp, wordSize), rbx);
3246 }
3247
3248 __ bind(no_adjust);
3249 // Normal exit, restore registers and exit.
3250 RegisterSaver::restore_live_registers(masm, save_wide_vectors);
3251 __ ret(0);
3252
3253 #ifdef ASSERT
3254 __ bind(bail);
3255 __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
3256 #endif
3257
3258 // Make sure all code is generated
3259 masm->flush();
3260
3261 // Fill-out other meta info
3262 SafepointBlob* sp_blob = SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
3263
// Hand the newly generated blob to the AOT code cache under the same id used by the load above.
3264 AOTCodeCache::store_code_blob(*sp_blob, AOTCodeEntry::SharedBlob, StubInfo::blob(id));
3265 return sp_blob;
3266 }
3267
3268 //
3269 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3270 //
3271 // Generate a stub that calls into vm to find out the proper destination
3272 // of a java call. All the argument registers are live at this point
3273 // but since this is generic code we don't know what they are and the caller
3274 // must do any gc of the args.
3275 //
3276 RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination) {
3277 assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
3278 assert(is_resolve_id(id), "expected a resolve stub id");
3279
3280 const char* name = SharedRuntime::stub_name(id);
3281 CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, StubInfo::blob(id));
3282 if (blob != nullptr) {
3283 return blob->as_runtime_stub();
3284 }
3285
3286 // allocate space for the code
3287 ResourceMark rm;
3288 CodeBuffer buffer(name, 1552, 512);
3289 MacroAssembler* masm = new MacroAssembler(&buffer);
3290
3291 int frame_size_in_words;
3292
3293 OopMapSet *oop_maps = new OopMapSet();
3294 OopMap* map = nullptr;
3295
3296 int start = __ offset();
3297
3298 // No need to save vector registers since they are caller-saved anyway.
3299 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ false);
3300
3301 int frame_complete = __ offset();
3302
3303 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);
3304
3305 __ mov(c_rarg0, r15_thread);
3306
3307 __ call(RuntimeAddress(destination));
3308
3309
3310 // Set an oopmap for the call site.
3311 // We need this not only for callee-saved registers, but also for volatile
3312 // registers that the compiler might be keeping live across a safepoint.
3313
3314 oop_maps->add_gc_map( __ offset() - start, map);
3315
3316 // rax contains the address we are going to jump to assuming no exception got installed
3317
3318 // clear last_Java_sp
3319 __ reset_last_Java_frame(false);
3320 // check for pending exceptions
3321 Label pending;
3322 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
3323 __ jcc(Assembler::notEqual, pending);
3324
3325 // get the returned Method*
3326 __ get_vm_result_metadata(rbx);
3327 __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx);
3328
3329 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
3330
3331 RegisterSaver::restore_live_registers(masm);
3332
3333 // We are back to the original state on entry and ready to go.
3334
3335 __ jmp(rax);
3336
3337 // Pending exception after the safepoint
3338
3339 __ bind(pending);
3340
3341 RegisterSaver::restore_live_registers(masm);
3342
3343 // exception pending => remove activation and forward to exception handler
3344
3345 __ movptr(Address(r15_thread, JavaThread::vm_result_oop_offset()), NULL_WORD);
3346
3347 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
3348 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3349
3350 // -------------
3351 // make sure all code is generated
3352 masm->flush();
3353
3354 // return the blob
3355 // frame_size_words or bytes??
3356 RuntimeStub* rs_blob = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3357
3358 AOTCodeCache::store_code_blob(*rs_blob, AOTCodeEntry::SharedBlob, StubInfo::blob(id));
3359 return rs_blob;
3360 }
3361
// Continuation point for throwing of implicit exceptions that are
// not handled in the current activation. Fabricates an exception
// oop and initiates normal exception dispatching in this
// frame. Since we need to preserve callee-saved values (currently
// only for C2, but done for C1 as well) we need a callee-saved oop
// map and therefore have to make these stubs into RuntimeStubs
// rather than BufferBlobs.  If the compiler needs all registers to
// be preserved between the fault point and the exception handler
// then it must assume responsibility for that in
// AbstractCompiler::continuation_for_implicit_null_exception or
// continuation_for_implicit_division_by_zero_exception. All other
// implicit exceptions (e.g., NullPointerException or
// AbstractMethodError on entry) are either at call sites or
// otherwise assume that stack unwinding will be initiated, so
// caller saved registers were assumed volatile in the compiler.
//
// Parameters:
//   id            - stub id; must satisfy is_throw_id(). It selects the stub
//                   name and the AOT code cache entry.
//   runtime_entry - address of the VM function that is called with the
//                   current thread (r15_thread) as its only argument.
//
// Returns the RuntimeStub, either loaded from the AOT code cache or freshly
// generated (and then stored into the AOT code cache).
RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) {
  assert(is_throw_id(id), "expected a throw stub id");

  const char* name = SharedRuntime::stub_name(id);

  // Information about frame layout at time of blocking runtime call.
  // Note that we only have to preserve callee-saved registers since
  // the compilers are responsible for supplying a continuation point
  // if they expect all registers to be preserved.
  // Offsets are counted in BytesPerInt-sized slots, which is why rbp and the
  // return address take two entries each.
  enum layout {
    rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
    rbp_off2,
    return_off,
    return_off2,
    framesize // inclusive of return address
  };

  // Sizes handed to the CodeBuffer constructor below.
  int insts_size = 512;
  int locs_size  = 64;

  // Timing scope; it is opened before the AOT cache lookup, so it covers that too.
  const char* timer_msg = "SharedRuntime generate_throw_exception";
  TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime));

  // If the AOT code cache already provides this blob, use it and generate nothing.
  CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, StubInfo::blob(id));
  if (blob != nullptr) {
    return blob->as_runtime_stub();
  }

  ResourceMark rm;
  CodeBuffer code(name, insts_size, locs_size);
  OopMapSet* oop_maps  = new OopMapSet();
  MacroAssembler* masm = new MacroAssembler(&code);

  address start = __ pc();

  // This is an inlined and slightly modified version of call_VM
  // which has the ability to fetch the return PC out of
  // thread-local storage and also sets up last_Java_sp slightly
  // differently than the real call_VM

  __ enter(); // required for proper stackwalking of RuntimeStub frame

  // framesize/2 is the frame size in 8-byte words; an even word count is a
  // multiple of 16 bytes.
  assert(is_even(framesize/2), "sp not 16-byte aligned");

  // return address and rbp are already in place
  // (those are the 4 slots excluded from the adjustment)
  __ subptr(rsp, (framesize-4) << LogBytesPerInt); // prolog

  int frame_complete = __ pc() - start;

  // Set up last_Java_sp and last_Java_fp
  // the_pc is recorded as the last Java pc and is also the pc the oop map
  // below is registered for.
  address the_pc = __ pc();
  __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1);
  __ andptr(rsp, -(StackAlignmentInBytes));    // Align stack

  // Call runtime
  __ movptr(c_rarg0, r15_thread);
  BLOCK_COMMENT("call runtime_entry");
  __ call(RuntimeAddress(runtime_entry));

  // Generate oop map
  OopMap* map = new OopMap(framesize, 0);

  // Keyed by the offset of the_pc (the pc passed to set_last_Java_frame),
  // not by the offset of the instruction following the call.
  oop_maps->add_gc_map(the_pc - start, map);

  __ reset_last_Java_frame(true);

  __ leave(); // required for proper stackwalking of RuntimeStub frame

  // check for pending exceptions
  // Debug builds only: stop if the runtime call came back without a pending
  // exception in the thread.
#ifdef ASSERT
  Label L;
  __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
  __ jcc(Assembler::notEqual, L);
  __ should_not_reach_here();
  __ bind(L);
#endif // ASSERT
  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));


  // codeBlob framesize is in words (not VMRegImpl::slot_size)
  // The shift converts the BytesPerInt-slot count of the layout enum to words.
  RuntimeStub* stub =
    RuntimeStub::new_runtime_stub(name,
                                  &code,
                                  frame_complete,
                                  (framesize >> (LogBytesPerWord - LogBytesPerInt)),
                                  oop_maps, false);
  AOTCodeCache::store_code_blob(*stub, AOTCodeEntry::SharedBlob, StubInfo::blob(id));

  return stub;
}
3467
3468 //------------------------------Montgomery multiplication------------------------
3469 //
3470
3471 #ifndef _WINDOWS
3472
// Subtract 0:b from carry:a.  Return carry.
//
// a and b are arrays of len 64-bit words; the borrow chain starts at index 0
// and runs towards higher indices, with carry acting as the word above
// a[len-1]. a is updated in place; the return value is carry minus the
// borrow out of the top array word.
//
// The loop body executes before the count is tested, so len must be at
// least 1.
static julong
sub(julong a[], julong b[], julong carry, long len) {
  long long i = 0, cnt = len;
  julong tmp;
  // clc starts the chain with no borrow. Each iteration loads b[i] and
  // subtracts it with borrow from a[i] in memory. inc/dec leave the carry
  // flag untouched, so the borrow survives the index/counter updates; jne
  // tests the zero flag set by dec. After the loop tmp = carry - borrow.
  // The "memory" clobber tells the compiler that a[] is modified through the
  // pointer operand.
  asm volatile("clc; "
               "0: ; "
               "mov (%[b], %[i], 8), %[tmp]; "
               "sbb %[tmp], (%[a], %[i], 8); "
               "inc %[i]; dec %[cnt]; "
               "jne 0b; "
               "mov %[carry], %[tmp]; sbb $0, %[tmp]; "
               : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp)
               : [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
               : "memory");
  return tmp;
}
3490
// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
//
// B is placed in rax ("a" constraint) and A in any register (%5); mul leaves
// the 128-bit product in rdx:rax. The low half is added to T0, the high half
// plus carry to T1, and the remaining carry to T2, so T0 is the least
// significant accumulator word. hi and lo are never read: they only tell the
// compiler that rdx and rax are overwritten.
// T0, T1 and T2 are read-write asm operands and therefore must be lvalues.
#define MACC(A, B, T0, T1, T2)                                  \
do {                                                            \
  unsigned long hi, lo;                                         \
  __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4"   \
           : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)  \
           : "r"(A), "a"(B) : "cc");                            \
 } while(0)
3500
// As above, but add twice the double-length result into the
// accumulator.
//
// The product is computed once by mul; the add/adc/adc sequence that folds
// rdx:rax into T0, T1, T2 is then issued twice. As in MACC, hi and lo only
// mark rdx and rax as overwritten, and T0, T1, T2 must be lvalues.
#define MACC2(A, B, T0, T1, T2)                                 \
do {                                                            \
  unsigned long hi, lo;                                         \
  __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \
           "add %%rax, %2; adc %%rdx, %3; adc $0, %4"           \
           : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)  \
           : "r"(A), "a"(B) : "cc");                            \
 } while(0)
3511
3512 #else //_WINDOWS
3513
3514 static julong
3515 sub(julong a[], julong b[], julong carry, long len) {
3516 long i;
3517 julong tmp;
3518 unsigned char c = 1;
3519 for (i = 0; i < len; i++) {
3520 c = _addcarry_u64(c, a[i], ~b[i], &tmp);
3521 a[i] = tmp;
3522 }
3523 c = _addcarry_u64(c, carry, ~0, &tmp);
3524 return tmp;
3525 }
3526
// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
//
// _umul128 yields the low product word as its result and stores the high
// word through &hi. The three _addcarry_u64 calls add lo to T0, hi plus
// carry to T1, and the remaining carry to T2.
// T0, T1 and T2 must be lvalues because their addresses are taken. The macro
// declares locals named hi, lo and c, so argument expressions must not refer
// to identifiers with those names.
#define MACC(A, B, T0, T1, T2)                          \
do {                                                    \
  julong hi, lo;                            \
  lo = _umul128(A, B, &hi);                             \
  unsigned char c = _addcarry_u64(0, lo, T0, &T0);      \
  c = _addcarry_u64(c, hi, T1, &T1);                    \
  _addcarry_u64(c, T2, 0, &T2);                         \
 } while(0)
3537
// As above, but add twice the double-length result into the
// accumulator.
//
// The product is computed once; the three-step carry chain that folds
// hi:lo into T0, T1, T2 is then run twice, the second time starting again
// with a carry-in of 0.
// T0, T1 and T2 must be lvalues because their addresses are taken. The macro
// declares locals named hi, lo and c, so argument expressions must not refer
// to identifiers with those names.
#define MACC2(A, B, T0, T1, T2)                         \
do {                                                    \
  julong hi, lo;                            \
  lo = _umul128(A, B, &hi);                             \
  unsigned char c = _addcarry_u64(0, lo, T0, &T0);      \
  c = _addcarry_u64(c, hi, T1, &T1);                    \
  _addcarry_u64(c, T2, 0, &T2);                         \
  c = _addcarry_u64(0, lo, T0, &T0);                    \
  c = _addcarry_u64(c, hi, T1, &T1);                    \
  _addcarry_u64(c, T2, 0, &T2);                         \
 } while(0)
3551
3552 #endif //_WINDOWS
3553
3554 // Fast Montgomery multiplication. The derivation of the algorithm is
3555 // in A Cryptographic Library for the Motorola DSP56000,
3556 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
3557
3558 static void NOINLINE
3559 montgomery_multiply(julong a[], julong b[], julong n[],
3560 julong m[], julong inv, int len) {
3561 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3562 int i;
3563
3564 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery multiply");
3565
3566 for (i = 0; i < len; i++) {
3567 int j;
3568 for (j = 0; j < i; j++) {
3569 MACC(a[j], b[i-j], t0, t1, t2);
3570 MACC(m[j], n[i-j], t0, t1, t2);
3571 }
3572 MACC(a[i], b[0], t0, t1, t2);
3573 m[i] = t0 * inv;
3574 MACC(m[i], n[0], t0, t1, t2);
3575
3576 assert(t0 == 0, "broken Montgomery multiply");
3577
3578 t0 = t1; t1 = t2; t2 = 0;
3579 }
3580
3581 for (i = len; i < 2*len; i++) {
3582 int j;
3583 for (j = i-len+1; j < len; j++) {
3584 MACC(a[j], b[i-j], t0, t1, t2);
3585 MACC(m[j], n[i-j], t0, t1, t2);
3586 }
3587 m[i-len] = t0;
3588 t0 = t1; t1 = t2; t2 = 0;
3589 }
3590
3591 while (t0)
3592 t0 = sub(m, n, t0, len);
3593 }
3594
3595 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3596 // multiplies so it should be up to 25% faster than Montgomery
3597 // multiplication. However, its loop control is more complex and it
3598 // may actually run slower on some machines.
3599
3600 static void NOINLINE
3601 montgomery_square(julong a[], julong n[],
3602 julong m[], julong inv, int len) {
3603 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3604 int i;
3605
3606 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery square");
3607
3608 for (i = 0; i < len; i++) {
3609 int j;
3610 int end = (i+1)/2;
3611 for (j = 0; j < end; j++) {
3612 MACC2(a[j], a[i-j], t0, t1, t2);
3613 MACC(m[j], n[i-j], t0, t1, t2);
3614 }
3615 if ((i & 1) == 0) {
3616 MACC(a[j], a[j], t0, t1, t2);
3617 }
3618 for (; j < i; j++) {
3619 MACC(m[j], n[i-j], t0, t1, t2);
3620 }
3621 m[i] = t0 * inv;
3622 MACC(m[i], n[0], t0, t1, t2);
3623
3624 assert(t0 == 0, "broken Montgomery square");
3625
3626 t0 = t1; t1 = t2; t2 = 0;
3627 }
3628
3629 for (i = len; i < 2*len; i++) {
3630 int start = i-len+1;
3631 int end = start + (len - start)/2;
3632 int j;
3633 for (j = start; j < end; j++) {
3634 MACC2(a[j], a[i-j], t0, t1, t2);
3635 MACC(m[j], n[i-j], t0, t1, t2);
3636 }
3637 if ((i & 1) == 0) {
3638 MACC(a[j], a[j], t0, t1, t2);
3639 }
3640 for (; j < len; j++) {
3641 MACC(m[j], n[i-j], t0, t1, t2);
3642 }
3643 m[i-len] = t0;
3644 t0 = t1; t1 = t2; t2 = 0;
3645 }
3646
3647 while (t0)
3648 t0 = sub(m, n, t0, len);
3649 }
3650
3651 // Swap words in a longword.
3652 static julong swap(julong x) {
3653 return (x << 32) | (x >> 32);
3654 }
3655
3656 // Copy len longwords from s to d, word-swapping as we go. The
3657 // destination array is reversed.
3658 static void reverse_words(julong *s, julong *d, int len) {
3659 d += len;
3660 while(len-- > 0) {
3661 d--;
3662 *d = swap(*s);
3663 s++;
3664 }
3665 }
3666
// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
// SharedRuntime::montgomery_square compares it against len, the operand
// length in jints: operands at or above the threshold use the dedicated
// squaring routine, shorter ones go through the generic multiply.
#define MONTGOMERY_SQUARING_THRESHOLD 64
3670
3671 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3672 jint len, jlong inv,
3673 jint *m_ints) {
3674 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3675 int longwords = len/2;
3676
3677 // Make very sure we don't use so much space that the stack might
3678 // overflow. 512 jints corresponds to an 16384-bit integer and
3679 // will use here a total of 8k bytes of stack space.
3680 int divisor = sizeof(julong) * 4;
3681 guarantee(longwords <= 8192 / divisor, "must be");
3682 int total_allocation = longwords * sizeof (julong) * 4;
3683 julong *scratch = (julong *)alloca(total_allocation);
3684
3685 // Local scratch arrays
3686 julong
3687 *a = scratch + 0 * longwords,
3688 *b = scratch + 1 * longwords,
3689 *n = scratch + 2 * longwords,
3690 *m = scratch + 3 * longwords;
3691
3692 reverse_words((julong *)a_ints, a, longwords);
3693 reverse_words((julong *)b_ints, b, longwords);
3694 reverse_words((julong *)n_ints, n, longwords);
3695
3696 ::montgomery_multiply(a, b, n, m, (julong)inv, longwords);
3697
3698 reverse_words(m, (julong *)m_ints, longwords);
3699 }
3700
3701 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3702 jint len, jlong inv,
3703 jint *m_ints) {
3704 assert(len % 2 == 0, "array length in montgomery_square must be even");
3705 int longwords = len/2;
3706
3707 // Make very sure we don't use so much space that the stack might
3708 // overflow. 512 jints corresponds to an 16384-bit integer and
3709 // will use here a total of 6k bytes of stack space.
3710 int divisor = sizeof(julong) * 3;
3711 guarantee(longwords <= (8192 / divisor), "must be");
3712 int total_allocation = longwords * sizeof (julong) * 3;
3713 julong *scratch = (julong *)alloca(total_allocation);
3714
3715 // Local scratch arrays
3716 julong
3717 *a = scratch + 0 * longwords,
3718 *n = scratch + 1 * longwords,
3719 *m = scratch + 2 * longwords;
3720
3721 reverse_words((julong *)a_ints, a, longwords);
3722 reverse_words((julong *)n_ints, n, longwords);
3723
3724 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3725 ::montgomery_square(a, n, m, (julong)inv, longwords);
3726 } else {
3727 ::montgomery_multiply(a, a, n, m, (julong)inv, longwords);
3728 }
3729
3730 reverse_words(m, (julong *)m_ints, longwords);
3731 }
3732
3733 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
3734 BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K);
3735 if (buf == nullptr) {
3736 return nullptr;
3737 }
3738 CodeBuffer buffer(buf);
3739 short buffer_locs[20];
3740 buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs,
3741 sizeof(buffer_locs)/sizeof(relocInfo));
3742
3743 MacroAssembler* masm = new MacroAssembler(&buffer);
3744
3745 const Array<SigEntry>* sig_vk = vk->extended_sig();
3746 const Array<VMRegPair>* regs = vk->return_regs();
3747
3748 int pack_fields_jobject_off = __ offset();
3749 // Resolve pre-allocated buffer from JNI handle.
3750 // We cannot do this in generate_call_stub() because it requires GC code to be initialized.
3751 __ movptr(rax, Address(r13, 0));
3752 __ resolve_jobject(rax /* value */,
3753 r12 /* tmp */);
3754 __ movptr(Address(r13, 0), rax);
3755
3756 int pack_fields_off = __ offset();
3757
3758 int j = 1;
3759 for (int i = 0; i < sig_vk->length(); i++) {
3760 BasicType bt = sig_vk->at(i)._bt;
3761 if (bt == T_METADATA) {
3762 continue;
3763 }
3764 if (bt == T_VOID) {
3765 if (sig_vk->at(i-1)._bt == T_LONG ||
3766 sig_vk->at(i-1)._bt == T_DOUBLE) {
3767 j++;
3768 }
3769 continue;
3770 }
3771 int off = sig_vk->at(i)._offset;
3772 assert(off > 0, "offset in object should be positive");
3773 VMRegPair pair = regs->at(j);
3774 VMReg r_1 = pair.first();
3775 Address to(rax, off);
3776 if (bt == T_FLOAT) {
3777 __ movflt(to, r_1->as_XMMRegister());
3778 } else if (bt == T_DOUBLE) {
3779 __ movdbl(to, r_1->as_XMMRegister());
3780 } else {
3781 Register val = r_1->as_Register();
3782 assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1);
3783 if (is_reference_type(bt)) {
3784 // store_heap_oop transitively calls oop_store_at which corrupts to.base(). We need to keep rax valid.
3785 __ mov(rbx, rax);
3786 Address to_with_rbx(rbx, off);
3787 __ store_heap_oop(to_with_rbx, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
3788 } else {
3789 __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt));
3790 }
3791 }
3792 j++;
3793 }
3794 assert(j == regs->length(), "missed a field?");
3795 if (vk->supports_nullable_layouts()) {
3796 // Set the null marker
3797 __ movb(Address(rax, vk->null_marker_offset()), 1);
3798 }
3799 __ ret(0);
3800
3801 int unpack_fields_off = __ offset();
3802
3803 Label skip;
3804 Label not_null;
3805 __ testptr(rax, rax);
3806 __ jcc(Assembler::notZero, not_null);
3807
3808 // Return value is null. Zero all registers because the runtime requires a canonical
3809 // representation of a flat null.
3810 j = 1;
3811 for (int i = 0; i < sig_vk->length(); i++) {
3812 BasicType bt = sig_vk->at(i)._bt;
3813 if (bt == T_METADATA) {
3814 continue;
3815 }
3816 if (bt == T_VOID) {
3817 if (sig_vk->at(i-1)._bt == T_LONG ||
3818 sig_vk->at(i-1)._bt == T_DOUBLE) {
3819 j++;
3820 }
3821 continue;
3822 }
3823
3824 VMRegPair pair = regs->at(j);
3825 VMReg r_1 = pair.first();
3826 if (r_1->is_XMMRegister()) {
3827 __ xorps(r_1->as_XMMRegister(), r_1->as_XMMRegister());
3828 } else {
3829 __ xorl(r_1->as_Register(), r_1->as_Register());
3830 }
3831 j++;
3832 }
3833 __ jmp(skip);
3834 __ bind(not_null);
3835
3836 j = 1;
3837 for (int i = 0; i < sig_vk->length(); i++) {
3838 BasicType bt = sig_vk->at(i)._bt;
3839 if (bt == T_METADATA) {
3840 continue;
3841 }
3842 if (bt == T_VOID) {
3843 if (sig_vk->at(i-1)._bt == T_LONG ||
3844 sig_vk->at(i-1)._bt == T_DOUBLE) {
3845 j++;
3846 }
3847 continue;
3848 }
3849 int off = sig_vk->at(i)._offset;
3850 assert(off > 0, "offset in object should be positive");
3851 VMRegPair pair = regs->at(j);
3852 VMReg r_1 = pair.first();
3853 VMReg r_2 = pair.second();
3854 Address from(rax, off);
3855 if (bt == T_FLOAT) {
3856 __ movflt(r_1->as_XMMRegister(), from);
3857 } else if (bt == T_DOUBLE) {
3858 __ movdbl(r_1->as_XMMRegister(), from);
3859 } else if (bt == T_OBJECT || bt == T_ARRAY) {
3860 assert_different_registers(rax, r_1->as_Register());
3861 __ load_heap_oop(r_1->as_Register(), from);
3862 } else {
3863 assert(is_java_primitive(bt), "unexpected basic type");
3864 assert_different_registers(rax, r_1->as_Register());
3865 size_t size_in_bytes = type2aelembytes(bt);
3866 __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN);
3867 }
3868 j++;
3869 }
3870 assert(j == regs->length(), "missed a field?");
3871
3872 __ bind(skip);
3873 __ ret(0);
3874
3875 __ flush();
3876
3877 return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off);
3878 }
3879
3880 #if INCLUDE_JFR
3881
3882 // For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
3883 // It returns a jobject handle to the event writer.
3884 // The handle is dereferenced and the return value is the event writer oop.
3885 RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
3886 enum layout {
3887 rbp_off,
3888 rbpH_off,
3889 return_off,
3890 return_off2,
3891 framesize // inclusive of return address
3892 };
3893
3894 const char* name = SharedRuntime::stub_name(StubId::shared_jfr_write_checkpoint_id);
3895 CodeBuffer code(name, 1024, 64);
3896 MacroAssembler* masm = new MacroAssembler(&code);
3897 address start = __ pc();
3898
3899 __ enter();
3900 address the_pc = __ pc();
3901
3902 int frame_complete = the_pc - start;
3903
3904 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1);
3905 __ movptr(c_rarg0, r15_thread);
3906 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), 1);
3907 __ reset_last_Java_frame(true);
3908
3909 // rax is jobject handle result, unpack and process it through a barrier.
3910 __ resolve_global_jobject(rax, c_rarg0);
3911
3912 __ leave();
3913 __ ret(0);
3914
3915 OopMapSet* oop_maps = new OopMapSet();
3916 OopMap* map = new OopMap(framesize, 1);
3917 oop_maps->add_gc_map(frame_complete, map);
3918
3919 RuntimeStub* stub =
3920 RuntimeStub::new_runtime_stub(name,
3921 &code,
3922 frame_complete,
3923 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3924 oop_maps,
3925 false);
3926 return stub;
3927 }
3928
3929 // For c2: call to return a leased buffer.
3930 RuntimeStub* SharedRuntime::generate_jfr_return_lease() {
3931 enum layout {
3932 rbp_off,
3933 rbpH_off,
3934 return_off,
3935 return_off2,
3936 framesize // inclusive of return address
3937 };
3938
3939 const char* name = SharedRuntime::stub_name(StubId::shared_jfr_return_lease_id);
3940 CodeBuffer code(name, 1024, 64);
3941 MacroAssembler* masm = new MacroAssembler(&code);
3942 address start = __ pc();
3943
3944 __ enter();
3945 address the_pc = __ pc();
3946
3947 int frame_complete = the_pc - start;
3948
3949 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch2);
3950 __ movptr(c_rarg0, r15_thread);
3951 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), 1);
3952 __ reset_last_Java_frame(true);
3953
3954 __ leave();
3955 __ ret(0);
3956
3957 OopMapSet* oop_maps = new OopMapSet();
3958 OopMap* map = new OopMap(framesize, 1);
3959 oop_maps->add_gc_map(frame_complete, map);
3960
3961 RuntimeStub* stub =
3962 RuntimeStub::new_runtime_stub(name,
3963 &code,
3964 frame_complete,
3965 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3966 oop_maps,
3967 false);
3968 return stub;
3969 }
3970
3971 #endif // INCLUDE_JFR