1 /*
2 * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef _WINDOWS
26 #include "alloca.h"
27 #endif
28 #include "asm/macroAssembler.hpp"
29 #include "asm/macroAssembler.inline.hpp"
30 #include "classfile/symbolTable.hpp"
31 #include "code/aotCodeCache.hpp"
32 #include "code/compiledIC.hpp"
33 #include "code/debugInfoRec.hpp"
34 #include "code/nativeInst.hpp"
35 #include "code/vtableStubs.hpp"
36 #include "compiler/oopMap.hpp"
37 #include "gc/shared/collectedHeap.hpp"
38 #include "gc/shared/gcLocker.hpp"
39 #include "gc/shared/barrierSet.hpp"
40 #include "gc/shared/barrierSetAssembler.hpp"
41 #include "interpreter/interpreter.hpp"
42 #include "logging/log.hpp"
43 #include "memory/resourceArea.hpp"
44 #include "memory/universe.hpp"
45 #include "oops/klass.inline.hpp"
46 #include "oops/method.inline.hpp"
47 #include "prims/methodHandles.hpp"
48 #include "runtime/continuation.hpp"
49 #include "runtime/continuationEntry.inline.hpp"
50 #include "runtime/globals.hpp"
51 #include "runtime/jniHandles.hpp"
52 #include "runtime/safepointMechanism.hpp"
53 #include "runtime/sharedRuntime.hpp"
54 #include "runtime/signature.hpp"
55 #include "runtime/stubRoutines.hpp"
56 #include "runtime/timerTrace.hpp"
57 #include "runtime/vframeArray.hpp"
58 #include "runtime/vm_version.hpp"
59 #include "utilities/align.hpp"
60 #include "utilities/checkedCast.hpp"
61 #include "utilities/formatBuffer.hpp"
62 #include "vmreg_x86.inline.hpp"
63 #ifdef COMPILER1
64 #include "c1/c1_Runtime1.hpp"
65 #endif
66 #ifdef COMPILER2
67 #include "opto/runtime.hpp"
68 #endif
69 #if INCLUDE_JVMCI
70 #include "jvmci/jvmciJavaClasses.hpp"
71 #endif
72
73 #define __ masm->
74
75 #ifdef PRODUCT
76 #define BLOCK_COMMENT(str) /* nothing */
77 #else
78 #define BLOCK_COMMENT(str) __ block_comment(str)
79 #endif // PRODUCT
80
81 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
82
83 class RegisterSaver {
84 // Capture info about frame layout. Layout offsets are in jint
85 // units because compiler frame slots are jints.
86 #define XSAVE_AREA_BEGIN 160
87 #define XSAVE_AREA_YMM_BEGIN 576
88 #define XSAVE_AREA_EGPRS 960
89 #define XSAVE_AREA_OPMASK_BEGIN 1088
90 #define XSAVE_AREA_ZMM_BEGIN 1152
91 #define XSAVE_AREA_UPPERBANK 1664
92 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
93 #define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
94 #define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum)*32/BytesPerInt, zmm ## regnum ## H_off
95 #define DEF_OPMASK_OFFS(regnum) opmask ## regnum ## _off = opmask_off + (regnum)*8/BytesPerInt, opmask ## regnum ## H_off
96 #define DEF_ZMM_UPPER_OFFS(regnum) zmm ## regnum ## _off = zmm_upper_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
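  // Expansion sketch (illustrative only, not an additional layout definition): with
  // BytesPerInt == 4, DEF_XMM_OFFS(1) expands to
  //   xmm1_off = xmm_off + (1)*16/BytesPerInt,   // == xmm_off + 4
  //   xmm1H_off                                  // == xmm_off + 5
  // so consecutive XMM registers sit 4 jint slots (16 bytes) apart, which is what
  // lets later code walk the save area with a constant delta such as
  // xmm1_off - xmm0_off.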
97 enum layout {
98 fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
99 xmm_off = fpu_state_off + XSAVE_AREA_BEGIN/BytesPerInt, // offset in fxsave save area
100 DEF_XMM_OFFS(0),
101 DEF_XMM_OFFS(1),
102 // 2..15 are implied in range usage
103 ymm_off = xmm_off + (XSAVE_AREA_YMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
104 DEF_YMM_OFFS(0),
105 DEF_YMM_OFFS(1),
106 r16_off = xmm_off + (XSAVE_AREA_EGPRS - XSAVE_AREA_BEGIN)/BytesPerInt,
107 r16H_off,
108 r17_off, r17H_off,
109 r18_off, r18H_off,
110 r19_off, r19H_off,
111 r20_off, r20H_off,
112 r21_off, r21H_off,
113 r22_off, r22H_off,
114 r23_off, r23H_off,
115 r24_off, r24H_off,
116 r25_off, r25H_off,
117 r26_off, r26H_off,
118 r27_off, r27H_off,
119 r28_off, r28H_off,
120 r29_off, r29H_off,
121 r30_off, r30H_off,
122 r31_off, r31H_off,
123 opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
124 DEF_OPMASK_OFFS(0),
125 DEF_OPMASK_OFFS(1),
126 // 2..7 are implied in range usage
127 zmm_off = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
128 DEF_ZMM_OFFS(0),
129 DEF_ZMM_OFFS(1),
130 zmm_upper_off = xmm_off + (XSAVE_AREA_UPPERBANK - XSAVE_AREA_BEGIN)/BytesPerInt,
131 DEF_ZMM_UPPER_OFFS(16),
132 DEF_ZMM_UPPER_OFFS(17),
133 // 18..31 are implied in range usage
134 fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
135 fpu_stateH_end,
136 r15_off, r15H_off,
137 r14_off, r14H_off,
138 r13_off, r13H_off,
139 r12_off, r12H_off,
140 r11_off, r11H_off,
141 r10_off, r10H_off,
142 r9_off, r9H_off,
143 r8_off, r8H_off,
144 rdi_off, rdiH_off,
145 rsi_off, rsiH_off,
146 ignore_off, ignoreH_off, // extra copy of rbp
147 rsp_off, rspH_off,
148 rbx_off, rbxH_off,
149 rdx_off, rdxH_off,
150 rcx_off, rcxH_off,
151 rax_off, raxH_off,
152 // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
153 align_off, alignH_off,
154 flags_off, flagsH_off,
155 // The frame sender code expects that rbp will be in the "natural" place and
156 // will override any oopMap setting for it. We must therefore force the layout
157 // so that it agrees with the frame sender code.
158 rbp_off, rbpH_off, // copy of rbp we will restore
159 return_off, returnH_off, // slot for return address
160 reg_save_size // size in compiler stack slots
161 };
162
163 public:
164 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors);
165 static void restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors = false);
166
167 // Offsets into the register save area
168 // Used by deoptimization when it is managing result register
169 // values on its own
170
171 static int rax_offset_in_bytes(void) { return BytesPerInt * rax_off; }
172 static int rdx_offset_in_bytes(void) { return BytesPerInt * rdx_off; }
173 static int rbx_offset_in_bytes(void) { return BytesPerInt * rbx_off; }
174 static int r15_offset_in_bytes(void) { return BytesPerInt * r15_off; }
175 static int xmm0_offset_in_bytes(void) { return BytesPerInt * xmm0_off; }
176 static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
177
178 // During deoptimization only the result registers need to be restored,
179 // all the other values have already been extracted.
180 static void restore_result_registers(MacroAssembler* masm);
181 };
182
183 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_wide_vectors) {
184 int off = 0;
185 int num_xmm_regs = XMMRegister::available_xmm_registers();
186 #if COMPILER2_OR_JVMCI
187 if (save_wide_vectors && UseAVX == 0) {
188     save_wide_vectors = false; // vectors larger than 16 bytes long are supported only with AVX
189 }
190 assert(!save_wide_vectors || MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
191 #else
192 save_wide_vectors = false; // vectors are generated only by C2 and JVMCI
193 #endif
194
195   // Always make the frame size 16-byte aligned; the full save area is allocated whether or not wide vectors are saved
196 int frame_size_in_bytes = align_up(reg_save_size*BytesPerInt, num_xmm_regs);
197 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
198 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
199 // CodeBlob frame size is in words.
200 int frame_size_in_words = frame_size_in_bytes / wordSize;
201 *total_frame_words = frame_size_in_words;
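  // Unit-conversion sketch (the numbers are illustrative only; the real reg_save_size
  // depends on the build and CPU features): if reg_save_size were 848 jint slots, then
  //   frame_size_in_bytes = align_up(848 * 4, num_xmm_regs)   // 3392
  //   frame_size_in_slots = 3392 / BytesPerInt                // 848 4-byte slots
  //   frame_size_in_words = 3392 / wordSize                   // 424 8-byte words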
202
203 // Save registers, fpu state, and flags.
204 // We assume caller has already pushed the return address onto the
205 // stack, so rsp is 8-byte aligned here.
206   // We push rbp twice in this sequence because we want the real rbp
207   // to be under the return address, like a normal enter.
208
209 __ enter(); // rsp becomes 16-byte aligned here
210 __ pushf();
211 // Make sure rsp stays 16-byte aligned
212 __ subq(rsp, 8);
213 // Push CPU state in multiple of 16 bytes
214 __ save_legacy_gprs();
215 __ push_FPU_state();
216
217
218 // push cpu state handles this on EVEX enabled targets
219 if (save_wide_vectors) {
220 // Save upper half of YMM registers(0..15)
221 int base_addr = XSAVE_AREA_YMM_BEGIN;
222 for (int n = 0; n < 16; n++) {
223 __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n));
224 }
225 if (VM_Version::supports_evex()) {
226 // Save upper half of ZMM registers(0..15)
227 base_addr = XSAVE_AREA_ZMM_BEGIN;
228 for (int n = 0; n < 16; n++) {
229 __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
230 }
231 // Save full ZMM registers(16..num_xmm_regs)
232 base_addr = XSAVE_AREA_UPPERBANK;
233 off = 0;
234 int vector_len = Assembler::AVX_512bit;
235 for (int n = 16; n < num_xmm_regs; n++) {
236 __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
237 }
238 #if COMPILER2_OR_JVMCI
239 base_addr = XSAVE_AREA_OPMASK_BEGIN;
240 off = 0;
241 for(int n = 0; n < KRegister::number_of_registers; n++) {
242 __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
243 }
244 #endif
245 }
246 } else {
247 if (VM_Version::supports_evex()) {
248 // Save upper bank of XMM registers(16..31) for scalar or 16-byte vector usage
249 int base_addr = XSAVE_AREA_UPPERBANK;
250 off = 0;
251 int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit;
252 for (int n = 16; n < num_xmm_regs; n++) {
253 __ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
254 }
255 #if COMPILER2_OR_JVMCI
256 base_addr = XSAVE_AREA_OPMASK_BEGIN;
257 off = 0;
258 for(int n = 0; n < KRegister::number_of_registers; n++) {
259 __ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
260 }
261 #endif
262 }
263 }
264
265 #if COMPILER2_OR_JVMCI
266 if (UseAPX) {
267 int base_addr = XSAVE_AREA_EGPRS;
268 off = 0;
269 for (int n = 16; n < Register::number_of_registers; n++) {
270 __ movq(Address(rsp, base_addr+(off++*8)), as_Register(n));
271 }
272 }
273 #endif
274
275 __ vzeroupper();
276 if (frame::arg_reg_save_area_bytes != 0) {
277 // Allocate argument register save area
278 __ subptr(rsp, frame::arg_reg_save_area_bytes);
279 }
280
281 // Set an oopmap for the call site. This oopmap will map all
282 // oop-registers and debug-info registers as callee-saved. This
283 // will allow deoptimization at this safepoint to find all possible
284 // debug-info recordings, as well as let GC find all oops.
285
286 OopMapSet *oop_maps = new OopMapSet();
287 OopMap* map = new OopMap(frame_size_in_slots, 0);
288
289 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x))
290
291 map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
292 map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
293 map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
294 map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
295 // rbp location is known implicitly by the frame sender code, needs no oopmap
296   // and the location where rbp was saved is ignored
297 map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
298 map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
299 map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg());
300 map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg());
301 map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
302 map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
303 map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
304 map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
305 map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
306 map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
307
308 if (UseAPX) {
309 map->set_callee_saved(STACK_OFFSET( r16_off ), r16->as_VMReg());
310 map->set_callee_saved(STACK_OFFSET( r17_off ), r17->as_VMReg());
311 map->set_callee_saved(STACK_OFFSET( r18_off ), r18->as_VMReg());
312 map->set_callee_saved(STACK_OFFSET( r19_off ), r19->as_VMReg());
313 map->set_callee_saved(STACK_OFFSET( r20_off ), r20->as_VMReg());
314 map->set_callee_saved(STACK_OFFSET( r21_off ), r21->as_VMReg());
315 map->set_callee_saved(STACK_OFFSET( r22_off ), r22->as_VMReg());
316 map->set_callee_saved(STACK_OFFSET( r23_off ), r23->as_VMReg());
317 map->set_callee_saved(STACK_OFFSET( r24_off ), r24->as_VMReg());
318 map->set_callee_saved(STACK_OFFSET( r25_off ), r25->as_VMReg());
319 map->set_callee_saved(STACK_OFFSET( r26_off ), r26->as_VMReg());
320 map->set_callee_saved(STACK_OFFSET( r27_off ), r27->as_VMReg());
321 map->set_callee_saved(STACK_OFFSET( r28_off ), r28->as_VMReg());
322 map->set_callee_saved(STACK_OFFSET( r29_off ), r29->as_VMReg());
323 map->set_callee_saved(STACK_OFFSET( r30_off ), r30->as_VMReg());
324 map->set_callee_saved(STACK_OFFSET( r31_off ), r31->as_VMReg());
325 }
326   // For both AVX and EVEX we use the legacy FXSAVE area for xmm0..xmm15;
327   // on EVEX-enabled targets it is also included in the XSAVE area.
328 off = xmm0_off;
329 int delta = xmm1_off - off;
330 for (int n = 0; n < 16; n++) {
331 XMMRegister xmm_name = as_XMMRegister(n);
332 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
333 off += delta;
334 }
335 if (UseAVX > 2) {
336 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
337 off = zmm16_off;
338 delta = zmm17_off - off;
339 for (int n = 16; n < num_xmm_regs; n++) {
340 XMMRegister zmm_name = as_XMMRegister(n);
341 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg());
342 off += delta;
343 }
344 }
345
346 #if COMPILER2_OR_JVMCI
347 if (save_wide_vectors) {
348 // Save upper half of YMM registers(0..15)
349 off = ymm0_off;
350 delta = ymm1_off - ymm0_off;
351 for (int n = 0; n < 16; n++) {
352 XMMRegister ymm_name = as_XMMRegister(n);
353 map->set_callee_saved(STACK_OFFSET(off), ymm_name->as_VMReg()->next(4));
354 off += delta;
355 }
356 if (VM_Version::supports_evex()) {
357 // Save upper half of ZMM registers(0..15)
358 off = zmm0_off;
359 delta = zmm1_off - zmm0_off;
360 for (int n = 0; n < 16; n++) {
361 XMMRegister zmm_name = as_XMMRegister(n);
362 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next(8));
363 off += delta;
364 }
365 }
366 }
367 #endif // COMPILER2_OR_JVMCI
368
369 // %%% These should all be a waste but we'll keep things as they were for now
370 if (true) {
371 map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
372 map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
373 map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
374 map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
375 // rbp location is known implicitly by the frame sender code, needs no oopmap
376 map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
377 map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
378 map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next());
379 map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next());
380 map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
381 map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
382 map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
383 map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
384 map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
385 map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
386 if (UseAPX) {
387 map->set_callee_saved(STACK_OFFSET( r16H_off ), r16->as_VMReg()->next());
388 map->set_callee_saved(STACK_OFFSET( r17H_off ), r17->as_VMReg()->next());
389 map->set_callee_saved(STACK_OFFSET( r18H_off ), r18->as_VMReg()->next());
390 map->set_callee_saved(STACK_OFFSET( r19H_off ), r19->as_VMReg()->next());
391 map->set_callee_saved(STACK_OFFSET( r20H_off ), r20->as_VMReg()->next());
392 map->set_callee_saved(STACK_OFFSET( r21H_off ), r21->as_VMReg()->next());
393 map->set_callee_saved(STACK_OFFSET( r22H_off ), r22->as_VMReg()->next());
394 map->set_callee_saved(STACK_OFFSET( r23H_off ), r23->as_VMReg()->next());
395 map->set_callee_saved(STACK_OFFSET( r24H_off ), r24->as_VMReg()->next());
396 map->set_callee_saved(STACK_OFFSET( r25H_off ), r25->as_VMReg()->next());
397 map->set_callee_saved(STACK_OFFSET( r26H_off ), r26->as_VMReg()->next());
398 map->set_callee_saved(STACK_OFFSET( r27H_off ), r27->as_VMReg()->next());
399 map->set_callee_saved(STACK_OFFSET( r28H_off ), r28->as_VMReg()->next());
400 map->set_callee_saved(STACK_OFFSET( r29H_off ), r29->as_VMReg()->next());
401 map->set_callee_saved(STACK_OFFSET( r30H_off ), r30->as_VMReg()->next());
402 map->set_callee_saved(STACK_OFFSET( r31H_off ), r31->as_VMReg()->next());
403 }
404     // For both AVX and EVEX we use the legacy FXSAVE area for xmm0..xmm15;
405     // on EVEX-enabled targets it is also included in the XSAVE area.
406 off = xmm0H_off;
407 delta = xmm1H_off - off;
408 for (int n = 0; n < 16; n++) {
409 XMMRegister xmm_name = as_XMMRegister(n);
410 map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
411 off += delta;
412 }
413 if (UseAVX > 2) {
414 // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
415 off = zmm16H_off;
416 delta = zmm17H_off - off;
417 for (int n = 16; n < num_xmm_regs; n++) {
418 XMMRegister zmm_name = as_XMMRegister(n);
419 map->set_callee_saved(STACK_OFFSET(off), zmm_name->as_VMReg()->next());
420 off += delta;
421 }
422 }
423 }
424
425 return map;
426 }
427
428 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_wide_vectors) {
429 int num_xmm_regs = XMMRegister::available_xmm_registers();
430 if (frame::arg_reg_save_area_bytes != 0) {
431 // Pop arg register save area
432 __ addptr(rsp, frame::arg_reg_save_area_bytes);
433 }
434
435 #if COMPILER2_OR_JVMCI
436 if (restore_wide_vectors) {
437 assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
438 assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
439 }
440 #else
441 assert(!restore_wide_vectors, "vectors are generated only by C2");
442 #endif
443
444 __ vzeroupper();
445
446 // On EVEX enabled targets everything is handled in pop fpu state
447 if (restore_wide_vectors) {
448 // Restore upper half of YMM registers (0..15)
449 int base_addr = XSAVE_AREA_YMM_BEGIN;
450 for (int n = 0; n < 16; n++) {
451 __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
452 }
453 if (VM_Version::supports_evex()) {
454 // Restore upper half of ZMM registers (0..15)
455 base_addr = XSAVE_AREA_ZMM_BEGIN;
456 for (int n = 0; n < 16; n++) {
457 __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
458 }
459 // Restore full ZMM registers(16..num_xmm_regs)
460 base_addr = XSAVE_AREA_UPPERBANK;
461 int vector_len = Assembler::AVX_512bit;
462 int off = 0;
463 for (int n = 16; n < num_xmm_regs; n++) {
464 __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
465 }
466 #if COMPILER2_OR_JVMCI
467 base_addr = XSAVE_AREA_OPMASK_BEGIN;
468 off = 0;
469 for (int n = 0; n < KRegister::number_of_registers; n++) {
470 __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
471 }
472 #endif
473 }
474 } else {
475 if (VM_Version::supports_evex()) {
476 // Restore upper bank of XMM registers(16..31) for scalar or 16-byte vector usage
477 int base_addr = XSAVE_AREA_UPPERBANK;
478 int off = 0;
479 int vector_len = VM_Version::supports_avx512vl() ? Assembler::AVX_128bit : Assembler::AVX_512bit;
480 for (int n = 16; n < num_xmm_regs; n++) {
481 __ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
482 }
483 #if COMPILER2_OR_JVMCI
484 base_addr = XSAVE_AREA_OPMASK_BEGIN;
485 off = 0;
486 for (int n = 0; n < KRegister::number_of_registers; n++) {
487 __ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
488 }
489 #endif
490 }
491 }
492
493 #if COMPILER2_OR_JVMCI
494 if (UseAPX) {
495 int base_addr = XSAVE_AREA_EGPRS;
496 int off = 0;
497 for (int n = 16; n < Register::number_of_registers; n++) {
498 __ movq(as_Register(n), Address(rsp, base_addr+(off++*8)));
499 }
500 }
501 #endif
502
503 // Recover CPU state
504 __ pop_FPU_state();
505 __ restore_legacy_gprs();
506 __ addq(rsp, 8);
507 __ popf();
508 // Get the rbp described implicitly by the calling convention (no oopMap)
509 __ pop(rbp);
510 }
511
512 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
513
514 // Just restore result register. Only used by deoptimization. By
515 // now any callee save register that needs to be restored to a c2
516 // caller of the deoptee has been extracted into the vframeArray
517 // and will be stuffed into the c2i adapter we create for later
518 // restoration so only result registers need to be restored here.
519
520 // Restore fp result register
521 __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
522 // Restore integer result register
523 __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
524 __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));
525
526   // Pop all of the register save area off the stack except the return address
527 __ addptr(rsp, return_offset_in_bytes());
528 }
529
530 // Is the vector's size (in bytes) bigger than the size saved by default?
531 // 16-byte XMM registers are saved by default using fxsave/fxrstor instructions.
532 bool SharedRuntime::is_wide_vector(int size) {
533 return size > 16;
534 }
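// For example: is_wide_vector(16) is false (16-byte XMM values are already
// covered by fxsave/fxrstor), while is_wide_vector(32) and is_wide_vector(64)
// are true (YMM/ZMM sized values need the explicit wide-vector save path in
// RegisterSaver above).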
535
536 // ---------------------------------------------------------------------------
537 // Read the array of BasicTypes from a signature, and compute where the
538 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
539 // quantities. Values less than VMRegImpl::stack0 are registers, those above
540 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
541 // as framesizes are fixed.
542 // VMRegImpl::stack0 refers to the first slot 0(sp),
543 // and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.
544 // Registers up to Register::number_of_registers are the 64-bit
545 // integer registers.
546
547 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
548 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
549 // units regardless of build. Of course for i486 there is no 64 bit build
550
551 // The Java calling convention is a "shifted" version of the C ABI.
552 // By skipping the first C ABI register we can call non-static jni methods
553 // with small numbers of arguments without having to shuffle the arguments
554 // at all. Since we control the java ABI we ought to at least get some
555 // advantage out of it.
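// Mapping sketch for a hypothetical signature (int, long, float, Object):
//   sig_bt = { T_INT, T_LONG, T_VOID, T_FLOAT, T_OBJECT }
//   T_INT    -> j_rarg0  (set1)
//   T_LONG   -> j_rarg1  (set2; the trailing T_VOID half is set_bad())
//   T_FLOAT  -> j_farg0  (set1)
//   T_OBJECT -> j_rarg2  (set2)
// Everything fits in registers, so the function below would return 0 stack slots.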
556
557 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
558 VMRegPair *regs,
559 int total_args_passed) {
560
561 // Create the mapping between argument positions and
562 // registers.
563 static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
564 j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5
565 };
566 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
567 j_farg0, j_farg1, j_farg2, j_farg3,
568 j_farg4, j_farg5, j_farg6, j_farg7
569 };
570
571
572 uint int_args = 0;
573 uint fp_args = 0;
574 uint stk_args = 0;
575
576 for (int i = 0; i < total_args_passed; i++) {
577 switch (sig_bt[i]) {
578 case T_BOOLEAN:
579 case T_CHAR:
580 case T_BYTE:
581 case T_SHORT:
582 case T_INT:
583 if (int_args < Argument::n_int_register_parameters_j) {
584 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
585 } else {
586 stk_args = align_up(stk_args, 2);
587 regs[i].set1(VMRegImpl::stack2reg(stk_args));
588 stk_args += 1;
589 }
590 break;
591 case T_VOID:
592 // halves of T_LONG or T_DOUBLE
593 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
594 regs[i].set_bad();
595 break;
596 case T_LONG:
597 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
598 // fall through
599 case T_OBJECT:
600 case T_ARRAY:
601 case T_ADDRESS:
602 if (int_args < Argument::n_int_register_parameters_j) {
603 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
604 } else {
605 stk_args = align_up(stk_args, 2);
606 regs[i].set2(VMRegImpl::stack2reg(stk_args));
607 stk_args += 2;
608 }
609 break;
610 case T_FLOAT:
611 if (fp_args < Argument::n_float_register_parameters_j) {
612 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
613 } else {
614 stk_args = align_up(stk_args, 2);
615 regs[i].set1(VMRegImpl::stack2reg(stk_args));
616 stk_args += 1;
617 }
618 break;
619 case T_DOUBLE:
620 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
621 if (fp_args < Argument::n_float_register_parameters_j) {
622 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
623 } else {
624 stk_args = align_up(stk_args, 2);
625 regs[i].set2(VMRegImpl::stack2reg(stk_args));
626 stk_args += 2;
627 }
628 break;
629 default:
630 ShouldNotReachHere();
631 break;
632 }
633 }
634
635 return stk_args;
636 }
637
638 // Same as java_calling_convention() but for multiple return
639 // values. There's no way to store them on the stack so if we don't
640 // have enough registers, multiple values can't be returned.
641 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
642 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
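// Sketch: for a hypothetical multi-value return of (int, long), sig_bt is
// { T_INT, T_LONG, T_VOID }; the int is assigned rax, the long j_rarg5
// (INT_ArgReg is walked from index 0), and the function returns 2.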
643 int SharedRuntime::java_return_convention(const BasicType *sig_bt,
644 VMRegPair *regs,
645 int total_args_passed) {
646 // Create the mapping between argument positions and
647 // registers.
648 static const Register INT_ArgReg[java_return_convention_max_int] = {
649 rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
650 };
651 static const XMMRegister FP_ArgReg[java_return_convention_max_float] = {
652 j_farg0, j_farg1, j_farg2, j_farg3,
653 j_farg4, j_farg5, j_farg6, j_farg7
654 };
655
656
657 uint int_args = 0;
658 uint fp_args = 0;
659
660 for (int i = 0; i < total_args_passed; i++) {
661 switch (sig_bt[i]) {
662 case T_BOOLEAN:
663 case T_CHAR:
664 case T_BYTE:
665 case T_SHORT:
666 case T_INT:
667 if (int_args < Argument::n_int_register_parameters_j+1) {
668 regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
669 int_args++;
670 } else {
671 return -1;
672 }
673 break;
674 case T_VOID:
675 // halves of T_LONG or T_DOUBLE
676 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
677 regs[i].set_bad();
678 break;
679 case T_LONG:
680 assert(sig_bt[i + 1] == T_VOID, "expecting half");
681 // fall through
682 case T_OBJECT:
683 case T_ARRAY:
684 case T_ADDRESS:
685 case T_METADATA:
686 if (int_args < Argument::n_int_register_parameters_j+1) {
687 regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
688 int_args++;
689 } else {
690 return -1;
691 }
692 break;
693 case T_FLOAT:
694 if (fp_args < Argument::n_float_register_parameters_j) {
695 regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
696 fp_args++;
697 } else {
698 return -1;
699 }
700 break;
701 case T_DOUBLE:
702 assert(sig_bt[i + 1] == T_VOID, "expecting half");
703 if (fp_args < Argument::n_float_register_parameters_j) {
704 regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
705 fp_args++;
706 } else {
707 return -1;
708 }
709 break;
710 default:
711 ShouldNotReachHere();
712 break;
713 }
714 }
715
716 return int_args + fp_args;
717 }
718
719 // Patch the caller's callsite with the entry to compiled code if it exists.
720 static void patch_callers_callsite(MacroAssembler *masm) {
721 Label L;
722 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
723 __ jcc(Assembler::equal, L);
724
725 // Save the current stack pointer
726 __ mov(r13, rsp);
727 // Schedule the branch target address early.
728   // Call into the VM to patch the caller, then jump to the compiled callee.
729   // rax isn't live, so capture the return address while we easily can.
730 __ movptr(rax, Address(rsp, 0));
731
732 // align stack so push_CPU_state doesn't fault
733 __ andptr(rsp, -(StackAlignmentInBytes));
734 __ push_CPU_state();
735 __ vzeroupper();
736 // VM needs caller's callsite
737 // VM needs target method
738 // This needs to be a long call since we will relocate this adapter to
739 // the codeBuffer and it may not reach
740
741 // Allocate argument register save area
742 if (frame::arg_reg_save_area_bytes != 0) {
743 __ subptr(rsp, frame::arg_reg_save_area_bytes);
744 }
745 __ mov(c_rarg0, rbx);
746 __ mov(c_rarg1, rax);
747 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
748
749 // De-allocate argument register save area
750 if (frame::arg_reg_save_area_bytes != 0) {
751 __ addptr(rsp, frame::arg_reg_save_area_bytes);
752 }
753
754 __ vzeroupper();
755 __ pop_CPU_state();
756 // restore sp
757 __ mov(rsp, r13);
758 __ bind(L);
759 }
760
761 // For each inline type argument, sig includes the list of fields of
762 // the inline type. This utility function computes the number of
763 // arguments for the call if inline types are passed by reference (the
764 // calling convention the interpreter expects).
765 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
766 int total_args_passed = 0;
767 if (InlineTypePassFieldsAsArgs) {
768 for (int i = 0; i < sig_extended->length(); i++) {
769 BasicType bt = sig_extended->at(i)._bt;
770 if (bt == T_METADATA) {
771 // In sig_extended, an inline type argument starts with:
772 // T_METADATA, followed by the types of the fields of the
773 // inline type and T_VOID to mark the end of the value
774 // type. Inline types are flattened so, for instance, in the
775 // case of an inline type with an int field and an inline type
776 // field that itself has 2 fields, an int and a long:
777 // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second
778 // slot for the T_LONG) T_VOID (inner inline type) T_VOID
779 // (outer inline type)
780 total_args_passed++;
781 int vt = 1;
782 do {
783 i++;
784 BasicType bt = sig_extended->at(i)._bt;
785 BasicType prev_bt = sig_extended->at(i-1)._bt;
786 if (bt == T_METADATA) {
787 vt++;
788 } else if (bt == T_VOID &&
789 prev_bt != T_LONG &&
790 prev_bt != T_DOUBLE) {
791 vt--;
792 }
793 } while (vt != 0);
794 } else {
795 total_args_passed++;
796 }
797 }
798 } else {
799 total_args_passed = sig_extended->length();
800 }
801 return total_args_passed;
802 }
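// Illustration: the example signature from the comment above,
//   T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID T_VOID T_VOID
// collapses to a single interpreter argument (the inline type passed by
// reference), so with InlineTypePassFieldsAsArgs this function returns 1.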
803
804
805 static void gen_c2i_adapter_helper(MacroAssembler* masm,
806 BasicType bt,
807 BasicType prev_bt,
808 size_t size_in_bytes,
809 const VMRegPair& reg_pair,
810 const Address& to,
811 int extraspace,
812 bool is_oop) {
813 if (bt == T_VOID) {
814 assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
815 return;
816 }
817
818 // Say 4 args:
819 // i st_off
820 // 0 32 T_LONG
821 // 1 24 T_VOID
822 // 2 16 T_OBJECT
823 // 3 8 T_BOOL
824 // - 0 return address
825 //
826   // However, to make things extra confusing: because we can fit a long/double in
827   // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
828   // leaves one slot empty and only stores to a single slot. In this case the
829   // slot that is occupied is the T_VOID slot. See, I said it was confusing.
830
831 bool wide = (size_in_bytes == wordSize);
832 VMReg r_1 = reg_pair.first();
833 VMReg r_2 = reg_pair.second();
834 assert(r_2->is_valid() == wide, "invalid size");
835 if (!r_1->is_valid()) {
836 assert(!r_2->is_valid(), "must be invalid");
837 return;
838 }
839
840 if (!r_1->is_XMMRegister()) {
841 Register val = rax;
842 if (r_1->is_stack()) {
843 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
844 __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
845 } else {
846 val = r_1->as_Register();
847 }
848 assert_different_registers(to.base(), val, rscratch1);
849 if (is_oop) {
850 __ push(r13);
851 __ push(rbx);
852 __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
853 __ pop(rbx);
854 __ pop(r13);
855 } else {
856 __ store_sized_value(to, val, size_in_bytes);
857 }
858 } else {
859 if (wide) {
860 __ movdbl(to, r_1->as_XMMRegister());
861 } else {
862 __ movflt(to, r_1->as_XMMRegister());
863 }
864 }
865 }
866
867 static void gen_c2i_adapter(MacroAssembler *masm,
868 const GrowableArray<SigEntry>* sig_extended,
869 const VMRegPair *regs,
870 bool requires_clinit_barrier,
871 address& c2i_no_clinit_check_entry,
872 Label& skip_fixup,
873 address start,
874 OopMapSet* oop_maps,
875 int& frame_complete,
876 int& frame_size_in_words,
877 bool alloc_inline_receiver) {
878 if (requires_clinit_barrier && VM_Version::supports_fast_class_init_checks()) {
879 Label L_skip_barrier;
880 Register method = rbx;
881
882 { // Bypass the barrier for non-static methods
883 Register flags = rscratch1;
884 __ load_unsigned_short(flags, Address(method, Method::access_flags_offset()));
885 __ testl(flags, JVM_ACC_STATIC);
886 __ jcc(Assembler::zero, L_skip_barrier); // non-static
887 }
888
889 Register klass = rscratch1;
890 __ load_method_holder(klass, method);
891 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
892
893 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
894
895 __ bind(L_skip_barrier);
896 c2i_no_clinit_check_entry = __ pc();
897 }
898
899 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
900 bs->c2i_entry_barrier(masm);
901
902 // Before we get into the guts of the C2I adapter, see if we should be here
903 // at all. We've come from compiled code and are attempting to jump to the
904 // interpreter, which means the caller made a static call to get here
905 // (vcalls always get a compiled target if there is one). Check for a
906 // compiled target. If there is one, we need to patch the caller's call.
907 patch_callers_callsite(masm);
908
909 __ bind(skip_fixup);
910
911 if (InlineTypePassFieldsAsArgs) {
912 // Is there an inline type argument?
913 bool has_inline_argument = false;
914 for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
915 has_inline_argument = (sig_extended->at(i)._bt == T_METADATA);
916 }
917 if (has_inline_argument) {
918 // There is at least an inline type argument: we're coming from
919 // compiled code so we have no buffers to back the inline types.
920 // Allocate the buffers here with a runtime call.
921 OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false);
922
923 frame_complete = __ offset();
924
925 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);
926
927 __ mov(c_rarg0, r15_thread);
928 __ mov(c_rarg1, rbx);
929 __ mov64(c_rarg2, (int64_t)alloc_inline_receiver);
930 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types)));
931
932 oop_maps->add_gc_map((int)(__ pc() - start), map);
933 __ reset_last_Java_frame(false);
934
935 RegisterSaver::restore_live_registers(masm);
936
937 Label no_exception;
938 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
939 __ jcc(Assembler::equal, no_exception);
940
941 __ movptr(Address(r15_thread, JavaThread::vm_result_oop_offset()), NULL_WORD);
942 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
943 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
944
945 __ bind(no_exception);
946
947 // We get an array of objects from the runtime call
948 __ get_vm_result_oop(rscratch2); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr()
949 __ get_vm_result_metadata(rbx); // TODO: required to keep the callee Method live?
950 }
951 }
952
953 // Since all args are passed on the stack, total_args_passed *
954 // Interpreter::stackElementSize is the space we need.
955 int total_args_passed = compute_total_args_passed_int(sig_extended);
956 assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed);
957
958 int extraspace = (total_args_passed * Interpreter::stackElementSize);
959
960 // stack is aligned, keep it that way
961 // This is not currently needed or enforced by the interpreter, but
962 // we might as well conform to the ABI.
963 extraspace = align_up(extraspace, 2*wordSize);
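  // Space-calculation sketch (Interpreter::stackElementSize is 8 on x86_64):
  // for 3 interpreter arguments,
  //   extraspace = 3 * 8 = 24  ->  align_up(24, 2*wordSize) = 32 bytes
  // and, if extraspace > 0, another wordSize is added below once the return
  // address has been re-pushed under the new area.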
964
965 // set senderSP value
966 __ lea(r13, Address(rsp, wordSize));
967
968 #ifdef ASSERT
969 __ check_stack_alignment(r13, "sender stack not aligned");
970 #endif
971 if (extraspace > 0) {
972 // Pop the return address
973 __ pop(rax);
974
975 __ subptr(rsp, extraspace);
976
977 // Push the return address
978 __ push(rax);
979
980 // Account for the return address location since we store it first rather
981 // than hold it in a register across all the shuffling
982 extraspace += wordSize;
983 }
984
985 #ifdef ASSERT
986 __ check_stack_alignment(rsp, "callee stack not aligned", wordSize, rax);
987 #endif
988
989 // Now write the args into the outgoing interpreter space
990
991 // next_arg_comp is the next argument from the compiler point of
992 // view (inline type fields are passed in registers/on the stack). In
993 // sig_extended, an inline type argument starts with: T_METADATA,
994 // followed by the types of the fields of the inline type and T_VOID
995 // to mark the end of the inline type. ignored counts the number of
996 // T_METADATA/T_VOID. next_vt_arg is the next inline type argument:
997 // used to get the buffer for that argument from the pool of buffers
998 // we allocated above and want to pass to the
999 // interpreter. next_arg_int is the next argument from the
1000 // interpreter point of view (inline types are passed by reference).
1001 for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
1002 next_arg_comp < sig_extended->length(); next_arg_comp++) {
1003 assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
1004 assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?");
1005 BasicType bt = sig_extended->at(next_arg_comp)._bt;
1006 int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
1007 if (!InlineTypePassFieldsAsArgs || bt != T_METADATA) {
1008 int next_off = st_off - Interpreter::stackElementSize;
1009 const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
1010 const VMRegPair reg_pair = regs[next_arg_comp-ignored];
1011 size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
1012 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
1013 size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false);
1014 next_arg_int++;
1015 #ifdef ASSERT
1016 if (bt == T_LONG || bt == T_DOUBLE) {
1017 // Overwrite the unused slot with known junk
1018 __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
1019 __ movptr(Address(rsp, st_off), rax);
1020 }
1021 #endif /* ASSERT */
1022 } else {
1023 ignored++;
1024 // get the buffer from the just allocated pool of buffers
1025 int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_OBJECT);
1026 __ load_heap_oop(r14, Address(rscratch2, index));
1027 next_vt_arg++; next_arg_int++;
1028 int vt = 1;
1029 // write fields we get from compiled code in registers/stack
1030 // slots to the buffer: we know we are done with that inline type
1031 // argument when we hit the T_VOID that acts as an end of inline
1032 // type delimiter for this inline type. Inline types are flattened
1033 // so we might encounter embedded inline types. Each entry in
1034 // sig_extended contains a field offset in the buffer.
1035 Label L_null;
1036 do {
1037 next_arg_comp++;
1038 BasicType bt = sig_extended->at(next_arg_comp)._bt;
1039 BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt;
1040 if (bt == T_METADATA) {
1041 vt++;
1042 ignored++;
1043 } else if (bt == T_VOID &&
1044 prev_bt != T_LONG &&
1045 prev_bt != T_DOUBLE) {
1046 vt--;
1047 ignored++;
1048 } else {
1049 int off = sig_extended->at(next_arg_comp)._offset;
1050 if (off == -1) {
1051 // Nullable inline type argument, emit null check
1052 VMReg reg = regs[next_arg_comp-ignored].first();
1053 Label L_notNull;
1054 if (reg->is_stack()) {
1055 int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
1056 __ testb(Address(rsp, ld_off), 1);
1057 } else {
1058 __ testb(reg->as_Register(), 1);
1059 }
1060 __ jcc(Assembler::notZero, L_notNull);
1061 __ movptr(Address(rsp, st_off), 0);
1062 __ jmp(L_null);
1063 __ bind(L_notNull);
1064 continue;
1065 }
1066 assert(off > 0, "offset in object should be positive");
1067 size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
1068 bool is_oop = is_reference_type(bt);
1069 gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
1070 size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop);
1071 }
1072 } while (vt != 0);
1073 // pass the buffer to the interpreter
1074 __ movptr(Address(rsp, st_off), r14);
1075 __ bind(L_null);
1076 }
1077 }
1078
1079 // Schedule the branch target address early.
1080 __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
1081 __ jmp(rcx);
1082 }
1083
1084 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
1085 int comp_args_on_stack,
1086 const GrowableArray<SigEntry>* sig,
1087 const VMRegPair *regs) {
1088
1089 // Note: r13 contains the senderSP on entry. We must preserve it since
1090   // we may do an i2c -> c2i transition if we lose a race where compiled
1091 // code goes non-entrant while we get args ready.
1092 // In addition we use r13 to locate all the interpreter args as
1093   // we must align the stack to 16 bytes on an i2c entry, or we
1094   // lose the alignment we expect in all compiled code and the register
1095   // save code can segv when fxsave instructions find an improperly
1096   // aligned stack pointer.
1097
1098 // Adapters can be frameless because they do not require the caller
1099 // to perform additional cleanup work, such as correcting the stack pointer.
1100 // An i2c adapter is frameless because the *caller* frame, which is interpreted,
1101 // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
1102 // even if a callee has modified the stack pointer.
1103 // A c2i adapter is frameless because the *callee* frame, which is interpreted,
1104 // routinely repairs its caller's stack pointer (from sender_sp, which is set
1105 // up via the senderSP register).
1106 // In other words, if *either* the caller or callee is interpreted, we can
1107 // get the stack pointer repaired after a call.
1108 // This is why c2i and i2c adapters cannot be indefinitely composed.
1109 // In particular, if a c2i adapter were to somehow call an i2c adapter,
1110 // both caller and callee would be compiled methods, and neither would
1111 // clean up the stack pointer changes performed by the two adapters.
1112 // If this happens, control eventually transfers back to the compiled
1113 // caller, but with an uncorrected stack, causing delayed havoc.
1114
1115 // Must preserve original SP for loading incoming arguments because
1116 // we need to align the outgoing SP for compiled code.
1117 __ movptr(r11, rsp);
1118
1119 // Pick up the return address
1120 __ pop(rax);
1121
1122 // Convert 4-byte c2 stack slots to words.
1123 int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
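  // For example, comp_args_on_stack == 5 (five 4-byte slots) gives
  // align_up(5 * 4, wordSize) >> LogBytesPerWord == 24 >> 3 == 3 words.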
1124
1125 if (comp_args_on_stack) {
1126 __ subptr(rsp, comp_words_on_stack * wordSize);
1127 }
1128
1129 // Ensure compiled code always sees stack at proper alignment
1130 __ andptr(rsp, -16);
1131
1132   // Push the return address, misaligning the stack so that the youngest frame
1133   // sees it just as it would right after the placement of the call instruction
1134 __ push(rax);
1135
1136 // Put saved SP in another register
1137 const Register saved_sp = rax;
1138 __ movptr(saved_sp, r11);
1139
1140 // Will jump to the compiled code just as if compiled code was doing it.
1141 // Pre-load the register-jump target early, to schedule it better.
1142 __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset())));
1143
1144 #if INCLUDE_JVMCI
1145 if (EnableJVMCI) {
1146 // check if this call should be routed towards a specific entry point
1147 __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
1148 Label no_alternative_target;
1149 __ jcc(Assembler::equal, no_alternative_target);
1150 __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
1151 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
1152 __ bind(no_alternative_target);
1153 }
1154 #endif // INCLUDE_JVMCI
1155
1156 int total_args_passed = sig->length();
1157
1158 // Now generate the shuffle code. Pick up all register args and move the
1159 // rest through the floating point stack top.
1160 for (int i = 0; i < total_args_passed; i++) {
1161 BasicType bt = sig->at(i)._bt;
1162 if (bt == T_VOID) {
1163 // Longs and doubles are passed in native word order, but misaligned
1164 // in the 32-bit build.
1165 BasicType prev_bt = (i > 0) ? sig->at(i-1)._bt : T_ILLEGAL;
1166 assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half");
1167 continue;
1168 }
1169
1170 // Pick up 0, 1 or 2 words from SP+offset.
1171
1172 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
1173 "scrambled load targets?");
1174 // Load in argument order going down.
1175 int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
1176 // Point to interpreter value (vs. tag)
1177 int next_off = ld_off - Interpreter::stackElementSize;
1178 //
1179 //
1180 //
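    // Offset sketch: with total_args_passed == 3 and stackElementSize == 8,
    // argument i == 0 has ld_off == (3 - 0) * 8 == 24 and next_off == 16;
    // T_LONG/T_DOUBLE values are read from next_off (the slot the interpreter
    // actually uses), everything else from ld_off.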
1181 VMReg r_1 = regs[i].first();
1182 VMReg r_2 = regs[i].second();
1183 if (!r_1->is_valid()) {
1184 assert(!r_2->is_valid(), "");
1185 continue;
1186 }
1187 if (r_1->is_stack()) {
1188 // Convert stack slot to an SP offset (+ wordSize to account for return address )
1189 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
1190
1191 // We can use r13 as a temp here because compiled code doesn't need r13 as an input
1192       // and if we end up going through a c2i because of a miss, a reasonable value of r13
1193 // will be generated.
1194 if (!r_2->is_valid()) {
1195 // sign extend???
1196 __ movl(r13, Address(saved_sp, ld_off));
1197 __ movptr(Address(rsp, st_off), r13);
1198 } else {
1199 //
1200 // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
1201         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
1202 // So we must adjust where to pick up the data to match the interpreter.
1203 //
1204         // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
1205         // are accessed at negative offsets, so the LSW is at the LOW address
1206
1207 // ld_off is MSW so get LSW
1208 const int offset = (bt==T_LONG||bt==T_DOUBLE)?
1209 next_off : ld_off;
1210 __ movq(r13, Address(saved_sp, offset));
1211 // st_off is LSW (i.e. reg.first())
1212 __ movq(Address(rsp, st_off), r13);
1213 }
1214 } else if (r_1->is_Register()) { // Register argument
1215 Register r = r_1->as_Register();
1216 assert(r != rax, "must be different");
1217 if (r_2->is_valid()) {
1218 //
1219 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
1220         // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
1221 // So we must adjust where to pick up the data to match the interpreter.
1222
1223 const int offset = (bt==T_LONG||bt==T_DOUBLE)?
1224 next_off : ld_off;
1225
1226 // this can be a misaligned move
1227 __ movq(r, Address(saved_sp, offset));
1228 } else {
1229 // sign extend and use a full word?
1230 __ movl(r, Address(saved_sp, ld_off));
1231 }
1232 } else {
1233 if (!r_2->is_valid()) {
1234 __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
1235 } else {
1236 __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
1237 }
1238 }
1239 }
1240
1241 __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about
1242
1243 // 6243940 We might end up in handle_wrong_method if
1244   // the callee is deoptimized as we race through here. If that
1245 // happens we don't want to take a safepoint because the
1246 // caller frame will look interpreted and arguments are now
1247 // "compiled" so it is much better to make this transition
1248 // invisible to the stack walking code. Unfortunately if
1249 // we try and find the callee by normal means a safepoint
1250 // is possible. So we stash the desired callee in the thread
1251   // and the VM will find it there should this case occur.
1252
1253 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
1254
1255   // Put the Method* where a c2i would expect it, should we end up there.
1256   // This is only needed because c2 resolve stubs return the Method* as a result in
1257   // rax.
1258 __ mov(rax, rbx);
1259 __ jmp(r11);
1260 }
1261
1262 static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
1263 Register data = rax;
1264 __ ic_check(1 /* end_alignment */);
1265 __ movptr(rbx, Address(data, CompiledICData::speculated_method_offset()));
1266
1267   // The method might have been compiled since the call site was patched to
1268   // interpreted; if that is the case, treat it as a miss so we can get
1269   // the call site corrected.
1270 __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
1271 __ jcc(Assembler::equal, skip_fixup);
1272 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1273 }
1274
1275 // ---------------------------------------------------------------
1276 void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
1277 int comp_args_on_stack,
1278 const GrowableArray<SigEntry>* sig,
1279 const VMRegPair* regs,
1280 const GrowableArray<SigEntry>* sig_cc,
1281 const VMRegPair* regs_cc,
1282 const GrowableArray<SigEntry>* sig_cc_ro,
1283 const VMRegPair* regs_cc_ro,
1284 address entry_address[AdapterBlob::ENTRY_COUNT],
1285 AdapterBlob*& new_adapter,
1286 bool allocate_code_blob) {
1287 entry_address[AdapterBlob::I2C] = __ pc();
1288 gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);
1289
1290 // -------------------------------------------------------------------------
1291 // Generate a C2I adapter. On entry we know rbx holds the Method* during calls
1292 // to the interpreter. The args start out packed in the compiled layout. They
1293 // need to be unpacked into the interpreter layout. This will almost always
1294 // require some stack space. We grow the current (compiled) stack, then repack
1295 // the args. We finally end in a jump to the generic interpreter entry point.
1296 // On exit from the interpreter, the interpreter will restore our SP (lest the
1297 // compiled code, which relies solely on SP and not RBP, get sick).
1298
1299 entry_address[AdapterBlob::C2I_Unverified] = __ pc();
1300 entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc();
1301 Label skip_fixup;
1302
1303 gen_inline_cache_check(masm, skip_fixup);
1304
1305 OopMapSet* oop_maps = new OopMapSet();
1306 int frame_complete = CodeOffsets::frame_never_safe;
1307 int frame_size_in_words = 0;
1308
1309 // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
1310 entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
1311 entry_address[AdapterBlob::C2I_Inline_RO] = __ pc();
1312 if (regs_cc != regs_cc_ro) {
1313 // No class init barrier needed because method is guaranteed to be non-static
1314 gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, entry_address[AdapterBlob::C2I_No_Clinit_Check],
1315 skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
1316 skip_fixup.reset();
1317 }
1318
1319 // Scalarized c2i adapter
1320 entry_address[AdapterBlob::C2I] = __ pc();
1321 entry_address[AdapterBlob::C2I_Inline] = __ pc();
1322 gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check],
1323 skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true);
1324
1325 // Non-scalarized c2i adapter
1326 if (regs != regs_cc) {
1327 entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc();
1328 Label inline_entry_skip_fixup;
1329 gen_inline_cache_check(masm, inline_entry_skip_fixup);
1330
1331 entry_address[AdapterBlob::C2I_Inline] = __ pc();
1332 gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check],
1333 inline_entry_skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
1334 }
1335
1336 // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
1337 // the GC knows about the location of oop argument locations passed to the c2i adapter.
1338 if (allocate_code_blob) {
1339 bool caller_must_gc_arguments = (regs != regs_cc);
1340 int entry_offset[AdapterHandlerEntry::ENTRIES_COUNT];
1341 assert(AdapterHandlerEntry::ENTRIES_COUNT == 7, "sanity");
1342 AdapterHandlerLibrary::address_to_offset(entry_address, entry_offset);
1343 new_adapter = AdapterBlob::create(masm->code(), entry_offset, frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
1344 }
1345 }
1346
1347 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1348 VMRegPair *regs,
1349 int total_args_passed) {
1350
1351 // We return the amount of VMRegImpl stack slots we need to reserve for all
1352 // the arguments NOT counting out_preserve_stack_slots.
1353
1354 // NOTE: These arrays will have to change when c1 is ported
1355 #ifdef _WIN64
1356 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1357 c_rarg0, c_rarg1, c_rarg2, c_rarg3
1358 };
1359 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1360 c_farg0, c_farg1, c_farg2, c_farg3
1361 };
1362 #else
1363 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1364 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5
1365 };
1366 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1367 c_farg0, c_farg1, c_farg2, c_farg3,
1368 c_farg4, c_farg5, c_farg6, c_farg7
1369 };
1370 #endif // _WIN64
1371
1372
1373 uint int_args = 0;
1374 uint fp_args = 0;
1375 uint stk_args = 0; // inc by 2 each time
1376
1377 for (int i = 0; i < total_args_passed; i++) {
1378 switch (sig_bt[i]) {
1379 case T_BOOLEAN:
1380 case T_CHAR:
1381 case T_BYTE:
1382 case T_SHORT:
1383 case T_INT:
1384 if (int_args < Argument::n_int_register_parameters_c) {
1385 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
1386 #ifdef _WIN64
1387 fp_args++;
1388         // Allocate slots for callee to stuff register args on the stack.
1389 stk_args += 2;
1390 #endif
1391 } else {
1392 regs[i].set1(VMRegImpl::stack2reg(stk_args));
1393 stk_args += 2;
1394 }
1395 break;
1396 case T_LONG:
1397 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
1398 // fall through
1399 case T_OBJECT:
1400 case T_ARRAY:
1401 case T_ADDRESS:
1402 case T_METADATA:
1403 if (int_args < Argument::n_int_register_parameters_c) {
1404 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
1405 #ifdef _WIN64
1406 fp_args++;
1407 stk_args += 2;
1408 #endif
1409 } else {
1410 regs[i].set2(VMRegImpl::stack2reg(stk_args));
1411 stk_args += 2;
1412 }
1413 break;
1414 case T_FLOAT:
1415 if (fp_args < Argument::n_float_register_parameters_c) {
1416 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
1417 #ifdef _WIN64
1418 int_args++;
1419         // Allocate slots for callee to stuff register args on the stack.
1420 stk_args += 2;
1421 #endif
1422 } else {
1423 regs[i].set1(VMRegImpl::stack2reg(stk_args));
1424 stk_args += 2;
1425 }
1426 break;
1427 case T_DOUBLE:
1428 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
1429 if (fp_args < Argument::n_float_register_parameters_c) {
1430 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
1431 #ifdef _WIN64
1432 int_args++;
1433         // Allocate slots for the callee to stuff register args on the stack.
1434 stk_args += 2;
1435 #endif
1436 } else {
1437 regs[i].set2(VMRegImpl::stack2reg(stk_args));
1438 stk_args += 2;
1439 }
1440 break;
1441 case T_VOID: // Halves of longs and doubles
1442 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
1443 regs[i].set_bad();
1444 break;
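      // For illustration: a Java 'long' or 'double' argument appears in sig_bt as a
      // pair such as { T_LONG, T_VOID }; the trailing T_VOID is the upper half and,
      // as handled above, gets no register or stack slot of its own (set_bad()).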
1445 default:
1446 ShouldNotReachHere();
1447 break;
1448 }
1449 }
1450 #ifdef _WIN64
1451   // The Windows ABI requires that we always allocate enough stack space
1452   // for four 64-bit registers to be stored down (the 32-byte shadow area).
1453 if (stk_args < 8) {
1454 stk_args = 8;
1455 }
1456 #endif // _WIN64
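  // Worked example (hypothetical signature, for illustration only): for
  // (jint, jlong, jdouble) the System V path assigns c_rarg0, c_rarg1 and
  // c_farg0 and returns stk_args == 0, while the Win64 path also reserves a
  // 2-slot home location per argument and is then bumped to the 8-slot
  // (32-byte) shadow-area minimum above.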
1457
1458 return stk_args;
1459 }
1460
1461 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
1462 uint num_bits,
1463 uint total_args_passed) {
1464 assert(num_bits == 64 || num_bits == 128 || num_bits == 256 || num_bits == 512,
1465 "only certain vector sizes are supported for now");
1466
1467 static const XMMRegister VEC_ArgReg[32] = {
1468 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
1469 xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
1470 xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23,
1471 xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31
1472 };
1473
1474 uint stk_args = 0;
1475 uint fp_args = 0;
1476
1477 for (uint i = 0; i < total_args_passed; i++) {
1478 VMReg vmreg = VEC_ArgReg[fp_args++]->as_VMReg();
1479 int next_val = num_bits == 64 ? 1 : (num_bits == 128 ? 3 : (num_bits == 256 ? 7 : 15));
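    // The pair spans next_val + 1 successive 32-bit VMReg slots of the XMM
    // register, e.g. for num_bits == 256 it covers vmreg .. vmreg->next(7),
    // i.e. one full YMM register.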
1480 regs[i].set_pair(vmreg->next(next_val), vmreg);
1481 }
1482
1483 return stk_args;
1484 }
1485
1486 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1487   // We always ignore the frame_slots arg and just use the space just below the
1488   // frame pointer, which by this time is free to use.
1489 switch (ret_type) {
1490 case T_FLOAT:
1491 __ movflt(Address(rbp, -wordSize), xmm0);
1492 break;
1493 case T_DOUBLE:
1494 __ movdbl(Address(rbp, -wordSize), xmm0);
1495 break;
1496 case T_VOID: break;
1497 default: {
1498 __ movptr(Address(rbp, -wordSize), rax);
1499 }
1500 }
1501 }
1502
1503 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1504   // We always ignore the frame_slots arg and just use the space just below the
1505   // frame pointer, which by this time is free to use.
1506 switch (ret_type) {
1507 case T_FLOAT:
1508 __ movflt(xmm0, Address(rbp, -wordSize));
1509 break;
1510 case T_DOUBLE:
1511 __ movdbl(xmm0, Address(rbp, -wordSize));
1512 break;
1513 case T_VOID: break;
1514 default: {
1515 __ movptr(rax, Address(rbp, -wordSize));
1516 }
1517 }
1518 }
1519
1520 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
1521 for ( int i = first_arg ; i < arg_count ; i++ ) {
1522 if (args[i].first()->is_Register()) {
1523 __ push(args[i].first()->as_Register());
1524 } else if (args[i].first()->is_XMMRegister()) {
1525 __ subptr(rsp, 2*wordSize);
1526 __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister());
1527 }
1528 }
1529 }
1530
1531 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
1532 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
1533 if (args[i].first()->is_Register()) {
1534 __ pop(args[i].first()->as_Register());
1535 } else if (args[i].first()->is_XMMRegister()) {
1536 __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0));
1537 __ addptr(rsp, 2*wordSize);
1538 }
1539 }
1540 }
1541
1542 static void verify_oop_args(MacroAssembler* masm,
1543 const methodHandle& method,
1544 const BasicType* sig_bt,
1545 const VMRegPair* regs) {
1546 Register temp_reg = rbx; // not part of any compiled calling seq
1547 if (VerifyOops) {
1548 for (int i = 0; i < method->size_of_parameters(); i++) {
1549 if (is_reference_type(sig_bt[i])) {
1550 VMReg r = regs[i].first();
1551 assert(r->is_valid(), "bad oop arg");
1552 if (r->is_stack()) {
1553 __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1554 __ verify_oop(temp_reg);
1555 } else {
1556 __ verify_oop(r->as_Register());
1557 }
1558 }
1559 }
1560 }
1561 }
1562
1563 static void check_continuation_enter_argument(VMReg actual_vmreg,
1564 Register expected_reg,
1565 const char* name) {
1566 assert(!actual_vmreg->is_stack(), "%s cannot be on stack", name);
1567 assert(actual_vmreg->as_Register() == expected_reg,
1568 "%s is in unexpected register: %s instead of %s",
1569 name, actual_vmreg->as_Register()->name(), expected_reg->name());
1570 }
1571
1572
1573 //---------------------------- continuation_enter_setup ---------------------------
1574 //
1575 // Arguments:
1576 // None.
1577 //
1578 // Results:
1579 // rsp: pointer to blank ContinuationEntry
1580 //
1581 // Kills:
1582 // rax
1583 //
1584 static OopMap* continuation_enter_setup(MacroAssembler* masm, int& stack_slots) {
1585 assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, "");
1586 assert(in_bytes(ContinuationEntry::cont_offset()) % VMRegImpl::stack_slot_size == 0, "");
1587 assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, "");
1588
1589 stack_slots += checked_cast<int>(ContinuationEntry::size()) / wordSize;
1590 __ subptr(rsp, checked_cast<int32_t>(ContinuationEntry::size()));
1591
1592 int frame_size = (checked_cast<int>(ContinuationEntry::size()) + wordSize) / VMRegImpl::stack_slot_size;
1593 OopMap* map = new OopMap(frame_size, 0);
1594
1595 __ movptr(rax, Address(r15_thread, JavaThread::cont_entry_offset()));
1596 __ movptr(Address(rsp, ContinuationEntry::parent_offset()), rax);
1597 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rsp);
1598
1599 return map;
1600 }
1601
1602 //---------------------------- fill_continuation_entry ---------------------------
1603 //
1604 // Arguments:
1605 // rsp: pointer to blank Continuation entry
1606 // reg_cont_obj: pointer to the continuation
1607 // reg_flags: flags
1608 //
1609 // Results:
1610 // rsp: pointer to filled out ContinuationEntry
1611 //
1612 // Kills:
1613 // rax
1614 //
1615 static void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Register reg_flags) {
1616 assert_different_registers(rax, reg_cont_obj, reg_flags);
1617 #ifdef ASSERT
1618 __ movl(Address(rsp, ContinuationEntry::cookie_offset()), ContinuationEntry::cookie_value());
1619 #endif
1620 __ movptr(Address(rsp, ContinuationEntry::cont_offset()), reg_cont_obj);
1621 __ movl (Address(rsp, ContinuationEntry::flags_offset()), reg_flags);
1622 __ movptr(Address(rsp, ContinuationEntry::chunk_offset()), 0);
1623 __ movl(Address(rsp, ContinuationEntry::argsize_offset()), 0);
1624 __ movl(Address(rsp, ContinuationEntry::pin_count_offset()), 0);
1625
1626 __ movptr(rax, Address(r15_thread, JavaThread::cont_fastpath_offset()));
1627 __ movptr(Address(rsp, ContinuationEntry::parent_cont_fastpath_offset()), rax);
1628
1629 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), 0);
1630 }
1631
1632 //---------------------------- continuation_enter_cleanup ---------------------------
1633 //
1634 // Arguments:
1635 // rsp: pointer to the ContinuationEntry
1636 //
1637 // Results:
1638 // rsp: pointer to the spilled rbp in the entry frame
1639 //
1640 // Kills:
1641 // rbx
1642 //
1643 static void continuation_enter_cleanup(MacroAssembler* masm) {
1644 #ifdef ASSERT
1645 Label L_good_sp;
1646 __ cmpptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset()));
1647 __ jcc(Assembler::equal, L_good_sp);
1648 __ stop("Incorrect rsp at continuation_enter_cleanup");
1649 __ bind(L_good_sp);
1650 #endif
1651 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_cont_fastpath_offset()));
1652 __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), rbx);
1653 __ movptr(rbx, Address(rsp, ContinuationEntry::parent_offset()));
1654 __ movptr(Address(r15_thread, JavaThread::cont_entry_offset()), rbx);
1655 __ addptr(rsp, checked_cast<int32_t>(ContinuationEntry::size()));
1656 }
1657
1658 static void gen_continuation_enter(MacroAssembler* masm,
1659 const VMRegPair* regs,
1660 int& exception_offset,
1661 OopMapSet* oop_maps,
1662 int& frame_complete,
1663 int& stack_slots,
1664 int& interpreted_entry_offset,
1665 int& compiled_entry_offset) {
1666
1667 // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread)
1668 int pos_cont_obj = 0;
1669 int pos_is_cont = 1;
1670 int pos_is_virtual = 2;
1671
1672 // The platform-specific calling convention may present the arguments in various registers.
1673   // To simplify the rest of the code, we expect the arguments to reside in these known
1674   // registers, and we additionally check the placement here in case the calling convention ever
1675 // changes.
1676 Register reg_cont_obj = c_rarg1;
1677 Register reg_is_cont = c_rarg2;
1678 Register reg_is_virtual = c_rarg3;
1679
1680 check_continuation_enter_argument(regs[pos_cont_obj].first(), reg_cont_obj, "Continuation object");
1681 check_continuation_enter_argument(regs[pos_is_cont].first(), reg_is_cont, "isContinue");
1682 check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread");
1683
1684 // Utility methods kill rax, make sure there are no collisions
1685 assert_different_registers(rax, reg_cont_obj, reg_is_cont, reg_is_virtual);
1686
1687 AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(),
1688 relocInfo::static_call_type);
1689
1690 address start = __ pc();
1691
1692 Label L_thaw, L_exit;
1693
1694 // i2i entry used at interp_only_mode only
1695 interpreted_entry_offset = __ pc() - start;
1696 {
1697 #ifdef ASSERT
1698 Label is_interp_only;
1699 __ cmpb(Address(r15_thread, JavaThread::interp_only_mode_offset()), 0);
1700 __ jcc(Assembler::notEqual, is_interp_only);
1701 __ stop("enterSpecial interpreter entry called when not in interp_only_mode");
1702 __ bind(is_interp_only);
1703 #endif
1704
1705 __ pop(rax); // return address
1706 // Read interpreter arguments into registers (this is an ad-hoc i2c adapter)
1707 __ movptr(c_rarg1, Address(rsp, Interpreter::stackElementSize*2));
1708 __ movl(c_rarg2, Address(rsp, Interpreter::stackElementSize*1));
1709 __ movl(c_rarg3, Address(rsp, Interpreter::stackElementSize*0));
1710 __ andptr(rsp, -16); // Ensure compiled code always sees stack at proper alignment
1711 __ push(rax); // return address
1712 __ push_cont_fastpath();
1713
1714 __ enter();
1715
1716 stack_slots = 2; // will be adjusted in setup
1717 OopMap* map = continuation_enter_setup(masm, stack_slots);
1718     // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe.
1719     // That's okay: at the very worst we'll miss an async sample, and we're in interp_only_mode anyway.
1720
1721 __ verify_oop(reg_cont_obj);
1722
1723 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual);
1724
1725 // If continuation, call to thaw. Otherwise, resolve the call and exit.
1726 __ testptr(reg_is_cont, reg_is_cont);
1727 __ jcc(Assembler::notZero, L_thaw);
1728
1729 // --- Resolve path
1730
1731 // Make sure the call is patchable
1732 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset);
1733 // Emit stub for static call
1734 address stub = CompiledDirectCall::emit_to_interp_stub(masm, __ pc());
1735 if (stub == nullptr) {
1736 fatal("CodeCache is full at gen_continuation_enter");
1737 }
1738 __ call(resolve);
1739 oop_maps->add_gc_map(__ pc() - start, map);
1740 __ post_call_nop();
1741
1742 __ jmp(L_exit);
1743 }
1744
1745 // compiled entry
1746 __ align(CodeEntryAlignment);
1747 compiled_entry_offset = __ pc() - start;
1748 __ enter();
1749
1750 stack_slots = 2; // will be adjusted in setup
1751 OopMap* map = continuation_enter_setup(masm, stack_slots);
1752
1753 // Frame is now completed as far as size and linkage.
1754 frame_complete = __ pc() - start;
1755
1756 __ verify_oop(reg_cont_obj);
1757
1758 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual);
1759
1760 // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue)
1761 __ testptr(reg_is_cont, reg_is_cont);
1762 __ jccb(Assembler::notZero, L_thaw);
1763
1764 // --- call Continuation.enter(Continuation c, boolean isContinue)
1765
1766 // Make sure the call is patchable
1767 __ align(BytesPerWord, __ offset() + NativeCall::displacement_offset);
1768
1769 // Emit stub for static call
1770 address stub = CompiledDirectCall::emit_to_interp_stub(masm, __ pc());
1771 if (stub == nullptr) {
1772 fatal("CodeCache is full at gen_continuation_enter");
1773 }
1774
1775 // The call needs to be resolved. There's a special case for this in
1776 // SharedRuntime::find_callee_info_helper() which calls
1777 // LinkResolver::resolve_continuation_enter() which resolves the call to
1778 // Continuation.enter(Continuation c, boolean isContinue).
1779 __ call(resolve);
1780
1781 oop_maps->add_gc_map(__ pc() - start, map);
1782 __ post_call_nop();
1783
1784 __ jmpb(L_exit);
1785
1786 // --- Thawing path
1787
1788 __ bind(L_thaw);
1789
1790 ContinuationEntry::_thaw_call_pc_offset = __ pc() - start;
1791 __ call(RuntimeAddress(StubRoutines::cont_thaw()));
1792
1793 ContinuationEntry::_return_pc_offset = __ pc() - start;
1794 oop_maps->add_gc_map(__ pc() - start, map->deep_copy());
1795 __ post_call_nop();
1796
1797 // --- Normal exit (resolve/thawing)
1798
1799 __ bind(L_exit);
1800 ContinuationEntry::_cleanup_offset = __ pc() - start;
1801 continuation_enter_cleanup(masm);
1802 __ pop(rbp);
1803 __ ret(0);
1804
1805 // --- Exception handling path
1806
1807 exception_offset = __ pc() - start;
1808
1809 continuation_enter_cleanup(masm);
1810 __ pop(rbp);
1811
1812 __ movptr(c_rarg0, r15_thread);
1813 __ movptr(c_rarg1, Address(rsp, 0)); // return address
1814
1815 // rax still holds the original exception oop, save it before the call
1816 __ push(rax);
1817
1818 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), 2);
1819 __ movptr(rbx, rax);
1820
1821 // Continue at exception handler:
1822 // rax: exception oop
1823 // rbx: exception handler
1824 // rdx: exception pc
1825 __ pop(rax);
1826 __ verify_oop(rax);
1827 __ pop(rdx);
1828 __ jmp(rbx);
1829 }
1830
1831 static void gen_continuation_yield(MacroAssembler* masm,
1832 const VMRegPair* regs,
1833 OopMapSet* oop_maps,
1834 int& frame_complete,
1835 int& stack_slots,
1836 int& compiled_entry_offset) {
1837 enum layout {
1838 rbp_off,
1839 rbpH_off,
1840 return_off,
1841 return_off2,
1842 framesize // inclusive of return address
1843 };
1844 stack_slots = framesize / VMRegImpl::slots_per_word;
1845 assert(stack_slots == 2, "recheck layout");
1846
1847 address start = __ pc();
1848 compiled_entry_offset = __ pc() - start;
1849 __ enter();
1850 address the_pc = __ pc();
1851
1852 frame_complete = the_pc - start;
1853
1854 // This nop must be exactly at the PC we push into the frame info.
1855   // We use this nop for fast CodeBlob lookup, so associate the OopMap
1856   // with it right away.
1857 __ post_call_nop();
1858 OopMap* map = new OopMap(framesize, 1);
1859 oop_maps->add_gc_map(frame_complete, map);
1860
1861 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1);
1862 __ movptr(c_rarg0, r15_thread);
1863 __ movptr(c_rarg1, rsp);
1864 __ call_VM_leaf(Continuation::freeze_entry(), 2);
1865 __ reset_last_Java_frame(true);
1866
1867 Label L_pinned;
1868
1869 __ testptr(rax, rax);
1870 __ jcc(Assembler::notZero, L_pinned);
1871
1872 __ movptr(rsp, Address(r15_thread, JavaThread::cont_entry_offset()));
1873 continuation_enter_cleanup(masm);
1874 __ pop(rbp);
1875 __ ret(0);
1876
1877 __ bind(L_pinned);
1878
1879 // Pinned, return to caller
1880
1881 // handle pending exception thrown by freeze
1882 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
1883 Label ok;
1884 __ jcc(Assembler::equal, ok);
1885 __ leave();
1886 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
1887 __ bind(ok);
1888
1889 __ leave();
1890 __ ret(0);
1891 }
1892
1893 void SharedRuntime::continuation_enter_cleanup(MacroAssembler* masm) {
1894 ::continuation_enter_cleanup(masm);
1895 }
1896
1897 static void gen_special_dispatch(MacroAssembler* masm,
1898 const methodHandle& method,
1899 const BasicType* sig_bt,
1900 const VMRegPair* regs) {
1901 verify_oop_args(masm, method, sig_bt, regs);
1902 vmIntrinsics::ID iid = method->intrinsic_id();
1903
1904 // Now write the args into the outgoing interpreter space
1905 bool has_receiver = false;
1906 Register receiver_reg = noreg;
1907 int member_arg_pos = -1;
1908 Register member_reg = noreg;
1909 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1910 if (ref_kind != 0) {
1911 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument
1912 member_reg = rbx; // known to be free at this point
1913 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1914 } else if (iid == vmIntrinsics::_invokeBasic) {
1915 has_receiver = true;
1916 } else if (iid == vmIntrinsics::_linkToNative) {
1917 member_arg_pos = method->size_of_parameters() - 1; // trailing NativeEntryPoint argument
1918 member_reg = rbx; // known to be free at this point
1919 } else {
1920 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
1921 }
1922
1923 if (member_reg != noreg) {
1924 // Load the member_arg into register, if necessary.
1925 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1926 VMReg r = regs[member_arg_pos].first();
1927 if (r->is_stack()) {
1928 __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1929 } else {
1930 // no data motion is needed
1931 member_reg = r->as_Register();
1932 }
1933 }
1934
1935 if (has_receiver) {
1936 // Make sure the receiver is loaded into a register.
1937 assert(method->size_of_parameters() > 0, "oob");
1938 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1939 VMReg r = regs[0].first();
1940 assert(r->is_valid(), "bad receiver arg");
1941 if (r->is_stack()) {
1942 // Porting note: This assumes that compiled calling conventions always
1943 // pass the receiver oop in a register. If this is not true on some
1944 // platform, pick a temp and load the receiver from stack.
1945 fatal("receiver always in a register");
1946 receiver_reg = j_rarg0; // known to be free at this point
1947 __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1948 } else {
1949 // no data motion is needed
1950 receiver_reg = r->as_Register();
1951 }
1952 }
1953
1954 // Figure out which address we are really jumping to:
1955 MethodHandles::generate_method_handle_dispatch(masm, iid,
1956 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1957 }
1958
1959 // ---------------------------------------------------------------------------
1960 // Generate a native wrapper for a given method. The method takes arguments
1961 // in the Java compiled code convention, marshals them to the native
1962 // convention (handlizes oops, etc), transitions to native, makes the call,
1963 // returns to java state (possibly blocking), unhandlizes any result and
1964 // returns.
1965 //
1966 // Critical native functions are a shorthand for the use of
1967 // GetPrimitiveArrayCritical and disallow the use of any other JNI
1968 // functions. The wrapper is expected to unpack the arguments before
1969 // passing them to the callee. Critical native functions leave the state _in_Java,
1970 // since they cannot stop for GC.
1971 // Some other parts of JNI setup are skipped, like the tear-down of the JNI handle
1972 // block and the check for pending exceptions, since it's impossible for them
1973 // to be thrown.
1974 //
1975 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1976 const methodHandle& method,
1977 int compile_id,
1978 BasicType* in_sig_bt,
1979 VMRegPair* in_regs,
1980 BasicType ret_type) {
1981 if (method->is_continuation_native_intrinsic()) {
1982 int exception_offset = -1;
1983 OopMapSet* oop_maps = new OopMapSet();
1984 int frame_complete = -1;
1985 int stack_slots = -1;
1986 int interpreted_entry_offset = -1;
1987 int vep_offset = -1;
1988 if (method->is_continuation_enter_intrinsic()) {
1989 gen_continuation_enter(masm,
1990 in_regs,
1991 exception_offset,
1992 oop_maps,
1993 frame_complete,
1994 stack_slots,
1995 interpreted_entry_offset,
1996 vep_offset);
1997 } else if (method->is_continuation_yield_intrinsic()) {
1998 gen_continuation_yield(masm,
1999 in_regs,
2000 oop_maps,
2001 frame_complete,
2002 stack_slots,
2003 vep_offset);
2004 } else {
2005 guarantee(false, "Unknown Continuation native intrinsic");
2006 }
2007
2008 #ifdef ASSERT
2009 if (method->is_continuation_enter_intrinsic()) {
2010 assert(interpreted_entry_offset != -1, "Must be set");
2011 assert(exception_offset != -1, "Must be set");
2012 } else {
2013 assert(interpreted_entry_offset == -1, "Must be unset");
2014 assert(exception_offset == -1, "Must be unset");
2015 }
2016 assert(frame_complete != -1, "Must be set");
2017 assert(stack_slots != -1, "Must be set");
2018 assert(vep_offset != -1, "Must be set");
2019 #endif
2020
2021 __ flush();
2022 nmethod* nm = nmethod::new_native_nmethod(method,
2023 compile_id,
2024 masm->code(),
2025 vep_offset,
2026 frame_complete,
2027 stack_slots,
2028 in_ByteSize(-1),
2029 in_ByteSize(-1),
2030 oop_maps,
2031 exception_offset);
2032 if (nm == nullptr) return nm;
2033 if (method->is_continuation_enter_intrinsic()) {
2034 ContinuationEntry::set_enter_code(nm, interpreted_entry_offset);
2035 } else if (method->is_continuation_yield_intrinsic()) {
2036 _cont_doYield_stub = nm;
2037 }
2038 return nm;
2039 }
2040
2041 if (method->is_method_handle_intrinsic()) {
2042 vmIntrinsics::ID iid = method->intrinsic_id();
2043 intptr_t start = (intptr_t)__ pc();
2044 int vep_offset = ((intptr_t)__ pc()) - start;
2045 gen_special_dispatch(masm,
2046 method,
2047 in_sig_bt,
2048 in_regs);
2049 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period
2050 __ flush();
2051 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
2052 return nmethod::new_native_nmethod(method,
2053 compile_id,
2054 masm->code(),
2055 vep_offset,
2056 frame_complete,
2057 stack_slots / VMRegImpl::slots_per_word,
2058 in_ByteSize(-1),
2059 in_ByteSize(-1),
2060 nullptr);
2061 }
2062 address native_func = method->native_function();
2063 assert(native_func != nullptr, "must have function");
2064
2065 // An OopMap for lock (and class if static)
2066 OopMapSet *oop_maps = new OopMapSet();
2067 intptr_t start = (intptr_t)__ pc();
2068
2069   // We have received a description of where all the Java args are located
2070 // on entry to the wrapper. We need to convert these args to where
2071 // the jni function will expect them. To figure out where they go
2072 // we convert the java signature to a C signature by inserting
2073 // the hidden arguments as arg[0] and possibly arg[1] (static method)
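  // For example, a static native method taking (int, Object) in Java is called
  // as C function(JNIEnv*, jclass, jint, jobject), so out_sig_bt below becomes
  // { T_ADDRESS, T_OBJECT, T_INT, T_OBJECT }.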
2074
2075 const int total_in_args = method->size_of_parameters();
2076 int total_c_args = total_in_args + (method->is_static() ? 2 : 1);
2077
2078 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
2079 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
2080
2081 int argc = 0;
2082 out_sig_bt[argc++] = T_ADDRESS;
2083 if (method->is_static()) {
2084 out_sig_bt[argc++] = T_OBJECT;
2085 }
2086
2087 for (int i = 0; i < total_in_args ; i++ ) {
2088 out_sig_bt[argc++] = in_sig_bt[i];
2089 }
2090
2091 // Now figure out where the args must be stored and how much stack space
2092 // they require.
2093 int out_arg_slots;
2094 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
2095
2096 // Compute framesize for the wrapper. We need to handlize all oops in
2097 // incoming registers
2098
2099 // Calculate the total number of stack slots we will need.
2100
2101 // First count the abi requirement plus all of the outgoing args
2102 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
2103
2104 // Now the space for the inbound oop handle area
2105 int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers
2106
2107 int oop_handle_offset = stack_slots;
2108 stack_slots += total_save_slots;
2109
2110 // Now any space we need for handlizing a klass if static method
2111
2112 int klass_slot_offset = 0;
2113 int klass_offset = -1;
2114 int lock_slot_offset = 0;
2115 bool is_static = false;
2116
2117 if (method->is_static()) {
2118 klass_slot_offset = stack_slots;
2119 stack_slots += VMRegImpl::slots_per_word;
2120 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
2121 is_static = true;
2122 }
2123
2124 // Plus a lock if needed
2125
2126 if (method->is_synchronized()) {
2127 lock_slot_offset = stack_slots;
2128 stack_slots += VMRegImpl::slots_per_word;
2129 }
2130
2131   // Now a place (+2) to save return values or temps during shuffling,
2132   // + 4 for the return address (which we own) and saved rbp
2133 stack_slots += 6;
2134
2135 // Ok The space we have allocated will look like:
2136 //
2137 //
2138 // FP-> | |
2139 // |---------------------|
2140 // | 2 slots for moves |
2141 // |---------------------|
2142 // | lock box (if sync) |
2143 // |---------------------| <- lock_slot_offset
2144 // | klass (if static) |
2145 // |---------------------| <- klass_slot_offset
2146 // | oopHandle area |
2147 // |---------------------| <- oop_handle_offset (6 java arg registers)
2148 // | outbound memory |
2149 // | based arguments |
2150 // | |
2151 // |---------------------|
2152 // | |
2153 // SP-> | out_preserved_slots |
2154 //
2155 //
2156
2157
2158   // Now compute the actual number of stack slots we need, rounding to keep
2159   // the stack properly aligned.
2160 stack_slots = align_up(stack_slots, StackAlignmentInSlots);
2161
2162 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
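  // Rough example (hypothetical, System V, non-static, non-synchronized method
  // whose C args all fit in registers): out_arg_slots == 0, + 12 slots of oop
  // handle area, + 6 slots for moves/return address/rbp = 18 slots, aligned up
  // to 20 slots (80 bytes).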
2163
2164 // First thing make an ic check to see if we should even be here
2165
2166 // We are free to use all registers as temps without saving them and
2167 // restoring them except rbp. rbp is the only callee save register
2168 // as far as the interpreter and the compiler(s) are concerned.
2169
2170 const Register receiver = j_rarg0;
2171
2172 Label exception_pending;
2173
2174 assert_different_registers(receiver, rscratch1, rscratch2);
2175 __ verify_oop(receiver);
2176 __ ic_check(8 /* end_alignment */);
2177
2178 int vep_offset = ((intptr_t)__ pc()) - start;
2179
2180 if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
2181 Label L_skip_barrier;
2182 Register klass = r10;
2183 __ mov_metadata(klass, method->method_holder()); // InstanceKlass*
2184 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
2185
2186 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
2187
2188 __ bind(L_skip_barrier);
2189 }
2190
2191 #ifdef COMPILER1
2192 // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available.
2193 if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
2194 inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/);
2195 }
2196 #endif // COMPILER1
2197
2198 // The instruction at the verified entry point must be 5 bytes or longer
2199 // because it can be patched on the fly by make_non_entrant. The stack bang
2200 // instruction fits that requirement.
2201
2202 // Generate stack overflow check
2203 __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size());
2204
2205 // Generate a new frame for the wrapper.
2206 __ enter();
2207 // -2 because return address is already present and so is saved rbp
2208 __ subptr(rsp, stack_size - 2*wordSize);
2209
2210 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2211 // native wrapper is not hot enough to micro optimize the nmethod entry barrier with an out-of-line stub
2212 bs->nmethod_entry_barrier(masm, nullptr /* slow_path */, nullptr /* continuation */);
2213
2214 // Frame is now completed as far as size and linkage.
2215 int frame_complete = ((intptr_t)__ pc()) - start;
2216
2217 #ifdef ASSERT
2218 __ check_stack_alignment(rsp, "improperly aligned stack");
2219 #endif /* ASSERT */
2220
2221
2222 // We use r14 as the oop handle for the receiver/klass
2223   // It is callee-saved, so it survives the call to native
2224
2225 const Register oop_handle_reg = r14;
2226
2227 //
2228 // We immediately shuffle the arguments so that any vm call we have to
2229 // make from here on out (sync slow path, jvmti, etc.) we will have
2230 // captured the oops from our caller and have a valid oopMap for
2231 // them.
2232
2233 // -----------------
2234 // The Grand Shuffle
2235
2236 // The Java calling convention is either equal (linux) or denser (win64) than the
2237   // c calling convention. However, because of the jni_env argument, the c calling
2238   // convention always has at least one more argument (and two more for static) than Java.
2239 // Therefore if we move the args from java -> c backwards then we will never have
2240 // a register->register conflict and we don't have to build a dependency graph
2241 // and figure out how to break any cycles.
2242 //
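  // Illustration (System V case): j_rarg0..j_rarg5 alias c_rarg1..c_rarg5 and
  // c_rarg0, so each Java argument moves to the same or a higher-numbered C
  // register (or to the stack). Walking from the last argument to the first
  // therefore reads every source register before any move can overwrite it.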
2243
2244 // Record esp-based slot for receiver on stack for non-static methods
2245 int receiver_offset = -1;
2246
2247 // This is a trick. We double the stack slots so we can claim
2248 // the oops in the caller's frame. Since we are sure to have
2249   // more args than the caller, doubling is enough to make
2250 // sure we can capture all the incoming oop args from the
2251 // caller.
2252 //
2253 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
2254
2255 // Mark location of rbp (someday)
2256 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
2257
2258 // Use eax, ebx as temporaries during any memory-memory moves we have to do
2259 // All inbound args are referenced based on rbp and all outbound args via rsp.
2260
2261
2262 #ifdef ASSERT
2263 bool reg_destroyed[Register::number_of_registers];
2264 bool freg_destroyed[XMMRegister::number_of_registers];
2265 for ( int r = 0 ; r < Register::number_of_registers ; r++ ) {
2266 reg_destroyed[r] = false;
2267 }
2268 for ( int f = 0 ; f < XMMRegister::number_of_registers ; f++ ) {
2269 freg_destroyed[f] = false;
2270 }
2271
2272 #endif /* ASSERT */
2273
2274 // For JNI natives the incoming and outgoing registers are offset upwards.
2275 GrowableArray<int> arg_order(2 * total_in_args);
2276
2277 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
2278 arg_order.push(i);
2279 arg_order.push(c_arg);
2280 }
2281
2282 for (int ai = 0; ai < arg_order.length(); ai += 2) {
2283 int i = arg_order.at(ai);
2284 int c_arg = arg_order.at(ai + 1);
2285 __ block_comment(err_msg("move %d -> %d", i, c_arg));
2286 #ifdef ASSERT
2287 if (in_regs[i].first()->is_Register()) {
2288 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
2289 } else if (in_regs[i].first()->is_XMMRegister()) {
2290 assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!");
2291 }
2292 if (out_regs[c_arg].first()->is_Register()) {
2293 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
2294 } else if (out_regs[c_arg].first()->is_XMMRegister()) {
2295 freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
2296 }
2297 #endif /* ASSERT */
2298 switch (in_sig_bt[i]) {
2299 case T_ARRAY:
2300 case T_OBJECT:
2301 __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
2302 ((i == 0) && (!is_static)),
2303 &receiver_offset);
2304 break;
2305 case T_VOID:
2306 break;
2307
2308 case T_FLOAT:
2309 __ float_move(in_regs[i], out_regs[c_arg]);
2310 break;
2311
2312 case T_DOUBLE:
2313 assert( i + 1 < total_in_args &&
2314 in_sig_bt[i + 1] == T_VOID &&
2315 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
2316 __ double_move(in_regs[i], out_regs[c_arg]);
2317 break;
2318
2319 case T_LONG :
2320 __ long_move(in_regs[i], out_regs[c_arg]);
2321 break;
2322
2323 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
2324
2325 default:
2326 __ move32_64(in_regs[i], out_regs[c_arg]);
2327 }
2328 }
2329
2330 int c_arg;
2331
2332 // Pre-load a static method's oop into r14. Used both by locking code and
2333 // the normal JNI call code.
2334 // point c_arg at the first arg that is already loaded in case we
2335 // need to spill before we call out
2336 c_arg = total_c_args - total_in_args;
2337
2338 if (method->is_static()) {
2339
2340 // load oop into a register
2341 __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror()));
2342
2343     // Now handlize the static class mirror; it's known to be not-null.
2344 __ movptr(Address(rsp, klass_offset), oop_handle_reg);
2345 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2346
2347 // Now get the handle
2348 __ lea(oop_handle_reg, Address(rsp, klass_offset));
2349 // store the klass handle as second argument
2350 __ movptr(c_rarg1, oop_handle_reg);
2351 // and protect the arg if we must spill
2352 c_arg--;
2353 }
2354
2355 // Change state to native (we save the return address in the thread, since it might not
2356 // be pushed on the stack when we do a stack traversal). It is enough that the pc()
2357 // points into the right code segment. It does not have to be the correct return pc.
2358 // We use the same pc/oopMap repeatedly when we call out
2359
2360 Label native_return;
2361 if (method->is_object_wait0()) {
2362 // For convenience we use the pc we want to resume to in case of preemption on Object.wait.
2363 __ set_last_Java_frame(rsp, noreg, native_return, rscratch1);
2364 } else {
2365 intptr_t the_pc = (intptr_t) __ pc();
2366 oop_maps->add_gc_map(the_pc - start, map);
2367
2368 __ set_last_Java_frame(rsp, noreg, __ pc(), rscratch1);
2369 }
2370
2371   // We have all of the arguments set up at this point. We must not touch any of the
2372   // argument registers at this point (what if we save/restore them? there are no oops).
2373
2374 if (DTraceMethodProbes) {
2375 // protect the args we've loaded
2376 save_args(masm, total_c_args, c_arg, out_regs);
2377 __ mov_metadata(c_rarg1, method());
2378 __ call_VM_leaf(
2379 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
2380 r15_thread, c_rarg1);
2381 restore_args(masm, total_c_args, c_arg, out_regs);
2382 }
2383
2384 // RedefineClasses() tracing support for obsolete method entry
2385 if (log_is_enabled(Trace, redefine, class, obsolete)) {
2386 // protect the args we've loaded
2387 save_args(masm, total_c_args, c_arg, out_regs);
2388 __ mov_metadata(c_rarg1, method());
2389 __ call_VM_leaf(
2390 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
2391 r15_thread, c_rarg1);
2392 restore_args(masm, total_c_args, c_arg, out_regs);
2393 }
2394
2395 // Lock a synchronized method
2396
2397 // Register definitions used by locking and unlocking
2398
2399 const Register swap_reg = rax; // Must use rax for cmpxchg instruction
2400 const Register obj_reg = rbx; // Will contain the oop
2401 const Register lock_reg = r13; // Address of compiler lock object (BasicLock)
2402
2403 Label slow_path_lock;
2404 Label lock_done;
2405
2406 if (method->is_synchronized()) {
2407 // Get the handle (the 2nd argument)
2408 __ mov(oop_handle_reg, c_rarg1);
2409
2410 // Get address of the box
2411
2412 __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
2413
2414 // Load the oop from the handle
2415 __ movptr(obj_reg, Address(oop_handle_reg, 0));
2416
2417 __ lightweight_lock(lock_reg, obj_reg, swap_reg, rscratch1, slow_path_lock);
2418
2419 // Slow path will re-enter here
2420 __ bind(lock_done);
2421 }
2422
2423 // Finally just about ready to make the JNI call
2424
2425 // get JNIEnv* which is first argument to native
2426 __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
2427
2428 // Now set thread in native
2429 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
2430
2431 __ call(RuntimeAddress(native_func));
2432
2433 // Verify or restore cpu control state after JNI call
2434 __ restore_cpu_control_state_after_jni(rscratch1);
2435
2436 // Unpack native results.
2437 switch (ret_type) {
2438 case T_BOOLEAN: __ c2bool(rax); break;
2439 case T_CHAR : __ movzwl(rax, rax); break;
2440 case T_BYTE : __ sign_extend_byte (rax); break;
2441 case T_SHORT : __ sign_extend_short(rax); break;
2442 case T_INT : /* nothing to do */ break;
2443 case T_DOUBLE :
2444 case T_FLOAT :
2445 // Result is in xmm0 we'll save as needed
2446 break;
2447 case T_ARRAY: // Really a handle
2448 case T_OBJECT: // Really a handle
2449 break; // can't de-handlize until after safepoint check
2450 case T_VOID: break;
2451 case T_LONG: break;
2452 default : ShouldNotReachHere();
2453 }
2454
2455 // Switch thread to "native transition" state before reading the synchronization state.
2456 // This additional state is necessary because reading and testing the synchronization
2457 // state is not atomic w.r.t. GC, as this scenario demonstrates:
2458 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2459 // VM thread changes sync state to synchronizing and suspends threads for GC.
2460 // Thread A is resumed to finish this native method, but doesn't block here since it
2461   //     didn't see any synchronization in progress, and escapes.
2462 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2463
2464 // Force this write out before the read below
2465 if (!UseSystemMemoryBarrier) {
2466 __ membar(Assembler::Membar_mask_bits(
2467 Assembler::LoadLoad | Assembler::LoadStore |
2468 Assembler::StoreLoad | Assembler::StoreStore));
2469 }
2470
2471 // check for safepoint operation in progress and/or pending suspend requests
2472 {
2473 Label Continue;
2474 Label slow_path;
2475
2476 __ safepoint_poll(slow_path, true /* at_return */, false /* in_nmethod */);
2477
2478 __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
2479 __ jcc(Assembler::equal, Continue);
2480 __ bind(slow_path);
2481
2482 // Don't use call_VM as it will see a possible pending exception and forward it
2483     // and never return here, preventing us from clearing _last_native_pc down below.
2484     // Also, we can't use call_VM_leaf either, as it will check to see if rsi & rdi are
2485 // preserved and correspond to the bcp/locals pointers. So we do a runtime call
2486 // by hand.
2487 //
2488 __ vzeroupper();
2489 save_native_result(masm, ret_type, stack_slots);
2490 __ mov(c_rarg0, r15_thread);
2491 __ mov(r12, rsp); // remember sp
2492 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
2493 __ andptr(rsp, -16); // align stack as required by ABI
2494 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
2495 __ mov(rsp, r12); // restore sp
2496 __ reinit_heapbase();
2497 // Restore any method result value
2498 restore_native_result(masm, ret_type, stack_slots);
2499 __ bind(Continue);
2500 }
2501
2502 // change thread state
2503 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
2504
2505 if (method->is_object_wait0()) {
2506 // Check preemption for Object.wait()
2507 __ movptr(rscratch1, Address(r15_thread, JavaThread::preempt_alternate_return_offset()));
2508 __ cmpptr(rscratch1, NULL_WORD);
2509 __ jccb(Assembler::equal, native_return);
2510 __ movptr(Address(r15_thread, JavaThread::preempt_alternate_return_offset()), NULL_WORD);
2511 __ jmp(rscratch1);
2512 __ bind(native_return);
2513
2514 intptr_t the_pc = (intptr_t) __ pc();
2515 oop_maps->add_gc_map(the_pc - start, map);
2516 }
2517
2518
2519 Label reguard;
2520 Label reguard_done;
2521 __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled);
2522 __ jcc(Assembler::equal, reguard);
2523 __ bind(reguard_done);
2524
2525 // native result if any is live
2526
2527 // Unlock
2528 Label slow_path_unlock;
2529 Label unlock_done;
2530 if (method->is_synchronized()) {
2531
2532 Label fast_done;
2533
2534 // Get locked oop from the handle we passed to jni
2535 __ movptr(obj_reg, Address(oop_handle_reg, 0));
2536
2537 // Must save rax if it is live now because cmpxchg must use it
2538 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
2539 save_native_result(masm, ret_type, stack_slots);
2540 }
2541
2542 __ lightweight_unlock(obj_reg, swap_reg, lock_reg, slow_path_unlock);
2543
2544 // slow path re-enters here
2545 __ bind(unlock_done);
2546 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
2547 restore_native_result(masm, ret_type, stack_slots);
2548 }
2549
2550 __ bind(fast_done);
2551 }
2552 if (DTraceMethodProbes) {
2553 save_native_result(masm, ret_type, stack_slots);
2554 __ mov_metadata(c_rarg1, method());
2555 __ call_VM_leaf(
2556 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2557 r15_thread, c_rarg1);
2558 restore_native_result(masm, ret_type, stack_slots);
2559 }
2560
2561 __ reset_last_Java_frame(false);
2562
2563 // Unbox oop result, e.g. JNIHandles::resolve value.
2564 if (is_reference_type(ret_type)) {
2565 __ resolve_jobject(rax /* value */,
2566 rcx /* tmp */);
2567 }
2568
2569 if (CheckJNICalls) {
2570 // clear_pending_jni_exception_check
2571 __ movptr(Address(r15_thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD);
2572 }
2573
2574 // reset handle block
2575 __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
2576 __ movl(Address(rcx, JNIHandleBlock::top_offset()), NULL_WORD);
2577
2578 // pop our frame
2579
2580 __ leave();
2581
2582 #if INCLUDE_JFR
2583 // We need to do a poll test after unwind in case the sampler
2584 // managed to sample the native frame after returning to Java.
2585 Label L_return;
2586 address poll_test_pc = __ pc();
2587 __ relocate(relocInfo::poll_return_type);
2588 __ testb(Address(r15_thread, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit());
2589 __ jccb(Assembler::zero, L_return);
2590 __ lea(rscratch1, InternalAddress(poll_test_pc));
2591 __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1);
2592 assert(SharedRuntime::polling_page_return_handler_blob() != nullptr,
2593 "polling page return stub not created yet");
2594 address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
2595 __ jump(RuntimeAddress(stub));
2596 __ bind(L_return);
2597 #endif // INCLUDE_JFR
2598
2599 // Any exception pending?
2600 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
2601 __ jcc(Assembler::notEqual, exception_pending);
2602
2603 // Return
2604
2605 __ ret(0);
2606
2607 // Unexpected paths are out of line and go here
2608
2609 // forward the exception
2610 __ bind(exception_pending);
2611
2612 // and forward the exception
2613 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2614
2615 // Slow path locking & unlocking
2616 if (method->is_synchronized()) {
2617
2618 // BEGIN Slow path lock
2619 __ bind(slow_path_lock);
2620
2621 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
2622 // args are (oop obj, BasicLock* lock, JavaThread* thread)
2623
2624 // protect the args we've loaded
2625 save_args(masm, total_c_args, c_arg, out_regs);
2626
2627 __ mov(c_rarg0, obj_reg);
2628 __ mov(c_rarg1, lock_reg);
2629 __ mov(c_rarg2, r15_thread);
2630
2631 // Not a leaf but we have last_Java_frame setup as we want.
2632 // We don't want to unmount in case of contention since that would complicate preserving
2633 // the arguments that had already been marshalled into the native convention. So we force
2634 // the freeze slow path to find this native wrapper frame (see recurse_freeze_native_frame())
2635 // and pin the vthread. Otherwise the fast path won't find it since we don't walk the stack.
2636 __ push_cont_fastpath();
2637 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
2638 __ pop_cont_fastpath();
2639 restore_args(masm, total_c_args, c_arg, out_regs);
2640
2641 #ifdef ASSERT
2642 { Label L;
2643 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
2644 __ jcc(Assembler::equal, L);
2645 __ stop("no pending exception allowed on exit from monitorenter");
2646 __ bind(L);
2647 }
2648 #endif
2649 __ jmp(lock_done);
2650
2651 // END Slow path lock
2652
2653 // BEGIN Slow path unlock
2654 __ bind(slow_path_unlock);
2655
2656 // If we haven't already saved the native result we must save it now as xmm registers
2657 // are still exposed.
2658 __ vzeroupper();
2659 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2660 save_native_result(masm, ret_type, stack_slots);
2661 }
2662
2663 __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
2664
2665 __ mov(c_rarg0, obj_reg);
2666 __ mov(c_rarg2, r15_thread);
2667 __ mov(r12, rsp); // remember sp
2668 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
2669 __ andptr(rsp, -16); // align stack as required by ABI
2670
2671 // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
2672 // NOTE that obj_reg == rbx currently
2673 __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset())));
2674 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
2675
2676 // args are (oop obj, BasicLock* lock, JavaThread* thread)
2677 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
2678 __ mov(rsp, r12); // restore sp
2679 __ reinit_heapbase();
2680 #ifdef ASSERT
2681 {
2682 Label L;
2683 __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
2684 __ jcc(Assembler::equal, L);
2685 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
2686 __ bind(L);
2687 }
2688 #endif /* ASSERT */
2689
2690 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx);
2691
2692 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2693 restore_native_result(masm, ret_type, stack_slots);
2694 }
2695 __ jmp(unlock_done);
2696
2697 // END Slow path unlock
2698
2699 } // synchronized
2700
2701 // SLOW PATH Reguard the stack if needed
2702
2703 __ bind(reguard);
2704 __ vzeroupper();
2705 save_native_result(masm, ret_type, stack_slots);
2706 __ mov(r12, rsp); // remember sp
2707 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
2708 __ andptr(rsp, -16); // align stack as required by ABI
2709 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
2710 __ mov(rsp, r12); // restore sp
2711 __ reinit_heapbase();
2712 restore_native_result(masm, ret_type, stack_slots);
2713 // and continue
2714 __ jmp(reguard_done);
2715
2716
2717
2718 __ flush();
2719
2720 nmethod *nm = nmethod::new_native_nmethod(method,
2721 compile_id,
2722 masm->code(),
2723 vep_offset,
2724 frame_complete,
2725 stack_slots / VMRegImpl::slots_per_word,
2726 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2727 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
2728 oop_maps);
2729
2730 return nm;
2731 }
2732
2733 // This function returns the adjustment (in number of words) to apply to a c2i adapter
2734 // activation for use during deoptimization.
2735 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
2736 return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2737 }
2738
2739
2740 uint SharedRuntime::out_preserve_stack_slots() {
2741 return 0;
2742 }
2743
2744
2745 // Number of stack slots between incoming argument block and the start of
2746 // a new frame. The PROLOG must add this many slots to the stack. The
2747 // EPILOG must remove this many slots. amd64 needs two slots for
2748 // return address.
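// (With 4-byte VMRegImpl stack slots the returned 4 corresponds to 16 bytes,
// i.e. the two 64-bit words the prolog leaves on the stack: the return address
// pushed by the call and the rbp saved by enter().)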
2749 uint SharedRuntime::in_preserve_stack_slots() {
2750 return 4 + 2 * VerifyStackAtCalls;
2751 }
2752
2753 VMReg SharedRuntime::thread_register() {
2754 return r15_thread->as_VMReg();
2755 }
2756
2757 //------------------------------generate_deopt_blob----------------------------
2758 void SharedRuntime::generate_deopt_blob() {
2759 // Allocate space for the code
2760 ResourceMark rm;
2761 // Setup code generation tools
2762 int pad = 0;
2763 if (UseAVX > 2) {
2764 pad += 1024;
2765 }
2766 if (UseAPX) {
2767 pad += 1024;
2768 }
2769 #if INCLUDE_JVMCI
2770 if (EnableJVMCI) {
2771 pad += 512; // Increase the buffer size when compiling for JVMCI
2772 }
2773 #endif
2774 const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id);
2775 CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, BlobId::shared_deopt_id);
2776 if (blob != nullptr) {
2777 _deopt_blob = blob->as_deoptimization_blob();
2778 return;
2779 }
2780
2781 CodeBuffer buffer(name, 2560+pad, 1024);
2782 MacroAssembler* masm = new MacroAssembler(&buffer);
2783 int frame_size_in_words;
2784 OopMap* map = nullptr;
2785 OopMapSet *oop_maps = new OopMapSet();
2786
2787 // -------------
2788 // This code enters when returning to a de-optimized nmethod. A return
2789 // address has been pushed on the stack, and return values are in
2790 // registers.
2791 // If we are doing a normal deopt then we were called from the patched
2792 // nmethod from the point we returned to the nmethod. So the return
2793 // address on the stack is wrong by NativeCall::instruction_size
2794 // We will adjust the value so it looks like we have the original return
2795 // address on the stack (like when we eagerly deoptimized).
2796 // In the case of an exception pending when deoptimizing, we enter
2797 // with a return address on the stack that points after the call we patched
2798 // into the exception handler. We have the following register state from,
2799 // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
2800 // rax: exception oop
2801 // rbx: exception handler
2802 // rdx: throwing pc
2803 // So in this case we simply jam rdx into the useless return address and
2804 // the stack looks just like we want.
2805 //
2806 // At this point we need to de-opt. We save the argument return
2807 // registers. We call the first C routine, fetch_unroll_info(). This
2808 // routine captures the return values and returns a structure which
2809 // describes the current frame size and the sizes of all replacement frames.
2810 // The current frame is compiled code and may contain many inlined
2811 // functions, each with their own JVM state. We pop the current frame, then
2812 // push all the new frames. Then we call the C routine unpack_frames() to
2813 // populate these frames. Finally unpack_frames() returns us the new target
2814 // address. Notice that callee-save registers are BLOWN here; they have
2815 // already been captured in the vframeArray at the time the return PC was
2816 // patched.
2817 address start = __ pc();
2818 Label cont;
2819
2820 // Prolog for non exception case!
2821
2822 // Save everything in sight.
2823 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true);
2824
2825 // Normal deoptimization. Save exec mode for unpack_frames.
2826 __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved
2827 __ jmp(cont);
2828
2829 int reexecute_offset = __ pc() - start;
2830 #if INCLUDE_JVMCI && !defined(COMPILER1)
2831 if (UseJVMCICompiler) {
2832 // JVMCI does not use this kind of deoptimization
2833 __ should_not_reach_here();
2834 }
2835 #endif
2836
2837 // Reexecute case
2838   // the return address is the pc that describes what bci to re-execute at
2839
2840 // No need to update map as each call to save_live_registers will produce identical oopmap
2841 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true);
2842
2843 __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
2844 __ jmp(cont);
2845
2846 #if INCLUDE_JVMCI
2847 Label after_fetch_unroll_info_call;
2848 int implicit_exception_uncommon_trap_offset = 0;
2849 int uncommon_trap_offset = 0;
2850
2851 if (EnableJVMCI) {
2852 implicit_exception_uncommon_trap_offset = __ pc() - start;
2853
2854 __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
2855 __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), NULL_WORD);
2856
2857 uncommon_trap_offset = __ pc() - start;
2858
2859 // Save everything in sight.
2860 RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true);
2861 // fetch_unroll_info needs to call last_java_frame()
2862 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);
2863
2864 __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())));
2865 __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1);
2866
2867 __ movl(r14, Deoptimization::Unpack_reexecute);
2868 __ mov(c_rarg0, r15_thread);
2869 __ movl(c_rarg2, r14); // exec mode
2870 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
2871 oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
2872
2873 __ reset_last_Java_frame(false);
2874
2875 __ jmp(after_fetch_unroll_info_call);
2876 } // EnableJVMCI
2877 #endif // INCLUDE_JVMCI
2878
2879 int exception_offset = __ pc() - start;
2880
2881 // Prolog for exception case
2882
2883 // all registers are dead at this entry point, except for rax, and
2884 // rdx which contain the exception oop and exception pc
2885 // respectively. Set them in TLS and fall thru to the
2886 // unpack_with_exception_in_tls entry point.
2887
2888 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
2889 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
2890
2891 int exception_in_tls_offset = __ pc() - start;
2892
2893 // new implementation because exception oop is now passed in JavaThread
2894
2895 // Prolog for exception case
2896 // All registers must be preserved because they might be used by LinearScan
2897   // Exception oop and throwing PC are passed in JavaThread
2898 // tos: stack at point of call to method that threw the exception (i.e. only
2899 // args are on the stack, no return address)
2900
2901 // make room on stack for the return address
2902 // It will be patched later with the throwing pc. The correct value is not
2903 // available now because loading it from memory would destroy registers.
2904 __ push(0);
2905
2906 // Save everything in sight.
2907 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ true);
2908
2909 // Now it is safe to overwrite any register
2910
2911 // Deopt during an exception. Save exec mode for unpack_frames.
2912 __ movl(r14, Deoptimization::Unpack_exception); // callee-saved
2913
2914 // load throwing pc from JavaThread and patch it as the return address
2915 // of the current frame. Then clear the field in JavaThread
2916
2917 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
2918 __ movptr(Address(rbp, wordSize), rdx);
2919 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD);
2920
2921 #ifdef ASSERT
2922 // verify that there is really an exception oop in JavaThread
2923 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
2924 __ verify_oop(rax);
2925
2926 // verify that there is no pending exception
2927 Label no_pending_exception;
2928 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
2929 __ testptr(rax, rax);
2930 __ jcc(Assembler::zero, no_pending_exception);
2931 __ stop("must not have pending exception here");
2932 __ bind(no_pending_exception);
2933 #endif
2934
2935 __ bind(cont);
2936
2937 // Call C code. Need thread and this frame, but NOT official VM entry
2938 // crud. We cannot block on this call, no GC can happen.
2939 //
2940 // UnrollBlock* fetch_unroll_info(JavaThread* thread)
2941
2942 // fetch_unroll_info needs to call last_java_frame().
2943
2944 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);
2945 #ifdef ASSERT
2946 { Label L;
2947 __ cmpptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
2948 __ jcc(Assembler::equal, L);
2949 __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2950 __ bind(L);
2951 }
2952 #endif // ASSERT
2953 __ mov(c_rarg0, r15_thread);
2954 __ movl(c_rarg1, r14); // exec_mode
2955 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2956
2957 // Need to have an oopmap that tells fetch_unroll_info where to
2958 // find any register it might need.
2959 oop_maps->add_gc_map(__ pc() - start, map);
2960
2961 __ reset_last_Java_frame(false);
2962
2963 #if INCLUDE_JVMCI
2964 if (EnableJVMCI) {
2965 __ bind(after_fetch_unroll_info_call);
2966 }
2967 #endif
2968
2969 // Load UnrollBlock* into rdi
2970 __ mov(rdi, rax);
2971
2972 __ movl(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset()));
2973 Label noException;
2974 __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending?
2975 __ jcc(Assembler::notEqual, noException);
2976 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
  // Note: likely redundant, the exception pc was already cleared to null above
2978 __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
2979 __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD);
2980 __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD);
2981
2982 __ verify_oop(rax);
2983
2984 // Overwrite the result registers with the exception results.
2985 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
  // Note: likely redundant, rdx was loaded from the already-cleared exception pc
2987 __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx);
2988
2989 __ bind(noException);
2990
2991 // Only register save data is on the stack.
2992 // Now restore the result registers. Everything else is either dead
2993 // or captured in the vframeArray.
2994 RegisterSaver::restore_result_registers(masm);
2995
  // All of the register save area has been popped off the stack. Only the
  // return address remains.
2998
2999 // Pop all the frames we must move/replace.
3000 //
3001 // Frame picture (youngest to oldest)
3002 // 1: self-frame (no frame link)
3003 // 2: deopting frame (no frame link)
3004 // 3: caller of deopting frame (could be compiled/interpreted).
3005 //
  // Note: by leaving the return address of self-frame on the stack
  // and using the size of frame 2 to adjust the stack,
  // the return address to frame 3 will still be on the stack when we are done.
3009
3010 // Pop deoptimized frame
3011 __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset()));
3012 __ addptr(rsp, rcx);
3013
3014 // rsp should be pointing at the return address to the caller (3)
3015
3016 // Pick up the initial fp we should save
3017 // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved)
3018 __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset()));
3019
3020 #ifdef ASSERT
  // Compilers generate code that bangs the stack by as much as the
  // interpreter would need. So this stack banging should never
  // trigger a fault. Verify that it does not on non-product builds.
3024 __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset()));
3025 __ bang_stack_size(rbx, rcx);
3026 #endif
3027
3028 // Load address of array of frame pcs into rcx
3029 __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset()));
3030
3031 // Trash the old pc
3032 __ addptr(rsp, wordSize);
3033
3034 // Load address of array of frame sizes into rsi
3035 __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset()));
3036
3037 // Load counter into rdx
3038 __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset()));
3039
  // Now adjust the caller's stack to make up for the extra locals
  // but record the original sp so that we can save it in the skeletal interpreter
  // frame; the stack walking of interpreter_sender will then get the unextended sp
  // value and not the "real" sp value.
3044
3045 const Register sender_sp = r8;
3046
3047 __ mov(sender_sp, rsp);
3048 __ movl(rbx, Address(rdi,
3049 Deoptimization::UnrollBlock::
3050 caller_adjustment_offset()));
3051 __ subptr(rsp, rbx);
3052
3053 // Push interpreter frames in a loop
3054 Label loop;
3055 __ bind(loop);
3056 __ movptr(rbx, Address(rsi, 0)); // Load frame size
3057 __ subptr(rbx, 2*wordSize); // We'll push pc and ebp by hand
3058 __ pushptr(Address(rcx, 0)); // Save return address
3059 __ enter(); // Save old & set new ebp
3060 __ subptr(rsp, rbx); // Prolog
3061 // This value is corrected by layout_activation_impl
3062 __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
3063 __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable
3064 __ mov(sender_sp, rsp); // Pass sender_sp to next frame
3065 __ addptr(rsi, wordSize); // Bump array pointer (sizes)
3066 __ addptr(rcx, wordSize); // Bump array pointer (pcs)
3067 __ decrementl(rdx); // Decrement counter
3068 __ jcc(Assembler::notZero, loop);
3069 __ pushptr(Address(rcx, 0)); // Save final return address
3070
3071 // Re-push self-frame
3072 __ enter(); // Save old & set new ebp
3073
  // Allocate a full-sized register save area.
  // Return address and rbp are in place, so we allocate two fewer words.
3076 __ subptr(rsp, (frame_size_in_words - 2) * wordSize);
3077
3078 // Restore frame locals after moving the frame
3079 __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0);
3080 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
3081
3082 // Call C code. Need thread but NOT official VM entry
3083 // crud. We cannot block on this call, no GC can happen. Call should
3084 // restore return values to their stack-slots with the new SP.
3085 //
3086 // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
3087
3088 // Use rbp because the frames look interpreted now
3089 // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
3090 // Don't need the precise return PC here, just precise enough to point into this code blob.
3091 address the_pc = __ pc();
3092 __ set_last_Java_frame(noreg, rbp, the_pc, rscratch1);
3093
3094 __ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by ABI
3095 __ mov(c_rarg0, r15_thread);
3096 __ movl(c_rarg1, r14); // second arg: exec_mode
3097 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
3098 // Revert SP alignment after call since we're going to do some SP relative addressing below
3099 __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset()));
3100
3101 // Set an oopmap for the call site
3102 // Use the same PC we used for the last java frame
3103 oop_maps->add_gc_map(the_pc - start,
3104 new OopMap( frame_size_in_words, 0 ));
3105
3106 // Clear fp AND pc
3107 __ reset_last_Java_frame(true);
3108
3109 // Collect return values
3110 __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
3111 __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes()));
  // Note: likely redundant (throwing pc?)
3113 __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes()));
3114
3115 // Pop self-frame.
3116 __ leave(); // Epilog
3117
3118 // Jump to interpreter
3119 __ ret(0);
3120
3121 // Make sure all code is generated
3122 masm->flush();
3123
3124 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
3125 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3126 #if INCLUDE_JVMCI
3127 if (EnableJVMCI) {
3128 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
3129 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
3130 }
3131 #endif
3132
3133 AOTCodeCache::store_code_blob(*_deopt_blob, AOTCodeEntry::SharedBlob, BlobId::shared_deopt_id);
3134 }
3135
3136 //------------------------------generate_handler_blob------
3137 //
// Generate a special Compile2Runtime blob that saves all registers,
// and sets up an oopmap.
3140 //
3141 SafepointBlob* SharedRuntime::generate_handler_blob(StubId id, address call_ptr) {
3142 assert(StubRoutines::forward_exception_entry() != nullptr,
3143 "must be generated before");
3144 assert(is_polling_page_id(id), "expected a polling page stub id");
3145
3146 // Allocate space for the code. Setup code generation tools.
3147 const char* name = SharedRuntime::stub_name(id);
3148 CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, StubInfo::blob(id));
3149 if (blob != nullptr) {
3150 return blob->as_safepoint_blob();
3151 }
3152
3153 ResourceMark rm;
3154 OopMapSet *oop_maps = new OopMapSet();
3155 OopMap* map;
3156 CodeBuffer buffer(name, 2548, 1024);
3157 MacroAssembler* masm = new MacroAssembler(&buffer);
3158
3159 address start = __ pc();
3160 address call_pc = nullptr;
3161 int frame_size_in_words;
3162 bool cause_return = (id == StubId::shared_polling_page_return_handler_id);
3163 bool save_wide_vectors = (id == StubId::shared_polling_page_vectors_safepoint_handler_id);
3164
3165 // Make room for return address (or push it again)
3166 if (!cause_return) {
3167 __ push(rbx);
3168 }
3169
3170 // Save registers, fpu state, and flags
3171 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_wide_vectors);
3172
3173 // The following is basically a call_VM. However, we need the precise
3174 // address of the call in order to generate an oopmap. Hence, we do all the
3175 // work ourselves.
3176
3177 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1); // JavaFrameAnchor::capture_last_Java_pc() will get the pc from the return address, which we store next:
3178
  // The return address must always be correct so that the frame constructor
  // never sees an invalid pc.
3181
3182 if (!cause_return) {
3183 // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack.
3184 // Additionally, rbx is a callee saved register and we can look at it later to determine
3185 // if someone changed the return address for us!
3186 __ movptr(rbx, Address(r15_thread, JavaThread::saved_exception_pc_offset()));
3187 __ movptr(Address(rbp, wordSize), rbx);
3188 }
3189
3190 // Do the call
3191 __ mov(c_rarg0, r15_thread);
3192 __ call(RuntimeAddress(call_ptr));
3193
3194 // Set an oopmap for the call site. This oopmap will map all
3195 // oop-registers and debug-info registers as callee-saved. This
3196 // will allow deoptimization at this safepoint to find all possible
3197 // debug-info recordings, as well as let GC find all oops.
3198
3199 oop_maps->add_gc_map( __ pc() - start, map);
3200
3201 Label noException;
3202
3203 __ reset_last_Java_frame(false);
3204
3205 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
3206 __ jcc(Assembler::equal, noException);
3207
3208 // Exception pending
3209
3210 RegisterSaver::restore_live_registers(masm, save_wide_vectors);
3211
3212 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3213
3214 // No exception case
3215 __ bind(noException);
3216
3217 Label no_adjust;
3218 #ifdef ASSERT
3219 Label bail;
3220 #endif
3221 if (!cause_return) {
3222 Label no_prefix, not_special, check_rex_prefix;
3223
3224 // If our stashed return pc was modified by the runtime we avoid touching it
3225 __ cmpptr(rbx, Address(rbp, wordSize));
3226 __ jcc(Assembler::notEqual, no_adjust);
3227
3228 // Skip over the poll instruction.
3229 // See NativeInstruction::is_safepoint_poll()
3230 // Possible encodings:
3231 // 85 00 test %eax,(%rax)
3232 // 85 01 test %eax,(%rcx)
3233 // 85 02 test %eax,(%rdx)
3234 // 85 03 test %eax,(%rbx)
3235 // 85 06 test %eax,(%rsi)
3236 // 85 07 test %eax,(%rdi)
3237 //
3238 // 41 85 00 test %eax,(%r8)
3239 // 41 85 01 test %eax,(%r9)
3240 // 41 85 02 test %eax,(%r10)
3241 // 41 85 03 test %eax,(%r11)
3242 // 41 85 06 test %eax,(%r14)
3243 // 41 85 07 test %eax,(%r15)
3244 //
3245 // 85 04 24 test %eax,(%rsp)
3246 // 41 85 04 24 test %eax,(%r12)
3247 // 85 45 00 test %eax,0x0(%rbp)
3248 // 41 85 45 00 test %eax,0x0(%r13)
3249 //
    // Notes:
    //   Format of the legacy MAP0 test instruction:
    //   [REX/REX2] [OPCODE] [ModRM] [SIB] [DISP] [IMM32]
    //   o  For the safepoint polling instruction "test %eax,(%rax)", the encodings of the first
    //      register operand and of the base register of the memory operand are in the range
    //      [0, 8), so no additional REX prefix (whose REX.B bit holds the MSB of the register
    //      encoding) is required, and a two-byte encoding is sufficient.
    //   o  For a safepoint polling instruction like "test %eax,(%r8)", the base register of the
    //      memory operand encodes as 1000b, so an additional REX prefix is required, adding one
    //      byte to the instruction encoding.
    //   o  If the base register is one of the extended GPRs available only on targets supporting
    //      the Intel APX extension, a two-byte REX2 prefix must be emitted to hold the most
    //      significant two bits of the 5-bit register encoding.
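    // In summary, the code below advances rbx (the stashed return pc) past the poll: by 2 if a
    // REX2 prefix is present, by 1 if a REX.B prefix is present, by 1 more if the base register
    // needs a SIB byte (rsp/r12) or a disp8 (rbp/r13), and finally by 2 for the opcode and
    // ModRM bytes of the test instruction itself.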
3263
3264 if (VM_Version::supports_apx_f()) {
3265 __ cmpb(Address(rbx, 0), Assembler::REX2);
3266 __ jccb(Assembler::notEqual, check_rex_prefix);
3267 __ addptr(rbx, 2);
3268 __ bind(check_rex_prefix);
3269 }
3270 __ cmpb(Address(rbx, 0), NativeTstRegMem::instruction_rex_b_prefix);
3271 __ jccb(Assembler::notEqual, no_prefix);
3272 __ addptr(rbx, 1);
3273 __ bind(no_prefix);
3274 #ifdef ASSERT
3275 __ movptr(rax, rbx); // remember where 0x85 should be, for verification below
3276 #endif
3277 // r12/r13/rsp/rbp base encoding takes 3 bytes with the following register values:
3278 // r12/rsp 0x04
3279 // r13/rbp 0x05
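    // (With rsp/r12 as the base the ModRM byte must be followed by a SIB byte, and with rbp/r13
    // as the base a zero disp8 must follow; either way the poll instruction grows by one byte.)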
3280 __ movzbq(rcx, Address(rbx, 1));
3281 __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05
3282 __ subptr(rcx, 4); // looking for 0x00 .. 0x01
3283 __ cmpptr(rcx, 1);
3284 __ jccb(Assembler::above, not_special);
3285 __ addptr(rbx, 1);
3286 __ bind(not_special);
3287 #ifdef ASSERT
3288 // Verify the correct encoding of the poll we're about to skip.
3289 __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl);
3290 __ jcc(Assembler::notEqual, bail);
3291 // Mask out the modrm bits
3292 __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask);
3293 // rax encodes to 0, so if the bits are nonzero it's incorrect
3294 __ jcc(Assembler::notZero, bail);
3295 #endif
3296 // Adjust return pc forward to step over the safepoint poll instruction
3297 __ addptr(rbx, 2);
3298 __ movptr(Address(rbp, wordSize), rbx);
3299 }
3300
3301 __ bind(no_adjust);
3302 // Normal exit, restore registers and exit.
3303 RegisterSaver::restore_live_registers(masm, save_wide_vectors);
3304 __ ret(0);
3305
3306 #ifdef ASSERT
3307 __ bind(bail);
3308 __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
3309 #endif
3310
3311 // Make sure all code is generated
3312 masm->flush();
3313
3314 // Fill-out other meta info
3315 SafepointBlob* sp_blob = SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
3316
3317 AOTCodeCache::store_code_blob(*sp_blob, AOTCodeEntry::SharedBlob, StubInfo::blob(id));
3318 return sp_blob;
3319 }
3320
3321 //
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3323 //
// Generate a stub that calls into the VM to find out the proper destination
// of a Java call. All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any GC of the args.
3328 //
3329 RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination) {
3330 assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
3331 assert(is_resolve_id(id), "expected a resolve stub id");
3332
3333 const char* name = SharedRuntime::stub_name(id);
3334 CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, StubInfo::blob(id));
3335 if (blob != nullptr) {
3336 return blob->as_runtime_stub();
3337 }
3338
3339 // allocate space for the code
3340 ResourceMark rm;
3341 CodeBuffer buffer(name, 1552, 512);
3342 MacroAssembler* masm = new MacroAssembler(&buffer);
3343
3344 int frame_size_in_words;
3345
3346 OopMapSet *oop_maps = new OopMapSet();
3347 OopMap* map = nullptr;
3348
3349 int start = __ offset();
3350
3351 // No need to save vector registers since they are caller-saved anyway.
3352 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_wide_vectors*/ false);
3353
3354 int frame_complete = __ offset();
3355
3356 __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);
3357
3358 __ mov(c_rarg0, r15_thread);
3359
3360 __ call(RuntimeAddress(destination));
3361
3362
3363 // Set an oopmap for the call site.
3364 // We need this not only for callee-saved registers, but also for volatile
3365 // registers that the compiler might be keeping live across a safepoint.
3366
3367 oop_maps->add_gc_map( __ offset() - start, map);
3368
3369 // rax contains the address we are going to jump to assuming no exception got installed
3370
3371 // clear last_Java_sp
3372 __ reset_last_Java_frame(false);
3373 // check for pending exceptions
3374 Label pending;
3375 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
3376 __ jcc(Assembler::notEqual, pending);
3377
3378 // get the returned Method*
3379 __ get_vm_result_metadata(rbx);
3380 __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx);
3381
3382 __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
3383
3384 RegisterSaver::restore_live_registers(masm);
3385
3386 // We are back to the original state on entry and ready to go.
3387
3388 __ jmp(rax);
3389
3390 // Pending exception after the safepoint
3391
3392 __ bind(pending);
3393
3394 RegisterSaver::restore_live_registers(masm);
3395
3396 // exception pending => remove activation and forward to exception handler
3397
3398 __ movptr(Address(r15_thread, JavaThread::vm_result_oop_offset()), NULL_WORD);
3399
3400 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
3401 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3402
3403 // -------------
3404 // make sure all code is generated
3405 masm->flush();
3406
  // Return the blob. Note that new_runtime_stub takes the frame size in words,
  // which is what frame_size_in_words holds.
3409 RuntimeStub* rs_blob = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3410
3411 AOTCodeCache::store_code_blob(*rs_blob, AOTCodeEntry::SharedBlob, StubInfo::blob(id));
3412 return rs_blob;
3413 }
3414
3415 // Continuation point for throwing of implicit exceptions that are
3416 // not handled in the current activation. Fabricates an exception
3417 // oop and initiates normal exception dispatching in this
3418 // frame. Since we need to preserve callee-saved values (currently
3419 // only for C2, but done for C1 as well) we need a callee-saved oop
3420 // map and therefore have to make these stubs into RuntimeStubs
3421 // rather than BufferBlobs. If the compiler needs all registers to
3422 // be preserved between the fault point and the exception handler
3423 // then it must assume responsibility for that in
3424 // AbstractCompiler::continuation_for_implicit_null_exception or
3425 // continuation_for_implicit_division_by_zero_exception. All other
3426 // implicit exceptions (e.g., NullPointerException or
3427 // AbstractMethodError on entry) are either at call sites or
3428 // otherwise assume that stack unwinding will be initiated, so
3429 // caller saved registers were assumed volatile in the compiler.
3430 RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) {
3431 assert(is_throw_id(id), "expected a throw stub id");
3432
3433 const char* name = SharedRuntime::stub_name(id);
3434
3435 // Information about frame layout at time of blocking runtime call.
3436 // Note that we only have to preserve callee-saved registers since
3437 // the compilers are responsible for supplying a continuation point
3438 // if they expect all registers to be preserved.
3439 enum layout {
3440 rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
3441 rbp_off2,
3442 return_off,
3443 return_off2,
3444 framesize // inclusive of return address
3445 };
3446
3447 int insts_size = 512;
3448 int locs_size = 64;
3449
3450 const char* timer_msg = "SharedRuntime generate_throw_exception";
3451 TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime));
3452
3453 CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::SharedBlob, StubInfo::blob(id));
3454 if (blob != nullptr) {
3455 return blob->as_runtime_stub();
3456 }
3457
3458 ResourceMark rm;
3459 CodeBuffer code(name, insts_size, locs_size);
3460 OopMapSet* oop_maps = new OopMapSet();
3461 MacroAssembler* masm = new MacroAssembler(&code);
3462
3463 address start = __ pc();
3464
3465 // This is an inlined and slightly modified version of call_VM
3466 // which has the ability to fetch the return PC out of
3467 // thread-local storage and also sets up last_Java_sp slightly
3468 // differently than the real call_VM
3469
3470 __ enter(); // required for proper stackwalking of RuntimeStub frame
3471
3472 assert(is_even(framesize/2), "sp not 16-byte aligned");
3473
3474 // return address and rbp are already in place
3475 __ subptr(rsp, (framesize-4) << LogBytesPerInt); // prolog
3476
3477 int frame_complete = __ pc() - start;
3478
3479 // Set up last_Java_sp and last_Java_fp
3480 address the_pc = __ pc();
3481 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1);
3482 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack
3483
3484 // Call runtime
3485 __ movptr(c_rarg0, r15_thread);
3486 BLOCK_COMMENT("call runtime_entry");
3487 __ call(RuntimeAddress(runtime_entry));
3488
3489 // Generate oop map
3490 OopMap* map = new OopMap(framesize, 0);
3491
3492 oop_maps->add_gc_map(the_pc - start, map);
3493
3494 __ reset_last_Java_frame(true);
3495
3496 __ leave(); // required for proper stackwalking of RuntimeStub frame
3497
3498 // check for pending exceptions
3499 #ifdef ASSERT
3500 Label L;
3501 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
3502 __ jcc(Assembler::notEqual, L);
3503 __ should_not_reach_here();
3504 __ bind(L);
3505 #endif // ASSERT
3506 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3507
3508
3509 // codeBlob framesize is in words (not VMRegImpl::slot_size)
3510 RuntimeStub* stub =
3511 RuntimeStub::new_runtime_stub(name,
3512 &code,
3513 frame_complete,
3514 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3515 oop_maps, false);
3516 AOTCodeCache::store_code_blob(*stub, AOTCodeEntry::SharedBlob, StubInfo::blob(id));
3517
3518 return stub;
3519 }
3520
3521 //------------------------------Montgomery multiplication------------------------
3522 //
3523
3524 #ifndef _WINDOWS
3525
3526 // Subtract 0:b from carry:a. Return carry.
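// That is, treating (carry:a) and (0:b) as (len+1)-word little-endian numbers, this
// computes a -= b in place and returns the resulting most significant word.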
3527 static julong
3528 sub(julong a[], julong b[], julong carry, long len) {
3529 long long i = 0, cnt = len;
3530 julong tmp;
3531 asm volatile("clc; "
3532 "0: ; "
3533 "mov (%[b], %[i], 8), %[tmp]; "
3534 "sbb %[tmp], (%[a], %[i], 8); "
3535 "inc %[i]; dec %[cnt]; "
3536 "jne 0b; "
3537 "mov %[carry], %[tmp]; sbb $0, %[tmp]; "
3538 : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp)
3539 : [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
3540 : "memory");
3541 return tmp;
3542 }
3543
3544 // Multiply (unsigned) Long A by Long B, accumulating the double-
3545 // length result into the accumulator formed of T0, T1, and T2.
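// In effect this computes (T2:T1:T0) += A * B, where A * B is the full 128-bit
// product and carries propagate from T0 into T1 and T2.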
3546 #define MACC(A, B, T0, T1, T2) \
3547 do { \
3548 unsigned long hi, lo; \
3549 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \
3550 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \
3551 : "r"(A), "a"(B) : "cc"); \
3552 } while(0)
3553
3554 // As above, but add twice the double-length result into the
3555 // accumulator.
3556 #define MACC2(A, B, T0, T1, T2) \
3557 do { \
3558 unsigned long hi, lo; \
3559 __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \
3560 "add %%rax, %2; adc %%rdx, %3; adc $0, %4" \
3561 : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \
3562 : "r"(A), "a"(B) : "cc"); \
3563 } while(0)
3564
3565 #else //_WINDOWS
3566
3567 static julong
3568 sub(julong a[], julong b[], julong carry, long len) {
3569 long i;
3570 julong tmp;
3571 unsigned char c = 1;
3572 for (i = 0; i < len; i++) {
3573 c = _addcarry_u64(c, a[i], ~b[i], &tmp);
3574 a[i] = tmp;
3575 }
3576 c = _addcarry_u64(c, carry, ~0, &tmp);
3577 return tmp;
3578 }
3579
3580 // Multiply (unsigned) Long A by Long B, accumulating the double-
3581 // length result into the accumulator formed of T0, T1, and T2.
3582 #define MACC(A, B, T0, T1, T2) \
3583 do { \
3584 julong hi, lo; \
3585 lo = _umul128(A, B, &hi); \
3586 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \
3587 c = _addcarry_u64(c, hi, T1, &T1); \
3588 _addcarry_u64(c, T2, 0, &T2); \
3589 } while(0)
3590
3591 // As above, but add twice the double-length result into the
3592 // accumulator.
3593 #define MACC2(A, B, T0, T1, T2) \
3594 do { \
3595 julong hi, lo; \
3596 lo = _umul128(A, B, &hi); \
3597 unsigned char c = _addcarry_u64(0, lo, T0, &T0); \
3598 c = _addcarry_u64(c, hi, T1, &T1); \
3599 _addcarry_u64(c, T2, 0, &T2); \
3600 c = _addcarry_u64(0, lo, T0, &T0); \
3601 c = _addcarry_u64(c, hi, T1, &T1); \
3602 _addcarry_u64(c, T2, 0, &T2); \
3603 } while(0)
3604
3605 #endif //_WINDOWS
3606
3607 // Fast Montgomery multiplication. The derivation of the algorithm is
3608 // in A Cryptographic Library for the Motorola DSP56000,
3609 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
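// The key invariant is inv == -n[0]^-1 mod 2^64 (checked below as inv * n[0] == ULLONG_MAX):
// choosing m[i] = t0 * inv makes the running total divisible by 2^64, so its low word can be
// discarded at each step.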
3610
3611 static void NOINLINE
3612 montgomery_multiply(julong a[], julong b[], julong n[],
3613 julong m[], julong inv, int len) {
3614 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3615 int i;
3616
3617 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery multiply");
3618
3619 for (i = 0; i < len; i++) {
3620 int j;
3621 for (j = 0; j < i; j++) {
3622 MACC(a[j], b[i-j], t0, t1, t2);
3623 MACC(m[j], n[i-j], t0, t1, t2);
3624 }
3625 MACC(a[i], b[0], t0, t1, t2);
3626 m[i] = t0 * inv;
3627 MACC(m[i], n[0], t0, t1, t2);
3628
3629 assert(t0 == 0, "broken Montgomery multiply");
3630
3631 t0 = t1; t1 = t2; t2 = 0;
3632 }
3633
3634 for (i = len; i < 2*len; i++) {
3635 int j;
3636 for (j = i-len+1; j < len; j++) {
3637 MACC(a[j], b[i-j], t0, t1, t2);
3638 MACC(m[j], n[i-j], t0, t1, t2);
3639 }
3640 m[i-len] = t0;
3641 t0 = t1; t1 = t2; t2 = 0;
3642 }
3643
3644 while (t0)
3645 t0 = sub(m, n, t0, len);
3646 }
3647
3648 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3649 // multiplies so it should be up to 25% faster than Montgomery
3650 // multiplication. However, its loop control is more complex and it
3651 // may actually run slower on some machines.
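// The saving comes from symmetry: for j != i-j the product a[j]*a[i-j] appears twice in the
// convolution, so it is computed once and added twice via MACC2.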
3652
3653 static void NOINLINE
3654 montgomery_square(julong a[], julong n[],
3655 julong m[], julong inv, int len) {
3656 julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3657 int i;
3658
3659 assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery square");
3660
3661 for (i = 0; i < len; i++) {
3662 int j;
3663 int end = (i+1)/2;
3664 for (j = 0; j < end; j++) {
3665 MACC2(a[j], a[i-j], t0, t1, t2);
3666 MACC(m[j], n[i-j], t0, t1, t2);
3667 }
3668 if ((i & 1) == 0) {
3669 MACC(a[j], a[j], t0, t1, t2);
3670 }
3671 for (; j < i; j++) {
3672 MACC(m[j], n[i-j], t0, t1, t2);
3673 }
3674 m[i] = t0 * inv;
3675 MACC(m[i], n[0], t0, t1, t2);
3676
3677 assert(t0 == 0, "broken Montgomery square");
3678
3679 t0 = t1; t1 = t2; t2 = 0;
3680 }
3681
3682 for (i = len; i < 2*len; i++) {
3683 int start = i-len+1;
3684 int end = start + (len - start)/2;
3685 int j;
3686 for (j = start; j < end; j++) {
3687 MACC2(a[j], a[i-j], t0, t1, t2);
3688 MACC(m[j], n[i-j], t0, t1, t2);
3689 }
3690 if ((i & 1) == 0) {
3691 MACC(a[j], a[j], t0, t1, t2);
3692 }
3693 for (; j < len; j++) {
3694 MACC(m[j], n[i-j], t0, t1, t2);
3695 }
3696 m[i-len] = t0;
3697 t0 = t1; t1 = t2; t2 = 0;
3698 }
3699
3700 while (t0)
3701 t0 = sub(m, n, t0, len);
3702 }
3703
3704 // Swap words in a longword.
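// e.g. 0x0011223344556677 becomes 0x4455667700112233.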
3705 static julong swap(julong x) {
3706 return (x << 32) | (x >> 32);
3707 }
3708
3709 // Copy len longwords from s to d, word-swapping as we go. The
3710 // destination array is reversed.
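// e.g. with len == 2: d[1] = swap(s[0]) and d[0] = swap(s[1]).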
3711 static void reverse_words(julong *s, julong *d, int len) {
3712 d += len;
3713 while(len-- > 0) {
3714 d--;
3715 *d = swap(*s);
3716 s++;
3717 }
3718 }
3719
3720 // The threshold at which squaring is advantageous was determined
3721 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3722 #define MONTGOMERY_SQUARING_THRESHOLD 64
3723
3724 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3725 jint len, jlong inv,
3726 jint *m_ints) {
3727 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3728 int longwords = len/2;
3729
  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints corresponds to a 16384-bit integer and
  // will use a total of 8K bytes of stack space here.
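  // (512 jints = 256 julong longwords; four scratch arrays of 256 longwords at 8 bytes each
  // come to 256 * 8 * 4 = 8192 bytes, matching the guarantee below.)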
3733 int divisor = sizeof(julong) * 4;
3734 guarantee(longwords <= 8192 / divisor, "must be");
3735 int total_allocation = longwords * sizeof (julong) * 4;
3736 julong *scratch = (julong *)alloca(total_allocation);
3737
3738 // Local scratch arrays
3739 julong
3740 *a = scratch + 0 * longwords,
3741 *b = scratch + 1 * longwords,
3742 *n = scratch + 2 * longwords,
3743 *m = scratch + 3 * longwords;
3744
3745 reverse_words((julong *)a_ints, a, longwords);
3746 reverse_words((julong *)b_ints, b, longwords);
3747 reverse_words((julong *)n_ints, n, longwords);
3748
3749 ::montgomery_multiply(a, b, n, m, (julong)inv, longwords);
3750
3751 reverse_words(m, (julong *)m_ints, longwords);
3752 }
3753
3754 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3755 jint len, jlong inv,
3756 jint *m_ints) {
3757 assert(len % 2 == 0, "array length in montgomery_square must be even");
3758 int longwords = len/2;
3759
  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints corresponds to a 16384-bit integer and
  // will use a total of 6K bytes of stack space here.
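  // (512 jints = 256 julong longwords; three scratch arrays of 256 longwords at 8 bytes each
  // come to 256 * 8 * 3 = 6144 bytes, well within the guarantee below.)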
3763 int divisor = sizeof(julong) * 3;
3764 guarantee(longwords <= (8192 / divisor), "must be");
3765 int total_allocation = longwords * sizeof (julong) * 3;
3766 julong *scratch = (julong *)alloca(total_allocation);
3767
3768 // Local scratch arrays
3769 julong
3770 *a = scratch + 0 * longwords,
3771 *n = scratch + 1 * longwords,
3772 *m = scratch + 2 * longwords;
3773
3774 reverse_words((julong *)a_ints, a, longwords);
3775 reverse_words((julong *)n_ints, n, longwords);
3776
3777 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3778 ::montgomery_square(a, n, m, (julong)inv, longwords);
3779 } else {
3780 ::montgomery_multiply(a, a, n, m, (julong)inv, longwords);
3781 }
3782
3783 reverse_words(m, (julong *)m_ints, longwords);
3784 }
3785
3786 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
3787 BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K);
3788 if (buf == nullptr) {
3789 return nullptr;
3790 }
3791 CodeBuffer buffer(buf);
3792 short buffer_locs[20];
3793 buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs,
3794 sizeof(buffer_locs)/sizeof(relocInfo));
3795
3796 MacroAssembler* masm = new MacroAssembler(&buffer);
3797
3798 const Array<SigEntry>* sig_vk = vk->extended_sig();
3799 const Array<VMRegPair>* regs = vk->return_regs();
3800
3801 int pack_fields_jobject_off = __ offset();
3802 // Resolve pre-allocated buffer from JNI handle.
3803 // We cannot do this in generate_call_stub() because it requires GC code to be initialized.
3804 __ movptr(rax, Address(r13, 0));
3805 __ resolve_jobject(rax /* value */,
3806 r12 /* tmp */);
3807 __ movptr(Address(r13, 0), rax);
3808
3809 int pack_fields_off = __ offset();
3810
3811 int j = 1;
3812 for (int i = 0; i < sig_vk->length(); i++) {
3813 BasicType bt = sig_vk->at(i)._bt;
3814 if (bt == T_METADATA) {
3815 continue;
3816 }
3817 if (bt == T_VOID) {
3818 if (sig_vk->at(i-1)._bt == T_LONG ||
3819 sig_vk->at(i-1)._bt == T_DOUBLE) {
3820 j++;
3821 }
3822 continue;
3823 }
3824 int off = sig_vk->at(i)._offset;
3825 assert(off > 0, "offset in object should be positive");
3826 VMRegPair pair = regs->at(j);
3827 VMReg r_1 = pair.first();
3828 VMReg r_2 = pair.second();
3829 Address to(rax, off);
3830 if (bt == T_FLOAT) {
3831 __ movflt(to, r_1->as_XMMRegister());
3832 } else if (bt == T_DOUBLE) {
3833 __ movdbl(to, r_1->as_XMMRegister());
3834 } else {
3835 Register val = r_1->as_Register();
3836 assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1);
3837 if (is_reference_type(bt)) {
3838 __ store_heap_oop(to, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
3839 } else {
3840 __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt));
3841 }
3842 }
3843 j++;
3844 }
3845 assert(j == regs->length(), "missed a field?");
3846 if (vk->has_nullable_atomic_layout()) {
3847 // Set the null marker
3848 __ movb(Address(rax, vk->null_marker_offset()), 1);
3849 }
3850 __ ret(0);
3851
3852 int unpack_fields_off = __ offset();
3853
3854 Label skip;
3855 Label not_null;
3856 __ testptr(rax, rax);
3857 __ jcc(Assembler::notZero, not_null);
3858
3859 // Return value is null. Zero oop registers to make the GC happy.
3860 j = 1;
3861 for (int i = 0; i < sig_vk->length(); i++) {
3862 BasicType bt = sig_vk->at(i)._bt;
3863 if (bt == T_METADATA) {
3864 continue;
3865 }
3866 if (bt == T_VOID) {
3867 if (sig_vk->at(i-1)._bt == T_LONG ||
3868 sig_vk->at(i-1)._bt == T_DOUBLE) {
3869 j++;
3870 }
3871 continue;
3872 }
3873 if (bt == T_OBJECT || bt == T_ARRAY) {
3874 VMRegPair pair = regs->at(j);
3875 VMReg r_1 = pair.first();
3876 __ xorq(r_1->as_Register(), r_1->as_Register());
3877 }
3878 j++;
3879 }
3880 __ jmp(skip);
3881 __ bind(not_null);
3882
3883 j = 1;
3884 for (int i = 0; i < sig_vk->length(); i++) {
3885 BasicType bt = sig_vk->at(i)._bt;
3886 if (bt == T_METADATA) {
3887 continue;
3888 }
3889 if (bt == T_VOID) {
3890 if (sig_vk->at(i-1)._bt == T_LONG ||
3891 sig_vk->at(i-1)._bt == T_DOUBLE) {
3892 j++;
3893 }
3894 continue;
3895 }
3896 int off = sig_vk->at(i)._offset;
3897 assert(off > 0, "offset in object should be positive");
3898 VMRegPair pair = regs->at(j);
3899 VMReg r_1 = pair.first();
3900 VMReg r_2 = pair.second();
3901 Address from(rax, off);
3902 if (bt == T_FLOAT) {
3903 __ movflt(r_1->as_XMMRegister(), from);
3904 } else if (bt == T_DOUBLE) {
3905 __ movdbl(r_1->as_XMMRegister(), from);
3906 } else if (bt == T_OBJECT || bt == T_ARRAY) {
3907 assert_different_registers(rax, r_1->as_Register());
3908 __ load_heap_oop(r_1->as_Register(), from);
3909 } else {
3910 assert(is_java_primitive(bt), "unexpected basic type");
3911 assert_different_registers(rax, r_1->as_Register());
3912 size_t size_in_bytes = type2aelembytes(bt);
3913 __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN);
3914 }
3915 j++;
3916 }
3917 assert(j == regs->length(), "missed a field?");
3918
3919 __ bind(skip);
3920 __ ret(0);
3921
3922 __ flush();
3923
3924 return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off);
3925 }
3926
3927 #if INCLUDE_JFR
3928
3929 // For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
3930 // It returns a jobject handle to the event writer.
3931 // The handle is dereferenced and the return value is the event writer oop.
3932 RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
3933 enum layout {
3934 rbp_off,
3935 rbpH_off,
3936 return_off,
3937 return_off2,
3938 framesize // inclusive of return address
3939 };
3940
3941 const char* name = SharedRuntime::stub_name(StubId::shared_jfr_write_checkpoint_id);
3942 CodeBuffer code(name, 1024, 64);
3943 MacroAssembler* masm = new MacroAssembler(&code);
3944 address start = __ pc();
3945
3946 __ enter();
3947 address the_pc = __ pc();
3948
3949 int frame_complete = the_pc - start;
3950
3951 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch1);
3952 __ movptr(c_rarg0, r15_thread);
3953 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), 1);
3954 __ reset_last_Java_frame(true);
3955
3956 // rax is jobject handle result, unpack and process it through a barrier.
3957 __ resolve_global_jobject(rax, c_rarg0);
3958
3959 __ leave();
3960 __ ret(0);
3961
3962 OopMapSet* oop_maps = new OopMapSet();
3963 OopMap* map = new OopMap(framesize, 1);
3964 oop_maps->add_gc_map(frame_complete, map);
3965
3966 RuntimeStub* stub =
3967 RuntimeStub::new_runtime_stub(name,
3968 &code,
3969 frame_complete,
3970 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3971 oop_maps,
3972 false);
3973 return stub;
3974 }
3975
3976 // For c2: call to return a leased buffer.
3977 RuntimeStub* SharedRuntime::generate_jfr_return_lease() {
3978 enum layout {
3979 rbp_off,
3980 rbpH_off,
3981 return_off,
3982 return_off2,
3983 framesize // inclusive of return address
3984 };
3985
3986 const char* name = SharedRuntime::stub_name(StubId::shared_jfr_return_lease_id);
3987 CodeBuffer code(name, 1024, 64);
3988 MacroAssembler* masm = new MacroAssembler(&code);
3989 address start = __ pc();
3990
3991 __ enter();
3992 address the_pc = __ pc();
3993
3994 int frame_complete = the_pc - start;
3995
3996 __ set_last_Java_frame(rsp, rbp, the_pc, rscratch2);
3997 __ movptr(c_rarg0, r15_thread);
3998 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), 1);
3999 __ reset_last_Java_frame(true);
4000
4001 __ leave();
4002 __ ret(0);
4003
4004 OopMapSet* oop_maps = new OopMapSet();
4005 OopMap* map = new OopMap(framesize, 1);
4006 oop_maps->add_gc_map(frame_complete, map);
4007
4008 RuntimeStub* stub =
4009 RuntimeStub::new_runtime_stub(name,
4010 &code,
4011 frame_complete,
4012 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
4013 oop_maps,
4014 false);
4015 return stub;
4016 }
4017
4018 #endif // INCLUDE_JFR