1 /*
2 * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "asm/macroAssembler.inline.hpp"
27 #include "code/debugInfoRec.hpp"
28 #include "code/compiledIC.hpp"
29 #include "code/vtableStubs.hpp"
30 #include "frame_ppc.hpp"
31 #include "compiler/oopMap.hpp"
32 #include "gc/shared/gcLocker.hpp"
33 #include "interpreter/interpreter.hpp"
34 #include "interpreter/interp_masm.hpp"
35 #include "memory/resourceArea.hpp"
36 #include "oops/klass.inline.hpp"
37 #include "prims/methodHandles.hpp"
38 #include "runtime/continuation.hpp"
39 #include "runtime/continuationEntry.inline.hpp"
40 #include "runtime/jniHandles.hpp"
41 #include "runtime/os.inline.hpp"
42 #include "runtime/safepointMechanism.hpp"
43 #include "runtime/sharedRuntime.hpp"
44 #include "runtime/signature.hpp"
45 #include "runtime/stubRoutines.hpp"
46 #include "runtime/timerTrace.hpp"
47 #include "runtime/vframeArray.hpp"
48 #include "utilities/align.hpp"
49 #include "utilities/macros.hpp"
50 #include "vmreg_ppc.inline.hpp"
51 #ifdef COMPILER1
52 #include "c1/c1_Runtime1.hpp"
53 #endif
54 #ifdef COMPILER2
55 #include "opto/ad.hpp"
56 #include "opto/runtime.hpp"
57 #endif
58
59 #include <alloca.h>
60
// Shorthand used throughout this file: "__ insn(...)" expands to
// "masm->insn(...)".
#define __ masm->

// BLOCK_COMMENT(str) forwards to masm->block_comment(str); in PRODUCT builds
// it expands to nothing.
#ifndef PRODUCT
#define BLOCK_COMMENT(str) __ block_comment(str)
#else
#define BLOCK_COMMENT(str)
#endif

// Binds 'label' and follows it with a block comment "<label>:".
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
70
71
72 class RegisterSaver {
73 // Used for saving volatile registers.
74 public:
75
76 // Support different return pc locations.
77 enum ReturnPCLocation {
78 return_pc_is_lr,
79 return_pc_is_pre_saved,
80 return_pc_is_thread_saved_exception_pc
81 };
82
83 static OopMap* push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
84 int* out_frame_size_in_bytes,
85 bool generate_oop_map,
86 ReturnPCLocation return_pc_location,
87 bool save_vectors = false);
88 static void restore_live_registers_and_pop_frame(MacroAssembler* masm,
89 int frame_size_in_bytes,
90 bool restore_ctr,
91 bool save_vectors = false);
92
93 static void push_frame_and_save_argument_registers(MacroAssembler* masm,
94 Register r_temp,
95 int frame_size,
96 int total_args,
97 const VMRegPair *regs, const VMRegPair *regs2 = nullptr);
98 static void restore_argument_registers_and_pop_frame(MacroAssembler*masm,
99 int frame_size,
100 int total_args,
101 const VMRegPair *regs, const VMRegPair *regs2 = nullptr);
102
103 // During deoptimization only the result registers need to be restored
104 // all the other values have already been extracted.
105 static void restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes, bool save_vectors);
106
107 // Constants and data structures:
108
109 typedef enum {
110 int_reg,
111 float_reg,
112 special_reg,
113 vec_reg
114 } RegisterType;
115
116 typedef enum {
117 reg_size = 8,
118 half_reg_size = reg_size / 2,
119 vec_reg_size = 16
120 } RegisterConstants;
121
122 typedef struct {
123 RegisterType reg_type;
124 int reg_num;
125 VMReg vmreg;
126 } LiveRegType;
127 };
128
129
// Builds one RegisterSaver::LiveRegType initializer
// { register class, encoding, VMReg } for the register 'regname'.
#define RegisterSaver_LiveReg(kind, regname) \
  { RegisterSaver::kind, regname->encoding(), regname->as_VMReg() }

// Per-register-class shorthands used in the live-register tables below.
#define RegisterSaver_LiveIntReg(regname)     RegisterSaver_LiveReg(int_reg,     regname)
#define RegisterSaver_LiveFloatReg(regname)   RegisterSaver_LiveReg(float_reg,   regname)
#define RegisterSaver_LiveSpecialReg(regname) RegisterSaver_LiveReg(special_reg, regname)
#define RegisterSaver_LiveVecReg(regname)     RegisterSaver_LiveReg(vec_reg,     regname)
141
142 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
143 // Live registers which get spilled to the stack. Register
144 // positions in this array correspond directly to the stack layout.
145
146 //
147 // live special registers:
148 //
149 RegisterSaver_LiveSpecialReg(SR_CTR),
150 //
151 // live float registers:
152 //
153 RegisterSaver_LiveFloatReg( F0 ),
154 RegisterSaver_LiveFloatReg( F1 ),
155 RegisterSaver_LiveFloatReg( F2 ),
156 RegisterSaver_LiveFloatReg( F3 ),
157 RegisterSaver_LiveFloatReg( F4 ),
158 RegisterSaver_LiveFloatReg( F5 ),
159 RegisterSaver_LiveFloatReg( F6 ),
160 RegisterSaver_LiveFloatReg( F7 ),
161 RegisterSaver_LiveFloatReg( F8 ),
162 RegisterSaver_LiveFloatReg( F9 ),
163 RegisterSaver_LiveFloatReg( F10 ),
164 RegisterSaver_LiveFloatReg( F11 ),
165 RegisterSaver_LiveFloatReg( F12 ),
166 RegisterSaver_LiveFloatReg( F13 ),
167 RegisterSaver_LiveFloatReg( F14 ),
168 RegisterSaver_LiveFloatReg( F15 ),
169 RegisterSaver_LiveFloatReg( F16 ),
170 RegisterSaver_LiveFloatReg( F17 ),
171 RegisterSaver_LiveFloatReg( F18 ),
172 RegisterSaver_LiveFloatReg( F19 ),
173 RegisterSaver_LiveFloatReg( F20 ),
174 RegisterSaver_LiveFloatReg( F21 ),
175 RegisterSaver_LiveFloatReg( F22 ),
176 RegisterSaver_LiveFloatReg( F23 ),
177 RegisterSaver_LiveFloatReg( F24 ),
178 RegisterSaver_LiveFloatReg( F25 ),
179 RegisterSaver_LiveFloatReg( F26 ),
180 RegisterSaver_LiveFloatReg( F27 ),
181 RegisterSaver_LiveFloatReg( F28 ),
182 RegisterSaver_LiveFloatReg( F29 ),
183 RegisterSaver_LiveFloatReg( F30 ),
184 RegisterSaver_LiveFloatReg( F31 ),
185 //
186 // live integer registers:
187 //
188 RegisterSaver_LiveIntReg( R0 ),
189 //RegisterSaver_LiveIntReg( R1 ), // stack pointer
190 RegisterSaver_LiveIntReg( R2 ),
191 RegisterSaver_LiveIntReg( R3 ),
192 RegisterSaver_LiveIntReg( R4 ),
193 RegisterSaver_LiveIntReg( R5 ),
194 RegisterSaver_LiveIntReg( R6 ),
195 RegisterSaver_LiveIntReg( R7 ),
196 RegisterSaver_LiveIntReg( R8 ),
197 RegisterSaver_LiveIntReg( R9 ),
198 RegisterSaver_LiveIntReg( R10 ),
199 RegisterSaver_LiveIntReg( R11 ),
200 RegisterSaver_LiveIntReg( R12 ),
201 //RegisterSaver_LiveIntReg( R13 ), // system thread id
202 RegisterSaver_LiveIntReg( R14 ),
203 RegisterSaver_LiveIntReg( R15 ),
204 RegisterSaver_LiveIntReg( R16 ),
205 RegisterSaver_LiveIntReg( R17 ),
206 RegisterSaver_LiveIntReg( R18 ),
207 RegisterSaver_LiveIntReg( R19 ),
208 RegisterSaver_LiveIntReg( R20 ),
209 RegisterSaver_LiveIntReg( R21 ),
210 RegisterSaver_LiveIntReg( R22 ),
211 RegisterSaver_LiveIntReg( R23 ),
212 RegisterSaver_LiveIntReg( R24 ),
213 RegisterSaver_LiveIntReg( R25 ),
214 RegisterSaver_LiveIntReg( R26 ),
215 RegisterSaver_LiveIntReg( R27 ),
216 RegisterSaver_LiveIntReg( R28 ),
217 RegisterSaver_LiveIntReg( R29 ),
218 RegisterSaver_LiveIntReg( R30 ),
219 RegisterSaver_LiveIntReg( R31 ) // must be the last register (see save/restore functions below)
220 };
221
222 static const RegisterSaver::LiveRegType RegisterSaver_LiveVecRegs[] = {
223 //
224 // live vector registers (optional, only these ones are used by C2):
225 //
226 RegisterSaver_LiveVecReg( VR0 ),
227 RegisterSaver_LiveVecReg( VR1 ),
228 RegisterSaver_LiveVecReg( VR2 ),
229 RegisterSaver_LiveVecReg( VR3 ),
230 RegisterSaver_LiveVecReg( VR4 ),
231 RegisterSaver_LiveVecReg( VR5 ),
232 RegisterSaver_LiveVecReg( VR6 ),
233 RegisterSaver_LiveVecReg( VR7 ),
234 RegisterSaver_LiveVecReg( VR8 ),
235 RegisterSaver_LiveVecReg( VR9 ),
236 RegisterSaver_LiveVecReg( VR10 ),
237 RegisterSaver_LiveVecReg( VR11 ),
238 RegisterSaver_LiveVecReg( VR12 ),
239 RegisterSaver_LiveVecReg( VR13 ),
240 RegisterSaver_LiveVecReg( VR14 ),
241 RegisterSaver_LiveVecReg( VR15 ),
242 RegisterSaver_LiveVecReg( VR16 ),
243 RegisterSaver_LiveVecReg( VR17 ),
244 RegisterSaver_LiveVecReg( VR18 ),
245 RegisterSaver_LiveVecReg( VR19 ),
246 RegisterSaver_LiveVecReg( VR20 ),
247 RegisterSaver_LiveVecReg( VR21 ),
248 RegisterSaver_LiveVecReg( VR22 ),
249 RegisterSaver_LiveVecReg( VR23 ),
250 RegisterSaver_LiveVecReg( VR24 ),
251 RegisterSaver_LiveVecReg( VR25 ),
252 RegisterSaver_LiveVecReg( VR26 ),
253 RegisterSaver_LiveVecReg( VR27 ),
254 RegisterSaver_LiveVecReg( VR28 ),
255 RegisterSaver_LiveVecReg( VR29 ),
256 RegisterSaver_LiveVecReg( VR30 ),
257 RegisterSaver_LiveVecReg( VR31 )
258 };
259
260
261 OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
262 int* out_frame_size_in_bytes,
263 bool generate_oop_map,
264 ReturnPCLocation return_pc_location,
265 bool save_vectors) {
266 // Push an abi_reg_args-frame and store all registers which may be live.
267 // If requested, create an OopMap: Record volatile registers as
268 // callee-save values in an OopMap so their save locations will be
269 // propagated to the RegisterMap of the caller frame during
270 // StackFrameStream construction (needed for deoptimization; see
271 // compiledVFrame::create_stack_value).
272 // Updated return pc is returned in R31 (if not return_pc_is_pre_saved).
273
274 // calculate frame size
275 const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
276 sizeof(RegisterSaver::LiveRegType);
277 const int vecregstosave_num = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
278 sizeof(RegisterSaver::LiveRegType))
279 : 0;
280 const int register_save_size = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;
281 const int frame_size_in_bytes = align_up(register_save_size, frame::alignment_in_bytes)
282 + frame::native_abi_reg_args_size;
283
284 *out_frame_size_in_bytes = frame_size_in_bytes;
285 const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
286 const int register_save_offset = frame_size_in_bytes - register_save_size;
287
288 // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
289 OopMap* map = generate_oop_map ? new OopMap(frame_size_in_slots, 0) : nullptr;
290
291 BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");
292
293 // push a new frame
294 __ push_frame(frame_size_in_bytes, noreg);
295
296 // Save some registers in the last (non-vector) slots of the new frame so we
297 // can use them as scratch regs or to determine the return pc.
298 __ std(R31, frame_size_in_bytes - reg_size - vecregstosave_num * vec_reg_size, R1_SP);
299 __ std(R30, frame_size_in_bytes - 2*reg_size - vecregstosave_num * vec_reg_size, R1_SP);
300
301 // save the flags
302 // Do the save_LR by hand and adjust the return pc if requested.
303 switch (return_pc_location) {
304 case return_pc_is_lr: __ mflr(R31); break;
305 case return_pc_is_pre_saved: break;
306 case return_pc_is_thread_saved_exception_pc: __ ld(R31, thread_(saved_exception_pc)); break;
307 default: ShouldNotReachHere();
308 }
309 if (return_pc_location != return_pc_is_pre_saved) {
310 __ std(R31, frame_size_in_bytes + _abi0(lr), R1_SP);
311 }
312
313 // save all registers (ints and floats)
314 int offset = register_save_offset;
315
316 for (int i = 0; i < regstosave_num; i++) {
317 int reg_num = RegisterSaver_LiveRegs[i].reg_num;
318 int reg_type = RegisterSaver_LiveRegs[i].reg_type;
319
320 switch (reg_type) {
321 case RegisterSaver::int_reg: {
322 if (reg_num < 30) { // We spilled R30-31 right at the beginning.
323 __ std(as_Register(reg_num), offset, R1_SP);
324 }
325 break;
326 }
327 case RegisterSaver::float_reg: {
328 __ stfd(as_FloatRegister(reg_num), offset, R1_SP);
329 break;
330 }
331 case RegisterSaver::special_reg: {
332 if (reg_num == SR_CTR.encoding()) {
333 __ mfctr(R30);
334 __ std(R30, offset, R1_SP);
335 } else {
336 Unimplemented();
337 }
338 break;
339 }
340 default:
341 ShouldNotReachHere();
342 }
343
344 if (generate_oop_map) {
345 map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2),
346 RegisterSaver_LiveRegs[i].vmreg);
347 }
348 offset += reg_size;
349 }
350
351 // Note that generate_oop_map in the following loop is only used for the
352 // polling_page_vectors_safepoint_handler_blob and the deopt_blob.
353 // The order in which the vector contents are stored depends on Endianess and
354 // the utilized instructions (PowerArchitecturePPC64).
355 assert(is_aligned(offset, StackAlignmentInBytes), "should be");
356 if (PowerArchitecturePPC64 >= 10) {
357 assert(is_even(vecregstosave_num), "expectation");
358 for (int i = 0; i < vecregstosave_num; i += 2) {
359 int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;
360 assert(RegisterSaver_LiveVecRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!");
361
362 __ stxvp(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);
363 // Note: The contents were read in the same order (see loadV16 node in ppc.ad).
364 // RegisterMap::pd_location only uses the first VMReg for each VectorRegister.
365 if (generate_oop_map) {
366 map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2),
367 RegisterSaver_LiveVecRegs[i LITTLE_ENDIAN_ONLY(+1) ].vmreg);
368 map->set_callee_saved(VMRegImpl::stack2reg((offset + vec_reg_size) >> 2),
369 RegisterSaver_LiveVecRegs[i BIG_ENDIAN_ONLY(+1) ].vmreg);
370 }
371 offset += (2 * vec_reg_size);
372 }
373 } else {
374 for (int i = 0; i < vecregstosave_num; i++) {
375 int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;
376
377 __ stxv(as_VectorRegister(reg_num)->to_vsr(), offset, R1_SP);
378 // Note: The contents were read in the same order (see loadV16 node in ppc.ad).
379 // RegisterMap::pd_location only uses the first VMReg for each VectorRegister.
380 if (generate_oop_map) {
381 VMReg vsr = RegisterSaver_LiveVecRegs[i].vmreg;
382 map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), vsr);
383 }
384 offset += vec_reg_size;
385 }
386 }
387
388 assert(offset == frame_size_in_bytes, "consistency check");
389
390 BLOCK_COMMENT("} push_frame_reg_args_and_save_live_registers");
391
392 // And we're done.
393 return map;
394 }
395
396
397 // Pop the current frame and restore all the registers that we
398 // saved.
399 void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
400 int frame_size_in_bytes,
401 bool restore_ctr,
402 bool save_vectors) {
403 const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
404 sizeof(RegisterSaver::LiveRegType);
405 const int vecregstosave_num = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
406 sizeof(RegisterSaver::LiveRegType))
407 : 0;
408 const int register_save_size = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;
409
410 const int register_save_offset = frame_size_in_bytes - register_save_size;
411
412 BLOCK_COMMENT("restore_live_registers_and_pop_frame {");
413
414 // restore all registers (ints and floats)
415 int offset = register_save_offset;
416
417 for (int i = 0; i < regstosave_num; i++) {
418 int reg_num = RegisterSaver_LiveRegs[i].reg_num;
419 int reg_type = RegisterSaver_LiveRegs[i].reg_type;
420
421 switch (reg_type) {
422 case RegisterSaver::int_reg: {
423 if (reg_num != 31) // R31 restored at the end, it's the tmp reg!
424 __ ld(as_Register(reg_num), offset, R1_SP);
425 break;
426 }
427 case RegisterSaver::float_reg: {
428 __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
429 break;
430 }
431 case RegisterSaver::special_reg: {
432 if (reg_num == SR_CTR.encoding()) {
433 if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
434 __ ld(R31, offset, R1_SP);
435 __ mtctr(R31);
436 }
437 } else {
438 Unimplemented();
439 }
440 break;
441 }
442 default:
443 ShouldNotReachHere();
444 }
445 offset += reg_size;
446 }
447
448 assert(is_aligned(offset, StackAlignmentInBytes), "should be");
449 if (PowerArchitecturePPC64 >= 10) {
450 for (int i = 0; i < vecregstosave_num; i += 2) {
451 int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;
452 assert(RegisterSaver_LiveVecRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!");
453
454 __ lxvp(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);
455
456 offset += (2 * vec_reg_size);
457 }
458 } else {
459 for (int i = 0; i < vecregstosave_num; i++) {
460 int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;
461
462 __ lxv(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);
463
464 offset += vec_reg_size;
465 }
466 }
467
468 assert(offset == frame_size_in_bytes, "consistency check");
469
470 // restore link and the flags
471 __ ld(R31, frame_size_in_bytes + _abi0(lr), R1_SP);
472 __ mtlr(R31);
473
474 // restore scratch register's value
475 __ ld(R31, frame_size_in_bytes - reg_size - vecregstosave_num * vec_reg_size, R1_SP);
476
477 // pop the frame
478 __ addi(R1_SP, R1_SP, frame_size_in_bytes);
479
480 BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
481 }
482
483 void RegisterSaver::push_frame_and_save_argument_registers(MacroAssembler* masm, Register r_temp,
484 int frame_size,int total_args, const VMRegPair *regs,
485 const VMRegPair *regs2) {
486 __ push_frame(frame_size, r_temp);
487 int st_off = frame_size - wordSize;
488 for (int i = 0; i < total_args; i++) {
489 VMReg r_1 = regs[i].first();
490 VMReg r_2 = regs[i].second();
491 if (!r_1->is_valid()) {
492 assert(!r_2->is_valid(), "");
493 continue;
494 }
495 if (r_1->is_Register()) {
496 Register r = r_1->as_Register();
497 __ std(r, st_off, R1_SP);
498 st_off -= wordSize;
499 } else if (r_1->is_FloatRegister()) {
500 FloatRegister f = r_1->as_FloatRegister();
501 __ stfd(f, st_off, R1_SP);
502 st_off -= wordSize;
503 }
504 }
505 if (regs2 != nullptr) {
506 for (int i = 0; i < total_args; i++) {
507 VMReg r_1 = regs2[i].first();
508 VMReg r_2 = regs2[i].second();
509 if (!r_1->is_valid()) {
510 assert(!r_2->is_valid(), "");
511 continue;
512 }
513 if (r_1->is_Register()) {
514 Register r = r_1->as_Register();
515 __ std(r, st_off, R1_SP);
516 st_off -= wordSize;
517 } else if (r_1->is_FloatRegister()) {
518 FloatRegister f = r_1->as_FloatRegister();
519 __ stfd(f, st_off, R1_SP);
520 st_off -= wordSize;
521 }
522 }
523 }
524 }
525
526 void RegisterSaver::restore_argument_registers_and_pop_frame(MacroAssembler*masm, int frame_size,
527 int total_args, const VMRegPair *regs,
528 const VMRegPair *regs2) {
529 int st_off = frame_size - wordSize;
530 for (int i = 0; i < total_args; i++) {
531 VMReg r_1 = regs[i].first();
532 VMReg r_2 = regs[i].second();
533 if (r_1->is_Register()) {
534 Register r = r_1->as_Register();
535 __ ld(r, st_off, R1_SP);
536 st_off -= wordSize;
537 } else if (r_1->is_FloatRegister()) {
538 FloatRegister f = r_1->as_FloatRegister();
539 __ lfd(f, st_off, R1_SP);
540 st_off -= wordSize;
541 }
542 }
543 if (regs2 != nullptr)
544 for (int i = 0; i < total_args; i++) {
545 VMReg r_1 = regs2[i].first();
546 VMReg r_2 = regs2[i].second();
547 if (r_1->is_Register()) {
548 Register r = r_1->as_Register();
549 __ ld(r, st_off, R1_SP);
550 st_off -= wordSize;
551 } else if (r_1->is_FloatRegister()) {
552 FloatRegister f = r_1->as_FloatRegister();
553 __ lfd(f, st_off, R1_SP);
554 st_off -= wordSize;
555 }
556 }
557 __ pop_frame();
558 }
559
560 // Restore the registers that might be holding a result.
561 void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes, bool save_vectors) {
562 const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
563 sizeof(RegisterSaver::LiveRegType);
564 const int vecregstosave_num = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
565 sizeof(RegisterSaver::LiveRegType))
566 : 0;
567 const int register_save_size = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;
568
569 const int register_save_offset = frame_size_in_bytes - register_save_size;
570
571 // restore all result registers (ints and floats)
572 int offset = register_save_offset;
573 for (int i = 0; i < regstosave_num; i++) {
574 int reg_num = RegisterSaver_LiveRegs[i].reg_num;
575 int reg_type = RegisterSaver_LiveRegs[i].reg_type;
576 switch (reg_type) {
577 case RegisterSaver::int_reg: {
578 if (as_Register(reg_num)==R3_RET) // int result_reg
579 __ ld(as_Register(reg_num), offset, R1_SP);
580 break;
581 }
582 case RegisterSaver::float_reg: {
583 if (as_FloatRegister(reg_num)==F1_RET) // float result_reg
584 __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
585 break;
586 }
587 case RegisterSaver::special_reg: {
588 // Special registers don't hold a result.
589 break;
590 }
591 default:
592 ShouldNotReachHere();
593 }
594 offset += reg_size;
595 }
596
597 assert(offset == frame_size_in_bytes - (save_vectors ? vecregstosave_num * vec_reg_size : 0), "consistency check");
598 }
599
600 // Is vector's size (in bytes) bigger than a size saved by default?
601 bool SharedRuntime::is_wide_vector(int size) {
602 // Note, MaxVectorSize == 8/16 on PPC64.
603 assert(size <= (SuperwordUseVSX ? 16 : 8), "%d bytes vectors are not supported", size);
604 return size > 8;
605 }
606
607 static int reg2slot(VMReg r) {
608 return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
609 }
610
611 static int reg2offset(VMReg r) {
612 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
613 }
614
615 // ---------------------------------------------------------------------------
616 // Read the array of BasicTypes from a signature, and compute where the
617 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
618 // quantities. Values less than VMRegImpl::stack0 are registers, those above
619 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
620 // as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
// up to Register::number_of_registers are the 64-bit
// integer registers.
625
626 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
627 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
628 // units regardless of build. Of course for i486 there is no 64 bit build
629
630 // In contrast to other platforms the Java calling convention is *NOT* a
631 // "shifted" version of the C ABI.
632
633 const VMReg java_iarg_reg[8] = {
634 R3->as_VMReg(),
635 R4->as_VMReg(),
636 R5->as_VMReg(),
637 R6->as_VMReg(),
638 R7->as_VMReg(),
639 R8->as_VMReg(),
640 R9->as_VMReg(),
641 R10->as_VMReg()
642 };
643
644 const VMReg java_farg_reg[13] = {
645 F1->as_VMReg(),
646 F2->as_VMReg(),
647 F3->as_VMReg(),
648 F4->as_VMReg(),
649 F5->as_VMReg(),
650 F6->as_VMReg(),
651 F7->as_VMReg(),
652 F8->as_VMReg(),
653 F9->as_VMReg(),
654 F10->as_VMReg(),
655 F11->as_VMReg(),
656 F12->as_VMReg(),
657 F13->as_VMReg()
658 };
659
660 const int num_java_iarg_registers = sizeof(java_iarg_reg) / sizeof(java_iarg_reg[0]);
661 const int num_java_farg_registers = sizeof(java_farg_reg) / sizeof(java_farg_reg[0]);
662
663 STATIC_ASSERT(num_java_iarg_registers == Argument::n_int_register_parameters_j);
664 STATIC_ASSERT(num_java_farg_registers == Argument::n_float_register_parameters_j);
665
666 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
667 VMRegPair *regs,
668 int total_args_passed) {
669 // C2c calling conventions for compiled-compiled calls.
670 // Put 8 ints/longs into registers _AND_ 13 float/doubles into
671 // registers _AND_ put the rest on the stack.
672
673 const int inc_stk_for_intfloat = 1; // 1 slots for ints and floats
674 const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles
675
676 int i;
677 VMReg reg;
678 int stk = 0;
679 int ireg = 0;
680 int freg = 0;
681
682 // We put the first 8 arguments into registers and the rest on the
683 // stack, float arguments are already in their argument registers
684 // due to c2c calling conventions (see calling_convention).
685 for (int i = 0; i < total_args_passed; ++i) {
686 switch(sig_bt[i]) {
687 case T_BOOLEAN:
688 case T_CHAR:
689 case T_BYTE:
690 case T_SHORT:
691 case T_INT:
692 if (ireg < num_java_iarg_registers) {
693 // Put int/ptr in register
694 reg = java_iarg_reg[ireg];
695 ++ireg;
696 } else {
697 // Put int/ptr on stack.
698 reg = VMRegImpl::stack2reg(stk);
699 stk += inc_stk_for_intfloat;
700 }
701 regs[i].set1(reg);
702 break;
703 case T_LONG:
704 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
705 if (ireg < num_java_iarg_registers) {
706 // Put long in register.
707 reg = java_iarg_reg[ireg];
708 ++ireg;
709 } else {
710 // Put long on stack. They must be aligned to 2 slots.
711 if (stk & 0x1) ++stk;
712 reg = VMRegImpl::stack2reg(stk);
713 stk += inc_stk_for_longdouble;
714 }
715 regs[i].set2(reg);
716 break;
717 case T_OBJECT:
718 case T_ARRAY:
719 case T_ADDRESS:
720 if (ireg < num_java_iarg_registers) {
721 // Put ptr in register.
722 reg = java_iarg_reg[ireg];
723 ++ireg;
724 } else {
725 // Put ptr on stack. Objects must be aligned to 2 slots too,
726 // because "64-bit pointers record oop-ishness on 2 aligned
727 // adjacent registers." (see OopFlow::build_oop_map).
728 if (stk & 0x1) ++stk;
729 reg = VMRegImpl::stack2reg(stk);
730 stk += inc_stk_for_longdouble;
731 }
732 regs[i].set2(reg);
733 break;
734 case T_FLOAT:
735 if (freg < num_java_farg_registers) {
736 // Put float in register.
737 reg = java_farg_reg[freg];
738 ++freg;
739 } else {
740 // Put float on stack.
741 reg = VMRegImpl::stack2reg(stk);
742 stk += inc_stk_for_intfloat;
743 }
744 regs[i].set1(reg);
745 break;
746 case T_DOUBLE:
747 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
748 if (freg < num_java_farg_registers) {
749 // Put double in register.
750 reg = java_farg_reg[freg];
751 ++freg;
752 } else {
753 // Put double on stack. They must be aligned to 2 slots.
754 if (stk & 0x1) ++stk;
755 reg = VMRegImpl::stack2reg(stk);
756 stk += inc_stk_for_longdouble;
757 }
758 regs[i].set2(reg);
759 break;
760 case T_VOID:
761 // Do not count halves.
762 regs[i].set_bad();
763 break;
764 default:
765 ShouldNotReachHere();
766 }
767 }
768 return stk;
769 }
770
771 // Similar to java_calling_convention() but for multiple return
772 // values. There's no way to store them on the stack so if we don't
773 // have enough registers, multiple values can't be returned.
774 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j;
775 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
776 int SharedRuntime::java_return_convention(const BasicType *sig_bt,
777 VMRegPair *regs,
778 int total_args_passed) {
779 // Create the mapping between argument positions and
780 // registers.
781 static const Register INT_ArgReg[java_return_convention_max_int] = {
782 R3_RET, R10_ARG8, R9_ARG7, R8_ARG6, R7_ARG5, R6_ARG4, R5_ARG3, R4_ARG2
783 };
784 static const FloatRegister FP_ArgReg[java_return_convention_max_float] = {
785 F1_RET, F2_ARG2, F3_ARG3, F4_ARG4, F5_ARG5, F6_ARG6, F7_ARG7, F8_ARG8,
786 F9_ARG9, F10_ARG10, F11_ARG11, F12_ARG12, F13_ARG13
787 };
788
789
790 uint int_args = 0;
791 uint fp_args = 0;
792
793 for (int i = 0; i < total_args_passed; i++) {
794 switch (sig_bt[i]) {
795 case T_BOOLEAN:
796 case T_CHAR:
797 case T_BYTE:
798 case T_SHORT:
799 case T_INT:
800 if (int_args < java_return_convention_max_int) {
801 regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
802 int_args++;
803 } else {
804 return -1;
805 }
806 break;
807 case T_VOID:
808 // halves of T_LONG or T_DOUBLE
809 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
810 regs[i].set_bad();
811 break;
812 case T_LONG:
813 assert(sig_bt[i + 1] == T_VOID, "expecting half");
814 // fall through
815 case T_OBJECT:
816 case T_ARRAY:
817 case T_ADDRESS:
818 case T_METADATA:
819 if (int_args < java_return_convention_max_int) {
820 regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
821 int_args++;
822 } else {
823 return -1;
824 }
825 break;
826 case T_FLOAT:
827 if (fp_args < java_return_convention_max_float) {
828 regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
829 fp_args++;
830 } else {
831 return -1;
832 }
833 break;
834 case T_DOUBLE:
835 assert(sig_bt[i + 1] == T_VOID, "expecting half");
836 if (fp_args < java_return_convention_max_float) {
837 regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
838 fp_args++;
839 } else {
840 return -1;
841 }
842 break;
843 default:
844 ShouldNotReachHere();
845 break;
846 }
847 }
848
849 return int_args + fp_args;
850 }
851
852 // Calling convention for calling C code.
853 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
854 VMRegPair *regs,
855 int total_args_passed) {
856 // Calling conventions for C runtime calls and calls to JNI native methods.
857 //
858 // PPC64 convention: Hoist the first 8 int/ptr/long's in the first 8
859 // int regs, leaving int regs undefined if the arg is flt/dbl. Hoist
860 // the first 13 flt/dbl's in the first 13 fp regs but additionally
861 // copy flt/dbl to the stack if they are beyond the 8th argument.
862
863 const VMReg iarg_reg[8] = {
864 R3->as_VMReg(),
865 R4->as_VMReg(),
866 R5->as_VMReg(),
867 R6->as_VMReg(),
868 R7->as_VMReg(),
869 R8->as_VMReg(),
870 R9->as_VMReg(),
871 R10->as_VMReg()
872 };
873
874 const VMReg farg_reg[13] = {
875 F1->as_VMReg(),
876 F2->as_VMReg(),
877 F3->as_VMReg(),
878 F4->as_VMReg(),
879 F5->as_VMReg(),
880 F6->as_VMReg(),
881 F7->as_VMReg(),
882 F8->as_VMReg(),
883 F9->as_VMReg(),
884 F10->as_VMReg(),
885 F11->as_VMReg(),
886 F12->as_VMReg(),
887 F13->as_VMReg()
888 };
889
890 // Check calling conventions consistency.
891 assert(sizeof(iarg_reg) / sizeof(iarg_reg[0]) == Argument::n_int_register_parameters_c &&
892 sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c,
893 "consistency");
894
895 const int additional_frame_header_slots = ((frame::native_abi_minframe_size - frame::jit_out_preserve_size)
896 / VMRegImpl::stack_slot_size);
897 const int float_offset_in_slots = Argument::float_on_stack_offset_in_bytes_c / VMRegImpl::stack_slot_size;
898
899 VMReg reg;
900 int arg = 0;
901 int freg = 0;
902 bool stack_used = false;
903
904 for (int i = 0; i < total_args_passed; ++i, ++arg) {
905 // Each argument corresponds to a slot in the Parameter Save Area (if not omitted)
906 int stk = (arg * 2) + additional_frame_header_slots;
907
908 switch(sig_bt[i]) {
909 //
910 // If arguments 0-7 are integers, they are passed in integer registers.
911 // Argument i is placed in iarg_reg[i].
912 //
913 case T_BOOLEAN:
914 case T_CHAR:
915 case T_BYTE:
916 case T_SHORT:
917 case T_INT:
918 // We must cast ints to longs and use full 64 bit stack slots
919 // here. Thus fall through, handle as long.
920 case T_LONG:
921 case T_OBJECT:
922 case T_ARRAY:
923 case T_ADDRESS:
924 case T_METADATA:
925 // Oops are already boxed if required (JNI).
926 if (arg < Argument::n_int_register_parameters_c) {
927 reg = iarg_reg[arg];
928 } else {
929 reg = VMRegImpl::stack2reg(stk);
930 stack_used = true;
931 }
932 regs[i].set2(reg);
933 break;
934
935 //
936 // Floats are treated differently from int regs: The first 13 float arguments
937 // are passed in registers (not the float args among the first 13 args).
938 // Thus argument i is NOT passed in farg_reg[i] if it is float. It is passed
939 // in farg_reg[j] if argument i is the j-th float argument of this call.
940 //
941 case T_FLOAT:
942 if (freg < Argument::n_float_register_parameters_c) {
943 // Put float in register ...
944 reg = farg_reg[freg];
945 ++freg;
946 } else {
947 // Put float on stack.
948 reg = VMRegImpl::stack2reg(stk + float_offset_in_slots);
949 stack_used = true;
950 }
951 regs[i].set1(reg);
952 break;
953 case T_DOUBLE:
954 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
955 if (freg < Argument::n_float_register_parameters_c) {
956 // Put double in register ...
957 reg = farg_reg[freg];
958 ++freg;
959 } else {
960 // Put double on stack.
961 reg = VMRegImpl::stack2reg(stk);
962 stack_used = true;
963 }
964 regs[i].set2(reg);
965 break;
966
967 case T_VOID:
968 // Do not count halves.
969 regs[i].set_bad();
970 --arg;
971 break;
972 default:
973 ShouldNotReachHere();
974 }
975 }
976
977 // Return size of the stack frame excluding the jit_out_preserve part in single-word slots.
978 #if defined(ABI_ELFv2)
979 assert(additional_frame_header_slots == 0, "ABIv2 shouldn't use extra slots");
980 // ABIv2 allows omitting the Parameter Save Area if the callee's prototype
981 // indicates that all parameters can be passed in registers.
982 return stack_used ? (arg * 2) : 0;
983 #else
984 // The Parameter Save Area needs to be at least 8 double-word slots for ABIv1.
985 // We have to add extra slots because ABIv1 uses a larger header.
986 return MAX2(arg, 8) * 2 + additional_frame_header_slots;
987 #endif
988 }
989
// Vector calling convention entry point. It is not implemented here: the body
// calls Unimplemented() and none of the parameters are read. The trailing
// return only satisfies the int return type.
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                             uint num_bits,
                                             uint total_args_passed) {
  Unimplemented();
  return 0;
}
996
997 // Patch the callers callsite with entry to compiled code if it exists.
998 static void patch_callers_callsite(MacroAssembler *masm, int adapter_size, int total_args_passed, const VMRegPair *regs) {
999 Label L;
1000 __ ld(R0, in_bytes(Method::code_offset()), R19_method);
1001 __ cmpdi(CR0, R0, 0);
1002 __ beq(CR0, L);
1003
1004 // Patch caller's callsite, method_(code) was not null which means that
1005 // compiled code exists.
1006 const Register return_pc = R11_scratch1;
1007 const Register tmp = R12_scratch2;
1008 __ mflr(return_pc);
1009 __ std(return_pc, _abi0(lr), R1_SP);
1010 RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs);
1011
1012 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), R19_method, return_pc);
1013
1014 RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs);
1015 __ ld(return_pc, _abi0(lr), R1_SP);
1016 __ mtlr(return_pc);
1017
1018 // callsite->set_to_clean() uses icache flush including isync
1019
1020 __ bind(L);
1021 }
1022
1023 // For each inline type argument, sig includes the list of fields of
1024 // the inline type. This utility function computes the number of
1025 // arguments for the call if inline types are passed by reference (the
1026 // calling convention the interpreter expects).
1027 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
1028 int total_args_passed = 0;
1029 if (InlineTypePassFieldsAsArgs) {
1030 for (int i = 0; i < sig_extended->length(); i++) {
1031 BasicType bt = sig_extended->at(i)._bt;
1032 if (bt == T_METADATA) {
1033 // In sig_extended, an inline type argument starts with:
1034 // T_METADATA, followed by the types of the fields of the
1035 // inline type and T_VOID to mark the end of the value
1036 // type. Inline types are flattened so, for instance, in the
1037 // case of an inline type with an int field and an inline type
1038 // field that itself has 2 fields, an int and a long:
1039 // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second
1040 // slot for the T_LONG) T_VOID (inner inline type) T_VOID
1041 // (outer inline type)
1042 total_args_passed++;
1043 int vt = 1;
1044 do {
1045 i++;
1046 BasicType bt = sig_extended->at(i)._bt;
1047 BasicType prev_bt = sig_extended->at(i-1)._bt;
1048 if (bt == T_METADATA) {
1049 vt++;
1050 } else if (bt == T_VOID &&
1051 prev_bt != T_LONG &&
1052 prev_bt != T_DOUBLE) {
1053 vt--;
1054 }
1055 } while (vt != 0);
1056 } else {
1057 total_args_passed++;
1058 }
1059 }
1060 } else {
1061 total_args_passed = sig_extended->length();
1062 }
1063 return total_args_passed;
1064 }
1065
1066 static void gen_c2i_adapter(MacroAssembler *masm,
1067 const GrowableArray<SigEntry>* sig_extended,
1068 const VMRegPair *regs,
1069 bool requires_clinit_barrier,
1070 address& c2i_no_clinit_check_entry,
1071 Label& skip_fixup,
1072 address start,
1073 OopMapSet* oop_maps,
1074 int& frame_complete,
1075 int& frame_size_in_words,
1076 bool alloc_inline_receiver) {
1077 if (requires_clinit_barrier) {
1078 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1079 Label L_skip_barrier;
1080
1081 // Bypass the barrier for non-static methods
1082 __ lhz(R0, in_bytes(Method::access_flags_offset()), R19_method);
1083 __ andi_(R0, R0, JVM_ACC_STATIC);
1084 __ beq(CR0, L_skip_barrier); // non-static
1085
1086 Register klass = R11_scratch1;
1087 __ load_method_holder(klass, R19_method);
1088 __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);
1089
1090 __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
1091 __ mtctr(klass);
1092 __ bctr();
1093
1094 __ bind(L_skip_barrier);
1095 c2i_no_clinit_check_entry = __ pc();
1096 }
1097
1098 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1099 bs->c2i_entry_barrier(masm, R11_scratch1, R12_scratch2, R21_tmp1);
1100
1101 // Since all args are passed on the stack, total_args_passed *
1102 // Interpreter::stackElementSize is the space we need.
1103 int total_args_passed = compute_total_args_passed_int(sig_extended);
1104 assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed);
1105
1106 // Adapter needs TOP_IJAVA_FRAME_ABI.
1107 const int adapter_size = frame::top_ijava_frame_abi_size +
1108 align_up(total_args_passed * wordSize, frame::alignment_in_bytes);
1109
1110 // Before we get into the guts of the C2I adapter, see if we should be here
1111 // at all. We've come from compiled code and are attempting to jump to the
1112 // interpreter, which means the caller made a static call to get here
1113 // (vcalls always get a compiled target if there is one). Check for a
1114 // compiled target. If there is one, we need to patch the caller's call.
1115 patch_callers_callsite(masm, adapter_size, total_args_passed, regs);
1116
1117 __ bind(skip_fixup);
1118
1119 if (InlineTypePassFieldsAsArgs) {
1120 // Is there an inline type argument?
1121 bool has_inline_argument = false;
1122 for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
1123 has_inline_argument = (sig_extended->at(i)._bt == T_METADATA);
1124 }
1125 if (has_inline_argument) {
1126 __ unimplemented("c2i has_inline_argument");
1127 }
1128 }
1129
1130 // Call the interpreter.
1131 const Register tmp = R22_tmp2, ientry = R23_tmp3;
1132 const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 };
1133 const int num_value_regs = sizeof(value_regs) / sizeof(Register);
1134 int value_regs_index = 0;
1135
1136 __ ld(ientry, method_(interpreter_entry)); // preloaded
1137 __ mtctr(ientry);
1138
1139 // Get a copy of the current SP for loading caller's arguments.
1140 __ mr(R21_sender_SP, R1_SP);
1141
1142 // Add space for the adapter.
1143 __ resize_frame(-adapter_size, R12_scratch2);
1144
1145 int st_off = adapter_size - wordSize;
1146
1147 // Write the args into the outgoing interpreter space.
1148 // TODO: support for InlineTypePassFieldsAsArgs
1149 for (int i = 0; i < total_args_passed; i++) {
1150 BasicType bt = sig_extended->at(i)._bt;
1151
1152 VMReg r_1 = regs[i].first();
1153 VMReg r_2 = regs[i].second();
1154 if (!r_1->is_valid()) {
1155 assert(!r_2->is_valid(), "");
1156 continue;
1157 }
1158 if (r_1->is_stack()) {
1159 Register tmp_reg = value_regs[value_regs_index];
1160 value_regs_index = (value_regs_index + 1) % num_value_regs;
1161 // The calling convention produces OptoRegs that ignore the out
1162 // preserve area (JIT's ABI). We must account for it here.
1163 int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
1164 if (!r_2->is_valid()) {
1165 __ lwz(tmp_reg, ld_off, R21_sender_SP);
1166 } else {
1167 __ ld(tmp_reg, ld_off, R21_sender_SP);
1168 }
1169 // Pretend stack targets were loaded into tmp_reg.
1170 r_1 = tmp_reg->as_VMReg();
1171 }
1172
1173 if (r_1->is_Register()) {
1174 Register r = r_1->as_Register();
1175 if (!r_2->is_valid()) {
1176 __ stw(r, st_off, R1_SP);
1177 st_off-=wordSize;
1178 } else {
1179 // Longs are given 2 64-bit slots in the interpreter, but the
1180 // data is passed in only 1 slot.
1181 if (bt == T_LONG || bt == T_DOUBLE) {
1182 DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
1183 st_off-=wordSize;
1184 }
1185 __ std(r, st_off, R1_SP);
1186 st_off-=wordSize;
1187 }
1188 } else {
1189 assert(r_1->is_FloatRegister(), "");
1190 FloatRegister f = r_1->as_FloatRegister();
1191 if (!r_2->is_valid()) {
1192 __ stfs(f, st_off, R1_SP);
1193 st_off-=wordSize;
1194 } else {
1195 // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
1196 // data is passed in only 1 slot.
1197 // One of these should get known junk...
1198 DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
1199 st_off-=wordSize;
1200 __ stfd(f, st_off, R1_SP);
1201 st_off-=wordSize;
1202 }
1203 }
1204 }
1205
1206 // Jump to the interpreter just as if interpreter was doing it.
1207
1208 __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
1209
1210 // load TOS
1211 __ addi(R15_esp, R1_SP, st_off);
1212
1213 __ bctr();
1214 }
1215
1216 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
1217 int comp_args_on_stack,
1218 const GrowableArray<SigEntry>* sig,
1219 const VMRegPair *regs) {
1220
1221 // Load method's entry-point from method.
1222 __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
1223 __ mtctr(R12_scratch2);
1224
1225 // We will only enter here from an interpreted frame and never from after
1226 // passing thru a c2i. Azul allowed this but we do not. If we lose the
1227 // race and use a c2i we will remain interpreted for the race loser(s).
1228 // This removes all sorts of headaches on the x86 side and also eliminates
1229 // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
1230
1231 // Note: r13 contains the senderSP on entry. We must preserve it since
1232 // we may do a i2c -> c2i transition if we lose a race where compiled
1233 // code goes non-entrant while we get args ready.
1234 // In addition we use r13 to locate all the interpreter args as
1235 // we must align the stack to 16 bytes on an i2c entry else we
1236 // lose alignment we expect in all compiled code and register
1237 // save code can segv when fxsave instructions find improperly
1238 // aligned stack pointer.
1239
1240 const Register ld_ptr = R15_esp;
1241 const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
1242 const int num_value_regs = sizeof(value_regs) / sizeof(Register);
1243 int value_regs_index = 0;
1244
1245 int total_args_passed = sig->length();
1246 int ld_offset = total_args_passed*wordSize;
1247
1248 // Cut-out for having no stack args. Since up to 2 int/oop args are passed
1249 // in registers, we will occasionally have no stack args.
1250 int comp_words_on_stack = 0;
1251 if (comp_args_on_stack) {
1252 // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
1253 // registers are below. By subtracting stack0, we either get a negative
1254 // number (all values in registers) or the maximum stack slot accessed.
1255
1256 // Convert 4-byte c2 stack slots to words.
1257 comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
1258 // Round up to miminum stack alignment, in wordSize.
1259 comp_words_on_stack = align_up(comp_words_on_stack, 2);
1260 __ resize_frame(-comp_words_on_stack * wordSize, R11_scratch1);
1261 }
1262
1263 // Now generate the shuffle code. Pick up all register args and move the
1264 // rest through register value=Z_R12.
1265 BLOCK_COMMENT("Shuffle arguments");
1266
1267 for (int i = 0; i < total_args_passed; i++) {
1268 BasicType bt = sig->at(i)._bt;
1269 if (bt == T_VOID) {
1270 assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half");
1271 continue;
1272 }
1273
1274 // Pick up 0, 1 or 2 words from ld_ptr.
1275 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
1276 "scrambled load targets?");
1277 VMReg r_1 = regs[i].first();
1278 VMReg r_2 = regs[i].second();
1279 if (!r_1->is_valid()) {
1280 assert(!r_2->is_valid(), "");
1281 continue;
1282 }
1283 if (r_1->is_FloatRegister()) {
1284 if (!r_2->is_valid()) {
1285 __ lfs(r_1->as_FloatRegister(), ld_offset, ld_ptr);
1286 ld_offset-=wordSize;
1287 } else {
1288 // Skip the unused interpreter slot.
1289 __ lfd(r_1->as_FloatRegister(), ld_offset-wordSize, ld_ptr);
1290 ld_offset-=2*wordSize;
1291 }
1292 } else {
1293 Register r;
1294 if (r_1->is_stack()) {
1295 // Must do a memory to memory move thru "value".
1296 r = value_regs[value_regs_index];
1297 value_regs_index = (value_regs_index + 1) % num_value_regs;
1298 } else {
1299 r = r_1->as_Register();
1300 }
1301 if (!r_2->is_valid()) {
1302 // Not sure we need to do this but it shouldn't hurt.
1303 if (is_reference_type(bt) || bt == T_ADDRESS) {
1304 __ ld(r, ld_offset, ld_ptr);
1305 ld_offset-=wordSize;
1306 } else {
1307 __ lwz(r, ld_offset, ld_ptr);
1308 ld_offset-=wordSize;
1309 }
1310 } else {
1311 // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
1312 // data is passed in only 1 slot.
1313 if (bt == T_LONG || bt == T_DOUBLE) {
1314 ld_offset-=wordSize;
1315 }
1316 __ ld(r, ld_offset, ld_ptr);
1317 ld_offset-=wordSize;
1318 }
1319
1320 if (r_1->is_stack()) {
1321 // Now store value where the compiler expects it
1322 int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size;
1323
1324 if (bt == T_INT || bt == T_FLOAT || bt == T_BOOLEAN ||
1325 bt == T_SHORT || bt == T_CHAR || bt == T_BYTE) {
1326 __ stw(r, st_off, R1_SP);
1327 } else {
1328 __ std(r, st_off, R1_SP);
1329 }
1330 }
1331 }
1332 }
1333
1334 __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about
1335
1336 BLOCK_COMMENT("Store method");
1337 // Store method into thread->callee_target.
1338 // We might end up in handle_wrong_method if the callee is
1339 // deoptimized as we race thru here. If that happens we don't want
1340 // to take a safepoint because the caller frame will look
1341 // interpreted and arguments are now "compiled" so it is much better
1342 // to make this transition invisible to the stack walking
1343 // code. Unfortunately if we try and find the callee by normal means
1344 // a safepoint is possible. So we stash the desired callee in the
1345 // thread and the vm will find there should this case occur.
1346 __ std(R19_method, thread_(callee_target));
1347
1348 // Jump to the compiled code just as if compiled code was doing it.
1349 __ bctr();
1350 }
1351
1352 static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
1353 __ ic_check(BytesPerInstWord /* end_alignment */);
1354 __ ld(R19_method, CompiledICData::speculated_method_offset(), R19_inline_cache_reg);
1355
1356 // Method might have been compiled since the call site was patched to
1357 // interpreted; if that is the case treat it as a miss so we can get
1358 // the call site corrected.
1359 __ ld(R0, method_(code));
1360 __ cmpdi(CR0, R0, 0);
1361 __ beq_predict_taken(CR0, skip_fixup);
1362
1363 // Branch to ic_miss_stub.
1364 __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
1365 }
1366
// Emits the i2c adapter followed by the c2i adapter code for one signature and
// records the start pc of every adapter entry in entry_address.
//
//   comp_args_on_stack, sig, regs  passed to gen_i2c_adapter
//   sig_cc, regs_cc                passed to gen_c2i_adapter for the C2I entry
//   sig_cc_ro, regs_cc_ro          only compared against regs_cc here; the code
//                                  that would use them is disabled (#if 0)
//   entry_address                  out: pc of each AdapterBlob entry
//   new_adapter                    out: blob created when allocate_code_blob is true
//   allocate_code_blob             whether to create the AdapterBlob at the end
//
// The variants that differ from the plain C2I entry (regs_cc != regs_cc_ro,
// regs != regs_cc) only emit __ unimplemented(...) at this point.
void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
                                            int comp_args_on_stack,
                                            const GrowableArray<SigEntry>* sig,
                                            const VMRegPair* regs,
                                            const GrowableArray<SigEntry>* sig_cc,
                                            const VMRegPair* regs_cc,
                                            const GrowableArray<SigEntry>* sig_cc_ro,
                                            const VMRegPair* regs_cc_ro,
                                            address entry_address[AdapterBlob::ENTRY_COUNT],
                                            AdapterBlob*& new_adapter,
                                            bool allocate_code_blob) {

  entry_address[AdapterBlob::I2C] = __ pc();
  gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);

  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know rmethod holds the Method* during calls
  // to the interpreter. The args start out packed in the compiled layout. They
  // need to be unpacked into the interpreter layout. This will almost always
  // require some stack space. We grow the current (compiled) stack, then repack
  // the args. We finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not FP, get sick).

  // Both unverified entries start at the inline cache check emitted below;
  // C2I_Unverified_Inline is reassigned further down when regs != regs_cc.
  entry_address[AdapterBlob::C2I_Unverified] = __ pc();
  entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc();
  Label skip_fixup;

  gen_inline_cache_check(masm, skip_fixup);

  OopMapSet* oop_maps = new OopMapSet();
  int frame_complete = CodeOffsets::frame_never_safe;
  int frame_size_in_words = 0;

  // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
  // C2I_No_Clinit_Check starts out as nullptr; gen_c2i_adapter overwrites it
  // when it emits a class-initialization barrier.
  entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
  entry_address[AdapterBlob::C2I_Inline_RO] = __ pc();
  if (regs_cc != regs_cc_ro) {
    // No class init barrier needed because method is guaranteed to be non-static
    __ unimplemented("C2I_Inline_RO");
#if 0
    gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, entry_address[AdapterBlob::C2I_No_Clinit_Check],
                    skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
#endif
    // Reset the label so the gen_c2i_adapter call below can bind it.
    skip_fixup.reset();
  }

  // Scalarized c2i adapter
  // C2I and C2I_Inline share this entry unless C2I_Inline is reassigned below.
  entry_address[AdapterBlob::C2I] = __ pc();
  entry_address[AdapterBlob::C2I_Inline] = __ pc();
  gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check],
                  skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true);

  // Non-scalarized c2i adapter
  if (regs != regs_cc) {
    entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc();
    Label inline_entry_skip_fixup;
    __ unimplemented("C2I_Unverified_Inline");
#if 0
    gen_inline_cache_check(masm, inline_entry_skip_fixup);
#endif

    entry_address[AdapterBlob::C2I_Inline] = __ pc();
    __ unimplemented("C2I_Inline2");
#if 0
    gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check],
                    inline_entry_skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
#endif
  }
  // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
  // the GC knows about the location of oop argument locations passed to the c2i adapter.
  if (allocate_code_blob) {
    bool caller_must_gc_arguments = (regs != regs_cc);
    int entry_offset[AdapterHandlerEntry::ENTRIES_COUNT];
    assert(AdapterHandlerEntry::ENTRIES_COUNT == 7, "sanity");
    // Convert the recorded entry pcs to offsets before creating the blob.
    AdapterHandlerLibrary::address_to_offset(entry_address, entry_offset);
    new_adapter = AdapterBlob::create(masm->code(), entry_offset, frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
  }
}
1446
1447 // An oop arg. Must pass a handle not the oop itself.
1448 static void object_move(MacroAssembler* masm,
1449 int frame_size_in_slots,
1450 OopMap* oop_map, int oop_handle_offset,
1451 bool is_receiver, int* receiver_offset,
1452 VMRegPair src, VMRegPair dst,
1453 Register r_caller_sp, Register r_temp_1, Register r_temp_2) {
1454 assert(!is_receiver || (is_receiver && (*receiver_offset == -1)),
1455 "receiver has already been moved");
1456
1457 // We must pass a handle. First figure out the location we use as a handle.
1458
1459 if (src.first()->is_stack()) {
1460 // stack to stack or reg
1461
1462 const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
1463 Label skip;
1464 const int oop_slot_in_callers_frame = reg2slot(src.first());
1465
1466 guarantee(!is_receiver, "expecting receiver in register");
1467 oop_map->set_oop(VMRegImpl::stack2reg(oop_slot_in_callers_frame + frame_size_in_slots));
1468
1469 __ addi(r_handle, r_caller_sp, reg2offset(src.first()));
1470 __ ld( r_temp_2, reg2offset(src.first()), r_caller_sp);
1471 __ cmpdi(CR0, r_temp_2, 0);
1472 __ bne(CR0, skip);
1473 // Use a null handle if oop is null.
1474 __ li(r_handle, 0);
1475 __ bind(skip);
1476
1477 if (dst.first()->is_stack()) {
1478 // stack to stack
1479 __ std(r_handle, reg2offset(dst.first()), R1_SP);
1480 } else {
1481 // stack to reg
1482 // Nothing to do, r_handle is already the dst register.
1483 }
1484 } else {
1485 // reg to stack or reg
1486 const Register r_oop = src.first()->as_Register();
1487 const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
1488 const int oop_slot = (r_oop->encoding()-R3_ARG1->encoding()) * VMRegImpl::slots_per_word
1489 + oop_handle_offset; // in slots
1490 const int oop_offset = oop_slot * VMRegImpl::stack_slot_size;
1491 Label skip;
1492
1493 if (is_receiver) {
1494 *receiver_offset = oop_offset;
1495 }
1496 oop_map->set_oop(VMRegImpl::stack2reg(oop_slot));
1497
1498 __ std( r_oop, oop_offset, R1_SP);
1499 __ addi(r_handle, R1_SP, oop_offset);
1500
1501 __ cmpdi(CR0, r_oop, 0);
1502 __ bne(CR0, skip);
1503 // Use a null handle if oop is null.
1504 __ li(r_handle, 0);
1505 __ bind(skip);
1506
1507 if (dst.first()->is_stack()) {
1508 // reg to stack
1509 __ std(r_handle, reg2offset(dst.first()), R1_SP);
1510 } else {
1511 // reg to reg
1512 // Nothing to do, r_handle is already the dst register.
1513 }
1514 }
1515 }
1516
1517 static void int_move(MacroAssembler*masm,
1518 VMRegPair src, VMRegPair dst,
1519 Register r_caller_sp, Register r_temp) {
1520 assert(src.first()->is_valid(), "incoming must be int");
1521 assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
1522
1523 if (src.first()->is_stack()) {
1524 if (dst.first()->is_stack()) {
1525 // stack to stack
1526 __ lwa(r_temp, reg2offset(src.first()), r_caller_sp);
1527 __ std(r_temp, reg2offset(dst.first()), R1_SP);
1528 } else {
1529 // stack to reg
1530 __ lwa(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
1531 }
1532 } else if (dst.first()->is_stack()) {
1533 // reg to stack
1534 __ extsw(r_temp, src.first()->as_Register());
1535 __ std(r_temp, reg2offset(dst.first()), R1_SP);
1536 } else {
1537 // reg to reg
1538 __ extsw(dst.first()->as_Register(), src.first()->as_Register());
1539 }
1540 }
1541
1542 static void long_move(MacroAssembler*masm,
1543 VMRegPair src, VMRegPair dst,
1544 Register r_caller_sp, Register r_temp) {
1545 assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long");
1546 assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
1547
1548 if (src.first()->is_stack()) {
1549 if (dst.first()->is_stack()) {
1550 // stack to stack
1551 __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
1552 __ std(r_temp, reg2offset(dst.first()), R1_SP);
1553 } else {
1554 // stack to reg
1555 __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
1556 }
1557 } else if (dst.first()->is_stack()) {
1558 // reg to stack
1559 __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP);
1560 } else {
1561 // reg to reg
1562 if (dst.first()->as_Register() != src.first()->as_Register())
1563 __ mr(dst.first()->as_Register(), src.first()->as_Register());
1564 }
1565 }
1566
1567 static void float_move(MacroAssembler*masm,
1568 VMRegPair src, VMRegPair dst,
1569 Register r_caller_sp, Register r_temp) {
1570 assert(src.first()->is_valid() && !src.second()->is_valid(), "incoming must be float");
1571 assert(dst.first()->is_valid() && !dst.second()->is_valid(), "outgoing must be float");
1572
1573 if (src.first()->is_stack()) {
1574 if (dst.first()->is_stack()) {
1575 // stack to stack
1576 __ lwz(r_temp, reg2offset(src.first()), r_caller_sp);
1577 __ stw(r_temp, reg2offset(dst.first()), R1_SP);
1578 } else {
1579 // stack to reg
1580 __ lfs(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
1581 }
1582 } else if (dst.first()->is_stack()) {
1583 // reg to stack
1584 __ stfs(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
1585 } else {
1586 // reg to reg
1587 if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
1588 __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1589 }
1590 }
1591
1592 static void double_move(MacroAssembler*masm,
1593 VMRegPair src, VMRegPair dst,
1594 Register r_caller_sp, Register r_temp) {
1595 assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be double");
1596 assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be double");
1597
1598 if (src.first()->is_stack()) {
1599 if (dst.first()->is_stack()) {
1600 // stack to stack
1601 __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
1602 __ std(r_temp, reg2offset(dst.first()), R1_SP);
1603 } else {
1604 // stack to reg
1605 __ lfd(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
1606 }
1607 } else if (dst.first()->is_stack()) {
1608 // reg to stack
1609 __ stfd(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
1610 } else {
1611 // reg to reg
1612 if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
1613 __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1614 }
1615 }
1616
1617 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1618 switch (ret_type) {
1619 case T_BOOLEAN:
1620 case T_CHAR:
1621 case T_BYTE:
1622 case T_SHORT:
1623 case T_INT:
1624 __ stw (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1625 break;
1626 case T_ARRAY:
1627 case T_OBJECT:
1628 case T_LONG:
1629 __ std (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1630 break;
1631 case T_FLOAT:
1632 __ stfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1633 break;
1634 case T_DOUBLE:
1635 __ stfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1636 break;
1637 case T_VOID:
1638 break;
1639 default:
1640 ShouldNotReachHere();
1641 break;
1642 }
1643 }
1644
1645 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1646 switch (ret_type) {
1647 case T_BOOLEAN:
1648 case T_CHAR:
1649 case T_BYTE:
1650 case T_SHORT:
1651 case T_INT:
1652 __ lwz(R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1653 break;
1654 case T_ARRAY:
1655 case T_OBJECT:
1656 case T_LONG:
1657 __ ld (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1658 break;
1659 case T_FLOAT:
1660 __ lfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1661 break;
1662 case T_DOUBLE:
1663 __ lfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1664 break;
1665 case T_VOID:
1666 break;
1667 default:
1668 ShouldNotReachHere();
1669 break;
1670 }
1671 }
1672
1673 static void verify_oop_args(MacroAssembler* masm,
1674 const methodHandle& method,
1675 const BasicType* sig_bt,
1676 const VMRegPair* regs) {
1677 Register temp_reg = R19_method; // not part of any compiled calling seq
1678 if (VerifyOops) {
1679 for (int i = 0; i < method->size_of_parameters(); i++) {
1680 if (is_reference_type(sig_bt[i])) {
1681 VMReg r = regs[i].first();
1682 assert(r->is_valid(), "bad oop arg");
1683 if (r->is_stack()) {
1684 __ ld(temp_reg, reg2offset(r), R1_SP);
1685 __ verify_oop(temp_reg, FILE_AND_LINE);
1686 } else {
1687 __ verify_oop(r->as_Register(), FILE_AND_LINE);
1688 }
1689 }
1690 }
1691 }
1692 }
1693
1694 static void gen_special_dispatch(MacroAssembler* masm,
1695 const methodHandle& method,
1696 const BasicType* sig_bt,
1697 const VMRegPair* regs) {
1698 verify_oop_args(masm, method, sig_bt, regs);
1699 vmIntrinsics::ID iid = method->intrinsic_id();
1700
1701 // Now write the args into the outgoing interpreter space
1702 bool has_receiver = false;
1703 Register receiver_reg = noreg;
1704 int member_arg_pos = -1;
1705 Register member_reg = noreg;
1706 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1707 if (ref_kind != 0) {
1708 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument
1709 member_reg = R19_method; // known to be free at this point
1710 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1711 } else if (iid == vmIntrinsics::_invokeBasic) {
1712 has_receiver = true;
1713 } else if (iid == vmIntrinsics::_linkToNative) {
1714 member_arg_pos = method->size_of_parameters() - 1; // trailing NativeEntryPoint argument
1715 member_reg = R19_method; // known to be free at this point
1716 } else {
1717 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
1718 }
1719
1720 if (member_reg != noreg) {
1721 // Load the member_arg into register, if necessary.
1722 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1723 VMReg r = regs[member_arg_pos].first();
1724 if (r->is_stack()) {
1725 __ ld(member_reg, reg2offset(r), R1_SP);
1726 } else {
1727 // no data motion is needed
1728 member_reg = r->as_Register();
1729 }
1730 }
1731
1732 if (has_receiver) {
1733 // Make sure the receiver is loaded into a register.
1734 assert(method->size_of_parameters() > 0, "oob");
1735 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1736 VMReg r = regs[0].first();
1737 assert(r->is_valid(), "bad receiver arg");
1738 if (r->is_stack()) {
1739 // Porting note: This assumes that compiled calling conventions always
1740 // pass the receiver oop in a register. If this is not true on some
1741 // platform, pick a temp and load the receiver from stack.
1742 fatal("receiver always in a register");
1743 receiver_reg = R11_scratch1; // TODO (hs24): is R11_scratch1 really free at this point?
1744 __ ld(receiver_reg, reg2offset(r), R1_SP);
1745 } else {
1746 // no data motion is needed
1747 receiver_reg = r->as_Register();
1748 }
1749 }
1750
1751 // Figure out which address we are really jumping to:
1752 MethodHandles::generate_method_handle_dispatch(masm, iid,
1753 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1754 }
1755
1756 //---------------------------- continuation_enter_setup ---------------------------
1757 //
1758 // Frame setup.
1759 //
1760 // Arguments:
1761 // None.
1762 //
1763 // Results:
1764 // R1_SP: pointer to blank ContinuationEntry in the pushed frame.
1765 //
1766 // Kills:
1767 // R0, R20
1768 //
1769 static OopMap* continuation_enter_setup(MacroAssembler* masm, int& framesize_words) {
1770 assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, "");
1771 assert(in_bytes(ContinuationEntry::cont_offset()) % VMRegImpl::stack_slot_size == 0, "");
1772 assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, "");
1773
1774 const int frame_size_in_bytes = (int)ContinuationEntry::size();
1775 assert(is_aligned(frame_size_in_bytes, frame::alignment_in_bytes), "alignment error");
1776
1777 framesize_words = frame_size_in_bytes / wordSize;
1778
1779 DEBUG_ONLY(__ block_comment("setup {"));
1780 // Save return pc and push entry frame
1781 const Register return_pc = R20;
1782 __ mflr(return_pc);
1783 __ std(return_pc, _abi0(lr), R1_SP); // SP->lr = return_pc
1784 __ push_frame(frame_size_in_bytes , R0); // SP -= frame_size_in_bytes
1785
1786 OopMap* map = new OopMap((int)frame_size_in_bytes / VMRegImpl::stack_slot_size, 0 /* arg_slots*/);
1787
1788 __ ld_ptr(R0, JavaThread::cont_entry_offset(), R16_thread);
1789 __ st_ptr(R1_SP, JavaThread::cont_entry_offset(), R16_thread);
1790 __ st_ptr(R0, ContinuationEntry::parent_offset(), R1_SP);
1791 DEBUG_ONLY(__ block_comment("} setup"));
1792
1793 return map;
1794 }
1795
1796 //---------------------------- fill_continuation_entry ---------------------------
1797 //
1798 // Initialize the new ContinuationEntry.
1799 //
1800 // Arguments:
1801 // R1_SP: pointer to blank Continuation entry
1802 // reg_cont_obj: pointer to the continuation
1803 // reg_flags: flags
1804 //
1805 // Results:
1806 // R1_SP: pointer to filled out ContinuationEntry
1807 //
1808 // Kills:
1809 // R8_ARG6, R9_ARG7, R10_ARG8
1810 //
1811 static void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Register reg_flags) {
1812 assert_different_registers(reg_cont_obj, reg_flags);
1813 Register zero = R8_ARG6;
1814 Register tmp2 = R9_ARG7;
1815
1816 DEBUG_ONLY(__ block_comment("fill {"));
1817 #ifdef ASSERT
1818 __ load_const_optimized(tmp2, ContinuationEntry::cookie_value());
1819 __ stw(tmp2, in_bytes(ContinuationEntry::cookie_offset()), R1_SP);
1820 #endif //ASSERT
1821
1822 __ li(zero, 0);
1823 __ st_ptr(reg_cont_obj, ContinuationEntry::cont_offset(), R1_SP);
1824 __ stw(reg_flags, in_bytes(ContinuationEntry::flags_offset()), R1_SP);
1825 __ st_ptr(zero, ContinuationEntry::chunk_offset(), R1_SP);
1826 __ stw(zero, in_bytes(ContinuationEntry::argsize_offset()), R1_SP);
1827 __ stw(zero, in_bytes(ContinuationEntry::pin_count_offset()), R1_SP);
1828
1829 __ ld_ptr(tmp2, JavaThread::cont_fastpath_offset(), R16_thread);
1830 __ st_ptr(tmp2, ContinuationEntry::parent_cont_fastpath_offset(), R1_SP);
1831
1832 __ st_ptr(zero, JavaThread::cont_fastpath_offset(), R16_thread);
1833 DEBUG_ONLY(__ block_comment("} fill"));
1834 }
1835
1836 //---------------------------- continuation_enter_cleanup ---------------------------
1837 //
1838 // Copy corresponding attributes from the top ContinuationEntry to the JavaThread
1839 // before deleting it.
1840 //
1841 // Arguments:
1842 // R1_SP: pointer to the ContinuationEntry
1843 //
1844 // Results:
1845 // None.
1846 //
1847 // Kills:
1848 // R8_ARG6, R9_ARG7, R10_ARG8, R15_esp
1849 //
1850 static void continuation_enter_cleanup(MacroAssembler* masm) {
1851 Register tmp1 = R8_ARG6;
1852 Register tmp2 = R9_ARG7;
1853
1854 #ifdef ASSERT
1855 __ block_comment("clean {");
1856 __ ld_ptr(tmp1, JavaThread::cont_entry_offset(), R16_thread);
1857 __ cmpd(CR0, R1_SP, tmp1);
1858 __ asm_assert_eq(FILE_AND_LINE ": incorrect R1_SP");
1859 #endif
1860
1861 __ ld_ptr(tmp1, ContinuationEntry::parent_cont_fastpath_offset(), R1_SP);
1862 __ st_ptr(tmp1, JavaThread::cont_fastpath_offset(), R16_thread);
1863 __ ld_ptr(tmp2, ContinuationEntry::parent_offset(), R1_SP);
1864 __ st_ptr(tmp2, JavaThread::cont_entry_offset(), R16_thread);
1865 DEBUG_ONLY(__ block_comment("} clean"));
1866 }
1867
1868 static void check_continuation_enter_argument(VMReg actual_vmreg,
1869 Register expected_reg,
1870 const char* name) {
1871 assert(!actual_vmreg->is_stack(), "%s cannot be on stack", name);
1872 assert(actual_vmreg->as_Register() == expected_reg,
1873 "%s is in unexpected register: %s instead of %s",
1874 name, actual_vmreg->as_Register()->name(), expected_reg->name());
1875 }
1876
1877 static void gen_continuation_enter(MacroAssembler* masm,
1878 const VMRegPair* regs,
1879 int& exception_offset,
1880 OopMapSet* oop_maps,
1881 int& frame_complete,
1882 int& framesize_words,
1883 int& interpreted_entry_offset,
1884 int& compiled_entry_offset) {
1885
1886 // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread)
1887 int pos_cont_obj = 0;
1888 int pos_is_cont = 1;
1889 int pos_is_virtual = 2;
1890
1891 // The platform-specific calling convention may present the arguments in various registers.
1892 // To simplify the rest of the code, we expect the arguments to reside at these known
1893 // registers, and we additionally check the placement here in case calling convention ever
1894 // changes.
1895 Register reg_cont_obj = R3_ARG1;
1896 Register reg_is_cont = R4_ARG2;
1897 Register reg_is_virtual = R5_ARG3;
1898
1899 check_continuation_enter_argument(regs[pos_cont_obj].first(), reg_cont_obj, "Continuation object");
1900 check_continuation_enter_argument(regs[pos_is_cont].first(), reg_is_cont, "isContinue");
1901 check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread");
1902
1903 AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), relocInfo::static_call_type);
1904 address start = __ pc();
1905 Label L_thaw, L_exit;
1906
1907 // i2i entry used at interp_only_mode only
1908 interpreted_entry_offset = __ pc() - start;
1909 {
1910 #ifdef ASSERT
1911 Label is_interp_only;
1912 __ lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
1913 __ cmpwi(CR0, R0, 0);
1914 __ bne(CR0, is_interp_only);
1915 __ stop("enterSpecial interpreter entry called when not in interp_only_mode");
1916 __ bind(is_interp_only);
1917 #endif
1918
1919 // Read interpreter arguments into registers (this is an ad-hoc i2c adapter)
1920 __ ld(reg_cont_obj, Interpreter::stackElementSize*3, R15_esp);
1921 __ lwz(reg_is_cont, Interpreter::stackElementSize*2, R15_esp);
1922 __ lwz(reg_is_virtual, Interpreter::stackElementSize*1, R15_esp);
1923
1924 __ push_cont_fastpath();
1925
1926 OopMap* map = continuation_enter_setup(masm, framesize_words);
1927
1928 // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe,
1929 // but that's okay because at the very worst we'll miss an async sample, but we're in interp_only_mode anyway.
1930
1931 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual);
1932
1933 // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue)
1934 __ cmpwi(CR0, reg_is_cont, 0);
1935 __ bne(CR0, L_thaw);
1936
1937 // --- call Continuation.enter(Continuation c, boolean isContinue)
1938
1939 // Emit compiled static call. The call will be always resolved to the c2i
1940 // entry of Continuation.enter(Continuation c, boolean isContinue).
1941 address c2i_call_pc = __ trampoline_call(resolve);
1942 guarantee(c2i_call_pc != nullptr, "CodeCache is full at gen_continuation_enter");
1943
1944 // Emit stub for static call
1945 address stub = CompiledDirectCall::emit_to_interp_stub(masm, c2i_call_pc);
1946 guarantee(stub != nullptr, "CodeCache is full at gen_continuation_enter");
1947
1948 oop_maps->add_gc_map(__ pc() - start, map);
1949 __ post_call_nop();
1950
1951 __ b(L_exit);
1952 }
1953
1954 // compiled entry
1955 __ align(CodeEntryAlignment);
1956 compiled_entry_offset = __ pc() - start;
1957
1958 OopMap* map = continuation_enter_setup(masm, framesize_words);
1959
1960 // Frame is now completed as far as size and linkage.
1961 frame_complete =__ pc() - start;
1962
1963 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual);
1964
1965 // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue)
1966 __ cmpwi(CR0, reg_is_cont, 0);
1967 __ bne(CR0, L_thaw);
1968
1969 // --- call Continuation.enter(Continuation c, boolean isContinue)
1970
1971 // Emit compiled static call
1972 // The call needs to be resolved. There's a special case for this in
1973 // SharedRuntime::find_callee_info_helper() which calls
1974 // LinkResolver::resolve_continuation_enter() which resolves the call to
1975 // Continuation.enter(Continuation c, boolean isContinue).
1976 address call_pc = __ trampoline_call(resolve);
1977 guarantee(call_pc != nullptr, "CodeCache is full at gen_continuation_enter");
1978
1979 oop_maps->add_gc_map(__ pc() - start, map);
1980 __ post_call_nop();
1981
1982 __ b(L_exit);
1983
1984 // --- Thawing path
1985
1986 __ bind(L_thaw);
1987 ContinuationEntry::_thaw_call_pc_offset = __ pc() - start;
1988 __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(StubRoutines::cont_thaw()));
1989 __ mtctr(R0);
1990 __ bctrl();
1991 oop_maps->add_gc_map(__ pc() - start, map->deep_copy());
1992 ContinuationEntry::_return_pc_offset = __ pc() - start;
1993 __ post_call_nop();
1994
1995 // --- Normal exit (resolve/thawing)
1996
1997 __ bind(L_exit);
1998 ContinuationEntry::_cleanup_offset = __ pc() - start;
1999 continuation_enter_cleanup(masm);
2000
2001 // Pop frame and return
2002 DEBUG_ONLY(__ ld_ptr(R0, 0, R1_SP));
2003 __ addi(R1_SP, R1_SP, framesize_words*wordSize);
2004 DEBUG_ONLY(__ cmpd(CR0, R0, R1_SP));
2005 __ asm_assert_eq(FILE_AND_LINE ": inconsistent frame size");
2006 __ ld(R0, _abi0(lr), R1_SP); // Return pc
2007 __ mtlr(R0);
2008 __ blr();
2009
2010 // --- Exception handling path
2011
2012 exception_offset = __ pc() - start;
2013
2014 continuation_enter_cleanup(masm);
2015 Register ex_pc = R17_tos; // nonvolatile register
2016 Register ex_oop = R15_esp; // nonvolatile register
2017 __ ld(ex_pc, _abi0(callers_sp), R1_SP); // Load caller's return pc
2018 __ ld(ex_pc, _abi0(lr), ex_pc);
2019 __ mr(ex_oop, R3_RET); // save return value containing the exception oop
2020 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), R16_thread, ex_pc);
2021 __ mtlr(R3_RET); // the exception handler
2022 __ ld(R1_SP, _abi0(callers_sp), R1_SP); // remove enterSpecial frame
2023
2024 // Continue at exception handler
2025 // See OptoRuntime::generate_exception_blob for register arguments
2026 __ mr(R3_ARG1, ex_oop); // pass exception oop
2027 __ mr(R4_ARG2, ex_pc); // pass exception pc
2028 __ blr();
2029
2030 // static stub for the call above
2031 address stub = CompiledDirectCall::emit_to_interp_stub(masm, call_pc);
2032 guarantee(stub != nullptr, "CodeCache is full at gen_continuation_enter");
2033 }
2034
2035 static void gen_continuation_yield(MacroAssembler* masm,
2036 const VMRegPair* regs,
2037 OopMapSet* oop_maps,
2038 int& frame_complete,
2039 int& framesize_words,
2040 int& compiled_entry_offset) {
2041 Register tmp = R10_ARG8;
2042
2043 const int framesize_bytes = (int)align_up((int)frame::native_abi_reg_args_size, frame::alignment_in_bytes);
2044 framesize_words = framesize_bytes / wordSize;
2045
2046 address start = __ pc();
2047 compiled_entry_offset = __ pc() - start;
2048
2049 // Save return pc and push entry frame
2050 __ mflr(tmp);
2051 __ std(tmp, _abi0(lr), R1_SP); // SP->lr = return_pc
2052 __ push_frame(framesize_bytes , R0); // SP -= frame_size_in_bytes
2053
2054 DEBUG_ONLY(__ block_comment("Frame Complete"));
2055 frame_complete = __ pc() - start;
2056 address last_java_pc = __ pc();
2057
2058 // This nop must be exactly at the PC we push into the frame info.
2059 // We use this nop for fast CodeBlob lookup, associate the OopMap
2060 // with it right away.
2061 __ post_call_nop();
2062 OopMap* map = new OopMap(framesize_bytes / VMRegImpl::stack_slot_size, 1);
2063 oop_maps->add_gc_map(last_java_pc - start, map);
2064
2065 __ calculate_address_from_global_toc(tmp, last_java_pc); // will be relocated
2066 __ set_last_Java_frame(R1_SP, tmp);
2067 __ call_VM_leaf(Continuation::freeze_entry(), R16_thread, R1_SP);
2068 __ reset_last_Java_frame();
2069
2070 Label L_pinned;
2071
2072 __ cmpwi(CR0, R3_RET, 0);
2073 __ bne(CR0, L_pinned);
2074
2075 // yield succeeded
2076
2077 // Pop frames of continuation including this stub's frame
2078 __ ld_ptr(R1_SP, JavaThread::cont_entry_offset(), R16_thread);
2079 // The frame pushed by gen_continuation_enter is on top now again
2080 continuation_enter_cleanup(masm);
2081
2082 // Pop frame and return
2083 Label L_return;
2084 __ bind(L_return);
2085 __ pop_frame();
2086 __ ld(R0, _abi0(lr), R1_SP); // Return pc
2087 __ mtlr(R0);
2088 __ blr();
2089
2090 // yield failed - continuation is pinned
2091
2092 __ bind(L_pinned);
2093
2094 // handle pending exception thrown by freeze
2095 __ ld(tmp, in_bytes(JavaThread::pending_exception_offset()), R16_thread);
2096 __ cmpdi(CR0, tmp, 0);
2097 __ beq(CR0, L_return); // return if no exception is pending
2098 __ pop_frame();
2099 __ ld(R0, _abi0(lr), R1_SP); // Return pc
2100 __ mtlr(R0);
2101 __ load_const_optimized(tmp, StubRoutines::forward_exception_entry(), R0);
2102 __ mtctr(tmp);
2103 __ bctr();
2104 }
2105
// Member-function wrapper that forwards to the file-local
// continuation_enter_cleanup() defined above. The leading '::' selects the
// file-scope function instead of this member.
void SharedRuntime::continuation_enter_cleanup(MacroAssembler* masm) {
  ::continuation_enter_cleanup(masm);
}
2109
2110 // ---------------------------------------------------------------------------
2111 // Generate a native wrapper for a given method. The method takes arguments
2112 // in the Java compiled code convention, marshals them to the native
2113 // convention (handlizes oops, etc), transitions to native, makes the call,
2114 // returns to java state (possibly blocking), unhandlizes any result and
2115 // returns.
2116 //
// Critical native functions are a shorthand for the use of
// GetPrimitiveArrayCritical and disallow the use of any other JNI
// functions. The wrapper is expected to unpack the arguments before
// passing them to the callee. Critical native functions leave the state _in_Java,
// since they cannot stop for GC.
// Some other parts of JNI setup are skipped, like the tear down of the JNI handle
// block and the check for pending exceptions, because it is impossible for them
// to be thrown.
2125 //
2126 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
2127 const methodHandle& method,
2128 int compile_id,
2129 BasicType *in_sig_bt,
2130 VMRegPair *in_regs,
2131 BasicType ret_type) {
2132 if (method->is_continuation_native_intrinsic()) {
2133 int exception_offset = -1;
2134 OopMapSet* oop_maps = new OopMapSet();
2135 int frame_complete = -1;
2136 int stack_slots = -1;
2137 int interpreted_entry_offset = -1;
2138 int vep_offset = -1;
2139 if (method->is_continuation_enter_intrinsic()) {
2140 gen_continuation_enter(masm,
2141 in_regs,
2142 exception_offset,
2143 oop_maps,
2144 frame_complete,
2145 stack_slots,
2146 interpreted_entry_offset,
2147 vep_offset);
2148 } else if (method->is_continuation_yield_intrinsic()) {
2149 gen_continuation_yield(masm,
2150 in_regs,
2151 oop_maps,
2152 frame_complete,
2153 stack_slots,
2154 vep_offset);
2155 } else {
2156 guarantee(false, "Unknown Continuation native intrinsic");
2157 }
2158
2159 #ifdef ASSERT
2160 if (method->is_continuation_enter_intrinsic()) {
2161 assert(interpreted_entry_offset != -1, "Must be set");
2162 assert(exception_offset != -1, "Must be set");
2163 } else {
2164 assert(interpreted_entry_offset == -1, "Must be unset");
2165 assert(exception_offset == -1, "Must be unset");
2166 }
2167 assert(frame_complete != -1, "Must be set");
2168 assert(stack_slots != -1, "Must be set");
2169 assert(vep_offset != -1, "Must be set");
2170 #endif
2171
2172 __ flush();
2173 nmethod* nm = nmethod::new_native_nmethod(method,
2174 compile_id,
2175 masm->code(),
2176 vep_offset,
2177 frame_complete,
2178 stack_slots,
2179 in_ByteSize(-1),
2180 in_ByteSize(-1),
2181 oop_maps,
2182 exception_offset);
2183 if (nm == nullptr) return nm;
2184 if (method->is_continuation_enter_intrinsic()) {
2185 ContinuationEntry::set_enter_code(nm, interpreted_entry_offset);
2186 } else if (method->is_continuation_yield_intrinsic()) {
2187 _cont_doYield_stub = nm;
2188 }
2189 return nm;
2190 }
2191
2192 if (method->is_method_handle_intrinsic()) {
2193 vmIntrinsics::ID iid = method->intrinsic_id();
2194 intptr_t start = (intptr_t)__ pc();
2195 int vep_offset = ((intptr_t)__ pc()) - start;
2196 gen_special_dispatch(masm,
2197 method,
2198 in_sig_bt,
2199 in_regs);
2200 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period
2201 __ flush();
2202 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
2203 return nmethod::new_native_nmethod(method,
2204 compile_id,
2205 masm->code(),
2206 vep_offset,
2207 frame_complete,
2208 stack_slots / VMRegImpl::slots_per_word,
2209 in_ByteSize(-1),
2210 in_ByteSize(-1),
2211 (OopMapSet*)nullptr);
2212 }
2213
2214 address native_func = method->native_function();
2215 assert(native_func != nullptr, "must have function");
2216
2217 // First, create signature for outgoing C call
2218 // --------------------------------------------------------------------------
2219
2220 int total_in_args = method->size_of_parameters();
2221 // We have received a description of where all the java args are located
2222 // on entry to the wrapper. We need to convert these args to where
2223 // the jni function will expect them. To figure out where they go
2224 // we convert the java signature to a C signature by inserting
2225 // the hidden arguments as arg[0] and possibly arg[1] (static method)
2226
2227 // Calculate the total number of C arguments and create arrays for the
2228 // signature and the outgoing registers.
2229 // On ppc64, we have two arrays for the outgoing registers, because
2230 // some floating-point arguments must be passed in registers _and_
2231 // in stack locations.
2232 bool method_is_static = method->is_static();
2233 int total_c_args = total_in_args + (method_is_static ? 2 : 1);
2234
2235 BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
2236 VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
2237
2238 // Create the signature for the C call:
2239 // 1) add the JNIEnv*
2240 // 2) add the class if the method is static
2241 // 3) copy the rest of the incoming signature (shifted by the number of
2242 // hidden arguments).
2243
2244 int argc = 0;
2245 out_sig_bt[argc++] = T_ADDRESS;
2246 if (method->is_static()) {
2247 out_sig_bt[argc++] = T_OBJECT;
2248 }
2249
2250 for (int i = 0; i < total_in_args ; i++ ) {
2251 out_sig_bt[argc++] = in_sig_bt[i];
2252 }
2253
2254
2255 // Compute the wrapper's frame size.
2256 // --------------------------------------------------------------------------
2257
2258 // Now figure out where the args must be stored and how much stack space
2259 // they require.
2260 //
2261 // Compute framesize for the wrapper. We need to handlize all oops in
2262 // incoming registers.
2263 //
2264 // Calculate the total number of stack slots we will need:
2265 // 1) abi requirements
2266 // 2) outgoing arguments
2267 // 3) space for inbound oop handle area
2268 // 4) space for handlizing a klass if static method
2269 // 5) space for a lock if synchronized method
2270 // 6) workspace for saving return values, int <-> float reg moves, etc.
2271 // 7) alignment
2272 //
2273 // Layout of the native wrapper frame:
2274 // (stack grows upwards, memory grows downwards)
2275 //
2276 // NW [ABI_REG_ARGS] <-- 1) R1_SP
2277 // [outgoing arguments] <-- 2) R1_SP + out_arg_slot_offset
2278 // [oopHandle area] <-- 3) R1_SP + oop_handle_offset
2279 // klass <-- 4) R1_SP + klass_offset
2280 // lock <-- 5) R1_SP + lock_offset
2281 // [workspace] <-- 6) R1_SP + workspace_offset
2282 // [alignment] (optional) <-- 7)
2283 // caller [JIT_TOP_ABI_48] <-- r_callers_sp
2284 //
2285 // - *_slot_offset Indicates offset from SP in number of stack slots.
2286 // - *_offset Indicates offset from SP in bytes.
2287
2288 int stack_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args) + // 1+2)
2289 SharedRuntime::out_preserve_stack_slots(); // See c_calling_convention.
2290
2291 // Now the space for the inbound oop handle area.
2292 int total_save_slots = num_java_iarg_registers * VMRegImpl::slots_per_word;
2293
2294 int oop_handle_slot_offset = stack_slots;
2295 stack_slots += total_save_slots; // 3)
2296
2297 int klass_slot_offset = 0;
2298 int klass_offset = -1;
2299 if (method_is_static) { // 4)
2300 klass_slot_offset = stack_slots;
2301 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
2302 stack_slots += VMRegImpl::slots_per_word;
2303 }
2304
2305 int lock_slot_offset = 0;
2306 int lock_offset = -1;
2307 if (method->is_synchronized()) { // 5)
2308 lock_slot_offset = stack_slots;
2309 lock_offset = lock_slot_offset * VMRegImpl::stack_slot_size;
2310 stack_slots += VMRegImpl::slots_per_word;
2311 }
2312
2313 int workspace_slot_offset = stack_slots; // 6)
2314 stack_slots += 2;
2315
2316 // Now compute actual number of stack words we need.
2317 // Rounding to make stack properly aligned.
2318 stack_slots = align_up(stack_slots, // 7)
2319 frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
2320 int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
2321
2322
2323 // Now we can start generating code.
2324 // --------------------------------------------------------------------------
2325
2326 intptr_t start_pc = (intptr_t)__ pc();
2327 intptr_t vep_start_pc;
2328 intptr_t frame_done_pc;
2329
2330 Label handle_pending_exception;
2331 Label last_java_pc;
2332
2333 Register r_callers_sp = R21;
2334 Register r_temp_1 = R22;
2335 Register r_temp_2 = R23;
2336 Register r_temp_3 = R24;
2337 Register r_temp_4 = R25;
2338 Register r_temp_5 = R26;
2339 Register r_temp_6 = R27;
2340 Register r_last_java_pc = R28;
2341
2342 Register r_carg1_jnienv = noreg;
2343 Register r_carg2_classorobject = noreg;
2344 r_carg1_jnienv = out_regs[0].first()->as_Register();
2345 r_carg2_classorobject = out_regs[1].first()->as_Register();
2346
2347
2348 // Generate the Unverified Entry Point (UEP).
2349 // --------------------------------------------------------------------------
2350 assert(start_pc == (intptr_t)__ pc(), "uep must be at start");
2351
2352 // Check ic: object class == cached class?
2353 if (!method_is_static) {
2354 __ ic_check(4 /* end_alignment */);
2355 }
2356
2357 // Generate the Verified Entry Point (VEP).
2358 // --------------------------------------------------------------------------
2359 vep_start_pc = (intptr_t)__ pc();
2360
2361 if (method->needs_clinit_barrier()) {
2362 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
2363 Label L_skip_barrier;
2364 Register klass = r_temp_1;
2365 // Notify OOP recorder (don't need the relocation)
2366 AddressLiteral md = __ constant_metadata_address(method->method_holder());
2367 __ load_const_optimized(klass, md.value(), R0);
2368 __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);
2369
2370 __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
2371 __ mtctr(klass);
2372 __ bctr();
2373
2374 __ bind(L_skip_barrier);
2375 }
2376
2377 __ save_LR(r_temp_1);
2378 __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame.
2379 __ mr(r_callers_sp, R1_SP); // Remember frame pointer.
2380 __ push_frame(frame_size_in_bytes, r_temp_1); // Push the c2n adapter's frame.
2381
2382 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2383 bs->nmethod_entry_barrier(masm, r_temp_1);
2384
2385 frame_done_pc = (intptr_t)__ pc();
2386
2387 // Native nmethod wrappers never take possession of the oop arguments.
2388 // So the caller will gc the arguments.
2389 // The only thing we need an oopMap for is if the call is static.
2390 //
2391 // An OopMap for lock (and class if static), and one for the VM call itself.
2392 OopMapSet *oop_maps = new OopMapSet();
2393 OopMap *oop_map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
2394
2395 // Move arguments from register/stack to register/stack.
2396 // --------------------------------------------------------------------------
2397 //
2398 // We immediately shuffle the arguments so that for any vm call we have
2399 // to make from here on out (sync slow path, jvmti, etc.) we will have
2400 // captured the oops from our caller and have a valid oopMap for them.
2401 //
2402 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
2403 // (derived from JavaThread* which is in R16_thread) and, if static,
2404 // the class mirror instead of a receiver. This pretty much guarantees that
2405 // register layout will not match. We ignore these extra arguments during
2406 // the shuffle. The shuffle is described by the two calling convention
2407 // vectors we have in our possession. We simply walk the java vector to
2408 // get the source locations and the c vector to get the destinations.
2409
2410 // Record sp-based slot for receiver on stack for non-static methods.
2411 int receiver_offset = -1;
2412
2413 // We move the arguments backward because the floating point registers
2414 // destination will always be to a register with a greater or equal
2415 // register number or the stack.
2416 // in is the index of the incoming Java arguments
2417 // out is the index of the outgoing C arguments
2418
2419 #ifdef ASSERT
2420 bool reg_destroyed[Register::number_of_registers];
2421 bool freg_destroyed[FloatRegister::number_of_registers];
2422 for (int r = 0 ; r < Register::number_of_registers ; r++) {
2423 reg_destroyed[r] = false;
2424 }
2425 for (int f = 0 ; f < FloatRegister::number_of_registers ; f++) {
2426 freg_destroyed[f] = false;
2427 }
2428 #endif // ASSERT
2429
2430 for (int in = total_in_args - 1, out = total_c_args - 1; in >= 0 ; in--, out--) {
2431
2432 #ifdef ASSERT
2433 if (in_regs[in].first()->is_Register()) {
2434 assert(!reg_destroyed[in_regs[in].first()->as_Register()->encoding()], "ack!");
2435 } else if (in_regs[in].first()->is_FloatRegister()) {
2436 assert(!freg_destroyed[in_regs[in].first()->as_FloatRegister()->encoding()], "ack!");
2437 }
2438 if (out_regs[out].first()->is_Register()) {
2439 reg_destroyed[out_regs[out].first()->as_Register()->encoding()] = true;
2440 } else if (out_regs[out].first()->is_FloatRegister()) {
2441 freg_destroyed[out_regs[out].first()->as_FloatRegister()->encoding()] = true;
2442 }
2443 #endif // ASSERT
2444
2445 switch (in_sig_bt[in]) {
2446 case T_BOOLEAN:
2447 case T_CHAR:
2448 case T_BYTE:
2449 case T_SHORT:
2450 case T_INT:
2451 // Move int and do sign extension.
2452 int_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2453 break;
2454 case T_LONG:
2455 long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2456 break;
2457 case T_ARRAY:
2458 case T_OBJECT:
2459 object_move(masm, stack_slots,
2460 oop_map, oop_handle_slot_offset,
2461 ((in == 0) && (!method_is_static)), &receiver_offset,
2462 in_regs[in], out_regs[out],
2463 r_callers_sp, r_temp_1, r_temp_2);
2464 break;
2465 case T_VOID:
2466 break;
2467 case T_FLOAT:
2468 float_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2469 break;
2470 case T_DOUBLE:
2471 double_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2472 break;
2473 case T_ADDRESS:
2474 fatal("found type (T_ADDRESS) in java args");
2475 break;
2476 default:
2477 ShouldNotReachHere();
2478 break;
2479 }
2480 }
2481
2482 // Pre-load a static method's oop into ARG2.
2483 // Used both by locking code and the normal JNI call code.
2484 if (method_is_static) {
2485 __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()),
2486 r_carg2_classorobject);
2487
2488 // Now handlize the static class mirror in carg2. It's known not-null.
2489 __ std(r_carg2_classorobject, klass_offset, R1_SP);
2490 oop_map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2491 __ addi(r_carg2_classorobject, R1_SP, klass_offset);
2492 }
2493
2494 // Get JNIEnv* which is first argument to native.
2495 __ addi(r_carg1_jnienv, R16_thread, in_bytes(JavaThread::jni_environment_offset()));
2496
2497 // NOTE:
2498 //
2499 // We have all of the arguments setup at this point.
2500 // We MUST NOT touch any outgoing regs from this point on.
2501 // So if we must call out we must push a new frame.
2502
2503 // The last java pc will also be used as resume pc if this is the wrapper for wait0.
2504 // For this purpose the precise location matters but not for oopmap lookup.
2505 __ calculate_address_from_global_toc(r_last_java_pc, last_java_pc, true, true, true, true);
2506
2507 // Make sure that thread is non-volatile; it crosses a bunch of VM calls below.
2508 assert(R16_thread->is_nonvolatile(), "thread must be in non-volatile register");
2509
2510 // Lock a synchronized method.
2511 // --------------------------------------------------------------------------
2512
2513 if (method->is_synchronized()) {
2514 Register r_oop = r_temp_4;
2515 const Register r_box = r_temp_5;
2516 Label done, locked;
2517
2518 // Load the oop for the object or class. r_carg2_classorobject contains
2519 // either the handlized oop from the incoming arguments or the handlized
2520 // class mirror (if the method is static).
2521 __ ld(r_oop, 0, r_carg2_classorobject);
2522
2523 // Get the lock box slot's address.
2524 __ addi(r_box, R1_SP, lock_offset);
2525
2526 // Try fastpath for locking.
2527 // fast_lock kills r_temp_1, r_temp_2, r_temp_3.
2528 Register r_temp_3_or_noreg = UseObjectMonitorTable ? r_temp_3 : noreg;
2529 __ compiler_fast_lock_object(CR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3_or_noreg);
2530 __ beq(CR0, locked);
2531
2532 // None of the above fast optimizations worked so we have to get into the
2533 // slow case of monitor enter. Inline a special case of call_VM that
2534 // disallows any pending_exception.
2535
2536 // Save argument registers and leave room for C-compatible ABI_REG_ARGS.
2537 int frame_size = frame::native_abi_reg_args_size + align_up(total_c_args * wordSize, frame::alignment_in_bytes);
2538 __ mr(R11_scratch1, R1_SP);
2539 RegisterSaver::push_frame_and_save_argument_registers(masm, R12_scratch2, frame_size, total_c_args, out_regs);
2540
2541 // Do the call.
2542 __ set_last_Java_frame(R11_scratch1, r_last_java_pc);
2543 assert(r_last_java_pc->is_nonvolatile(), "r_last_java_pc needs to be preserved accross complete_monitor_locking_C call");
2544 // The following call will not be preempted.
2545 // push_cont_fastpath forces freeze slow path in case we try to preempt where we will pin the
2546 // vthread to the carrier (see FreezeBase::recurse_freeze_native_frame()).
2547 __ push_cont_fastpath();
2548 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), r_oop, r_box, R16_thread);
2549 __ pop_cont_fastpath();
2550 __ reset_last_Java_frame();
2551
2552 RegisterSaver::restore_argument_registers_and_pop_frame(masm, frame_size, total_c_args, out_regs);
2553
2554 __ asm_assert_mem8_is_zero(thread_(pending_exception),
2555 "no pending exception allowed on exit from SharedRuntime::complete_monitor_locking_C");
2556
2557 __ bind(locked);
2558 }
2559
2560 __ set_last_Java_frame(R1_SP, r_last_java_pc);
2561
2562 // Publish thread state
2563 // --------------------------------------------------------------------------
2564
2565 // Transition from _thread_in_Java to _thread_in_native.
2566 __ li(R0, _thread_in_native);
2567 __ release();
2568 // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2569 __ stw(R0, thread_(thread_state));
2570
2571
2572 // The JNI call
2573 // --------------------------------------------------------------------------
2574 __ call_c(native_func, relocInfo::runtime_call_type);
2575
2576
2577 // Now, we are back from the native code.
2578
2579
2580 // Unpack the native result.
2581 // --------------------------------------------------------------------------
2582
2583 // For int-types, we do any needed sign-extension required.
2584 // Care must be taken that the return values (R3_RET and F1_RET)
2585 // will survive any VM calls for blocking or unlocking.
2586 // An OOP result (handle) is done specially in the slow-path code.
2587
2588 switch (ret_type) {
2589 case T_VOID: break; // Nothing to do!
2590 case T_FLOAT: break; // Got it where we want it (unless slow-path).
2591 case T_DOUBLE: break; // Got it where we want it (unless slow-path).
2592 case T_LONG: break; // Got it where we want it (unless slow-path).
2593 case T_OBJECT: break; // Really a handle.
2594 // Cannot de-handlize until after reclaiming jvm_lock.
2595 case T_ARRAY: break;
2596
2597 case T_BOOLEAN: { // 0 -> false(0); !0 -> true(1)
2598 __ normalize_bool(R3_RET);
2599 break;
2600 }
2601 case T_BYTE: { // sign extension
2602 __ extsb(R3_RET, R3_RET);
2603 break;
2604 }
2605 case T_CHAR: { // unsigned result
2606 __ andi(R3_RET, R3_RET, 0xffff);
2607 break;
2608 }
2609 case T_SHORT: { // sign extension
2610 __ extsh(R3_RET, R3_RET);
2611 break;
2612 }
2613 case T_INT: // nothing to do
2614 break;
2615 default:
2616 ShouldNotReachHere();
2617 break;
2618 }
2619
2620 // Publish thread state
2621 // --------------------------------------------------------------------------
2622
2623 // Switch thread to "native transition" state before reading the
2624 // synchronization state. This additional state is necessary because reading
2625 // and testing the synchronization state is not atomic w.r.t. GC, as this
2626 // scenario demonstrates:
2627 // - Java thread A, in _thread_in_native state, loads _not_synchronized
2628 // and is preempted.
2629 // - VM thread changes sync state to synchronizing and suspends threads
2630 // for GC.
2631 // - Thread A is resumed to finish this native method, but doesn't block
2632 // here since it didn't see any synchronization in progress, and escapes.
2633
2634 // Transition from _thread_in_native to _thread_in_native_trans.
2635 __ li(R0, _thread_in_native_trans);
2636 __ release();
2637 // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2638 __ stw(R0, thread_(thread_state));
2639
2640
2641 // Must we block?
2642 // --------------------------------------------------------------------------
2643
2644 // Block, if necessary, before resuming in _thread_in_Java state.
2645 // In order for GC to work, don't clear the last_Java_sp until after blocking.
2646 {
2647 Label no_block, sync;
2648
2649 // Force this write out before the read below.
2650 if (!UseSystemMemoryBarrier) {
2651 __ fence();
2652 }
2653
2654 Register sync_state_addr = r_temp_4;
2655 Register sync_state = r_temp_5;
2656 Register suspend_flags = r_temp_6;
2657
2658 // No synchronization in progress nor yet synchronized
2659 // (cmp-br-isync on one path, release (same as acquire on PPC64) on the other path).
2660 __ safepoint_poll(sync, sync_state, true /* at_return */, false /* in_nmethod */);
2661
2662 // Not suspended.
2663 // TODO: PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
2664 __ lwz(suspend_flags, thread_(suspend_flags));
2665 __ cmpwi(CR1, suspend_flags, 0);
2666 __ beq(CR1, no_block);
2667
2668 // Block. Save any potential method result value before the operation and
2669 // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2670 // lets us share the oopMap we used when we went native rather than create
2671 // a distinct one for this pc.
2672 __ bind(sync);
2673 __ isync();
2674
2675 address entry_point =
2676 CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2677 save_native_result(masm, ret_type, workspace_slot_offset);
2678 __ call_VM_leaf(entry_point, R16_thread);
2679 restore_native_result(masm, ret_type, workspace_slot_offset);
2680
2681 __ bind(no_block);
2682
2683 // Publish thread state.
2684 // --------------------------------------------------------------------------
2685
2686 // Thread state is thread_in_native_trans. Any safepoint blocking has
2687 // already happened so we can now change state to _thread_in_Java.
2688
2689 // Transition from _thread_in_native_trans to _thread_in_Java.
2690 __ li(R0, _thread_in_Java);
2691 __ lwsync(); // Acquire safepoint and suspend state, release thread state.
2692 // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2693 __ stw(R0, thread_(thread_state));
2694
2695 // Check preemption for Object.wait()
2696 if (method->is_object_wait0()) {
2697 Label not_preempted;
2698 __ ld(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread);
2699 __ cmpdi(CR0, R0, 0);
2700 __ beq(CR0, not_preempted);
2701 __ mtlr(R0);
2702 __ li(R0, 0);
2703 __ std(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread);
2704 __ blr();
2705 __ bind(not_preempted);
2706 }
2707 __ bind(last_java_pc);
2708 // We use the same pc/oopMap repeatedly when we call out above.
2709 intptr_t oopmap_pc = (intptr_t) __ pc();
2710 oop_maps->add_gc_map(oopmap_pc - start_pc, oop_map);
2711 }
2712
2713 // Reguard any pages if necessary.
2714 // --------------------------------------------------------------------------
2715
2716 Label no_reguard;
2717 __ lwz(r_temp_1, thread_(stack_guard_state));
2718 __ cmpwi(CR0, r_temp_1, StackOverflow::stack_guard_yellow_reserved_disabled);
2719 __ bne(CR0, no_reguard);
2720
2721 save_native_result(masm, ret_type, workspace_slot_offset);
2722 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
2723 restore_native_result(masm, ret_type, workspace_slot_offset);
2724
2725 __ bind(no_reguard);
2726
2727
2728 // Unlock
2729 // --------------------------------------------------------------------------
2730
2731 if (method->is_synchronized()) {
2732 const Register r_oop = r_temp_4;
2733 const Register r_box = r_temp_5;
2734 const Register r_exception = r_temp_6;
2735 Label done;
2736
2737 // Get oop and address of lock object box.
2738 if (method_is_static) {
2739 assert(klass_offset != -1, "");
2740 __ ld(r_oop, klass_offset, R1_SP);
2741 } else {
2742 assert(receiver_offset != -1, "");
2743 __ ld(r_oop, receiver_offset, R1_SP);
2744 }
2745 __ addi(r_box, R1_SP, lock_offset);
2746
2747 // Try fastpath for unlocking.
2748 __ compiler_fast_unlock_object(CR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
2749 __ beq(CR0, done);
2750
2751 // Save and restore any potential method result value around the unlocking operation.
2752 save_native_result(masm, ret_type, workspace_slot_offset);
2753
2754 // Must save pending exception around the slow-path VM call. Since it's a
2755 // leaf call, the pending exception (if any) can be kept in a register.
2756 __ ld(r_exception, thread_(pending_exception));
2757 assert(r_exception->is_nonvolatile(), "exception register must be non-volatile");
2758 __ li(R0, 0);
2759 __ std(R0, thread_(pending_exception));
2760
    // Slow case of monitor exit.
2762 // Inline a special case of call_VM that disallows any pending_exception.
2763 // Arguments are (oop obj, BasicLock* lock, JavaThread* thread).
2764 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box, R16_thread);
2765
2766 __ asm_assert_mem8_is_zero(thread_(pending_exception),
2767 "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C");
2768
2769 restore_native_result(masm, ret_type, workspace_slot_offset);
2770
2771 // Check_forward_pending_exception jump to forward_exception if any pending
2772 // exception is set. The forward_exception routine expects to see the
2773 // exception in pending_exception and not in a register. Kind of clumsy,
2774 // since all folks who branch to forward_exception must have tested
2775 // pending_exception first and hence have it in a register already.
2776 __ std(r_exception, thread_(pending_exception));
2777
2778 __ bind(done);
2779 }
2780
2781 // Clear "last Java frame" SP and PC.
2782 // --------------------------------------------------------------------------
2783
2784 // Last java frame won't be set if we're resuming after preemption
2785 bool maybe_preempted = method->is_object_wait0();
2786 __ reset_last_Java_frame(!maybe_preempted /* check_last_java_sp */);
2787
2788 // Unbox oop result, e.g. JNIHandles::resolve value.
2789 // --------------------------------------------------------------------------
2790
2791 if (is_reference_type(ret_type)) {
2792 __ resolve_jobject(R3_RET, r_temp_1, r_temp_2, MacroAssembler::PRESERVATION_NONE);
2793 }
2794
2795 if (CheckJNICalls) {
2796 // clear_pending_jni_exception_check
2797 __ load_const_optimized(R0, 0L);
2798 __ st_ptr(R0, JavaThread::pending_jni_exception_check_fn_offset(), R16_thread);
2799 }
2800
2801 // Reset handle block.
2802 // --------------------------------------------------------------------------
2803 __ ld(r_temp_1, thread_(active_handles));
2804 // TODO: PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
2805 __ li(r_temp_2, 0);
2806 __ stw(r_temp_2, in_bytes(JNIHandleBlock::top_offset()), r_temp_1);
2807
2808 // Prepare for return
2809 // --------------------------------------------------------------------------
2810 __ pop_frame();
2811 __ restore_LR(R11);
2812
2813 #if INCLUDE_JFR
2814 // We need to do a poll test after unwind in case the sampler
2815 // managed to sample the native frame after returning to Java.
2816 Label L_stub;
2817 int safepoint_offset = __ offset();
2818 if (!UseSIGTRAP) {
2819 __ relocate(relocInfo::poll_return_type);
2820 }
2821 __ safepoint_poll(L_stub, r_temp_2, true /* at_return */, true /* in_nmethod: frame already popped */);
2822 #endif // INCLUDE_JFR
2823
2824 // Check for pending exceptions.
2825 // --------------------------------------------------------------------------
2826 __ ld(r_temp_2, thread_(pending_exception));
2827 __ cmpdi(CR0, r_temp_2, 0);
2828 __ bne(CR0, handle_pending_exception);
2829
2830 // Return.
2831 __ blr();
2832
2833 // Handler for return safepoint (out-of-line).
2834 #if INCLUDE_JFR
2835 if (!UseSIGTRAP) {
2836 __ bind(L_stub);
2837 __ jump_to_polling_page_return_handler_blob(safepoint_offset);
2838 }
2839 #endif // INCLUDE_JFR
2840
2841 // Handler for pending exceptions (out-of-line).
2842 // --------------------------------------------------------------------------
2843 // Since this is a native call, we know the proper exception handler
2844 // is the empty function. We just pop this frame and then jump to
2845 // forward_exception_entry.
2846 __ bind(handle_pending_exception);
2847 __ b64_patchable((address)StubRoutines::forward_exception_entry(),
2848 relocInfo::runtime_call_type);
2849
2850 // Done.
2851 // --------------------------------------------------------------------------
2852
2853 __ flush();
2854
2855 nmethod *nm = nmethod::new_native_nmethod(method,
2856 compile_id,
2857 masm->code(),
2858 vep_start_pc-start_pc,
2859 frame_done_pc-start_pc,
2860 stack_slots / VMRegImpl::slots_per_word,
2861 (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2862 in_ByteSize(lock_offset),
2863 oop_maps);
2864
2865 return nm;
2866 }
2867
2868 // This function returns the adjust size (in number of words) to a c2i adapter
2869 // activation for use during deoptimization.
2870 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2871 return align_up((callee_locals - callee_parameters) * Interpreter::stackElementWords, frame::frame_alignment_in_words);
2872 }
2873
2874 uint SharedRuntime::in_preserve_stack_slots() {
2875 return frame::jit_in_preserve_size / VMRegImpl::stack_slot_size;
2876 }
2877
2878 uint SharedRuntime::out_preserve_stack_slots() {
2879 #if defined(COMPILER1) || defined(COMPILER2)
2880 return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
2881 #else
2882 return 0;
2883 #endif
2884 }
2885
2886 VMReg SharedRuntime::thread_register() {
2887 // On PPC virtual threads don't save the JavaThread* in their context (e.g. C1 stub frames).
2888 ShouldNotCallThis();
2889 return nullptr;
2890 }
2891
2892 #if defined(COMPILER1) || defined(COMPILER2)
// Frame generation for deopt and uncommon trap blobs.
//
// Emits code that pushes a single skeleton frame described by the current
// entries of the frame-size and pc arrays, then advances both array cursors
// by one word and decrements the remaining-frames counter.
//
//   masm                 - assembler the code is emitted into
//   deopt                - not referenced by this helper
//   unroll_block_reg     - not referenced by this helper
//   frame_sizes_reg      - cursor into the frame sizes array (advanced by wordSize)
//   number_of_frames_reg - number of frames still to push (decremented by 1)
//   pcs_reg              - cursor into the frame pcs array (advanced by wordSize)
//   frame_size_reg       - overwritten with the loaded frame size
//   pc_reg               - overwritten with the loaded pc
//
// R0 is handed to push_frame as temp register.
static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
                                /* Read */
                                Register unroll_block_reg,
                                /* Update */
                                Register frame_sizes_reg,
                                Register number_of_frames_reg,
                                Register pcs_reg,
                                /* Invalidate */
                                Register frame_size_reg,
                                Register pc_reg) {

  // Fetch the pc and the size for the frame about to be pushed.
  __ ld(pc_reg, 0, pcs_reg);
  __ ld(frame_size_reg, 0, frame_sizes_reg);
  // Store the pc into the lr slot of the current top frame (before the push).
  __ std(pc_reg, _abi0(lr), R1_SP);
  __ push_frame(frame_size_reg, R0/*tmp*/);
  // Record the new SP in the sender_sp ijava_state slot addressed relative to
  // the new SP.
  // NOTE(review): presumably this initializes sender_sp for the frame that
  // will be pushed on top of this one - confirm against the ijava_state layout.
  __ std(R1_SP, _ijava_state_neg(sender_sp), R1_SP);
  // One frame less to push; step both array cursors to the next entry.
  __ addi(number_of_frames_reg, number_of_frames_reg, -1);
  __ addi(frame_sizes_reg, frame_sizes_reg, wordSize);
  __ addi(pcs_reg, pcs_reg, wordSize);
}
2914
// Loop through the UnrollBlock info and create new frames.
//
// Emits code that
//   1. loads number_of_frames, frame_pcs and frame_sizes from the UnrollBlock,
//   2. resizes the current top frame by the UnrollBlock's caller_adjustment
//      plus frame::parent_ijava_frame_abi_size,
//   3. pushes one skeleton frame per UnrollBlock entry (see push_skeleton_frame),
//   4. stores the pc found after the last consumed pcs entry into the lr slot
//      of the final top frame.
//
//   masm                 - assembler the code is emitted into
//   deopt                - only forwarded to push_skeleton_frame
//   unroll_block_reg     - address of the Deoptimization::UnrollBlock (read only)
//   frame_sizes_reg, number_of_frames_reg, pcs_reg, frame_size_reg, pc_reg
//                        - scratch registers, all invalidated
//
// R11_scratch1, R12_scratch2 and R0 are used in addition to the registers
// passed in.
static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
                                 /* read */
                                 Register unroll_block_reg,
                                 /* invalidate */
                                 Register frame_sizes_reg,
                                 Register number_of_frames_reg,
                                 Register pcs_reg,
                                 Register frame_size_reg,
                                 Register pc_reg) {
  Label loop;

  // _number_of_frames is of type int (deoptimization.hpp)
  __ lwa(number_of_frames_reg,
         in_bytes(Deoptimization::UnrollBlock::number_of_frames_offset()),
         unroll_block_reg);
  __ ld(pcs_reg,
        in_bytes(Deoptimization::UnrollBlock::frame_pcs_offset()),
        unroll_block_reg);
  __ ld(frame_sizes_reg,
        in_bytes(Deoptimization::UnrollBlock::frame_sizes_offset()),
        unroll_block_reg);

  // stack: (caller_of_deoptee, ...).

  // At this point we either have an interpreter frame or a compiled
  // frame on top of stack. If it is a compiled frame we push a new c2i
  // adapter here

  // Memorize top-frame stack-pointer.
  // (frame_size_reg temporarily holds the old SP until the loop below
  // overwrites it.)
  __ mr(frame_size_reg/*old_sp*/, R1_SP);

  // Resize interpreter top frame OR C2I adapter.

  // At this moment, the top frame (which is the caller of the deoptee) is
  // an interpreter frame or a newly pushed C2I adapter or an entry frame.
  // The top frame has a TOP_IJAVA_FRAME_ABI and the frame contains the
  // outgoing arguments.
  //
  // In order to push the interpreter frame for the deoptee, we need to
  // resize the top frame such that we are able to place the deoptee's
  // locals in the frame.
  // Additionally, we have to turn the top frame's TOP_IJAVA_FRAME_ABI
  // into a valid PARENT_IJAVA_FRAME_ABI.

  // Load the caller adjustment (an int) and negate it.
  __ lwa(R11_scratch1,
         in_bytes(Deoptimization::UnrollBlock::caller_adjustment_offset()),
         unroll_block_reg);
  __ neg(R11_scratch1, R11_scratch1);

  // R11_scratch1 contains size of locals for frame resizing.
  // R12_scratch2 contains top frame's lr.
  // NOTE(review): R12_scratch2 is not loaded anywhere in this function, so
  // the statement above depends on the caller - confirm. The lr slot written
  // from it below is written again by the first push_skeleton_frame call
  // (std of pc_reg to _abi0(lr) at the same R1_SP before its push_frame).

  // Resize frame by complete frame size prevents TOC from being
  // overwritten by locals. A more stack space saving way would be
  // to copy the TOC to its location in the new abi.
  __ addi(R11_scratch1, R11_scratch1, - frame::parent_ijava_frame_abi_size);

  // now, resize the frame
  __ resize_frame(R11_scratch1, pc_reg/*tmp*/);

  // In the case where we have resized a c2i frame above, the optional
  // alignment below the locals has size 32 (why?).
  __ std(R12_scratch2, _abi0(lr), R1_SP);

  // Initialize initial_caller_sp.
  // (frame_size_reg still holds the SP memorized before the resize.)
  __ std(frame_size_reg, _ijava_state_neg(sender_sp), R1_SP);

#ifdef ASSERT
  // Make sure that there is at least one entry in the array.
  __ cmpdi(CR0, number_of_frames_reg, 0);
  __ asm_assert_ne("array_size must be > 0");
#endif

  // Now push the new interpreter frames.
  //
  __ bind(loop);
  // Allocate a new frame, fill in the pc.
  push_skeleton_frame(masm, deopt,
                      unroll_block_reg,
                      frame_sizes_reg,
                      number_of_frames_reg,
                      pcs_reg,
                      frame_size_reg,
                      pc_reg);
  // Repeat until push_skeleton_frame has counted number_of_frames_reg down to 0.
  __ cmpdi(CR0, number_of_frames_reg, 0);
  __ bne(CR0, loop);

  // Get the return address pointing into the template interpreter.
  // (pcs_reg was advanced past the last consumed entry by the loop.)
  __ ld(R0, 0, pcs_reg);
  // Store it in the lr slot of the top interpreter frame.
  __ std(R0, _abi0(lr), R1_SP);
}
3009 #endif
3010
3011 void SharedRuntime::generate_deopt_blob() {
3012 // Allocate space for the code
3013 ResourceMark rm;
3014 // Setup code generation tools
3015 const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id);
3016 CodeBuffer buffer(name, 2048, 1024);
3017 InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
3018 Label exec_mode_initialized;
3019 OopMap* map = nullptr;
3020 OopMapSet *oop_maps = new OopMapSet();
3021
3022 // size of ABI112 plus spill slots for R3_RET and F1_RET.
3023 const int frame_size_in_bytes = frame::native_abi_reg_args_spill_size;
3024 const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
3025 int first_frame_size_in_bytes = 0; // frame size of "unpack frame" for call to fetch_unroll_info.
3026
3027 const Register exec_mode_reg = R21_tmp1;
3028
3029 const address start = __ pc();
3030 int exception_offset = 0;
3031 int exception_in_tls_offset = 0;
3032 int reexecute_offset = 0;
3033
3034 #if defined(COMPILER1) || defined(COMPILER2)
3035 // --------------------------------------------------------------------------
3036 // Prolog for non exception case!
3037
3038 // We have been called from the deopt handler of the deoptee.
3039 //
3040 // deoptee:
3041 // ...
3042 // call X
3043 // ...
3044 // deopt_handler: call_deopt_stub
3045 // cur. return pc --> ...
3046 //
3047 // The return_pc has been stored in the frame of the deoptee and
3048 // will replace the address of the deopt_handler in the call
3049 // to Deoptimization::fetch_unroll_info below.
3050
3051 // Push the "unpack frame"
3052 // Save everything in sight.
3053 map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
3054 &first_frame_size_in_bytes,
3055 /*generate_oop_map=*/ true,
3056 RegisterSaver::return_pc_is_lr,
3057 /*save_vectors*/ SuperwordUseVSX);
3058 assert(map != nullptr, "OopMap must have been created");
3059
3060 __ li(exec_mode_reg, Deoptimization::Unpack_deopt);
3061 // Save exec mode for unpack_frames.
3062 __ b(exec_mode_initialized);
3063
3064 // --------------------------------------------------------------------------
3065 // Prolog for exception case
3066
3067 // An exception is pending.
3068 // We have been called with a return (interpreter) or a jump (exception blob).
3069 //
3070 // - R3_ARG1: exception oop
3071 // - R4_ARG2: exception pc
3072
3073 exception_offset = __ pc() - start;
3074
3075 BLOCK_COMMENT("Prolog for exception case");
3076
3077 // Store exception oop and pc in thread (location known to GC).
3078 // This is needed since the call to "fetch_unroll_info()" may safepoint.
3079 __ std(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
3080 __ std(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
3081 __ std(R4_ARG2, _abi0(lr), R1_SP);
3082
3083 // Vanilla deoptimization with an exception pending in exception_oop.
3084 exception_in_tls_offset = __ pc() - start;
3085
3086 // Push the "unpack frame".
3087 // Save everything in sight.
3088 RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
3089 &first_frame_size_in_bytes,
3090 /*generate_oop_map=*/ false,
3091 RegisterSaver::return_pc_is_pre_saved,
3092 /*save_vectors*/ SuperwordUseVSX);
3093
3094 // Deopt during an exception. Save exec mode for unpack_frames.
3095 __ li(exec_mode_reg, Deoptimization::Unpack_exception);
3096
3097 // fall through
3098 #ifdef COMPILER1
3099 __ b(exec_mode_initialized);
3100
3101 // Reexecute entry, similar to c2 uncommon trap
3102 reexecute_offset = __ pc() - start;
3103
3104 RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
3105 &first_frame_size_in_bytes,
3106 /*generate_oop_map=*/ false,
3107 RegisterSaver::return_pc_is_pre_saved,
3108 /*save_vectors*/ SuperwordUseVSX);
3109 __ li(exec_mode_reg, Deoptimization::Unpack_reexecute);
3110 #endif
3111
3112 // --------------------------------------------------------------------------
3113 __ BIND(exec_mode_initialized);
3114
3115 const Register unroll_block_reg = R22_tmp2;
3116
3117 // We need to set `last_Java_frame' because `fetch_unroll_info' will
3118 // call `last_Java_frame()'. The value of the pc in the frame is not
3119 // particularly important. It just needs to identify this blob.
3120 __ set_last_Java_frame(R1_SP, noreg);
3121
3122 // With EscapeAnalysis turned on, this call may safepoint!
3123 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread, exec_mode_reg);
3124 address calls_return_pc = __ last_calls_return_pc();
3125 // Set an oopmap for the call site that describes all our saved registers.
3126 oop_maps->add_gc_map(calls_return_pc - start, map);
3127
3128 __ reset_last_Java_frame();
3129 // Save the return value.
3130 __ mr(unroll_block_reg, R3_RET);
3131
3132 // Restore only the result registers that have been saved
3133 // by save_volatile_registers(...).
3134 RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes, /*save_vectors*/ SuperwordUseVSX);
3135
3136 // reload the exec mode from the UnrollBlock (it might have changed)
3137 __ lwz(exec_mode_reg, in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset()), unroll_block_reg);
3138 // In excp_deopt_mode, restore and clear exception oop which we
3139 // stored in the thread during exception entry above. The exception
3140 // oop will be the return value of this stub.
3141 Label skip_restore_excp;
3142 __ cmpdi(CR0, exec_mode_reg, Deoptimization::Unpack_exception);
3143 __ bne(CR0, skip_restore_excp);
3144 __ ld(R3_RET, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
3145 __ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
3146 __ li(R0, 0);
3147 __ std(R0, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
3148 __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
3149 __ BIND(skip_restore_excp);
3150
3151 __ pop_frame();
3152
3153 // stack: (deoptee, optional i2c, caller of deoptee, ...).
3154
3155 // pop the deoptee's frame
3156 __ pop_frame();
3157
3158 // stack: (caller_of_deoptee, ...).
3159
3160 // Freezing continuation frames requires that the caller is trimmed to unextended sp if compiled.
3161 // If not compiled the loaded value is equal to the current SP (see frame::initial_deoptimization_info())
3162 // and the frame is effectively not resized.
3163 Register caller_sp = R23_tmp3;
3164 __ ld_ptr(caller_sp, Deoptimization::UnrollBlock::initial_info_offset(), unroll_block_reg);
3165 __ resize_frame_absolute(caller_sp, R24_tmp4, R25_tmp5);
3166
3167 // Loop through the `UnrollBlock' info and create interpreter frames.
3168 push_skeleton_frames(masm, true/*deopt*/,
3169 unroll_block_reg,
3170 R23_tmp3,
3171 R24_tmp4,
3172 R25_tmp5,
3173 R26_tmp6,
3174 R27_tmp7);
3175
3176 // stack: (skeletal interpreter frame, ..., optional skeletal
3177 // interpreter frame, optional c2i, caller of deoptee, ...).
3178
3179 // push an `unpack_frame' taking care of float / int return values.
3180 __ push_frame(frame_size_in_bytes, R0/*tmp*/);
3181
3182 // stack: (unpack frame, skeletal interpreter frame, ..., optional
3183 // skeletal interpreter frame, optional c2i, caller of deoptee,
3184 // ...).
3185
3186 // Spill live volatile registers since we'll do a call.
3187 __ std( R3_RET, _native_abi_reg_args_spill(spill_ret), R1_SP);
3188 __ stfd(F1_RET, _native_abi_reg_args_spill(spill_fret), R1_SP);
3189
3190 // Let the unpacker layout information in the skeletal frames just
3191 // allocated.
3192 __ calculate_address_from_global_toc(R3_RET, calls_return_pc, true, true, true, true);
3193 __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R3_RET);
3194 // This is a call to a LEAF method, so no oop map is required.
3195 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
3196 R16_thread/*thread*/, exec_mode_reg/*exec_mode*/);
3197 __ reset_last_Java_frame();
3198
3199 // Restore the volatiles saved above.
3200 __ ld( R3_RET, _native_abi_reg_args_spill(spill_ret), R1_SP);
3201 __ lfd(F1_RET, _native_abi_reg_args_spill(spill_fret), R1_SP);
3202
3203 // Pop the unpack frame.
3204 __ pop_frame();
3205 __ restore_LR(R0);
3206
3207 // stack: (top interpreter frame, ..., optional interpreter frame,
3208 // optional c2i, caller of deoptee, ...).
3209
3210 // Initialize R14_state.
3211 __ restore_interpreter_state(R11_scratch1);
3212 __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
3213
3214 // Return to the interpreter entry point.
3215 __ blr();
3216 #else // !defined(COMPILER1) && !defined(COMPILER2)
3217 __ unimplemented("deopt blob needed only with compiler");
3218 #endif
3219
3220 // Make sure all code is generated
3221 __ flush();
3222
3223 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
3224 reexecute_offset, first_frame_size_in_bytes / wordSize);
3225 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3226 }
3227
3228 #ifdef COMPILER2
// Generates the C2 uncommon trap blob.
//
// The emitted code pushes a dummy "unpack frame", calls
// Deoptimization::uncommon_trap (trap argument taken from R3) to obtain the
// UnrollBlock, pops the unpack frame and the deoptee frame, pushes skeleton
// interpreter frames described by the UnrollBlock, calls
// Deoptimization::unpack_frames to fill them in and returns into the
// interpreter.
//
// Returns the created UncommonTrapBlob, or nullptr if the CodeBuffer could
// not be backed by a blob.
UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() {
  // Allocate space for the code.
  ResourceMark rm;
  // Setup code generation tools.
  const char* name = OptoRuntime::stub_name(StubId::c2_uncommon_trap_id);
  CodeBuffer buffer(name, 2048, 1024);
  if (buffer.blob() == nullptr) {
    return nullptr;
  }
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  address start = __ pc();

  // Note: klass_index_reg/unc_trap_reg share R22/R23 with scratch registers
  // handed to push_skeleton_frames below; their live ranges do not overlap.
  Register unroll_block_reg = R21_tmp1;
  Register klass_index_reg  = R22_tmp2;
  Register unc_trap_reg     = R23_tmp3;
  Register r_return_pc      = R27_tmp7;

  OopMapSet* oop_maps = new OopMapSet();
  int frame_size_in_bytes = frame::native_abi_reg_args_size;
  OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);

  // stack: (deoptee, optional i2c, caller_of_deoptee, ...).

  // Push a dummy `unpack_frame' and call
  // `Deoptimization::uncommon_trap' to pack the compiled frame into a
  // vframe array and return the `UnrollBlock' information.

  // Save LR to compiled frame.
  __ save_LR(R11_scratch1);

  // Push an "uncommon_trap" frame.
  __ push_frame_reg_args(0, R11_scratch1);

  // stack: (unpack frame, deoptee, optional i2c, caller_of_deoptee, ...).

  // Set the `unpack_frame' as last_Java_frame.
  // `Deoptimization::uncommon_trap' expects it and considers its
  // sender frame as the deoptee frame.
  // Remember the address of the instruction whose address will be
  // moved to r_return_pc; the oop map below is registered for this pc.
  address gc_map_pc = __ pc();
  __ calculate_address_from_global_toc(r_return_pc, gc_map_pc, true, true, true, true);
  __ set_last_Java_frame(/*sp*/R1_SP, r_return_pc);

  // Move the incoming R3 value out of the way and pass it as second argument.
  __ mr(klass_index_reg, R3);
  __ li(R5_ARG3, Deoptimization::Unpack_uncommon_trap);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap),
                  R16_thread, klass_index_reg, R5_ARG3);

  // Set an oopmap for the call site.
  oop_maps->add_gc_map(gc_map_pc - start, map);

  __ reset_last_Java_frame();

  // Pop the `unpack frame'.
  __ pop_frame();

  // stack: (deoptee, optional i2c, caller_of_deoptee, ...).

  // Save the return value.
  __ mr(unroll_block_reg, R3_RET);

  // Pop the deoptee's frame.
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

#ifdef ASSERT
  // The UnrollBlock returned by uncommon_trap must carry Unpack_uncommon_trap.
  __ lwz(R22_tmp2, in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset()), unroll_block_reg);
  __ cmpdi(CR0, R22_tmp2, (unsigned)Deoptimization::Unpack_uncommon_trap);
  __ asm_assert_eq("OptoRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
#endif

  // Freezing continuation frames requires that the caller is trimmed to unextended sp if compiled.
  // If not compiled the loaded value is equal to the current SP (see frame::initial_deoptimization_info())
  // and the frame is effectively not resized.
  Register caller_sp = R23_tmp3;
  __ ld_ptr(caller_sp, Deoptimization::UnrollBlock::initial_info_offset(), unroll_block_reg);
  __ resize_frame_absolute(caller_sp, R24_tmp4, R25_tmp5);

  // Allocate new interpreter frame(s) and possibly a c2i adapter
  // frame.
  // R22..R26 are scratch here; r_return_pc (R27_tmp7) is not passed and is
  // reused for the second set_last_Java_frame below.
  push_skeleton_frames(masm, false/*deopt*/,
                       unroll_block_reg,
                       R22_tmp2,
                       R23_tmp3,
                       R24_tmp4,
                       R25_tmp5,
                       R26_tmp6);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, optional c2i, caller of deoptee, ...).

  // Push a dummy `unpack_frame' taking care of float return values.
  // Call `Deoptimization::unpack_frames' to layout information in the
  // interpreter frames just created.

  // Push a simple "unpack frame" here.
  __ push_frame_reg_args(0, R11_scratch1);

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, optional c2i, caller of deoptee,
  // ...).

  // Set the "unpack_frame" as last_Java_frame.
  __ set_last_Java_frame(/*sp*/R1_SP, r_return_pc);

  // Indicate it is the uncommon trap case.
  __ li(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
  // Let the unpacker layout information in the skeletal frames just
  // allocated.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
                  R16_thread, unc_trap_reg);

  __ reset_last_Java_frame();
  // Pop the `unpack frame'.
  __ pop_frame();
  // Restore LR from top interpreter frame.
  __ restore_LR(R11_scratch1);

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // optional c2i, caller of deoptee, ...).

  __ restore_interpreter_state(R11_scratch1);
  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);

  // Return to the interpreter entry point.
  __ blr();

  masm->flush();

  // Blob frame size is passed in words.
  return UncommonTrapBlob::create(&buffer, oop_maps, frame_size_in_bytes/wordSize);
}
3362 #endif // COMPILER2
3363
// Generate a special Compile2Runtime blob that saves all registers, and setup oopmap.
//
// The emitted code saves the live registers, calls `call_ptr' as a leaf call
// with the current thread as argument, registers an oop map at that call's
// return pc and then either forwards a pending exception or restores the
// registers and returns.
//
//   id       - a polling page stub id (asserted); it selects where the return
//              pc comes from and whether vector registers are saved
//   call_ptr - runtime entry point to call
//
// Returns the created SafepointBlob.
SafepointBlob* SharedRuntime::generate_handler_blob(StubId id, address call_ptr) {
  assert(StubRoutines::forward_exception_entry() != nullptr,
         "must be generated before");
  assert(is_polling_page_id(id), "expected a polling page stub id");

  ResourceMark rm;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map;

  // Allocate space for the code. Setup code generation tools.
  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  address start = __ pc();
  int frame_size_in_bytes = 0;

  RegisterSaver::ReturnPCLocation return_pc_location;
  bool cause_return = (id == StubId::shared_polling_page_return_handler_id);
  if (cause_return) {
    // Nothing to do here. The frame has already been popped in MachEpilogNode.
    // Register LR already contains the return pc.
    return_pc_location = RegisterSaver::return_pc_is_pre_saved;
  } else {
    // Use thread()->saved_exception_pc() as return pc.
    return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc;
  }

  bool save_vectors = (id == StubId::shared_polling_page_vectors_safepoint_handler_id);

  // Save registers, fpu state, and flags. Set R31 = return pc.
  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                                   &frame_size_in_bytes,
                                                                   /*generate_oop_map=*/ true,
                                                                   return_pc_location, save_vectors);

  // The following is basically a call_VM. However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.
  __ set_last_Java_frame(/*sp=*/R1_SP, /*pc=*/noreg);

  // The return address must always be correct so that the frame constructor
  // never sees an invalid pc.

  // Do the call
  __ call_VM_leaf(call_ptr, R16_thread);
  address calls_return_pc = __ last_calls_return_pc();

  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.
  oop_maps->add_gc_map(calls_return_pc - start, map);

  Label noException;

  // Clear the last Java frame.
  __ reset_last_Java_frame();

  BLOCK_COMMENT("  Check pending exception.");
  const Register pending_exception = R0;
  __ ld(pending_exception, thread_(pending_exception));
  __ cmpdi(CR0, pending_exception, 0);
  __ beq(CR0, noException);

  // Exception pending
  RegisterSaver::restore_live_registers_and_pop_frame(masm,
                                                      frame_size_in_bytes,
                                                      /*restore_ctr=*/true, save_vectors);

  BLOCK_COMMENT("  Jump to forward_exception_entry.");
  // Jump to forward_exception_entry, with the issuing PC in LR
  // so it looks like the original nmethod called forward_exception_entry.
  __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);

  // No exception case.
  __ BIND(noException);

  if (!cause_return) {
    Label no_adjust;
    // If our stashed return pc was modified by the runtime we avoid touching it
    // (the lr slot of the frame below ours is compared against R31).
    __ ld(R0, frame_size_in_bytes + _abi0(lr), R1_SP);
    __ cmpd(CR0, R0, R31);
    __ bne(CR0, no_adjust);

    // Adjust return pc forward to step over the safepoint poll instruction
    // (one 4-byte instruction) and write it back to the same lr slot.
    __ addi(R31, R31, 4);
    __ std(R31, frame_size_in_bytes + _abi0(lr), R1_SP);

    __ bind(no_adjust);
  }

  // Normal exit, restore registers and exit.
  RegisterSaver::restore_live_registers_and_pop_frame(masm,
                                                      frame_size_in_bytes,
                                                      /*restore_ctr=*/true, save_vectors);

  __ blr();

  // Make sure all code is generated
  masm->flush();

  // Fill-out other meta info
  // CodeBlob frame size is in words.
  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_bytes / wordSize);
}
3471
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into the vm to find out the proper destination
// of a java call. All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
// Parameters:
//   id          - stub id; must satisfy is_resolve_id(id).
//   destination - address of the VM function the stub calls to resolve the call.
//
// Emitted code, in order:
//   1. Push a frame and save the live registers
//      (RegisterSaver::return_pc_is_lr).
//   2. Call 'destination' with R16_thread and R19_method as arguments and
//      register an oop map at the call's return pc.
//   3. No pending exception: move the resolved target from R3_RET to CTR,
//      restore the registers (leaving CTR alone), fetch the returned method
//      into R19_method and branch via CTR.
//   4. Pending exception: restore the registers (including CTR), clear the
//      thread's vm_result_oop field, load the pending exception into R3_ARG1
//      and branch to StubRoutines::forward_exception_entry().
//
// Returns the RuntimeStub created from the generated code.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination) {
  assert(is_resolve_id(id), "expected a resolve stub id");

  // allocate space for the code
  ResourceMark rm;

  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  // Filled in by push_frame_reg_args_and_save_live_registers below.
  int frame_size_in_bytes;

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = nullptr;

  // All oop map offsets recorded below are relative to this address.
  address start = __ pc();

  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                                   &frame_size_in_bytes,
                                                                   /*generate_oop_map*/ true,
                                                                   RegisterSaver::return_pc_is_lr);

  // Use noreg as last_Java_pc, the return pc will be reconstructed
  // from the physical frame.
  __ set_last_Java_frame(/*sp*/R1_SP, noreg);

  // Code offset at which the frame is fully set up; handed to
  // new_runtime_stub() at the end.
  int frame_complete = __ offset();

  // Pass R19_method as 2nd (optional) argument, used by
  // counter_overflow_stub.
  __ call_VM_leaf(destination, R16_thread, R19_method);
  address calls_return_pc = __ last_calls_return_pc();
  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.
  // Create the oopmap for the call's return pc.
  oop_maps->add_gc_map(calls_return_pc - start, map);

  // R3_RET contains the address we are going to jump to assuming no exception got installed.

  // clear last_Java_sp
  __ reset_last_Java_frame();

  // Check for pending exceptions.
  BLOCK_COMMENT("Check for pending exceptions.");
  Label pending;
  __ ld(R11_scratch1, thread_(pending_exception));
  __ cmpdi(CR0, R11_scratch1, 0);
  __ bne(CR0, pending);

  // Stash the jump target in CTR before R3_RET is overwritten by the
  // register restore below.
  __ mtctr(R3_RET); // Ctr will not be touched by restore_live_registers_and_pop_frame.

  RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ false);

  // Get the returned method.
  __ get_vm_result_metadata(R19_method);

  // Tail-jump to the resolved destination.
  __ bctr();


  // Pending exception after the safepoint.
  __ BIND(pending);

  // On this path CTR carries no result, so it is restored along with the
  // other registers.
  RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ true);

  // exception pending => remove activation and forward to exception handler

  // Zero the thread's vm_result_oop field and hand the pending exception to
  // the forwarding stub in R3_ARG1.
  __ li(R11_scratch1, 0);
  __ ld(R3_ARG1, thread_(pending_exception));
  __ std(R11_scratch1, in_bytes(JavaThread::vm_result_oop_offset()), R16_thread);
  __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);

  // -------------
  // Make sure all code is generated.
  masm->flush();

  // return the blob
  // frame_size_words or bytes??
  // The value passed here is frame_size_in_bytes / wordSize, i.e. words.
  // NOTE(review): the trailing 'true' is presumably caller_must_gc_arguments,
  // matching the header comment above - confirm against
  // RuntimeStub::new_runtime_stub.
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize,
                                       oop_maps, true);
}
3560
// Continuation point for throwing of implicit exceptions that are
// not handled in the current activation. Fabricates an exception
// oop and initiates normal exception dispatching in this
// frame. Only callee-saved registers are preserved (through the
// normal register window / RegisterMap handling). If the compiler
// needs all registers to be preserved between the fault point and
// the exception handler then it must assume responsibility for that
// in AbstractCompiler::continuation_for_implicit_null_exception or
// continuation_for_implicit_division_by_zero_exception. All other
// implicit exceptions (e.g., NullPointerException or
// AbstractMethodError on entry) are either at call sites or
// otherwise assume that stack unwinding will be initiated, so
// caller saved registers were assumed volatile in the compiler.
//
// Note that we generate only this stub into a RuntimeStub, because
// it needs to be properly traversed and ignored during GC, so we
// change the meaning of the "__" macro within this method.
//
// Note: the routine set_pc_not_at_call_for_caller in
// SharedRuntime.cpp requires that this code be generated into a
// RuntimeStub.
//
// Parameters:
//   id            - stub id; must satisfy is_throw_id(id).
//   runtime_entry - address of the C function the stub calls with the
//                   current thread (R16_thread) as its only argument.
//
// The emitted stub saves LR, pushes a frame, calls runtime_entry, pops the
// frame, restores LR and jumps to StubRoutines::forward_exception_entry().
// In ASSERT builds it stops if no exception is pending after the call.
//
// Returns the RuntimeStub created from the generated code.
RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) {
  assert(is_throw_id(id), "expected a throw stub id");

  const char* name = SharedRuntime::stub_name(id);

  ResourceMark rm;
  const char* timer_msg = "SharedRuntime generate_throw_exception";
  TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime));

  // Debug builds get 512 extra bytes of code space (DEBUG_ONLY).
  CodeBuffer code(name, 1024 DEBUG_ONLY(+ 512), 0);
  MacroAssembler* masm = new MacroAssembler(&code);

  OopMapSet* oop_maps = new OopMapSet();
  int frame_size_in_bytes = frame::native_abi_reg_args_size;
  // The OopMap is sized in jint-sized units; it gets no entries added here.
  OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);

  // Offsets handed to add_gc_map() and new_runtime_stub() are relative to
  // this address.
  address start = __ pc();

  __ save_LR(R11_scratch1);

  // Push a frame.
  __ push_frame_reg_args(0, R11_scratch1);

  address frame_complete_pc = __ pc();

  // Note that we always have a runtime stub frame on the top of
  // stack by this point. Remember the offset of the instruction
  // whose address will be moved to R11_scratch1.
  address gc_map_pc = __ get_PC_trash_LR(R11_scratch1);

  // R11_scratch1 (the pc obtained above) is recorded as last_Java_pc.
  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);

  // Pass the current thread as the only C argument.
  __ mr(R3_ARG1, R16_thread);
  __ call_c(runtime_entry);

  // Set an oopmap for the call site.
  // The map is keyed by gc_map_pc, the pc that was recorded as last_Java_pc,
  // rather than by the return address of the call.
  oop_maps->add_gc_map((int)(gc_map_pc - start), map);

  __ reset_last_Java_frame();

#ifdef ASSERT
  // Make sure that this code is only executed if there is a pending
  // exception.
  {
    Label L;
    __ ld(R0,
          in_bytes(Thread::pending_exception_offset()),
          R16_thread);
    __ cmpdi(CR0, R0, 0);
    __ bne(CR0, L);
    __ stop("SharedRuntime::throw_exception: no pending exception");
    __ bind(L);
  }
#endif

  // Pop frame.
  __ pop_frame();

  __ restore_LR(R11_scratch1);

  // Jump (not call) to the exception forwarding stub through CTR.
  __ load_const(R11_scratch1, StubRoutines::forward_exception_entry());
  __ mtctr(R11_scratch1);
  __ bctr();

  // Create runtime stub with OopMap.
  // The frame size is passed in words (frame_size_in_bytes / wordSize).
  RuntimeStub* stub =
    RuntimeStub::new_runtime_stub(name, &code,
                                  /*frame_complete=*/ (int)(frame_complete_pc - start),
                                  frame_size_in_bytes/wordSize,
                                  oop_maps,
                                  false);
  return stub;
}
3655
3656 //------------------------------Montgomery multiplication------------------------
3657 //
3658
// Subtract 0:b from carry:a. Return carry.
//
// a and b are arrays of len 64-bit words (accessed with ldx/stdx at an
// 8-byte stride), processed from index 0 upwards with the borrow propagated
// from word to word, so index 0 is the least significant word. a is
// overwritten with the difference; b is only read.
//
// The return value is carry + CA - 1 (see the addme below), where CA is the
// carry flag left by the subtraction of the last word.
//
// NOTE(review): len is moved into CTR and the loop body executes before the
// first bdnz test, so len is presumably expected to be >= 1 - confirm
// against the callers.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  long i = 0;               // byte offset into a and b, advanced by 8 per word
  unsigned long tmp, tmp2;  // scratch registers for the asm sequence
  __asm__ __volatile__ (
    "subfc %[tmp], %[tmp], %[tmp] \n" // pre-set CA
    "mtctr %[len] \n"
    "0: \n"
    "ldx %[tmp], %[i], %[a] \n"
    "ldx %[tmp2], %[i], %[b] \n"
    "subfe %[tmp], %[tmp2], %[tmp] \n" // subtract extended
    "stdx %[tmp], %[i], %[a] \n"
    "addi %[i], %[i], 8 \n"
    "bdnz 0b \n"
    "addme %[tmp], %[carry] \n" // carry + CA - 1
    : [i]"+b"(i), [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2)
    : [a]"r"(a), [b]"r"(b), [carry]"r"(carry), [len]"r"(len)
    // CTR is the loop counter, XER holds CA, and a[] is written via stdx.
    : "ctr", "xer", "memory"
  );
  return tmp;
}
3681
// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
//
// In effect (T2:T1:T0) += A * B: the low product word (mulld) is added to
// T0, the high product word (mulhdu) plus the carry out of T0 is added to
// T1, and the carry out of T1 is added to T2. T0, T1 and T2 are updated in
// place through the references.
inline void MACC(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  unsigned long hi, lo;  // high and low 64 bits of the product A * B
  __asm__ __volatile__ (
    "mulld %[lo], %[A], %[B] \n"
    "mulhdu %[hi], %[A], %[B] \n"
    "addc %[T0], %[T0], %[lo] \n"
    "adde %[T1], %[T1], %[hi] \n"
    "addze %[T2], %[T2] \n"
    : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [B]"r"(B)
    // The carrying adds use the carry bit, which lives in XER.
    : "xer"
  );
}
3697
// As above, but add twice the double-length result into the
// accumulator.
//
// In effect (T2:T1:T0) += 2 * A * B. The product is computed once and the
// three-instruction carry chain (addc/adde/addze) is applied twice, rather
// than doubling the product first.
inline void MACC2(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  unsigned long hi, lo;  // high and low 64 bits of the product A * B
  __asm__ __volatile__ (
    "mulld %[lo], %[A], %[B] \n"
    "mulhdu %[hi], %[A], %[B] \n"
    // First accumulation of hi:lo.
    "addc %[T0], %[T0], %[lo] \n"
    "adde %[T1], %[T1], %[hi] \n"
    "addze %[T2], %[T2] \n"
    // Second accumulation of the same hi:lo.
    "addc %[T0], %[T0], %[lo] \n"
    "adde %[T1], %[T1], %[hi] \n"
    "addze %[T2], %[T2] \n"
    : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [B]"r"(B)
    // The carrying adds use the carry bit, which lives in XER.
    : "xer"
  );
}
3716
3717 // Fast Montgomery multiplication. The derivation of the algorithm is
3718 // in "A Cryptographic Library for the Motorola DSP56000,
3719 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
3720 static void
3721 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3722 unsigned long m[], unsigned long inv, int len) {
3723 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3724 int i;
3725
3726 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3727
3728 for (i = 0; i < len; i++) {
3729 int j;
3730 for (j = 0; j < i; j++) {
3731 MACC(a[j], b[i-j], t0, t1, t2);
3732 MACC(m[j], n[i-j], t0, t1, t2);
3733 }
3734 MACC(a[i], b[0], t0, t1, t2);
3735 m[i] = t0 * inv;
3736 MACC(m[i], n[0], t0, t1, t2);
3737
3738 assert(t0 == 0, "broken Montgomery multiply");
3739
3740 t0 = t1; t1 = t2; t2 = 0;
3741 }
3742
3743 for (i = len; i < 2*len; i++) {
3744 int j;
3745 for (j = i-len+1; j < len; j++) {
3746 MACC(a[j], b[i-j], t0, t1, t2);
3747 MACC(m[j], n[i-j], t0, t1, t2);
3748 }
3749 m[i-len] = t0;
3750 t0 = t1; t1 = t2; t2 = 0;
3751 }
3752
3753 while (t0) {
3754 t0 = sub(m, n, t0, len);
3755 }
3756 }
3757
3758 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3759 // multiplies so it should be up to 25% faster than Montgomery
3760 // multiplication. However, its loop control is more complex and it
3761 // may actually run slower on some machines.
3762 static void
3763 montgomery_square(unsigned long a[], unsigned long n[],
3764 unsigned long m[], unsigned long inv, int len) {
3765 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3766 int i;
3767
3768 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3769
3770 for (i = 0; i < len; i++) {
3771 int j;
3772 int end = (i+1)/2;
3773 for (j = 0; j < end; j++) {
3774 MACC2(a[j], a[i-j], t0, t1, t2);
3775 MACC(m[j], n[i-j], t0, t1, t2);
3776 }
3777 if ((i & 1) == 0) {
3778 MACC(a[j], a[j], t0, t1, t2);
3779 }
3780 for (; j < i; j++) {
3781 MACC(m[j], n[i-j], t0, t1, t2);
3782 }
3783 m[i] = t0 * inv;
3784 MACC(m[i], n[0], t0, t1, t2);
3785
3786 assert(t0 == 0, "broken Montgomery square");
3787
3788 t0 = t1; t1 = t2; t2 = 0;
3789 }
3790
3791 for (i = len; i < 2*len; i++) {
3792 int start = i-len+1;
3793 int end = start + (len - start)/2;
3794 int j;
3795 for (j = start; j < end; j++) {
3796 MACC2(a[j], a[i-j], t0, t1, t2);
3797 MACC(m[j], n[i-j], t0, t1, t2);
3798 }
3799 if ((i & 1) == 0) {
3800 MACC(a[j], a[j], t0, t1, t2);
3801 }
3802 for (; j < len; j++) {
3803 MACC(m[j], n[i-j], t0, t1, t2);
3804 }
3805 m[i-len] = t0;
3806 t0 = t1; t1 = t2; t2 = 0;
3807 }
3808
3809 while (t0) {
3810 t0 = sub(m, n, t0, len);
3811 }
3812 }
3813
// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
// Doesn't seem to be relevant for Power8 so we use the same value.
//
// The value is compared against the array length in jints (not longwords):
// SharedRuntime::montgomery_square uses the dedicated squaring routine when
// len >= MONTGOMERY_SQUARING_THRESHOLD and otherwise falls back to the
// general multiplication with both factors equal to a.
#define MONTGOMERY_SQUARING_THRESHOLD 64
3818
// Copy len longwords from s to d in reverse order: s[k] ends up in
// d[len-1-k]. On little endian machines (VM_LITTLE_ENDIAN) the two 32-bit
// halves of every longword are exchanged on the way. s is only read.
// Nothing is copied when len <= 0.
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
  for (int k = 0; k < len; k++) {
    unsigned long word = s[k];
#ifdef VM_LITTLE_ENDIAN
    // Swap words in a longword on little endian machines.
    word = (word >> 32) | (word << 32);
#endif
    d[len - 1 - k] = word;
  }
}
3834
3835 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3836 jint len, jlong inv,
3837 jint *m_ints) {
3838 len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3839 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3840 int longwords = len/2;
3841
3842 // Make very sure we don't use so much space that the stack might
3843 // overflow. 512 jints corresponds to an 16384-bit integer and
3844 // will use here a total of 8k bytes of stack space.
3845 int divisor = sizeof(unsigned long) * 4;
3846 guarantee(longwords <= 8192 / divisor, "must be");
3847 int total_allocation = longwords * sizeof (unsigned long) * 4;
3848 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3849
3850 // Local scratch arrays
3851 unsigned long
3852 *a = scratch + 0 * longwords,
3853 *b = scratch + 1 * longwords,
3854 *n = scratch + 2 * longwords,
3855 *m = scratch + 3 * longwords;
3856
3857 reverse_words((unsigned long *)a_ints, a, longwords);
3858 reverse_words((unsigned long *)b_ints, b, longwords);
3859 reverse_words((unsigned long *)n_ints, n, longwords);
3860
3861 ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3862
3863 reverse_words(m, (unsigned long *)m_ints, longwords);
3864 }
3865
3866 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3867 jint len, jlong inv,
3868 jint *m_ints) {
3869 len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3870 assert(len % 2 == 0, "array length in montgomery_square must be even");
3871 int longwords = len/2;
3872
3873 // Make very sure we don't use so much space that the stack might
3874 // overflow. 512 jints corresponds to an 16384-bit integer and
3875 // will use here a total of 6k bytes of stack space.
3876 int divisor = sizeof(unsigned long) * 3;
3877 guarantee(longwords <= (8192 / divisor), "must be");
3878 int total_allocation = longwords * sizeof (unsigned long) * 3;
3879 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3880
3881 // Local scratch arrays
3882 unsigned long
3883 *a = scratch + 0 * longwords,
3884 *n = scratch + 1 * longwords,
3885 *m = scratch + 2 * longwords;
3886
3887 reverse_words((unsigned long *)a_ints, a, longwords);
3888 reverse_words((unsigned long *)n_ints, n, longwords);
3889
3890 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3891 ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3892 } else {
3893 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3894 }
3895
3896 reverse_words(m, (unsigned long *)m_ints, longwords);
3897 }
3898
// Not implemented in this file: calls Unimplemented() and never produces an
// adapter for vk. The nullptr return follows the Unimplemented() call and
// gives the function a return value on every path.
BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
  Unimplemented();
  return nullptr;
}
3903
3904 #if INCLUDE_JFR
3905
// For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
// It returns a jobject handle to the event writer.
// The handle is dereferenced and the return value is the event writer oop.
//
// The emitted stub saves LR in the caller's frame, pushes a frame, calls
// JfrIntrinsicSupport::write_checkpoint with R16_thread, resolves the
// returned handle in R3_RET, pops the frame, reloads LR and returns.
//
// Returns the RuntimeStub created from the generated code.
RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
  const char* name = SharedRuntime::stub_name(StubId::shared_jfr_write_checkpoint_id);
  CodeBuffer code(name, 512, 64);
  MacroAssembler* masm = new MacroAssembler(&code);

  // Temporaries for saving/restoring LR and for resolve_global_jobject.
  Register tmp1 = R10_ARG8;
  Register tmp2 = R9_ARG7;

  // Frame size in VMRegImpl stack slots; converted to words further down.
  int framesize = frame::native_abi_reg_args_size / VMRegImpl::stack_slot_size;
  address start = __ pc();
  __ mflr(tmp1);
  __ std(tmp1, _abi0(lr), R1_SP);  // save return pc
  __ push_frame_reg_args(0, tmp1);
  // Code offset at which the frame is fully set up.
  int frame_complete = __ pc() - start;
  __ set_last_Java_frame(R1_SP, noreg);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), R16_thread);
  // Remembered so the oop map below can be registered for the call site.
  address calls_return_pc = __ last_calls_return_pc();
  __ reset_last_Java_frame();
  // The handle is dereferenced through a load barrier.
  __ resolve_global_jobject(R3_RET, tmp1, tmp2, MacroAssembler::PRESERVATION_NONE);
  __ pop_frame();
  // Reload the return pc saved at stub entry and return to it.
  __ ld(tmp1, _abi0(lr), R1_SP);
  __ mtlr(tmp1);
  __ blr();

  // The oop map is registered for the call's return pc and gets no entries
  // added here.
  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = new OopMap(framesize, 0);
  oop_maps->add_gc_map(calls_return_pc - start, map);

  RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
    RuntimeStub::new_runtime_stub(name, &code, frame_complete,
                                  (framesize >> (LogBytesPerWord - LogBytesPerInt)),
                                  oop_maps, false);
  return stub;
}
3944
3945 // For c2: call to return a leased buffer.
3946 RuntimeStub* SharedRuntime::generate_jfr_return_lease() {
3947 const char* name = SharedRuntime::stub_name(StubId::shared_jfr_return_lease_id);
3948 CodeBuffer code(name, 512, 64);
3949 MacroAssembler* masm = new MacroAssembler(&code);
3950
3951 Register tmp1 = R10_ARG8;
3952 Register tmp2 = R9_ARG7;
3953
3954 int framesize = frame::native_abi_reg_args_size / VMRegImpl::stack_slot_size;
3955 address start = __ pc();
3956 __ mflr(tmp1);
3957 __ std(tmp1, _abi0(lr), R1_SP); // save return pc
3958 __ push_frame_reg_args(0, tmp1);
3959 int frame_complete = __ pc() - start;
3960 __ set_last_Java_frame(R1_SP, noreg);
3961 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), R16_thread);
3962 address calls_return_pc = __ last_calls_return_pc();
3963 __ reset_last_Java_frame();
3964 __ pop_frame();
3965 __ ld(tmp1, _abi0(lr), R1_SP);
3966 __ mtlr(tmp1);
3967 __ blr();
3968
3969 OopMapSet* oop_maps = new OopMapSet();
3970 OopMap* map = new OopMap(framesize, 0);
3971 oop_maps->add_gc_map(calls_return_pc - start, map);
3972
3973 RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
3974 RuntimeStub::new_runtime_stub(name, &code, frame_complete,
3975 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3976 oop_maps, false);
3977 return stub;
3978 }
3979 #endif // INCLUDE_JFR