1 /*
2 * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "asm/macroAssembler.inline.hpp"
27 #include "code/debugInfoRec.hpp"
28 #include "code/compiledIC.hpp"
29 #include "code/vtableStubs.hpp"
30 #include "frame_ppc.hpp"
31 #include "compiler/oopMap.hpp"
32 #include "gc/shared/gcLocker.hpp"
33 #include "interpreter/interpreter.hpp"
34 #include "interpreter/interp_masm.hpp"
35 #include "memory/resourceArea.hpp"
36 #include "oops/klass.inline.hpp"
37 #include "prims/methodHandles.hpp"
38 #include "runtime/continuation.hpp"
39 #include "runtime/continuationEntry.inline.hpp"
40 #include "runtime/jniHandles.hpp"
41 #include "runtime/os.inline.hpp"
42 #include "runtime/safepointMechanism.hpp"
43 #include "runtime/sharedRuntime.hpp"
44 #include "runtime/signature.hpp"
45 #include "runtime/stubRoutines.hpp"
46 #include "runtime/timerTrace.hpp"
47 #include "runtime/vframeArray.hpp"
48 #include "utilities/align.hpp"
49 #include "utilities/macros.hpp"
50 #include "vmreg_ppc.inline.hpp"
51 #ifdef COMPILER1
52 #include "c1/c1_Runtime1.hpp"
53 #endif
54 #ifdef COMPILER2
55 #include "opto/ad.hpp"
56 #include "opto/runtime.hpp"
57 #endif
58 #if INCLUDE_SHENANDOAHGC
59 #include "gc/shenandoah/shenandoahRuntime.hpp"
60 #endif
61
62 #include <alloca.h>
63
// Shorthand used by all generator functions in this file: "__ foo()" emits
// via the local 'masm' MacroAssembler pointer.
#define __ masm->

// Block comments are assembler annotations only; elide them in PRODUCT builds.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

// Bind a label and annotate the bind site with the label's name.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
73
74
class RegisterSaver {
 // Used for saving volatile registers.
 public:

  // Support different return pc locations.
  enum ReturnPCLocation {
    return_pc_is_lr,                            // read the return pc from LR
    return_pc_is_pre_saved,                     // caller already stored it; don't touch it
    return_pc_is_thread_saved_exception_pc      // read it from the thread's saved_exception_pc field
  };

  // Push an abi_reg_args frame, spill all live volatile registers into it and
  // (optionally) record their save locations in a new OopMap.
  // The resulting frame size is returned via out_frame_size_in_bytes.
  static OopMap* push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
                         int* out_frame_size_in_bytes,
                         bool generate_oop_map,
                         ReturnPCLocation return_pc_location,
                         bool save_vectors = false);
  // Inverse of the above: reload all registers and pop the frame.
  static void    restore_live_registers_and_pop_frame(MacroAssembler* masm,
                         int frame_size_in_bytes,
                         bool restore_ctr,
                         bool save_vectors = false);

  // Spill/reload only the Java/C argument registers described by regs
  // (and optionally regs2) around a VM call.
  static void push_frame_and_save_argument_registers(MacroAssembler* masm,
                         Register r_temp,
                         int frame_size,
                         int total_args,
                         const VMRegPair *regs, const VMRegPair *regs2 = nullptr);
  static void restore_argument_registers_and_pop_frame(MacroAssembler*masm,
                         int frame_size,
                         int total_args,
                         const VMRegPair *regs, const VMRegPair *regs2 = nullptr);

  // During deoptimization only the result registers need to be restored
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes, bool save_vectors);

  // Constants and data structures:

  typedef enum {
    int_reg,
    float_reg,
    special_reg,   // currently only CTR (see save/restore code)
    vec_reg
  } RegisterType;

  typedef enum {
    reg_size      = 8,              // bytes per GPR/FPR/special save slot
    half_reg_size = reg_size / 2,
    vec_reg_size  = 16              // bytes per vector register save slot
  } RegisterConstants;

  // One entry per live register: kind, hardware encoding and VMReg handle.
  typedef struct {
    RegisterType        reg_type;
    int                 reg_num;
    VMReg               vmreg;
  } LiveRegType;
};
131
132
// Initializer helpers for LiveRegType table entries below; one per RegisterType.
#define RegisterSaver_LiveIntReg(regname) \
  { RegisterSaver::int_reg,     regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveFloatReg(regname) \
  { RegisterSaver::float_reg,   regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveSpecialReg(regname) \
  { RegisterSaver::special_reg, regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveVecReg(regname) \
  { RegisterSaver::vec_reg,     regname->encoding(), regname->as_VMReg() }
144
static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  // Live registers which get spilled to the stack. Register
  // positions in this array correspond directly to the stack layout.

  //
  // live special registers:
  //
  RegisterSaver_LiveSpecialReg(SR_CTR),
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg( F0  ),
  RegisterSaver_LiveFloatReg( F1  ),
  RegisterSaver_LiveFloatReg( F2  ),
  RegisterSaver_LiveFloatReg( F3  ),
  RegisterSaver_LiveFloatReg( F4  ),
  RegisterSaver_LiveFloatReg( F5  ),
  RegisterSaver_LiveFloatReg( F6  ),
  RegisterSaver_LiveFloatReg( F7  ),
  RegisterSaver_LiveFloatReg( F8  ),
  RegisterSaver_LiveFloatReg( F9  ),
  RegisterSaver_LiveFloatReg( F10 ),
  RegisterSaver_LiveFloatReg( F11 ),
  RegisterSaver_LiveFloatReg( F12 ),
  RegisterSaver_LiveFloatReg( F13 ),
  RegisterSaver_LiveFloatReg( F14 ),
  RegisterSaver_LiveFloatReg( F15 ),
  RegisterSaver_LiveFloatReg( F16 ),
  RegisterSaver_LiveFloatReg( F17 ),
  RegisterSaver_LiveFloatReg( F18 ),
  RegisterSaver_LiveFloatReg( F19 ),
  RegisterSaver_LiveFloatReg( F20 ),
  RegisterSaver_LiveFloatReg( F21 ),
  RegisterSaver_LiveFloatReg( F22 ),
  RegisterSaver_LiveFloatReg( F23 ),
  RegisterSaver_LiveFloatReg( F24 ),
  RegisterSaver_LiveFloatReg( F25 ),
  RegisterSaver_LiveFloatReg( F26 ),
  RegisterSaver_LiveFloatReg( F27 ),
  RegisterSaver_LiveFloatReg( F28 ),
  RegisterSaver_LiveFloatReg( F29 ),
  RegisterSaver_LiveFloatReg( F30 ),
  RegisterSaver_LiveFloatReg( F31 ),
  //
  // live integer registers (R1 and R13 are excluded, see below):
  //
  RegisterSaver_LiveIntReg(   R0  ),
  //RegisterSaver_LiveIntReg( R1  ), // stack pointer
  RegisterSaver_LiveIntReg(   R2  ),
  RegisterSaver_LiveIntReg(   R3  ),
  RegisterSaver_LiveIntReg(   R4  ),
  RegisterSaver_LiveIntReg(   R5  ),
  RegisterSaver_LiveIntReg(   R6  ),
  RegisterSaver_LiveIntReg(   R7  ),
  RegisterSaver_LiveIntReg(   R8  ),
  RegisterSaver_LiveIntReg(   R9  ),
  RegisterSaver_LiveIntReg(   R10 ),
  RegisterSaver_LiveIntReg(   R11 ),
  RegisterSaver_LiveIntReg(   R12 ),
  //RegisterSaver_LiveIntReg( R13 ), // system thread id
  RegisterSaver_LiveIntReg(   R14 ),
  RegisterSaver_LiveIntReg(   R15 ),
  RegisterSaver_LiveIntReg(   R16 ),
  RegisterSaver_LiveIntReg(   R17 ),
  RegisterSaver_LiveIntReg(   R18 ),
  RegisterSaver_LiveIntReg(   R19 ),
  RegisterSaver_LiveIntReg(   R20 ),
  RegisterSaver_LiveIntReg(   R21 ),
  RegisterSaver_LiveIntReg(   R22 ),
  RegisterSaver_LiveIntReg(   R23 ),
  RegisterSaver_LiveIntReg(   R24 ),
  RegisterSaver_LiveIntReg(   R25 ),
  RegisterSaver_LiveIntReg(   R26 ),
  RegisterSaver_LiveIntReg(   R27 ),
  RegisterSaver_LiveIntReg(   R28 ),
  RegisterSaver_LiveIntReg(   R29 ),
  RegisterSaver_LiveIntReg(   R30 ),
  RegisterSaver_LiveIntReg(   R31 )  // must be the last register (see save/restore functions below)
};
224
static const RegisterSaver::LiveRegType RegisterSaver_LiveVecRegs[] = {
  //
  // live vector registers (optional, only these ones are used by C2):
  //
  // Note: the save/restore code for Power10 (stxvp/lxvp) relies on an even
  // element count and on adjacent entries forming consecutive register pairs
  // (see the asserts in push_frame_reg_args_and_save_live_registers).
  RegisterSaver_LiveVecReg( VR0 ),
  RegisterSaver_LiveVecReg( VR1 ),
  RegisterSaver_LiveVecReg( VR2 ),
  RegisterSaver_LiveVecReg( VR3 ),
  RegisterSaver_LiveVecReg( VR4 ),
  RegisterSaver_LiveVecReg( VR5 ),
  RegisterSaver_LiveVecReg( VR6 ),
  RegisterSaver_LiveVecReg( VR7 ),
  RegisterSaver_LiveVecReg( VR8 ),
  RegisterSaver_LiveVecReg( VR9 ),
  RegisterSaver_LiveVecReg( VR10 ),
  RegisterSaver_LiveVecReg( VR11 ),
  RegisterSaver_LiveVecReg( VR12 ),
  RegisterSaver_LiveVecReg( VR13 ),
  RegisterSaver_LiveVecReg( VR14 ),
  RegisterSaver_LiveVecReg( VR15 ),
  RegisterSaver_LiveVecReg( VR16 ),
  RegisterSaver_LiveVecReg( VR17 ),
  RegisterSaver_LiveVecReg( VR18 ),
  RegisterSaver_LiveVecReg( VR19 ),
  RegisterSaver_LiveVecReg( VR20 ),
  RegisterSaver_LiveVecReg( VR21 ),
  RegisterSaver_LiveVecReg( VR22 ),
  RegisterSaver_LiveVecReg( VR23 ),
  RegisterSaver_LiveVecReg( VR24 ),
  RegisterSaver_LiveVecReg( VR25 ),
  RegisterSaver_LiveVecReg( VR26 ),
  RegisterSaver_LiveVecReg( VR27 ),
  RegisterSaver_LiveVecReg( VR28 ),
  RegisterSaver_LiveVecReg( VR29 ),
  RegisterSaver_LiveVecReg( VR30 ),
  RegisterSaver_LiveVecReg( VR31 )
};
262
263
OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
                         int* out_frame_size_in_bytes,
                         bool generate_oop_map,
                         ReturnPCLocation return_pc_location,
                         bool save_vectors) {
  // Push an abi_reg_args-frame and store all registers which may be live.
  // If requested, create an OopMap: Record volatile registers as
  // callee-save values in an OopMap so their save locations will be
  // propagated to the RegisterMap of the caller frame during
  // StackFrameStream construction (needed for deoptimization; see
  // compiledVFrame::create_stack_value).
  // Updated return pc is returned in R31 (if not return_pc_is_pre_saved).

  // Calculate frame size: register save area (scalar regs, optionally vector
  // regs), aligned, plus the ABI header of the new frame.
  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
                                   sizeof(RegisterSaver::LiveRegType);
  const int vecregstosave_num    = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
                                                   sizeof(RegisterSaver::LiveRegType))
                                                : 0;
  const int register_save_size   = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;
  const int frame_size_in_bytes  = align_up(register_save_size, frame::alignment_in_bytes)
                                   + frame::native_abi_reg_args_size;

  *out_frame_size_in_bytes       = frame_size_in_bytes;
  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
  const int register_save_offset = frame_size_in_bytes - register_save_size;

  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
  OopMap* map = generate_oop_map ? new OopMap(frame_size_in_slots, 0) : nullptr;

  BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");

  // push a new frame
  __ push_frame(frame_size_in_bytes, noreg);

  // Save some registers in the last (non-vector) slots of the new frame so we
  // can use them as scratch regs or to determine the return pc.
  // These two slots are the save slots of R31/R30 in the register save area
  // (R31 is the last entry of RegisterSaver_LiveRegs; see table above).
  __ std(R31, frame_size_in_bytes -   reg_size - vecregstosave_num * vec_reg_size, R1_SP);
  __ std(R30, frame_size_in_bytes - 2*reg_size - vecregstosave_num * vec_reg_size, R1_SP);

  // save the flags
  // Do the save_LR by hand and adjust the return pc if requested.
  // R31 (just spilled above) serves as scratch for the return pc.
  switch (return_pc_location) {
    case return_pc_is_lr:                            __ mflr(R31); break;
    case return_pc_is_pre_saved:                     break;  // caller already stored it
    case return_pc_is_thread_saved_exception_pc:     __ ld(R31, thread_(saved_exception_pc)); break;
    default: ShouldNotReachHere();
  }
  if (return_pc_location != return_pc_is_pre_saved) {
    __ std(R31, frame_size_in_bytes + _abi0(lr), R1_SP);
  }

  // save all registers (ints and floats)
  // 'offset' walks the register save area in table order; it must match the
  // layout assumed by the restore functions below.
  int offset = register_save_offset;

  for (int i = 0; i < regstosave_num; i++) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        if (reg_num < 30) { // We spilled R30-31 right at the beginning.
          __ std(as_Register(reg_num), offset, R1_SP);
        }
        break;
      }
      case RegisterSaver::float_reg: {
        __ stfd(as_FloatRegister(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::special_reg: {
        // CTR is saved via a GPR scratch (R30 was spilled above).
        if (reg_num == SR_CTR.encoding()) {
          __ mfctr(R30);
          __ std(R30, offset, R1_SP);
        } else {
          Unimplemented();
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }

    if (generate_oop_map) {
      // Record the save slot (offset>>2 converts bytes to 4-byte stack slots).
      map->set_callee_saved(VMRegImpl::stack2reg(offset>>2),
                            RegisterSaver_LiveRegs[i].vmreg);
    }
    offset += reg_size;
  }

  // Note that generate_oop_map in the following loop is only used for the
  // polling_page_vectors_safepoint_handler_blob and the deopt_blob.
  // The order in which the vector contents are stored depends on Endianness and
  // the utilized instructions (PowerArchitecturePPC64).
  assert(is_aligned(offset, StackAlignmentInBytes), "should be");
  if (PowerArchitecturePPC64 >= 10) {
    // Power10: store register pairs with a single stxvp each.
    assert(is_even(vecregstosave_num), "expectation");
    for (int i = 0; i < vecregstosave_num; i += 2) {
      int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;
      assert(RegisterSaver_LiveVecRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!");

      __ stxvp(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);
      // Note: The contents were read in the same order (see loadV16_Power9 node in ppc.ad).
      // RegisterMap::pd_location only uses the first VMReg for each VectorRegister.
      // stxvp stores the pair in memory order that depends on endianness,
      // hence the LITTLE_ENDIAN_ONLY/BIG_ENDIAN_ONLY index adjustment.
      if (generate_oop_map) {
        map->set_callee_saved(VMRegImpl::stack2reg(offset>>2),
                              RegisterSaver_LiveVecRegs[i LITTLE_ENDIAN_ONLY(+1) ].vmreg);
        map->set_callee_saved(VMRegImpl::stack2reg((offset + vec_reg_size)>>2),
                              RegisterSaver_LiveVecRegs[i BIG_ENDIAN_ONLY(+1) ].vmreg);
      }
      offset += (2 * vec_reg_size);
    }
  } else {
    // Power9 has stxv with an immediate offset; older cores need the offset
    // materialized in a GPR for stxvd2x (R31 is free, it was spilled above).
    for (int i = 0; i < vecregstosave_num; i++) {
      int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;

      if (PowerArchitecturePPC64 >= 9) {
        __ stxv(as_VectorRegister(reg_num)->to_vsr(), offset, R1_SP);
      } else {
        __ li(R31, offset);
        __ stxvd2x(as_VectorRegister(reg_num)->to_vsr(), R31, R1_SP);
      }
      // Note: The contents were read in the same order (see loadV16_Power8 / loadV16_Power9 node in ppc.ad).
      // RegisterMap::pd_location only uses the first VMReg for each VectorRegister.
      if (generate_oop_map) {
        VMReg vsr = RegisterSaver_LiveVecRegs[i].vmreg;
        map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), vsr);
      }
      offset += vec_reg_size;
    }
  }

  assert(offset == frame_size_in_bytes, "consistency check");

  BLOCK_COMMENT("} push_frame_reg_args_and_save_live_registers");

  // And we're done.
  return map;
}
403
404
405 // Pop the current frame and restore all the registers that we
406 // saved.
// Pop the current frame and restore all the registers that we
// saved. Must mirror the layout produced by
// push_frame_reg_args_and_save_live_registers exactly.
void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
                                                         int frame_size_in_bytes,
                                                         bool restore_ctr,
                                                         bool save_vectors) {
  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
                                   sizeof(RegisterSaver::LiveRegType);
  const int vecregstosave_num    = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
                                                   sizeof(RegisterSaver::LiveRegType))
                                                : 0;
  const int register_save_size   = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;

  const int register_save_offset = frame_size_in_bytes - register_save_size;

  BLOCK_COMMENT("restore_live_registers_and_pop_frame {");

  // restore all registers (ints and floats)
  int offset = register_save_offset;

  for (int i = 0; i < regstosave_num; i++) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        if (reg_num != 31) // R31 restored at the end, it's the tmp reg!
          __ ld(as_Register(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::float_reg: {
        __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::special_reg: {
        if (reg_num == SR_CTR.encoding()) {
          if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
            __ ld(R31, offset, R1_SP);
            __ mtctr(R31);
          }
        } else {
          Unimplemented();
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
    offset += reg_size;
  }

  // Reload the vector registers with the instruction sequence matching the
  // one used when saving (Power10 pair loads / Power9 lxv / older lxvd2x).
  assert(is_aligned(offset, StackAlignmentInBytes), "should be");
  if (PowerArchitecturePPC64 >= 10) {
    for (int i = 0; i < vecregstosave_num; i += 2) {
      int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;
      assert(RegisterSaver_LiveVecRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!");

      __ lxvp(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);

      offset += (2 * vec_reg_size);
    }
  } else {
    for (int i = 0; i < vecregstosave_num; i++) {
      int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;

      if (PowerArchitecturePPC64 >= 9) {
        __ lxv(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);
      } else {
        // R31 is still free here; it is restored below.
        __ li(R31, offset);
        __ lxvd2x(as_VectorRegister(reg_num).to_vsr(), R31, R1_SP);
      }

      offset += vec_reg_size;
    }
  }

  assert(offset == frame_size_in_bytes, "consistency check");

  // restore link and the flags (return pc was stored at frame's lr slot)
  __ ld(R31, frame_size_in_bytes + _abi0(lr), R1_SP);
  __ mtlr(R31);

  // restore scratch register's value (R31 was spilled first, see save code)
  __ ld(R31, frame_size_in_bytes - reg_size - vecregstosave_num * vec_reg_size, R1_SP);

  // pop the frame
  __ addi(R1_SP, R1_SP, frame_size_in_bytes);

  BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
}
495
496 void RegisterSaver::push_frame_and_save_argument_registers(MacroAssembler* masm, Register r_temp,
497 int frame_size,int total_args, const VMRegPair *regs,
498 const VMRegPair *regs2) {
499 __ push_frame(frame_size, r_temp);
500 int st_off = frame_size - wordSize;
501 for (int i = 0; i < total_args; i++) {
502 VMReg r_1 = regs[i].first();
503 VMReg r_2 = regs[i].second();
504 if (!r_1->is_valid()) {
505 assert(!r_2->is_valid(), "");
506 continue;
507 }
508 if (r_1->is_Register()) {
509 Register r = r_1->as_Register();
510 __ std(r, st_off, R1_SP);
511 st_off -= wordSize;
512 } else if (r_1->is_FloatRegister()) {
513 FloatRegister f = r_1->as_FloatRegister();
514 __ stfd(f, st_off, R1_SP);
515 st_off -= wordSize;
516 }
517 }
518 if (regs2 != nullptr) {
519 for (int i = 0; i < total_args; i++) {
520 VMReg r_1 = regs2[i].first();
521 VMReg r_2 = regs2[i].second();
522 if (!r_1->is_valid()) {
523 assert(!r_2->is_valid(), "");
524 continue;
525 }
526 if (r_1->is_Register()) {
527 Register r = r_1->as_Register();
528 __ std(r, st_off, R1_SP);
529 st_off -= wordSize;
530 } else if (r_1->is_FloatRegister()) {
531 FloatRegister f = r_1->as_FloatRegister();
532 __ stfd(f, st_off, R1_SP);
533 st_off -= wordSize;
534 }
535 }
536 }
537 }
538
539 void RegisterSaver::restore_argument_registers_and_pop_frame(MacroAssembler*masm, int frame_size,
540 int total_args, const VMRegPair *regs,
541 const VMRegPair *regs2) {
542 int st_off = frame_size - wordSize;
543 for (int i = 0; i < total_args; i++) {
544 VMReg r_1 = regs[i].first();
545 VMReg r_2 = regs[i].second();
546 if (r_1->is_Register()) {
547 Register r = r_1->as_Register();
548 __ ld(r, st_off, R1_SP);
549 st_off -= wordSize;
550 } else if (r_1->is_FloatRegister()) {
551 FloatRegister f = r_1->as_FloatRegister();
552 __ lfd(f, st_off, R1_SP);
553 st_off -= wordSize;
554 }
555 }
556 if (regs2 != nullptr)
557 for (int i = 0; i < total_args; i++) {
558 VMReg r_1 = regs2[i].first();
559 VMReg r_2 = regs2[i].second();
560 if (r_1->is_Register()) {
561 Register r = r_1->as_Register();
562 __ ld(r, st_off, R1_SP);
563 st_off -= wordSize;
564 } else if (r_1->is_FloatRegister()) {
565 FloatRegister f = r_1->as_FloatRegister();
566 __ lfd(f, st_off, R1_SP);
567 st_off -= wordSize;
568 }
569 }
570 __ pop_frame();
571 }
572
573 // Restore the registers that might be holding a result.
574 void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes, bool save_vectors) {
575 const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
576 sizeof(RegisterSaver::LiveRegType);
577 const int vecregstosave_num = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
578 sizeof(RegisterSaver::LiveRegType))
579 : 0;
580 const int register_save_size = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;
581
582 const int register_save_offset = frame_size_in_bytes - register_save_size;
583
584 // restore all result registers (ints and floats)
585 int offset = register_save_offset;
586 for (int i = 0; i < regstosave_num; i++) {
587 int reg_num = RegisterSaver_LiveRegs[i].reg_num;
588 int reg_type = RegisterSaver_LiveRegs[i].reg_type;
589 switch (reg_type) {
590 case RegisterSaver::int_reg: {
591 if (as_Register(reg_num)==R3_RET) // int result_reg
592 __ ld(as_Register(reg_num), offset, R1_SP);
593 break;
594 }
595 case RegisterSaver::float_reg: {
596 if (as_FloatRegister(reg_num)==F1_RET) // float result_reg
597 __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
598 break;
599 }
600 case RegisterSaver::special_reg: {
601 // Special registers don't hold a result.
602 break;
603 }
604 default:
605 ShouldNotReachHere();
606 }
607 offset += reg_size;
608 }
609
610 assert(offset == frame_size_in_bytes - (save_vectors ? vecregstosave_num * vec_reg_size : 0), "consistency check");
611 }
612
613 // Is vector's size (in bytes) bigger than a size saved by default?
614 bool SharedRuntime::is_wide_vector(int size) {
615 // Note, MaxVectorSize == 8/16 on PPC64.
616 assert(size <= (SuperwordUseVSX ? 16 : 8), "%d bytes vectors are not supported", size);
617 return size > 8;
618 }
619
620 static int reg2slot(VMReg r) {
621 return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
622 }
623
624 static int reg2offset(VMReg r) {
625 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
626 }
627
628 // ---------------------------------------------------------------------------
629 // Read the array of BasicTypes from a signature, and compute where the
630 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
631 // quantities. Values less than VMRegImpl::stack0 are registers, those above
632 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
633 // as framesizes are fixed.
634 // VMRegImpl::stack0 refers to the first slot 0(sp).
635 // and VMRegImpl::stack0+1 refers to the memory word 4-bytes higher. Register
636 // up to Register::number_of_registers) are the 64-bit
637 // integer registers.
638
639 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
640 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
641 // units regardless of build. Of course for i486 there is no 64 bit build
642
643 // The Java calling convention is a "shifted" version of the C ABI.
644 // By skipping the first C ABI register we can call non-static jni methods
645 // with small numbers of arguments without having to shuffle the arguments
646 // at all. Since we control the java ABI we ought to at least get some
647 // advantage out of it.
648
// Java calling convention: the first 8 int/long/oop arguments go in R3-R10.
const VMReg java_iarg_reg[8] = {
  R3->as_VMReg(),
  R4->as_VMReg(),
  R5->as_VMReg(),
  R6->as_VMReg(),
  R7->as_VMReg(),
  R8->as_VMReg(),
  R9->as_VMReg(),
  R10->as_VMReg()
};

// Java calling convention: the first 13 float/double arguments go in F1-F13.
const VMReg java_farg_reg[13] = {
  F1->as_VMReg(),
  F2->as_VMReg(),
  F3->as_VMReg(),
  F4->as_VMReg(),
  F5->as_VMReg(),
  F6->as_VMReg(),
  F7->as_VMReg(),
  F8->as_VMReg(),
  F9->as_VMReg(),
  F10->as_VMReg(),
  F11->as_VMReg(),
  F12->as_VMReg(),
  F13->as_VMReg()
};

const int num_java_iarg_registers = sizeof(java_iarg_reg) / sizeof(java_iarg_reg[0]);
const int num_java_farg_registers = sizeof(java_farg_reg) / sizeof(java_farg_reg[0]);

// The tables above must agree with the platform's Argument constants.
STATIC_ASSERT(num_java_iarg_registers == Argument::n_int_register_parameters_j);
STATIC_ASSERT(num_java_farg_registers == Argument::n_float_register_parameters_j);
681
682 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
683 VMRegPair *regs,
684 int total_args_passed) {
685 // C2c calling conventions for compiled-compiled calls.
686 // Put 8 ints/longs into registers _AND_ 13 float/doubles into
687 // registers _AND_ put the rest on the stack.
688
689 const int inc_stk_for_intfloat = 1; // 1 slots for ints and floats
690 const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles
691
692 int i;
693 VMReg reg;
694 int stk = 0;
695 int ireg = 0;
696 int freg = 0;
697
698 // We put the first 8 arguments into registers and the rest on the
699 // stack, float arguments are already in their argument registers
700 // due to c2c calling conventions (see calling_convention).
701 for (int i = 0; i < total_args_passed; ++i) {
702 switch(sig_bt[i]) {
703 case T_BOOLEAN:
704 case T_CHAR:
705 case T_BYTE:
706 case T_SHORT:
707 case T_INT:
708 if (ireg < num_java_iarg_registers) {
709 // Put int/ptr in register
710 reg = java_iarg_reg[ireg];
711 ++ireg;
712 } else {
713 // Put int/ptr on stack.
714 reg = VMRegImpl::stack2reg(stk);
715 stk += inc_stk_for_intfloat;
716 }
717 regs[i].set1(reg);
718 break;
719 case T_LONG:
720 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
721 if (ireg < num_java_iarg_registers) {
722 // Put long in register.
723 reg = java_iarg_reg[ireg];
724 ++ireg;
725 } else {
726 // Put long on stack. They must be aligned to 2 slots.
727 if (stk & 0x1) ++stk;
728 reg = VMRegImpl::stack2reg(stk);
729 stk += inc_stk_for_longdouble;
730 }
731 regs[i].set2(reg);
732 break;
733 case T_OBJECT:
734 case T_ARRAY:
735 case T_ADDRESS:
736 if (ireg < num_java_iarg_registers) {
737 // Put ptr in register.
738 reg = java_iarg_reg[ireg];
739 ++ireg;
740 } else {
741 // Put ptr on stack. Objects must be aligned to 2 slots too,
742 // because "64-bit pointers record oop-ishness on 2 aligned
743 // adjacent registers." (see OopFlow::build_oop_map).
744 if (stk & 0x1) ++stk;
745 reg = VMRegImpl::stack2reg(stk);
746 stk += inc_stk_for_longdouble;
747 }
748 regs[i].set2(reg);
749 break;
750 case T_FLOAT:
751 if (freg < num_java_farg_registers) {
752 // Put float in register.
753 reg = java_farg_reg[freg];
754 ++freg;
755 } else {
756 // Put float on stack.
757 reg = VMRegImpl::stack2reg(stk);
758 stk += inc_stk_for_intfloat;
759 }
760 regs[i].set1(reg);
761 break;
762 case T_DOUBLE:
763 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
764 if (freg < num_java_farg_registers) {
765 // Put double in register.
766 reg = java_farg_reg[freg];
767 ++freg;
768 } else {
769 // Put double on stack. They must be aligned to 2 slots.
770 if (stk & 0x1) ++stk;
771 reg = VMRegImpl::stack2reg(stk);
772 stk += inc_stk_for_longdouble;
773 }
774 regs[i].set2(reg);
775 break;
776 case T_VOID:
777 // Do not count halves.
778 regs[i].set_bad();
779 break;
780 default:
781 ShouldNotReachHere();
782 }
783 }
784 return stk;
785 }
786
787 // Calling convention for calling C code.
788 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
789 VMRegPair *regs,
790 int total_args_passed) {
791 // Calling conventions for C runtime calls and calls to JNI native methods.
792 //
793 // PPC64 convention: Hoist the first 8 int/ptr/long's in the first 8
794 // int regs, leaving int regs undefined if the arg is flt/dbl. Hoist
795 // the first 13 flt/dbl's in the first 13 fp regs but additionally
796 // copy flt/dbl to the stack if they are beyond the 8th argument.
797
798 const VMReg iarg_reg[8] = {
799 R3->as_VMReg(),
800 R4->as_VMReg(),
801 R5->as_VMReg(),
802 R6->as_VMReg(),
803 R7->as_VMReg(),
804 R8->as_VMReg(),
805 R9->as_VMReg(),
806 R10->as_VMReg()
807 };
808
809 const VMReg farg_reg[13] = {
810 F1->as_VMReg(),
811 F2->as_VMReg(),
812 F3->as_VMReg(),
813 F4->as_VMReg(),
814 F5->as_VMReg(),
815 F6->as_VMReg(),
816 F7->as_VMReg(),
817 F8->as_VMReg(),
818 F9->as_VMReg(),
819 F10->as_VMReg(),
820 F11->as_VMReg(),
821 F12->as_VMReg(),
822 F13->as_VMReg()
823 };
824
825 // Check calling conventions consistency.
826 assert(sizeof(iarg_reg) / sizeof(iarg_reg[0]) == Argument::n_int_register_parameters_c &&
827 sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c,
828 "consistency");
829
830 const int additional_frame_header_slots = ((frame::native_abi_minframe_size - frame::jit_out_preserve_size)
831 / VMRegImpl::stack_slot_size);
832 const int float_offset_in_slots = Argument::float_on_stack_offset_in_bytes_c / VMRegImpl::stack_slot_size;
833
834 VMReg reg;
835 int arg = 0;
836 int freg = 0;
837 bool stack_used = false;
838
839 for (int i = 0; i < total_args_passed; ++i, ++arg) {
840 // Each argument corresponds to a slot in the Parameter Save Area (if not omitted)
841 int stk = (arg * 2) + additional_frame_header_slots;
842
843 switch(sig_bt[i]) {
844 //
845 // If arguments 0-7 are integers, they are passed in integer registers.
846 // Argument i is placed in iarg_reg[i].
847 //
848 case T_BOOLEAN:
849 case T_CHAR:
850 case T_BYTE:
851 case T_SHORT:
852 case T_INT:
853 // We must cast ints to longs and use full 64 bit stack slots
854 // here. Thus fall through, handle as long.
855 case T_LONG:
856 case T_OBJECT:
857 case T_ARRAY:
858 case T_ADDRESS:
859 case T_METADATA:
860 // Oops are already boxed if required (JNI).
861 if (arg < Argument::n_int_register_parameters_c) {
862 reg = iarg_reg[arg];
863 } else {
864 reg = VMRegImpl::stack2reg(stk);
865 stack_used = true;
866 }
867 regs[i].set2(reg);
868 break;
869
870 //
871 // Floats are treated differently from int regs: The first 13 float arguments
872 // are passed in registers (not the float args among the first 13 args).
873 // Thus argument i is NOT passed in farg_reg[i] if it is float. It is passed
874 // in farg_reg[j] if argument i is the j-th float argument of this call.
875 //
876 case T_FLOAT:
877 if (freg < Argument::n_float_register_parameters_c) {
878 // Put float in register ...
879 reg = farg_reg[freg];
880 ++freg;
881 } else {
882 // Put float on stack.
883 reg = VMRegImpl::stack2reg(stk + float_offset_in_slots);
884 stack_used = true;
885 }
886 regs[i].set1(reg);
887 break;
888 case T_DOUBLE:
889 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
890 if (freg < Argument::n_float_register_parameters_c) {
891 // Put double in register ...
892 reg = farg_reg[freg];
893 ++freg;
894 } else {
895 // Put double on stack.
896 reg = VMRegImpl::stack2reg(stk);
897 stack_used = true;
898 }
899 regs[i].set2(reg);
900 break;
901
902 case T_VOID:
903 // Do not count halves.
904 regs[i].set_bad();
905 --arg;
906 break;
907 default:
908 ShouldNotReachHere();
909 }
910 }
911
912 // Return size of the stack frame excluding the jit_out_preserve part in single-word slots.
913 #if defined(ABI_ELFv2)
914 assert(additional_frame_header_slots == 0, "ABIv2 shouldn't use extra slots");
915 // ABIv2 allows omitting the Parameter Save Area if the callee's prototype
916 // indicates that all parameters can be passed in registers.
917 return stack_used ? (arg * 2) : 0;
918 #else
919 // The Parameter Save Area needs to be at least 8 double-word slots for ABIv1.
920 // We have to add extra slots because ABIv1 uses a larger header.
921 return MAX2(arg, 8) * 2 + additional_frame_header_slots;
922 #endif
923 }
924
// Vector calling convention is not implemented on PPC64.
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                             uint num_bits,
                                             uint total_args_passed) {
  Unimplemented();
  return 0;
}
931
// Generate the compiled-to-interpreted (c2i) adapter.
//
// Called when compiled code invokes a method that must run interpreted.
// Arguments arrive in the compiled calling convention (registers plus the
// caller's outgoing stack area, described by `regs`) and are copied into the
// slot layout the template interpreter expects: one word per value, written
// top-down below the current SP, with R15_esp left pointing at the last
// argument written (the interpreter's TOS).
//
// Parameters:
//   masm               - assembler to emit into
//   total_args_passed  - number of Java argument slots, including the T_VOID
//                        halves that follow longs/doubles
//   comp_args_on_stack - number of compiled-convention stack slots (unused here)
//   sig_bt             - basic types of the arguments
//   regs               - compiled-convention locations of the arguments
//   call_interpreter   - label bound in this adapter; may also be branched to
//                        from the unverified c2i entry (caller-provided)
//   ientry             - register that holds Method::interpreter_entry when
//                        call_interpreter is reached
//
// Returns the address of the regular (verified) c2i entry point.
static address gen_c2i_adapter(MacroAssembler *masm,
                               int total_args_passed,
                               int comp_args_on_stack,
                               const BasicType *sig_bt,
                               const VMRegPair *regs,
                               Label& call_interpreter,
                               const Register& ientry) {

  address c2i_entrypoint;

  const Register sender_SP = R21_sender_SP; // == R21_tmp1
  const Register code      = R22_tmp2;
  //const Register ientry  = R23_tmp3;
  const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 };
  const int num_value_regs = sizeof(value_regs) / sizeof(Register);
  int value_regs_index = 0;

  const Register return_pc = R27_tmp7;
  const Register tmp       = R28_tmp8;

  assert_different_registers(sender_SP, code, ientry, return_pc, tmp);

  // Adapter needs TOP_IJAVA_FRAME_ABI.
  const int adapter_size = frame::top_ijava_frame_abi_size +
                           align_up(total_args_passed * wordSize, frame::alignment_in_bytes);

  // regular (verified) c2i entry point
  c2i_entrypoint = __ pc();

  // Does compiled code exist? If yes, patch the caller's callsite.
  __ ld(code, method_(code));
  __ cmpdi(CR0, code, 0);
  __ ld(ientry, method_(interpreter_entry)); // preloaded
  __ beq(CR0, call_interpreter);

  // Patch caller's callsite, method_(code) was not null which means that
  // compiled code exists.
  __ mflr(return_pc);
  __ std(return_pc, _abi0(lr), R1_SP);
  RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs);

  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), R19_method, return_pc);

  RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs);
  __ ld(return_pc, _abi0(lr), R1_SP);
  __ ld(ientry, method_(interpreter_entry)); // preloaded
  __ mtlr(return_pc);

  // Call the interpreter.
  __ BIND(call_interpreter);
  __ mtctr(ientry);

  // Get a copy of the current SP for loading caller's arguments.
  __ mr(sender_SP, R1_SP);

  // Add space for the adapter.
  __ resize_frame(-adapter_size, R12_scratch2);

  // First interpreter slot, growing downwards.
  int st_off = adapter_size - wordSize;

  // Write the args into the outgoing interpreter space.
  for (int i = 0; i < total_args_passed; i++) {
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      // T_VOID half of a long/double; no data to move.
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // Stack-passed argument: load it into one of the rotating temp
      // registers first, then fall through to the register cases below.
      Register tmp_reg = value_regs[value_regs_index];
      value_regs_index = (value_regs_index + 1) % num_value_regs;
      // The calling convention produces OptoRegs that ignore the out
      // preserve area (JIT's ABI). We must account for it here.
      int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
      if (!r_2->is_valid()) {
        __ lwz(tmp_reg, ld_off, sender_SP);
      } else {
        __ ld(tmp_reg, ld_off, sender_SP);
      }
      // Pretend stack targets were loaded into tmp_reg.
      r_1 = tmp_reg->as_VMReg();
    }

    if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
        // 32-bit value: store the low word only.
        __ stw(r, st_off, R1_SP);
        st_off-=wordSize;
      } else {
        // Longs are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          // Zero the unused slot in debug builds to catch stale reads.
          DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
          st_off-=wordSize;
        }
        __ std(r, st_off, R1_SP);
        st_off-=wordSize;
      }
    } else {
      assert(r_1->is_FloatRegister(), "");
      FloatRegister f = r_1->as_FloatRegister();
      if (!r_2->is_valid()) {
        // Single-precision float: one word.
        __ stfs(f, st_off, R1_SP);
        st_off-=wordSize;
      } else {
        // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        // One of these should get known junk...
        DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
        st_off-=wordSize;
        __ stfd(f, st_off, R1_SP);
        st_off-=wordSize;
      }
    }
  }

  // Jump to the interpreter just as if interpreter was doing it.

  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);

  // load TOS
  __ addi(R15_esp, R1_SP, st_off);

  // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1.
  assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register");
  __ bctr();

  return c2i_entrypoint;
}
1063
// Generate the interpreted-to-compiled (i2c) adapter.
//
// The interpreter passes all arguments as one-word stack slots addressed via
// R15_esp; this adapter shuffles them into the compiled calling convention
// described by `regs` (argument registers plus an optional outgoing stack
// area), then jumps to the method's from_compiled entry via CTR.
//
// Parameters:
//   masm               - assembler to emit into
//   total_args_passed  - number of Java argument slots incl. T_VOID halves
//   comp_args_on_stack - number of compiled-convention stack slots used
//   sig_bt             - basic types of the arguments
//   regs               - compiled-convention destinations of the arguments
void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int total_args_passed,
                                    int comp_args_on_stack,
                                    const BasicType *sig_bt,
                                    const VMRegPair *regs) {

  // Load method's entry-point from method.
  __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
  __ mtctr(R12_scratch2);

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the x86 side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.

  // Note: r13 contains the senderSP on entry. We must preserve it since
  // we may do a i2c -> c2i transition if we lose a race where compiled
  // code goes non-entrant while we get args ready.
  // In addition we use r13 to locate all the interpreter args as
  // we must align the stack to 16 bytes on an i2c entry else we
  // lose alignment we expect in all compiled code and register
  // save code can segv when fxsave instructions find improperly
  // aligned stack pointer.

  const Register ld_ptr = R15_esp;
  const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
  const int num_value_regs = sizeof(value_regs) / sizeof(Register);
  int value_regs_index = 0;

  // Interpreter slots are one word each; first argument lives highest.
  int ld_offset = total_args_passed*wordSize;

  // Cut-out for having no stack args. Since up to 2 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
    // registers are below. By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.

    // Convert 4-byte c2 stack slots to words.
    comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize.
    comp_words_on_stack = align_up(comp_words_on_stack, 2);
    __ resize_frame(-comp_words_on_stack * wordSize, R11_scratch1);
  }

  // Now generate the shuffle code. Pick up all register args and move the
  // rest through register value=Z_R12.
  BLOCK_COMMENT("Shuffle arguments");
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Second half of a long/double; the data moves with the first half.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from ld_ptr.
    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_FloatRegister()) {
      if (!r_2->is_valid()) {
        // Single-precision float: one interpreter slot.
        __ lfs(r_1->as_FloatRegister(), ld_offset, ld_ptr);
        ld_offset-=wordSize;
      } else {
        // Skip the unused interpreter slot.
        __ lfd(r_1->as_FloatRegister(), ld_offset-wordSize, ld_ptr);
        ld_offset-=2*wordSize;
      }
    } else {
      Register r;
      if (r_1->is_stack()) {
        // Must do a memory to memory move thru "value".
        r = value_regs[value_regs_index];
        value_regs_index = (value_regs_index + 1) % num_value_regs;
      } else {
        r = r_1->as_Register();
      }
      if (!r_2->is_valid()) {
        // Not sure we need to do this but it shouldn't hurt.
        if (is_reference_type(sig_bt[i]) || sig_bt[i] == T_ADDRESS) {
          __ ld(r, ld_offset, ld_ptr);
          ld_offset-=wordSize;
        } else {
          __ lwz(r, ld_offset, ld_ptr);
          ld_offset-=wordSize;
        }
      } else {
        // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          ld_offset-=wordSize;
        }
        __ ld(r, ld_offset, ld_ptr);
        ld_offset-=wordSize;
      }

      if (r_1->is_stack()) {
        // Now store value where the compiler expects it
        int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size;

        if (sig_bt[i] == T_INT || sig_bt[i] == T_FLOAT ||sig_bt[i] == T_BOOLEAN ||
            sig_bt[i] == T_SHORT || sig_bt[i] == T_CHAR || sig_bt[i] == T_BYTE) {
          __ stw(r, st_off, R1_SP);
        } else {
          __ std(r, st_off, R1_SP);
        }
      }
    }
  }

  __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about

  BLOCK_COMMENT("Store method");
  // Store method into thread->callee_target.
  // We might end up in handle_wrong_method if the callee is
  // deoptimized as we race thru here. If that happens we don't want
  // to take a safepoint because the caller frame will look
  // interpreted and arguments are now "compiled" so it is much better
  // to make this transition invisible to the stack walking
  // code. Unfortunately if we try and find the callee by normal means
  // a safepoint is possible. So we stash the desired callee in the
  // thread and the vm will find there should this case occur.
  __ std(R19_method, thread_(callee_target));

  // Jump to the compiled code just as if compiled code was doing it.
  __ bctr();
}
1197
1198 void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
1199 int total_args_passed,
1200 int comp_args_on_stack,
1201 const BasicType *sig_bt,
1202 const VMRegPair *regs,
1203 address entry_address[AdapterBlob::ENTRY_COUNT]) {
1204 // entry: i2c
1205
1206 __ align(CodeEntryAlignment);
1207 entry_address[AdapterBlob::I2C] = __ pc();
1208 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
1209
1210
1211 // entry: c2i unverified
1212
1213 __ align(CodeEntryAlignment);
1214 BLOCK_COMMENT("c2i unverified entry");
1215 entry_address[AdapterBlob::C2I_Unverified] = __ pc();
1216
1217 // inline_cache contains a CompiledICData
1218 const Register ic = R19_inline_cache_reg;
1219 const Register ic_klass = R11_scratch1;
1220 const Register receiver_klass = R12_scratch2;
1221 const Register code = R21_tmp1;
1222 const Register ientry = R23_tmp3;
1223
1224 assert_different_registers(ic, ic_klass, receiver_klass, R3_ARG1, code, ientry);
1225 assert(R11_scratch1 == R11, "need prologue scratch register");
1226
1227 Label call_interpreter;
1228
1229 __ ic_check(4 /* end_alignment */);
1230 __ ld(R19_method, CompiledICData::speculated_method_offset(), ic);
1231 // Argument is valid and klass is as expected, continue.
1232
1233 __ ld(code, method_(code));
1234 __ cmpdi(CR0, code, 0);
1235 __ ld(ientry, method_(interpreter_entry)); // preloaded
1236 __ beq_predict_taken(CR0, call_interpreter);
1237
1238 // Branch to ic_miss_stub.
1239 __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
1240
1241 // entry: c2i
1242
1243 entry_address[AdapterBlob::C2I] = __ pc();
1244
1245 // Class initialization barrier for static methods
1246 entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
1247 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1248 Label L_skip_barrier;
1249
1250 // Bypass the barrier for non-static methods
1251 __ lhz(R0, in_bytes(Method::access_flags_offset()), R19_method);
1252 __ andi_(R0, R0, JVM_ACC_STATIC);
1253 __ beq(CR0, L_skip_barrier); // non-static
1254
1255 Register klass = R11_scratch1;
1256 __ load_method_holder(klass, R19_method);
1257 __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);
1258
1259 __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
1260 __ mtctr(klass);
1261 __ bctr();
1262
1263 __ bind(L_skip_barrier);
1264 entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc();
1265
1266 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1267 bs->c2i_entry_barrier(masm, /* tmp register*/ ic_klass, /* tmp register*/ receiver_klass, /* tmp register*/ code);
1268
1269 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, call_interpreter, ientry);
1270 return;
1271 }
1272
// An oop arg. Must pass a handle not the oop itself.
//
// Creates a JNI handle for an incoming oop argument: the oop is spilled to a
// known frame slot (recorded in the oop map so the GC can find it), and the
// address of that slot - or a null handle if the oop is null - is placed
// where the native callee expects the argument.
//
// Parameters:
//   frame_size_in_slots - size of the current frame, used to address oops
//                         still sitting in the caller's frame
//   oop_map             - receives the location of the spilled oop
//   oop_handle_offset   - base offset (in slots) of the handle area for
//                         register-passed oops
//   is_receiver         - true if this argument is the receiver
//   receiver_offset     - out: frame offset of the spilled receiver
//   src, dst            - Java-side and native-side argument locations
//   r_caller_sp         - caller's SP, for stack-passed sources
//   r_temp_1, r_temp_2  - scratch registers
static void object_move(MacroAssembler* masm,
                        int frame_size_in_slots,
                        OopMap* oop_map, int oop_handle_offset,
                        bool is_receiver, int* receiver_offset,
                        VMRegPair src, VMRegPair dst,
                        Register r_caller_sp, Register r_temp_1, Register r_temp_2) {
  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)),
         "receiver has already been moved");

  // We must pass a handle. First figure out the location we use as a handle.

  if (src.first()->is_stack()) {
    // stack to stack or reg

    const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
    Label skip;
    const int oop_slot_in_callers_frame = reg2slot(src.first());

    guarantee(!is_receiver, "expecting receiver in register");
    // The oop stays in the caller's frame; record it relative to this frame.
    oop_map->set_oop(VMRegImpl::stack2reg(oop_slot_in_callers_frame + frame_size_in_slots));

    // Handle = address of the caller's slot, unless the oop is null.
    __ addi(r_handle, r_caller_sp, reg2offset(src.first()));
    __ ld( r_temp_2, reg2offset(src.first()), r_caller_sp);
    __ cmpdi(CR0, r_temp_2, 0);
    __ bne(CR0, skip);
    // Use a null handle if oop is null.
    __ li(r_handle, 0);
    __ bind(skip);

    if (dst.first()->is_stack()) {
      // stack to stack
      __ std(r_handle, reg2offset(dst.first()), R1_SP);
    } else {
      // stack to reg
      // Nothing to do, r_handle is already the dst register.
    }
  } else {
    // reg to stack or reg
    const Register r_oop = src.first()->as_Register();
    const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
    // Each register-passed oop gets its own handle slot, derived from the
    // argument register's position after R3_ARG1.
    const int oop_slot = (r_oop->encoding()-R3_ARG1->encoding()) * VMRegImpl::slots_per_word
                         + oop_handle_offset; // in slots
    const int oop_offset = oop_slot * VMRegImpl::stack_slot_size;
    Label skip;

    if (is_receiver) {
      *receiver_offset = oop_offset;
    }
    oop_map->set_oop(VMRegImpl::stack2reg(oop_slot));

    // Spill the oop and form the handle (address of the spill slot).
    __ std( r_oop,    oop_offset, R1_SP);
    __ addi(r_handle, R1_SP, oop_offset);

    __ cmpdi(CR0, r_oop, 0);
    __ bne(CR0, skip);
    // Use a null handle if oop is null.
    __ li(r_handle, 0);
    __ bind(skip);

    if (dst.first()->is_stack()) {
      // reg to stack
      __ std(r_handle, reg2offset(dst.first()), R1_SP);
    } else {
      // reg to reg
      // Nothing to do, r_handle is already the dst register.
    }
  }
}
1342
1343 static void int_move(MacroAssembler*masm,
1344 VMRegPair src, VMRegPair dst,
1345 Register r_caller_sp, Register r_temp) {
1346 assert(src.first()->is_valid(), "incoming must be int");
1347 assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
1348
1349 if (src.first()->is_stack()) {
1350 if (dst.first()->is_stack()) {
1351 // stack to stack
1352 __ lwa(r_temp, reg2offset(src.first()), r_caller_sp);
1353 __ std(r_temp, reg2offset(dst.first()), R1_SP);
1354 } else {
1355 // stack to reg
1356 __ lwa(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
1357 }
1358 } else if (dst.first()->is_stack()) {
1359 // reg to stack
1360 __ extsw(r_temp, src.first()->as_Register());
1361 __ std(r_temp, reg2offset(dst.first()), R1_SP);
1362 } else {
1363 // reg to reg
1364 __ extsw(dst.first()->as_Register(), src.first()->as_Register());
1365 }
1366 }
1367
1368 static void long_move(MacroAssembler*masm,
1369 VMRegPair src, VMRegPair dst,
1370 Register r_caller_sp, Register r_temp) {
1371 assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long");
1372 assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
1373
1374 if (src.first()->is_stack()) {
1375 if (dst.first()->is_stack()) {
1376 // stack to stack
1377 __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
1378 __ std(r_temp, reg2offset(dst.first()), R1_SP);
1379 } else {
1380 // stack to reg
1381 __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
1382 }
1383 } else if (dst.first()->is_stack()) {
1384 // reg to stack
1385 __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP);
1386 } else {
1387 // reg to reg
1388 if (dst.first()->as_Register() != src.first()->as_Register())
1389 __ mr(dst.first()->as_Register(), src.first()->as_Register());
1390 }
1391 }
1392
1393 static void float_move(MacroAssembler*masm,
1394 VMRegPair src, VMRegPair dst,
1395 Register r_caller_sp, Register r_temp) {
1396 assert(src.first()->is_valid() && !src.second()->is_valid(), "incoming must be float");
1397 assert(dst.first()->is_valid() && !dst.second()->is_valid(), "outgoing must be float");
1398
1399 if (src.first()->is_stack()) {
1400 if (dst.first()->is_stack()) {
1401 // stack to stack
1402 __ lwz(r_temp, reg2offset(src.first()), r_caller_sp);
1403 __ stw(r_temp, reg2offset(dst.first()), R1_SP);
1404 } else {
1405 // stack to reg
1406 __ lfs(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
1407 }
1408 } else if (dst.first()->is_stack()) {
1409 // reg to stack
1410 __ stfs(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
1411 } else {
1412 // reg to reg
1413 if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
1414 __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1415 }
1416 }
1417
1418 static void double_move(MacroAssembler*masm,
1419 VMRegPair src, VMRegPair dst,
1420 Register r_caller_sp, Register r_temp) {
1421 assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be double");
1422 assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be double");
1423
1424 if (src.first()->is_stack()) {
1425 if (dst.first()->is_stack()) {
1426 // stack to stack
1427 __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
1428 __ std(r_temp, reg2offset(dst.first()), R1_SP);
1429 } else {
1430 // stack to reg
1431 __ lfd(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
1432 }
1433 } else if (dst.first()->is_stack()) {
1434 // reg to stack
1435 __ stfd(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
1436 } else {
1437 // reg to reg
1438 if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
1439 __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1440 }
1441 }
1442
1443 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1444 switch (ret_type) {
1445 case T_BOOLEAN:
1446 case T_CHAR:
1447 case T_BYTE:
1448 case T_SHORT:
1449 case T_INT:
1450 __ stw (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1451 break;
1452 case T_ARRAY:
1453 case T_OBJECT:
1454 case T_LONG:
1455 __ std (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1456 break;
1457 case T_FLOAT:
1458 __ stfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1459 break;
1460 case T_DOUBLE:
1461 __ stfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1462 break;
1463 case T_VOID:
1464 break;
1465 default:
1466 ShouldNotReachHere();
1467 break;
1468 }
1469 }
1470
1471 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1472 switch (ret_type) {
1473 case T_BOOLEAN:
1474 case T_CHAR:
1475 case T_BYTE:
1476 case T_SHORT:
1477 case T_INT:
1478 __ lwz(R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1479 break;
1480 case T_ARRAY:
1481 case T_OBJECT:
1482 case T_LONG:
1483 __ ld (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1484 break;
1485 case T_FLOAT:
1486 __ lfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1487 break;
1488 case T_DOUBLE:
1489 __ lfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1490 break;
1491 case T_VOID:
1492 break;
1493 default:
1494 ShouldNotReachHere();
1495 break;
1496 }
1497 }
1498
1499 static void verify_oop_args(MacroAssembler* masm,
1500 const methodHandle& method,
1501 const BasicType* sig_bt,
1502 const VMRegPair* regs) {
1503 Register temp_reg = R19_method; // not part of any compiled calling seq
1504 if (VerifyOops) {
1505 for (int i = 0; i < method->size_of_parameters(); i++) {
1506 if (is_reference_type(sig_bt[i])) {
1507 VMReg r = regs[i].first();
1508 assert(r->is_valid(), "bad oop arg");
1509 if (r->is_stack()) {
1510 __ ld(temp_reg, reg2offset(r), R1_SP);
1511 __ verify_oop(temp_reg, FILE_AND_LINE);
1512 } else {
1513 __ verify_oop(r->as_Register(), FILE_AND_LINE);
1514 }
1515 }
1516 }
1517 }
1518 }
1519
// Generate a direct dispatch for a signature-polymorphic method handle
// intrinsic (invokeBasic / linkTo* / linkToNative) instead of a regular
// native wrapper: locate the receiver and/or the trailing MemberName or
// NativeEntryPoint argument, then jump to the method handle dispatch code.
static void gen_special_dispatch(MacroAssembler* masm,
                                 const methodHandle& method,
                                 const BasicType* sig_bt,
                                 const VMRegPair* regs) {
  verify_oop_args(masm, method, sig_bt, regs);
  vmIntrinsics::ID iid = method->intrinsic_id();

  // Determine, per intrinsic, which arguments carry the receiver and the
  // member/entry-point object.
  bool has_receiver = false;
  Register receiver_reg = noreg;
  int member_arg_pos = -1;
  Register member_reg = noreg;
  int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
  if (ref_kind != 0) {
    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
    member_reg = R19_method;  // known to be free at this point
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else if (iid == vmIntrinsics::_invokeBasic) {
    has_receiver = true;
  } else if (iid == vmIntrinsics::_linkToNative) {
    member_arg_pos = method->size_of_parameters() - 1;  // trailing NativeEntryPoint argument
    member_reg = R19_method;  // known to be free at this point
  } else {
    fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
    VMReg r = regs[member_arg_pos].first();
    if (r->is_stack()) {
      __ ld(member_reg, reg2offset(r), R1_SP);
    } else {
      // no data motion is needed
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(method->size_of_parameters() > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");
    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register. If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      fatal("receiver always in a register");
      // NOTE: the two lines below are unreachable (fatal does not return);
      // they are retained from the porting template for documentation.
      receiver_reg = R11_scratch1;  // TODO (hs24): is R11_scratch1 really free at this point?
      __ ld(receiver_reg, reg2offset(r), R1_SP);
    } else {
      // no data motion is needed
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, iid,
                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
}
1581
//---------------------------- continuation_enter_setup ---------------------------
//
// Frame setup: push a blank ContinuationEntry frame and link it into the
// thread's chain of continuation entries.
//
// Arguments:
//   framesize_words - out: size of the pushed frame in words.
//
// Results:
//   R1_SP: pointer to blank ContinuationEntry in the pushed frame.
//   Returns an OopMap sized for the new frame (no oops recorded yet).
//
// Kills:
//   R0, R20
//
static OopMap* continuation_enter_setup(MacroAssembler* masm, int& framesize_words) {
  // The entry's layout offsets must be slot-aligned for the oop map machinery.
  assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, "");
  assert(in_bytes(ContinuationEntry::cont_offset())  % VMRegImpl::stack_slot_size == 0, "");
  assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, "");

  const int frame_size_in_bytes = (int)ContinuationEntry::size();
  assert(is_aligned(frame_size_in_bytes, frame::alignment_in_bytes), "alignment error");

  framesize_words = frame_size_in_bytes / wordSize;

  DEBUG_ONLY(__ block_comment("setup {"));
  // Save return pc and push entry frame
  const Register return_pc = R20;
  __ mflr(return_pc);
  __ std(return_pc, _abi0(lr), R1_SP);     // SP->lr = return_pc
  __ push_frame(frame_size_in_bytes , R0); // SP -= frame_size_in_bytes

  OopMap* map = new OopMap((int)frame_size_in_bytes / VMRegImpl::stack_slot_size, 0 /* arg_slots*/);

  // Link the new entry into the thread's chain:
  //   entry->parent = thread->cont_entry; thread->cont_entry = entry (== SP).
  __ ld_ptr(R0, JavaThread::cont_entry_offset(), R16_thread);
  __ st_ptr(R1_SP, JavaThread::cont_entry_offset(), R16_thread);
  __ st_ptr(R0, ContinuationEntry::parent_offset(), R1_SP);
  DEBUG_ONLY(__ block_comment("} setup"));

  return map;
}
1621
//---------------------------- fill_continuation_entry ---------------------------
//
// Initialize the new ContinuationEntry.
//
// Arguments:
//   R1_SP: pointer to blank Continuation entry
//   reg_cont_obj: pointer to the continuation
//   reg_flags: flags
//
// Results:
//   R1_SP: pointer to filled out ContinuationEntry
//
// Kills:
//   R8_ARG6, R9_ARG7, R10_ARG8
//
static void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Register reg_flags) {
  assert_different_registers(reg_cont_obj, reg_flags);
  Register zero = R8_ARG6;
  Register tmp2 = R9_ARG7;

  DEBUG_ONLY(__ block_comment("fill {"));
#ifdef ASSERT
  // Debug-only cookie so stack walkers can sanity-check the entry.
  __ load_const_optimized(tmp2, ContinuationEntry::cookie_value());
  __ stw(tmp2, in_bytes(ContinuationEntry::cookie_offset()), R1_SP);
#endif //ASSERT

  // Fill in the continuation oop and flags; clear chunk, argsize and pin count.
  __ li(zero, 0);
  __ st_ptr(reg_cont_obj, ContinuationEntry::cont_offset(), R1_SP);
  __ stw(reg_flags, in_bytes(ContinuationEntry::flags_offset()), R1_SP);
  __ st_ptr(zero, ContinuationEntry::chunk_offset(), R1_SP);
  __ stw(zero, in_bytes(ContinuationEntry::argsize_offset()), R1_SP);
  __ stw(zero, in_bytes(ContinuationEntry::pin_count_offset()), R1_SP);

  // Save the caller's cont_fastpath in the entry, then reset the thread's.
  __ ld_ptr(tmp2, JavaThread::cont_fastpath_offset(), R16_thread);
  __ st_ptr(tmp2, ContinuationEntry::parent_cont_fastpath_offset(), R1_SP);

  __ st_ptr(zero, JavaThread::cont_fastpath_offset(), R16_thread);
  DEBUG_ONLY(__ block_comment("} fill"));
}
1661
//---------------------------- continuation_enter_cleanup ---------------------------
//
// Copy corresponding attributes from the top ContinuationEntry to the JavaThread
// before deleting it.
//
// Arguments:
//   R1_SP: pointer to the ContinuationEntry
//
// Results:
//   None.
//
// Kills:
//   R8_ARG6, R9_ARG7, R10_ARG8, R15_esp
//
static void continuation_enter_cleanup(MacroAssembler* masm) {
  Register tmp1 = R8_ARG6;
  Register tmp2 = R9_ARG7;

#ifdef ASSERT
  // Verify R1_SP actually points at the thread's current ContinuationEntry.
  __ block_comment("clean {");
  __ ld_ptr(tmp1, JavaThread::cont_entry_offset(), R16_thread);
  __ cmpd(CR0, R1_SP, tmp1);
  __ asm_assert_eq(FILE_AND_LINE ": incorrect R1_SP");
#endif

  // Restore the parent's cont_fastpath and pop this entry off the
  // thread's chain (thread->cont_entry = entry->parent).
  __ ld_ptr(tmp1, ContinuationEntry::parent_cont_fastpath_offset(), R1_SP);
  __ st_ptr(tmp1, JavaThread::cont_fastpath_offset(), R16_thread);
  __ ld_ptr(tmp2, ContinuationEntry::parent_offset(), R1_SP);
  __ st_ptr(tmp2, JavaThread::cont_entry_offset(), R16_thread);
  DEBUG_ONLY(__ block_comment("} clean"));
}
1693
// Debug-only check that the calling convention placed the enterSpecial
// argument `name` in the register the stub hard-codes (asserts compile
// away in product builds).
static void check_continuation_enter_argument(VMReg actual_vmreg,
                                              Register expected_reg,
                                              const char* name) {
  assert(!actual_vmreg->is_stack(), "%s cannot be on stack", name);
  assert(actual_vmreg->as_Register() == expected_reg,
         "%s is in unexpected register: %s instead of %s",
         name, actual_vmreg->as_Register()->name(), expected_reg->name());
}
1702
1703 static void gen_continuation_enter(MacroAssembler* masm,
1704 const VMRegPair* regs,
1705 int& exception_offset,
1706 OopMapSet* oop_maps,
1707 int& frame_complete,
1708 int& framesize_words,
1709 int& interpreted_entry_offset,
1710 int& compiled_entry_offset) {
1711
1712 // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread)
1713 int pos_cont_obj = 0;
1714 int pos_is_cont = 1;
1715 int pos_is_virtual = 2;
1716
1717 // The platform-specific calling convention may present the arguments in various registers.
1718 // To simplify the rest of the code, we expect the arguments to reside at these known
1719 // registers, and we additionally check the placement here in case calling convention ever
1720 // changes.
1721 Register reg_cont_obj = R3_ARG1;
1722 Register reg_is_cont = R4_ARG2;
1723 Register reg_is_virtual = R5_ARG3;
1724
1725 check_continuation_enter_argument(regs[pos_cont_obj].first(), reg_cont_obj, "Continuation object");
1726 check_continuation_enter_argument(regs[pos_is_cont].first(), reg_is_cont, "isContinue");
1727 check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread");
1728
1729 address resolve_static_call = SharedRuntime::get_resolve_static_call_stub();
1730
1731 address start = __ pc();
1732
1733 Label L_thaw, L_exit;
1734
1735 // i2i entry used at interp_only_mode only
1736 interpreted_entry_offset = __ pc() - start;
1737 {
1738 #ifdef ASSERT
1739 Label is_interp_only;
1740 __ lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
1741 __ cmpwi(CR0, R0, 0);
1742 __ bne(CR0, is_interp_only);
1743 __ stop("enterSpecial interpreter entry called when not in interp_only_mode");
1744 __ bind(is_interp_only);
1745 #endif
1746
1747 // Read interpreter arguments into registers (this is an ad-hoc i2c adapter)
1748 __ ld(reg_cont_obj, Interpreter::stackElementSize*3, R15_esp);
1749 __ lwz(reg_is_cont, Interpreter::stackElementSize*2, R15_esp);
1750 __ lwz(reg_is_virtual, Interpreter::stackElementSize*1, R15_esp);
1751
1752 __ push_cont_fastpath();
1753
1754 OopMap* map = continuation_enter_setup(masm, framesize_words);
1755
1756 // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe,
1757 // but that's okay because at the very worst we'll miss an async sample, but we're in interp_only_mode anyway.
1758
1759 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual);
1760
1761 // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue)
1762 __ cmpwi(CR0, reg_is_cont, 0);
1763 __ bne(CR0, L_thaw);
1764
1765 // --- call Continuation.enter(Continuation c, boolean isContinue)
1766
1767 // Emit compiled static call. The call will be always resolved to the c2i
1768 // entry of Continuation.enter(Continuation c, boolean isContinue).
1769 // There are special cases in SharedRuntime::resolve_static_call_C() and
1770 // SharedRuntime::resolve_sub_helper_internal() to achieve this
1771 // See also corresponding call below.
1772 address c2i_call_pc = __ pc();
1773 int start_offset = __ offset();
1774 // Put the entry point as a constant into the constant pool.
1775 const address entry_point_toc_addr = __ address_constant(resolve_static_call, RelocationHolder::none);
1776 const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
1777 guarantee(entry_point_toc_addr != nullptr, "const section overflow");
1778
1779 // Emit the trampoline stub which will be related to the branch-and-link below.
1780 address stub = __ emit_trampoline_stub(entry_point_toc_offset, start_offset);
1781 guarantee(stub != nullptr, "no space for trampoline stub");
1782
1783 __ relocate(relocInfo::static_call_type);
1784 // Note: At this point we do not have the address of the trampoline
1785 // stub, and the entry point might be too far away for bl, so __ pc()
1786 // serves as dummy and the bl will be patched later.
1787 __ bl(__ pc());
1788 oop_maps->add_gc_map(__ pc() - start, map);
1789 __ post_call_nop();
1790
1791 __ b(L_exit);
1792
1793 // static stub for the call above
1794 stub = CompiledDirectCall::emit_to_interp_stub(masm, c2i_call_pc);
1795 guarantee(stub != nullptr, "no space for static stub");
1796 }
1797
1798 // compiled entry
1799 __ align(CodeEntryAlignment);
1800 compiled_entry_offset = __ pc() - start;
1801
1802 OopMap* map = continuation_enter_setup(masm, framesize_words);
1803
1804 // Frame is now completed as far as size and linkage.
1805 frame_complete =__ pc() - start;
1806
1807 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual);
1808
1809 // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue)
1810 __ cmpwi(CR0, reg_is_cont, 0);
1811 __ bne(CR0, L_thaw);
1812
1813 // --- call Continuation.enter(Continuation c, boolean isContinue)
1814
1815 // Emit compiled static call
1816 // The call needs to be resolved. There's a special case for this in
1817 // SharedRuntime::find_callee_info_helper() which calls
1818 // LinkResolver::resolve_continuation_enter() which resolves the call to
1819 // Continuation.enter(Continuation c, boolean isContinue).
1820 address call_pc = __ pc();
1821 int start_offset = __ offset();
1822 // Put the entry point as a constant into the constant pool.
1823 const address entry_point_toc_addr = __ address_constant(resolve_static_call, RelocationHolder::none);
1824 const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
1825 guarantee(entry_point_toc_addr != nullptr, "const section overflow");
1826
1827 // Emit the trampoline stub which will be related to the branch-and-link below.
1828 address stub = __ emit_trampoline_stub(entry_point_toc_offset, start_offset);
1829 guarantee(stub != nullptr, "no space for trampoline stub");
1830
1831 __ relocate(relocInfo::static_call_type);
1832 // Note: At this point we do not have the address of the trampoline
1833 // stub, and the entry point might be too far away for bl, so __ pc()
1834 // serves as dummy and the bl will be patched later.
1835 __ bl(__ pc());
1836 oop_maps->add_gc_map(__ pc() - start, map);
1837 __ post_call_nop();
1838
1839 __ b(L_exit);
1840
1841 // --- Thawing path
1842
1843 __ bind(L_thaw);
1844 ContinuationEntry::_thaw_call_pc_offset = __ pc() - start;
1845 __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(StubRoutines::cont_thaw()));
1846 __ mtctr(R0);
1847 __ bctrl();
1848 oop_maps->add_gc_map(__ pc() - start, map->deep_copy());
1849 ContinuationEntry::_return_pc_offset = __ pc() - start;
1850 __ post_call_nop();
1851
1852 // --- Normal exit (resolve/thawing)
1853
1854 __ bind(L_exit);
1855 ContinuationEntry::_cleanup_offset = __ pc() - start;
1856 continuation_enter_cleanup(masm);
1857
1858 // Pop frame and return
1859 DEBUG_ONLY(__ ld_ptr(R0, 0, R1_SP));
1860 __ addi(R1_SP, R1_SP, framesize_words*wordSize);
1861 DEBUG_ONLY(__ cmpd(CR0, R0, R1_SP));
1862 __ asm_assert_eq(FILE_AND_LINE ": inconsistent frame size");
1863 __ ld(R0, _abi0(lr), R1_SP); // Return pc
1864 __ mtlr(R0);
1865 __ blr();
1866
1867 // --- Exception handling path
1868
1869 exception_offset = __ pc() - start;
1870
1871 continuation_enter_cleanup(masm);
1872 Register ex_pc = R17_tos; // nonvolatile register
1873 Register ex_oop = R15_esp; // nonvolatile register
1874 __ ld(ex_pc, _abi0(callers_sp), R1_SP); // Load caller's return pc
1875 __ ld(ex_pc, _abi0(lr), ex_pc);
1876 __ mr(ex_oop, R3_RET); // save return value containing the exception oop
1877 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), R16_thread, ex_pc);
1878 __ mtlr(R3_RET); // the exception handler
1879 __ ld(R1_SP, _abi0(callers_sp), R1_SP); // remove enterSpecial frame
1880
1881 // Continue at exception handler
1882 // See OptoRuntime::generate_exception_blob for register arguments
1883 __ mr(R3_ARG1, ex_oop); // pass exception oop
1884 __ mr(R4_ARG2, ex_pc); // pass exception pc
1885 __ blr();
1886
1887 // static stub for the call above
1888 stub = CompiledDirectCall::emit_to_interp_stub(masm, call_pc);
1889 guarantee(stub != nullptr, "no space for static stub");
1890 }
1891
// Generates the stub for Continuation.doYield() (the yield intrinsic).
// Pushes a minimal ABI frame, records a last-Java-frame and an OopMap at
// the post_call_nop pc, and calls Continuation::freeze_entry() to move the
// continuation's frames off the stack.
//  - On success (freeze returned 0): the continuation frames are gone, so
//    SP is reset to the ContinuationEntry and the frame pushed by
//    gen_continuation_enter is cleaned up and popped before returning.
//  - On failure (continuation is pinned): return to the caller, either
//    normally or via the forward-exception stub if freeze left a pending
//    exception in the thread.
// Out parameters report the stub geometry back to generate_native_wrapper:
// frame_complete, framesize_words and compiled_entry_offset.
// Note: 'regs' is unused by this stub.
static void gen_continuation_yield(MacroAssembler* masm,
                                   const VMRegPair* regs,
                                   OopMapSet* oop_maps,
                                   int& frame_complete,
                                   int& framesize_words,
                                   int& compiled_entry_offset) {
  Register tmp = R10_ARG8;

  // A bare C-ABI register-argument frame, rounded up to stack alignment.
  const int framesize_bytes = (int)align_up((int)frame::native_abi_reg_args_size, frame::alignment_in_bytes);
  framesize_words = framesize_bytes / wordSize;

  address start = __ pc();
  compiled_entry_offset = __ pc() - start; // always 0: the compiled entry is at the stub start

  // Save return pc and push entry frame
  __ mflr(tmp);
  __ std(tmp, _abi0(lr), R1_SP); // SP->lr = return_pc
  __ push_frame(framesize_bytes , R0); // SP -= frame_size_in_bytes

  DEBUG_ONLY(__ block_comment("Frame Complete"));
  frame_complete = __ pc() - start;
  address last_java_pc = __ pc();

  // This nop must be exactly at the PC we push into the frame info.
  // We use this nop for fast CodeBlob lookup, associate the OopMap
  // with it right away.
  __ post_call_nop();
  OopMap* map = new OopMap(framesize_bytes / VMRegImpl::stack_slot_size, 1);
  oop_maps->add_gc_map(last_java_pc - start, map);

  __ calculate_address_from_global_toc(tmp, last_java_pc); // will be relocated
  __ set_last_Java_frame(R1_SP, tmp);
  // freeze_entry(JavaThread*, intptr_t* sp); result comes back in R3_RET.
  __ call_VM_leaf(Continuation::freeze_entry(), R16_thread, R1_SP);
  __ reset_last_Java_frame();

  Label L_pinned;

  // A zero result means the freeze succeeded; anything else means pinned.
  __ cmpwi(CR0, R3_RET, 0);
  __ bne(CR0, L_pinned);

  // yield succeeded

  // Pop frames of continuation including this stub's frame
  __ ld_ptr(R1_SP, JavaThread::cont_entry_offset(), R16_thread);
  // The frame pushed by gen_continuation_enter is on top now again
  continuation_enter_cleanup(masm);

  // Pop frame and return
  Label L_return;
  __ bind(L_return);
  __ pop_frame();
  __ ld(R0, _abi0(lr), R1_SP); // Return pc
  __ mtlr(R0);
  __ blr();

  // yield failed - continuation is pinned

  __ bind(L_pinned);

  // handle pending exception thrown by freeze
  __ ld(tmp, in_bytes(JavaThread::pending_exception_offset()), R16_thread);
  __ cmpdi(CR0, tmp, 0);
  __ beq(CR0, L_return); // return if no exception is pending
  // Exception pending: pop our frame and tail-call the forward-exception
  // stub so it is dispatched at the caller's return pc.
  __ pop_frame();
  __ ld(R0, _abi0(lr), R1_SP); // Return pc
  __ mtlr(R0);
  __ load_const_optimized(tmp, StubRoutines::forward_exception_entry(), R0);
  __ mtctr(tmp);
  __ bctr();
}
1962
// Public entry point that forwards to the file-local (static)
// continuation_enter_cleanup() helper defined earlier in this file — the
// same cleanup sequence used by gen_continuation_enter/gen_continuation_yield
// — so code outside this translation unit can emit it as well.
void SharedRuntime::continuation_enter_cleanup(MacroAssembler* masm) {
  ::continuation_enter_cleanup(masm);
}
1966
1967 // ---------------------------------------------------------------------------
1968 // Generate a native wrapper for a given method. The method takes arguments
1969 // in the Java compiled code convention, marshals them to the native
1970 // convention (handlizes oops, etc), transitions to native, makes the call,
1971 // returns to java state (possibly blocking), unhandlizes any result and
1972 // returns.
1973 //
1974 // Critical native functions are a shorthand for the use of
// GetPrimitiveArrayCritical and disallow the use of any other JNI
1976 // functions. The wrapper is expected to unpack the arguments before
1977 // passing them to the callee. Critical native functions leave the state _in_Java,
1978 // since they cannot stop for GC.
1979 // Some other parts of JNI setup are skipped like the tear down of the JNI handle
// block and the check for pending exceptions, since it's impossible for them
// to be thrown.
1982 //
1983 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1984 const methodHandle& method,
1985 int compile_id,
1986 BasicType *in_sig_bt,
1987 VMRegPair *in_regs,
1988 BasicType ret_type) {
1989 if (method->is_continuation_native_intrinsic()) {
1990 int exception_offset = -1;
1991 OopMapSet* oop_maps = new OopMapSet();
1992 int frame_complete = -1;
1993 int stack_slots = -1;
1994 int interpreted_entry_offset = -1;
1995 int vep_offset = -1;
1996 if (method->is_continuation_enter_intrinsic()) {
1997 gen_continuation_enter(masm,
1998 in_regs,
1999 exception_offset,
2000 oop_maps,
2001 frame_complete,
2002 stack_slots,
2003 interpreted_entry_offset,
2004 vep_offset);
2005 } else if (method->is_continuation_yield_intrinsic()) {
2006 gen_continuation_yield(masm,
2007 in_regs,
2008 oop_maps,
2009 frame_complete,
2010 stack_slots,
2011 vep_offset);
2012 } else {
2013 guarantee(false, "Unknown Continuation native intrinsic");
2014 }
2015
2016 #ifdef ASSERT
2017 if (method->is_continuation_enter_intrinsic()) {
2018 assert(interpreted_entry_offset != -1, "Must be set");
2019 assert(exception_offset != -1, "Must be set");
2020 } else {
2021 assert(interpreted_entry_offset == -1, "Must be unset");
2022 assert(exception_offset == -1, "Must be unset");
2023 }
2024 assert(frame_complete != -1, "Must be set");
2025 assert(stack_slots != -1, "Must be set");
2026 assert(vep_offset != -1, "Must be set");
2027 #endif
2028
2029 __ flush();
2030 nmethod* nm = nmethod::new_native_nmethod(method,
2031 compile_id,
2032 masm->code(),
2033 vep_offset,
2034 frame_complete,
2035 stack_slots,
2036 in_ByteSize(-1),
2037 in_ByteSize(-1),
2038 oop_maps,
2039 exception_offset);
2040 if (nm == nullptr) return nm;
2041 if (method->is_continuation_enter_intrinsic()) {
2042 ContinuationEntry::set_enter_code(nm, interpreted_entry_offset);
2043 } else if (method->is_continuation_yield_intrinsic()) {
2044 _cont_doYield_stub = nm;
2045 }
2046 return nm;
2047 }
2048
2049 if (method->is_method_handle_intrinsic()) {
2050 vmIntrinsics::ID iid = method->intrinsic_id();
2051 intptr_t start = (intptr_t)__ pc();
2052 int vep_offset = ((intptr_t)__ pc()) - start;
2053 gen_special_dispatch(masm,
2054 method,
2055 in_sig_bt,
2056 in_regs);
2057 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period
2058 __ flush();
2059 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
2060 return nmethod::new_native_nmethod(method,
2061 compile_id,
2062 masm->code(),
2063 vep_offset,
2064 frame_complete,
2065 stack_slots / VMRegImpl::slots_per_word,
2066 in_ByteSize(-1),
2067 in_ByteSize(-1),
2068 (OopMapSet*)nullptr);
2069 }
2070
2071 address native_func = method->native_function();
2072 assert(native_func != nullptr, "must have function");
2073
2074 // First, create signature for outgoing C call
2075 // --------------------------------------------------------------------------
2076
2077 int total_in_args = method->size_of_parameters();
2078 // We have received a description of where all the java args are located
2079 // on entry to the wrapper. We need to convert these args to where
2080 // the jni function will expect them. To figure out where they go
2081 // we convert the java signature to a C signature by inserting
2082 // the hidden arguments as arg[0] and possibly arg[1] (static method)
2083
2084 // Calculate the total number of C arguments and create arrays for the
2085 // signature and the outgoing registers.
2086 // On ppc64, we have two arrays for the outgoing registers, because
2087 // some floating-point arguments must be passed in registers _and_
2088 // in stack locations.
2089 bool method_is_static = method->is_static();
2090 int total_c_args = total_in_args + (method_is_static ? 2 : 1);
2091
2092 BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
2093 VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
2094
2095 // Create the signature for the C call:
2096 // 1) add the JNIEnv*
2097 // 2) add the class if the method is static
2098 // 3) copy the rest of the incoming signature (shifted by the number of
2099 // hidden arguments).
2100
2101 int argc = 0;
2102 out_sig_bt[argc++] = T_ADDRESS;
2103 if (method->is_static()) {
2104 out_sig_bt[argc++] = T_OBJECT;
2105 }
2106
2107 for (int i = 0; i < total_in_args ; i++ ) {
2108 out_sig_bt[argc++] = in_sig_bt[i];
2109 }
2110
2111
2112 // Compute the wrapper's frame size.
2113 // --------------------------------------------------------------------------
2114
2115 // Now figure out where the args must be stored and how much stack space
2116 // they require.
2117 //
2118 // Compute framesize for the wrapper. We need to handlize all oops in
2119 // incoming registers.
2120 //
2121 // Calculate the total number of stack slots we will need:
2122 // 1) abi requirements
2123 // 2) outgoing arguments
2124 // 3) space for inbound oop handle area
2125 // 4) space for handlizing a klass if static method
2126 // 5) space for a lock if synchronized method
2127 // 6) workspace for saving return values, int <-> float reg moves, etc.
2128 // 7) alignment
2129 //
2130 // Layout of the native wrapper frame:
2131 // (stack grows upwards, memory grows downwards)
2132 //
2133 // NW [ABI_REG_ARGS] <-- 1) R1_SP
2134 // [outgoing arguments] <-- 2) R1_SP + out_arg_slot_offset
2135 // [oopHandle area] <-- 3) R1_SP + oop_handle_offset
2136 // klass <-- 4) R1_SP + klass_offset
2137 // lock <-- 5) R1_SP + lock_offset
2138 // [workspace] <-- 6) R1_SP + workspace_offset
2139 // [alignment] (optional) <-- 7)
2140 // caller [JIT_TOP_ABI_48] <-- r_callers_sp
2141 //
2142 // - *_slot_offset Indicates offset from SP in number of stack slots.
2143 // - *_offset Indicates offset from SP in bytes.
2144
2145 int stack_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args) + // 1+2)
2146 SharedRuntime::out_preserve_stack_slots(); // See c_calling_convention.
2147
2148 // Now the space for the inbound oop handle area.
2149 int total_save_slots = num_java_iarg_registers * VMRegImpl::slots_per_word;
2150
2151 int oop_handle_slot_offset = stack_slots;
2152 stack_slots += total_save_slots; // 3)
2153
2154 int klass_slot_offset = 0;
2155 int klass_offset = -1;
2156 if (method_is_static) { // 4)
2157 klass_slot_offset = stack_slots;
2158 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
2159 stack_slots += VMRegImpl::slots_per_word;
2160 }
2161
2162 int lock_slot_offset = 0;
2163 int lock_offset = -1;
2164 if (method->is_synchronized()) { // 5)
2165 lock_slot_offset = stack_slots;
2166 lock_offset = lock_slot_offset * VMRegImpl::stack_slot_size;
2167 stack_slots += VMRegImpl::slots_per_word;
2168 }
2169
2170 int workspace_slot_offset = stack_slots; // 6)
2171 stack_slots += 2;
2172
2173 // Now compute actual number of stack words we need.
2174 // Rounding to make stack properly aligned.
2175 stack_slots = align_up(stack_slots, // 7)
2176 frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
2177 int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
2178
2179
2180 // Now we can start generating code.
2181 // --------------------------------------------------------------------------
2182
2183 intptr_t start_pc = (intptr_t)__ pc();
2184 intptr_t vep_start_pc;
2185 intptr_t frame_done_pc;
2186
2187 Label handle_pending_exception;
2188 Label last_java_pc;
2189
2190 Register r_callers_sp = R21;
2191 Register r_temp_1 = R22;
2192 Register r_temp_2 = R23;
2193 Register r_temp_3 = R24;
2194 Register r_temp_4 = R25;
2195 Register r_temp_5 = R26;
2196 Register r_temp_6 = R27;
2197 Register r_last_java_pc = R28;
2198
2199 Register r_carg1_jnienv = noreg;
2200 Register r_carg2_classorobject = noreg;
2201 r_carg1_jnienv = out_regs[0].first()->as_Register();
2202 r_carg2_classorobject = out_regs[1].first()->as_Register();
2203
2204
2205 // Generate the Unverified Entry Point (UEP).
2206 // --------------------------------------------------------------------------
2207 assert(start_pc == (intptr_t)__ pc(), "uep must be at start");
2208
2209 // Check ic: object class == cached class?
2210 if (!method_is_static) {
2211 __ ic_check(4 /* end_alignment */);
2212 }
2213
2214 // Generate the Verified Entry Point (VEP).
2215 // --------------------------------------------------------------------------
2216 vep_start_pc = (intptr_t)__ pc();
2217
2218 if (method->needs_clinit_barrier()) {
2219 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
2220 Label L_skip_barrier;
2221 Register klass = r_temp_1;
2222 // Notify OOP recorder (don't need the relocation)
2223 AddressLiteral md = __ constant_metadata_address(method->method_holder());
2224 __ load_const_optimized(klass, md.value(), R0);
2225 __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);
2226
2227 __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
2228 __ mtctr(klass);
2229 __ bctr();
2230
2231 __ bind(L_skip_barrier);
2232 }
2233
2234 __ save_LR(r_temp_1);
2235 __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame.
2236 __ mr(r_callers_sp, R1_SP); // Remember frame pointer.
2237 __ push_frame(frame_size_in_bytes, r_temp_1); // Push the c2n adapter's frame.
2238
2239 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2240 bs->nmethod_entry_barrier(masm, r_temp_1);
2241
2242 frame_done_pc = (intptr_t)__ pc();
2243
2244 // Native nmethod wrappers never take possession of the oop arguments.
2245 // So the caller will gc the arguments.
2246 // The only thing we need an oopMap for is if the call is static.
2247 //
2248 // An OopMap for lock (and class if static), and one for the VM call itself.
2249 OopMapSet *oop_maps = new OopMapSet();
2250 OopMap *oop_map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
2251
2252 // Move arguments from register/stack to register/stack.
2253 // --------------------------------------------------------------------------
2254 //
2255 // We immediately shuffle the arguments so that for any vm call we have
2256 // to make from here on out (sync slow path, jvmti, etc.) we will have
2257 // captured the oops from our caller and have a valid oopMap for them.
2258 //
2259 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
2260 // (derived from JavaThread* which is in R16_thread) and, if static,
2261 // the class mirror instead of a receiver. This pretty much guarantees that
2262 // register layout will not match. We ignore these extra arguments during
2263 // the shuffle. The shuffle is described by the two calling convention
2264 // vectors we have in our possession. We simply walk the java vector to
2265 // get the source locations and the c vector to get the destinations.
2266
2267 // Record sp-based slot for receiver on stack for non-static methods.
2268 int receiver_offset = -1;
2269
2270 // We move the arguments backward because the floating point registers
2271 // destination will always be to a register with a greater or equal
2272 // register number or the stack.
2273 // in is the index of the incoming Java arguments
2274 // out is the index of the outgoing C arguments
2275
2276 #ifdef ASSERT
2277 bool reg_destroyed[Register::number_of_registers];
2278 bool freg_destroyed[FloatRegister::number_of_registers];
2279 for (int r = 0 ; r < Register::number_of_registers ; r++) {
2280 reg_destroyed[r] = false;
2281 }
2282 for (int f = 0 ; f < FloatRegister::number_of_registers ; f++) {
2283 freg_destroyed[f] = false;
2284 }
2285 #endif // ASSERT
2286
2287 for (int in = total_in_args - 1, out = total_c_args - 1; in >= 0 ; in--, out--) {
2288
2289 #ifdef ASSERT
2290 if (in_regs[in].first()->is_Register()) {
2291 assert(!reg_destroyed[in_regs[in].first()->as_Register()->encoding()], "ack!");
2292 } else if (in_regs[in].first()->is_FloatRegister()) {
2293 assert(!freg_destroyed[in_regs[in].first()->as_FloatRegister()->encoding()], "ack!");
2294 }
2295 if (out_regs[out].first()->is_Register()) {
2296 reg_destroyed[out_regs[out].first()->as_Register()->encoding()] = true;
2297 } else if (out_regs[out].first()->is_FloatRegister()) {
2298 freg_destroyed[out_regs[out].first()->as_FloatRegister()->encoding()] = true;
2299 }
2300 #endif // ASSERT
2301
2302 switch (in_sig_bt[in]) {
2303 case T_BOOLEAN:
2304 case T_CHAR:
2305 case T_BYTE:
2306 case T_SHORT:
2307 case T_INT:
2308 // Move int and do sign extension.
2309 int_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2310 break;
2311 case T_LONG:
2312 long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2313 break;
2314 case T_ARRAY:
2315 case T_OBJECT:
2316 object_move(masm, stack_slots,
2317 oop_map, oop_handle_slot_offset,
2318 ((in == 0) && (!method_is_static)), &receiver_offset,
2319 in_regs[in], out_regs[out],
2320 r_callers_sp, r_temp_1, r_temp_2);
2321 break;
2322 case T_VOID:
2323 break;
2324 case T_FLOAT:
2325 float_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2326 break;
2327 case T_DOUBLE:
2328 double_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2329 break;
2330 case T_ADDRESS:
2331 fatal("found type (T_ADDRESS) in java args");
2332 break;
2333 default:
2334 ShouldNotReachHere();
2335 break;
2336 }
2337 }
2338
2339 // Pre-load a static method's oop into ARG2.
2340 // Used both by locking code and the normal JNI call code.
2341 if (method_is_static) {
2342 __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()),
2343 r_carg2_classorobject);
2344
2345 // Now handlize the static class mirror in carg2. It's known not-null.
2346 __ std(r_carg2_classorobject, klass_offset, R1_SP);
2347 oop_map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2348 __ addi(r_carg2_classorobject, R1_SP, klass_offset);
2349 }
2350
2351 // Get JNIEnv* which is first argument to native.
2352 __ addi(r_carg1_jnienv, R16_thread, in_bytes(JavaThread::jni_environment_offset()));
2353
2354 // NOTE:
2355 //
2356 // We have all of the arguments setup at this point.
2357 // We MUST NOT touch any outgoing regs from this point on.
2358 // So if we must call out we must push a new frame.
2359
2360 // The last java pc will also be used as resume pc if this is the wrapper for wait0.
2361 // For this purpose the precise location matters but not for oopmap lookup.
2362 __ calculate_address_from_global_toc(r_last_java_pc, last_java_pc, true, true, true, true);
2363
2364 // Make sure that thread is non-volatile; it crosses a bunch of VM calls below.
2365 assert(R16_thread->is_nonvolatile(), "thread must be in non-volatile register");
2366
2367 # if 0
2368 // DTrace method entry
2369 # endif
2370
2371 // Lock a synchronized method.
2372 // --------------------------------------------------------------------------
2373
2374 if (method->is_synchronized()) {
2375 Register r_oop = r_temp_4;
2376 const Register r_box = r_temp_5;
2377 Label done, locked;
2378
2379 // Load the oop for the object or class. r_carg2_classorobject contains
2380 // either the handlized oop from the incoming arguments or the handlized
2381 // class mirror (if the method is static).
2382 __ ld(r_oop, 0, r_carg2_classorobject);
2383
2384 // Get the lock box slot's address.
2385 __ addi(r_box, R1_SP, lock_offset);
2386
2387 // Try fastpath for locking.
2388 // fast_lock kills r_temp_1, r_temp_2, r_temp_3.
2389 Register r_temp_3_or_noreg = UseObjectMonitorTable ? r_temp_3 : noreg;
2390 __ compiler_fast_lock_object(CR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3_or_noreg);
2391 __ beq(CR0, locked);
2392
2393 // None of the above fast optimizations worked so we have to get into the
2394 // slow case of monitor enter. Inline a special case of call_VM that
2395 // disallows any pending_exception.
2396
2397 // Save argument registers and leave room for C-compatible ABI_REG_ARGS.
2398 int frame_size = frame::native_abi_reg_args_size + align_up(total_c_args * wordSize, frame::alignment_in_bytes);
2399 __ mr(R11_scratch1, R1_SP);
2400 RegisterSaver::push_frame_and_save_argument_registers(masm, R12_scratch2, frame_size, total_c_args, out_regs);
2401
2402 // Do the call.
2403 __ set_last_Java_frame(R11_scratch1, r_last_java_pc);
2404 assert(r_last_java_pc->is_nonvolatile(), "r_last_java_pc needs to be preserved accross complete_monitor_locking_C call");
2405 // The following call will not be preempted.
2406 // push_cont_fastpath forces freeze slow path in case we try to preempt where we will pin the
2407 // vthread to the carrier (see FreezeBase::recurse_freeze_native_frame()).
2408 __ push_cont_fastpath();
2409 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), r_oop, r_box, R16_thread);
2410 __ pop_cont_fastpath();
2411 __ reset_last_Java_frame();
2412
2413 RegisterSaver::restore_argument_registers_and_pop_frame(masm, frame_size, total_c_args, out_regs);
2414
2415 __ asm_assert_mem8_is_zero(thread_(pending_exception),
2416 "no pending exception allowed on exit from SharedRuntime::complete_monitor_locking_C");
2417
2418 __ bind(locked);
2419 }
2420
2421 __ set_last_Java_frame(R1_SP, r_last_java_pc);
2422
2423 // Publish thread state
2424 // --------------------------------------------------------------------------
2425
2426 // Transition from _thread_in_Java to _thread_in_native.
2427 __ li(R0, _thread_in_native);
2428 __ release();
2429 // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2430 __ stw(R0, thread_(thread_state));
2431
2432
2433 // The JNI call
2434 // --------------------------------------------------------------------------
2435 __ call_c(native_func, relocInfo::runtime_call_type);
2436
2437
2438 // Now, we are back from the native code.
2439
2440
2441 // Unpack the native result.
2442 // --------------------------------------------------------------------------
2443
2444 // For int-types, we do any needed sign-extension required.
2445 // Care must be taken that the return values (R3_RET and F1_RET)
2446 // will survive any VM calls for blocking or unlocking.
2447 // An OOP result (handle) is done specially in the slow-path code.
2448
2449 switch (ret_type) {
2450 case T_VOID: break; // Nothing to do!
2451 case T_FLOAT: break; // Got it where we want it (unless slow-path).
2452 case T_DOUBLE: break; // Got it where we want it (unless slow-path).
2453 case T_LONG: break; // Got it where we want it (unless slow-path).
2454 case T_OBJECT: break; // Really a handle.
2455 // Cannot de-handlize until after reclaiming jvm_lock.
2456 case T_ARRAY: break;
2457
2458 case T_BOOLEAN: { // 0 -> false(0); !0 -> true(1)
2459 __ normalize_bool(R3_RET);
2460 break;
2461 }
2462 case T_BYTE: { // sign extension
2463 __ extsb(R3_RET, R3_RET);
2464 break;
2465 }
2466 case T_CHAR: { // unsigned result
2467 __ andi(R3_RET, R3_RET, 0xffff);
2468 break;
2469 }
2470 case T_SHORT: { // sign extension
2471 __ extsh(R3_RET, R3_RET);
2472 break;
2473 }
2474 case T_INT: // nothing to do
2475 break;
2476 default:
2477 ShouldNotReachHere();
2478 break;
2479 }
2480
2481 // Publish thread state
2482 // --------------------------------------------------------------------------
2483
2484 // Switch thread to "native transition" state before reading the
2485 // synchronization state. This additional state is necessary because reading
2486 // and testing the synchronization state is not atomic w.r.t. GC, as this
2487 // scenario demonstrates:
2488 // - Java thread A, in _thread_in_native state, loads _not_synchronized
2489 // and is preempted.
2490 // - VM thread changes sync state to synchronizing and suspends threads
2491 // for GC.
2492 // - Thread A is resumed to finish this native method, but doesn't block
2493 // here since it didn't see any synchronization in progress, and escapes.
2494
2495 // Transition from _thread_in_native to _thread_in_native_trans.
2496 __ li(R0, _thread_in_native_trans);
2497 __ release();
2498 // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2499 __ stw(R0, thread_(thread_state));
2500
2501
2502 // Must we block?
2503 // --------------------------------------------------------------------------
2504
2505 // Block, if necessary, before resuming in _thread_in_Java state.
2506 // In order for GC to work, don't clear the last_Java_sp until after blocking.
2507 {
2508 Label no_block, sync;
2509
2510 // Force this write out before the read below.
2511 if (!UseSystemMemoryBarrier) {
2512 __ fence();
2513 }
2514
2515 Register sync_state_addr = r_temp_4;
2516 Register sync_state = r_temp_5;
2517 Register suspend_flags = r_temp_6;
2518
2519 // No synchronization in progress nor yet synchronized
2520 // (cmp-br-isync on one path, release (same as acquire on PPC64) on the other path).
2521 __ safepoint_poll(sync, sync_state, true /* at_return */, false /* in_nmethod */);
2522
2523 // Not suspended.
2524 // TODO: PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
2525 __ lwz(suspend_flags, thread_(suspend_flags));
2526 __ cmpwi(CR1, suspend_flags, 0);
2527 __ beq(CR1, no_block);
2528
2529 // Block. Save any potential method result value before the operation and
2530 // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2531 // lets us share the oopMap we used when we went native rather than create
2532 // a distinct one for this pc.
2533 __ bind(sync);
2534 __ isync();
2535
2536 address entry_point =
2537 CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2538 save_native_result(masm, ret_type, workspace_slot_offset);
2539 __ call_VM_leaf(entry_point, R16_thread);
2540 restore_native_result(masm, ret_type, workspace_slot_offset);
2541
2542 __ bind(no_block);
2543
2544 // Publish thread state.
2545 // --------------------------------------------------------------------------
2546
2547 // Thread state is thread_in_native_trans. Any safepoint blocking has
2548 // already happened so we can now change state to _thread_in_Java.
2549
2550 // Transition from _thread_in_native_trans to _thread_in_Java.
2551 __ li(R0, _thread_in_Java);
2552 __ lwsync(); // Acquire safepoint and suspend state, release thread state.
2553 // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2554 __ stw(R0, thread_(thread_state));
2555
2556 // Check preemption for Object.wait()
2557 if (method->is_object_wait0()) {
2558 Label not_preempted;
2559 __ ld(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread);
2560 __ cmpdi(CR0, R0, 0);
2561 __ beq(CR0, not_preempted);
2562 __ mtlr(R0);
2563 __ li(R0, 0);
2564 __ std(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread);
2565 __ blr();
2566 __ bind(not_preempted);
2567 }
2568 __ bind(last_java_pc);
2569 // We use the same pc/oopMap repeatedly when we call out above.
2570 intptr_t oopmap_pc = (intptr_t) __ pc();
2571 oop_maps->add_gc_map(oopmap_pc - start_pc, oop_map);
2572 }
2573
2574 // Reguard any pages if necessary.
2575 // --------------------------------------------------------------------------
2576
2577 Label no_reguard;
2578 __ lwz(r_temp_1, thread_(stack_guard_state));
2579 __ cmpwi(CR0, r_temp_1, StackOverflow::stack_guard_yellow_reserved_disabled);
2580 __ bne(CR0, no_reguard);
2581
2582 save_native_result(masm, ret_type, workspace_slot_offset);
2583 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
2584 restore_native_result(masm, ret_type, workspace_slot_offset);
2585
2586 __ bind(no_reguard);
2587
2588
2589 // Unlock
2590 // --------------------------------------------------------------------------
2591
2592 if (method->is_synchronized()) {
2593 const Register r_oop = r_temp_4;
2594 const Register r_box = r_temp_5;
2595 const Register r_exception = r_temp_6;
2596 Label done;
2597
2598 // Get oop and address of lock object box.
2599 if (method_is_static) {
2600 assert(klass_offset != -1, "");
2601 __ ld(r_oop, klass_offset, R1_SP);
2602 } else {
2603 assert(receiver_offset != -1, "");
2604 __ ld(r_oop, receiver_offset, R1_SP);
2605 }
2606 __ addi(r_box, R1_SP, lock_offset);
2607
2608 // Try fastpath for unlocking.
2609 __ compiler_fast_unlock_object(CR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
2610 __ beq(CR0, done);
2611
2612 // Save and restore any potential method result value around the unlocking operation.
2613 save_native_result(masm, ret_type, workspace_slot_offset);
2614
2615 // Must save pending exception around the slow-path VM call. Since it's a
2616 // leaf call, the pending exception (if any) can be kept in a register.
2617 __ ld(r_exception, thread_(pending_exception));
2618 assert(r_exception->is_nonvolatile(), "exception register must be non-volatile");
2619 __ li(R0, 0);
2620 __ std(R0, thread_(pending_exception));
2621
2622 // Slow case of monitor enter.
2623 // Inline a special case of call_VM that disallows any pending_exception.
2624 // Arguments are (oop obj, BasicLock* lock, JavaThread* thread).
2625 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box, R16_thread);
2626
2627 __ asm_assert_mem8_is_zero(thread_(pending_exception),
2628 "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C");
2629
2630 restore_native_result(masm, ret_type, workspace_slot_offset);
2631
2632 // Check_forward_pending_exception jump to forward_exception if any pending
2633 // exception is set. The forward_exception routine expects to see the
2634 // exception in pending_exception and not in a register. Kind of clumsy,
2635 // since all folks who branch to forward_exception must have tested
2636 // pending_exception first and hence have it in a register already.
2637 __ std(r_exception, thread_(pending_exception));
2638
2639 __ bind(done);
2640 }
2641
2642 # if 0
2643 // DTrace method exit
2644 # endif
2645
2646 // Clear "last Java frame" SP and PC.
2647 // --------------------------------------------------------------------------
2648
2649 // Last java frame won't be set if we're resuming after preemption
2650 bool maybe_preempted = method->is_object_wait0();
2651 __ reset_last_Java_frame(!maybe_preempted /* check_last_java_sp */);
2652
2653 // Unbox oop result, e.g. JNIHandles::resolve value.
2654 // --------------------------------------------------------------------------
2655
2656 if (is_reference_type(ret_type)) {
2657 __ resolve_jobject(R3_RET, r_temp_1, r_temp_2, MacroAssembler::PRESERVATION_NONE);
2658 }
2659
2660 if (CheckJNICalls) {
2661 // clear_pending_jni_exception_check
2662 __ load_const_optimized(R0, 0L);
2663 __ st_ptr(R0, JavaThread::pending_jni_exception_check_fn_offset(), R16_thread);
2664 }
2665
2666 // Reset handle block.
2667 // --------------------------------------------------------------------------
2668 __ ld(r_temp_1, thread_(active_handles));
2669 // TODO: PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
2670 __ li(r_temp_2, 0);
2671 __ stw(r_temp_2, in_bytes(JNIHandleBlock::top_offset()), r_temp_1);
2672
2673 // Prepare for return
2674 // --------------------------------------------------------------------------
2675 __ pop_frame();
2676 __ restore_LR(R11);
2677
2678 #if INCLUDE_JFR
2679 // We need to do a poll test after unwind in case the sampler
2680 // managed to sample the native frame after returning to Java.
2681 Label L_stub;
2682 int safepoint_offset = __ offset();
2683 if (!UseSIGTRAP) {
2684 __ relocate(relocInfo::poll_return_type);
2685 }
2686 __ safepoint_poll(L_stub, r_temp_2, true /* at_return */, true /* in_nmethod: frame already popped */);
2687 #endif // INCLUDE_JFR
2688
2689 // Check for pending exceptions.
2690 // --------------------------------------------------------------------------
2691 __ ld(r_temp_2, thread_(pending_exception));
2692 __ cmpdi(CR0, r_temp_2, 0);
2693 __ bne(CR0, handle_pending_exception);
2694
2695 // Return.
2696 __ blr();
2697
2698 // Handler for return safepoint (out-of-line).
2699 #if INCLUDE_JFR
2700 if (!UseSIGTRAP) {
2701 __ bind(L_stub);
2702 __ jump_to_polling_page_return_handler_blob(safepoint_offset);
2703 }
2704 #endif // INCLUDE_JFR
2705
2706 // Handler for pending exceptions (out-of-line).
2707 // --------------------------------------------------------------------------
2708 // Since this is a native call, we know the proper exception handler
2709 // is the empty function. We just pop this frame and then jump to
2710 // forward_exception_entry.
2711 __ bind(handle_pending_exception);
2712 __ b64_patchable((address)StubRoutines::forward_exception_entry(),
2713 relocInfo::runtime_call_type);
2714
2715 // Done.
2716 // --------------------------------------------------------------------------
2717
2718 __ flush();
2719
2720 nmethod *nm = nmethod::new_native_nmethod(method,
2721 compile_id,
2722 masm->code(),
2723 vep_start_pc-start_pc,
2724 frame_done_pc-start_pc,
2725 stack_slots / VMRegImpl::slots_per_word,
2726 (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2727 in_ByteSize(lock_offset),
2728 oop_maps);
2729
2730 return nm;
2731 }
2732
2733 // This function returns the adjust size (in number of words) to a c2i adapter
2734 // activation for use during deoptimization.
2735 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2736 return align_up((callee_locals - callee_parameters) * Interpreter::stackElementWords, frame::frame_alignment_in_words);
2737 }
2738
2739 uint SharedRuntime::in_preserve_stack_slots() {
2740 return frame::jit_in_preserve_size / VMRegImpl::stack_slot_size;
2741 }
2742
2743 uint SharedRuntime::out_preserve_stack_slots() {
2744 #if defined(COMPILER1) || defined(COMPILER2)
2745 return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
2746 #else
2747 return 0;
2748 #endif
2749 }
2750
// Not supported on this platform: returns the register holding the current
// JavaThread* in saved contexts. Must never be reached on PPC.
VMReg SharedRuntime::thread_register() {
  // On PPC virtual threads don't save the JavaThread* in their context (e.g. C1 stub frames).
  ShouldNotCallThis();
  return nullptr; // Unreachable; satisfies the return-type requirement.
}
2756
2757 #if defined(COMPILER1) || defined(COMPILER2)
// Frame generation for deopt and uncommon trap blobs.
// Pushes one skeletal frame described by the current cursor position into the
// UnrollBlock's pc/size arrays: loads the next pc and frame size, stores the
// pc as the return address of the current top frame, pushes the new frame,
// and advances the array cursors. frame_size_reg and pc_reg are scratch and
// are invalidated.
static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
                                /* Read */
                                Register unroll_block_reg,
                                /* Update */
                                Register frame_sizes_reg,
                                Register number_of_frames_reg,
                                Register pcs_reg,
                                /* Invalidate */
                                Register frame_size_reg,
                                Register pc_reg) {

  __ ld(pc_reg, 0, pcs_reg);                 // Next return pc from the pcs array.
  __ ld(frame_size_reg, 0, frame_sizes_reg); // Size of the frame to push.
  __ std(pc_reg, _abi0(lr), R1_SP);          // Becomes the new frame's return address.
  __ push_frame(frame_size_reg, R0/*tmp*/);
  // Fill in the sender_sp slot of the new frame's interpreter state.
  __ std(R1_SP, _ijava_state_neg(sender_sp), R1_SP);
  __ addi(number_of_frames_reg, number_of_frames_reg, -1); // One frame done.
  __ addi(frame_sizes_reg, frame_sizes_reg, wordSize);     // Advance size cursor.
  __ addi(pcs_reg, pcs_reg, wordSize);                     // Advance pc cursor.
}
2779
// Loop through the UnrollBlock info and create new frames.
// First resizes the current top frame (the caller of the deoptee) by the
// caller adjustment from the UnrollBlock, then pushes one skeletal
// interpreter frame per entry in the UnrollBlock's frame_sizes/frame_pcs
// arrays via push_skeleton_frame(). All registers in the "invalidate"
// group are clobbered.
static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
                                 /* read */
                                 Register unroll_block_reg,
                                 /* invalidate */
                                 Register frame_sizes_reg,
                                 Register number_of_frames_reg,
                                 Register pcs_reg,
                                 Register frame_size_reg,
                                 Register pc_reg) {
  Label loop;

  // _number_of_frames is of type int (deoptimization.hpp)
  __ lwa(number_of_frames_reg,
         in_bytes(Deoptimization::UnrollBlock::number_of_frames_offset()),
         unroll_block_reg);
  __ ld(pcs_reg,
        in_bytes(Deoptimization::UnrollBlock::frame_pcs_offset()),
        unroll_block_reg);
  __ ld(frame_sizes_reg,
        in_bytes(Deoptimization::UnrollBlock::frame_sizes_offset()),
        unroll_block_reg);

  // stack: (caller_of_deoptee, ...).

  // At this point we either have an interpreter frame or a compiled
  // frame on top of stack. If it is a compiled frame we push a new c2i
  // adapter here

  // Memorize top-frame stack-pointer.
  __ mr(frame_size_reg/*old_sp*/, R1_SP);

  // Resize interpreter top frame OR C2I adapter.

  // At this moment, the top frame (which is the caller of the deoptee) is
  // an interpreter frame or a newly pushed C2I adapter or an entry frame.
  // The top frame has a TOP_IJAVA_FRAME_ABI and the frame contains the
  // outgoing arguments.
  //
  // In order to push the interpreter frame for the deoptee, we need to
  // resize the top frame such that we are able to place the deoptee's
  // locals in the frame.
  // Additionally, we have to turn the top frame's TOP_IJAVA_FRAME_ABI
  // into a valid PARENT_IJAVA_FRAME_ABI.

  __ lwa(R11_scratch1,
         in_bytes(Deoptimization::UnrollBlock::caller_adjustment_offset()),
         unroll_block_reg);
  // Negate: resize_frame below expects the (negative) delta.
  __ neg(R11_scratch1, R11_scratch1);

  // R11_scratch1 contains size of locals for frame resizing.
  // R12_scratch2 contains top frame's lr.

  // Resize frame by complete frame size prevents TOC from being
  // overwritten by locals. A more stack space saving way would be
  // to copy the TOC to its location in the new abi.
  __ addi(R11_scratch1, R11_scratch1, - frame::parent_ijava_frame_abi_size);

  // now, resize the frame
  __ resize_frame(R11_scratch1, pc_reg/*tmp*/);

  // In the case where we have resized a c2i frame above, the optional
  // alignment below the locals has size 32 (why?).
  __ std(R12_scratch2, _abi0(lr), R1_SP);

  // Initialize initial_caller_sp (the sp memorized before the resize).
  __ std(frame_size_reg, _ijava_state_neg(sender_sp), R1_SP);

#ifdef ASSERT
  // Make sure that there is at least one entry in the array.
  __ cmpdi(CR0, number_of_frames_reg, 0);
  __ asm_assert_ne("array_size must be > 0");
#endif

  // Now push the new interpreter frames.
  //
  __ bind(loop);
  // Allocate a new frame, fill in the pc.
  push_skeleton_frame(masm, deopt,
                      unroll_block_reg,
                      frame_sizes_reg,
                      number_of_frames_reg,
                      pcs_reg,
                      frame_size_reg,
                      pc_reg);
  // Loop until the whole frame_sizes/frame_pcs arrays are consumed.
  __ cmpdi(CR0, number_of_frames_reg, 0);
  __ bne(CR0, loop);

  // Get the return address pointing into the template interpreter.
  __ ld(R0, 0, pcs_reg);
  // Store it in the top interpreter frame.
  __ std(R0, _abi0(lr), R1_SP);
  // Initialize frame_manager_lr of interpreter top frame.
}
2874 #endif
2875
// Generates the deoptimization blob. It has three entry points:
// a plain deopt entry, an exception entry (exception oop/pc in registers),
// and (for C1) a reexecute entry; all converge on a common path that calls
// Deoptimization::fetch_unroll_info, pops the deoptee, pushes skeletal
// interpreter frames, lets Deoptimization::unpack_frames fill them in, and
// finally resumes in the interpreter.
void SharedRuntime::generate_deopt_blob() {
  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id);
  CodeBuffer buffer(name, 2048, 1024);
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  Label exec_mode_initialized;
  OopMap* map = nullptr;
  OopMapSet *oop_maps = new OopMapSet();

  // size of ABI112 plus spill slots for R3_RET and F1_RET.
  const int frame_size_in_bytes = frame::native_abi_reg_args_spill_size;
  const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); // NOTE(review): not referenced below.
  int first_frame_size_in_bytes = 0; // frame size of "unpack frame" for call to fetch_unroll_info.

  const Register exec_mode_reg = R21_tmp1;

  const address start = __ pc();
  int exception_offset = 0;
  int exception_in_tls_offset = 0;
  int reexecute_offset = 0;

#if defined(COMPILER1) || defined(COMPILER2)
  // --------------------------------------------------------------------------
  // Prolog for non exception case!

  // We have been called from the deopt handler of the deoptee.
  //
  // deoptee:
  //                      ...
  //                      call X
  //                      ...
  //  deopt_handler:      call_deopt_stub
  //  cur. return pc  --> ...
  //
  // The return_pc has been stored in the frame of the deoptee and
  // will replace the address of the deopt_handler in the call
  // to Deoptimization::fetch_unroll_info below.

  // Push the "unpack frame"
  // Save everything in sight.
  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                                   &first_frame_size_in_bytes,
                                                                   /*generate_oop_map=*/ true,
                                                                   RegisterSaver::return_pc_is_lr,
                                                                   /*save_vectors*/ SuperwordUseVSX);
  assert(map != nullptr, "OopMap must have been created");

  __ li(exec_mode_reg, Deoptimization::Unpack_deopt);
  // Save exec mode for unpack_frames.
  __ b(exec_mode_initialized);

  // --------------------------------------------------------------------------
  // Prolog for exception case

  // An exception is pending.
  // We have been called with a return (interpreter) or a jump (exception blob).
  //
  // - R3_ARG1: exception oop
  // - R4_ARG2: exception pc

  exception_offset = __ pc() - start;

  BLOCK_COMMENT("Prolog for exception case");

  // Store exception oop and pc in thread (location known to GC).
  // This is needed since the call to "fetch_unroll_info()" may safepoint.
  __ std(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
  __ std(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
  // The exception pc also serves as return pc of the deoptee's frame.
  __ std(R4_ARG2, _abi0(lr), R1_SP);

  // Vanilla deoptimization with an exception pending in exception_oop.
  exception_in_tls_offset = __ pc() - start;

  // Push the "unpack frame".
  // Save everything in sight.
  RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                             &first_frame_size_in_bytes,
                                                             /*generate_oop_map=*/ false,
                                                             RegisterSaver::return_pc_is_pre_saved,
                                                             /*save_vectors*/ SuperwordUseVSX);

  // Deopt during an exception. Save exec mode for unpack_frames.
  __ li(exec_mode_reg, Deoptimization::Unpack_exception);

  // fall through
#ifdef COMPILER1
  __ b(exec_mode_initialized);

  // Reexecute entry, similar to c2 uncommon trap
  reexecute_offset = __ pc() - start;

  RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                             &first_frame_size_in_bytes,
                                                             /*generate_oop_map=*/ false,
                                                             RegisterSaver::return_pc_is_pre_saved,
                                                             /*save_vectors*/ SuperwordUseVSX);
  __ li(exec_mode_reg, Deoptimization::Unpack_reexecute);
#endif

  // --------------------------------------------------------------------------
  __ BIND(exec_mode_initialized);

  const Register unroll_block_reg = R22_tmp2;

  // We need to set `last_Java_frame' because `fetch_unroll_info' will
  // call `last_Java_frame()'. The value of the pc in the frame is not
  // particularly important. It just needs to identify this blob.
  __ set_last_Java_frame(R1_SP, noreg);

  // With EscapeAnalysis turned on, this call may safepoint!
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread, exec_mode_reg);
  address calls_return_pc = __ last_calls_return_pc();
  // Set an oopmap for the call site that describes all our saved registers.
  oop_maps->add_gc_map(calls_return_pc - start, map);

  __ reset_last_Java_frame();
  // Save the return value (the UnrollBlock*).
  __ mr(unroll_block_reg, R3_RET);

  // Restore only the result registers that have been saved by
  // push_frame_reg_args_and_save_live_registers(...) above.
  RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes, /*save_vectors*/ SuperwordUseVSX);

  // reload the exec mode from the UnrollBlock (it might have changed)
  __ lwz(exec_mode_reg, in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset()), unroll_block_reg);
  // In excp_deopt_mode, restore and clear exception oop which we
  // stored in the thread during exception entry above. The exception
  // oop will be the return value of this stub.
  Label skip_restore_excp;
  __ cmpdi(CR0, exec_mode_reg, Deoptimization::Unpack_exception);
  __ bne(CR0, skip_restore_excp);
  __ ld(R3_RET, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
  __ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
  __ li(R0, 0);
  // Clear the thread-local slots so GC does not see a stale exception.
  __ std(R0, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
  __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
  __ BIND(skip_restore_excp);

  // Pop the "unpack frame".
  __ pop_frame();

  // stack: (deoptee, optional i2c, caller of deoptee, ...).

  // pop the deoptee's frame
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

  // Freezing continuation frames requires that the caller is trimmed to unextended sp if compiled.
  // If not compiled the loaded value is equal to the current SP (see frame::initial_deoptimization_info())
  // and the frame is effectively not resized.
  Register caller_sp = R23_tmp3;
  __ ld_ptr(caller_sp, Deoptimization::UnrollBlock::initial_info_offset(), unroll_block_reg);
  __ resize_frame_absolute(caller_sp, R24_tmp4, R25_tmp5);

  // Loop through the `UnrollBlock' info and create interpreter frames.
  push_skeleton_frames(masm, true/*deopt*/,
                       unroll_block_reg,
                       R23_tmp3,
                       R24_tmp4,
                       R25_tmp5,
                       R26_tmp6,
                       R27_tmp7);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, optional c2i, caller of deoptee, ...).

  // push an `unpack_frame' taking care of float / int return values.
  __ push_frame(frame_size_in_bytes, R0/*tmp*/);

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, optional c2i, caller of deoptee,
  // ...).

  // Spill live volatile registers since we'll do a call.
  __ std( R3_RET, _native_abi_reg_args_spill(spill_ret),  R1_SP);
  __ stfd(F1_RET, _native_abi_reg_args_spill(spill_fret), R1_SP);

  // Let the unpacker layout information in the skeletal frames just
  // allocated.
  __ calculate_address_from_global_toc(R3_RET, calls_return_pc, true, true, true, true);
  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R3_RET);
  // This is a call to a LEAF method, so no oop map is required.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
                  R16_thread/*thread*/, exec_mode_reg/*exec_mode*/);
  __ reset_last_Java_frame();

  // Restore the volatiles saved above.
  __ ld( R3_RET, _native_abi_reg_args_spill(spill_ret),  R1_SP);
  __ lfd(F1_RET, _native_abi_reg_args_spill(spill_fret), R1_SP);

  // Pop the unpack frame.
  __ pop_frame();
  __ restore_LR(R0);

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // optional c2i, caller of deoptee, ...).

  // Initialize R14_state.
  __ restore_interpreter_state(R11_scratch1);
  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);

  // Return to the interpreter entry point.
  __ blr();
#else // !defined(COMPILER1) && !defined(COMPILER2)
  __ unimplemented("deopt blob needed only with compiler");
#endif

  // Make sure all code is generated
  __ flush();

  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
                                           reexecute_offset, first_frame_size_in_bytes / wordSize);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}
3092
3093 #ifdef COMPILER2
// Generates the C2 uncommon trap blob: calls Deoptimization::uncommon_trap
// to obtain an UnrollBlock, pops the deoptee, pushes skeletal interpreter
// frames, lets Deoptimization::unpack_frames fill them in, and resumes in
// the interpreter. Returns nullptr if the code buffer cannot be allocated.
UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() {
  // Allocate space for the code.
  ResourceMark rm;
  // Setup code generation tools.
  const char* name = OptoRuntime::stub_name(StubId::c2_uncommon_trap_id);
  CodeBuffer buffer(name, 2048, 1024);
  if (buffer.blob() == nullptr) {
    return nullptr;
  }
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  address start = __ pc();

  Register unroll_block_reg = R21_tmp1;
  Register klass_index_reg  = R22_tmp2;
  Register unc_trap_reg     = R23_tmp3;
  Register r_return_pc      = R27_tmp7;

  OopMapSet* oop_maps = new OopMapSet();
  int frame_size_in_bytes = frame::native_abi_reg_args_size;
  OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);

  // stack: (deoptee, optional i2c, caller_of_deoptee, ...).

  // Push a dummy `unpack_frame' and call
  // `Deoptimization::uncommon_trap' to pack the compiled frame into a
  // vframe array and return the `UnrollBlock' information.

  // Save LR to compiled frame.
  __ save_LR(R11_scratch1);

  // Push an "uncommon_trap" frame.
  __ push_frame_reg_args(0, R11_scratch1);

  // stack: (unpack frame, deoptee, optional i2c, caller_of_deoptee, ...).

  // Set the `unpack_frame' as last_Java_frame.
  // `Deoptimization::uncommon_trap' expects it and considers its
  // sender frame as the deoptee frame.
  // Remember the offset of the instruction whose address will be
  // moved to R11_scratch1.
  address gc_map_pc = __ pc();
  __ calculate_address_from_global_toc(r_return_pc, gc_map_pc, true, true, true, true);
  __ set_last_Java_frame(/*sp*/R1_SP, r_return_pc);

  // Incoming R3 carries the trap request; pass it on as second argument.
  __ mr(klass_index_reg, R3);
  __ li(R5_ARG3, Deoptimization::Unpack_uncommon_trap);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap),
                  R16_thread, klass_index_reg, R5_ARG3);

  // Set an oopmap for the call site.
  oop_maps->add_gc_map(gc_map_pc - start, map);

  __ reset_last_Java_frame();

  // Pop the `unpack frame'.
  __ pop_frame();

  // stack: (deoptee, optional i2c, caller_of_deoptee, ...).

  // Save the return value (the UnrollBlock*).
  __ mr(unroll_block_reg, R3_RET);

  // Pop the uncommon_trap frame.
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

#ifdef ASSERT
  // Verify that the UnrollBlock really describes an uncommon trap.
  __ lwz(R22_tmp2, in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset()), unroll_block_reg);
  __ cmpdi(CR0, R22_tmp2, (unsigned)Deoptimization::Unpack_uncommon_trap);
  __ asm_assert_eq("OptoRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
#endif

  // Freezing continuation frames requires that the caller is trimmed to unextended sp if compiled.
  // If not compiled the loaded value is equal to the current SP (see frame::initial_deoptimization_info())
  // and the frame is effectively not resized.
  Register caller_sp = R23_tmp3;
  __ ld_ptr(caller_sp, Deoptimization::UnrollBlock::initial_info_offset(), unroll_block_reg);
  __ resize_frame_absolute(caller_sp, R24_tmp4, R25_tmp5);

  // Allocate new interpreter frame(s) and possibly a c2i adapter
  // frame.
  push_skeleton_frames(masm, false/*deopt*/,
                       unroll_block_reg,
                       R22_tmp2,
                       R23_tmp3,
                       R24_tmp4,
                       R25_tmp5,
                       R26_tmp6);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, optional c2i, caller of deoptee, ...).

  // Push a dummy `unpack_frame' taking care of float return values.
  // Call `Deoptimization::unpack_frames' to layout information in the
  // interpreter frames just created.

  // Push a simple "unpack frame" here.
  __ push_frame_reg_args(0, R11_scratch1);

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, optional c2i, caller of deoptee,
  // ...).

  // Set the "unpack_frame" as last_Java_frame.
  __ set_last_Java_frame(/*sp*/R1_SP, r_return_pc);

  // Indicate it is the uncommon trap case.
  __ li(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
  // Let the unpacker layout information in the skeletal frames just
  // allocated.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
                  R16_thread, unc_trap_reg);

  __ reset_last_Java_frame();
  // Pop the `unpack frame'.
  __ pop_frame();
  // Restore LR from top interpreter frame.
  __ restore_LR(R11_scratch1);

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // optional c2i, caller of deoptee, ...).

  __ restore_interpreter_state(R11_scratch1);
  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);

  // Return to the interpreter entry point.
  __ blr();

  masm->flush();

  return UncommonTrapBlob::create(&buffer, oop_maps, frame_size_in_bytes/wordSize);
}
3227 #endif // COMPILER2
3228
// Generate a special Compile2Runtime blob that saves all registers, and setup oopmap.
// Used for safepoint polling handlers: saves every live register, calls
// call_ptr in the VM, forwards a pending exception if one was installed,
// and otherwise restores registers and returns. For non-return polls the
// stashed return pc is advanced past the poll instruction unless the
// runtime already modified it.
SafepointBlob* SharedRuntime::generate_handler_blob(StubId id, address call_ptr) {
  assert(StubRoutines::forward_exception_entry() != nullptr,
         "must be generated before");
  assert(is_polling_page_id(id), "expected a polling page stub id");

  ResourceMark rm;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map;

  // Allocate space for the code. Setup code generation tools.
  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  address start = __ pc();
  int frame_size_in_bytes = 0;

  RegisterSaver::ReturnPCLocation return_pc_location;
  bool cause_return = (id == StubId::shared_polling_page_return_handler_id);
  if (cause_return) {
    // Nothing to do here. The frame has already been popped in MachEpilogNode.
    // Register LR already contains the return pc.
    return_pc_location = RegisterSaver::return_pc_is_pre_saved;
  } else {
    // Use thread()->saved_exception_pc() as return pc.
    return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc;
  }

  bool save_vectors = (id == StubId::shared_polling_page_vectors_safepoint_handler_id);

  // Save registers, fpu state, and flags. Set R31 = return pc.
  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                                   &frame_size_in_bytes,
                                                                   /*generate_oop_map=*/ true,
                                                                   return_pc_location, save_vectors);

  // The following is basically a call_VM. However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.
  __ set_last_Java_frame(/*sp=*/R1_SP, /*pc=*/noreg);

  // The return address must always be correct so that the frame constructor
  // never sees an invalid pc.

  // Do the call
  __ call_VM_leaf(call_ptr, R16_thread);
  address calls_return_pc = __ last_calls_return_pc();

  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.
  oop_maps->add_gc_map(calls_return_pc - start, map);

  Label noException;

  // Clear the last Java frame.
  __ reset_last_Java_frame();

  BLOCK_COMMENT("  Check pending exception.");
  const Register pending_exception = R0;
  __ ld(pending_exception, thread_(pending_exception));
  __ cmpdi(CR0, pending_exception, 0);
  __ beq(CR0, noException);

  // Exception pending
  RegisterSaver::restore_live_registers_and_pop_frame(masm,
                                                      frame_size_in_bytes,
                                                      /*restore_ctr=*/true, save_vectors);

  BLOCK_COMMENT("  Jump to forward_exception_entry.");
  // Jump to forward_exception_entry, with the issuing PC in LR
  // so it looks like the original nmethod called forward_exception_entry.
  __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);

  // No exception case.
  __ BIND(noException);

  if (!cause_return) {
    Label no_adjust;
    // If our stashed return pc was modified by the runtime we avoid touching it
    __ ld(R0, frame_size_in_bytes + _abi0(lr), R1_SP);
    __ cmpd(CR0, R0, R31);
    __ bne(CR0, no_adjust);

    // Adjust return pc forward to step over the safepoint poll instruction
    // (4 = size of one PPC instruction).
    __ addi(R31, R31, 4);
    __ std(R31, frame_size_in_bytes + _abi0(lr), R1_SP);

    __ bind(no_adjust);
  }

  // Normal exit, restore registers and exit.
  RegisterSaver::restore_live_registers_and_pop_frame(masm,
                                                      frame_size_in_bytes,
                                                      /*restore_ctr=*/true, save_vectors);

  __ blr();

  // Make sure all code is generated
  masm->flush();

  // Fill-out other meta info
  // CodeBlob frame size is in words.
  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_bytes / wordSize);
}
3336
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into the vm to find out the proper destination
// of a java call. All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination) {
  assert(is_resolve_id(id), "expected a resolve stub id");

  // allocate space for the code
  ResourceMark rm;

  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  int frame_size_in_bytes;

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = nullptr;

  address start = __ pc();

  // Save all live registers; the resolver may safepoint.
  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                                   &frame_size_in_bytes,
                                                                   /*generate_oop_map*/ true,
                                                                   RegisterSaver::return_pc_is_lr);

  // Use noreg as last_Java_pc, the return pc will be reconstructed
  // from the physical frame.
  __ set_last_Java_frame(/*sp*/R1_SP, noreg);

  int frame_complete = __ offset();

  // Pass R19_method as 2nd (optional) argument, used by
  // counter_overflow_stub.
  __ call_VM_leaf(destination, R16_thread, R19_method);
  address calls_return_pc = __ last_calls_return_pc();
  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.
  // Create the oopmap for the call's return pc.
  oop_maps->add_gc_map(calls_return_pc - start, map);

  // R3_RET contains the address we are going to jump to assuming no exception got installed.

  // clear last_Java_sp
  __ reset_last_Java_frame();

  // Check for pending exceptions.
  BLOCK_COMMENT("Check for pending exceptions.");
  Label pending;
  __ ld(R11_scratch1, thread_(pending_exception));
  __ cmpdi(CR0, R11_scratch1, 0);
  __ bne(CR0, pending);

  __ mtctr(R3_RET); // Ctr will not be touched by restore_live_registers_and_pop_frame.

  RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ false);

  // Get the returned method.
  __ get_vm_result_metadata(R19_method);

  // Jump to the resolved destination kept in CTR.
  __ bctr();


  // Pending exception after the safepoint.
  __ BIND(pending);

  RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ true);

  // exception pending => remove activation and forward to exception handler

  __ li(R11_scratch1, 0);
  __ ld(R3_ARG1, thread_(pending_exception));
  // Clear vm_result_oop; the exception is forwarded via pending_exception.
  __ std(R11_scratch1, in_bytes(JavaThread::vm_result_oop_offset()), R16_thread);
  __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);

  // -------------
  // Make sure all code is generated.
  masm->flush();

  // return the blob
  // CodeBlob frame size is in words, so convert from bytes here.
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize,
                                       oop_maps, true);
}
3425
3426 // Continuation point for throwing of implicit exceptions that are
3427 // not handled in the current activation. Fabricates an exception
3428 // oop and initiates normal exception dispatching in this
3429 // frame. Only callee-saved registers are preserved (through the
3430 // normal register window / RegisterMap handling). If the compiler
3431 // needs all registers to be preserved between the fault point and
3432 // the exception handler then it must assume responsibility for that
3433 // in AbstractCompiler::continuation_for_implicit_null_exception or
3434 // continuation_for_implicit_division_by_zero_exception. All other
3435 // implicit exceptions (e.g., NullPointerException or
3436 // AbstractMethodError on entry) are either at call sites or
3437 // otherwise assume that stack unwinding will be initiated, so
3438 // caller saved registers were assumed volatile in the compiler.
3439 //
3440 // Note that we generate only this stub into a RuntimeStub, because
3441 // it needs to be properly traversed and ignored during GC, so we
3442 // change the meaning of the "__" macro within this method.
3443 //
3444 // Note: the routine set_pc_not_at_call_for_caller in
3445 // SharedRuntime.cpp requires that this code be generated into a
3446 // RuntimeStub.
RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) {
  assert(is_throw_id(id), "expected a throw stub id");

  // The stub name also names the CodeBuffer below.
  const char* name = SharedRuntime::stub_name(id);

  ResourceMark rm;
  const char* timer_msg = "SharedRuntime generate_throw_exception";
  TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime));

  CodeBuffer code(name, 1024 DEBUG_ONLY(+ 512), 0);
  MacroAssembler* masm = new MacroAssembler(&code);

  OopMapSet* oop_maps = new OopMapSet();
  // The stub's frame is a plain native ABI frame with register-argument space.
  int frame_size_in_bytes = frame::native_abi_reg_args_size;
  // No oop locations are added to this map: no Java oops are live across
  // the runtime call in this stub.
  OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);

  address start = __ pc();

  __ save_LR(R11_scratch1);

  // Push a frame.
  __ push_frame_reg_args(0, R11_scratch1);

  address frame_complete_pc = __ pc();

  // Note that we always have a runtime stub frame on the top of
  // stack by this point. Remember the offset of the instruction
  // whose address will be moved to R11_scratch1.
  address gc_map_pc = __ get_PC_trash_LR(R11_scratch1);

  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);

  // Call runtime_entry(current_thread); it is expected to install a pending
  // exception on the thread (verified below in debug builds).
  __ mr(R3_ARG1, R16_thread);
  __ call_c(runtime_entry);

  // Set an oopmap for the call site.
  oop_maps->add_gc_map((int)(gc_map_pc - start), map);

  __ reset_last_Java_frame();

#ifdef ASSERT
  // Make sure that this code is only executed if there is a pending
  // exception.
  {
    Label L;
    __ ld(R0,
          in_bytes(Thread::pending_exception_offset()),
          R16_thread);
    __ cmpdi(CR0, R0, 0);
    __ bne(CR0, L);
    __ stop("SharedRuntime::throw_exception: no pending exception");
    __ bind(L);
  }
#endif

  // Pop frame.
  __ pop_frame();

  __ restore_LR(R11_scratch1);

  // Tail-jump (via CTR) to the shared forward-exception entry, which
  // dispatches the pending exception in the caller's context.
  __ load_const(R11_scratch1, StubRoutines::forward_exception_entry());
  __ mtctr(R11_scratch1);
  __ bctr();

  // Create runtime stub with OopMap.
  RuntimeStub* stub =
    RuntimeStub::new_runtime_stub(name, &code,
                                  /*frame_complete=*/ (int)(frame_complete_pc - start),
                                  frame_size_in_bytes/wordSize,
                                  oop_maps,
                                  false);
  return stub;
}
3520
3521 //------------------------------Montgomery multiplication------------------------
3522 //
3523
// Subtract 0:b from carry:a. Return carry.
// Multi-precision subtraction over 'len' 64-bit limbs: a[] -= b[] in place,
// with 'carry' acting as the incoming high limb; returns the resulting high
// limb after the borrow has been propagated through it.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  long i = 0;
  unsigned long tmp, tmp2;
  __asm__ __volatile__ (
    "subfc %[tmp], %[tmp], %[tmp] \n" // pre-set CA
    "mtctr %[len] \n"                 // loop 'len' times via the count register
    "0: \n"
    "ldx %[tmp], %[i], %[a] \n"       // tmp = a[i]
    "ldx %[tmp2], %[i], %[b] \n"      // tmp2 = b[i]
    "subfe %[tmp], %[tmp2], %[tmp] \n" // subtract extended
    "stdx %[tmp], %[i], %[a] \n"      // a[i] = tmp
    "addi %[i], %[i], 8 \n"           // advance byte offset to the next limb
    "bdnz 0b \n"
    "addme %[tmp], %[carry] \n"       // carry + CA - 1
    : [i]"+b"(i), [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2)
    : [a]"r"(a), [b]"r"(b), [carry]"r"(carry), [len]"r"(len)
    : "ctr", "xer", "memory"
  );
  return tmp;
}
3546
// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
// (T2:T1:T0) += A * B, with carries chained through the XER CA bit.
inline void MACC(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  unsigned long hi, lo;
  __asm__ __volatile__ (
    "mulld %[lo], %[A], %[B] \n"    // lo = low 64 bits of A*B
    "mulhdu %[hi], %[A], %[B] \n"   // hi = high 64 bits of A*B (unsigned)
    "addc %[T0], %[T0], %[lo] \n"   // T0 += lo, record carry in CA
    "adde %[T1], %[T1], %[hi] \n"   // T1 += hi + CA
    "addze %[T2], %[T2] \n"         // T2 += CA
    : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [B]"r"(B)
    : "xer"
  );
}
3562
// As above, but add twice the double-length result into the
// accumulator.
// (T2:T1:T0) += 2 * A * B; the product is computed once and the
// add-with-carry sequence is simply applied twice.
inline void MACC2(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  unsigned long hi, lo;
  __asm__ __volatile__ (
    "mulld %[lo], %[A], %[B] \n"    // lo = low 64 bits of A*B
    "mulhdu %[hi], %[A], %[B] \n"   // hi = high 64 bits of A*B (unsigned)
    "addc %[T0], %[T0], %[lo] \n"   // first accumulation of the product
    "adde %[T1], %[T1], %[hi] \n"
    "addze %[T2], %[T2] \n"
    "addc %[T0], %[T0], %[lo] \n"   // second accumulation (doubles the product)
    "adde %[T1], %[T1], %[hi] \n"
    "addze %[T2], %[T2] \n"
    : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [B]"r"(B)
    : "xer"
  );
}
3581
3582 // Fast Montgomery multiplication. The derivation of the algorithm is
3583 // in "A Cryptographic Library for the Motorola DSP56000,
3584 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
3585 static void
3586 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3587 unsigned long m[], unsigned long inv, int len) {
3588 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3589 int i;
3590
3591 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3592
3593 for (i = 0; i < len; i++) {
3594 int j;
3595 for (j = 0; j < i; j++) {
3596 MACC(a[j], b[i-j], t0, t1, t2);
3597 MACC(m[j], n[i-j], t0, t1, t2);
3598 }
3599 MACC(a[i], b[0], t0, t1, t2);
3600 m[i] = t0 * inv;
3601 MACC(m[i], n[0], t0, t1, t2);
3602
3603 assert(t0 == 0, "broken Montgomery multiply");
3604
3605 t0 = t1; t1 = t2; t2 = 0;
3606 }
3607
3608 for (i = len; i < 2*len; i++) {
3609 int j;
3610 for (j = i-len+1; j < len; j++) {
3611 MACC(a[j], b[i-j], t0, t1, t2);
3612 MACC(m[j], n[i-j], t0, t1, t2);
3613 }
3614 m[i-len] = t0;
3615 t0 = t1; t1 = t2; t2 = 0;
3616 }
3617
3618 while (t0) {
3619 t0 = sub(m, n, t0, len);
3620 }
3621 }
3622
3623 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3624 // multiplies so it should be up to 25% faster than Montgomery
3625 // multiplication. However, its loop control is more complex and it
3626 // may actually run slower on some machines.
3627 static void
3628 montgomery_square(unsigned long a[], unsigned long n[],
3629 unsigned long m[], unsigned long inv, int len) {
3630 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3631 int i;
3632
3633 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3634
3635 for (i = 0; i < len; i++) {
3636 int j;
3637 int end = (i+1)/2;
3638 for (j = 0; j < end; j++) {
3639 MACC2(a[j], a[i-j], t0, t1, t2);
3640 MACC(m[j], n[i-j], t0, t1, t2);
3641 }
3642 if ((i & 1) == 0) {
3643 MACC(a[j], a[j], t0, t1, t2);
3644 }
3645 for (; j < i; j++) {
3646 MACC(m[j], n[i-j], t0, t1, t2);
3647 }
3648 m[i] = t0 * inv;
3649 MACC(m[i], n[0], t0, t1, t2);
3650
3651 assert(t0 == 0, "broken Montgomery square");
3652
3653 t0 = t1; t1 = t2; t2 = 0;
3654 }
3655
3656 for (i = len; i < 2*len; i++) {
3657 int start = i-len+1;
3658 int end = start + (len - start)/2;
3659 int j;
3660 for (j = start; j < end; j++) {
3661 MACC2(a[j], a[i-j], t0, t1, t2);
3662 MACC(m[j], n[i-j], t0, t1, t2);
3663 }
3664 if ((i & 1) == 0) {
3665 MACC(a[j], a[j], t0, t1, t2);
3666 }
3667 for (; j < len; j++) {
3668 MACC(m[j], n[i-j], t0, t1, t2);
3669 }
3670 m[i-len] = t0;
3671 t0 = t1; t1 = t2; t2 = 0;
3672 }
3673
3674 while (t0) {
3675 t0 = sub(m, n, t0, len);
3676 }
3677 }
3678
// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Sandy Bridge-E) CPU @ 3.2GHz.
// Doesn't seem to be relevant for Power8 so we use the same value.
3682 #define MONTGOMERY_SQUARING_THRESHOLD 64
3683
// Copy len longwords from s to d, word-swapping as we go. The
// destination array is reversed.
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
  for (int idx = 0; idx < len; idx++) {
    unsigned long word = s[idx];
#ifdef VM_LITTLE_ENDIAN
    // Swap the two 32-bit halves of each longword on little endian machines.
    word = (word << 32) | (word >> 32);
#endif
    // Source element 0 lands in the last destination slot, and so on.
    d[len - 1 - idx] = word;
  }
}
3699
3700 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3701 jint len, jlong inv,
3702 jint *m_ints) {
3703 len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3704 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3705 int longwords = len/2;
3706
3707 // Make very sure we don't use so much space that the stack might
3708 // overflow. 512 jints corresponds to an 16384-bit integer and
3709 // will use here a total of 8k bytes of stack space.
3710 int divisor = sizeof(unsigned long) * 4;
3711 guarantee(longwords <= 8192 / divisor, "must be");
3712 int total_allocation = longwords * sizeof (unsigned long) * 4;
3713 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3714
3715 // Local scratch arrays
3716 unsigned long
3717 *a = scratch + 0 * longwords,
3718 *b = scratch + 1 * longwords,
3719 *n = scratch + 2 * longwords,
3720 *m = scratch + 3 * longwords;
3721
3722 reverse_words((unsigned long *)a_ints, a, longwords);
3723 reverse_words((unsigned long *)b_ints, b, longwords);
3724 reverse_words((unsigned long *)n_ints, n, longwords);
3725
3726 ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3727
3728 reverse_words(m, (unsigned long *)m_ints, longwords);
3729 }
3730
3731 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3732 jint len, jlong inv,
3733 jint *m_ints) {
3734 len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3735 assert(len % 2 == 0, "array length in montgomery_square must be even");
3736 int longwords = len/2;
3737
3738 // Make very sure we don't use so much space that the stack might
3739 // overflow. 512 jints corresponds to an 16384-bit integer and
3740 // will use here a total of 6k bytes of stack space.
3741 int divisor = sizeof(unsigned long) * 3;
3742 guarantee(longwords <= (8192 / divisor), "must be");
3743 int total_allocation = longwords * sizeof (unsigned long) * 3;
3744 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3745
3746 // Local scratch arrays
3747 unsigned long
3748 *a = scratch + 0 * longwords,
3749 *n = scratch + 1 * longwords,
3750 *m = scratch + 2 * longwords;
3751
3752 reverse_words((unsigned long *)a_ints, a, longwords);
3753 reverse_words((unsigned long *)n_ints, n, longwords);
3754
3755 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3756 ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3757 } else {
3758 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3759 }
3760
3761 reverse_words(m, (unsigned long *)m_ints, longwords);
3762 }
3763
3764 #if INCLUDE_JFR
3765
// For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
// It returns a jobject handle to the event writer.
// The handle is dereferenced and the return value is the event writer oop.
RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
  const char* name = SharedRuntime::stub_name(StubId::shared_jfr_write_checkpoint_id);
  CodeBuffer code(name, 512, 64);
  MacroAssembler* masm = new MacroAssembler(&code);

  // Scratch registers; argument registers are unused at this point.
  Register tmp1 = R10_ARG8;
  Register tmp2 = R9_ARG7;

  // Frame size in stack slots, as required by the OopMap constructor.
  int framesize = frame::native_abi_reg_args_size / VMRegImpl::stack_slot_size;
  address start = __ pc();
  __ mflr(tmp1);
  __ std(tmp1, _abi0(lr), R1_SP); // save return pc
  __ push_frame_reg_args(0, tmp1);
  int frame_complete = __ pc() - start;
  __ set_last_Java_frame(R1_SP, noreg);
  // Call into the VM; the current thread is the only argument.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), R16_thread);
  address calls_return_pc = __ last_calls_return_pc();
  __ reset_last_Java_frame();
  // The handle is dereferenced through a load barrier.
  __ resolve_global_jobject(R3_RET, tmp1, tmp2, MacroAssembler::PRESERVATION_NONE);
  __ pop_frame();
  __ ld(tmp1, _abi0(lr), R1_SP); // restore return pc
  __ mtlr(tmp1);
  __ blr();

  // Register an oop map at the runtime call's return pc; no oop
  // locations are recorded in it.
  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = new OopMap(framesize, 0);
  oop_maps->add_gc_map(calls_return_pc - start, map);

  RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
    RuntimeStub::new_runtime_stub(name, &code, frame_complete,
                                  (framesize >> (LogBytesPerWord - LogBytesPerInt)),
                                  oop_maps, false);
  return stub;
}
3804
// For c2: call to return a leased buffer.
RuntimeStub* SharedRuntime::generate_jfr_return_lease() {
  const char* name = SharedRuntime::stub_name(StubId::shared_jfr_return_lease_id);
  CodeBuffer code(name, 512, 64);
  MacroAssembler* masm = new MacroAssembler(&code);

  // Scratch registers; argument registers are unused at this point.
  Register tmp1 = R10_ARG8;
  Register tmp2 = R9_ARG7;

  // Frame size in stack slots, as required by the OopMap constructor.
  int framesize = frame::native_abi_reg_args_size / VMRegImpl::stack_slot_size;
  address start = __ pc();
  __ mflr(tmp1);
  __ std(tmp1, _abi0(lr), R1_SP); // save return pc
  __ push_frame_reg_args(0, tmp1);
  int frame_complete = __ pc() - start;
  __ set_last_Java_frame(R1_SP, noreg);
  // Call into the VM; the current thread is the only argument.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), R16_thread);
  address calls_return_pc = __ last_calls_return_pc();
  __ reset_last_Java_frame();
  __ pop_frame();
  __ ld(tmp1, _abi0(lr), R1_SP); // restore return pc
  __ mtlr(tmp1);
  __ blr();

  // Register an oop map at the runtime call's return pc; no oop
  // locations are recorded in it.
  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = new OopMap(framesize, 0);
  oop_maps->add_gc_map(calls_return_pc - start, map);

  RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
    RuntimeStub::new_runtime_stub(name, &code, frame_complete,
                                  (framesize >> (LogBytesPerWord - LogBytesPerInt)),
                                  oop_maps, false);
  return stub;
}
3839
3840 #endif // INCLUDE_JFR
3841
3842 #if INCLUDE_SHENANDOAHGC
RuntimeStub* SharedRuntime::generate_shenandoah_stub(StubId stub_id) {
  assert(UseShenandoahGC, "Only generate when Shenandoah is enabled");
  // No Shenandoah-specific runtime stub is generated on this platform;
  // presumably callers tolerate a null result here — verify at call sites.
  return nullptr;
}
3847 #endif