1 /*
2 * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "asm/macroAssembler.inline.hpp"
27 #include "code/debugInfoRec.hpp"
28 #include "code/compiledIC.hpp"
29 #include "code/vtableStubs.hpp"
30 #include "frame_ppc.hpp"
31 #include "compiler/oopMap.hpp"
32 #include "gc/shared/gcLocker.hpp"
33 #include "interpreter/interpreter.hpp"
34 #include "interpreter/interp_masm.hpp"
35 #include "memory/resourceArea.hpp"
36 #include "oops/klass.inline.hpp"
37 #include "prims/methodHandles.hpp"
38 #include "runtime/continuation.hpp"
39 #include "runtime/continuationEntry.inline.hpp"
40 #include "runtime/jniHandles.hpp"
41 #include "runtime/os.inline.hpp"
42 #include "runtime/safepointMechanism.hpp"
43 #include "runtime/sharedRuntime.hpp"
44 #include "runtime/signature.hpp"
45 #include "runtime/stubRoutines.hpp"
46 #include "runtime/timerTrace.hpp"
47 #include "runtime/vframeArray.hpp"
48 #include "utilities/align.hpp"
49 #include "utilities/macros.hpp"
50 #include "vmreg_ppc.inline.hpp"
51 #ifdef COMPILER1
52 #include "c1/c1_Runtime1.hpp"
53 #endif
54 #ifdef COMPILER2
55 #include "opto/ad.hpp"
56 #include "opto/runtime.hpp"
57 #endif
58
59 #include <alloca.h>
60
// Shorthand used throughout this file: route assembler mnemonics
// through the MacroAssembler passed to each generator function.
#define __ masm->

// Emit human-readable markers into the generated code's disassembly.
// Compiled out in PRODUCT builds.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

// Bind a label and annotate the disassembly with the label's name.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
70
71
// Helper used by the runtime stub generators in this file: pushes a frame,
// spills the volatile register state into it (optionally recording the save
// locations in an OopMap), and restores/pops the frame again.
class RegisterSaver {
 // Used for saving volatile registers.
 public:

  // Support different return pc locations.
  enum ReturnPCLocation {
    return_pc_is_lr,                          // read the return pc from the link register
    return_pc_is_pre_saved,                   // caller has already stored the return pc
    return_pc_is_thread_saved_exception_pc    // load it from thread->saved_exception_pc
  };

  // Push an abi_reg_args frame and save all volatile registers into it.
  // If generate_oop_map is true, returns an OopMap describing the save
  // locations; the resulting frame size is passed back through
  // out_frame_size_in_bytes. Vector registers are included only when
  // save_vectors is set.
  static OopMap* push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
                         int* out_frame_size_in_bytes,
                         bool generate_oop_map,
                         ReturnPCLocation return_pc_location,
                         bool save_vectors = false);
  // Restore everything saved by the function above and pop the frame.
  // CTR restoration can be skipped if it already holds the continuation address.
  static void restore_live_registers_and_pop_frame(MacroAssembler* masm,
                         int frame_size_in_bytes,
                         bool restore_ctr,
                         bool save_vectors = false);

  // Push a frame of the given size and spill the argument registers described
  // by regs (and, optionally, regs2) into it.
  static void push_frame_and_save_argument_registers(MacroAssembler* masm,
                         Register r_temp,
                         int frame_size,
                         int total_args,
                         const VMRegPair *regs, const VMRegPair *regs2 = nullptr);
  // Reload the argument registers spilled by the function above (same order,
  // same offsets) and pop the frame.
  static void restore_argument_registers_and_pop_frame(MacroAssembler*masm,
                         int frame_size,
                         int total_args,
                         const VMRegPair *regs, const VMRegPair *regs2 = nullptr);

  // During deoptimization only the result registers need to be restored
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes, bool save_vectors);

  // Constants and data structures:

  // Which register file a saved slot belongs to.
  typedef enum {
    int_reg,
    float_reg,
    special_reg,   // currently only CTR
    vec_reg
  } RegisterType;

  typedef enum {
    reg_size = 8,          // bytes per GPR/FPR/special-register save slot
    half_reg_size = reg_size / 2,
    vec_reg_size = 16      // bytes per vector-register save slot
  } RegisterConstants;

  // One entry of the register-save layout tables below.
  typedef struct {
    RegisterType reg_type; // which register file
    int reg_num;           // hardware encoding within that register file
    VMReg vmreg;           // corresponding VMReg, used for OopMap entries
  } LiveRegType;
};
128
129
// Initializer-list helpers for the register layout tables below: each expands
// to one RegisterSaver::LiveRegType entry for the given register.
#define RegisterSaver_LiveIntReg(regname) \
  { RegisterSaver::int_reg, regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveFloatReg(regname) \
  { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveSpecialReg(regname) \
  { RegisterSaver::special_reg, regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveVecReg(regname) \
  { RegisterSaver::vec_reg, regname->encoding(), regname->as_VMReg() }
141
static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  // Live registers which get spilled to the stack. Register
  // positions in this array correspond directly to the stack layout.
  // Do not reorder: the save/restore loops walk this table linearly
  // and R30/R31 handling relies on the int registers coming last.

  //
  // live special registers:
  //
  RegisterSaver_LiveSpecialReg(SR_CTR),
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg( F0  ),
  RegisterSaver_LiveFloatReg( F1  ),
  RegisterSaver_LiveFloatReg( F2  ),
  RegisterSaver_LiveFloatReg( F3  ),
  RegisterSaver_LiveFloatReg( F4  ),
  RegisterSaver_LiveFloatReg( F5  ),
  RegisterSaver_LiveFloatReg( F6  ),
  RegisterSaver_LiveFloatReg( F7  ),
  RegisterSaver_LiveFloatReg( F8  ),
  RegisterSaver_LiveFloatReg( F9  ),
  RegisterSaver_LiveFloatReg( F10 ),
  RegisterSaver_LiveFloatReg( F11 ),
  RegisterSaver_LiveFloatReg( F12 ),
  RegisterSaver_LiveFloatReg( F13 ),
  RegisterSaver_LiveFloatReg( F14 ),
  RegisterSaver_LiveFloatReg( F15 ),
  RegisterSaver_LiveFloatReg( F16 ),
  RegisterSaver_LiveFloatReg( F17 ),
  RegisterSaver_LiveFloatReg( F18 ),
  RegisterSaver_LiveFloatReg( F19 ),
  RegisterSaver_LiveFloatReg( F20 ),
  RegisterSaver_LiveFloatReg( F21 ),
  RegisterSaver_LiveFloatReg( F22 ),
  RegisterSaver_LiveFloatReg( F23 ),
  RegisterSaver_LiveFloatReg( F24 ),
  RegisterSaver_LiveFloatReg( F25 ),
  RegisterSaver_LiveFloatReg( F26 ),
  RegisterSaver_LiveFloatReg( F27 ),
  RegisterSaver_LiveFloatReg( F28 ),
  RegisterSaver_LiveFloatReg( F29 ),
  RegisterSaver_LiveFloatReg( F30 ),
  RegisterSaver_LiveFloatReg( F31 ),
  //
  // live integer registers:
  //
  RegisterSaver_LiveIntReg(   R0  ),
  //RegisterSaver_LiveIntReg( R1  ), // stack pointer
  RegisterSaver_LiveIntReg(   R2  ),
  RegisterSaver_LiveIntReg(   R3  ),
  RegisterSaver_LiveIntReg(   R4  ),
  RegisterSaver_LiveIntReg(   R5  ),
  RegisterSaver_LiveIntReg(   R6  ),
  RegisterSaver_LiveIntReg(   R7  ),
  RegisterSaver_LiveIntReg(   R8  ),
  RegisterSaver_LiveIntReg(   R9  ),
  RegisterSaver_LiveIntReg(   R10 ),
  RegisterSaver_LiveIntReg(   R11 ),
  RegisterSaver_LiveIntReg(   R12 ),
  //RegisterSaver_LiveIntReg( R13 ), // system thread id
  RegisterSaver_LiveIntReg(   R14 ),
  RegisterSaver_LiveIntReg(   R15 ),
  RegisterSaver_LiveIntReg(   R16 ),
  RegisterSaver_LiveIntReg(   R17 ),
  RegisterSaver_LiveIntReg(   R18 ),
  RegisterSaver_LiveIntReg(   R19 ),
  RegisterSaver_LiveIntReg(   R20 ),
  RegisterSaver_LiveIntReg(   R21 ),
  RegisterSaver_LiveIntReg(   R22 ),
  RegisterSaver_LiveIntReg(   R23 ),
  RegisterSaver_LiveIntReg(   R24 ),
  RegisterSaver_LiveIntReg(   R25 ),
  RegisterSaver_LiveIntReg(   R26 ),
  RegisterSaver_LiveIntReg(   R27 ),
  RegisterSaver_LiveIntReg(   R28 ),
  RegisterSaver_LiveIntReg(   R29 ),
  RegisterSaver_LiveIntReg(   R30 ),
  RegisterSaver_LiveIntReg(   R31 )  // must be the last register (see save/restore functions below)
};
221
static const RegisterSaver::LiveRegType RegisterSaver_LiveVecRegs[] = {
  //
  // live vector registers (optional, only these ones are used by C2):
  //
  // Kept in ascending adjacent pairs: the Power10 save/restore path uses
  // stxvp/lxvp on register pairs and asserts reg_num(i+1) == reg_num(i)+1.
  RegisterSaver_LiveVecReg( VR0  ),
  RegisterSaver_LiveVecReg( VR1  ),
  RegisterSaver_LiveVecReg( VR2  ),
  RegisterSaver_LiveVecReg( VR3  ),
  RegisterSaver_LiveVecReg( VR4  ),
  RegisterSaver_LiveVecReg( VR5  ),
  RegisterSaver_LiveVecReg( VR6  ),
  RegisterSaver_LiveVecReg( VR7  ),
  RegisterSaver_LiveVecReg( VR8  ),
  RegisterSaver_LiveVecReg( VR9  ),
  RegisterSaver_LiveVecReg( VR10 ),
  RegisterSaver_LiveVecReg( VR11 ),
  RegisterSaver_LiveVecReg( VR12 ),
  RegisterSaver_LiveVecReg( VR13 ),
  RegisterSaver_LiveVecReg( VR14 ),
  RegisterSaver_LiveVecReg( VR15 ),
  RegisterSaver_LiveVecReg( VR16 ),
  RegisterSaver_LiveVecReg( VR17 ),
  RegisterSaver_LiveVecReg( VR18 ),
  RegisterSaver_LiveVecReg( VR19 ),
  RegisterSaver_LiveVecReg( VR20 ),
  RegisterSaver_LiveVecReg( VR21 ),
  RegisterSaver_LiveVecReg( VR22 ),
  RegisterSaver_LiveVecReg( VR23 ),
  RegisterSaver_LiveVecReg( VR24 ),
  RegisterSaver_LiveVecReg( VR25 ),
  RegisterSaver_LiveVecReg( VR26 ),
  RegisterSaver_LiveVecReg( VR27 ),
  RegisterSaver_LiveVecReg( VR28 ),
  RegisterSaver_LiveVecReg( VR29 ),
  RegisterSaver_LiveVecReg( VR30 ),
  RegisterSaver_LiveVecReg( VR31 )
};
259
260
OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
                         int* out_frame_size_in_bytes,
                         bool generate_oop_map,
                         ReturnPCLocation return_pc_location,
                         bool save_vectors) {
  // Push an abi_reg_args-frame and store all registers which may be live.
  // If requested, create an OopMap: Record volatile registers as
  // callee-save values in an OopMap so their save locations will be
  // propagated to the RegisterMap of the caller frame during
  // StackFrameStream construction (needed for deoptimization; see
  // compiledVFrame::create_stack_value).
  // Updated return pc is returned in R31 (if not return_pc_is_pre_saved).
  //
  // Frame layout (low to high addresses): abi_reg_args header, alignment
  // padding, then the register save area laid out exactly as described by
  // RegisterSaver_LiveRegs (followed by RegisterSaver_LiveVecRegs if
  // save_vectors).

  // calculate frame size
  const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
                             sizeof(RegisterSaver::LiveRegType);
  const int vecregstosave_num = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
                                                sizeof(RegisterSaver::LiveRegType))
                                             : 0;
  const int register_save_size = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;
  const int frame_size_in_bytes = align_up(register_save_size, frame::alignment_in_bytes)
                                  + frame::native_abi_reg_args_size;

  *out_frame_size_in_bytes = frame_size_in_bytes;
  const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
  const int register_save_offset = frame_size_in_bytes - register_save_size;

  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
  OopMap* map = generate_oop_map ? new OopMap(frame_size_in_slots, 0) : nullptr;

  BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");

  // push a new frame
  __ push_frame(frame_size_in_bytes, noreg);

  // Save some registers in the last (non-vector) slots of the new frame so we
  // can use them as scratch regs or to determine the return pc.
  __ std(R31, frame_size_in_bytes -   reg_size - vecregstosave_num * vec_reg_size, R1_SP);
  __ std(R30, frame_size_in_bytes - 2*reg_size - vecregstosave_num * vec_reg_size, R1_SP);

  // save the flags
  // Do the save_LR by hand and adjust the return pc if requested.
  switch (return_pc_location) {
    case return_pc_is_lr:                        __ mflr(R31); break;
    case return_pc_is_pre_saved:                 break;
    case return_pc_is_thread_saved_exception_pc: __ ld(R31, thread_(saved_exception_pc)); break;
    default: ShouldNotReachHere();
  }
  if (return_pc_location != return_pc_is_pre_saved) {
    // Store the (possibly adjusted) return pc into the caller's ABI lr slot.
    __ std(R31, frame_size_in_bytes + _abi0(lr), R1_SP);
  }

  // save all registers (ints and floats)
  int offset = register_save_offset;

  for (int i = 0; i < regstosave_num; i++) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        if (reg_num < 30) { // We spilled R30-31 right at the beginning.
          __ std(as_Register(reg_num), offset, R1_SP);
        }
        break;
      }
      case RegisterSaver::float_reg: {
        __ stfd(as_FloatRegister(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::special_reg: {
        if (reg_num == SR_CTR.encoding()) {
          // CTR cannot be stored directly; go through R30 (already spilled above).
          __ mfctr(R30);
          __ std(R30, offset, R1_SP);
        } else {
          Unimplemented();
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }

    if (generate_oop_map) {
      // Record the save slot (byte offset converted to 4-byte stack slots).
      map->set_callee_saved(VMRegImpl::stack2reg(offset>>2),
                            RegisterSaver_LiveRegs[i].vmreg);
    }
    offset += reg_size;
  }

  // Note that generate_oop_map in the following loop is only used for the
  // polling_page_vectors_safepoint_handler_blob and the deopt_blob.
  // The order in which the vector contents are stored depends on Endianess and
  // the utilized instructions (PowerArchitecturePPC64).
  assert(is_aligned(offset, StackAlignmentInBytes), "should be");
  if (PowerArchitecturePPC64 >= 10) {
    // Power10: store adjacent vector register pairs with a single stxvp.
    assert(is_even(vecregstosave_num), "expectation");
    for (int i = 0; i < vecregstosave_num; i += 2) {
      int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;
      assert(RegisterSaver_LiveVecRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!");

      __ stxvp(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);
      // Note: The contents were read in the same order (see loadV16_Power9 node in ppc.ad).
      // RegisterMap::pd_location only uses the first VMReg for each VectorRegister.
      if (generate_oop_map) {
        // stxvp's in-memory order of the pair depends on endianness, hence the
        // LITTLE_ENDIAN_ONLY/BIG_ENDIAN_ONLY index adjustment.
        map->set_callee_saved(VMRegImpl::stack2reg(offset>>2),
                              RegisterSaver_LiveVecRegs[i LITTLE_ENDIAN_ONLY(+1) ].vmreg);
        map->set_callee_saved(VMRegImpl::stack2reg((offset + vec_reg_size)>>2),
                              RegisterSaver_LiveVecRegs[i BIG_ENDIAN_ONLY(+1) ].vmreg);
      }
      offset += (2 * vec_reg_size);
    }
  } else {
    for (int i = 0; i < vecregstosave_num; i++) {
      int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;

      if (PowerArchitecturePPC64 >= 9) {
        __ stxv(as_VectorRegister(reg_num)->to_vsr(), offset, R1_SP);
      } else {
        // Pre-Power9: stxvd2x has no displacement form, so use R31 as index register.
        __ li(R31, offset);
        __ stxvd2x(as_VectorRegister(reg_num)->to_vsr(), R31, R1_SP);
      }
      // Note: The contents were read in the same order (see loadV16_Power8 / loadV16_Power9 node in ppc.ad).
      // RegisterMap::pd_location only uses the first VMReg for each VectorRegister.
      if (generate_oop_map) {
        VMReg vsr = RegisterSaver_LiveVecRegs[i].vmreg;
        map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), vsr);
      }
      offset += vec_reg_size;
    }
  }

  assert(offset == frame_size_in_bytes, "consistency check");

  BLOCK_COMMENT("} push_frame_reg_args_and_save_live_registers");

  // And we're done.
  return map;
}
400
401
// Pop the current frame and restore all the registers that we
// saved. Mirror image of push_frame_reg_args_and_save_live_registers:
// walks the same layout tables with the same offsets. R31 serves as the
// scratch register throughout and is reloaded last.
void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
                                                         int frame_size_in_bytes,
                                                         bool restore_ctr,
                                                         bool save_vectors) {
  const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
                             sizeof(RegisterSaver::LiveRegType);
  const int vecregstosave_num = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
                                                sizeof(RegisterSaver::LiveRegType))
                                             : 0;
  const int register_save_size = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;

  const int register_save_offset = frame_size_in_bytes - register_save_size;

  BLOCK_COMMENT("restore_live_registers_and_pop_frame {");

  // restore all registers (ints and floats)
  int offset = register_save_offset;

  for (int i = 0; i < regstosave_num; i++) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        if (reg_num != 31) // R31 restored at the end, it's the tmp reg!
          __ ld(as_Register(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::float_reg: {
        __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::special_reg: {
        if (reg_num == SR_CTR.encoding()) {
          if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
            __ ld(R31, offset, R1_SP);
            __ mtctr(R31);
          }
        } else {
          Unimplemented();
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
    offset += reg_size;
  }

  assert(is_aligned(offset, StackAlignmentInBytes), "should be");
  if (PowerArchitecturePPC64 >= 10) {
    // Power10: reload adjacent vector register pairs with lxvp.
    for (int i = 0; i < vecregstosave_num; i += 2) {
      int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;
      assert(RegisterSaver_LiveVecRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!");

      __ lxvp(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);

      offset += (2 * vec_reg_size);
    }
  } else {
    for (int i = 0; i < vecregstosave_num; i++) {
      int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;

      if (PowerArchitecturePPC64 >= 9) {
        __ lxv(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);
      } else {
        // Pre-Power9: lxvd2x has no displacement form, so use R31 as index register.
        __ li(R31, offset);
        __ lxvd2x(as_VectorRegister(reg_num).to_vsr(), R31, R1_SP);
      }

      offset += vec_reg_size;
    }
  }

  assert(offset == frame_size_in_bytes, "consistency check");

  // restore link and the flags
  __ ld(R31, frame_size_in_bytes + _abi0(lr), R1_SP);
  __ mtlr(R31);

  // restore scratch register's value
  __ ld(R31, frame_size_in_bytes - reg_size - vecregstosave_num * vec_reg_size, R1_SP);

  // pop the frame
  __ addi(R1_SP, R1_SP, frame_size_in_bytes);

  BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
}
492
493 void RegisterSaver::push_frame_and_save_argument_registers(MacroAssembler* masm, Register r_temp,
494 int frame_size,int total_args, const VMRegPair *regs,
495 const VMRegPair *regs2) {
496 __ push_frame(frame_size, r_temp);
497 int st_off = frame_size - wordSize;
498 for (int i = 0; i < total_args; i++) {
499 VMReg r_1 = regs[i].first();
500 VMReg r_2 = regs[i].second();
501 if (!r_1->is_valid()) {
502 assert(!r_2->is_valid(), "");
503 continue;
504 }
505 if (r_1->is_Register()) {
506 Register r = r_1->as_Register();
507 __ std(r, st_off, R1_SP);
508 st_off -= wordSize;
509 } else if (r_1->is_FloatRegister()) {
510 FloatRegister f = r_1->as_FloatRegister();
511 __ stfd(f, st_off, R1_SP);
512 st_off -= wordSize;
513 }
514 }
515 if (regs2 != nullptr) {
516 for (int i = 0; i < total_args; i++) {
517 VMReg r_1 = regs2[i].first();
518 VMReg r_2 = regs2[i].second();
519 if (!r_1->is_valid()) {
520 assert(!r_2->is_valid(), "");
521 continue;
522 }
523 if (r_1->is_Register()) {
524 Register r = r_1->as_Register();
525 __ std(r, st_off, R1_SP);
526 st_off -= wordSize;
527 } else if (r_1->is_FloatRegister()) {
528 FloatRegister f = r_1->as_FloatRegister();
529 __ stfd(f, st_off, R1_SP);
530 st_off -= wordSize;
531 }
532 }
533 }
534 }
535
536 void RegisterSaver::restore_argument_registers_and_pop_frame(MacroAssembler*masm, int frame_size,
537 int total_args, const VMRegPair *regs,
538 const VMRegPair *regs2) {
539 int st_off = frame_size - wordSize;
540 for (int i = 0; i < total_args; i++) {
541 VMReg r_1 = regs[i].first();
542 VMReg r_2 = regs[i].second();
543 if (r_1->is_Register()) {
544 Register r = r_1->as_Register();
545 __ ld(r, st_off, R1_SP);
546 st_off -= wordSize;
547 } else if (r_1->is_FloatRegister()) {
548 FloatRegister f = r_1->as_FloatRegister();
549 __ lfd(f, st_off, R1_SP);
550 st_off -= wordSize;
551 }
552 }
553 if (regs2 != nullptr)
554 for (int i = 0; i < total_args; i++) {
555 VMReg r_1 = regs2[i].first();
556 VMReg r_2 = regs2[i].second();
557 if (r_1->is_Register()) {
558 Register r = r_1->as_Register();
559 __ ld(r, st_off, R1_SP);
560 st_off -= wordSize;
561 } else if (r_1->is_FloatRegister()) {
562 FloatRegister f = r_1->as_FloatRegister();
563 __ lfd(f, st_off, R1_SP);
564 st_off -= wordSize;
565 }
566 }
567 __ pop_frame();
568 }
569
// Restore the registers that might be holding a result.
// Used after deoptimization: only R3_RET (int/ptr result) and F1_RET
// (float/double result) are reloaded; everything else was already
// extracted from the frame. Walks the same layout as the save function
// so the offsets match.
void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes, bool save_vectors) {
  const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
                             sizeof(RegisterSaver::LiveRegType);
  const int vecregstosave_num = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
                                                sizeof(RegisterSaver::LiveRegType))
                                             : 0;
  const int register_save_size = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;

  const int register_save_offset = frame_size_in_bytes - register_save_size;

  // restore all result registers (ints and floats)
  int offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
    switch (reg_type) {
      case RegisterSaver::int_reg: {
        if (as_Register(reg_num)==R3_RET) // int result_reg
          __ ld(as_Register(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::float_reg: {
        if (as_FloatRegister(reg_num)==F1_RET) // float result_reg
          __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::special_reg: {
        // Special registers don't hold a result.
        break;
      }
      default:
        ShouldNotReachHere();
    }
    offset += reg_size;
  }

  // Vector registers never hold a Java result, so the vector area is skipped.
  assert(offset == frame_size_in_bytes - (save_vectors ? vecregstosave_num * vec_reg_size : 0), "consistency check");
}
609
610 // Is vector's size (in bytes) bigger than a size saved by default?
611 bool SharedRuntime::is_wide_vector(int size) {
612 // Note, MaxVectorSize == 8/16 on PPC64.
613 assert(size <= (SuperwordUseVSX ? 16 : 8), "%d bytes vectors are not supported", size);
614 return size > 8;
615 }
616
// Convert a stack VMReg into its 4-byte slot index relative to SP,
// accounting for the slots the callee must preserve for its callees.
static int reg2slot(VMReg r) {
  return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
}
620
621 static int reg2offset(VMReg r) {
622 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
623 }
624
625 // ---------------------------------------------------------------------------
626 // Read the array of BasicTypes from a signature, and compute where the
627 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
628 // quantities. Values less than VMRegImpl::stack0 are registers, those above
629 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
630 // as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp), and VMRegImpl::stack0+1
// refers to the memory word 4-bytes higher. Registers 0 up to
// Register::number_of_registers are the 64-bit
// integer registers.
635
// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// 64-bit on this 64-bit-only PPC64 port. The OUTPUTS are in 32-bit
// units regardless of build.
639
640 // The Java calling convention is a "shifted" version of the C ABI.
641 // By skipping the first C ABI register we can call non-static jni methods
642 // with small numbers of arguments without having to shuffle the arguments
643 // at all. Since we control the java ABI we ought to at least get some
644 // advantage out of it.
645
// Integer/pointer argument registers of the Java calling convention,
// in argument order (R3 receives the first int/ptr argument).
const VMReg java_iarg_reg[8] = {
  R3->as_VMReg(),
  R4->as_VMReg(),
  R5->as_VMReg(),
  R6->as_VMReg(),
  R7->as_VMReg(),
  R8->as_VMReg(),
  R9->as_VMReg(),
  R10->as_VMReg()
};
656
// Float/double argument registers of the Java calling convention,
// in argument order (F1 receives the first float argument).
const VMReg java_farg_reg[13] = {
  F1->as_VMReg(),
  F2->as_VMReg(),
  F3->as_VMReg(),
  F4->as_VMReg(),
  F5->as_VMReg(),
  F6->as_VMReg(),
  F7->as_VMReg(),
  F8->as_VMReg(),
  F9->as_VMReg(),
  F10->as_VMReg(),
  F11->as_VMReg(),
  F12->as_VMReg(),
  F13->as_VMReg()
};
672
const int num_java_iarg_registers = sizeof(java_iarg_reg) / sizeof(java_iarg_reg[0]);
const int num_java_farg_registers = sizeof(java_farg_reg) / sizeof(java_farg_reg[0]);

// The tables above must agree with the platform's declared Java argument counts.
STATIC_ASSERT(num_java_iarg_registers == Argument::n_int_register_parameters_j);
STATIC_ASSERT(num_java_farg_registers == Argument::n_float_register_parameters_j);
678
679 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
680 VMRegPair *regs,
681 int total_args_passed) {
682 // C2c calling conventions for compiled-compiled calls.
683 // Put 8 ints/longs into registers _AND_ 13 float/doubles into
684 // registers _AND_ put the rest on the stack.
685
686 const int inc_stk_for_intfloat = 1; // 1 slots for ints and floats
687 const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles
688
689 int i;
690 VMReg reg;
691 int stk = 0;
692 int ireg = 0;
693 int freg = 0;
694
695 // We put the first 8 arguments into registers and the rest on the
696 // stack, float arguments are already in their argument registers
697 // due to c2c calling conventions (see calling_convention).
698 for (int i = 0; i < total_args_passed; ++i) {
699 switch(sig_bt[i]) {
700 case T_BOOLEAN:
701 case T_CHAR:
702 case T_BYTE:
703 case T_SHORT:
704 case T_INT:
705 if (ireg < num_java_iarg_registers) {
706 // Put int/ptr in register
707 reg = java_iarg_reg[ireg];
708 ++ireg;
709 } else {
710 // Put int/ptr on stack.
711 reg = VMRegImpl::stack2reg(stk);
712 stk += inc_stk_for_intfloat;
713 }
714 regs[i].set1(reg);
715 break;
716 case T_LONG:
717 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
718 if (ireg < num_java_iarg_registers) {
719 // Put long in register.
720 reg = java_iarg_reg[ireg];
721 ++ireg;
722 } else {
723 // Put long on stack. They must be aligned to 2 slots.
724 if (stk & 0x1) ++stk;
725 reg = VMRegImpl::stack2reg(stk);
726 stk += inc_stk_for_longdouble;
727 }
728 regs[i].set2(reg);
729 break;
730 case T_OBJECT:
731 case T_ARRAY:
732 case T_ADDRESS:
733 if (ireg < num_java_iarg_registers) {
734 // Put ptr in register.
735 reg = java_iarg_reg[ireg];
736 ++ireg;
737 } else {
738 // Put ptr on stack. Objects must be aligned to 2 slots too,
739 // because "64-bit pointers record oop-ishness on 2 aligned
740 // adjacent registers." (see OopFlow::build_oop_map).
741 if (stk & 0x1) ++stk;
742 reg = VMRegImpl::stack2reg(stk);
743 stk += inc_stk_for_longdouble;
744 }
745 regs[i].set2(reg);
746 break;
747 case T_FLOAT:
748 if (freg < num_java_farg_registers) {
749 // Put float in register.
750 reg = java_farg_reg[freg];
751 ++freg;
752 } else {
753 // Put float on stack.
754 reg = VMRegImpl::stack2reg(stk);
755 stk += inc_stk_for_intfloat;
756 }
757 regs[i].set1(reg);
758 break;
759 case T_DOUBLE:
760 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
761 if (freg < num_java_farg_registers) {
762 // Put double in register.
763 reg = java_farg_reg[freg];
764 ++freg;
765 } else {
766 // Put double on stack. They must be aligned to 2 slots.
767 if (stk & 0x1) ++stk;
768 reg = VMRegImpl::stack2reg(stk);
769 stk += inc_stk_for_longdouble;
770 }
771 regs[i].set2(reg);
772 break;
773 case T_VOID:
774 // Do not count halves.
775 regs[i].set_bad();
776 break;
777 default:
778 ShouldNotReachHere();
779 }
780 }
781 return stk;
782 }
783
// Calling convention for calling C code.
// Assigns each argument a register or stack slot per the PPC64 ELF ABI
// (ELFv1 or ELFv2, selected at build time) and returns the required
// outgoing stack size in 32-bit slots (excluding jit_out_preserve).
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        int total_args_passed) {
  // Calling conventions for C runtime calls and calls to JNI native methods.
  //
  // PPC64 convention: Hoist the first 8 int/ptr/long's in the first 8
  // int regs, leaving int regs undefined if the arg is flt/dbl. Hoist
  // the first 13 flt/dbl's in the first 13 fp regs but additionally
  // copy flt/dbl to the stack if they are beyond the 8th argument.

  // Integer argument registers of the C ABI, in argument-position order.
  const VMReg iarg_reg[8] = {
    R3->as_VMReg(),
    R4->as_VMReg(),
    R5->as_VMReg(),
    R6->as_VMReg(),
    R7->as_VMReg(),
    R8->as_VMReg(),
    R9->as_VMReg(),
    R10->as_VMReg()
  };

  // Float argument registers of the C ABI, in float-argument order.
  const VMReg farg_reg[13] = {
    F1->as_VMReg(),
    F2->as_VMReg(),
    F3->as_VMReg(),
    F4->as_VMReg(),
    F5->as_VMReg(),
    F6->as_VMReg(),
    F7->as_VMReg(),
    F8->as_VMReg(),
    F9->as_VMReg(),
    F10->as_VMReg(),
    F11->as_VMReg(),
    F12->as_VMReg(),
    F13->as_VMReg()
  };

  // Check calling conventions consistency.
  assert(sizeof(iarg_reg) / sizeof(iarg_reg[0]) == Argument::n_int_register_parameters_c &&
         sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c,
         "consistency");

  // Extra header slots present in the native ABI frame but not in the JIT frame
  // (zero on ELFv2, non-zero on ELFv1 which has a larger header).
  const int additional_frame_header_slots = ((frame::native_abi_minframe_size - frame::jit_out_preserve_size)
                                            / VMRegImpl::stack_slot_size);
  // Offset of a float within its 8-byte Parameter Save Area slot.
  const int float_offset_in_slots = Argument::float_on_stack_offset_in_bytes_c / VMRegImpl::stack_slot_size;

  VMReg reg;
  int arg = 0;          // argument position (halves excluded), determines PSA slot
  int freg = 0;         // next free float argument register
  bool stack_used = false; // did any argument spill to the stack?

  for (int i = 0; i < total_args_passed; ++i, ++arg) {
    // Each argument corresponds to a slot in the Parameter Save Area (if not omitted)
    int stk = (arg * 2) + additional_frame_header_slots;

    switch(sig_bt[i]) {
    //
    // If arguments 0-7 are integers, they are passed in integer registers.
    // Argument i is placed in iarg_reg[i].
    //
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      // We must cast ints to longs and use full 64 bit stack slots
      // here.  Thus fall through, handle as long.
    case T_LONG:
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
    case T_METADATA:
      // Oops are already boxed if required (JNI).
      if (arg < Argument::n_int_register_parameters_c) {
        reg = iarg_reg[arg];
      } else {
        reg = VMRegImpl::stack2reg(stk);
        stack_used = true;
      }
      regs[i].set2(reg);
      break;

    //
    // Floats are treated differently from int regs:  The first 13 float arguments
    // are passed in registers (not the float args among the first 13 args).
    // Thus argument i is NOT passed in farg_reg[i] if it is float.  It is passed
    // in farg_reg[j] if argument i is the j-th float argument of this call.
    //
    case T_FLOAT:
      if (freg < Argument::n_float_register_parameters_c) {
        // Put float in register ...
        reg = farg_reg[freg];
        ++freg;
      } else {
        // Put float on stack.
        reg = VMRegImpl::stack2reg(stk + float_offset_in_slots);
        stack_used = true;
      }
      regs[i].set1(reg);
      break;
    case T_DOUBLE:
      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
      if (freg < Argument::n_float_register_parameters_c) {
        // Put double in register ...
        reg = farg_reg[freg];
        ++freg;
      } else {
        // Put double on stack.
        reg = VMRegImpl::stack2reg(stk);
        stack_used = true;
      }
      regs[i].set2(reg);
      break;

    case T_VOID:
      // Do not count halves.
      regs[i].set_bad();
      --arg;
      break;
    default:
      ShouldNotReachHere();
    }
  }

  // Return size of the stack frame excluding the jit_out_preserve part in single-word slots.
#if defined(ABI_ELFv2)
  assert(additional_frame_header_slots == 0, "ABIv2 shouldn't use extra slots");
  // ABIv2 allows omitting the Parameter Save Area if the callee's prototype
  // indicates that all parameters can be passed in registers.
  return stack_used ? (arg * 2) : 0;
#else
  // The Parameter Save Area needs to be at least 8 double-word slots for ABIv1.
  // We have to add extra slots because ABIv1 uses a larger header.
  return MAX2(arg, 8) * 2 + additional_frame_header_slots;
#endif
}
921
// Vector calling convention: not supported on PPC64.
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                             uint num_bits,
                                             uint total_args_passed) {
  Unimplemented();
  return 0;
}
928
// Generate the compiled-to-interpreted (c2i) adapter: spill the compiled
// calling convention's arguments (registers and caller stack slots, as
// described by regs[]) into the interpreter's expected argument area on a
// freshly pushed TOP_IJAVA_FRAME, then tail-jump to the interpreter entry.
//
// Parameters:
//   total_args_passed  - number of entries in sig_bt/regs (longs/doubles use 2).
//   comp_args_on_stack - unused here; kept for a signature parallel to gen_i2c_adapter.
//   sig_bt/regs        - signature and compiled-convention locations of each arg.
//   call_interpreter   - label bound here; also branched to from the unverified
//                        entry in generate_i2c2i_adapters.
//   ientry             - register that will hold the interpreter entry point.
//
// Returns the address of the (verified) c2i entry point.
static address gen_c2i_adapter(MacroAssembler *masm,
                               int total_args_passed,
                               int comp_args_on_stack,
                               const BasicType *sig_bt,
                               const VMRegPair *regs,
                               Label& call_interpreter,
                               const Register& ientry) {

  address c2i_entrypoint;

  const Register sender_SP = R21_sender_SP; // == R21_tmp1
  const Register code = R22_tmp2;
  //const Register ientry = R23_tmp3;
  // Rotate through these temps when copying stack-passed arguments so loads
  // and stores can overlap.
  const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 };
  const int num_value_regs = sizeof(value_regs) / sizeof(Register);
  int value_regs_index = 0;

  const Register return_pc = R27_tmp7;
  const Register tmp = R28_tmp8;

  assert_different_registers(sender_SP, code, ientry, return_pc, tmp);

  // Adapter needs TOP_IJAVA_FRAME_ABI.
  const int adapter_size = frame::top_ijava_frame_abi_size +
                           align_up(total_args_passed * wordSize, frame::alignment_in_bytes);

  // regular (verified) c2i entry point
  c2i_entrypoint = __ pc();

  // Does compiled code exist? If yes, patch the caller's callsite.
  __ ld(code, method_(code));
  __ cmpdi(CR0, code, 0);
  __ ld(ientry, method_(interpreter_entry)); // preloaded
  __ beq(CR0, call_interpreter);

  // Patch caller's callsite, method_(code) was not null which means that
  // compiled code exists.
  __ mflr(return_pc);
  __ std(return_pc, _abi0(lr), R1_SP);
  // Argument registers are live across the leaf call below; save and restore
  // them around it.
  RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs);

  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), R19_method, return_pc);

  RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs);
  __ ld(return_pc, _abi0(lr), R1_SP);
  __ ld(ientry, method_(interpreter_entry)); // preloaded
  __ mtlr(return_pc);

  // Call the interpreter.
  __ BIND(call_interpreter);
  __ mtctr(ientry);

  // Get a copy of the current SP for loading caller's arguments.
  __ mr(sender_SP, R1_SP);

  // Add space for the adapter.
  __ resize_frame(-adapter_size, R12_scratch2);

  // Interpreter arguments are written top-down from the end of the adapter frame.
  int st_off = adapter_size - wordSize;

  // Write the args into the outgoing interpreter space.
  for (int i = 0; i < total_args_passed; i++) {
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      Register tmp_reg = value_regs[value_regs_index];
      value_regs_index = (value_regs_index + 1) % num_value_regs;
      // The calling convention produces OptoRegs that ignore the out
      // preserve area (JIT's ABI). We must account for it here.
      int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
      if (!r_2->is_valid()) {
        __ lwz(tmp_reg, ld_off, sender_SP);
      } else {
        __ ld(tmp_reg, ld_off, sender_SP);
      }
      // Pretend stack targets were loaded into tmp_reg.
      r_1 = tmp_reg->as_VMReg();
    }

    if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
        __ stw(r, st_off, R1_SP);
        st_off-=wordSize;
      } else {
        // Longs are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          // Poison the unused upper slot in debug builds to catch stray reads.
          DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
          st_off-=wordSize;
        }
        __ std(r, st_off, R1_SP);
        st_off-=wordSize;
      }
    } else {
      assert(r_1->is_FloatRegister(), "");
      FloatRegister f = r_1->as_FloatRegister();
      if (!r_2->is_valid()) {
        __ stfs(f, st_off, R1_SP);
        st_off-=wordSize;
      } else {
        // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        // One of these should get known junk...
        DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
        st_off-=wordSize;
        __ stfd(f, st_off, R1_SP);
        st_off-=wordSize;
      }
    }
  }

  // Jump to the interpreter just as if interpreter was doing it.

  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);

  // load TOS
  __ addi(R15_esp, R1_SP, st_off);

  // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1.
  assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register");
  __ bctr();

  return c2i_entrypoint;
}
1060
// Generate the interpreted-to-compiled (i2c) adapter: shuffle the
// interpreter's stack-resident arguments (addressed downward from R15_esp)
// into the compiled calling convention's registers/stack slots described by
// regs[], then tail-jump to the method's from_compiled entry.
void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int total_args_passed,
                                    int comp_args_on_stack,
                                    const BasicType *sig_bt,
                                    const VMRegPair *regs) {

  // Load method's entry-point from method.
  __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
  __ mtctr(R12_scratch2);

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the x86 side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.

  // NOTE(review): the following rationale was adapted from the x86 version
  // (which uses r13/fxsave); on PPC the sender SP lives in R21_sender_SP and
  // the interpreter's expression stack pointer in R15_esp. The intent stands:
  // the sender SP must be preserved because we may do an i2c -> c2i
  // transition if we lose a race where compiled code goes non-entrant while
  // we get the args ready, and the stack must stay 16-byte aligned on an i2c
  // entry so compiled code sees the alignment it expects.

  const Register ld_ptr = R15_esp;
  // Temps rotated through for memory-to-memory argument moves.
  const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
  const int num_value_regs = sizeof(value_regs) / sizeof(Register);
  int value_regs_index = 0;

  // Interpreter args are read top-down, starting past the last slot.
  int ld_offset = total_args_passed*wordSize;

  // Cut-out for having no stack args. Since up to 2 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
    // registers are below. By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.

    // Convert 4-byte c2 stack slots to words.
    comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize.
    comp_words_on_stack = align_up(comp_words_on_stack, 2);
    __ resize_frame(-comp_words_on_stack * wordSize, R11_scratch1);
  }

  // Now generate the shuffle code. Pick up all register args and move the
  // rest through the value_regs temporaries above.
  BLOCK_COMMENT("Shuffle arguments");
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // T_VOID marks the second (unused) half of a long/double; skip it.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from ld_ptr.
    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_FloatRegister()) {
      if (!r_2->is_valid()) {
        __ lfs(r_1->as_FloatRegister(), ld_offset, ld_ptr);
        ld_offset-=wordSize;
      } else {
        // Skip the unused interpreter slot.
        __ lfd(r_1->as_FloatRegister(), ld_offset-wordSize, ld_ptr);
        ld_offset-=2*wordSize;
      }
    } else {
      Register r;
      if (r_1->is_stack()) {
        // Must do a memory to memory move thru "value".
        r = value_regs[value_regs_index];
        value_regs_index = (value_regs_index + 1) % num_value_regs;
      } else {
        r = r_1->as_Register();
      }
      if (!r_2->is_valid()) {
        // Not sure we need to do this but it shouldn't hurt.
        if (is_reference_type(sig_bt[i]) || sig_bt[i] == T_ADDRESS) {
          __ ld(r, ld_offset, ld_ptr);
          ld_offset-=wordSize;
        } else {
          __ lwz(r, ld_offset, ld_ptr);
          ld_offset-=wordSize;
        }
      } else {
        // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          ld_offset-=wordSize;
        }
        __ ld(r, ld_offset, ld_ptr);
        ld_offset-=wordSize;
      }

      if (r_1->is_stack()) {
        // Now store value where the compiler expects it
        int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size;

        if (sig_bt[i] == T_INT || sig_bt[i] == T_FLOAT ||sig_bt[i] == T_BOOLEAN ||
            sig_bt[i] == T_SHORT || sig_bt[i] == T_CHAR || sig_bt[i] == T_BYTE) {
          __ stw(r, st_off, R1_SP);
        } else {
          __ std(r, st_off, R1_SP);
        }
      }
    }
  }

  __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about

  BLOCK_COMMENT("Store method");
  // Store method into thread->callee_target.
  // We might end up in handle_wrong_method if the callee is
  // deoptimized as we race thru here. If that happens we don't want
  // to take a safepoint because the caller frame will look
  // interpreted and arguments are now "compiled" so it is much better
  // to make this transition invisible to the stack walking
  // code. Unfortunately if we try and find the callee by normal means
  // a safepoint is possible. So we stash the desired callee in the
  // thread and the vm will find there should this case occur.
  __ std(R19_method, thread_(callee_target));

  // Jump to the compiled code just as if compiled code was doing it.
  __ bctr();
}
1194
// Emit the full adapter blob for one signature: the i2c entry, the unverified
// c2i entry (inline-cache check), the verified c2i entry (with class-init
// barrier for static methods), and the c2i entry that skips the clinit check.
// The generated entry addresses are reported through entry_address[].
void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                            int total_args_passed,
                                            int comp_args_on_stack,
                                            const BasicType *sig_bt,
                                            const VMRegPair *regs,
                                            address entry_address[AdapterBlob::ENTRY_COUNT]) {
  // entry: i2c

  __ align(CodeEntryAlignment);
  entry_address[AdapterBlob::I2C] = __ pc();
  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);


  // entry: c2i unverified

  __ align(CodeEntryAlignment);
  BLOCK_COMMENT("c2i unverified entry");
  entry_address[AdapterBlob::C2I_Unverified] = __ pc();

  // inline_cache contains a CompiledICData
  const Register ic = R19_inline_cache_reg;
  const Register ic_klass = R11_scratch1;
  const Register receiver_klass = R12_scratch2;
  const Register code = R21_tmp1;
  const Register ientry = R23_tmp3;

  assert_different_registers(ic, ic_klass, receiver_klass, R3_ARG1, code, ientry);
  assert(R11_scratch1 == R11, "need prologue scratch register");

  Label call_interpreter;

  // On ic miss, ic_check falls through to the branch to the ic_miss_stub below.
  __ ic_check(4 /* end_alignment */);
  __ ld(R19_method, CompiledICData::speculated_method_offset(), ic);
  // Argument is valid and klass is as expected, continue.

  // If no compiled code exists, go straight to the interpreter (common case,
  // hence predict-taken); call_interpreter is bound inside gen_c2i_adapter.
  __ ld(code, method_(code));
  __ cmpdi(CR0, code, 0);
  __ ld(ientry, method_(interpreter_entry)); // preloaded
  __ beq_predict_taken(CR0, call_interpreter);

  // Branch to ic_miss_stub.
  __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);

  // entry: c2i

  entry_address[AdapterBlob::C2I] = __ pc();

  // Class initialization barrier for static methods
  entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
  assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  Label L_skip_barrier;

  // Bypass the barrier for non-static methods
  __ lhz(R0, in_bytes(Method::access_flags_offset()), R19_method);
  __ andi_(R0, R0, JVM_ACC_STATIC);
  __ beq(CR0, L_skip_barrier); // non-static

  Register klass = R11_scratch1;
  __ load_method_holder(klass, R19_method);
  __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);

  // Holder not initialized (and not being initialized by this thread):
  // bail out to the wrong-method stub, which will re-resolve.
  __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
  __ mtctr(klass);
  __ bctr();

  __ bind(L_skip_barrier);
  entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc();

  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bs->c2i_entry_barrier(masm, /* tmp register*/ ic_klass, /* tmp register*/ receiver_klass, /* tmp register*/ code);

  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, call_interpreter, ientry);
  return;
}
1269
// An oop arg. Must pass a handle not the oop itself.
// Spills the oop to a known stack slot, records that slot in the oop map,
// and passes the slot's address (or null, when the oop is null) to the
// native callee. is_receiver/receiver_offset report where the receiver was
// spilled so the caller can locate it later.
static void object_move(MacroAssembler* masm,
                        int frame_size_in_slots,
                        OopMap* oop_map, int oop_handle_offset,
                        bool is_receiver, int* receiver_offset,
                        VMRegPair src, VMRegPair dst,
                        Register r_caller_sp, Register r_temp_1, Register r_temp_2) {
  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)),
         "receiver has already been moved");

  // We must pass a handle. First figure out the location we use as a handle.

  if (src.first()->is_stack()) {
    // stack to stack or reg

    const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
    Label skip;
    const int oop_slot_in_callers_frame = reg2slot(src.first());

    guarantee(!is_receiver, "expecting receiver in register");
    // The oop stays in the caller's frame; register its slot (biased by our
    // frame size) in the oop map.
    oop_map->set_oop(VMRegImpl::stack2reg(oop_slot_in_callers_frame + frame_size_in_slots));

    __ addi(r_handle, r_caller_sp, reg2offset(src.first()));
    __ ld( r_temp_2, reg2offset(src.first()), r_caller_sp);
    __ cmpdi(CR0, r_temp_2, 0);
    __ bne(CR0, skip);
    // Use a null handle if oop is null.
    __ li(r_handle, 0);
    __ bind(skip);

    if (dst.first()->is_stack()) {
      // stack to stack
      __ std(r_handle, reg2offset(dst.first()), R1_SP);
    } else {
      // stack to reg
      // Nothing to do, r_handle is already the dst register.
    }
  } else {
    // reg to stack or reg
    const Register r_oop = src.first()->as_Register();
    const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
    // Each incoming arg register gets its own spill slot, laid out in
    // register order starting at oop_handle_offset.
    const int oop_slot = (r_oop->encoding()-R3_ARG1->encoding()) * VMRegImpl::slots_per_word
                         + oop_handle_offset; // in slots
    const int oop_offset = oop_slot * VMRegImpl::stack_slot_size;
    Label skip;

    if (is_receiver) {
      *receiver_offset = oop_offset;
    }
    oop_map->set_oop(VMRegImpl::stack2reg(oop_slot));

    // Spill the oop into our frame and hand out the slot's address.
    __ std( r_oop, oop_offset, R1_SP);
    __ addi(r_handle, R1_SP, oop_offset);

    __ cmpdi(CR0, r_oop, 0);
    __ bne(CR0, skip);
    // Use a null handle if oop is null.
    __ li(r_handle, 0);
    __ bind(skip);

    if (dst.first()->is_stack()) {
      // reg to stack
      __ std(r_handle, reg2offset(dst.first()), R1_SP);
    } else {
      // reg to reg
      // Nothing to do, r_handle is already the dst register.
    }
  }
}
1339
1340 static void int_move(MacroAssembler*masm,
1341 VMRegPair src, VMRegPair dst,
1342 Register r_caller_sp, Register r_temp) {
1343 assert(src.first()->is_valid(), "incoming must be int");
1344 assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
1345
1346 if (src.first()->is_stack()) {
1347 if (dst.first()->is_stack()) {
1348 // stack to stack
1349 __ lwa(r_temp, reg2offset(src.first()), r_caller_sp);
1350 __ std(r_temp, reg2offset(dst.first()), R1_SP);
1351 } else {
1352 // stack to reg
1353 __ lwa(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
1354 }
1355 } else if (dst.first()->is_stack()) {
1356 // reg to stack
1357 __ extsw(r_temp, src.first()->as_Register());
1358 __ std(r_temp, reg2offset(dst.first()), R1_SP);
1359 } else {
1360 // reg to reg
1361 __ extsw(dst.first()->as_Register(), src.first()->as_Register());
1362 }
1363 }
1364
1365 static void long_move(MacroAssembler*masm,
1366 VMRegPair src, VMRegPair dst,
1367 Register r_caller_sp, Register r_temp) {
1368 assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long");
1369 assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
1370
1371 if (src.first()->is_stack()) {
1372 if (dst.first()->is_stack()) {
1373 // stack to stack
1374 __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
1375 __ std(r_temp, reg2offset(dst.first()), R1_SP);
1376 } else {
1377 // stack to reg
1378 __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
1379 }
1380 } else if (dst.first()->is_stack()) {
1381 // reg to stack
1382 __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP);
1383 } else {
1384 // reg to reg
1385 if (dst.first()->as_Register() != src.first()->as_Register())
1386 __ mr(dst.first()->as_Register(), src.first()->as_Register());
1387 }
1388 }
1389
1390 static void float_move(MacroAssembler*masm,
1391 VMRegPair src, VMRegPair dst,
1392 Register r_caller_sp, Register r_temp) {
1393 assert(src.first()->is_valid() && !src.second()->is_valid(), "incoming must be float");
1394 assert(dst.first()->is_valid() && !dst.second()->is_valid(), "outgoing must be float");
1395
1396 if (src.first()->is_stack()) {
1397 if (dst.first()->is_stack()) {
1398 // stack to stack
1399 __ lwz(r_temp, reg2offset(src.first()), r_caller_sp);
1400 __ stw(r_temp, reg2offset(dst.first()), R1_SP);
1401 } else {
1402 // stack to reg
1403 __ lfs(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
1404 }
1405 } else if (dst.first()->is_stack()) {
1406 // reg to stack
1407 __ stfs(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
1408 } else {
1409 // reg to reg
1410 if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
1411 __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1412 }
1413 }
1414
1415 static void double_move(MacroAssembler*masm,
1416 VMRegPair src, VMRegPair dst,
1417 Register r_caller_sp, Register r_temp) {
1418 assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be double");
1419 assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be double");
1420
1421 if (src.first()->is_stack()) {
1422 if (dst.first()->is_stack()) {
1423 // stack to stack
1424 __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
1425 __ std(r_temp, reg2offset(dst.first()), R1_SP);
1426 } else {
1427 // stack to reg
1428 __ lfd(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
1429 }
1430 } else if (dst.first()->is_stack()) {
1431 // reg to stack
1432 __ stfd(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
1433 } else {
1434 // reg to reg
1435 if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
1436 __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1437 }
1438 }
1439
1440 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1441 switch (ret_type) {
1442 case T_BOOLEAN:
1443 case T_CHAR:
1444 case T_BYTE:
1445 case T_SHORT:
1446 case T_INT:
1447 __ stw (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1448 break;
1449 case T_ARRAY:
1450 case T_OBJECT:
1451 case T_LONG:
1452 __ std (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1453 break;
1454 case T_FLOAT:
1455 __ stfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1456 break;
1457 case T_DOUBLE:
1458 __ stfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1459 break;
1460 case T_VOID:
1461 break;
1462 default:
1463 ShouldNotReachHere();
1464 break;
1465 }
1466 }
1467
1468 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1469 switch (ret_type) {
1470 case T_BOOLEAN:
1471 case T_CHAR:
1472 case T_BYTE:
1473 case T_SHORT:
1474 case T_INT:
1475 __ lwz(R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1476 break;
1477 case T_ARRAY:
1478 case T_OBJECT:
1479 case T_LONG:
1480 __ ld (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1481 break;
1482 case T_FLOAT:
1483 __ lfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1484 break;
1485 case T_DOUBLE:
1486 __ lfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1487 break;
1488 case T_VOID:
1489 break;
1490 default:
1491 ShouldNotReachHere();
1492 break;
1493 }
1494 }
1495
1496 static void verify_oop_args(MacroAssembler* masm,
1497 const methodHandle& method,
1498 const BasicType* sig_bt,
1499 const VMRegPair* regs) {
1500 Register temp_reg = R19_method; // not part of any compiled calling seq
1501 if (VerifyOops) {
1502 for (int i = 0; i < method->size_of_parameters(); i++) {
1503 if (is_reference_type(sig_bt[i])) {
1504 VMReg r = regs[i].first();
1505 assert(r->is_valid(), "bad oop arg");
1506 if (r->is_stack()) {
1507 __ ld(temp_reg, reg2offset(r), R1_SP);
1508 __ verify_oop(temp_reg, FILE_AND_LINE);
1509 } else {
1510 __ verify_oop(r->as_Register(), FILE_AND_LINE);
1511 }
1512 }
1513 }
1514 }
1515 }
1516
// Generate the dispatch for a signature-polymorphic intrinsic
// (MethodHandle.invokeBasic, the linkTo* linkers, linkToNative): locate the
// receiver and/or trailing MemberName/NativeEntryPoint argument and hand off
// to MethodHandles::generate_method_handle_dispatch.
static void gen_special_dispatch(MacroAssembler* masm,
                                 const methodHandle& method,
                                 const BasicType* sig_bt,
                                 const VMRegPair* regs) {
  verify_oop_args(masm, method, sig_bt, regs);
  vmIntrinsics::ID iid = method->intrinsic_id();

  // Now write the args into the outgoing interpreter space
  bool has_receiver = false;
  Register receiver_reg = noreg;
  int member_arg_pos = -1;
  Register member_reg = noreg;
  int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
  if (ref_kind != 0) {
    member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument
    member_reg = R19_method; // known to be free at this point
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else if (iid == vmIntrinsics::_invokeBasic) {
    has_receiver = true;
  } else if (iid == vmIntrinsics::_linkToNative) {
    member_arg_pos = method->size_of_parameters() - 1; // trailing NativeEntryPoint argument
    member_reg = R19_method; // known to be free at this point
  } else {
    fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
    VMReg r = regs[member_arg_pos].first();
    if (r->is_stack()) {
      __ ld(member_reg, reg2offset(r), R1_SP);
    } else {
      // no data motion is needed
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(method->size_of_parameters() > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");
    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register. If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      fatal("receiver always in a register");
      // The two lines below are unreachable after fatal(); they are kept as a
      // template for a platform where the receiver could arrive on the stack.
      receiver_reg = R11_scratch1; // TODO (hs24): is R11_scratch1 really free at this point?
      __ ld(receiver_reg, reg2offset(r), R1_SP);
    } else {
      // no data motion is needed
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, iid,
                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
}
1578
1579 //---------------------------- continuation_enter_setup ---------------------------
1580 //
1581 // Frame setup.
1582 //
1583 // Arguments:
1584 // None.
1585 //
1586 // Results:
1587 // R1_SP: pointer to blank ContinuationEntry in the pushed frame.
1588 //
1589 // Kills:
1590 // R0, R20
1591 //
static OopMap* continuation_enter_setup(MacroAssembler* masm, int& framesize_words) {
  // The ContinuationEntry layout must be stack-slot aligned so its fields can
  // be described by VMReg slot indices.
  assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, "");
  assert(in_bytes(ContinuationEntry::cont_offset()) % VMRegImpl::stack_slot_size == 0, "");
  assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, "");

  const int frame_size_in_bytes = (int)ContinuationEntry::size();
  assert(is_aligned(frame_size_in_bytes, frame::alignment_in_bytes), "alignment error");

  // Report the pushed frame's size back to the caller (in words).
  framesize_words = frame_size_in_bytes / wordSize;

  DEBUG_ONLY(__ block_comment("setup {"));
  // Save return pc and push entry frame
  const Register return_pc = R20;
  __ mflr(return_pc);
  __ std(return_pc, _abi0(lr), R1_SP); // SP->lr = return_pc
  __ push_frame(frame_size_in_bytes , R0); // SP -= frame_size_in_bytes

  OopMap* map = new OopMap((int)frame_size_in_bytes / VMRegImpl::stack_slot_size, 0 /* arg_slots*/);

  // Link the new entry into the thread's continuation-entry chain:
  // entry->parent = thread->cont_entry; thread->cont_entry = entry (= SP).
  __ ld_ptr(R0, JavaThread::cont_entry_offset(), R16_thread);
  __ st_ptr(R1_SP, JavaThread::cont_entry_offset(), R16_thread);
  __ st_ptr(R0, ContinuationEntry::parent_offset(), R1_SP);
  DEBUG_ONLY(__ block_comment("} setup"));

  return map;
}
1618
1619 //---------------------------- fill_continuation_entry ---------------------------
1620 //
1621 // Initialize the new ContinuationEntry.
1622 //
1623 // Arguments:
1624 // R1_SP: pointer to blank Continuation entry
1625 // reg_cont_obj: pointer to the continuation
1626 // reg_flags: flags
1627 //
1628 // Results:
1629 // R1_SP: pointer to filled out ContinuationEntry
1630 //
1631 // Kills:
1632 // R8_ARG6, R9_ARG7, R10_ARG8
1633 //
static void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Register reg_flags) {
  assert_different_registers(reg_cont_obj, reg_flags);
  Register zero = R8_ARG6;
  Register tmp2 = R9_ARG7;

  DEBUG_ONLY(__ block_comment("fill {"));
#ifdef ASSERT
  // Debug builds stamp a cookie so stack walkers can sanity-check the entry.
  __ load_const_optimized(tmp2, ContinuationEntry::cookie_value());
  __ stw(tmp2, in_bytes(ContinuationEntry::cookie_offset()), R1_SP);
#endif //ASSERT

  // Initialize the entry's fields: cont oop and flags from the arguments;
  // chunk, argsize and pin_count start out zero.
  __ li(zero, 0);
  __ st_ptr(reg_cont_obj, ContinuationEntry::cont_offset(), R1_SP);
  __ stw(reg_flags, in_bytes(ContinuationEntry::flags_offset()), R1_SP);
  __ st_ptr(zero, ContinuationEntry::chunk_offset(), R1_SP);
  __ stw(zero, in_bytes(ContinuationEntry::argsize_offset()), R1_SP);
  __ stw(zero, in_bytes(ContinuationEntry::pin_count_offset()), R1_SP);

  // Stash the parent's cont_fastpath in the entry, then clear the thread's
  // cont_fastpath for the new continuation.
  __ ld_ptr(tmp2, JavaThread::cont_fastpath_offset(), R16_thread);
  __ st_ptr(tmp2, ContinuationEntry::parent_cont_fastpath_offset(), R1_SP);

  __ st_ptr(zero, JavaThread::cont_fastpath_offset(), R16_thread);
  DEBUG_ONLY(__ block_comment("} fill"));
}
1658
1659 //---------------------------- continuation_enter_cleanup ---------------------------
1660 //
1661 // Copy corresponding attributes from the top ContinuationEntry to the JavaThread
1662 // before deleting it.
1663 //
1664 // Arguments:
1665 // R1_SP: pointer to the ContinuationEntry
1666 //
1667 // Results:
1668 // None.
1669 //
1670 // Kills:
1671 // R8_ARG6, R9_ARG7, R10_ARG8, R15_esp
1672 //
static void continuation_enter_cleanup(MacroAssembler* masm) {
  Register tmp1 = R8_ARG6;
  Register tmp2 = R9_ARG7;

#ifdef ASSERT
  // The ContinuationEntry being removed must be the thread's topmost one.
  __ block_comment("clean {");
  __ ld_ptr(tmp1, JavaThread::cont_entry_offset(), R16_thread);
  __ cmpd(CR0, R1_SP, tmp1);
  __ asm_assert_eq(FILE_AND_LINE ": incorrect R1_SP");
#endif

  // Restore the parent's cont_fastpath and unlink this entry from the
  // thread's continuation-entry chain (thread->cont_entry = entry->parent).
  __ ld_ptr(tmp1, ContinuationEntry::parent_cont_fastpath_offset(), R1_SP);
  __ st_ptr(tmp1, JavaThread::cont_fastpath_offset(), R16_thread);
  __ ld_ptr(tmp2, ContinuationEntry::parent_offset(), R1_SP);
  __ st_ptr(tmp2, JavaThread::cont_entry_offset(), R16_thread);
  DEBUG_ONLY(__ block_comment("} clean"));
}
1690
// Debug check that the calling convention placed enterSpecial's argument
// `name` in the register this code hard-codes (see gen_continuation_enter).
static void check_continuation_enter_argument(VMReg actual_vmreg,
                                              Register expected_reg,
                                              const char* name) {
  assert(!actual_vmreg->is_stack(), "%s cannot be on stack", name);
  assert(actual_vmreg->as_Register() == expected_reg,
         "%s is in unexpected register: %s instead of %s",
         name, actual_vmreg->as_Register()->name(), expected_reg->name());
}
1699
1700 static void gen_continuation_enter(MacroAssembler* masm,
1701 const VMRegPair* regs,
1702 int& exception_offset,
1703 OopMapSet* oop_maps,
1704 int& frame_complete,
1705 int& framesize_words,
1706 int& interpreted_entry_offset,
1707 int& compiled_entry_offset) {
1708
1709 // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread)
1710 int pos_cont_obj = 0;
1711 int pos_is_cont = 1;
1712 int pos_is_virtual = 2;
1713
1714 // The platform-specific calling convention may present the arguments in various registers.
1715 // To simplify the rest of the code, we expect the arguments to reside at these known
1716 // registers, and we additionally check the placement here in case calling convention ever
1717 // changes.
1718 Register reg_cont_obj = R3_ARG1;
1719 Register reg_is_cont = R4_ARG2;
1720 Register reg_is_virtual = R5_ARG3;
1721
1722 check_continuation_enter_argument(regs[pos_cont_obj].first(), reg_cont_obj, "Continuation object");
1723 check_continuation_enter_argument(regs[pos_is_cont].first(), reg_is_cont, "isContinue");
1724 check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread");
1725
1726 address resolve_static_call = SharedRuntime::get_resolve_static_call_stub();
1727
1728 address start = __ pc();
1729
1730 Label L_thaw, L_exit;
1731
1732 // i2i entry used at interp_only_mode only
1733 interpreted_entry_offset = __ pc() - start;
1734 {
1735 #ifdef ASSERT
1736 Label is_interp_only;
1737 __ lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
1738 __ cmpwi(CR0, R0, 0);
1739 __ bne(CR0, is_interp_only);
1740 __ stop("enterSpecial interpreter entry called when not in interp_only_mode");
1741 __ bind(is_interp_only);
1742 #endif
1743
1744 // Read interpreter arguments into registers (this is an ad-hoc i2c adapter)
1745 __ ld(reg_cont_obj, Interpreter::stackElementSize*3, R15_esp);
1746 __ lwz(reg_is_cont, Interpreter::stackElementSize*2, R15_esp);
1747 __ lwz(reg_is_virtual, Interpreter::stackElementSize*1, R15_esp);
1748
1749 __ push_cont_fastpath();
1750
1751 OopMap* map = continuation_enter_setup(masm, framesize_words);
1752
1753 // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe,
1754 // but that's okay because at the very worst we'll miss an async sample, but we're in interp_only_mode anyway.
1755
1756 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual);
1757
1758 // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue)
1759 __ cmpwi(CR0, reg_is_cont, 0);
1760 __ bne(CR0, L_thaw);
1761
1762 // --- call Continuation.enter(Continuation c, boolean isContinue)
1763
1764 // Emit compiled static call. The call will be always resolved to the c2i
1765 // entry of Continuation.enter(Continuation c, boolean isContinue).
1766 // There are special cases in SharedRuntime::resolve_static_call_C() and
1767 // SharedRuntime::resolve_sub_helper_internal() to achieve this
1768 // See also corresponding call below.
1769 address c2i_call_pc = __ pc();
1770 int start_offset = __ offset();
1771 // Put the entry point as a constant into the constant pool.
1772 const address entry_point_toc_addr = __ address_constant(resolve_static_call, RelocationHolder::none);
1773 const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
1774 guarantee(entry_point_toc_addr != nullptr, "const section overflow");
1775
1776 // Emit the trampoline stub which will be related to the branch-and-link below.
1777 address stub = __ emit_trampoline_stub(entry_point_toc_offset, start_offset);
1778 guarantee(stub != nullptr, "no space for trampoline stub");
1779
1780 __ relocate(relocInfo::static_call_type);
1781 // Note: At this point we do not have the address of the trampoline
1782 // stub, and the entry point might be too far away for bl, so __ pc()
1783 // serves as dummy and the bl will be patched later.
1784 __ bl(__ pc());
1785 oop_maps->add_gc_map(__ pc() - start, map);
1786 __ post_call_nop();
1787
1788 __ b(L_exit);
1789
1790 // static stub for the call above
1791 stub = CompiledDirectCall::emit_to_interp_stub(masm, c2i_call_pc);
1792 guarantee(stub != nullptr, "no space for static stub");
1793 }
1794
1795 // compiled entry
1796 __ align(CodeEntryAlignment);
1797 compiled_entry_offset = __ pc() - start;
1798
1799 OopMap* map = continuation_enter_setup(masm, framesize_words);
1800
1801 // Frame is now completed as far as size and linkage.
1802 frame_complete =__ pc() - start;
1803
1804 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual);
1805
1806 // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue)
1807 __ cmpwi(CR0, reg_is_cont, 0);
1808 __ bne(CR0, L_thaw);
1809
1810 // --- call Continuation.enter(Continuation c, boolean isContinue)
1811
1812 // Emit compiled static call
1813 // The call needs to be resolved. There's a special case for this in
1814 // SharedRuntime::find_callee_info_helper() which calls
1815 // LinkResolver::resolve_continuation_enter() which resolves the call to
1816 // Continuation.enter(Continuation c, boolean isContinue).
1817 address call_pc = __ pc();
1818 int start_offset = __ offset();
1819 // Put the entry point as a constant into the constant pool.
1820 const address entry_point_toc_addr = __ address_constant(resolve_static_call, RelocationHolder::none);
1821 const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
1822 guarantee(entry_point_toc_addr != nullptr, "const section overflow");
1823
1824 // Emit the trampoline stub which will be related to the branch-and-link below.
1825 address stub = __ emit_trampoline_stub(entry_point_toc_offset, start_offset);
1826 guarantee(stub != nullptr, "no space for trampoline stub");
1827
1828 __ relocate(relocInfo::static_call_type);
1829 // Note: At this point we do not have the address of the trampoline
1830 // stub, and the entry point might be too far away for bl, so __ pc()
1831 // serves as dummy and the bl will be patched later.
1832 __ bl(__ pc());
1833 oop_maps->add_gc_map(__ pc() - start, map);
1834 __ post_call_nop();
1835
1836 __ b(L_exit);
1837
1838 // --- Thawing path
1839
1840 __ bind(L_thaw);
1841 ContinuationEntry::_thaw_call_pc_offset = __ pc() - start;
1842 __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(StubRoutines::cont_thaw()));
1843 __ mtctr(R0);
1844 __ bctrl();
1845 oop_maps->add_gc_map(__ pc() - start, map->deep_copy());
1846 ContinuationEntry::_return_pc_offset = __ pc() - start;
1847 __ post_call_nop();
1848
1849 // --- Normal exit (resolve/thawing)
1850
1851 __ bind(L_exit);
1852 ContinuationEntry::_cleanup_offset = __ pc() - start;
1853 continuation_enter_cleanup(masm);
1854
1855 // Pop frame and return
1856 DEBUG_ONLY(__ ld_ptr(R0, 0, R1_SP));
1857 __ addi(R1_SP, R1_SP, framesize_words*wordSize);
1858 DEBUG_ONLY(__ cmpd(CR0, R0, R1_SP));
1859 __ asm_assert_eq(FILE_AND_LINE ": inconsistent frame size");
1860 __ ld(R0, _abi0(lr), R1_SP); // Return pc
1861 __ mtlr(R0);
1862 __ blr();
1863
1864 // --- Exception handling path
1865
1866 exception_offset = __ pc() - start;
1867
1868 continuation_enter_cleanup(masm);
1869 Register ex_pc = R17_tos; // nonvolatile register
1870 Register ex_oop = R15_esp; // nonvolatile register
1871 __ ld(ex_pc, _abi0(callers_sp), R1_SP); // Load caller's return pc
1872 __ ld(ex_pc, _abi0(lr), ex_pc);
1873 __ mr(ex_oop, R3_RET); // save return value containing the exception oop
1874 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), R16_thread, ex_pc);
1875 __ mtlr(R3_RET); // the exception handler
1876 __ ld(R1_SP, _abi0(callers_sp), R1_SP); // remove enterSpecial frame
1877
1878 // Continue at exception handler
1879 // See OptoRuntime::generate_exception_blob for register arguments
1880 __ mr(R3_ARG1, ex_oop); // pass exception oop
1881 __ mr(R4_ARG2, ex_pc); // pass exception pc
1882 __ blr();
1883
1884 // static stub for the call above
1885 stub = CompiledDirectCall::emit_to_interp_stub(masm, call_pc);
1886 guarantee(stub != nullptr, "no space for static stub");
1887 }
1888
// Generate the intrinsic stub for Continuation.doYield(): attempt to freeze
// the current continuation's frames.
//
// On success all frames of the continuation, including this stub's frame, are
// popped and execution resumes in the frame pushed by gen_continuation_enter.
// If the yield fails (continuation is pinned), the stub either returns to its
// caller with the result still in R3_RET, or forwards a pending exception
// raised by the freeze call.
//
// Out parameters (consumed by generate_native_wrapper):
//   frame_complete        - code offset at which this stub's frame is fully set up
//   framesize_words       - size of this stub's frame in words
//   compiled_entry_offset - code offset of the compiled entry point (0 here)
// NOTE(review): 'regs' is unused on PPC — presumably kept for signature parity
// with other platforms; confirm before removing.
static void gen_continuation_yield(MacroAssembler* masm,
                                   const VMRegPair* regs,
                                   OopMapSet* oop_maps,
                                   int& frame_complete,
                                   int& framesize_words,
                                   int& compiled_entry_offset) {
  Register tmp = R10_ARG8;

  // A minimal native ABI frame suffices; align to the platform frame alignment.
  const int framesize_bytes = (int)align_up((int)frame::native_abi_reg_args_size, frame::alignment_in_bytes);
  framesize_words = framesize_bytes / wordSize;

  address start = __ pc();
  compiled_entry_offset = __ pc() - start; // Compiled entry is at the very start.

  // Save return pc and push entry frame
  __ mflr(tmp);
  __ std(tmp, _abi0(lr), R1_SP);       // SP->lr = return_pc
  __ push_frame(framesize_bytes , R0); // SP -= frame_size_in_bytes

  DEBUG_ONLY(__ block_comment("Frame Complete"));
  frame_complete = __ pc() - start;
  address last_java_pc = __ pc();

  // This nop must be exactly at the PC we push into the frame info.
  // We use this nop for fast CodeBlob lookup, associate the OopMap
  // with it right away.
  __ post_call_nop();
  OopMap* map = new OopMap(framesize_bytes / VMRegImpl::stack_slot_size, 1);
  oop_maps->add_gc_map(last_java_pc - start, map);

  // Set up the last Java frame so the VM can walk the stack during the freeze.
  __ calculate_address_from_global_toc(tmp, last_java_pc); // will be relocated
  __ set_last_Java_frame(R1_SP, tmp);
  // Call into the VM to freeze the continuation; R3_RET == 0 means success.
  __ call_VM_leaf(Continuation::freeze_entry(), R16_thread, R1_SP);
  __ reset_last_Java_frame();

  Label L_pinned;

  __ cmpwi(CR0, R3_RET, 0);
  __ bne(CR0, L_pinned);

  // yield succeeded

  // Pop frames of continuation including this stub's frame
  __ ld_ptr(R1_SP, JavaThread::cont_entry_offset(), R16_thread);
  // The frame pushed by gen_continuation_enter is on top now again
  continuation_enter_cleanup(masm);

  // Pop frame and return
  Label L_return;
  __ bind(L_return);
  __ pop_frame();
  __ ld(R0, _abi0(lr), R1_SP); // Return pc
  __ mtlr(R0);
  __ blr();

  // yield failed - continuation is pinned

  __ bind(L_pinned);

  // handle pending exception thrown by freeze
  __ ld(tmp, in_bytes(JavaThread::pending_exception_offset()), R16_thread);
  __ cmpdi(CR0, tmp, 0);
  __ beq(CR0, L_return); // return if no exception is pending
  // Exception pending: pop our frame and jump to the forward-exception stub,
  // which dispatches to the appropriate handler in the caller.
  __ pop_frame();
  __ ld(R0, _abi0(lr), R1_SP); // Return pc
  __ mtlr(R0);
  __ load_const_optimized(tmp, StubRoutines::forward_exception_entry(), R0);
  __ mtctr(tmp);
  __ bctr();
}
1959
// Public entry point used by shared runtime code: forwards to the file-local
// continuation_enter_cleanup() helper, which emits the code that tears down a
// continuation-enter frame.
void SharedRuntime::continuation_enter_cleanup(MacroAssembler* masm) {
  ::continuation_enter_cleanup(masm);
}
1963
1964 // ---------------------------------------------------------------------------
1965 // Generate a native wrapper for a given method. The method takes arguments
1966 // in the Java compiled code convention, marshals them to the native
1967 // convention (handlizes oops, etc), transitions to native, makes the call,
1968 // returns to java state (possibly blocking), unhandlizes any result and
1969 // returns.
1970 //
1971 // Critical native functions are a shorthand for the use of
// GetPrimitiveArrayCritical and disallow the use of any other JNI
1973 // functions. The wrapper is expected to unpack the arguments before
1974 // passing them to the callee. Critical native functions leave the state _in_Java,
1975 // since they cannot stop for GC.
// Some other parts of JNI setup are skipped, like the tear-down of the JNI handle
// block and the check for pending exceptions, because it is impossible for them
// to be thrown.
1979 //
1980 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1981 const methodHandle& method,
1982 int compile_id,
1983 BasicType *in_sig_bt,
1984 VMRegPair *in_regs,
1985 BasicType ret_type) {
1986 if (method->is_continuation_native_intrinsic()) {
1987 int exception_offset = -1;
1988 OopMapSet* oop_maps = new OopMapSet();
1989 int frame_complete = -1;
1990 int stack_slots = -1;
1991 int interpreted_entry_offset = -1;
1992 int vep_offset = -1;
1993 if (method->is_continuation_enter_intrinsic()) {
1994 gen_continuation_enter(masm,
1995 in_regs,
1996 exception_offset,
1997 oop_maps,
1998 frame_complete,
1999 stack_slots,
2000 interpreted_entry_offset,
2001 vep_offset);
2002 } else if (method->is_continuation_yield_intrinsic()) {
2003 gen_continuation_yield(masm,
2004 in_regs,
2005 oop_maps,
2006 frame_complete,
2007 stack_slots,
2008 vep_offset);
2009 } else {
2010 guarantee(false, "Unknown Continuation native intrinsic");
2011 }
2012
2013 #ifdef ASSERT
2014 if (method->is_continuation_enter_intrinsic()) {
2015 assert(interpreted_entry_offset != -1, "Must be set");
2016 assert(exception_offset != -1, "Must be set");
2017 } else {
2018 assert(interpreted_entry_offset == -1, "Must be unset");
2019 assert(exception_offset == -1, "Must be unset");
2020 }
2021 assert(frame_complete != -1, "Must be set");
2022 assert(stack_slots != -1, "Must be set");
2023 assert(vep_offset != -1, "Must be set");
2024 #endif
2025
2026 __ flush();
2027 nmethod* nm = nmethod::new_native_nmethod(method,
2028 compile_id,
2029 masm->code(),
2030 vep_offset,
2031 frame_complete,
2032 stack_slots,
2033 in_ByteSize(-1),
2034 in_ByteSize(-1),
2035 oop_maps,
2036 exception_offset);
2037 if (nm == nullptr) return nm;
2038 if (method->is_continuation_enter_intrinsic()) {
2039 ContinuationEntry::set_enter_code(nm, interpreted_entry_offset);
2040 } else if (method->is_continuation_yield_intrinsic()) {
2041 _cont_doYield_stub = nm;
2042 }
2043 return nm;
2044 }
2045
2046 if (method->is_method_handle_intrinsic()) {
2047 vmIntrinsics::ID iid = method->intrinsic_id();
2048 intptr_t start = (intptr_t)__ pc();
2049 int vep_offset = ((intptr_t)__ pc()) - start;
2050 gen_special_dispatch(masm,
2051 method,
2052 in_sig_bt,
2053 in_regs);
2054 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period
2055 __ flush();
2056 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
2057 return nmethod::new_native_nmethod(method,
2058 compile_id,
2059 masm->code(),
2060 vep_offset,
2061 frame_complete,
2062 stack_slots / VMRegImpl::slots_per_word,
2063 in_ByteSize(-1),
2064 in_ByteSize(-1),
2065 (OopMapSet*)nullptr);
2066 }
2067
2068 address native_func = method->native_function();
2069 assert(native_func != nullptr, "must have function");
2070
2071 // First, create signature for outgoing C call
2072 // --------------------------------------------------------------------------
2073
2074 int total_in_args = method->size_of_parameters();
2075 // We have received a description of where all the java args are located
2076 // on entry to the wrapper. We need to convert these args to where
2077 // the jni function will expect them. To figure out where they go
2078 // we convert the java signature to a C signature by inserting
2079 // the hidden arguments as arg[0] and possibly arg[1] (static method)
2080
2081 // Calculate the total number of C arguments and create arrays for the
2082 // signature and the outgoing registers.
2083 // On ppc64, we have two arrays for the outgoing registers, because
2084 // some floating-point arguments must be passed in registers _and_
2085 // in stack locations.
2086 bool method_is_static = method->is_static();
2087 int total_c_args = total_in_args + (method_is_static ? 2 : 1);
2088
2089 BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
2090 VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
2091
2092 // Create the signature for the C call:
2093 // 1) add the JNIEnv*
2094 // 2) add the class if the method is static
2095 // 3) copy the rest of the incoming signature (shifted by the number of
2096 // hidden arguments).
2097
2098 int argc = 0;
2099 out_sig_bt[argc++] = T_ADDRESS;
2100 if (method->is_static()) {
2101 out_sig_bt[argc++] = T_OBJECT;
2102 }
2103
2104 for (int i = 0; i < total_in_args ; i++ ) {
2105 out_sig_bt[argc++] = in_sig_bt[i];
2106 }
2107
2108
2109 // Compute the wrapper's frame size.
2110 // --------------------------------------------------------------------------
2111
2112 // Now figure out where the args must be stored and how much stack space
2113 // they require.
2114 //
2115 // Compute framesize for the wrapper. We need to handlize all oops in
2116 // incoming registers.
2117 //
2118 // Calculate the total number of stack slots we will need:
2119 // 1) abi requirements
2120 // 2) outgoing arguments
2121 // 3) space for inbound oop handle area
2122 // 4) space for handlizing a klass if static method
2123 // 5) space for a lock if synchronized method
2124 // 6) workspace for saving return values, int <-> float reg moves, etc.
2125 // 7) alignment
2126 //
2127 // Layout of the native wrapper frame:
2128 // (stack grows upwards, memory grows downwards)
2129 //
2130 // NW [ABI_REG_ARGS] <-- 1) R1_SP
2131 // [outgoing arguments] <-- 2) R1_SP + out_arg_slot_offset
2132 // [oopHandle area] <-- 3) R1_SP + oop_handle_offset
2133 // klass <-- 4) R1_SP + klass_offset
2134 // lock <-- 5) R1_SP + lock_offset
2135 // [workspace] <-- 6) R1_SP + workspace_offset
2136 // [alignment] (optional) <-- 7)
2137 // caller [JIT_TOP_ABI_48] <-- r_callers_sp
2138 //
2139 // - *_slot_offset Indicates offset from SP in number of stack slots.
2140 // - *_offset Indicates offset from SP in bytes.
2141
2142 int stack_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args) + // 1+2)
2143 SharedRuntime::out_preserve_stack_slots(); // See c_calling_convention.
2144
2145 // Now the space for the inbound oop handle area.
2146 int total_save_slots = num_java_iarg_registers * VMRegImpl::slots_per_word;
2147
2148 int oop_handle_slot_offset = stack_slots;
2149 stack_slots += total_save_slots; // 3)
2150
2151 int klass_slot_offset = 0;
2152 int klass_offset = -1;
2153 if (method_is_static) { // 4)
2154 klass_slot_offset = stack_slots;
2155 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
2156 stack_slots += VMRegImpl::slots_per_word;
2157 }
2158
2159 int lock_slot_offset = 0;
2160 int lock_offset = -1;
2161 if (method->is_synchronized()) { // 5)
2162 lock_slot_offset = stack_slots;
2163 lock_offset = lock_slot_offset * VMRegImpl::stack_slot_size;
2164 stack_slots += VMRegImpl::slots_per_word;
2165 }
2166
2167 int workspace_slot_offset = stack_slots; // 6)
2168 stack_slots += 2;
2169
2170 // Now compute actual number of stack words we need.
2171 // Rounding to make stack properly aligned.
2172 stack_slots = align_up(stack_slots, // 7)
2173 frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
2174 int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
2175
2176
2177 // Now we can start generating code.
2178 // --------------------------------------------------------------------------
2179
2180 intptr_t start_pc = (intptr_t)__ pc();
2181 intptr_t vep_start_pc;
2182 intptr_t frame_done_pc;
2183
2184 Label handle_pending_exception;
2185 Label last_java_pc;
2186
2187 Register r_callers_sp = R21;
2188 Register r_temp_1 = R22;
2189 Register r_temp_2 = R23;
2190 Register r_temp_3 = R24;
2191 Register r_temp_4 = R25;
2192 Register r_temp_5 = R26;
2193 Register r_temp_6 = R27;
2194 Register r_last_java_pc = R28;
2195
2196 Register r_carg1_jnienv = noreg;
2197 Register r_carg2_classorobject = noreg;
2198 r_carg1_jnienv = out_regs[0].first()->as_Register();
2199 r_carg2_classorobject = out_regs[1].first()->as_Register();
2200
2201
2202 // Generate the Unverified Entry Point (UEP).
2203 // --------------------------------------------------------------------------
2204 assert(start_pc == (intptr_t)__ pc(), "uep must be at start");
2205
2206 // Check ic: object class == cached class?
2207 if (!method_is_static) {
2208 __ ic_check(4 /* end_alignment */);
2209 }
2210
2211 // Generate the Verified Entry Point (VEP).
2212 // --------------------------------------------------------------------------
2213 vep_start_pc = (intptr_t)__ pc();
2214
2215 if (method->needs_clinit_barrier()) {
2216 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
2217 Label L_skip_barrier;
2218 Register klass = r_temp_1;
2219 // Notify OOP recorder (don't need the relocation)
2220 AddressLiteral md = __ constant_metadata_address(method->method_holder());
2221 __ load_const_optimized(klass, md.value(), R0);
2222 __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);
2223
2224 __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
2225 __ mtctr(klass);
2226 __ bctr();
2227
2228 __ bind(L_skip_barrier);
2229 }
2230
2231 __ save_LR(r_temp_1);
2232 __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame.
2233 __ mr(r_callers_sp, R1_SP); // Remember frame pointer.
2234 __ push_frame(frame_size_in_bytes, r_temp_1); // Push the c2n adapter's frame.
2235
2236 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2237 bs->nmethod_entry_barrier(masm, r_temp_1);
2238
2239 frame_done_pc = (intptr_t)__ pc();
2240
2241 // Native nmethod wrappers never take possession of the oop arguments.
2242 // So the caller will gc the arguments.
2243 // The only thing we need an oopMap for is if the call is static.
2244 //
2245 // An OopMap for lock (and class if static), and one for the VM call itself.
2246 OopMapSet *oop_maps = new OopMapSet();
2247 OopMap *oop_map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
2248
2249 // Move arguments from register/stack to register/stack.
2250 // --------------------------------------------------------------------------
2251 //
2252 // We immediately shuffle the arguments so that for any vm call we have
2253 // to make from here on out (sync slow path, jvmti, etc.) we will have
2254 // captured the oops from our caller and have a valid oopMap for them.
2255 //
2256 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
2257 // (derived from JavaThread* which is in R16_thread) and, if static,
2258 // the class mirror instead of a receiver. This pretty much guarantees that
2259 // register layout will not match. We ignore these extra arguments during
2260 // the shuffle. The shuffle is described by the two calling convention
2261 // vectors we have in our possession. We simply walk the java vector to
2262 // get the source locations and the c vector to get the destinations.
2263
2264 // Record sp-based slot for receiver on stack for non-static methods.
2265 int receiver_offset = -1;
2266
2267 // We move the arguments backward because the floating point registers
2268 // destination will always be to a register with a greater or equal
2269 // register number or the stack.
2270 // in is the index of the incoming Java arguments
2271 // out is the index of the outgoing C arguments
2272
2273 #ifdef ASSERT
2274 bool reg_destroyed[Register::number_of_registers];
2275 bool freg_destroyed[FloatRegister::number_of_registers];
2276 for (int r = 0 ; r < Register::number_of_registers ; r++) {
2277 reg_destroyed[r] = false;
2278 }
2279 for (int f = 0 ; f < FloatRegister::number_of_registers ; f++) {
2280 freg_destroyed[f] = false;
2281 }
2282 #endif // ASSERT
2283
2284 for (int in = total_in_args - 1, out = total_c_args - 1; in >= 0 ; in--, out--) {
2285
2286 #ifdef ASSERT
2287 if (in_regs[in].first()->is_Register()) {
2288 assert(!reg_destroyed[in_regs[in].first()->as_Register()->encoding()], "ack!");
2289 } else if (in_regs[in].first()->is_FloatRegister()) {
2290 assert(!freg_destroyed[in_regs[in].first()->as_FloatRegister()->encoding()], "ack!");
2291 }
2292 if (out_regs[out].first()->is_Register()) {
2293 reg_destroyed[out_regs[out].first()->as_Register()->encoding()] = true;
2294 } else if (out_regs[out].first()->is_FloatRegister()) {
2295 freg_destroyed[out_regs[out].first()->as_FloatRegister()->encoding()] = true;
2296 }
2297 #endif // ASSERT
2298
2299 switch (in_sig_bt[in]) {
2300 case T_BOOLEAN:
2301 case T_CHAR:
2302 case T_BYTE:
2303 case T_SHORT:
2304 case T_INT:
2305 // Move int and do sign extension.
2306 int_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2307 break;
2308 case T_LONG:
2309 long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2310 break;
2311 case T_ARRAY:
2312 case T_OBJECT:
2313 object_move(masm, stack_slots,
2314 oop_map, oop_handle_slot_offset,
2315 ((in == 0) && (!method_is_static)), &receiver_offset,
2316 in_regs[in], out_regs[out],
2317 r_callers_sp, r_temp_1, r_temp_2);
2318 break;
2319 case T_VOID:
2320 break;
2321 case T_FLOAT:
2322 float_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2323 break;
2324 case T_DOUBLE:
2325 double_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2326 break;
2327 case T_ADDRESS:
2328 fatal("found type (T_ADDRESS) in java args");
2329 break;
2330 default:
2331 ShouldNotReachHere();
2332 break;
2333 }
2334 }
2335
2336 // Pre-load a static method's oop into ARG2.
2337 // Used both by locking code and the normal JNI call code.
2338 if (method_is_static) {
2339 __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()),
2340 r_carg2_classorobject);
2341
2342 // Now handlize the static class mirror in carg2. It's known not-null.
2343 __ std(r_carg2_classorobject, klass_offset, R1_SP);
2344 oop_map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2345 __ addi(r_carg2_classorobject, R1_SP, klass_offset);
2346 }
2347
2348 // Get JNIEnv* which is first argument to native.
2349 __ addi(r_carg1_jnienv, R16_thread, in_bytes(JavaThread::jni_environment_offset()));
2350
2351 // NOTE:
2352 //
2353 // We have all of the arguments setup at this point.
2354 // We MUST NOT touch any outgoing regs from this point on.
2355 // So if we must call out we must push a new frame.
2356
2357 // The last java pc will also be used as resume pc if this is the wrapper for wait0.
2358 // For this purpose the precise location matters but not for oopmap lookup.
2359 __ calculate_address_from_global_toc(r_last_java_pc, last_java_pc, true, true, true, true);
2360
2361 // Make sure that thread is non-volatile; it crosses a bunch of VM calls below.
2362 assert(R16_thread->is_nonvolatile(), "thread must be in non-volatile register");
2363
2364 # if 0
2365 // DTrace method entry
2366 # endif
2367
2368 // Lock a synchronized method.
2369 // --------------------------------------------------------------------------
2370
2371 if (method->is_synchronized()) {
2372 Register r_oop = r_temp_4;
2373 const Register r_box = r_temp_5;
2374 Label done, locked;
2375
2376 // Load the oop for the object or class. r_carg2_classorobject contains
2377 // either the handlized oop from the incoming arguments or the handlized
2378 // class mirror (if the method is static).
2379 __ ld(r_oop, 0, r_carg2_classorobject);
2380
2381 // Get the lock box slot's address.
2382 __ addi(r_box, R1_SP, lock_offset);
2383
2384 // Try fastpath for locking.
2385 // fast_lock kills r_temp_1, r_temp_2, r_temp_3.
2386 Register r_temp_3_or_noreg = UseObjectMonitorTable ? r_temp_3 : noreg;
2387 __ compiler_fast_lock_object(CR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3_or_noreg);
2388 __ beq(CR0, locked);
2389
2390 // None of the above fast optimizations worked so we have to get into the
2391 // slow case of monitor enter. Inline a special case of call_VM that
2392 // disallows any pending_exception.
2393
2394 // Save argument registers and leave room for C-compatible ABI_REG_ARGS.
2395 int frame_size = frame::native_abi_reg_args_size + align_up(total_c_args * wordSize, frame::alignment_in_bytes);
2396 __ mr(R11_scratch1, R1_SP);
2397 RegisterSaver::push_frame_and_save_argument_registers(masm, R12_scratch2, frame_size, total_c_args, out_regs);
2398
2399 // Do the call.
2400 __ set_last_Java_frame(R11_scratch1, r_last_java_pc);
2401 assert(r_last_java_pc->is_nonvolatile(), "r_last_java_pc needs to be preserved accross complete_monitor_locking_C call");
2402 // The following call will not be preempted.
2403 // push_cont_fastpath forces freeze slow path in case we try to preempt where we will pin the
2404 // vthread to the carrier (see FreezeBase::recurse_freeze_native_frame()).
2405 __ push_cont_fastpath();
2406 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), r_oop, r_box, R16_thread);
2407 __ pop_cont_fastpath();
2408 __ reset_last_Java_frame();
2409
2410 RegisterSaver::restore_argument_registers_and_pop_frame(masm, frame_size, total_c_args, out_regs);
2411
2412 __ asm_assert_mem8_is_zero(thread_(pending_exception),
2413 "no pending exception allowed on exit from SharedRuntime::complete_monitor_locking_C");
2414
2415 __ bind(locked);
2416 }
2417
2418 __ set_last_Java_frame(R1_SP, r_last_java_pc);
2419
2420 // Publish thread state
2421 // --------------------------------------------------------------------------
2422
2423 // Transition from _thread_in_Java to _thread_in_native.
2424 __ li(R0, _thread_in_native);
2425 __ release();
2426 // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2427 __ stw(R0, thread_(thread_state));
2428
2429
2430 // The JNI call
2431 // --------------------------------------------------------------------------
2432 __ call_c(native_func, relocInfo::runtime_call_type);
2433
2434
2435 // Now, we are back from the native code.
2436
2437
2438 // Unpack the native result.
2439 // --------------------------------------------------------------------------
2440
2441 // For int-types, we do any needed sign-extension required.
2442 // Care must be taken that the return values (R3_RET and F1_RET)
2443 // will survive any VM calls for blocking or unlocking.
2444 // An OOP result (handle) is done specially in the slow-path code.
2445
2446 switch (ret_type) {
2447 case T_VOID: break; // Nothing to do!
2448 case T_FLOAT: break; // Got it where we want it (unless slow-path).
2449 case T_DOUBLE: break; // Got it where we want it (unless slow-path).
2450 case T_LONG: break; // Got it where we want it (unless slow-path).
2451 case T_OBJECT: break; // Really a handle.
2452 // Cannot de-handlize until after reclaiming jvm_lock.
2453 case T_ARRAY: break;
2454
2455 case T_BOOLEAN: { // 0 -> false(0); !0 -> true(1)
2456 __ normalize_bool(R3_RET);
2457 break;
2458 }
2459 case T_BYTE: { // sign extension
2460 __ extsb(R3_RET, R3_RET);
2461 break;
2462 }
2463 case T_CHAR: { // unsigned result
2464 __ andi(R3_RET, R3_RET, 0xffff);
2465 break;
2466 }
2467 case T_SHORT: { // sign extension
2468 __ extsh(R3_RET, R3_RET);
2469 break;
2470 }
2471 case T_INT: // nothing to do
2472 break;
2473 default:
2474 ShouldNotReachHere();
2475 break;
2476 }
2477
2478 // Publish thread state
2479 // --------------------------------------------------------------------------
2480
2481 // Switch thread to "native transition" state before reading the
2482 // synchronization state. This additional state is necessary because reading
2483 // and testing the synchronization state is not atomic w.r.t. GC, as this
2484 // scenario demonstrates:
2485 // - Java thread A, in _thread_in_native state, loads _not_synchronized
2486 // and is preempted.
2487 // - VM thread changes sync state to synchronizing and suspends threads
2488 // for GC.
2489 // - Thread A is resumed to finish this native method, but doesn't block
2490 // here since it didn't see any synchronization in progress, and escapes.
2491
2492 // Transition from _thread_in_native to _thread_in_native_trans.
2493 __ li(R0, _thread_in_native_trans);
2494 __ release();
2495 // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2496 __ stw(R0, thread_(thread_state));
2497
2498
2499 // Must we block?
2500 // --------------------------------------------------------------------------
2501
2502 // Block, if necessary, before resuming in _thread_in_Java state.
2503 // In order for GC to work, don't clear the last_Java_sp until after blocking.
2504 {
2505 Label no_block, sync;
2506
2507 // Force this write out before the read below.
2508 if (!UseSystemMemoryBarrier) {
2509 __ fence();
2510 }
2511
2512 Register sync_state_addr = r_temp_4;
2513 Register sync_state = r_temp_5;
2514 Register suspend_flags = r_temp_6;
2515
2516 // No synchronization in progress nor yet synchronized
2517 // (cmp-br-isync on one path, release (same as acquire on PPC64) on the other path).
2518 __ safepoint_poll(sync, sync_state, true /* at_return */, false /* in_nmethod */);
2519
2520 // Not suspended.
2521 // TODO: PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
2522 __ lwz(suspend_flags, thread_(suspend_flags));
2523 __ cmpwi(CR1, suspend_flags, 0);
2524 __ beq(CR1, no_block);
2525
2526 // Block. Save any potential method result value before the operation and
2527 // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2528 // lets us share the oopMap we used when we went native rather than create
2529 // a distinct one for this pc.
2530 __ bind(sync);
2531 __ isync();
2532
2533 address entry_point =
2534 CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2535 save_native_result(masm, ret_type, workspace_slot_offset);
2536 __ call_VM_leaf(entry_point, R16_thread);
2537 restore_native_result(masm, ret_type, workspace_slot_offset);
2538
2539 __ bind(no_block);
2540
2541 // Publish thread state.
2542 // --------------------------------------------------------------------------
2543
2544 // Thread state is thread_in_native_trans. Any safepoint blocking has
2545 // already happened so we can now change state to _thread_in_Java.
2546
2547 // Transition from _thread_in_native_trans to _thread_in_Java.
2548 __ li(R0, _thread_in_Java);
2549 __ lwsync(); // Acquire safepoint and suspend state, release thread state.
2550 // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2551 __ stw(R0, thread_(thread_state));
2552
2553 // Check preemption for Object.wait()
2554 if (method->is_object_wait0()) {
2555 Label not_preempted;
2556 __ ld(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread);
2557 __ cmpdi(CR0, R0, 0);
2558 __ beq(CR0, not_preempted);
2559 __ mtlr(R0);
2560 __ li(R0, 0);
2561 __ std(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread);
2562 __ blr();
2563 __ bind(not_preempted);
2564 }
2565 __ bind(last_java_pc);
2566 // We use the same pc/oopMap repeatedly when we call out above.
2567 intptr_t oopmap_pc = (intptr_t) __ pc();
2568 oop_maps->add_gc_map(oopmap_pc - start_pc, oop_map);
2569 }
2570
2571 // Reguard any pages if necessary.
2572 // --------------------------------------------------------------------------
2573
2574 Label no_reguard;
2575 __ lwz(r_temp_1, thread_(stack_guard_state));
2576 __ cmpwi(CR0, r_temp_1, StackOverflow::stack_guard_yellow_reserved_disabled);
2577 __ bne(CR0, no_reguard);
2578
2579 save_native_result(masm, ret_type, workspace_slot_offset);
2580 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
2581 restore_native_result(masm, ret_type, workspace_slot_offset);
2582
2583 __ bind(no_reguard);
2584
2585
2586 // Unlock
2587 // --------------------------------------------------------------------------
2588
2589 if (method->is_synchronized()) {
2590 const Register r_oop = r_temp_4;
2591 const Register r_box = r_temp_5;
2592 const Register r_exception = r_temp_6;
2593 Label done;
2594
2595 // Get oop and address of lock object box.
2596 if (method_is_static) {
2597 assert(klass_offset != -1, "");
2598 __ ld(r_oop, klass_offset, R1_SP);
2599 } else {
2600 assert(receiver_offset != -1, "");
2601 __ ld(r_oop, receiver_offset, R1_SP);
2602 }
2603 __ addi(r_box, R1_SP, lock_offset);
2604
2605 // Try fastpath for unlocking.
2606 __ compiler_fast_unlock_object(CR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
2607 __ beq(CR0, done);
2608
2609 // Save and restore any potential method result value around the unlocking operation.
2610 save_native_result(masm, ret_type, workspace_slot_offset);
2611
2612 // Must save pending exception around the slow-path VM call. Since it's a
2613 // leaf call, the pending exception (if any) can be kept in a register.
2614 __ ld(r_exception, thread_(pending_exception));
2615 assert(r_exception->is_nonvolatile(), "exception register must be non-volatile");
2616 __ li(R0, 0);
2617 __ std(R0, thread_(pending_exception));
2618
2619 // Slow case of monitor enter.
2620 // Inline a special case of call_VM that disallows any pending_exception.
2621 // Arguments are (oop obj, BasicLock* lock, JavaThread* thread).
2622 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box, R16_thread);
2623
2624 __ asm_assert_mem8_is_zero(thread_(pending_exception),
2625 "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C");
2626
2627 restore_native_result(masm, ret_type, workspace_slot_offset);
2628
2629 // Check_forward_pending_exception jump to forward_exception if any pending
2630 // exception is set. The forward_exception routine expects to see the
2631 // exception in pending_exception and not in a register. Kind of clumsy,
2632 // since all folks who branch to forward_exception must have tested
2633 // pending_exception first and hence have it in a register already.
2634 __ std(r_exception, thread_(pending_exception));
2635
2636 __ bind(done);
2637 }
2638
2639 # if 0
2640 // DTrace method exit
2641 # endif
2642
2643 // Clear "last Java frame" SP and PC.
2644 // --------------------------------------------------------------------------
2645
2646 // Last java frame won't be set if we're resuming after preemption
2647 bool maybe_preempted = method->is_object_wait0();
2648 __ reset_last_Java_frame(!maybe_preempted /* check_last_java_sp */);
2649
2650 // Unbox oop result, e.g. JNIHandles::resolve value.
2651 // --------------------------------------------------------------------------
2652
2653 if (is_reference_type(ret_type)) {
2654 __ resolve_jobject(R3_RET, r_temp_1, r_temp_2, MacroAssembler::PRESERVATION_NONE);
2655 }
2656
2657 if (CheckJNICalls) {
2658 // clear_pending_jni_exception_check
2659 __ load_const_optimized(R0, 0L);
2660 __ st_ptr(R0, JavaThread::pending_jni_exception_check_fn_offset(), R16_thread);
2661 }
2662
2663 // Reset handle block.
2664 // --------------------------------------------------------------------------
2665 __ ld(r_temp_1, thread_(active_handles));
2666 // TODO: PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
2667 __ li(r_temp_2, 0);
2668 __ stw(r_temp_2, in_bytes(JNIHandleBlock::top_offset()), r_temp_1);
2669
2670 // Prepare for return
2671 // --------------------------------------------------------------------------
2672 __ pop_frame();
2673 __ restore_LR(R11);
2674
2675 #if INCLUDE_JFR
2676 // We need to do a poll test after unwind in case the sampler
2677 // managed to sample the native frame after returning to Java.
2678 Label L_stub;
2679 int safepoint_offset = __ offset();
2680 if (!UseSIGTRAP) {
2681 __ relocate(relocInfo::poll_return_type);
2682 }
2683 __ safepoint_poll(L_stub, r_temp_2, true /* at_return */, true /* in_nmethod: frame already popped */);
2684 #endif // INCLUDE_JFR
2685
2686 // Check for pending exceptions.
2687 // --------------------------------------------------------------------------
2688 __ ld(r_temp_2, thread_(pending_exception));
2689 __ cmpdi(CR0, r_temp_2, 0);
2690 __ bne(CR0, handle_pending_exception);
2691
2692 // Return.
2693 __ blr();
2694
2695 // Handler for return safepoint (out-of-line).
2696 #if INCLUDE_JFR
2697 if (!UseSIGTRAP) {
2698 __ bind(L_stub);
2699 __ jump_to_polling_page_return_handler_blob(safepoint_offset);
2700 }
2701 #endif // INCLUDE_JFR
2702
2703 // Handler for pending exceptions (out-of-line).
2704 // --------------------------------------------------------------------------
2705 // Since this is a native call, we know the proper exception handler
2706 // is the empty function. We just pop this frame and then jump to
2707 // forward_exception_entry.
2708 __ bind(handle_pending_exception);
2709 __ b64_patchable((address)StubRoutines::forward_exception_entry(),
2710 relocInfo::runtime_call_type);
2711
2712 // Done.
2713 // --------------------------------------------------------------------------
2714
2715 __ flush();
2716
2717 nmethod *nm = nmethod::new_native_nmethod(method,
2718 compile_id,
2719 masm->code(),
2720 vep_start_pc-start_pc,
2721 frame_done_pc-start_pc,
2722 stack_slots / VMRegImpl::slots_per_word,
2723 (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2724 in_ByteSize(lock_offset),
2725 oop_maps);
2726
2727 return nm;
2728 }
2729
2730 // This function returns the adjust size (in number of words) to a c2i adapter
2731 // activation for use during deoptimization.
2732 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2733 return align_up((callee_locals - callee_parameters) * Interpreter::stackElementWords, frame::frame_alignment_in_words);
2734 }
2735
2736 uint SharedRuntime::in_preserve_stack_slots() {
2737 return frame::jit_in_preserve_size / VMRegImpl::stack_slot_size;
2738 }
2739
2740 uint SharedRuntime::out_preserve_stack_slots() {
2741 #if defined(COMPILER1) || defined(COMPILER2)
2742 return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
2743 #else
2744 return 0;
2745 #endif
2746 }
2747
// Not supported on PPC: there is no register/context slot from which a
// frame walker could recover the JavaThread*. Unconditionally asserts in
// debug builds; the nullptr return is only reached in product builds where
// ShouldNotCallThis() may be a no-op.
VMReg SharedRuntime::thread_register() {
  // On PPC virtual threads don't save the JavaThread* in their context (e.g. C1 stub frames).
  ShouldNotCallThis();
  return nullptr;
}
2753
2754 #if defined(COMPILER1) || defined(COMPILER2)
2755 // Frame generation for deopt and uncommon trap blobs.
// Emit code that pushes a single skeletal frame: consume the next entry of
// the pc array (pcs_reg) and the frame-size array (frame_sizes_reg), store
// the pc into the current top frame's LR save slot, push a frame of the
// loaded size, and store the (new) R1_SP into the frame's sender_sp slot.
// Decrements number_of_frames_reg and advances both array pointers by one
// word. frame_size_reg and pc_reg are clobbered (they hold the consumed
// size/pc on exit).
static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
                                /* Read */
                                Register unroll_block_reg,
                                /* Update */
                                Register frame_sizes_reg,
                                Register number_of_frames_reg,
                                Register pcs_reg,
                                /* Invalidate */
                                Register frame_size_reg,
                                Register pc_reg) {

  // Load next pc and frame size from the UnrollBlock arrays.
  __ ld(pc_reg, 0, pcs_reg);
  __ ld(frame_size_reg, 0, frame_sizes_reg);
  // Make the pc the return address of the frame currently on top.
  __ std(pc_reg, _abi0(lr), R1_SP);
  __ push_frame(frame_size_reg, R0/*tmp*/);
  __ std(R1_SP, _ijava_state_neg(sender_sp), R1_SP);
  // One frame done; step both arrays forward.
  __ addi(number_of_frames_reg, number_of_frames_reg, -1);
  __ addi(frame_sizes_reg, frame_sizes_reg, wordSize);
  __ addi(pcs_reg, pcs_reg, wordSize);
}
2776
2777 // Loop through the UnrollBlock info and create new frames.
// Emit code that resizes the caller's top frame by the UnrollBlock's
// caller_adjustment and then pushes one skeletal frame per entry in the
// UnrollBlock's frame_sizes/frame_pcs arrays (see push_skeleton_frame).
// On exit the LR save slot of the top skeletal frame holds the final pc
// from the pc array. Clobbers all registers passed as "invalidate", plus
// R11_scratch1; expects R12_scratch2 to hold the top frame's lr (see
// comment below).
static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
                                 /* read */
                                 Register unroll_block_reg,
                                 /* invalidate */
                                 Register frame_sizes_reg,
                                 Register number_of_frames_reg,
                                 Register pcs_reg,
                                 Register frame_size_reg,
                                 Register pc_reg) {
  Label loop;

  // _number_of_frames is of type int (deoptimization.hpp)
  __ lwa(number_of_frames_reg,
         in_bytes(Deoptimization::UnrollBlock::number_of_frames_offset()),
         unroll_block_reg);
  __ ld(pcs_reg,
        in_bytes(Deoptimization::UnrollBlock::frame_pcs_offset()),
        unroll_block_reg);
  __ ld(frame_sizes_reg,
        in_bytes(Deoptimization::UnrollBlock::frame_sizes_offset()),
        unroll_block_reg);

  // stack: (caller_of_deoptee, ...).

  // At this point we either have an interpreter frame or a compiled
  // frame on top of stack. If it is a compiled frame we push a new c2i
  // adapter here

  // Memorize top-frame stack-pointer.
  __ mr(frame_size_reg/*old_sp*/, R1_SP);

  // Resize interpreter top frame OR C2I adapter.

  // At this moment, the top frame (which is the caller of the deoptee) is
  // an interpreter frame or a newly pushed C2I adapter or an entry frame.
  // The top frame has a TOP_IJAVA_FRAME_ABI and the frame contains the
  // outgoing arguments.
  //
  // In order to push the interpreter frame for the deoptee, we need to
  // resize the top frame such that we are able to place the deoptee's
  // locals in the frame.
  // Additionally, we have to turn the top frame's TOP_IJAVA_FRAME_ABI
  // into a valid PARENT_IJAVA_FRAME_ABI.

  __ lwa(R11_scratch1,
         in_bytes(Deoptimization::UnrollBlock::caller_adjustment_offset()),
         unroll_block_reg);
  // Grow downwards: the adjustment is applied as a negative resize amount.
  __ neg(R11_scratch1, R11_scratch1);

  // R11_scratch1 contains size of locals for frame resizing.
  // R12_scratch2 contains top frame's lr.

  // Resize frame by complete frame size prevents TOC from being
  // overwritten by locals. A more stack space saving way would be
  // to copy the TOC to its location in the new abi.
  __ addi(R11_scratch1, R11_scratch1, - frame::parent_ijava_frame_abi_size);

  // now, resize the frame
  __ resize_frame(R11_scratch1, pc_reg/*tmp*/);

  // In the case where we have resized a c2i frame above, the optional
  // alignment below the locals has size 32 (why?).
  __ std(R12_scratch2, _abi0(lr), R1_SP);

  // Initialize initial_caller_sp.
  __ std(frame_size_reg, _ijava_state_neg(sender_sp), R1_SP);

#ifdef ASSERT
  // Make sure that there is at least one entry in the array.
  __ cmpdi(CR0, number_of_frames_reg, 0);
  __ asm_assert_ne("array_size must be > 0");
#endif

  // Now push the new interpreter frames.
  //
  __ bind(loop);
  // Allocate a new frame, fill in the pc.
  push_skeleton_frame(masm, deopt,
                      unroll_block_reg,
                      frame_sizes_reg,
                      number_of_frames_reg,
                      pcs_reg,
                      frame_size_reg,
                      pc_reg);
  // Loop until all frames from the UnrollBlock have been pushed.
  __ cmpdi(CR0, number_of_frames_reg, 0);
  __ bne(CR0, loop);

  // Get the return address pointing into the template interpreter.
  __ ld(R0, 0, pcs_reg);
  // Store it in the top interpreter frame.
  __ std(R0, _abi0(lr), R1_SP);
  // Initialize frame_manager_lr of interpreter top frame.
}
2871 #endif
2872
// Generate the blob all deoptimizing compiled frames pass through. It has
// three entry points (normal deopt; deopt with the exception already stored
// in TLS; and -- C1 only -- reexecute). Each entry saves the live registers,
// then the common tail calls Deoptimization::fetch_unroll_info, unwinds the
// deoptee, materializes skeletal interpreter frames from the returned
// UnrollBlock (push_skeleton_frames), lets Deoptimization::unpack_frames
// fill them in, and finally returns into the interpreter.
void SharedRuntime::generate_deopt_blob() {
  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id);
  CodeBuffer buffer(name, 2048, 1024);
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  Label exec_mode_initialized;
  OopMap* map = nullptr;
  OopMapSet *oop_maps = new OopMapSet();

  // size of ABI112 plus spill slots for R3_RET and F1_RET.
  const int frame_size_in_bytes = frame::native_abi_reg_args_spill_size;
  const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
  int first_frame_size_in_bytes = 0; // frame size of "unpack frame" for call to fetch_unroll_info.

  // Holds the Deoptimization::Unpack_* mode across the calls below.
  const Register exec_mode_reg = R21_tmp1;

  const address start = __ pc();
  int exception_offset = 0;
  int exception_in_tls_offset = 0;
  int reexecute_offset = 0;

#if defined(COMPILER1) || defined(COMPILER2)
  // --------------------------------------------------------------------------
  // Prolog for non exception case!

  // We have been called from the deopt handler of the deoptee.
  //
  // deoptee:
  //   ...
  //   call X
  //   ...
  //   deopt_handler: call_deopt_stub
  //   cur. return pc --> ...
  //
  // The return_pc has been stored in the frame of the deoptee and
  // will replace the address of the deopt_handler in the call
  // to Deoptimization::fetch_unroll_info below.

  // Push the "unpack frame"
  // Save everything in sight.
  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                                   &first_frame_size_in_bytes,
                                                                   /*generate_oop_map=*/ true,
                                                                   RegisterSaver::return_pc_is_lr,
                                                                   /*save_vectors*/ SuperwordUseVSX);
  assert(map != nullptr, "OopMap must have been created");

  __ li(exec_mode_reg, Deoptimization::Unpack_deopt);
  // Save exec mode for unpack_frames.
  __ b(exec_mode_initialized);

  // --------------------------------------------------------------------------
  // Prolog for exception case

  // An exception is pending.
  // We have been called with a return (interpreter) or a jump (exception blob).
  //
  // - R3_ARG1: exception oop
  // - R4_ARG2: exception pc

  exception_offset = __ pc() - start;

  BLOCK_COMMENT("Prolog for exception case");

  // Store exception oop and pc in thread (location known to GC).
  // This is needed since the call to "fetch_unroll_info()" may safepoint.
  __ std(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
  __ std(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
  // Also place the exception pc into the deoptee's LR save slot.
  __ std(R4_ARG2, _abi0(lr), R1_SP);

  // Vanilla deoptimization with an exception pending in exception_oop.
  exception_in_tls_offset = __ pc() - start;

  // Push the "unpack frame".
  // Save everything in sight.
  RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                             &first_frame_size_in_bytes,
                                                             /*generate_oop_map=*/ false,
                                                             RegisterSaver::return_pc_is_pre_saved,
                                                             /*save_vectors*/ SuperwordUseVSX);

  // Deopt during an exception. Save exec mode for unpack_frames.
  __ li(exec_mode_reg, Deoptimization::Unpack_exception);

  // fall through
#ifdef COMPILER1
  __ b(exec_mode_initialized);

  // Reexecute entry, similar to c2 uncommon trap
  reexecute_offset = __ pc() - start;

  RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                             &first_frame_size_in_bytes,
                                                             /*generate_oop_map=*/ false,
                                                             RegisterSaver::return_pc_is_pre_saved,
                                                             /*save_vectors*/ SuperwordUseVSX);
  __ li(exec_mode_reg, Deoptimization::Unpack_reexecute);
#endif

  // --------------------------------------------------------------------------
  __ BIND(exec_mode_initialized);

  // Holds the UnrollBlock* returned by fetch_unroll_info.
  const Register unroll_block_reg = R22_tmp2;

  // We need to set `last_Java_frame' because `fetch_unroll_info' will
  // call `last_Java_frame()'. The value of the pc in the frame is not
  // particularly important. It just needs to identify this blob.
  __ set_last_Java_frame(R1_SP, noreg);

  // With EscapeAnalysis turned on, this call may safepoint!
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread, exec_mode_reg);
  address calls_return_pc = __ last_calls_return_pc();
  // Set an oopmap for the call site that describes all our saved registers.
  oop_maps->add_gc_map(calls_return_pc - start, map);

  __ reset_last_Java_frame();
  // Save the return value.
  __ mr(unroll_block_reg, R3_RET);

  // Restore only the result registers that have been saved
  // by save_volatile_registers(...).
  RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes, /*save_vectors*/ SuperwordUseVSX);

  // reload the exec mode from the UnrollBlock (it might have changed)
  __ lwz(exec_mode_reg, in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset()), unroll_block_reg);
  // In excp_deopt_mode, restore and clear exception oop which we
  // stored in the thread during exception entry above. The exception
  // oop will be the return value of this stub.
  Label skip_restore_excp;
  __ cmpdi(CR0, exec_mode_reg, Deoptimization::Unpack_exception);
  __ bne(CR0, skip_restore_excp);
  __ ld(R3_RET, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
  __ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
  __ li(R0, 0);
  // Clear the TLS slots so GC does not see a stale exception oop/pc.
  __ std(R0, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
  __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
  __ BIND(skip_restore_excp);

  // Pop the "unpack frame" pushed above.
  __ pop_frame();

  // stack: (deoptee, optional i2c, caller of deoptee, ...).

  // pop the deoptee's frame
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

  // Freezing continuation frames requires that the caller is trimmed to unextended sp if compiled.
  // If not compiled the loaded value is equal to the current SP (see frame::initial_deoptimization_info())
  // and the frame is effectively not resized.
  Register caller_sp = R23_tmp3;
  __ ld_ptr(caller_sp, Deoptimization::UnrollBlock::initial_info_offset(), unroll_block_reg);
  __ resize_frame_absolute(caller_sp, R24_tmp4, R25_tmp5);

  // Loop through the `UnrollBlock' info and create interpreter frames.
  push_skeleton_frames(masm, true/*deopt*/,
                       unroll_block_reg,
                       R23_tmp3,
                       R24_tmp4,
                       R25_tmp5,
                       R26_tmp6,
                       R27_tmp7);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, optional c2i, caller of deoptee, ...).

  // push an `unpack_frame' taking care of float / int return values.
  __ push_frame(frame_size_in_bytes, R0/*tmp*/);

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, optional c2i, caller of deoptee,
  // ...).

  // Spill live volatile registers since we'll do a call.
  __ std( R3_RET, _native_abi_reg_args_spill(spill_ret), R1_SP);
  __ stfd(F1_RET, _native_abi_reg_args_spill(spill_fret), R1_SP);

  // Let the unpacker layout information in the skeletal frames just
  // allocated.
  __ calculate_address_from_global_toc(R3_RET, calls_return_pc, true, true, true, true);
  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R3_RET);
  // This is a call to a LEAF method, so no oop map is required.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
                  R16_thread/*thread*/, exec_mode_reg/*exec_mode*/);
  __ reset_last_Java_frame();

  // Restore the volatiles saved above.
  __ ld( R3_RET, _native_abi_reg_args_spill(spill_ret), R1_SP);
  __ lfd(F1_RET, _native_abi_reg_args_spill(spill_fret), R1_SP);

  // Pop the unpack frame.
  __ pop_frame();
  __ restore_LR(R0);

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // optional c2i, caller of deoptee, ...).

  // Initialize R14_state.
  __ restore_interpreter_state(R11_scratch1);
  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);

  // Return to the interpreter entry point.
  __ blr();
#else // !defined(COMPILER1) && !defined(COMPILER2)
  __ unimplemented("deopt blob needed only with compiler");
#endif

  // Make sure all code is generated
  __ flush();

  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
                                           reexecute_offset, first_frame_size_in_bytes / wordSize);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}
3089
3090 #ifdef COMPILER2
// Generate the C2 uncommon-trap blob: call Deoptimization::uncommon_trap to
// pack the trapping compiled frame into a vframe array, then unwind it,
// build skeletal interpreter frames from the returned UnrollBlock, let
// Deoptimization::unpack_frames fill them in, and return into the
// interpreter. Returns nullptr if the code buffer could not be allocated.
UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() {
  // Allocate space for the code.
  ResourceMark rm;
  // Setup code generation tools.
  const char* name = OptoRuntime::stub_name(StubId::c2_uncommon_trap_id);
  CodeBuffer buffer(name, 2048, 1024);
  if (buffer.blob() == nullptr) {
    return nullptr;
  }
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  address start = __ pc();

  Register unroll_block_reg = R21_tmp1;
  Register klass_index_reg = R22_tmp2;
  Register unc_trap_reg = R23_tmp3;
  Register r_return_pc = R27_tmp7;

  OopMapSet* oop_maps = new OopMapSet();
  int frame_size_in_bytes = frame::native_abi_reg_args_size;
  OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);

  // stack: (deoptee, optional i2c, caller_of_deoptee, ...).

  // Push a dummy `unpack_frame' and call
  // `Deoptimization::uncommon_trap' to pack the compiled frame into a
  // vframe array and return the `UnrollBlock' information.

  // Save LR to compiled frame.
  __ save_LR(R11_scratch1);

  // Push an "uncommon_trap" frame.
  __ push_frame_reg_args(0, R11_scratch1);

  // stack: (unpack frame, deoptee, optional i2c, caller_of_deoptee, ...).

  // Set the `unpack_frame' as last_Java_frame.
  // `Deoptimization::uncommon_trap' expects it and considers its
  // sender frame as the deoptee frame.
  // Remember the offset of the instruction whose address will be
  // moved to R11_scratch1.
  address gc_map_pc = __ pc();
  __ calculate_address_from_global_toc(r_return_pc, gc_map_pc, true, true, true, true);
  __ set_last_Java_frame(/*sp*/R1_SP, r_return_pc);

  // Incoming R3 holds the trap request; keep it in a non-volatile temp
  // across the frame setup.
  __ mr(klass_index_reg, R3);
  __ li(R5_ARG3, Deoptimization::Unpack_uncommon_trap);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap),
                  R16_thread, klass_index_reg, R5_ARG3);

  // Set an oopmap for the call site.
  oop_maps->add_gc_map(gc_map_pc - start, map);

  __ reset_last_Java_frame();

  // Pop the `unpack frame'.
  __ pop_frame();

  // stack: (deoptee, optional i2c, caller_of_deoptee, ...).

  // Save the return value.
  __ mr(unroll_block_reg, R3_RET);

  // Pop the uncommon_trap frame.
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

#ifdef ASSERT
  // Sanity check: the UnrollBlock must have been produced for an
  // uncommon trap.
  __ lwz(R22_tmp2, in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset()), unroll_block_reg);
  __ cmpdi(CR0, R22_tmp2, (unsigned)Deoptimization::Unpack_uncommon_trap);
  __ asm_assert_eq("OptoRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
#endif

  // Freezing continuation frames requires that the caller is trimmed to unextended sp if compiled.
  // If not compiled the loaded value is equal to the current SP (see frame::initial_deoptimization_info())
  // and the frame is effectively not resized.
  Register caller_sp = R23_tmp3;
  __ ld_ptr(caller_sp, Deoptimization::UnrollBlock::initial_info_offset(), unroll_block_reg);
  __ resize_frame_absolute(caller_sp, R24_tmp4, R25_tmp5);

  // Allocate new interpreter frame(s) and possibly a c2i adapter
  // frame.
  push_skeleton_frames(masm, false/*deopt*/,
                       unroll_block_reg,
                       R22_tmp2,
                       R23_tmp3,
                       R24_tmp4,
                       R25_tmp5,
                       R26_tmp6);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, optional c2i, caller of deoptee, ...).

  // Push a dummy `unpack_frame' taking care of float return values.
  // Call `Deoptimization::unpack_frames' to layout information in the
  // interpreter frames just created.

  // Push a simple "unpack frame" here.
  __ push_frame_reg_args(0, R11_scratch1);

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, optional c2i, caller of deoptee,
  // ...).

  // Set the "unpack_frame" as last_Java_frame.
  __ set_last_Java_frame(/*sp*/R1_SP, r_return_pc);

  // Indicate it is the uncommon trap case.
  __ li(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
  // Let the unpacker layout information in the skeletal frames just
  // allocated.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
                  R16_thread, unc_trap_reg);

  __ reset_last_Java_frame();
  // Pop the `unpack frame'.
  __ pop_frame();
  // Restore LR from top interpreter frame.
  __ restore_LR(R11_scratch1);

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // optional c2i, caller of deoptee, ...).

  __ restore_interpreter_state(R11_scratch1);
  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);

  // Return to the interpreter entry point.
  __ blr();

  masm->flush();

  return UncommonTrapBlob::create(&buffer, oop_maps, frame_size_in_bytes/wordSize);
}
3224 #endif // COMPILER2
3225
3226 // Generate a special Compile2Runtime blob that saves all registers, and setup oopmap.
// Build a SafepointBlob for the given polling-page stub id: save all live
// registers, call `call_ptr` in the VM, then either forward a pending
// exception or restore registers and return. For the non-return-handler
// flavors the return pc is taken from thread->saved_exception_pc() and is
// advanced past the poll instruction on the way out (unless the runtime
// modified it).
SafepointBlob* SharedRuntime::generate_handler_blob(StubId id, address call_ptr) {
  assert(StubRoutines::forward_exception_entry() != nullptr,
         "must be generated before");
  assert(is_polling_page_id(id), "expected a polling page stub id");

  ResourceMark rm;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map;

  // Allocate space for the code. Setup code generation tools.
  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  address start = __ pc();
  int frame_size_in_bytes = 0;

  RegisterSaver::ReturnPCLocation return_pc_location;
  bool cause_return = (id == StubId::shared_polling_page_return_handler_id);
  if (cause_return) {
    // Nothing to do here. The frame has already been popped in MachEpilogNode.
    // Register LR already contains the return pc.
    return_pc_location = RegisterSaver::return_pc_is_pre_saved;
  } else {
    // Use thread()->saved_exception_pc() as return pc.
    return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc;
  }

  bool save_vectors = (id == StubId::shared_polling_page_vectors_safepoint_handler_id);

  // Save registers, fpu state, and flags. Set R31 = return pc.
  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                                   &frame_size_in_bytes,
                                                                   /*generate_oop_map=*/ true,
                                                                   return_pc_location, save_vectors);

  // The following is basically a call_VM. However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.
  __ set_last_Java_frame(/*sp=*/R1_SP, /*pc=*/noreg);

  // The return address must always be correct so that the frame constructor
  // never sees an invalid pc.

  // Do the call
  __ call_VM_leaf(call_ptr, R16_thread);
  address calls_return_pc = __ last_calls_return_pc();

  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.
  oop_maps->add_gc_map(calls_return_pc - start, map);

  Label noException;

  // Clear the last Java frame.
  __ reset_last_Java_frame();

  BLOCK_COMMENT(" Check pending exception.");
  const Register pending_exception = R0;
  __ ld(pending_exception, thread_(pending_exception));
  __ cmpdi(CR0, pending_exception, 0);
  __ beq(CR0, noException);

  // Exception pending
  RegisterSaver::restore_live_registers_and_pop_frame(masm,
                                                      frame_size_in_bytes,
                                                      /*restore_ctr=*/true, save_vectors);

  BLOCK_COMMENT(" Jump to forward_exception_entry.");
  // Jump to forward_exception_entry, with the issuing PC in LR
  // so it looks like the original nmethod called forward_exception_entry.
  __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);

  // No exception case.
  __ BIND(noException);

  if (!cause_return) {
    Label no_adjust;
    // If our stashed return pc was modified by the runtime we avoid touching it
    __ ld(R0, frame_size_in_bytes + _abi0(lr), R1_SP);
    __ cmpd(CR0, R0, R31);
    __ bne(CR0, no_adjust);

    // Adjust return pc forward to step over the safepoint poll instruction
    __ addi(R31, R31, 4);
    __ std(R31, frame_size_in_bytes + _abi0(lr), R1_SP);

    __ bind(no_adjust);
  }

  // Normal exit, restore registers and exit.
  RegisterSaver::restore_live_registers_and_pop_frame(masm,
                                                      frame_size_in_bytes,
                                                      /*restore_ctr=*/true, save_vectors);

  __ blr();

  // Make sure all code is generated
  masm->flush();

  // Fill-out other meta info
  // CodeBlob frame size is in words.
  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_bytes / wordSize);
}
3333
3334 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3335 //
3336 // Generate a stub that calls into the vm to find out the proper destination
3337 // of a java call. All the argument registers are live at this point
3338 // but since this is generic code we don't know what they are and the caller
3339 // must do any gc of the args.
3340 //
// Build the resolve stub for call-site resolution: save all live argument
// registers, call `destination` in the VM to find the real call target,
// then jump to the returned address (via CTR) with R19_method set from the
// thread's vm_result_metadata, or forward a pending exception.
RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination) {
  assert(is_resolve_id(id), "expected a resolve stub id");

  // allocate space for the code
  ResourceMark rm;

  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  int frame_size_in_bytes;

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = nullptr;

  address start = __ pc();

  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                                   &frame_size_in_bytes,
                                                                   /*generate_oop_map*/ true,
                                                                   RegisterSaver::return_pc_is_lr);

  // Use noreg as last_Java_pc, the return pc will be reconstructed
  // from the physical frame.
  __ set_last_Java_frame(/*sp*/R1_SP, noreg);

  int frame_complete = __ offset();

  // Pass R19_method as 2nd (optional) argument, used by
  // counter_overflow_stub.
  __ call_VM_leaf(destination, R16_thread, R19_method);
  address calls_return_pc = __ last_calls_return_pc();
  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.
  // Create the oopmap for the call's return pc.
  oop_maps->add_gc_map(calls_return_pc - start, map);

  // R3_RET contains the address we are going to jump to assuming no exception got installed.

  // clear last_Java_sp
  __ reset_last_Java_frame();

  // Check for pending exceptions.
  BLOCK_COMMENT("Check for pending exceptions.");
  Label pending;
  __ ld(R11_scratch1, thread_(pending_exception));
  __ cmpdi(CR0, R11_scratch1, 0);
  __ bne(CR0, pending);

  __ mtctr(R3_RET); // Ctr will not be touched by restore_live_registers_and_pop_frame.

  RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ false);

  // Get the returned method.
  __ get_vm_result_metadata(R19_method);

  // Jump to the resolved target (stashed in CTR above).
  __ bctr();


  // Pending exception after the safepoint.
  __ BIND(pending);

  RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ true);

  // exception pending => remove activation and forward to exception handler

  // NOTE(review): R3_ARG1 is loaded with the pending exception; presumably
  // consumed by forward_exception_entry -- confirm against the stub.
  __ li(R11_scratch1, 0);
  __ ld(R3_ARG1, thread_(pending_exception));
  __ std(R11_scratch1, in_bytes(JavaThread::vm_result_oop_offset()), R16_thread);
  __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);

  // -------------
  // Make sure all code is generated.
  masm->flush();

  // return the blob
  // RuntimeStub frame size is given in words (hence the division by wordSize).
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize,
                                       oop_maps, true);
}
3422
3423 // Continuation point for throwing of implicit exceptions that are
3424 // not handled in the current activation. Fabricates an exception
3425 // oop and initiates normal exception dispatching in this
3426 // frame. Only callee-saved registers are preserved (through the
3427 // normal register window / RegisterMap handling). If the compiler
3428 // needs all registers to be preserved between the fault point and
3429 // the exception handler then it must assume responsibility for that
3430 // in AbstractCompiler::continuation_for_implicit_null_exception or
3431 // continuation_for_implicit_division_by_zero_exception. All other
3432 // implicit exceptions (e.g., NullPointerException or
3433 // AbstractMethodError on entry) are either at call sites or
3434 // otherwise assume that stack unwinding will be initiated, so
3435 // caller saved registers were assumed volatile in the compiler.
3436 //
3437 // Note that we generate only this stub into a RuntimeStub, because
3438 // it needs to be properly traversed and ignored during GC, so we
3439 // change the meaning of the "__" macro within this method.
3440 //
3441 // Note: the routine set_pc_not_at_call_for_caller in
3442 // SharedRuntime.cpp requires that this code be generated into a
3443 // RuntimeStub.
RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) {
  assert(is_throw_id(id), "expected a throw stub id");

  const char* name = SharedRuntime::stub_name(id);

  ResourceMark rm;
  const char* timer_msg = "SharedRuntime generate_throw_exception";
  TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime));

  CodeBuffer code(name, 1024 DEBUG_ONLY(+ 512), 0);
  MacroAssembler* masm = new MacroAssembler(&code);

  OopMapSet* oop_maps = new OopMapSet();
  // Only a native ABI frame is pushed and no oops are live in it, so the
  // oop map stays empty.
  int frame_size_in_bytes = frame::native_abi_reg_args_size;
  OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);

  address start = __ pc();

  __ save_LR(R11_scratch1);

  // Push a frame.
  __ push_frame_reg_args(0, R11_scratch1);

  address frame_complete_pc = __ pc();

  // Note that we always have a runtime stub frame on the top of
  // stack by this point. Remember the offset of the instruction
  // whose address will be moved to R11_scratch1.
  address gc_map_pc = __ get_PC_trash_LR(R11_scratch1);

  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);

  // Call the runtime entry with the current thread as the only argument;
  // it is expected to install a pending exception on the thread.
  __ mr(R3_ARG1, R16_thread);
  __ call_c(runtime_entry);

  // Set an oopmap for the call site.
  oop_maps->add_gc_map((int)(gc_map_pc - start), map);

  __ reset_last_Java_frame();

#ifdef ASSERT
  // Make sure that this code is only executed if there is a pending
  // exception.
  {
    Label L;
    __ ld(R0,
          in_bytes(Thread::pending_exception_offset()),
          R16_thread);
    __ cmpdi(CR0, R0, 0);
    __ bne(CR0, L);
    __ stop("SharedRuntime::throw_exception: no pending exception");
    __ bind(L);
  }
#endif

  // Pop frame.
  __ pop_frame();

  __ restore_LR(R11_scratch1);

  // Tail-jump into the common forwarding stub, which dispatches the
  // pending exception to the caller's exception handler.
  __ load_const(R11_scratch1, StubRoutines::forward_exception_entry());
  __ mtctr(R11_scratch1);
  __ bctr();

  // Create runtime stub with OopMap.
  RuntimeStub* stub =
    RuntimeStub::new_runtime_stub(name, &code,
                                  /*frame_complete=*/ (int)(frame_complete_pc - start),
                                  frame_size_in_bytes/wordSize,
                                  oop_maps,
                                  false);
  return stub;
}
3517
3518 //------------------------------Montgomery multiplication------------------------
3519 //
3520
// Subtract 0:b from carry:a. Return carry.
// Multi-precision subtraction: subtracts the len-word number b from a
// (arrays of 64-bit words, index 0 being the word the borrow chain starts
// from), writing the difference back into a[]. The PPC carry bit (CA in
// XER) carries the borrow from word to word; the final "addme" folds CA
// into the incoming carry word, which is returned.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  long i = 0;
  unsigned long tmp, tmp2;
  __asm__ __volatile__ (
    "subfc %[tmp], %[tmp], %[tmp] \n" // pre-set CA (tmp - tmp = 0, CA = 1)
    "mtctr %[len] \n" // loop count in CTR
    "0: \n"
    "ldx %[tmp], %[i], %[a] \n" // tmp  = a[i]
    "ldx %[tmp2], %[i], %[b] \n" // tmp2 = b[i]
    "subfe %[tmp], %[tmp2], %[tmp] \n" // subtract extended
    "stdx %[tmp], %[i], %[a] \n" // a[i] = tmp
    "addi %[i], %[i], 8 \n" // advance byte offset to the next word
    "bdnz 0b \n" // decrement CTR, loop while non-zero
    "addme %[tmp], %[carry] \n" // carry + CA - 1
    : [i]"+b"(i), [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2)
    : [a]"r"(a), [b]"r"(b), [carry]"r"(carry), [len]"r"(len)
    : "ctr", "xer", "memory"
  );
  return tmp;
}
3543
// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
// mulld/mulhdu yield the low and high 64 bits of the 128-bit product;
// addc/adde/addze then add that product into the 192-bit accumulator
// T2:T1:T0 with carry propagation through CA (hence the "xer" clobber).
inline void MACC(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  unsigned long hi, lo;
  __asm__ __volatile__ (
    "mulld %[lo], %[A], %[B] \n" // lo = low 64 bits of A*B
    "mulhdu %[hi], %[A], %[B] \n" // hi = high 64 bits of A*B (unsigned)
    "addc %[T0], %[T0], %[lo] \n" // T0 += lo, set CA
    "adde %[T1], %[T1], %[hi] \n" // T1 += hi + CA
    "addze %[T2], %[T2] \n" // T2 += CA
    : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [B]"r"(B)
    : "xer"
  );
}
3559
// As above, but add twice the double-length result into the
// accumulator. Used for the off-diagonal terms of a square, where each
// cross product a[i]*a[j] (i != j) occurs twice. The 128-bit product is
// computed once and the carry-propagating add sequence is simply run
// twice.
inline void MACC2(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  unsigned long hi, lo;
  __asm__ __volatile__ (
    "mulld %[lo], %[A], %[B] \n" // lo = low 64 bits of A*B
    "mulhdu %[hi], %[A], %[B] \n" // hi = high 64 bits of A*B (unsigned)
    "addc %[T0], %[T0], %[lo] \n" // first add of the product
    "adde %[T1], %[T1], %[hi] \n"
    "addze %[T2], %[T2] \n"
    "addc %[T0], %[T0], %[lo] \n" // second add of the same product
    "adde %[T1], %[T1], %[hi] \n"
    "addze %[T2], %[T2] \n"
    : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [B]"r"(B)
    : "xer"
  );
}
3578
// Fast Montgomery multiplication. The derivation of the algorithm is
// in "A Cryptographic Library for the Motorola DSP56000,
// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
//
// a, b, n, m are arrays of len 64-bit words; inv is the precomputed
// negative inverse of n[0] mod 2^64 (checked by the assert below).
// The multiplication is interleaved with the Montgomery reduction,
// processed one result column at a time in the t2:t1:t0 accumulator.
static void
montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
                    unsigned long m[], unsigned long inv, int len) {
  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");

  // First phase: columns 0 .. len-1. Each iteration accumulates one
  // column of a*b plus the reduction products, then picks m[i] so that
  // the column's low word cancels to exactly zero.
  for (i = 0; i < len; i++) {
    int j;
    for (j = 0; j < i; j++) {
      MACC(a[j], b[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    MACC(a[i], b[0], t0, t1, t2);
    // m[i] = t0 * (-n[0]^-1) mod 2^64 makes t0 + m[i]*n[0] == 0 mod 2^64.
    m[i] = t0 * inv;
    MACC(m[i], n[0], t0, t1, t2);

    assert(t0 == 0, "broken Montgomery multiply");

    // Shift the accumulator one 64-bit word right (low word is now zero).
    t0 = t1; t1 = t2; t2 = 0;
  }

  // Second phase: columns len .. 2*len-1. No new reduction words are
  // generated; each finished low word is stored into m[].
  for (i = len; i < 2*len; i++) {
    int j;
    for (j = i-len+1; j < len; j++) {
      MACC(a[j], b[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  // While an overflow carry remains, subtract n to bring m into range.
  while (t0) {
    t0 = sub(m, n, t0, len);
  }
}
3619
// Fast Montgomery squaring. This uses asymptotically 25% fewer
// multiplies so it should be up to 25% faster than Montgomery
// multiplication. However, its loop control is more complex and it
// may actually run slower on some machines.
//
// Same contract as montgomery_multiply above with b == a. Off-diagonal
// products a[j]*a[i-j] occur twice in a square, so each column walks
// only half its range using MACC2, adds the diagonal term once for even
// columns, and finishes the reduction products with plain MACC.
static void
montgomery_square(unsigned long a[], unsigned long n[],
                  unsigned long m[], unsigned long inv, int len) {
  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");

  // First phase: columns 0 .. len-1.
  for (i = 0; i < len; i++) {
    int j;
    int end = (i+1)/2;
    // Doubled off-diagonal products over half the column.
    for (j = 0; j < end; j++) {
      MACC2(a[j], a[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    // Even columns contain the diagonal square a[j]^2 exactly once.
    if ((i & 1) == 0) {
      MACC(a[j], a[j], t0, t1, t2);
    }
    // Remaining reduction products of this column.
    for (; j < i; j++) {
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    // Choose m[i] so that the low accumulator word cancels to zero.
    m[i] = t0 * inv;
    MACC(m[i], n[0], t0, t1, t2);

    assert(t0 == 0, "broken Montgomery square");

    // Shift the accumulator one 64-bit word right.
    t0 = t1; t1 = t2; t2 = 0;
  }

  // Second phase: columns len .. 2*len-1; finished words go to m[].
  for (i = len; i < 2*len; i++) {
    int start = i-len+1;
    int end = start + (len - start)/2;
    int j;
    for (j = start; j < end; j++) {
      MACC2(a[j], a[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a[j], a[j], t0, t1, t2);
    }
    for (; j < len; j++) {
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  // While an overflow carry remains, subtract n to bring m into range.
  while (t0) {
    t0 = sub(m, n, t0, len);
  }
}
3675
// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Sandy Bridge-E) CPU @ 3.5GHz.
// Doesn't seem to be relevant for Power8 so we use the same value.
3679 #define MONTGOMERY_SQUARING_THRESHOLD 64
3680
// Copy len 64-bit words from s into d in reverse word order. On little
// endian machines the two 32-bit halves of each word are additionally
// swapped, so the jint-pair layout matches the longword layout.
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
  for (int idx = 0; idx < len; idx++) {
    unsigned long w = s[idx];
    // Swap words in a longword on little endian machines.
#ifdef VM_LITTLE_ENDIAN
    w = (w << 32) | (w >> 32);
#endif
    d[len - 1 - idx] = w;
  }
}
3696
3697 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3698 jint len, jlong inv,
3699 jint *m_ints) {
3700 len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3701 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3702 int longwords = len/2;
3703
3704 // Make very sure we don't use so much space that the stack might
3705 // overflow. 512 jints corresponds to an 16384-bit integer and
3706 // will use here a total of 8k bytes of stack space.
3707 int divisor = sizeof(unsigned long) * 4;
3708 guarantee(longwords <= 8192 / divisor, "must be");
3709 int total_allocation = longwords * sizeof (unsigned long) * 4;
3710 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3711
3712 // Local scratch arrays
3713 unsigned long
3714 *a = scratch + 0 * longwords,
3715 *b = scratch + 1 * longwords,
3716 *n = scratch + 2 * longwords,
3717 *m = scratch + 3 * longwords;
3718
3719 reverse_words((unsigned long *)a_ints, a, longwords);
3720 reverse_words((unsigned long *)b_ints, b, longwords);
3721 reverse_words((unsigned long *)n_ints, n, longwords);
3722
3723 ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3724
3725 reverse_words(m, (unsigned long *)m_ints, longwords);
3726 }
3727
3728 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3729 jint len, jlong inv,
3730 jint *m_ints) {
3731 len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3732 assert(len % 2 == 0, "array length in montgomery_square must be even");
3733 int longwords = len/2;
3734
3735 // Make very sure we don't use so much space that the stack might
3736 // overflow. 512 jints corresponds to an 16384-bit integer and
3737 // will use here a total of 6k bytes of stack space.
3738 int divisor = sizeof(unsigned long) * 3;
3739 guarantee(longwords <= (8192 / divisor), "must be");
3740 int total_allocation = longwords * sizeof (unsigned long) * 3;
3741 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3742
3743 // Local scratch arrays
3744 unsigned long
3745 *a = scratch + 0 * longwords,
3746 *n = scratch + 1 * longwords,
3747 *m = scratch + 2 * longwords;
3748
3749 reverse_words((unsigned long *)a_ints, a, longwords);
3750 reverse_words((unsigned long *)n_ints, n, longwords);
3751
3752 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3753 ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3754 } else {
3755 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3756 }
3757
3758 reverse_words(m, (unsigned long *)m_ints, longwords);
3759 }
3760
3761 #if INCLUDE_JFR
3762
// For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
// It returns a jobject handle to the event writer.
// The handle is dereferenced and the return value is the event writer oop.
RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
  const char* name = SharedRuntime::stub_name(StubId::shared_jfr_write_checkpoint_id);
  CodeBuffer code(name, 512, 64);
  MacroAssembler* masm = new MacroAssembler(&code);

  // Scratch registers.
  Register tmp1 = R10_ARG8;
  Register tmp2 = R9_ARG7;

  // Frame size in stack slots; only a native ABI frame is pushed.
  int framesize = frame::native_abi_reg_args_size / VMRegImpl::stack_slot_size;
  address start = __ pc();
  __ mflr(tmp1);
  __ std(tmp1, _abi0(lr), R1_SP); // save return pc
  __ push_frame_reg_args(0, tmp1);
  int frame_complete = __ pc() - start;
  // Record R1_SP as the last Java sp so the stack is walkable across the
  // runtime call; no pc register is supplied here.
  __ set_last_Java_frame(R1_SP, noreg);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), R16_thread);
  address calls_return_pc = __ last_calls_return_pc();
  __ reset_last_Java_frame();
  // The handle is dereferenced through a load barrier.
  __ resolve_global_jobject(R3_RET, tmp1, tmp2, MacroAssembler::PRESERVATION_NONE);
  __ pop_frame();
  __ ld(tmp1, _abi0(lr), R1_SP); // reload the saved return pc
  __ mtlr(tmp1);
  __ blr();

  // Empty oop map registered at the runtime call's return pc.
  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = new OopMap(framesize, 0);
  oop_maps->add_gc_map(calls_return_pc - start, map);

  RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
    RuntimeStub::new_runtime_stub(name, &code, frame_complete,
                                  (framesize >> (LogBytesPerWord - LogBytesPerInt)),
                                  oop_maps, false);
  return stub;
}
3801
3802 // For c2: call to return a leased buffer.
3803 RuntimeStub* SharedRuntime::generate_jfr_return_lease() {
3804 const char* name = SharedRuntime::stub_name(StubId::shared_jfr_return_lease_id);
3805 CodeBuffer code(name, 512, 64);
3806 MacroAssembler* masm = new MacroAssembler(&code);
3807
3808 Register tmp1 = R10_ARG8;
3809 Register tmp2 = R9_ARG7;
3810
3811 int framesize = frame::native_abi_reg_args_size / VMRegImpl::stack_slot_size;
3812 address start = __ pc();
3813 __ mflr(tmp1);
3814 __ std(tmp1, _abi0(lr), R1_SP); // save return pc
3815 __ push_frame_reg_args(0, tmp1);
3816 int frame_complete = __ pc() - start;
3817 __ set_last_Java_frame(R1_SP, noreg);
3818 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), R16_thread);
3819 address calls_return_pc = __ last_calls_return_pc();
3820 __ reset_last_Java_frame();
3821 __ pop_frame();
3822 __ ld(tmp1, _abi0(lr), R1_SP);
3823 __ mtlr(tmp1);
3824 __ blr();
3825
3826 OopMapSet* oop_maps = new OopMapSet();
3827 OopMap* map = new OopMap(framesize, 0);
3828 oop_maps->add_gc_map(calls_return_pc - start, map);
3829
3830 RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
3831 RuntimeStub::new_runtime_stub(name, &code, frame_complete,
3832 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3833 oop_maps, false);
3834 return stub;
3835 }
3836
3837 #endif // INCLUDE_JFR