1 /* 2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2012, 2025 SAP SE. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #include "asm/macroAssembler.inline.hpp" 27 #include "code/debugInfoRec.hpp" 28 #include "code/compiledIC.hpp" 29 #include "code/vtableStubs.hpp" 30 #include "frame_ppc.hpp" 31 #include "compiler/oopMap.hpp" 32 #include "gc/shared/gcLocker.hpp" 33 #include "interpreter/interpreter.hpp" 34 #include "interpreter/interp_masm.hpp" 35 #include "memory/resourceArea.hpp" 36 #include "oops/klass.inline.hpp" 37 #include "prims/methodHandles.hpp" 38 #include "runtime/continuation.hpp" 39 #include "runtime/continuationEntry.inline.hpp" 40 #include "runtime/jniHandles.hpp" 41 #include "runtime/os.inline.hpp" 42 #include "runtime/safepointMechanism.hpp" 43 #include "runtime/sharedRuntime.hpp" 44 #include "runtime/signature.hpp" 45 #include "runtime/stubRoutines.hpp" 46 #include "runtime/timerTrace.hpp" 47 #include "runtime/vframeArray.hpp" 48 #include "utilities/align.hpp" 49 #include "utilities/macros.hpp" 50 #include "vmreg_ppc.inline.hpp" 51 #ifdef COMPILER1 52 #include "c1/c1_Runtime1.hpp" 53 #endif 54 #ifdef COMPILER2 55 #include "opto/ad.hpp" 56 #include "opto/runtime.hpp" 57 #endif 58 59 #include <alloca.h> 60 61 #define __ masm-> 62 63 #ifdef PRODUCT 64 #define BLOCK_COMMENT(str) // nothing 65 #else 66 #define BLOCK_COMMENT(str) __ block_comment(str) 67 #endif 68 69 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 70 71 72 class RegisterSaver { 73 // Used for saving volatile registers. 74 public: 75 76 // Support different return pc locations. 
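  // (The save code reads the return pc either from LR, reuses the value the caller
  //  already stored in the ABI lr slot, or loads it from thread->saved_exception_pc;
  //  see the switch in push_frame_reg_args_and_save_live_registers below.)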
77 enum ReturnPCLocation { 78 return_pc_is_lr, 79 return_pc_is_pre_saved, 80 return_pc_is_thread_saved_exception_pc 81 }; 82 83 static OopMap* push_frame_reg_args_and_save_live_registers(MacroAssembler* masm, 84 int* out_frame_size_in_bytes, 85 bool generate_oop_map, 86 int return_pc_adjustment, 87 ReturnPCLocation return_pc_location, 88 bool save_vectors = false); 89 static void restore_live_registers_and_pop_frame(MacroAssembler* masm, 90 int frame_size_in_bytes, 91 bool restore_ctr, 92 bool save_vectors = false); 93 94 static void push_frame_and_save_argument_registers(MacroAssembler* masm, 95 Register r_temp, 96 int frame_size, 97 int total_args, 98 const VMRegPair *regs, const VMRegPair *regs2 = nullptr); 99 static void restore_argument_registers_and_pop_frame(MacroAssembler*masm, 100 int frame_size, 101 int total_args, 102 const VMRegPair *regs, const VMRegPair *regs2 = nullptr); 103 104 // During deoptimization only the result registers need to be restored 105 // all the other values have already been extracted. 106 static void restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes); 107 108 // Constants and data structures: 109 110 typedef enum { 111 int_reg, 112 float_reg, 113 special_reg, 114 vec_reg 115 } RegisterType; 116 117 typedef enum { 118 reg_size = 8, 119 half_reg_size = reg_size / 2, 120 vec_reg_size = 16 121 } RegisterConstants; 122 123 typedef struct { 124 RegisterType reg_type; 125 int reg_num; 126 VMReg vmreg; 127 } LiveRegType; 128 }; 129 130 131 #define RegisterSaver_LiveIntReg(regname) \ 132 { RegisterSaver::int_reg, regname->encoding(), regname->as_VMReg() } 133 134 #define RegisterSaver_LiveFloatReg(regname) \ 135 { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() } 136 137 #define RegisterSaver_LiveSpecialReg(regname) \ 138 { RegisterSaver::special_reg, regname->encoding(), regname->as_VMReg() } 139 140 #define RegisterSaver_LiveVecReg(regname) \ 141 { RegisterSaver::vec_reg, regname->encoding(), regname->as_VMReg() } 142 143 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = { 144 // Live registers which get spilled to the stack. Register 145 // positions in this array correspond directly to the stack layout. 
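  //
  // Rough save-area sketch (offsets grow away from the new SP; the
  // native_abi_reg_args header plus alignment padding lies between SP and the
  // first slot): [CTR][F0..F31][R0, R2..R12, R14..R31][VR0..VR31 if vectors are saved].
  // The 63 entries below need 63 * reg_size = 504 bytes, rounded up to 512
  // (assuming 16-byte frame alignment) before the ABI header is added in
  // push_frame_reg_args_and_save_live_registers.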
146 147 // 148 // live special registers: 149 // 150 RegisterSaver_LiveSpecialReg(SR_CTR), 151 // 152 // live float registers: 153 // 154 RegisterSaver_LiveFloatReg( F0 ), 155 RegisterSaver_LiveFloatReg( F1 ), 156 RegisterSaver_LiveFloatReg( F2 ), 157 RegisterSaver_LiveFloatReg( F3 ), 158 RegisterSaver_LiveFloatReg( F4 ), 159 RegisterSaver_LiveFloatReg( F5 ), 160 RegisterSaver_LiveFloatReg( F6 ), 161 RegisterSaver_LiveFloatReg( F7 ), 162 RegisterSaver_LiveFloatReg( F8 ), 163 RegisterSaver_LiveFloatReg( F9 ), 164 RegisterSaver_LiveFloatReg( F10 ), 165 RegisterSaver_LiveFloatReg( F11 ), 166 RegisterSaver_LiveFloatReg( F12 ), 167 RegisterSaver_LiveFloatReg( F13 ), 168 RegisterSaver_LiveFloatReg( F14 ), 169 RegisterSaver_LiveFloatReg( F15 ), 170 RegisterSaver_LiveFloatReg( F16 ), 171 RegisterSaver_LiveFloatReg( F17 ), 172 RegisterSaver_LiveFloatReg( F18 ), 173 RegisterSaver_LiveFloatReg( F19 ), 174 RegisterSaver_LiveFloatReg( F20 ), 175 RegisterSaver_LiveFloatReg( F21 ), 176 RegisterSaver_LiveFloatReg( F22 ), 177 RegisterSaver_LiveFloatReg( F23 ), 178 RegisterSaver_LiveFloatReg( F24 ), 179 RegisterSaver_LiveFloatReg( F25 ), 180 RegisterSaver_LiveFloatReg( F26 ), 181 RegisterSaver_LiveFloatReg( F27 ), 182 RegisterSaver_LiveFloatReg( F28 ), 183 RegisterSaver_LiveFloatReg( F29 ), 184 RegisterSaver_LiveFloatReg( F30 ), 185 RegisterSaver_LiveFloatReg( F31 ), 186 // 187 // live integer registers: 188 // 189 RegisterSaver_LiveIntReg( R0 ), 190 //RegisterSaver_LiveIntReg( R1 ), // stack pointer 191 RegisterSaver_LiveIntReg( R2 ), 192 RegisterSaver_LiveIntReg( R3 ), 193 RegisterSaver_LiveIntReg( R4 ), 194 RegisterSaver_LiveIntReg( R5 ), 195 RegisterSaver_LiveIntReg( R6 ), 196 RegisterSaver_LiveIntReg( R7 ), 197 RegisterSaver_LiveIntReg( R8 ), 198 RegisterSaver_LiveIntReg( R9 ), 199 RegisterSaver_LiveIntReg( R10 ), 200 RegisterSaver_LiveIntReg( R11 ), 201 RegisterSaver_LiveIntReg( R12 ), 202 //RegisterSaver_LiveIntReg( R13 ), // system thread id 203 RegisterSaver_LiveIntReg( R14 ), 204 RegisterSaver_LiveIntReg( R15 ), 205 RegisterSaver_LiveIntReg( R16 ), 206 RegisterSaver_LiveIntReg( R17 ), 207 RegisterSaver_LiveIntReg( R18 ), 208 RegisterSaver_LiveIntReg( R19 ), 209 RegisterSaver_LiveIntReg( R20 ), 210 RegisterSaver_LiveIntReg( R21 ), 211 RegisterSaver_LiveIntReg( R22 ), 212 RegisterSaver_LiveIntReg( R23 ), 213 RegisterSaver_LiveIntReg( R24 ), 214 RegisterSaver_LiveIntReg( R25 ), 215 RegisterSaver_LiveIntReg( R26 ), 216 RegisterSaver_LiveIntReg( R27 ), 217 RegisterSaver_LiveIntReg( R28 ), 218 RegisterSaver_LiveIntReg( R29 ), 219 RegisterSaver_LiveIntReg( R30 ), 220 RegisterSaver_LiveIntReg( R31 ) // must be the last register (see save/restore functions below) 221 }; 222 223 static const RegisterSaver::LiveRegType RegisterSaver_LiveVecRegs[] = { 224 // 225 // live vector registers (optional, only these ones are used by C2): 226 // 227 RegisterSaver_LiveVecReg( VR0 ), 228 RegisterSaver_LiveVecReg( VR1 ), 229 RegisterSaver_LiveVecReg( VR2 ), 230 RegisterSaver_LiveVecReg( VR3 ), 231 RegisterSaver_LiveVecReg( VR4 ), 232 RegisterSaver_LiveVecReg( VR5 ), 233 RegisterSaver_LiveVecReg( VR6 ), 234 RegisterSaver_LiveVecReg( VR7 ), 235 RegisterSaver_LiveVecReg( VR8 ), 236 RegisterSaver_LiveVecReg( VR9 ), 237 RegisterSaver_LiveVecReg( VR10 ), 238 RegisterSaver_LiveVecReg( VR11 ), 239 RegisterSaver_LiveVecReg( VR12 ), 240 RegisterSaver_LiveVecReg( VR13 ), 241 RegisterSaver_LiveVecReg( VR14 ), 242 RegisterSaver_LiveVecReg( VR15 ), 243 RegisterSaver_LiveVecReg( VR16 ), 244 RegisterSaver_LiveVecReg( VR17 ), 245 
RegisterSaver_LiveVecReg( VR18 ), 246 RegisterSaver_LiveVecReg( VR19 ), 247 RegisterSaver_LiveVecReg( VR20 ), 248 RegisterSaver_LiveVecReg( VR21 ), 249 RegisterSaver_LiveVecReg( VR22 ), 250 RegisterSaver_LiveVecReg( VR23 ), 251 RegisterSaver_LiveVecReg( VR24 ), 252 RegisterSaver_LiveVecReg( VR25 ), 253 RegisterSaver_LiveVecReg( VR26 ), 254 RegisterSaver_LiveVecReg( VR27 ), 255 RegisterSaver_LiveVecReg( VR28 ), 256 RegisterSaver_LiveVecReg( VR29 ), 257 RegisterSaver_LiveVecReg( VR30 ), 258 RegisterSaver_LiveVecReg( VR31 ) 259 }; 260 261 262 OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm, 263 int* out_frame_size_in_bytes, 264 bool generate_oop_map, 265 int return_pc_adjustment, 266 ReturnPCLocation return_pc_location, 267 bool save_vectors) { 268 // Push an abi_reg_args-frame and store all registers which may be live. 269 // If requested, create an OopMap: Record volatile registers as 270 // callee-save values in an OopMap so their save locations will be 271 // propagated to the RegisterMap of the caller frame during 272 // StackFrameStream construction (needed for deoptimization; see 273 // compiledVFrame::create_stack_value). 274 // If return_pc_adjustment != 0 adjust the return pc by return_pc_adjustment. 275 // Updated return pc is returned in R31 (if not return_pc_is_pre_saved). 276 277 // calculate frame size 278 const int regstosave_num = sizeof(RegisterSaver_LiveRegs) / 279 sizeof(RegisterSaver::LiveRegType); 280 const int vecregstosave_num = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) / 281 sizeof(RegisterSaver::LiveRegType)) 282 : 0; 283 const int register_save_size = regstosave_num * reg_size + vecregstosave_num * vec_reg_size; 284 const int frame_size_in_bytes = align_up(register_save_size, frame::alignment_in_bytes) 285 + frame::native_abi_reg_args_size; 286 287 *out_frame_size_in_bytes = frame_size_in_bytes; 288 const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); 289 const int register_save_offset = frame_size_in_bytes - register_save_size; 290 291 // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words. 292 OopMap* map = generate_oop_map ? new OopMap(frame_size_in_slots, 0) : nullptr; 293 294 BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {"); 295 296 // push a new frame 297 __ push_frame(frame_size_in_bytes, noreg); 298 299 // Save some registers in the last (non-vector) slots of the new frame so we 300 // can use them as scratch regs or to determine the return pc. 301 __ std(R31, frame_size_in_bytes - reg_size - vecregstosave_num * vec_reg_size, R1_SP); 302 __ std(R30, frame_size_in_bytes - 2*reg_size - vecregstosave_num * vec_reg_size, R1_SP); 303 304 // save the flags 305 // Do the save_LR by hand and adjust the return pc if requested. 
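  // (For return_pc_is_pre_saved the return pc is already in the caller's ABI lr
  //  slot, i.e. at frame_size_in_bytes + _abi0(lr) from the new SP, so no
  //  adjustment is supported and nothing is written back below.)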
  switch (return_pc_location) {
    case return_pc_is_lr:        __ mflr(R31); break;
    case return_pc_is_pre_saved: assert(return_pc_adjustment == 0, "unsupported"); break;
    case return_pc_is_thread_saved_exception_pc: __ ld(R31, thread_(saved_exception_pc)); break;
    default: ShouldNotReachHere();
  }
  if (return_pc_location != return_pc_is_pre_saved) {
    if (return_pc_adjustment != 0) {
      __ addi(R31, R31, return_pc_adjustment);
    }
    __ std(R31, frame_size_in_bytes + _abi0(lr), R1_SP);
  }

  // save all registers (ints and floats)
  int offset = register_save_offset;

  for (int i = 0; i < regstosave_num; i++) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        if (reg_num < 30) { // We spilled R30-31 right at the beginning.
          __ std(as_Register(reg_num), offset, R1_SP);
        }
        break;
      }
      case RegisterSaver::float_reg: {
        __ stfd(as_FloatRegister(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::special_reg: {
        if (reg_num == SR_CTR.encoding()) {
          __ mfctr(R30);
          __ std(R30, offset, R1_SP);
        } else {
          Unimplemented();
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }

    if (generate_oop_map) {
      map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2),
                            RegisterSaver_LiveRegs[i].vmreg);
    }
    offset += reg_size;
  }

  // Note that generate_oop_map in the following loop is only used for the
  // polling_page_vectors_safepoint_handler_blob.
  // The order in which the vector contents are stored depends on Endianness and
  // the utilized instructions (PowerArchitecturePPC64).
  assert(is_aligned(offset, StackAlignmentInBytes), "should be");
  if (PowerArchitecturePPC64 >= 10) {
    assert(is_even(vecregstosave_num), "expectation");
    for (int i = 0; i < vecregstosave_num; i += 2) {
      int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;
      assert(RegisterSaver_LiveVecRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!");

      __ stxvp(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);
      // Note: The contents were read in the same order (see loadV16_Power9 node in ppc.ad).
      if (generate_oop_map) {
        map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2),
                              RegisterSaver_LiveVecRegs[i LITTLE_ENDIAN_ONLY(+1) ].vmreg);
        map->set_callee_saved(VMRegImpl::stack2reg((offset + vec_reg_size) >> 2),
                              RegisterSaver_LiveVecRegs[i BIG_ENDIAN_ONLY(+1) ].vmreg);
      }
      offset += (2 * vec_reg_size);
    }
  } else {
    for (int i = 0; i < vecregstosave_num; i++) {
      int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;

      if (PowerArchitecturePPC64 >= 9) {
        __ stxv(as_VectorRegister(reg_num)->to_vsr(), offset, R1_SP);
      } else {
        __ li(R31, offset);
        __ stxvd2x(as_VectorRegister(reg_num)->to_vsr(), R31, R1_SP);
      }
      // Note: The contents were read in the same order (see loadV16_Power8 / loadV16_Power9 node in ppc.ad).
      if (generate_oop_map) {
        VMReg vsr = RegisterSaver_LiveVecRegs[i].vmreg;
        map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), vsr);
      }
      offset += vec_reg_size;
    }
  }

  assert(offset == frame_size_in_bytes, "consistency check");

  BLOCK_COMMENT("} push_frame_reg_args_and_save_live_registers");

  // And we're done.
  return map;
}


// Pop the current frame and restore all the registers that we
// saved.
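// The restore is the mirror image of the save above: CTR is only reloaded when
// restore_ctr is set (it may already contain the next address), and R31 serves
// as the temporary, so it is reloaded last from its dedicated slot.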
408 void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm, 409 int frame_size_in_bytes, 410 bool restore_ctr, 411 bool save_vectors) { 412 const int regstosave_num = sizeof(RegisterSaver_LiveRegs) / 413 sizeof(RegisterSaver::LiveRegType); 414 const int vecregstosave_num = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) / 415 sizeof(RegisterSaver::LiveRegType)) 416 : 0; 417 const int register_save_size = regstosave_num * reg_size + vecregstosave_num * vec_reg_size; 418 419 const int register_save_offset = frame_size_in_bytes - register_save_size; 420 421 BLOCK_COMMENT("restore_live_registers_and_pop_frame {"); 422 423 // restore all registers (ints and floats) 424 int offset = register_save_offset; 425 426 for (int i = 0; i < regstosave_num; i++) { 427 int reg_num = RegisterSaver_LiveRegs[i].reg_num; 428 int reg_type = RegisterSaver_LiveRegs[i].reg_type; 429 430 switch (reg_type) { 431 case RegisterSaver::int_reg: { 432 if (reg_num != 31) // R31 restored at the end, it's the tmp reg! 433 __ ld(as_Register(reg_num), offset, R1_SP); 434 break; 435 } 436 case RegisterSaver::float_reg: { 437 __ lfd(as_FloatRegister(reg_num), offset, R1_SP); 438 break; 439 } 440 case RegisterSaver::special_reg: { 441 if (reg_num == SR_CTR.encoding()) { 442 if (restore_ctr) { // Nothing to do here if ctr already contains the next address. 443 __ ld(R31, offset, R1_SP); 444 __ mtctr(R31); 445 } 446 } else { 447 Unimplemented(); 448 } 449 break; 450 } 451 default: 452 ShouldNotReachHere(); 453 } 454 offset += reg_size; 455 } 456 457 assert(is_aligned(offset, StackAlignmentInBytes), "should be"); 458 if (PowerArchitecturePPC64 >= 10) { 459 for (int i = 0; i < vecregstosave_num; i += 2) { 460 int reg_num = RegisterSaver_LiveVecRegs[i].reg_num; 461 assert(RegisterSaver_LiveVecRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!"); 462 463 __ lxvp(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP); 464 465 offset += (2 * vec_reg_size); 466 } 467 } else { 468 for (int i = 0; i < vecregstosave_num; i++) { 469 int reg_num = RegisterSaver_LiveVecRegs[i].reg_num; 470 471 if (PowerArchitecturePPC64 >= 9) { 472 __ lxv(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP); 473 } else { 474 __ li(R31, offset); 475 __ lxvd2x(as_VectorRegister(reg_num).to_vsr(), R31, R1_SP); 476 } 477 478 offset += vec_reg_size; 479 } 480 } 481 482 assert(offset == frame_size_in_bytes, "consistency check"); 483 484 // restore link and the flags 485 __ ld(R31, frame_size_in_bytes + _abi0(lr), R1_SP); 486 __ mtlr(R31); 487 488 // restore scratch register's value 489 __ ld(R31, frame_size_in_bytes - reg_size - vecregstosave_num * vec_reg_size, R1_SP); 490 491 // pop the frame 492 __ addi(R1_SP, R1_SP, frame_size_in_bytes); 493 494 BLOCK_COMMENT("} restore_live_registers_and_pop_frame"); 495 } 496 497 void RegisterSaver::push_frame_and_save_argument_registers(MacroAssembler* masm, Register r_temp, 498 int frame_size,int total_args, const VMRegPair *regs, 499 const VMRegPair *regs2) { 500 __ push_frame(frame_size, r_temp); 501 int st_off = frame_size - wordSize; 502 for (int i = 0; i < total_args; i++) { 503 VMReg r_1 = regs[i].first(); 504 VMReg r_2 = regs[i].second(); 505 if (!r_1->is_valid()) { 506 assert(!r_2->is_valid(), ""); 507 continue; 508 } 509 if (r_1->is_Register()) { 510 Register r = r_1->as_Register(); 511 __ std(r, st_off, R1_SP); 512 st_off -= wordSize; 513 } else if (r_1->is_FloatRegister()) { 514 FloatRegister f = r_1->as_FloatRegister(); 515 __ stfd(f, st_off, R1_SP); 516 st_off -= wordSize; 517 } 
518 } 519 if (regs2 != nullptr) { 520 for (int i = 0; i < total_args; i++) { 521 VMReg r_1 = regs2[i].first(); 522 VMReg r_2 = regs2[i].second(); 523 if (!r_1->is_valid()) { 524 assert(!r_2->is_valid(), ""); 525 continue; 526 } 527 if (r_1->is_Register()) { 528 Register r = r_1->as_Register(); 529 __ std(r, st_off, R1_SP); 530 st_off -= wordSize; 531 } else if (r_1->is_FloatRegister()) { 532 FloatRegister f = r_1->as_FloatRegister(); 533 __ stfd(f, st_off, R1_SP); 534 st_off -= wordSize; 535 } 536 } 537 } 538 } 539 540 void RegisterSaver::restore_argument_registers_and_pop_frame(MacroAssembler*masm, int frame_size, 541 int total_args, const VMRegPair *regs, 542 const VMRegPair *regs2) { 543 int st_off = frame_size - wordSize; 544 for (int i = 0; i < total_args; i++) { 545 VMReg r_1 = regs[i].first(); 546 VMReg r_2 = regs[i].second(); 547 if (r_1->is_Register()) { 548 Register r = r_1->as_Register(); 549 __ ld(r, st_off, R1_SP); 550 st_off -= wordSize; 551 } else if (r_1->is_FloatRegister()) { 552 FloatRegister f = r_1->as_FloatRegister(); 553 __ lfd(f, st_off, R1_SP); 554 st_off -= wordSize; 555 } 556 } 557 if (regs2 != nullptr) 558 for (int i = 0; i < total_args; i++) { 559 VMReg r_1 = regs2[i].first(); 560 VMReg r_2 = regs2[i].second(); 561 if (r_1->is_Register()) { 562 Register r = r_1->as_Register(); 563 __ ld(r, st_off, R1_SP); 564 st_off -= wordSize; 565 } else if (r_1->is_FloatRegister()) { 566 FloatRegister f = r_1->as_FloatRegister(); 567 __ lfd(f, st_off, R1_SP); 568 st_off -= wordSize; 569 } 570 } 571 __ pop_frame(); 572 } 573 574 // Restore the registers that might be holding a result. 575 void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes) { 576 const int regstosave_num = sizeof(RegisterSaver_LiveRegs) / 577 sizeof(RegisterSaver::LiveRegType); 578 const int register_save_size = regstosave_num * reg_size; // VS registers not relevant here. 579 const int register_save_offset = frame_size_in_bytes - register_save_size; 580 581 // restore all result registers (ints and floats) 582 int offset = register_save_offset; 583 for (int i = 0; i < regstosave_num; i++) { 584 int reg_num = RegisterSaver_LiveRegs[i].reg_num; 585 int reg_type = RegisterSaver_LiveRegs[i].reg_type; 586 switch (reg_type) { 587 case RegisterSaver::int_reg: { 588 if (as_Register(reg_num)==R3_RET) // int result_reg 589 __ ld(as_Register(reg_num), offset, R1_SP); 590 break; 591 } 592 case RegisterSaver::float_reg: { 593 if (as_FloatRegister(reg_num)==F1_RET) // float result_reg 594 __ lfd(as_FloatRegister(reg_num), offset, R1_SP); 595 break; 596 } 597 case RegisterSaver::special_reg: { 598 // Special registers don't hold a result. 599 break; 600 } 601 default: 602 ShouldNotReachHere(); 603 } 604 offset += reg_size; 605 } 606 607 assert(offset == frame_size_in_bytes, "consistency check"); 608 } 609 610 // Is vector's size (in bytes) bigger than a size saved by default? 611 bool SharedRuntime::is_wide_vector(int size) { 612 // Note, MaxVectorSize == 8/16 on PPC64. 613 assert(size <= (SuperwordUseVSX ? 
16 : 8), "%d bytes vectors are not supported", size); 614 return size > 8; 615 } 616 617 static int reg2slot(VMReg r) { 618 return r->reg2stack() + SharedRuntime::out_preserve_stack_slots(); 619 } 620 621 static int reg2offset(VMReg r) { 622 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; 623 } 624 625 // --------------------------------------------------------------------------- 626 // Read the array of BasicTypes from a signature, and compute where the 627 // arguments should go. Values in the VMRegPair regs array refer to 4-byte 628 // quantities. Values less than VMRegImpl::stack0 are registers, those above 629 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer 630 // as framesizes are fixed. 631 // VMRegImpl::stack0 refers to the first slot 0(sp). 632 // and VMRegImpl::stack0+1 refers to the memory word 4-bytes higher. Register 633 // up to Register::number_of_registers) are the 64-bit 634 // integer registers. 635 636 // Note: the INPUTS in sig_bt are in units of Java argument words, which are 637 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit 638 // units regardless of build. Of course for i486 there is no 64 bit build 639 640 // The Java calling convention is a "shifted" version of the C ABI. 641 // By skipping the first C ABI register we can call non-static jni methods 642 // with small numbers of arguments without having to shuffle the arguments 643 // at all. Since we control the java ABI we ought to at least get some 644 // advantage out of it. 645 646 const VMReg java_iarg_reg[8] = { 647 R3->as_VMReg(), 648 R4->as_VMReg(), 649 R5->as_VMReg(), 650 R6->as_VMReg(), 651 R7->as_VMReg(), 652 R8->as_VMReg(), 653 R9->as_VMReg(), 654 R10->as_VMReg() 655 }; 656 657 const VMReg java_farg_reg[13] = { 658 F1->as_VMReg(), 659 F2->as_VMReg(), 660 F3->as_VMReg(), 661 F4->as_VMReg(), 662 F5->as_VMReg(), 663 F6->as_VMReg(), 664 F7->as_VMReg(), 665 F8->as_VMReg(), 666 F9->as_VMReg(), 667 F10->as_VMReg(), 668 F11->as_VMReg(), 669 F12->as_VMReg(), 670 F13->as_VMReg() 671 }; 672 673 const int num_java_iarg_registers = sizeof(java_iarg_reg) / sizeof(java_iarg_reg[0]); 674 const int num_java_farg_registers = sizeof(java_farg_reg) / sizeof(java_farg_reg[0]); 675 676 STATIC_ASSERT(num_java_iarg_registers == Argument::n_int_register_parameters_j); 677 STATIC_ASSERT(num_java_farg_registers == Argument::n_float_register_parameters_j); 678 679 int SharedRuntime::java_calling_convention(const BasicType *sig_bt, 680 VMRegPair *regs, 681 int total_args_passed) { 682 // C2c calling conventions for compiled-compiled calls. 683 // Put 8 ints/longs into registers _AND_ 13 float/doubles into 684 // registers _AND_ put the rest on the stack. 685 686 const int inc_stk_for_intfloat = 1; // 1 slots for ints and floats 687 const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles 688 689 int i; 690 VMReg reg; 691 int stk = 0; 692 int ireg = 0; 693 int freg = 0; 694 695 // We put the first 8 arguments into registers and the rest on the 696 // stack, float arguments are already in their argument registers 697 // due to c2c calling conventions (see calling_convention). 698 for (int i = 0; i < total_args_passed; ++i) { 699 switch(sig_bt[i]) { 700 case T_BOOLEAN: 701 case T_CHAR: 702 case T_BYTE: 703 case T_SHORT: 704 case T_INT: 705 if (ireg < num_java_iarg_registers) { 706 // Put int/ptr in register 707 reg = java_iarg_reg[ireg]; 708 ++ireg; 709 } else { 710 // Put int/ptr on stack. 
711 reg = VMRegImpl::stack2reg(stk); 712 stk += inc_stk_for_intfloat; 713 } 714 regs[i].set1(reg); 715 break; 716 case T_LONG: 717 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half"); 718 if (ireg < num_java_iarg_registers) { 719 // Put long in register. 720 reg = java_iarg_reg[ireg]; 721 ++ireg; 722 } else { 723 // Put long on stack. They must be aligned to 2 slots. 724 if (stk & 0x1) ++stk; 725 reg = VMRegImpl::stack2reg(stk); 726 stk += inc_stk_for_longdouble; 727 } 728 regs[i].set2(reg); 729 break; 730 case T_OBJECT: 731 case T_ARRAY: 732 case T_ADDRESS: 733 if (ireg < num_java_iarg_registers) { 734 // Put ptr in register. 735 reg = java_iarg_reg[ireg]; 736 ++ireg; 737 } else { 738 // Put ptr on stack. Objects must be aligned to 2 slots too, 739 // because "64-bit pointers record oop-ishness on 2 aligned 740 // adjacent registers." (see OopFlow::build_oop_map). 741 if (stk & 0x1) ++stk; 742 reg = VMRegImpl::stack2reg(stk); 743 stk += inc_stk_for_longdouble; 744 } 745 regs[i].set2(reg); 746 break; 747 case T_FLOAT: 748 if (freg < num_java_farg_registers) { 749 // Put float in register. 750 reg = java_farg_reg[freg]; 751 ++freg; 752 } else { 753 // Put float on stack. 754 reg = VMRegImpl::stack2reg(stk); 755 stk += inc_stk_for_intfloat; 756 } 757 regs[i].set1(reg); 758 break; 759 case T_DOUBLE: 760 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half"); 761 if (freg < num_java_farg_registers) { 762 // Put double in register. 763 reg = java_farg_reg[freg]; 764 ++freg; 765 } else { 766 // Put double on stack. They must be aligned to 2 slots. 767 if (stk & 0x1) ++stk; 768 reg = VMRegImpl::stack2reg(stk); 769 stk += inc_stk_for_longdouble; 770 } 771 regs[i].set2(reg); 772 break; 773 case T_VOID: 774 // Do not count halves. 775 regs[i].set_bad(); 776 break; 777 default: 778 ShouldNotReachHere(); 779 } 780 } 781 return stk; 782 } 783 784 #if defined(COMPILER1) || defined(COMPILER2) 785 // Calling convention for calling C code. 786 int SharedRuntime::c_calling_convention(const BasicType *sig_bt, 787 VMRegPair *regs, 788 int total_args_passed) { 789 // Calling conventions for C runtime calls and calls to JNI native methods. 790 // 791 // PPC64 convention: Hoist the first 8 int/ptr/long's in the first 8 792 // int regs, leaving int regs undefined if the arg is flt/dbl. Hoist 793 // the first 13 flt/dbl's in the first 13 fp regs but additionally 794 // copy flt/dbl to the stack if they are beyond the 8th argument. 795 796 const VMReg iarg_reg[8] = { 797 R3->as_VMReg(), 798 R4->as_VMReg(), 799 R5->as_VMReg(), 800 R6->as_VMReg(), 801 R7->as_VMReg(), 802 R8->as_VMReg(), 803 R9->as_VMReg(), 804 R10->as_VMReg() 805 }; 806 807 const VMReg farg_reg[13] = { 808 F1->as_VMReg(), 809 F2->as_VMReg(), 810 F3->as_VMReg(), 811 F4->as_VMReg(), 812 F5->as_VMReg(), 813 F6->as_VMReg(), 814 F7->as_VMReg(), 815 F8->as_VMReg(), 816 F9->as_VMReg(), 817 F10->as_VMReg(), 818 F11->as_VMReg(), 819 F12->as_VMReg(), 820 F13->as_VMReg() 821 }; 822 823 // Check calling conventions consistency. 
824 assert(sizeof(iarg_reg) / sizeof(iarg_reg[0]) == Argument::n_int_register_parameters_c && 825 sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c, 826 "consistency"); 827 828 const int additional_frame_header_slots = ((frame::native_abi_minframe_size - frame::jit_out_preserve_size) 829 / VMRegImpl::stack_slot_size); 830 const int float_offset_in_slots = Argument::float_on_stack_offset_in_bytes_c / VMRegImpl::stack_slot_size; 831 832 VMReg reg; 833 int arg = 0; 834 int freg = 0; 835 bool stack_used = false; 836 837 for (int i = 0; i < total_args_passed; ++i, ++arg) { 838 // Each argument corresponds to a slot in the Parameter Save Area (if not omitted) 839 int stk = (arg * 2) + additional_frame_header_slots; 840 841 switch(sig_bt[i]) { 842 // 843 // If arguments 0-7 are integers, they are passed in integer registers. 844 // Argument i is placed in iarg_reg[i]. 845 // 846 case T_BOOLEAN: 847 case T_CHAR: 848 case T_BYTE: 849 case T_SHORT: 850 case T_INT: 851 // We must cast ints to longs and use full 64 bit stack slots 852 // here. Thus fall through, handle as long. 853 case T_LONG: 854 case T_OBJECT: 855 case T_ARRAY: 856 case T_ADDRESS: 857 case T_METADATA: 858 // Oops are already boxed if required (JNI). 859 if (arg < Argument::n_int_register_parameters_c) { 860 reg = iarg_reg[arg]; 861 } else { 862 reg = VMRegImpl::stack2reg(stk); 863 stack_used = true; 864 } 865 regs[i].set2(reg); 866 break; 867 868 // 869 // Floats are treated differently from int regs: The first 13 float arguments 870 // are passed in registers (not the float args among the first 13 args). 871 // Thus argument i is NOT passed in farg_reg[i] if it is float. It is passed 872 // in farg_reg[j] if argument i is the j-th float argument of this call. 873 // 874 case T_FLOAT: 875 if (freg < Argument::n_float_register_parameters_c) { 876 // Put float in register ... 877 reg = farg_reg[freg]; 878 ++freg; 879 } else { 880 // Put float on stack. 881 reg = VMRegImpl::stack2reg(stk + float_offset_in_slots); 882 stack_used = true; 883 } 884 regs[i].set1(reg); 885 break; 886 case T_DOUBLE: 887 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half"); 888 if (freg < Argument::n_float_register_parameters_c) { 889 // Put double in register ... 890 reg = farg_reg[freg]; 891 ++freg; 892 } else { 893 // Put double on stack. 894 reg = VMRegImpl::stack2reg(stk); 895 stack_used = true; 896 } 897 regs[i].set2(reg); 898 break; 899 900 case T_VOID: 901 // Do not count halves. 902 regs[i].set_bad(); 903 --arg; 904 break; 905 default: 906 ShouldNotReachHere(); 907 } 908 } 909 910 // Return size of the stack frame excluding the jit_out_preserve part in single-word slots. 911 #if defined(ABI_ELFv2) 912 assert(additional_frame_header_slots == 0, "ABIv2 shouldn't use extra slots"); 913 // ABIv2 allows omitting the Parameter Save Area if the callee's prototype 914 // indicates that all parameters can be passed in registers. 915 return stack_used ? (arg * 2) : 0; 916 #else 917 // The Parameter Save Area needs to be at least 8 double-word slots for ABIv1. 918 // We have to add extra slots because ABIv1 uses a larger header. 
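  // Example (hypothetical signature (jint, jdouble, jobject)): arg ends up as 3,
  // so MAX2(3, 8) * 2 = 16 single-word slots are reserved for the Parameter Save
  // Area, plus additional_frame_header_slots for the larger ABIv1 header.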
919 return MAX2(arg, 8) * 2 + additional_frame_header_slots; 920 #endif 921 } 922 #endif // COMPILER2 923 924 int SharedRuntime::vector_calling_convention(VMRegPair *regs, 925 uint num_bits, 926 uint total_args_passed) { 927 Unimplemented(); 928 return 0; 929 } 930 931 static address gen_c2i_adapter(MacroAssembler *masm, 932 int total_args_passed, 933 int comp_args_on_stack, 934 const BasicType *sig_bt, 935 const VMRegPair *regs, 936 Label& call_interpreter, 937 const Register& ientry) { 938 939 address c2i_entrypoint; 940 941 const Register sender_SP = R21_sender_SP; // == R21_tmp1 942 const Register code = R22_tmp2; 943 //const Register ientry = R23_tmp3; 944 const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 }; 945 const int num_value_regs = sizeof(value_regs) / sizeof(Register); 946 int value_regs_index = 0; 947 948 const Register return_pc = R27_tmp7; 949 const Register tmp = R28_tmp8; 950 951 assert_different_registers(sender_SP, code, ientry, return_pc, tmp); 952 953 // Adapter needs TOP_IJAVA_FRAME_ABI. 954 const int adapter_size = frame::top_ijava_frame_abi_size + 955 align_up(total_args_passed * wordSize, frame::alignment_in_bytes); 956 957 // regular (verified) c2i entry point 958 c2i_entrypoint = __ pc(); 959 960 // Does compiled code exists? If yes, patch the caller's callsite. 961 __ ld(code, method_(code)); 962 __ cmpdi(CR0, code, 0); 963 __ ld(ientry, method_(interpreter_entry)); // preloaded 964 __ beq(CR0, call_interpreter); 965 966 967 // Patch caller's callsite, method_(code) was not null which means that 968 // compiled code exists. 969 __ mflr(return_pc); 970 __ std(return_pc, _abi0(lr), R1_SP); 971 RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs); 972 973 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), R19_method, return_pc); 974 975 RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs); 976 __ ld(return_pc, _abi0(lr), R1_SP); 977 __ ld(ientry, method_(interpreter_entry)); // preloaded 978 __ mtlr(return_pc); 979 980 981 // Call the interpreter. 982 __ BIND(call_interpreter); 983 __ mtctr(ientry); 984 985 // Get a copy of the current SP for loading caller's arguments. 986 __ mr(sender_SP, R1_SP); 987 988 // Add space for the adapter. 989 __ resize_frame(-adapter_size, R12_scratch2); 990 991 int st_off = adapter_size - wordSize; 992 993 // Write the args into the outgoing interpreter space. 994 for (int i = 0; i < total_args_passed; i++) { 995 VMReg r_1 = regs[i].first(); 996 VMReg r_2 = regs[i].second(); 997 if (!r_1->is_valid()) { 998 assert(!r_2->is_valid(), ""); 999 continue; 1000 } 1001 if (r_1->is_stack()) { 1002 Register tmp_reg = value_regs[value_regs_index]; 1003 value_regs_index = (value_regs_index + 1) % num_value_regs; 1004 // The calling convention produces OptoRegs that ignore the out 1005 // preserve area (JIT's ABI). We must account for it here. 1006 int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; 1007 if (!r_2->is_valid()) { 1008 __ lwz(tmp_reg, ld_off, sender_SP); 1009 } else { 1010 __ ld(tmp_reg, ld_off, sender_SP); 1011 } 1012 // Pretend stack targets were loaded into tmp_reg. 
1013 r_1 = tmp_reg->as_VMReg(); 1014 } 1015 1016 if (r_1->is_Register()) { 1017 Register r = r_1->as_Register(); 1018 if (!r_2->is_valid()) { 1019 __ stw(r, st_off, R1_SP); 1020 st_off-=wordSize; 1021 } else { 1022 // Longs are given 2 64-bit slots in the interpreter, but the 1023 // data is passed in only 1 slot. 1024 if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { 1025 DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); ) 1026 st_off-=wordSize; 1027 } 1028 __ std(r, st_off, R1_SP); 1029 st_off-=wordSize; 1030 } 1031 } else { 1032 assert(r_1->is_FloatRegister(), ""); 1033 FloatRegister f = r_1->as_FloatRegister(); 1034 if (!r_2->is_valid()) { 1035 __ stfs(f, st_off, R1_SP); 1036 st_off-=wordSize; 1037 } else { 1038 // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the 1039 // data is passed in only 1 slot. 1040 // One of these should get known junk... 1041 DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); ) 1042 st_off-=wordSize; 1043 __ stfd(f, st_off, R1_SP); 1044 st_off-=wordSize; 1045 } 1046 } 1047 } 1048 1049 // Jump to the interpreter just as if interpreter was doing it. 1050 1051 __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1); 1052 1053 // load TOS 1054 __ addi(R15_esp, R1_SP, st_off); 1055 1056 // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1. 1057 assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register"); 1058 __ bctr(); 1059 1060 return c2i_entrypoint; 1061 } 1062 1063 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, 1064 int total_args_passed, 1065 int comp_args_on_stack, 1066 const BasicType *sig_bt, 1067 const VMRegPair *regs) { 1068 1069 // Load method's entry-point from method. 1070 __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method); 1071 __ mtctr(R12_scratch2); 1072 1073 // We will only enter here from an interpreted frame and never from after 1074 // passing thru a c2i. Azul allowed this but we do not. If we lose the 1075 // race and use a c2i we will remain interpreted for the race loser(s). 1076 // This removes all sorts of headaches on the x86 side and also eliminates 1077 // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. 1078 1079 // Note: r13 contains the senderSP on entry. We must preserve it since 1080 // we may do a i2c -> c2i transition if we lose a race where compiled 1081 // code goes non-entrant while we get args ready. 1082 // In addition we use r13 to locate all the interpreter args as 1083 // we must align the stack to 16 bytes on an i2c entry else we 1084 // lose alignment we expect in all compiled code and register 1085 // save code can segv when fxsave instructions find improperly 1086 // aligned stack pointer. 1087 1088 const Register ld_ptr = R15_esp; 1089 const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 }; 1090 const int num_value_regs = sizeof(value_regs) / sizeof(Register); 1091 int value_regs_index = 0; 1092 1093 int ld_offset = total_args_passed*wordSize; 1094 1095 // Cut-out for having no stack args. Since up to 2 int/oop args are passed 1096 // in registers, we will occasionally have no stack args. 1097 int comp_words_on_stack = 0; 1098 if (comp_args_on_stack) { 1099 // Sig words on the stack are greater-than VMRegImpl::stack0. Those in 1100 // registers are below. By subtracting stack0, we either get a negative 1101 // number (all values in registers) or the maximum stack slot accessed. 
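    // Example: comp_args_on_stack = 3 slots -> 12 bytes, aligned up to 16 bytes
    // = 2 words, already a multiple of 2, so the frame below is grown by 16 bytes.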

    // Convert 4-byte c2 stack slots to words.
    comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize.
    comp_words_on_stack = align_up(comp_words_on_stack, 2);
    __ resize_frame(-comp_words_on_stack * wordSize, R11_scratch1);
  }

  // Now generate the shuffle code. Pick up all register args and move the
  // rest through the value_regs temporaries.
  BLOCK_COMMENT("Shuffle arguments");
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from ld_ptr.
    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_FloatRegister()) {
      if (!r_2->is_valid()) {
        __ lfs(r_1->as_FloatRegister(), ld_offset, ld_ptr);
        ld_offset-=wordSize;
      } else {
        // Skip the unused interpreter slot.
        __ lfd(r_1->as_FloatRegister(), ld_offset-wordSize, ld_ptr);
        ld_offset-=2*wordSize;
      }
    } else {
      Register r;
      if (r_1->is_stack()) {
        // Must do a memory to memory move thru "value".
        r = value_regs[value_regs_index];
        value_regs_index = (value_regs_index + 1) % num_value_regs;
      } else {
        r = r_1->as_Register();
      }
      if (!r_2->is_valid()) {
        // Not sure we need to do this but it shouldn't hurt.
        if (is_reference_type(sig_bt[i]) || sig_bt[i] == T_ADDRESS) {
          __ ld(r, ld_offset, ld_ptr);
          ld_offset-=wordSize;
        } else {
          __ lwz(r, ld_offset, ld_ptr);
          ld_offset-=wordSize;
        }
      } else {
        // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          ld_offset-=wordSize;
        }
        __ ld(r, ld_offset, ld_ptr);
        ld_offset-=wordSize;
      }

      if (r_1->is_stack()) {
        // Now store value where the compiler expects it
        int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size;

        if (sig_bt[i] == T_INT || sig_bt[i] == T_FLOAT || sig_bt[i] == T_BOOLEAN ||
            sig_bt[i] == T_SHORT || sig_bt[i] == T_CHAR || sig_bt[i] == T_BYTE) {
          __ stw(r, st_off, R1_SP);
        } else {
          __ std(r, st_off, R1_SP);
        }
      }
    }
  }

  __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about

  BLOCK_COMMENT("Store method");
  // Store method into thread->callee_target.
  // We might end up in handle_wrong_method if the callee is
  // deoptimized as we race thru here. If that happens we don't want
  // to take a safepoint because the caller frame will look
  // interpreted and arguments are now "compiled" so it is much better
  // to make this transition invisible to the stack walking
  // code. Unfortunately if we try and find the callee by normal means
  // a safepoint is possible. So we stash the desired callee in the
  // thread and the VM will find it there should this case occur.
  __ std(R19_method, thread_(callee_target));

  // Jump to the compiled code just as if compiled code was doing it.
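  // (CTR was loaded with Method::from_compiled_offset() at the top of this adapter,
  //  and the frame was resized above so the callee finds its stack args at the
  //  out-preserve offsets it expects.)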
1194 __ bctr(); 1195 } 1196 1197 void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, 1198 int total_args_passed, 1199 int comp_args_on_stack, 1200 const BasicType *sig_bt, 1201 const VMRegPair *regs, 1202 address entry_address[AdapterBlob::ENTRY_COUNT]) { 1203 // entry: i2c 1204 1205 __ align(CodeEntryAlignment); 1206 entry_address[AdapterBlob::I2C] = __ pc(); 1207 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); 1208 1209 1210 // entry: c2i unverified 1211 1212 __ align(CodeEntryAlignment); 1213 BLOCK_COMMENT("c2i unverified entry"); 1214 entry_address[AdapterBlob::C2I_Unverified] = __ pc(); 1215 1216 // inline_cache contains a CompiledICData 1217 const Register ic = R19_inline_cache_reg; 1218 const Register ic_klass = R11_scratch1; 1219 const Register receiver_klass = R12_scratch2; 1220 const Register code = R21_tmp1; 1221 const Register ientry = R23_tmp3; 1222 1223 assert_different_registers(ic, ic_klass, receiver_klass, R3_ARG1, code, ientry); 1224 assert(R11_scratch1 == R11, "need prologue scratch register"); 1225 1226 Label call_interpreter; 1227 1228 __ ic_check(4 /* end_alignment */); 1229 __ ld(R19_method, CompiledICData::speculated_method_offset(), ic); 1230 // Argument is valid and klass is as expected, continue. 1231 1232 __ ld(code, method_(code)); 1233 __ cmpdi(CR0, code, 0); 1234 __ ld(ientry, method_(interpreter_entry)); // preloaded 1235 __ beq_predict_taken(CR0, call_interpreter); 1236 1237 // Branch to ic_miss_stub. 1238 __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); 1239 1240 // entry: c2i 1241 1242 entry_address[AdapterBlob::C2I] = __ pc(); 1243 1244 // Class initialization barrier for static methods 1245 entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr; 1246 if (VM_Version::supports_fast_class_init_checks()) { 1247 Label L_skip_barrier; 1248 1249 { // Bypass the barrier for non-static methods 1250 __ lhz(R0, in_bytes(Method::access_flags_offset()), R19_method); 1251 __ andi_(R0, R0, JVM_ACC_STATIC); 1252 __ beq(CR0, L_skip_barrier); // non-static 1253 } 1254 1255 Register klass = R11_scratch1; 1256 __ load_method_holder(klass, R19_method); 1257 __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/); 1258 1259 __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0); 1260 __ mtctr(klass); 1261 __ bctr(); 1262 1263 __ bind(L_skip_barrier); 1264 entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc(); 1265 } 1266 1267 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 1268 bs->c2i_entry_barrier(masm, /* tmp register*/ ic_klass, /* tmp register*/ receiver_klass, /* tmp register*/ code); 1269 1270 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, call_interpreter, ientry); 1271 return; 1272 } 1273 1274 // An oop arg. Must pass a handle not the oop itself. 1275 static void object_move(MacroAssembler* masm, 1276 int frame_size_in_slots, 1277 OopMap* oop_map, int oop_handle_offset, 1278 bool is_receiver, int* receiver_offset, 1279 VMRegPair src, VMRegPair dst, 1280 Register r_caller_sp, Register r_temp_1, Register r_temp_2) { 1281 assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), 1282 "receiver has already been moved"); 1283 1284 // We must pass a handle. First figure out the location we use as a handle. 1285 1286 if (src.first()->is_stack()) { 1287 // stack to stack or reg 1288 1289 const Register r_handle = dst.first()->is_stack() ? 
r_temp_1 : dst.first()->as_Register(); 1290 Label skip; 1291 const int oop_slot_in_callers_frame = reg2slot(src.first()); 1292 1293 guarantee(!is_receiver, "expecting receiver in register"); 1294 oop_map->set_oop(VMRegImpl::stack2reg(oop_slot_in_callers_frame + frame_size_in_slots)); 1295 1296 __ addi(r_handle, r_caller_sp, reg2offset(src.first())); 1297 __ ld( r_temp_2, reg2offset(src.first()), r_caller_sp); 1298 __ cmpdi(CR0, r_temp_2, 0); 1299 __ bne(CR0, skip); 1300 // Use a null handle if oop is null. 1301 __ li(r_handle, 0); 1302 __ bind(skip); 1303 1304 if (dst.first()->is_stack()) { 1305 // stack to stack 1306 __ std(r_handle, reg2offset(dst.first()), R1_SP); 1307 } else { 1308 // stack to reg 1309 // Nothing to do, r_handle is already the dst register. 1310 } 1311 } else { 1312 // reg to stack or reg 1313 const Register r_oop = src.first()->as_Register(); 1314 const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register(); 1315 const int oop_slot = (r_oop->encoding()-R3_ARG1->encoding()) * VMRegImpl::slots_per_word 1316 + oop_handle_offset; // in slots 1317 const int oop_offset = oop_slot * VMRegImpl::stack_slot_size; 1318 Label skip; 1319 1320 if (is_receiver) { 1321 *receiver_offset = oop_offset; 1322 } 1323 oop_map->set_oop(VMRegImpl::stack2reg(oop_slot)); 1324 1325 __ std( r_oop, oop_offset, R1_SP); 1326 __ addi(r_handle, R1_SP, oop_offset); 1327 1328 __ cmpdi(CR0, r_oop, 0); 1329 __ bne(CR0, skip); 1330 // Use a null handle if oop is null. 1331 __ li(r_handle, 0); 1332 __ bind(skip); 1333 1334 if (dst.first()->is_stack()) { 1335 // reg to stack 1336 __ std(r_handle, reg2offset(dst.first()), R1_SP); 1337 } else { 1338 // reg to reg 1339 // Nothing to do, r_handle is already the dst register. 1340 } 1341 } 1342 } 1343 1344 static void int_move(MacroAssembler*masm, 1345 VMRegPair src, VMRegPair dst, 1346 Register r_caller_sp, Register r_temp) { 1347 assert(src.first()->is_valid(), "incoming must be int"); 1348 assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long"); 1349 1350 if (src.first()->is_stack()) { 1351 if (dst.first()->is_stack()) { 1352 // stack to stack 1353 __ lwa(r_temp, reg2offset(src.first()), r_caller_sp); 1354 __ std(r_temp, reg2offset(dst.first()), R1_SP); 1355 } else { 1356 // stack to reg 1357 __ lwa(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp); 1358 } 1359 } else if (dst.first()->is_stack()) { 1360 // reg to stack 1361 __ extsw(r_temp, src.first()->as_Register()); 1362 __ std(r_temp, reg2offset(dst.first()), R1_SP); 1363 } else { 1364 // reg to reg 1365 __ extsw(dst.first()->as_Register(), src.first()->as_Register()); 1366 } 1367 } 1368 1369 static void long_move(MacroAssembler*masm, 1370 VMRegPair src, VMRegPair dst, 1371 Register r_caller_sp, Register r_temp) { 1372 assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long"); 1373 assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long"); 1374 1375 if (src.first()->is_stack()) { 1376 if (dst.first()->is_stack()) { 1377 // stack to stack 1378 __ ld( r_temp, reg2offset(src.first()), r_caller_sp); 1379 __ std(r_temp, reg2offset(dst.first()), R1_SP); 1380 } else { 1381 // stack to reg 1382 __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp); 1383 } 1384 } else if (dst.first()->is_stack()) { 1385 // reg to stack 1386 __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP); 1387 } else { 1388 // reg to reg 1389 if 
(dst.first()->as_Register() != src.first()->as_Register()) 1390 __ mr(dst.first()->as_Register(), src.first()->as_Register()); 1391 } 1392 } 1393 1394 static void float_move(MacroAssembler*masm, 1395 VMRegPair src, VMRegPair dst, 1396 Register r_caller_sp, Register r_temp) { 1397 assert(src.first()->is_valid() && !src.second()->is_valid(), "incoming must be float"); 1398 assert(dst.first()->is_valid() && !dst.second()->is_valid(), "outgoing must be float"); 1399 1400 if (src.first()->is_stack()) { 1401 if (dst.first()->is_stack()) { 1402 // stack to stack 1403 __ lwz(r_temp, reg2offset(src.first()), r_caller_sp); 1404 __ stw(r_temp, reg2offset(dst.first()), R1_SP); 1405 } else { 1406 // stack to reg 1407 __ lfs(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp); 1408 } 1409 } else if (dst.first()->is_stack()) { 1410 // reg to stack 1411 __ stfs(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP); 1412 } else { 1413 // reg to reg 1414 if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister()) 1415 __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 1416 } 1417 } 1418 1419 static void double_move(MacroAssembler*masm, 1420 VMRegPair src, VMRegPair dst, 1421 Register r_caller_sp, Register r_temp) { 1422 assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be double"); 1423 assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be double"); 1424 1425 if (src.first()->is_stack()) { 1426 if (dst.first()->is_stack()) { 1427 // stack to stack 1428 __ ld( r_temp, reg2offset(src.first()), r_caller_sp); 1429 __ std(r_temp, reg2offset(dst.first()), R1_SP); 1430 } else { 1431 // stack to reg 1432 __ lfd(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp); 1433 } 1434 } else if (dst.first()->is_stack()) { 1435 // reg to stack 1436 __ stfd(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP); 1437 } else { 1438 // reg to reg 1439 if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister()) 1440 __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 1441 } 1442 } 1443 1444 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { 1445 switch (ret_type) { 1446 case T_BOOLEAN: 1447 case T_CHAR: 1448 case T_BYTE: 1449 case T_SHORT: 1450 case T_INT: 1451 __ stw (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); 1452 break; 1453 case T_ARRAY: 1454 case T_OBJECT: 1455 case T_LONG: 1456 __ std (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); 1457 break; 1458 case T_FLOAT: 1459 __ stfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); 1460 break; 1461 case T_DOUBLE: 1462 __ stfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); 1463 break; 1464 case T_VOID: 1465 break; 1466 default: 1467 ShouldNotReachHere(); 1468 break; 1469 } 1470 } 1471 1472 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { 1473 switch (ret_type) { 1474 case T_BOOLEAN: 1475 case T_CHAR: 1476 case T_BYTE: 1477 case T_SHORT: 1478 case T_INT: 1479 __ lwz(R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); 1480 break; 1481 case T_ARRAY: 1482 case T_OBJECT: 1483 case T_LONG: 1484 __ ld (R3_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); 1485 break; 1486 case T_FLOAT: 1487 __ lfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); 1488 break; 1489 case T_DOUBLE: 1490 __ lfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP); 
1491 break; 1492 case T_VOID: 1493 break; 1494 default: 1495 ShouldNotReachHere(); 1496 break; 1497 } 1498 } 1499 1500 static void verify_oop_args(MacroAssembler* masm, 1501 const methodHandle& method, 1502 const BasicType* sig_bt, 1503 const VMRegPair* regs) { 1504 Register temp_reg = R19_method; // not part of any compiled calling seq 1505 if (VerifyOops) { 1506 for (int i = 0; i < method->size_of_parameters(); i++) { 1507 if (is_reference_type(sig_bt[i])) { 1508 VMReg r = regs[i].first(); 1509 assert(r->is_valid(), "bad oop arg"); 1510 if (r->is_stack()) { 1511 __ ld(temp_reg, reg2offset(r), R1_SP); 1512 __ verify_oop(temp_reg, FILE_AND_LINE); 1513 } else { 1514 __ verify_oop(r->as_Register(), FILE_AND_LINE); 1515 } 1516 } 1517 } 1518 } 1519 } 1520 1521 static void gen_special_dispatch(MacroAssembler* masm, 1522 const methodHandle& method, 1523 const BasicType* sig_bt, 1524 const VMRegPair* regs) { 1525 verify_oop_args(masm, method, sig_bt, regs); 1526 vmIntrinsics::ID iid = method->intrinsic_id(); 1527 1528 // Now write the args into the outgoing interpreter space 1529 bool has_receiver = false; 1530 Register receiver_reg = noreg; 1531 int member_arg_pos = -1; 1532 Register member_reg = noreg; 1533 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1534 if (ref_kind != 0) { 1535 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 1536 member_reg = R19_method; // known to be free at this point 1537 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 1538 } else if (iid == vmIntrinsics::_invokeBasic) { 1539 has_receiver = true; 1540 } else if (iid == vmIntrinsics::_linkToNative) { 1541 member_arg_pos = method->size_of_parameters() - 1; // trailing NativeEntryPoint argument 1542 member_reg = R19_method; // known to be free at this point 1543 } else { 1544 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); 1545 } 1546 1547 if (member_reg != noreg) { 1548 // Load the member_arg into register, if necessary. 1549 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 1550 VMReg r = regs[member_arg_pos].first(); 1551 if (r->is_stack()) { 1552 __ ld(member_reg, reg2offset(r), R1_SP); 1553 } else { 1554 // no data motion is needed 1555 member_reg = r->as_Register(); 1556 } 1557 } 1558 1559 if (has_receiver) { 1560 // Make sure the receiver is loaded into a register. 1561 assert(method->size_of_parameters() > 0, "oob"); 1562 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 1563 VMReg r = regs[0].first(); 1564 assert(r->is_valid(), "bad receiver arg"); 1565 if (r->is_stack()) { 1566 // Porting note: This assumes that compiled calling conventions always 1567 // pass the receiver oop in a register. If this is not true on some 1568 // platform, pick a temp and load the receiver from stack. 1569 fatal("receiver always in a register"); 1570 receiver_reg = R11_scratch1; // TODO (hs24): is R11_scratch1 really free at this point? 1571 __ ld(receiver_reg, reg2offset(r), R1_SP); 1572 } else { 1573 // no data motion is needed 1574 receiver_reg = r->as_Register(); 1575 } 1576 } 1577 1578 // Figure out which address we are really jumping to: 1579 MethodHandles::generate_method_handle_dispatch(masm, iid, 1580 receiver_reg, member_reg, /*for_compiler_entry:*/ true); 1581 } 1582 1583 //---------------------------- continuation_enter_setup --------------------------- 1584 // 1585 // Frame setup. 1586 // 1587 // Arguments: 1588 // None. 
1589 // 1590 // Results: 1591 // R1_SP: pointer to blank ContinuationEntry in the pushed frame. 1592 // 1593 // Kills: 1594 // R0, R20 1595 // 1596 static OopMap* continuation_enter_setup(MacroAssembler* masm, int& framesize_words) { 1597 assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, ""); 1598 assert(in_bytes(ContinuationEntry::cont_offset()) % VMRegImpl::stack_slot_size == 0, ""); 1599 assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, ""); 1600 1601 const int frame_size_in_bytes = (int)ContinuationEntry::size(); 1602 assert(is_aligned(frame_size_in_bytes, frame::alignment_in_bytes), "alignment error"); 1603 1604 framesize_words = frame_size_in_bytes / wordSize; 1605 1606 DEBUG_ONLY(__ block_comment("setup {")); 1607 // Save return pc and push entry frame 1608 const Register return_pc = R20; 1609 __ mflr(return_pc); 1610 __ std(return_pc, _abi0(lr), R1_SP); // SP->lr = return_pc 1611 __ push_frame(frame_size_in_bytes , R0); // SP -= frame_size_in_bytes 1612 1613 OopMap* map = new OopMap((int)frame_size_in_bytes / VMRegImpl::stack_slot_size, 0 /* arg_slots*/); 1614 1615 __ ld_ptr(R0, JavaThread::cont_entry_offset(), R16_thread); 1616 __ st_ptr(R1_SP, JavaThread::cont_entry_offset(), R16_thread); 1617 __ st_ptr(R0, ContinuationEntry::parent_offset(), R1_SP); 1618 DEBUG_ONLY(__ block_comment("} setup")); 1619 1620 return map; 1621 } 1622 1623 //---------------------------- fill_continuation_entry --------------------------- 1624 // 1625 // Initialize the new ContinuationEntry. 1626 // 1627 // Arguments: 1628 // R1_SP: pointer to blank Continuation entry 1629 // reg_cont_obj: pointer to the continuation 1630 // reg_flags: flags 1631 // 1632 // Results: 1633 // R1_SP: pointer to filled out ContinuationEntry 1634 // 1635 // Kills: 1636 // R8_ARG6, R9_ARG7, R10_ARG8 1637 // 1638 static void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Register reg_flags) { 1639 assert_different_registers(reg_cont_obj, reg_flags); 1640 Register zero = R8_ARG6; 1641 Register tmp2 = R9_ARG7; 1642 Register tmp3 = R10_ARG8; 1643 1644 DEBUG_ONLY(__ block_comment("fill {")); 1645 #ifdef ASSERT 1646 __ load_const_optimized(tmp2, ContinuationEntry::cookie_value()); 1647 __ stw(tmp2, in_bytes(ContinuationEntry::cookie_offset()), R1_SP); 1648 #endif //ASSERT 1649 1650 __ li(zero, 0); 1651 __ st_ptr(reg_cont_obj, ContinuationEntry::cont_offset(), R1_SP); 1652 __ stw(reg_flags, in_bytes(ContinuationEntry::flags_offset()), R1_SP); 1653 __ st_ptr(zero, ContinuationEntry::chunk_offset(), R1_SP); 1654 __ stw(zero, in_bytes(ContinuationEntry::argsize_offset()), R1_SP); 1655 __ stw(zero, in_bytes(ContinuationEntry::pin_count_offset()), R1_SP); 1656 1657 __ ld_ptr(tmp2, JavaThread::cont_fastpath_offset(), R16_thread); 1658 __ ld(tmp3, in_bytes(JavaThread::held_monitor_count_offset()), R16_thread); 1659 __ st_ptr(tmp2, ContinuationEntry::parent_cont_fastpath_offset(), R1_SP); 1660 __ std(tmp3, in_bytes(ContinuationEntry::parent_held_monitor_count_offset()), R1_SP); 1661 1662 __ st_ptr(zero, JavaThread::cont_fastpath_offset(), R16_thread); 1663 __ std(zero, in_bytes(JavaThread::held_monitor_count_offset()), R16_thread); 1664 DEBUG_ONLY(__ block_comment("} fill")); 1665 } 1666 1667 //---------------------------- continuation_enter_cleanup --------------------------- 1668 // 1669 // Copy corresponding attributes from the top ContinuationEntry to the JavaThread 1670 // before deleting it. 
1671 // 1672 // Arguments: 1673 // R1_SP: pointer to the ContinuationEntry 1674 // 1675 // Results: 1676 // None. 1677 // 1678 // Kills: 1679 // R8_ARG6, R9_ARG7, R10_ARG8, R15_esp 1680 // 1681 static void continuation_enter_cleanup(MacroAssembler* masm) { 1682 Register tmp1 = R8_ARG6; 1683 Register tmp2 = R9_ARG7; 1684 Register tmp3 = R10_ARG8; 1685 1686 #ifdef ASSERT 1687 __ block_comment("clean {"); 1688 __ ld_ptr(tmp1, JavaThread::cont_entry_offset(), R16_thread); 1689 __ cmpd(CR0, R1_SP, tmp1); 1690 __ asm_assert_eq(FILE_AND_LINE ": incorrect R1_SP"); 1691 #endif 1692 1693 __ ld_ptr(tmp1, ContinuationEntry::parent_cont_fastpath_offset(), R1_SP); 1694 __ st_ptr(tmp1, JavaThread::cont_fastpath_offset(), R16_thread); 1695 1696 if (CheckJNICalls) { 1697 // Check if this is a virtual thread continuation 1698 Label L_skip_vthread_code; 1699 __ lwz(R0, in_bytes(ContinuationEntry::flags_offset()), R1_SP); 1700 __ cmpwi(CR0, R0, 0); 1701 __ beq(CR0, L_skip_vthread_code); 1702 1703 // If the held monitor count is > 0 and this vthread is terminating then 1704 // it failed to release a JNI monitor. So we issue the same log message 1705 // that JavaThread::exit does. 1706 __ ld(R0, in_bytes(JavaThread::jni_monitor_count_offset()), R16_thread); 1707 __ cmpdi(CR0, R0, 0); 1708 __ beq(CR0, L_skip_vthread_code); 1709 1710 // Save return value potentially containing the exception oop 1711 Register ex_oop = R15_esp; // nonvolatile register 1712 __ mr(ex_oop, R3_RET); 1713 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::log_jni_monitor_still_held)); 1714 // Restore potential return value 1715 __ mr(R3_RET, ex_oop); 1716 1717 // For vthreads we have to explicitly zero the JNI monitor count of the carrier 1718 // on termination. The held count is implicitly zeroed below when we restore from 1719 // the parent held count (which has to be zero). 1720 __ li(tmp1, 0); 1721 __ std(tmp1, in_bytes(JavaThread::jni_monitor_count_offset()), R16_thread); 1722 1723 __ bind(L_skip_vthread_code); 1724 } 1725 #ifdef ASSERT 1726 else { 1727 // Check if this is a virtual thread continuation 1728 Label L_skip_vthread_code; 1729 __ lwz(R0, in_bytes(ContinuationEntry::flags_offset()), R1_SP); 1730 __ cmpwi(CR0, R0, 0); 1731 __ beq(CR0, L_skip_vthread_code); 1732 1733 // See comment just above. If not checking JNI calls the JNI count is only 1734 // needed for assertion checking.
1735 __ li(tmp1, 0); 1736 __ std(tmp1, in_bytes(JavaThread::jni_monitor_count_offset()), R16_thread); 1737 1738 __ bind(L_skip_vthread_code); 1739 } 1740 #endif 1741 1742 __ ld(tmp2, in_bytes(ContinuationEntry::parent_held_monitor_count_offset()), R1_SP); 1743 __ ld_ptr(tmp3, ContinuationEntry::parent_offset(), R1_SP); 1744 __ std(tmp2, in_bytes(JavaThread::held_monitor_count_offset()), R16_thread); 1745 __ st_ptr(tmp3, JavaThread::cont_entry_offset(), R16_thread); 1746 DEBUG_ONLY(__ block_comment("} clean")); 1747 } 1748 1749 static void check_continuation_enter_argument(VMReg actual_vmreg, 1750 Register expected_reg, 1751 const char* name) { 1752 assert(!actual_vmreg->is_stack(), "%s cannot be on stack", name); 1753 assert(actual_vmreg->as_Register() == expected_reg, 1754 "%s is in unexpected register: %s instead of %s", 1755 name, actual_vmreg->as_Register()->name(), expected_reg->name()); 1756 } 1757 1758 static void gen_continuation_enter(MacroAssembler* masm, 1759 const VMRegPair* regs, 1760 int& exception_offset, 1761 OopMapSet* oop_maps, 1762 int& frame_complete, 1763 int& framesize_words, 1764 int& interpreted_entry_offset, 1765 int& compiled_entry_offset) { 1766 1767 // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread) 1768 int pos_cont_obj = 0; 1769 int pos_is_cont = 1; 1770 int pos_is_virtual = 2; 1771 1772 // The platform-specific calling convention may present the arguments in various registers. 1773 // To simplify the rest of the code, we expect the arguments to reside at these known 1774 // registers, and we additionally check the placement here in case calling convention ever 1775 // changes. 1776 Register reg_cont_obj = R3_ARG1; 1777 Register reg_is_cont = R4_ARG2; 1778 Register reg_is_virtual = R5_ARG3; 1779 1780 check_continuation_enter_argument(regs[pos_cont_obj].first(), reg_cont_obj, "Continuation object"); 1781 check_continuation_enter_argument(regs[pos_is_cont].first(), reg_is_cont, "isContinue"); 1782 check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread"); 1783 1784 address resolve_static_call = SharedRuntime::get_resolve_static_call_stub(); 1785 1786 address start = __ pc(); 1787 1788 Label L_thaw, L_exit; 1789 1790 // i2i entry used at interp_only_mode only 1791 interpreted_entry_offset = __ pc() - start; 1792 { 1793 #ifdef ASSERT 1794 Label is_interp_only; 1795 __ lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread); 1796 __ cmpwi(CR0, R0, 0); 1797 __ bne(CR0, is_interp_only); 1798 __ stop("enterSpecial interpreter entry called when not in interp_only_mode"); 1799 __ bind(is_interp_only); 1800 #endif 1801 1802 // Read interpreter arguments into registers (this is an ad-hoc i2c adapter) 1803 __ ld(reg_cont_obj, Interpreter::stackElementSize*3, R15_esp); 1804 __ lwz(reg_is_cont, Interpreter::stackElementSize*2, R15_esp); 1805 __ lwz(reg_is_virtual, Interpreter::stackElementSize*1, R15_esp); 1806 1807 __ push_cont_fastpath(); 1808 1809 OopMap* map = continuation_enter_setup(masm, framesize_words); 1810 1811 // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe, 1812 // but that's okay because at the very worst we'll miss an async sample, but we're in interp_only_mode anyway. 1813 1814 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual); 1815 1816 // If isContinue, call to thaw. 
Otherwise, call Continuation.enter(Continuation c, boolean isContinue) 1817 __ cmpwi(CR0, reg_is_cont, 0); 1818 __ bne(CR0, L_thaw); 1819 1820 // --- call Continuation.enter(Continuation c, boolean isContinue) 1821 1822 // Emit compiled static call. The call will be always resolved to the c2i 1823 // entry of Continuation.enter(Continuation c, boolean isContinue). 1824 // There are special cases in SharedRuntime::resolve_static_call_C() and 1825 // SharedRuntime::resolve_sub_helper_internal() to achieve this 1826 // See also corresponding call below. 1827 address c2i_call_pc = __ pc(); 1828 int start_offset = __ offset(); 1829 // Put the entry point as a constant into the constant pool. 1830 const address entry_point_toc_addr = __ address_constant(resolve_static_call, RelocationHolder::none); 1831 const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr); 1832 guarantee(entry_point_toc_addr != nullptr, "const section overflow"); 1833 1834 // Emit the trampoline stub which will be related to the branch-and-link below. 1835 address stub = __ emit_trampoline_stub(entry_point_toc_offset, start_offset); 1836 guarantee(stub != nullptr, "no space for trampoline stub"); 1837 1838 __ relocate(relocInfo::static_call_type); 1839 // Note: At this point we do not have the address of the trampoline 1840 // stub, and the entry point might be too far away for bl, so __ pc() 1841 // serves as dummy and the bl will be patched later. 1842 __ bl(__ pc()); 1843 oop_maps->add_gc_map(__ pc() - start, map); 1844 __ post_call_nop(); 1845 1846 __ b(L_exit); 1847 1848 // static stub for the call above 1849 stub = CompiledDirectCall::emit_to_interp_stub(masm, c2i_call_pc); 1850 guarantee(stub != nullptr, "no space for static stub"); 1851 } 1852 1853 // compiled entry 1854 __ align(CodeEntryAlignment); 1855 compiled_entry_offset = __ pc() - start; 1856 1857 OopMap* map = continuation_enter_setup(masm, framesize_words); 1858 1859 // Frame is now completed as far as size and linkage. 1860 frame_complete =__ pc() - start; 1861 1862 fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual); 1863 1864 // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue) 1865 __ cmpwi(CR0, reg_is_cont, 0); 1866 __ bne(CR0, L_thaw); 1867 1868 // --- call Continuation.enter(Continuation c, boolean isContinue) 1869 1870 // Emit compiled static call 1871 // The call needs to be resolved. There's a special case for this in 1872 // SharedRuntime::find_callee_info_helper() which calls 1873 // LinkResolver::resolve_continuation_enter() which resolves the call to 1874 // Continuation.enter(Continuation c, boolean isContinue). 1875 address call_pc = __ pc(); 1876 int start_offset = __ offset(); 1877 // Put the entry point as a constant into the constant pool. 1878 const address entry_point_toc_addr = __ address_constant(resolve_static_call, RelocationHolder::none); 1879 const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr); 1880 guarantee(entry_point_toc_addr != nullptr, "const section overflow"); 1881 1882 // Emit the trampoline stub which will be related to the branch-and-link below. 
1883 address stub = __ emit_trampoline_stub(entry_point_toc_offset, start_offset); 1884 guarantee(stub != nullptr, "no space for trampoline stub"); 1885 1886 __ relocate(relocInfo::static_call_type); 1887 // Note: At this point we do not have the address of the trampoline 1888 // stub, and the entry point might be too far away for bl, so __ pc() 1889 // serves as dummy and the bl will be patched later. 1890 __ bl(__ pc()); 1891 oop_maps->add_gc_map(__ pc() - start, map); 1892 __ post_call_nop(); 1893 1894 __ b(L_exit); 1895 1896 // --- Thawing path 1897 1898 __ bind(L_thaw); 1899 ContinuationEntry::_thaw_call_pc_offset = __ pc() - start; 1900 __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(StubRoutines::cont_thaw())); 1901 __ mtctr(R0); 1902 __ bctrl(); 1903 oop_maps->add_gc_map(__ pc() - start, map->deep_copy()); 1904 ContinuationEntry::_return_pc_offset = __ pc() - start; 1905 __ post_call_nop(); 1906 1907 // --- Normal exit (resolve/thawing) 1908 1909 __ bind(L_exit); 1910 ContinuationEntry::_cleanup_offset = __ pc() - start; 1911 continuation_enter_cleanup(masm); 1912 1913 // Pop frame and return 1914 DEBUG_ONLY(__ ld_ptr(R0, 0, R1_SP)); 1915 __ addi(R1_SP, R1_SP, framesize_words*wordSize); 1916 DEBUG_ONLY(__ cmpd(CR0, R0, R1_SP)); 1917 __ asm_assert_eq(FILE_AND_LINE ": inconsistent frame size"); 1918 __ ld(R0, _abi0(lr), R1_SP); // Return pc 1919 __ mtlr(R0); 1920 __ blr(); 1921 1922 // --- Exception handling path 1923 1924 exception_offset = __ pc() - start; 1925 1926 continuation_enter_cleanup(masm); 1927 Register ex_pc = R17_tos; // nonvolatile register 1928 Register ex_oop = R15_esp; // nonvolatile register 1929 __ ld(ex_pc, _abi0(callers_sp), R1_SP); // Load caller's return pc 1930 __ ld(ex_pc, _abi0(lr), ex_pc); 1931 __ mr(ex_oop, R3_RET); // save return value containing the exception oop 1932 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), R16_thread, ex_pc); 1933 __ mtlr(R3_RET); // the exception handler 1934 __ ld(R1_SP, _abi0(callers_sp), R1_SP); // remove enterSpecial frame 1935 1936 // Continue at exception handler 1937 // See OptoRuntime::generate_exception_blob for register arguments 1938 __ mr(R3_ARG1, ex_oop); // pass exception oop 1939 __ mr(R4_ARG2, ex_pc); // pass exception pc 1940 __ blr(); 1941 1942 // static stub for the call above 1943 stub = CompiledDirectCall::emit_to_interp_stub(masm, call_pc); 1944 guarantee(stub != nullptr, "no space for static stub"); 1945 } 1946 1947 static void gen_continuation_yield(MacroAssembler* masm, 1948 const VMRegPair* regs, 1949 OopMapSet* oop_maps, 1950 int& frame_complete, 1951 int& framesize_words, 1952 int& compiled_entry_offset) { 1953 Register tmp = R10_ARG8; 1954 1955 const int framesize_bytes = (int)align_up((int)frame::native_abi_reg_args_size, frame::alignment_in_bytes); 1956 framesize_words = framesize_bytes / wordSize; 1957 1958 address start = __ pc(); 1959 compiled_entry_offset = __ pc() - start; 1960 1961 // Save return pc and push entry frame 1962 __ mflr(tmp); 1963 __ std(tmp, _abi0(lr), R1_SP); // SP->lr = return_pc 1964 __ push_frame(framesize_bytes , R0); // SP -= frame_size_in_bytes 1965 1966 DEBUG_ONLY(__ block_comment("Frame Complete")); 1967 frame_complete = __ pc() - start; 1968 address last_java_pc = __ pc(); 1969 1970 // This nop must be exactly at the PC we push into the frame info. 1971 // We use this nop for fast CodeBlob lookup, associate the OopMap 1972 // with it right away. 
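// Note: the pc of this nop is also what gets installed as last_Java_pc below, so the
// OopMap registered at this offset describes the frame during the freeze_entry() call.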
1973 __ post_call_nop(); 1974 OopMap* map = new OopMap(framesize_bytes / VMRegImpl::stack_slot_size, 1); 1975 oop_maps->add_gc_map(last_java_pc - start, map); 1976 1977 __ calculate_address_from_global_toc(tmp, last_java_pc); // will be relocated 1978 __ set_last_Java_frame(R1_SP, tmp); 1979 __ call_VM_leaf(Continuation::freeze_entry(), R16_thread, R1_SP); 1980 __ reset_last_Java_frame(); 1981 1982 Label L_pinned; 1983 1984 __ cmpwi(CR0, R3_RET, 0); 1985 __ bne(CR0, L_pinned); 1986 1987 // yield succeeded 1988 1989 // Pop frames of continuation including this stub's frame 1990 __ ld_ptr(R1_SP, JavaThread::cont_entry_offset(), R16_thread); 1991 // The frame pushed by gen_continuation_enter is on top now again 1992 continuation_enter_cleanup(masm); 1993 1994 // Pop frame and return 1995 Label L_return; 1996 __ bind(L_return); 1997 __ pop_frame(); 1998 __ ld(R0, _abi0(lr), R1_SP); // Return pc 1999 __ mtlr(R0); 2000 __ blr(); 2001 2002 // yield failed - continuation is pinned 2003 2004 __ bind(L_pinned); 2005 2006 // handle pending exception thrown by freeze 2007 __ ld(tmp, in_bytes(JavaThread::pending_exception_offset()), R16_thread); 2008 __ cmpdi(CR0, tmp, 0); 2009 __ beq(CR0, L_return); // return if no exception is pending 2010 __ pop_frame(); 2011 __ ld(R0, _abi0(lr), R1_SP); // Return pc 2012 __ mtlr(R0); 2013 __ load_const_optimized(tmp, StubRoutines::forward_exception_entry(), R0); 2014 __ mtctr(tmp); 2015 __ bctr(); 2016 } 2017 2018 void SharedRuntime::continuation_enter_cleanup(MacroAssembler* masm) { 2019 ::continuation_enter_cleanup(masm); 2020 } 2021 2022 // --------------------------------------------------------------------------- 2023 // Generate a native wrapper for a given method. The method takes arguments 2024 // in the Java compiled code convention, marshals them to the native 2025 // convention (handlizes oops, etc), transitions to native, makes the call, 2026 // returns to java state (possibly blocking), unhandlizes any result and 2027 // returns. 2028 // 2029 // Critical native functions are a shorthand for the use of 2030 // GetPrimitiveArrayCritical and disallow the use of any other JNI 2031 // functions. The wrapper is expected to unpack the arguments before 2032 // passing them to the callee. Critical native functions leave the state _in_Java, 2033 // since they cannot stop for GC. 2034 // Some other parts of JNI setup are skipped, like the tear-down of the JNI handle 2035 // block and the check for pending exceptions, since it's impossible for them 2036 // to be thrown.
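//
// Informal sketch of the code emitted below for a synchronized instance method
// (the real code additionally handles static methods, preemption for wait0,
// stack reguarding and JFR sampling):
//
//   check inline cache (UEP), then class-init barrier if required (VEP)
//   push wrapper frame; shuffle Java args into the C convention, handlizing oops
//   lock the receiver (fast path, else complete_monitor_locking_C)
//   thread_state = _thread_in_native
//   result = native_func(JNIEnv*, receiver handle, args...)
//   thread_state = _thread_in_native_trans; block if a safepoint/handshake is pending
//   thread_state = _thread_in_Java
//   unlock, unhandlize an oop result, check pending exceptions, return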
2037 // 2038 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, 2039 const methodHandle& method, 2040 int compile_id, 2041 BasicType *in_sig_bt, 2042 VMRegPair *in_regs, 2043 BasicType ret_type) { 2044 if (method->is_continuation_native_intrinsic()) { 2045 int exception_offset = -1; 2046 OopMapSet* oop_maps = new OopMapSet(); 2047 int frame_complete = -1; 2048 int stack_slots = -1; 2049 int interpreted_entry_offset = -1; 2050 int vep_offset = -1; 2051 if (method->is_continuation_enter_intrinsic()) { 2052 gen_continuation_enter(masm, 2053 in_regs, 2054 exception_offset, 2055 oop_maps, 2056 frame_complete, 2057 stack_slots, 2058 interpreted_entry_offset, 2059 vep_offset); 2060 } else if (method->is_continuation_yield_intrinsic()) { 2061 gen_continuation_yield(masm, 2062 in_regs, 2063 oop_maps, 2064 frame_complete, 2065 stack_slots, 2066 vep_offset); 2067 } else { 2068 guarantee(false, "Unknown Continuation native intrinsic"); 2069 } 2070 2071 #ifdef ASSERT 2072 if (method->is_continuation_enter_intrinsic()) { 2073 assert(interpreted_entry_offset != -1, "Must be set"); 2074 assert(exception_offset != -1, "Must be set"); 2075 } else { 2076 assert(interpreted_entry_offset == -1, "Must be unset"); 2077 assert(exception_offset == -1, "Must be unset"); 2078 } 2079 assert(frame_complete != -1, "Must be set"); 2080 assert(stack_slots != -1, "Must be set"); 2081 assert(vep_offset != -1, "Must be set"); 2082 #endif 2083 2084 __ flush(); 2085 nmethod* nm = nmethod::new_native_nmethod(method, 2086 compile_id, 2087 masm->code(), 2088 vep_offset, 2089 frame_complete, 2090 stack_slots, 2091 in_ByteSize(-1), 2092 in_ByteSize(-1), 2093 oop_maps, 2094 exception_offset); 2095 if (nm == nullptr) return nm; 2096 if (method->is_continuation_enter_intrinsic()) { 2097 ContinuationEntry::set_enter_code(nm, interpreted_entry_offset); 2098 } else if (method->is_continuation_yield_intrinsic()) { 2099 _cont_doYield_stub = nm; 2100 } 2101 return nm; 2102 } 2103 2104 if (method->is_method_handle_intrinsic()) { 2105 vmIntrinsics::ID iid = method->intrinsic_id(); 2106 intptr_t start = (intptr_t)__ pc(); 2107 int vep_offset = ((intptr_t)__ pc()) - start; 2108 gen_special_dispatch(masm, 2109 method, 2110 in_sig_bt, 2111 in_regs); 2112 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period 2113 __ flush(); 2114 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually 2115 return nmethod::new_native_nmethod(method, 2116 compile_id, 2117 masm->code(), 2118 vep_offset, 2119 frame_complete, 2120 stack_slots / VMRegImpl::slots_per_word, 2121 in_ByteSize(-1), 2122 in_ByteSize(-1), 2123 (OopMapSet*)nullptr); 2124 } 2125 2126 address native_func = method->native_function(); 2127 assert(native_func != nullptr, "must have function"); 2128 2129 // First, create signature for outgoing C call 2130 // -------------------------------------------------------------------------- 2131 2132 int total_in_args = method->size_of_parameters(); 2133 // We have received a description of where all the java args are located 2134 // on entry to the wrapper. We need to convert these args to where 2135 // the jni function will expect them. To figure out where they go 2136 // we convert the java signature to a C signature by inserting 2137 // the hidden arguments as arg[0] and possibly arg[1] (static method) 2138 2139 // Calculate the total number of C arguments and create arrays for the 2140 // signature and the outgoing registers. 
2141 // On ppc64, we have two arrays for the outgoing registers, because 2142 // some floating-point arguments must be passed in registers _and_ 2143 // in stack locations. 2144 bool method_is_static = method->is_static(); 2145 int total_c_args = total_in_args + (method_is_static ? 2 : 1); 2146 2147 BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 2148 VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 2149 2150 // Create the signature for the C call: 2151 // 1) add the JNIEnv* 2152 // 2) add the class if the method is static 2153 // 3) copy the rest of the incoming signature (shifted by the number of 2154 // hidden arguments). 2155 2156 int argc = 0; 2157 out_sig_bt[argc++] = T_ADDRESS; 2158 if (method->is_static()) { 2159 out_sig_bt[argc++] = T_OBJECT; 2160 } 2161 2162 for (int i = 0; i < total_in_args ; i++ ) { 2163 out_sig_bt[argc++] = in_sig_bt[i]; 2164 } 2165 2166 2167 // Compute the wrapper's frame size. 2168 // -------------------------------------------------------------------------- 2169 2170 // Now figure out where the args must be stored and how much stack space 2171 // they require. 2172 // 2173 // Compute framesize for the wrapper. We need to handlize all oops in 2174 // incoming registers. 2175 // 2176 // Calculate the total number of stack slots we will need: 2177 // 1) abi requirements 2178 // 2) outgoing arguments 2179 // 3) space for inbound oop handle area 2180 // 4) space for handlizing a klass if static method 2181 // 5) space for a lock if synchronized method 2182 // 6) workspace for saving return values, int <-> float reg moves, etc. 2183 // 7) alignment 2184 // 2185 // Layout of the native wrapper frame: 2186 // (stack grows upwards, memory grows downwards) 2187 // 2188 // NW [ABI_REG_ARGS] <-- 1) R1_SP 2189 // [outgoing arguments] <-- 2) R1_SP + out_arg_slot_offset 2190 // [oopHandle area] <-- 3) R1_SP + oop_handle_offset 2191 // klass <-- 4) R1_SP + klass_offset 2192 // lock <-- 5) R1_SP + lock_offset 2193 // [workspace] <-- 6) R1_SP + workspace_offset 2194 // [alignment] (optional) <-- 7) 2195 // caller [JIT_TOP_ABI_48] <-- r_callers_sp 2196 // 2197 // - *_slot_offset Indicates offset from SP in number of stack slots. 2198 // - *_offset Indicates offset from SP in bytes. 2199 2200 int stack_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args) + // 1+2) 2201 SharedRuntime::out_preserve_stack_slots(); // See c_calling_convention. 2202 2203 // Now the space for the inbound oop handle area. 2204 int total_save_slots = num_java_iarg_registers * VMRegImpl::slots_per_word; 2205 2206 int oop_handle_slot_offset = stack_slots; 2207 stack_slots += total_save_slots; // 3) 2208 2209 int klass_slot_offset = 0; 2210 int klass_offset = -1; 2211 if (method_is_static) { // 4) 2212 klass_slot_offset = stack_slots; 2213 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 2214 stack_slots += VMRegImpl::slots_per_word; 2215 } 2216 2217 int lock_slot_offset = 0; 2218 int lock_offset = -1; 2219 if (method->is_synchronized()) { // 5) 2220 lock_slot_offset = stack_slots; 2221 lock_offset = lock_slot_offset * VMRegImpl::stack_slot_size; 2222 stack_slots += VMRegImpl::slots_per_word; 2223 } 2224 2225 int workspace_slot_offset = stack_slots; // 6) 2226 stack_slots += 2; 2227 2228 // Now compute actual number of stack words we need. 2229 // Rounding to make stack properly aligned. 
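// For example, with the usual 16-byte frame alignment and 4-byte stack slots this
// rounds stack_slots up to a multiple of 4 slots.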
2230 stack_slots = align_up(stack_slots, // 7) 2231 frame::alignment_in_bytes / VMRegImpl::stack_slot_size); 2232 int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size; 2233 2234 2235 // Now we can start generating code. 2236 // -------------------------------------------------------------------------- 2237 2238 intptr_t start_pc = (intptr_t)__ pc(); 2239 intptr_t vep_start_pc; 2240 intptr_t frame_done_pc; 2241 2242 Label handle_pending_exception; 2243 Label last_java_pc; 2244 2245 Register r_callers_sp = R21; 2246 Register r_temp_1 = R22; 2247 Register r_temp_2 = R23; 2248 Register r_temp_3 = R24; 2249 Register r_temp_4 = R25; 2250 Register r_temp_5 = R26; 2251 Register r_temp_6 = R27; 2252 Register r_last_java_pc = R28; 2253 2254 Register r_carg1_jnienv = noreg; 2255 Register r_carg2_classorobject = noreg; 2256 r_carg1_jnienv = out_regs[0].first()->as_Register(); 2257 r_carg2_classorobject = out_regs[1].first()->as_Register(); 2258 2259 2260 // Generate the Unverified Entry Point (UEP). 2261 // -------------------------------------------------------------------------- 2262 assert(start_pc == (intptr_t)__ pc(), "uep must be at start"); 2263 2264 // Check ic: object class == cached class? 2265 if (!method_is_static) { 2266 __ ic_check(4 /* end_alignment */); 2267 } 2268 2269 // Generate the Verified Entry Point (VEP). 2270 // -------------------------------------------------------------------------- 2271 vep_start_pc = (intptr_t)__ pc(); 2272 2273 if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { 2274 Label L_skip_barrier; 2275 Register klass = r_temp_1; 2276 // Notify OOP recorder (don't need the relocation) 2277 AddressLiteral md = __ constant_metadata_address(method->method_holder()); 2278 __ load_const_optimized(klass, md.value(), R0); 2279 __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/); 2280 2281 __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0); 2282 __ mtctr(klass); 2283 __ bctr(); 2284 2285 __ bind(L_skip_barrier); 2286 } 2287 2288 __ save_LR(r_temp_1); 2289 __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame. 2290 __ mr(r_callers_sp, R1_SP); // Remember frame pointer. 2291 __ push_frame(frame_size_in_bytes, r_temp_1); // Push the c2n adapter's frame. 2292 2293 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2294 bs->nmethod_entry_barrier(masm, r_temp_1); 2295 2296 frame_done_pc = (intptr_t)__ pc(); 2297 2298 // Native nmethod wrappers never take possession of the oop arguments. 2299 // So the caller will gc the arguments. 2300 // The only thing we need an oopMap for is if the call is static. 2301 // 2302 // An OopMap for lock (and class if static), and one for the VM call itself. 2303 OopMapSet *oop_maps = new OopMapSet(); 2304 OopMap *oop_map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 2305 2306 // Move arguments from register/stack to register/stack. 2307 // -------------------------------------------------------------------------- 2308 // 2309 // We immediately shuffle the arguments so that for any vm call we have 2310 // to make from here on out (sync slow path, jvmti, etc.) we will have 2311 // captured the oops from our caller and have a valid oopMap for them. 2312 // 2313 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* 2314 // (derived from JavaThread* which is in R16_thread) and, if static, 2315 // the class mirror instead of a receiver. 
This pretty much guarantees that 2316 // register layout will not match. We ignore these extra arguments during 2317 // the shuffle. The shuffle is described by the two calling convention 2318 // vectors we have in our possession. We simply walk the java vector to 2319 // get the source locations and the c vector to get the destinations. 2320 2321 // Record sp-based slot for receiver on stack for non-static methods. 2322 int receiver_offset = -1; 2323 2324 // We move the arguments backward because the floating point registers 2325 // destination will always be to a register with a greater or equal 2326 // register number or the stack. 2327 // in is the index of the incoming Java arguments 2328 // out is the index of the outgoing C arguments 2329 2330 #ifdef ASSERT 2331 bool reg_destroyed[Register::number_of_registers]; 2332 bool freg_destroyed[FloatRegister::number_of_registers]; 2333 for (int r = 0 ; r < Register::number_of_registers ; r++) { 2334 reg_destroyed[r] = false; 2335 } 2336 for (int f = 0 ; f < FloatRegister::number_of_registers ; f++) { 2337 freg_destroyed[f] = false; 2338 } 2339 #endif // ASSERT 2340 2341 for (int in = total_in_args - 1, out = total_c_args - 1; in >= 0 ; in--, out--) { 2342 2343 #ifdef ASSERT 2344 if (in_regs[in].first()->is_Register()) { 2345 assert(!reg_destroyed[in_regs[in].first()->as_Register()->encoding()], "ack!"); 2346 } else if (in_regs[in].first()->is_FloatRegister()) { 2347 assert(!freg_destroyed[in_regs[in].first()->as_FloatRegister()->encoding()], "ack!"); 2348 } 2349 if (out_regs[out].first()->is_Register()) { 2350 reg_destroyed[out_regs[out].first()->as_Register()->encoding()] = true; 2351 } else if (out_regs[out].first()->is_FloatRegister()) { 2352 freg_destroyed[out_regs[out].first()->as_FloatRegister()->encoding()] = true; 2353 } 2354 #endif // ASSERT 2355 2356 switch (in_sig_bt[in]) { 2357 case T_BOOLEAN: 2358 case T_CHAR: 2359 case T_BYTE: 2360 case T_SHORT: 2361 case T_INT: 2362 // Move int and do sign extension. 2363 int_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1); 2364 break; 2365 case T_LONG: 2366 long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1); 2367 break; 2368 case T_ARRAY: 2369 case T_OBJECT: 2370 object_move(masm, stack_slots, 2371 oop_map, oop_handle_slot_offset, 2372 ((in == 0) && (!method_is_static)), &receiver_offset, 2373 in_regs[in], out_regs[out], 2374 r_callers_sp, r_temp_1, r_temp_2); 2375 break; 2376 case T_VOID: 2377 break; 2378 case T_FLOAT: 2379 float_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1); 2380 break; 2381 case T_DOUBLE: 2382 double_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1); 2383 break; 2384 case T_ADDRESS: 2385 fatal("found type (T_ADDRESS) in java args"); 2386 break; 2387 default: 2388 ShouldNotReachHere(); 2389 break; 2390 } 2391 } 2392 2393 // Pre-load a static method's oop into ARG2. 2394 // Used both by locking code and the normal JNI call code. 2395 if (method_is_static) { 2396 __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), 2397 r_carg2_classorobject); 2398 2399 // Now handlize the static class mirror in carg2. It's known not-null. 2400 __ std(r_carg2_classorobject, klass_offset, R1_SP); 2401 oop_map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 2402 __ addi(r_carg2_classorobject, R1_SP, klass_offset); 2403 } 2404 2405 // Get JNIEnv* which is first argument to native. 
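// The JNIEnv* is simply the address of the jni_environment field embedded in the
// JavaThread, so a single addi on R16_thread produces it.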
2406 __ addi(r_carg1_jnienv, R16_thread, in_bytes(JavaThread::jni_environment_offset())); 2407 2408 // NOTE: 2409 // 2410 // We have all of the arguments set up at this point. 2411 // We MUST NOT touch any outgoing regs from this point on. 2412 // So if we must call out we must push a new frame. 2413 2414 // The last java pc will also be used as resume pc if this is the wrapper for wait0. 2415 // For this purpose the precise location matters but not for oopmap lookup. 2416 __ calculate_address_from_global_toc(r_last_java_pc, last_java_pc, true, true, true, true); 2417 2418 // Make sure that thread is non-volatile; it crosses a bunch of VM calls below. 2419 assert(R16_thread->is_nonvolatile(), "thread must be in non-volatile register"); 2420 2421 # if 0 2422 // DTrace method entry 2423 # endif 2424 2425 // Lock a synchronized method. 2426 // -------------------------------------------------------------------------- 2427 2428 if (method->is_synchronized()) { 2429 Register r_oop = r_temp_4; 2430 const Register r_box = r_temp_5; 2431 Label done, locked; 2432 2433 // Load the oop for the object or class. r_carg2_classorobject contains 2434 // either the handlized oop from the incoming arguments or the handlized 2435 // class mirror (if the method is static). 2436 __ ld(r_oop, 0, r_carg2_classorobject); 2437 2438 // Get the lock box slot's address. 2439 __ addi(r_box, R1_SP, lock_offset); 2440 2441 // Try fastpath for locking. 2442 // fast_lock kills r_temp_1, r_temp_2, r_temp_3. 2443 Register r_temp_3_or_noreg = UseObjectMonitorTable ? r_temp_3 : noreg; 2444 __ compiler_fast_lock_lightweight_object(CR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3_or_noreg); 2445 __ beq(CR0, locked); 2446 2447 // None of the above fast optimizations worked so we have to get into the 2448 // slow case of monitor enter. Inline a special case of call_VM that 2449 // disallows any pending_exception. 2450 2451 // Save argument registers and leave room for C-compatible ABI_REG_ARGS. 2452 int frame_size = frame::native_abi_reg_args_size + align_up(total_c_args * wordSize, frame::alignment_in_bytes); 2453 __ mr(R11_scratch1, R1_SP); 2454 RegisterSaver::push_frame_and_save_argument_registers(masm, R12_scratch2, frame_size, total_c_args, out_regs); 2455 2456 // Do the call. 2457 __ set_last_Java_frame(R11_scratch1, r_last_java_pc); 2458 assert(r_last_java_pc->is_nonvolatile(), "r_last_java_pc needs to be preserved across complete_monitor_locking_C call"); 2459 // The following call will not be preempted. 2460 // push_cont_fastpath forces freeze slow path in case we try to preempt where we will pin the 2461 // vthread to the carrier (see FreezeBase::recurse_freeze_native_frame()). 2462 __ push_cont_fastpath(); 2463 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), r_oop, r_box, R16_thread); 2464 __ pop_cont_fastpath(); 2465 __ reset_last_Java_frame(); 2466 2467 RegisterSaver::restore_argument_registers_and_pop_frame(masm, frame_size, total_c_args, out_regs); 2468 2469 __ asm_assert_mem8_is_zero(thread_(pending_exception), 2470 "no pending exception allowed on exit from SharedRuntime::complete_monitor_locking_C"); 2471 2472 __ bind(locked); 2473 } 2474 2475 __ set_last_Java_frame(R1_SP, r_last_java_pc); 2476 2477 // Publish thread state 2478 // -------------------------------------------------------------------------- 2479 2480 // Transition from _thread_in_Java to _thread_in_native.
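// The release barrier ensures the stores done above (handlized oops, last_Java_frame)
// are performed before the thread-state store that publishes _thread_in_native.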
2481 __ li(R0, _thread_in_native); 2482 __ release(); 2483 // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); 2484 __ stw(R0, thread_(thread_state)); 2485 2486 2487 // The JNI call 2488 // -------------------------------------------------------------------------- 2489 __ call_c(native_func, relocInfo::runtime_call_type); 2490 2491 2492 // Now, we are back from the native code. 2493 2494 2495 // Unpack the native result. 2496 // -------------------------------------------------------------------------- 2497 2498 // For int-types, we do any needed sign-extension required. 2499 // Care must be taken that the return values (R3_RET and F1_RET) 2500 // will survive any VM calls for blocking or unlocking. 2501 // An OOP result (handle) is done specially in the slow-path code. 2502 2503 switch (ret_type) { 2504 case T_VOID: break; // Nothing to do! 2505 case T_FLOAT: break; // Got it where we want it (unless slow-path). 2506 case T_DOUBLE: break; // Got it where we want it (unless slow-path). 2507 case T_LONG: break; // Got it where we want it (unless slow-path). 2508 case T_OBJECT: break; // Really a handle. 2509 // Cannot de-handlize until after reclaiming jvm_lock. 2510 case T_ARRAY: break; 2511 2512 case T_BOOLEAN: { // 0 -> false(0); !0 -> true(1) 2513 __ normalize_bool(R3_RET); 2514 break; 2515 } 2516 case T_BYTE: { // sign extension 2517 __ extsb(R3_RET, R3_RET); 2518 break; 2519 } 2520 case T_CHAR: { // unsigned result 2521 __ andi(R3_RET, R3_RET, 0xffff); 2522 break; 2523 } 2524 case T_SHORT: { // sign extension 2525 __ extsh(R3_RET, R3_RET); 2526 break; 2527 } 2528 case T_INT: // nothing to do 2529 break; 2530 default: 2531 ShouldNotReachHere(); 2532 break; 2533 } 2534 2535 // Publish thread state 2536 // -------------------------------------------------------------------------- 2537 2538 // Switch thread to "native transition" state before reading the 2539 // synchronization state. This additional state is necessary because reading 2540 // and testing the synchronization state is not atomic w.r.t. GC, as this 2541 // scenario demonstrates: 2542 // - Java thread A, in _thread_in_native state, loads _not_synchronized 2543 // and is preempted. 2544 // - VM thread changes sync state to synchronizing and suspends threads 2545 // for GC. 2546 // - Thread A is resumed to finish this native method, but doesn't block 2547 // here since it didn't see any synchronization in progress, and escapes. 2548 2549 // Transition from _thread_in_native to _thread_in_native_trans. 2550 __ li(R0, _thread_in_native_trans); 2551 __ release(); 2552 // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); 2553 __ stw(R0, thread_(thread_state)); 2554 2555 2556 // Must we block? 2557 // -------------------------------------------------------------------------- 2558 2559 // Block, if necessary, before resuming in _thread_in_Java state. 2560 // In order for GC to work, don't clear the last_Java_sp until after blocking. 2561 { 2562 Label no_block, sync; 2563 2564 // Force this write out before the read below. 2565 if (!UseSystemMemoryBarrier) { 2566 __ fence(); 2567 } 2568 2569 Register sync_state_addr = r_temp_4; 2570 Register sync_state = r_temp_5; 2571 Register suspend_flags = r_temp_6; 2572 2573 // No synchronization in progress nor yet synchronized 2574 // (cmp-br-isync on one path, release (same as acquire on PPC64) on the other path). 
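// safepoint_poll tests the thread-local polling word (see SafepointMechanism) and
// branches to 'sync' if a safepoint or handshake operation is pending.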
2575 __ safepoint_poll(sync, sync_state, true /* at_return */, false /* in_nmethod */); 2576 2577 // Not suspended. 2578 // TODO: PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size"); 2579 __ lwz(suspend_flags, thread_(suspend_flags)); 2580 __ cmpwi(CR1, suspend_flags, 0); 2581 __ beq(CR1, no_block); 2582 2583 // Block. Save any potential method result value before the operation and 2584 // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this 2585 // lets us share the oopMap we used when we went native rather than create 2586 // a distinct one for this pc. 2587 __ bind(sync); 2588 __ isync(); 2589 2590 address entry_point = 2591 CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans); 2592 save_native_result(masm, ret_type, workspace_slot_offset); 2593 __ call_VM_leaf(entry_point, R16_thread); 2594 restore_native_result(masm, ret_type, workspace_slot_offset); 2595 2596 __ bind(no_block); 2597 2598 // Publish thread state. 2599 // -------------------------------------------------------------------------- 2600 2601 // Thread state is thread_in_native_trans. Any safepoint blocking has 2602 // already happened so we can now change state to _thread_in_Java. 2603 2604 // Transition from _thread_in_native_trans to _thread_in_Java. 2605 __ li(R0, _thread_in_Java); 2606 __ lwsync(); // Acquire safepoint and suspend state, release thread state. 2607 // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); 2608 __ stw(R0, thread_(thread_state)); 2609 2610 // Check preemption for Object.wait() 2611 if (method->is_object_wait0()) { 2612 Label not_preempted; 2613 __ ld(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread); 2614 __ cmpdi(CR0, R0, 0); 2615 __ beq(CR0, not_preempted); 2616 __ mtlr(R0); 2617 __ li(R0, 0); 2618 __ std(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread); 2619 __ blr(); 2620 __ bind(not_preempted); 2621 } 2622 __ bind(last_java_pc); 2623 // We use the same pc/oopMap repeatedly when we call out above. 2624 intptr_t oopmap_pc = (intptr_t) __ pc(); 2625 oop_maps->add_gc_map(oopmap_pc - start_pc, oop_map); 2626 } 2627 2628 // Reguard any pages if necessary. 2629 // -------------------------------------------------------------------------- 2630 2631 Label no_reguard; 2632 __ lwz(r_temp_1, thread_(stack_guard_state)); 2633 __ cmpwi(CR0, r_temp_1, StackOverflow::stack_guard_yellow_reserved_disabled); 2634 __ bne(CR0, no_reguard); 2635 2636 save_native_result(masm, ret_type, workspace_slot_offset); 2637 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); 2638 restore_native_result(masm, ret_type, workspace_slot_offset); 2639 2640 __ bind(no_reguard); 2641 2642 2643 // Unlock 2644 // -------------------------------------------------------------------------- 2645 2646 if (method->is_synchronized()) { 2647 const Register r_oop = r_temp_4; 2648 const Register r_box = r_temp_5; 2649 const Register r_exception = r_temp_6; 2650 Label done; 2651 2652 // Get oop and address of lock object box. 2653 if (method_is_static) { 2654 assert(klass_offset != -1, ""); 2655 __ ld(r_oop, klass_offset, R1_SP); 2656 } else { 2657 assert(receiver_offset != -1, ""); 2658 __ ld(r_oop, receiver_offset, R1_SP); 2659 } 2660 __ addi(r_box, R1_SP, lock_offset); 2661 2662 // Try fastpath for unlocking. 
2663 __ compiler_fast_unlock_lightweight_object(CR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3); 2664 __ beq(CR0, done); 2665 2666 // Save and restore any potential method result value around the unlocking operation. 2667 save_native_result(masm, ret_type, workspace_slot_offset); 2668 2669 // Must save pending exception around the slow-path VM call. Since it's a 2670 // leaf call, the pending exception (if any) can be kept in a register. 2671 __ ld(r_exception, thread_(pending_exception)); 2672 assert(r_exception->is_nonvolatile(), "exception register must be non-volatile"); 2673 __ li(R0, 0); 2674 __ std(R0, thread_(pending_exception)); 2675 2676 // Slow case of monitor exit. 2677 // Inline a special case of call_VM that disallows any pending_exception. 2678 // Arguments are (oop obj, BasicLock* lock, JavaThread* thread). 2679 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box, R16_thread); 2680 2681 __ asm_assert_mem8_is_zero(thread_(pending_exception), 2682 "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C"); 2683 2684 restore_native_result(masm, ret_type, workspace_slot_offset); 2685 2686 // The pending-exception check below jumps to forward_exception if any pending 2687 // exception is set. The forward_exception routine expects to see the 2688 // exception in pending_exception and not in a register. Kind of clumsy, 2689 // since all folks who branch to forward_exception must have tested 2690 // pending_exception first and hence have it in a register already. 2691 __ std(r_exception, thread_(pending_exception)); 2692 2693 __ bind(done); 2694 } 2695 2696 # if 0 2697 // DTrace method exit 2698 # endif 2699 2700 // Clear "last Java frame" SP and PC. 2701 // -------------------------------------------------------------------------- 2702 2703 // Last java frame won't be set if we're resuming after preemption 2704 bool maybe_preempted = method->is_object_wait0(); 2705 __ reset_last_Java_frame(!maybe_preempted /* check_last_java_sp */); 2706 2707 // Unbox oop result, e.g. JNIHandles::resolve value. 2708 // -------------------------------------------------------------------------- 2709 2710 if (is_reference_type(ret_type)) { 2711 __ resolve_jobject(R3_RET, r_temp_1, r_temp_2, MacroAssembler::PRESERVATION_NONE); 2712 } 2713 2714 if (CheckJNICalls) { 2715 // clear_pending_jni_exception_check 2716 __ load_const_optimized(R0, 0L); 2717 __ st_ptr(R0, JavaThread::pending_jni_exception_check_fn_offset(), R16_thread); 2718 } 2719 2720 // Reset handle block. 2721 // -------------------------------------------------------------------------- 2722 __ ld(r_temp_1, thread_(active_handles)); 2723 // TODO: PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size"); 2724 __ li(r_temp_2, 0); 2725 __ stw(r_temp_2, in_bytes(JNIHandleBlock::top_offset()), r_temp_1); 2726 2727 // Prepare for return 2728 // -------------------------------------------------------------------------- 2729 __ pop_frame(); 2730 __ restore_LR(R11); 2731 2732 #if INCLUDE_JFR 2733 // We need to do a poll test after unwind in case the sampler 2734 // managed to sample the native frame after returning to Java. 2735 Label L_stub; 2736 int safepoint_offset = __ offset(); 2737 if (!UseSIGTRAP) { 2738 __ relocate(relocInfo::poll_return_type); 2739 } 2740 __ safepoint_poll(L_stub, r_temp_2, true /* at_return */, true /* in_nmethod: frame already popped */); 2741 #endif // INCLUDE_JFR 2742 2743 // Check for pending exceptions.
2744 // -------------------------------------------------------------------------- 2745 __ ld(r_temp_2, thread_(pending_exception)); 2746 __ cmpdi(CR0, r_temp_2, 0); 2747 __ bne(CR0, handle_pending_exception); 2748 2749 // Return. 2750 __ blr(); 2751 2752 // Handler for return safepoint (out-of-line). 2753 #if INCLUDE_JFR 2754 if (!UseSIGTRAP) { 2755 __ bind(L_stub); 2756 __ jump_to_polling_page_return_handler_blob(safepoint_offset); 2757 } 2758 #endif // INCLUDE_JFR 2759 2760 // Handler for pending exceptions (out-of-line). 2761 // -------------------------------------------------------------------------- 2762 // Since this is a native call, we know the proper exception handler 2763 // is the empty function. We just pop this frame and then jump to 2764 // forward_exception_entry. 2765 __ bind(handle_pending_exception); 2766 __ b64_patchable((address)StubRoutines::forward_exception_entry(), 2767 relocInfo::runtime_call_type); 2768 2769 // Done. 2770 // -------------------------------------------------------------------------- 2771 2772 __ flush(); 2773 2774 nmethod *nm = nmethod::new_native_nmethod(method, 2775 compile_id, 2776 masm->code(), 2777 vep_start_pc-start_pc, 2778 frame_done_pc-start_pc, 2779 stack_slots / VMRegImpl::slots_per_word, 2780 (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), 2781 in_ByteSize(lock_offset), 2782 oop_maps); 2783 2784 return nm; 2785 } 2786 2787 // This function returns the adjust size (in number of words) to a c2i adapter 2788 // activation for use during deoptimization. 2789 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { 2790 return align_up((callee_locals - callee_parameters) * Interpreter::stackElementWords, frame::frame_alignment_in_words); 2791 } 2792 2793 uint SharedRuntime::in_preserve_stack_slots() { 2794 return frame::jit_in_preserve_size / VMRegImpl::stack_slot_size; 2795 } 2796 2797 uint SharedRuntime::out_preserve_stack_slots() { 2798 #if defined(COMPILER1) || defined(COMPILER2) 2799 return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size; 2800 #else 2801 return 0; 2802 #endif 2803 } 2804 2805 VMReg SharedRuntime::thread_register() { 2806 // On PPC virtual threads don't save the JavaThread* in their context (e.g. C1 stub frames). 2807 ShouldNotCallThis(); 2808 return nullptr; 2809 } 2810 2811 #if defined(COMPILER1) || defined(COMPILER2) 2812 // Frame generation for deopt and uncommon trap blobs. 2813 static void push_skeleton_frame(MacroAssembler* masm, bool deopt, 2814 /* Read */ 2815 Register unroll_block_reg, 2816 /* Update */ 2817 Register frame_sizes_reg, 2818 Register number_of_frames_reg, 2819 Register pcs_reg, 2820 /* Invalidate */ 2821 Register frame_size_reg, 2822 Register pc_reg) { 2823 2824 __ ld(pc_reg, 0, pcs_reg); 2825 __ ld(frame_size_reg, 0, frame_sizes_reg); 2826 __ std(pc_reg, _abi0(lr), R1_SP); 2827 __ push_frame(frame_size_reg, R0/*tmp*/); 2828 __ std(R1_SP, _ijava_state_neg(sender_sp), R1_SP); 2829 __ addi(number_of_frames_reg, number_of_frames_reg, -1); 2830 __ addi(frame_sizes_reg, frame_sizes_reg, wordSize); 2831 __ addi(pcs_reg, pcs_reg, wordSize); 2832 } 2833 2834 // Loop through the UnrollBlock info and create new frames. 
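// The UnrollBlock provides a frame count plus two parallel arrays (frame sizes and
// frame pcs). After the caller's frame has been resized to make room for the deoptee's
// locals, push_skeleton_frame() is invoked in a loop until the count reaches zero; the
// one remaining pc is the return address into the template interpreter for the top frame.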
2835 static void push_skeleton_frames(MacroAssembler* masm, bool deopt, 2836 /* read */ 2837 Register unroll_block_reg, 2838 /* invalidate */ 2839 Register frame_sizes_reg, 2840 Register number_of_frames_reg, 2841 Register pcs_reg, 2842 Register frame_size_reg, 2843 Register pc_reg) { 2844 Label loop; 2845 2846 // _number_of_frames is of type int (deoptimization.hpp) 2847 __ lwa(number_of_frames_reg, 2848 in_bytes(Deoptimization::UnrollBlock::number_of_frames_offset()), 2849 unroll_block_reg); 2850 __ ld(pcs_reg, 2851 in_bytes(Deoptimization::UnrollBlock::frame_pcs_offset()), 2852 unroll_block_reg); 2853 __ ld(frame_sizes_reg, 2854 in_bytes(Deoptimization::UnrollBlock::frame_sizes_offset()), 2855 unroll_block_reg); 2856 2857 // stack: (caller_of_deoptee, ...). 2858 2859 // At this point we either have an interpreter frame or a compiled 2860 // frame on top of stack. If it is a compiled frame we push a new c2i 2861 // adapter here 2862 2863 // Memorize top-frame stack-pointer. 2864 __ mr(frame_size_reg/*old_sp*/, R1_SP); 2865 2866 // Resize interpreter top frame OR C2I adapter. 2867 2868 // At this moment, the top frame (which is the caller of the deoptee) is 2869 // an interpreter frame or a newly pushed C2I adapter or an entry frame. 2870 // The top frame has a TOP_IJAVA_FRAME_ABI and the frame contains the 2871 // outgoing arguments. 2872 // 2873 // In order to push the interpreter frame for the deoptee, we need to 2874 // resize the top frame such that we are able to place the deoptee's 2875 // locals in the frame. 2876 // Additionally, we have to turn the top frame's TOP_IJAVA_FRAME_ABI 2877 // into a valid PARENT_IJAVA_FRAME_ABI. 2878 2879 __ lwa(R11_scratch1, 2880 in_bytes(Deoptimization::UnrollBlock::caller_adjustment_offset()), 2881 unroll_block_reg); 2882 __ neg(R11_scratch1, R11_scratch1); 2883 2884 // R11_scratch1 contains size of locals for frame resizing. 2885 // R12_scratch2 contains top frame's lr. 2886 2887 // Resize frame by complete frame size prevents TOC from being 2888 // overwritten by locals. A more stack space saving way would be 2889 // to copy the TOC to its location in the new abi. 2890 __ addi(R11_scratch1, R11_scratch1, - frame::parent_ijava_frame_abi_size); 2891 2892 // now, resize the frame 2893 __ resize_frame(R11_scratch1, pc_reg/*tmp*/); 2894 2895 // In the case where we have resized a c2i frame above, the optional 2896 // alignment below the locals has size 32 (why?). 2897 __ std(R12_scratch2, _abi0(lr), R1_SP); 2898 2899 // Initialize initial_caller_sp. 2900 __ std(frame_size_reg, _ijava_state_neg(sender_sp), R1_SP); 2901 2902 #ifdef ASSERT 2903 // Make sure that there is at least one entry in the array. 2904 __ cmpdi(CR0, number_of_frames_reg, 0); 2905 __ asm_assert_ne("array_size must be > 0"); 2906 #endif 2907 2908 // Now push the new interpreter frames. 2909 // 2910 __ bind(loop); 2911 // Allocate a new frame, fill in the pc. 2912 push_skeleton_frame(masm, deopt, 2913 unroll_block_reg, 2914 frame_sizes_reg, 2915 number_of_frames_reg, 2916 pcs_reg, 2917 frame_size_reg, 2918 pc_reg); 2919 __ cmpdi(CR0, number_of_frames_reg, 0); 2920 __ bne(CR0, loop); 2921 2922 // Get the return address pointing into the template interpreter. 2923 __ ld(R0, 0, pcs_reg); 2924 // Store it in the top interpreter frame. 2925 __ std(R0, _abi0(lr), R1_SP); 2926 // Initialize frame_manager_lr of interpreter top frame. 
2927 } 2928 #endif 2929 2930 void SharedRuntime::generate_deopt_blob() { 2931 // Allocate space for the code 2932 ResourceMark rm; 2933 // Setup code generation tools 2934 const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id); 2935 CodeBuffer buffer(name, 2048, 1024); 2936 InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer); 2937 Label exec_mode_initialized; 2938 int frame_size_in_words; 2939 OopMap* map = nullptr; 2940 OopMapSet *oop_maps = new OopMapSet(); 2941 2942 // size of ABI112 plus spill slots for R3_RET and F1_RET. 2943 const int frame_size_in_bytes = frame::native_abi_reg_args_spill_size; 2944 const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); 2945 int first_frame_size_in_bytes = 0; // frame size of "unpack frame" for call to fetch_unroll_info. 2946 2947 const Register exec_mode_reg = R21_tmp1; 2948 2949 const address start = __ pc(); 2950 2951 #if defined(COMPILER1) || defined(COMPILER2) 2952 // -------------------------------------------------------------------------- 2953 // Prolog for non exception case! 2954 2955 // We have been called from the deopt handler of the deoptee. 2956 // 2957 // deoptee: 2958 // ... 2959 // call X 2960 // ... 2961 // deopt_handler: call_deopt_stub 2962 // cur. return pc --> ... 2963 // 2964 // So currently SR_LR points behind the call in the deopt handler. 2965 // We adjust it such that it points to the start of the deopt handler. 2966 // The return_pc has been stored in the frame of the deoptee and 2967 // will replace the address of the deopt_handler in the call 2968 // to Deoptimization::fetch_unroll_info below. 2969 // We can't grab a free register here, because all registers may 2970 // contain live values, so let the RegisterSaver do the adjustment 2971 // of the return pc. 2972 const int return_pc_adjustment_no_exception = -MacroAssembler::bl64_patchable_size; 2973 2974 // Push the "unpack frame" 2975 // Save everything in sight. 2976 map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm, 2977 &first_frame_size_in_bytes, 2978 /*generate_oop_map=*/ true, 2979 return_pc_adjustment_no_exception, 2980 RegisterSaver::return_pc_is_lr); 2981 assert(map != nullptr, "OopMap must have been created"); 2982 2983 __ li(exec_mode_reg, Deoptimization::Unpack_deopt); 2984 // Save exec mode for unpack_frames. 2985 __ b(exec_mode_initialized); 2986 2987 // -------------------------------------------------------------------------- 2988 // Prolog for exception case 2989 2990 // An exception is pending. 2991 // We have been called with a return (interpreter) or a jump (exception blob). 2992 // 2993 // - R3_ARG1: exception oop 2994 // - R4_ARG2: exception pc 2995 2996 int exception_offset = __ pc() - start; 2997 2998 BLOCK_COMMENT("Prolog for exception case"); 2999 3000 // Store exception oop and pc in thread (location known to GC). 3001 // This is needed since the call to "fetch_unroll_info()" may safepoint. 3002 __ std(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread); 3003 __ std(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread); 3004 __ std(R4_ARG2, _abi0(lr), R1_SP); 3005 3006 // Vanilla deoptimization with an exception pending in exception_oop. 3007 int exception_in_tls_offset = __ pc() - start; 3008 3009 // Push the "unpack frame". 3010 // Save everything in sight. 
3011 RegisterSaver::push_frame_reg_args_and_save_live_registers(masm, 3012 &first_frame_size_in_bytes, 3013 /*generate_oop_map=*/ false, 3014 /*return_pc_adjustment_exception=*/ 0, 3015 RegisterSaver::return_pc_is_pre_saved); 3016 3017 // Deopt during an exception. Save exec mode for unpack_frames. 3018 __ li(exec_mode_reg, Deoptimization::Unpack_exception); 3019 3020 // fall through 3021 3022 int reexecute_offset = 0; 3023 #ifdef COMPILER1 3024 __ b(exec_mode_initialized); 3025 3026 // Reexecute entry, similar to c2 uncommon trap 3027 reexecute_offset = __ pc() - start; 3028 3029 RegisterSaver::push_frame_reg_args_and_save_live_registers(masm, 3030 &first_frame_size_in_bytes, 3031 /*generate_oop_map=*/ false, 3032 /*return_pc_adjustment_reexecute=*/ 0, 3033 RegisterSaver::return_pc_is_pre_saved); 3034 __ li(exec_mode_reg, Deoptimization::Unpack_reexecute); 3035 #endif 3036 3037 // -------------------------------------------------------------------------- 3038 __ BIND(exec_mode_initialized); 3039 3040 const Register unroll_block_reg = R22_tmp2; 3041 3042 // We need to set `last_Java_frame' because `fetch_unroll_info' will 3043 // call `last_Java_frame()'. The value of the pc in the frame is not 3044 // particularly important. It just needs to identify this blob. 3045 __ set_last_Java_frame(R1_SP, noreg); 3046 3047 // With EscapeAnalysis turned on, this call may safepoint! 3048 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread, exec_mode_reg); 3049 address calls_return_pc = __ last_calls_return_pc(); 3050 // Set an oopmap for the call site that describes all our saved registers. 3051 oop_maps->add_gc_map(calls_return_pc - start, map); 3052 3053 __ reset_last_Java_frame(); 3054 // Save the return value. 3055 __ mr(unroll_block_reg, R3_RET); 3056 3057 // Restore only the result registers that have been saved 3058 // by save_volatile_registers(...). 3059 RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes); 3060 3061 // reload the exec mode from the UnrollBlock (it might have changed) 3062 __ lwz(exec_mode_reg, in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset()), unroll_block_reg); 3063 // In excp_deopt_mode, restore and clear exception oop which we 3064 // stored in the thread during exception entry above. The exception 3065 // oop will be the return value of this stub. 3066 Label skip_restore_excp; 3067 __ cmpdi(CR0, exec_mode_reg, Deoptimization::Unpack_exception); 3068 __ bne(CR0, skip_restore_excp); 3069 __ ld(R3_RET, in_bytes(JavaThread::exception_oop_offset()), R16_thread); 3070 __ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread); 3071 __ li(R0, 0); 3072 __ std(R0, in_bytes(JavaThread::exception_pc_offset()), R16_thread); 3073 __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread); 3074 __ BIND(skip_restore_excp); 3075 3076 __ pop_frame(); 3077 3078 // stack: (deoptee, optional i2c, caller of deoptee, ...). 3079 3080 // pop the deoptee's frame 3081 __ pop_frame(); 3082 3083 // stack: (caller_of_deoptee, ...). 3084 3085 // Freezing continuation frames requires that the caller is trimmed to unextended sp if compiled. 3086 // If not compiled the loaded value is equal to the current SP (see frame::initial_deoptimization_info()) 3087 // and the frame is effectively not resized. 
3088 Register caller_sp = R23_tmp3; 3089 __ ld_ptr(caller_sp, Deoptimization::UnrollBlock::initial_info_offset(), unroll_block_reg); 3090 __ resize_frame_absolute(caller_sp, R24_tmp4, R25_tmp5); 3091 3092 // Loop through the `UnrollBlock' info and create interpreter frames. 3093 push_skeleton_frames(masm, true/*deopt*/, 3094 unroll_block_reg, 3095 R23_tmp3, 3096 R24_tmp4, 3097 R25_tmp5, 3098 R26_tmp6, 3099 R27_tmp7); 3100 3101 // stack: (skeletal interpreter frame, ..., optional skeletal 3102 // interpreter frame, optional c2i, caller of deoptee, ...). 3103 3104 // push an `unpack_frame' taking care of float / int return values. 3105 __ push_frame(frame_size_in_bytes, R0/*tmp*/); 3106 3107 // stack: (unpack frame, skeletal interpreter frame, ..., optional 3108 // skeletal interpreter frame, optional c2i, caller of deoptee, 3109 // ...). 3110 3111 // Spill live volatile registers since we'll do a call. 3112 __ std( R3_RET, _native_abi_reg_args_spill(spill_ret), R1_SP); 3113 __ stfd(F1_RET, _native_abi_reg_args_spill(spill_fret), R1_SP); 3114 3115 // Let the unpacker layout information in the skeletal frames just 3116 // allocated. 3117 __ calculate_address_from_global_toc(R3_RET, calls_return_pc, true, true, true, true); 3118 __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R3_RET); 3119 // This is a call to a LEAF method, so no oop map is required. 3120 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), 3121 R16_thread/*thread*/, exec_mode_reg/*exec_mode*/); 3122 __ reset_last_Java_frame(); 3123 3124 // Restore the volatiles saved above. 3125 __ ld( R3_RET, _native_abi_reg_args_spill(spill_ret), R1_SP); 3126 __ lfd(F1_RET, _native_abi_reg_args_spill(spill_fret), R1_SP); 3127 3128 // Pop the unpack frame. 3129 __ pop_frame(); 3130 __ restore_LR(R0); 3131 3132 // stack: (top interpreter frame, ..., optional interpreter frame, 3133 // optional c2i, caller of deoptee, ...). 3134 3135 // Initialize R14_state. 3136 __ restore_interpreter_state(R11_scratch1); 3137 __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1); 3138 3139 // Return to the interpreter entry point. 3140 __ blr(); 3141 __ flush(); 3142 #else // COMPILER2 3143 __ unimplemented("deopt blob needed only with compiler"); 3144 int exception_offset = __ pc() - start; 3145 #endif // COMPILER2 3146 3147 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, 3148 reexecute_offset, first_frame_size_in_bytes / wordSize); 3149 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); 3150 } 3151 3152 #ifdef COMPILER2 3153 UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() { 3154 // Allocate space for the code. 3155 ResourceMark rm; 3156 // Setup code generation tools. 3157 const char* name = OptoRuntime::stub_name(StubId::c2_uncommon_trap_id); 3158 CodeBuffer buffer(name, 2048, 1024); 3159 if (buffer.blob() == nullptr) { 3160 return nullptr; 3161 } 3162 InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer); 3163 address start = __ pc(); 3164 3165 Register unroll_block_reg = R21_tmp1; 3166 Register klass_index_reg = R22_tmp2; 3167 Register unc_trap_reg = R23_tmp3; 3168 Register r_return_pc = R27_tmp7; 3169 3170 OopMapSet* oop_maps = new OopMapSet(); 3171 int frame_size_in_bytes = frame::native_abi_reg_args_size; 3172 OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0); 3173 3174 // stack: (deoptee, optional i2c, caller_of_deoptee, ...). 
3175
3176 // Push a dummy `unpack_frame' and call
3177 // `Deoptimization::uncommon_trap' to pack the compiled frame into a
3178 // vframe array and return the `UnrollBlock' information.
3179
3180 // Save LR to compiled frame.
3181 __ save_LR(R11_scratch1);
3182
3183 // Push an "uncommon_trap" frame.
3184 __ push_frame_reg_args(0, R11_scratch1);
3185
3186 // stack: (unpack frame, deoptee, optional i2c, caller_of_deoptee, ...).
3187
3188 // Set the `unpack_frame' as last_Java_frame.
3189 // `Deoptimization::uncommon_trap' expects it and considers its
3190 // sender frame as the deoptee frame.
3191 // Remember the offset of the instruction whose address will be
3192 // moved to r_return_pc.
3193 address gc_map_pc = __ pc();
3194 __ calculate_address_from_global_toc(r_return_pc, gc_map_pc, true, true, true, true);
3195 __ set_last_Java_frame(/*sp*/R1_SP, r_return_pc);
3196
3197 __ mr(klass_index_reg, R3);
3198 __ li(R5_ARG3, Deoptimization::Unpack_uncommon_trap);
3199 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap),
3200 R16_thread, klass_index_reg, R5_ARG3);
3201
3202 // Set an oopmap for the call site.
3203 oop_maps->add_gc_map(gc_map_pc - start, map);
3204
3205 __ reset_last_Java_frame();
3206
3207 // Pop the `unpack frame'.
3208 __ pop_frame();
3209
3210 // stack: (deoptee, optional i2c, caller_of_deoptee, ...).
3211
3212 // Save the return value.
3213 __ mr(unroll_block_reg, R3_RET);
3214
3215 // Pop the uncommon_trap frame.
3216 __ pop_frame();
3217
3218 // stack: (caller_of_deoptee, ...).
3219
3220 #ifdef ASSERT
3221 __ lwz(R22_tmp2, in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset()), unroll_block_reg);
3222 __ cmpdi(CR0, R22_tmp2, (unsigned)Deoptimization::Unpack_uncommon_trap);
3223 __ asm_assert_eq("OptoRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
3224 #endif
3225
3226 // Freezing continuation frames requires that the caller is trimmed to unextended sp if compiled.
3227 // If not compiled the loaded value is equal to the current SP (see frame::initial_deoptimization_info())
3228 // and the frame is effectively not resized.
3229 Register caller_sp = R23_tmp3;
3230 __ ld_ptr(caller_sp, Deoptimization::UnrollBlock::initial_info_offset(), unroll_block_reg);
3231 __ resize_frame_absolute(caller_sp, R24_tmp4, R25_tmp5);
3232
3233 // Allocate new interpreter frame(s) and possibly a c2i adapter
3234 // frame.
3235 push_skeleton_frames(masm, false/*deopt*/,
3236 unroll_block_reg,
3237 R22_tmp2,
3238 R23_tmp3,
3239 R24_tmp4,
3240 R25_tmp5,
3241 R26_tmp6);
3242
3243 // stack: (skeletal interpreter frame, ..., optional skeletal
3244 // interpreter frame, optional c2i, caller of deoptee, ...).
3245
3246 // Push a dummy `unpack_frame' taking care of float return values.
3247 // Call `Deoptimization::unpack_frames' to lay out information in the
3248 // interpreter frames just created.
3249
3250 // Push a simple "unpack frame" here.
3251 __ push_frame_reg_args(0, R11_scratch1);
3252
3253 // stack: (unpack frame, skeletal interpreter frame, ..., optional
3254 // skeletal interpreter frame, optional c2i, caller of deoptee,
3255 // ...).
3256
3257 // Set the "unpack_frame" as last_Java_frame.
3258 __ set_last_Java_frame(/*sp*/R1_SP, r_return_pc);
3259
3260 // Indicate it is the uncommon trap case.
3261 __ li(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
3262 // Let the unpacker lay out information in the skeletal frames just
3263 // allocated.
3264 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), 3265 R16_thread, unc_trap_reg); 3266 3267 __ reset_last_Java_frame(); 3268 // Pop the `unpack frame'. 3269 __ pop_frame(); 3270 // Restore LR from top interpreter frame. 3271 __ restore_LR(R11_scratch1); 3272 3273 // stack: (top interpreter frame, ..., optional interpreter frame, 3274 // optional c2i, caller of deoptee, ...). 3275 3276 __ restore_interpreter_state(R11_scratch1); 3277 __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1); 3278 3279 // Return to the interpreter entry point. 3280 __ blr(); 3281 3282 masm->flush(); 3283 3284 return UncommonTrapBlob::create(&buffer, oop_maps, frame_size_in_bytes/wordSize); 3285 } 3286 #endif // COMPILER2 3287 3288 // Generate a special Compile2Runtime blob that saves all registers, and setup oopmap. 3289 SafepointBlob* SharedRuntime::generate_handler_blob(StubId id, address call_ptr) { 3290 assert(StubRoutines::forward_exception_entry() != nullptr, 3291 "must be generated before"); 3292 assert(is_polling_page_id(id), "expected a polling page stub id"); 3293 3294 ResourceMark rm; 3295 OopMapSet *oop_maps = new OopMapSet(); 3296 OopMap* map; 3297 3298 // Allocate space for the code. Setup code generation tools. 3299 const char* name = SharedRuntime::stub_name(id); 3300 CodeBuffer buffer(name, 2048, 1024); 3301 MacroAssembler* masm = new MacroAssembler(&buffer); 3302 3303 address start = __ pc(); 3304 int frame_size_in_bytes = 0; 3305 3306 RegisterSaver::ReturnPCLocation return_pc_location; 3307 bool cause_return = (id == StubId::shared_polling_page_return_handler_id); 3308 if (cause_return) { 3309 // Nothing to do here. The frame has already been popped in MachEpilogNode. 3310 // Register LR already contains the return pc. 3311 return_pc_location = RegisterSaver::return_pc_is_pre_saved; 3312 } else { 3313 // Use thread()->saved_exception_pc() as return pc. 3314 return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc; 3315 } 3316 3317 bool save_vectors = (id == StubId::shared_polling_page_vectors_safepoint_handler_id); 3318 3319 // Save registers, fpu state, and flags. Set R31 = return pc. 3320 map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm, 3321 &frame_size_in_bytes, 3322 /*generate_oop_map=*/ true, 3323 /*return_pc_adjustment=*/0, 3324 return_pc_location, save_vectors); 3325 3326 // The following is basically a call_VM. However, we need the precise 3327 // address of the call in order to generate an oopmap. Hence, we do all the 3328 // work ourselves. 3329 __ set_last_Java_frame(/*sp=*/R1_SP, /*pc=*/noreg); 3330 3331 // The return address must always be correct so that the frame constructor 3332 // never sees an invalid pc. 3333 3334 // Do the call 3335 __ call_VM_leaf(call_ptr, R16_thread); 3336 address calls_return_pc = __ last_calls_return_pc(); 3337 3338 // Set an oopmap for the call site. This oopmap will map all 3339 // oop-registers and debug-info registers as callee-saved. This 3340 // will allow deoptimization at this safepoint to find all possible 3341 // debug-info recordings, as well as let GC find all oops. 3342 oop_maps->add_gc_map(calls_return_pc - start, map); 3343 3344 Label noException; 3345 3346 // Clear the last Java frame. 
3347 __ reset_last_Java_frame(); 3348 3349 BLOCK_COMMENT(" Check pending exception."); 3350 const Register pending_exception = R0; 3351 __ ld(pending_exception, thread_(pending_exception)); 3352 __ cmpdi(CR0, pending_exception, 0); 3353 __ beq(CR0, noException); 3354 3355 // Exception pending 3356 RegisterSaver::restore_live_registers_and_pop_frame(masm, 3357 frame_size_in_bytes, 3358 /*restore_ctr=*/true, save_vectors); 3359 3360 BLOCK_COMMENT(" Jump to forward_exception_entry."); 3361 // Jump to forward_exception_entry, with the issuing PC in LR 3362 // so it looks like the original nmethod called forward_exception_entry. 3363 __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); 3364 3365 // No exception case. 3366 __ BIND(noException); 3367 3368 if (!cause_return) { 3369 Label no_adjust; 3370 // If our stashed return pc was modified by the runtime we avoid touching it 3371 __ ld(R0, frame_size_in_bytes + _abi0(lr), R1_SP); 3372 __ cmpd(CR0, R0, R31); 3373 __ bne(CR0, no_adjust); 3374 3375 // Adjust return pc forward to step over the safepoint poll instruction 3376 __ addi(R31, R31, 4); 3377 __ std(R31, frame_size_in_bytes + _abi0(lr), R1_SP); 3378 3379 __ bind(no_adjust); 3380 } 3381 3382 // Normal exit, restore registers and exit. 3383 RegisterSaver::restore_live_registers_and_pop_frame(masm, 3384 frame_size_in_bytes, 3385 /*restore_ctr=*/true, save_vectors); 3386 3387 __ blr(); 3388 3389 // Make sure all code is generated 3390 masm->flush(); 3391 3392 // Fill-out other meta info 3393 // CodeBlob frame size is in words. 3394 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_bytes / wordSize); 3395 } 3396 3397 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss) 3398 // 3399 // Generate a stub that calls into the vm to find out the proper destination 3400 // of a java call. All the argument registers are live at this point 3401 // but since this is generic code we don't know what they are and the caller 3402 // must do any gc of the args. 3403 // 3404 RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination) { 3405 assert(is_resolve_id(id), "expected a resolve stub id"); 3406 3407 // allocate space for the code 3408 ResourceMark rm; 3409 3410 const char* name = SharedRuntime::stub_name(id); 3411 CodeBuffer buffer(name, 1000, 512); 3412 MacroAssembler* masm = new MacroAssembler(&buffer); 3413 3414 int frame_size_in_bytes; 3415 3416 OopMapSet *oop_maps = new OopMapSet(); 3417 OopMap* map = nullptr; 3418 3419 address start = __ pc(); 3420 3421 map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm, 3422 &frame_size_in_bytes, 3423 /*generate_oop_map*/ true, 3424 /*return_pc_adjustment*/ 0, 3425 RegisterSaver::return_pc_is_lr); 3426 3427 // Use noreg as last_Java_pc, the return pc will be reconstructed 3428 // from the physical frame. 3429 __ set_last_Java_frame(/*sp*/R1_SP, noreg); 3430 3431 int frame_complete = __ offset(); 3432 3433 // Pass R19_method as 2nd (optional) argument, used by 3434 // counter_overflow_stub. 3435 __ call_VM_leaf(destination, R16_thread, R19_method); 3436 address calls_return_pc = __ last_calls_return_pc(); 3437 // Set an oopmap for the call site. 3438 // We need this not only for callee-saved registers, but also for volatile 3439 // registers that the compiler might be keeping live across a safepoint. 3440 // Create the oopmap for the call's return pc. 
3441 oop_maps->add_gc_map(calls_return_pc - start, map); 3442 3443 // R3_RET contains the address we are going to jump to assuming no exception got installed. 3444 3445 // clear last_Java_sp 3446 __ reset_last_Java_frame(); 3447 3448 // Check for pending exceptions. 3449 BLOCK_COMMENT("Check for pending exceptions."); 3450 Label pending; 3451 __ ld(R11_scratch1, thread_(pending_exception)); 3452 __ cmpdi(CR0, R11_scratch1, 0); 3453 __ bne(CR0, pending); 3454 3455 __ mtctr(R3_RET); // Ctr will not be touched by restore_live_registers_and_pop_frame. 3456 3457 RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ false); 3458 3459 // Get the returned method. 3460 __ get_vm_result_metadata(R19_method); 3461 3462 __ bctr(); 3463 3464 3465 // Pending exception after the safepoint. 3466 __ BIND(pending); 3467 3468 RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ true); 3469 3470 // exception pending => remove activation and forward to exception handler 3471 3472 __ li(R11_scratch1, 0); 3473 __ ld(R3_ARG1, thread_(pending_exception)); 3474 __ std(R11_scratch1, in_bytes(JavaThread::vm_result_oop_offset()), R16_thread); 3475 __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); 3476 3477 // ------------- 3478 // Make sure all code is generated. 3479 masm->flush(); 3480 3481 // return the blob 3482 // frame_size_words or bytes?? 3483 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize, 3484 oop_maps, true); 3485 } 3486 3487 // Continuation point for throwing of implicit exceptions that are 3488 // not handled in the current activation. Fabricates an exception 3489 // oop and initiates normal exception dispatching in this 3490 // frame. Only callee-saved registers are preserved (through the 3491 // normal register window / RegisterMap handling). If the compiler 3492 // needs all registers to be preserved between the fault point and 3493 // the exception handler then it must assume responsibility for that 3494 // in AbstractCompiler::continuation_for_implicit_null_exception or 3495 // continuation_for_implicit_division_by_zero_exception. All other 3496 // implicit exceptions (e.g., NullPointerException or 3497 // AbstractMethodError on entry) are either at call sites or 3498 // otherwise assume that stack unwinding will be initiated, so 3499 // caller saved registers were assumed volatile in the compiler. 3500 // 3501 // Note that we generate only this stub into a RuntimeStub, because 3502 // it needs to be properly traversed and ignored during GC, so we 3503 // change the meaning of the "__" macro within this method. 3504 // 3505 // Note: the routine set_pc_not_at_call_for_caller in 3506 // SharedRuntime.cpp requires that this code be generated into a 3507 // RuntimeStub. 
3508 RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) { 3509 assert(is_throw_id(id), "expected a throw stub id"); 3510 3511 const char* name = SharedRuntime::stub_name(id); 3512 3513 ResourceMark rm; 3514 const char* timer_msg = "SharedRuntime generate_throw_exception"; 3515 TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime)); 3516 3517 CodeBuffer code(name, 1024 DEBUG_ONLY(+ 512), 0); 3518 MacroAssembler* masm = new MacroAssembler(&code); 3519 3520 OopMapSet* oop_maps = new OopMapSet(); 3521 int frame_size_in_bytes = frame::native_abi_reg_args_size; 3522 OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0); 3523 3524 address start = __ pc(); 3525 3526 __ save_LR(R11_scratch1); 3527 3528 // Push a frame. 3529 __ push_frame_reg_args(0, R11_scratch1); 3530 3531 address frame_complete_pc = __ pc(); 3532 3533 // Note that we always have a runtime stub frame on the top of 3534 // stack by this point. Remember the offset of the instruction 3535 // whose address will be moved to R11_scratch1. 3536 address gc_map_pc = __ get_PC_trash_LR(R11_scratch1); 3537 3538 __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1); 3539 3540 __ mr(R3_ARG1, R16_thread); 3541 __ call_c(runtime_entry); 3542 3543 // Set an oopmap for the call site. 3544 oop_maps->add_gc_map((int)(gc_map_pc - start), map); 3545 3546 __ reset_last_Java_frame(); 3547 3548 #ifdef ASSERT 3549 // Make sure that this code is only executed if there is a pending 3550 // exception. 3551 { 3552 Label L; 3553 __ ld(R0, 3554 in_bytes(Thread::pending_exception_offset()), 3555 R16_thread); 3556 __ cmpdi(CR0, R0, 0); 3557 __ bne(CR0, L); 3558 __ stop("SharedRuntime::throw_exception: no pending exception"); 3559 __ bind(L); 3560 } 3561 #endif 3562 3563 // Pop frame. 3564 __ pop_frame(); 3565 3566 __ restore_LR(R11_scratch1); 3567 3568 __ load_const(R11_scratch1, StubRoutines::forward_exception_entry()); 3569 __ mtctr(R11_scratch1); 3570 __ bctr(); 3571 3572 // Create runtime stub with OopMap. 3573 RuntimeStub* stub = 3574 RuntimeStub::new_runtime_stub(name, &code, 3575 /*frame_complete=*/ (int)(frame_complete_pc - start), 3576 frame_size_in_bytes/wordSize, 3577 oop_maps, 3578 false); 3579 return stub; 3580 } 3581 3582 //------------------------------Montgomery multiplication------------------------ 3583 // 3584 3585 // Subtract 0:b from carry:a. Return carry. 3586 static unsigned long 3587 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { 3588 long i = 0; 3589 unsigned long tmp, tmp2; 3590 __asm__ __volatile__ ( 3591 "subfc %[tmp], %[tmp], %[tmp] \n" // pre-set CA 3592 "mtctr %[len] \n" 3593 "0: \n" 3594 "ldx %[tmp], %[i], %[a] \n" 3595 "ldx %[tmp2], %[i], %[b] \n" 3596 "subfe %[tmp], %[tmp2], %[tmp] \n" // subtract extended 3597 "stdx %[tmp], %[i], %[a] \n" 3598 "addi %[i], %[i], 8 \n" 3599 "bdnz 0b \n" 3600 "addme %[tmp], %[carry] \n" // carry + CA - 1 3601 : [i]"+b"(i), [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2) 3602 : [a]"r"(a), [b]"r"(b), [carry]"r"(carry), [len]"r"(len) 3603 : "ctr", "xer", "memory" 3604 ); 3605 return tmp; 3606 } 3607 3608 // Multiply (unsigned) Long A by Long B, accumulating the double- 3609 // length result into the accumulator formed of T0, T1, and T2. 
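// Conceptually: (T2:T1:T0) += A * B, i.e. the 128-bit product (mulhdu:mulld)
// is added into the two low accumulator words and the final carry is
// absorbed by T2 via the addc/adde/addze chain below.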
3610 inline void MACC(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) { 3611 unsigned long hi, lo; 3612 __asm__ __volatile__ ( 3613 "mulld %[lo], %[A], %[B] \n" 3614 "mulhdu %[hi], %[A], %[B] \n" 3615 "addc %[T0], %[T0], %[lo] \n" 3616 "adde %[T1], %[T1], %[hi] \n" 3617 "addze %[T2], %[T2] \n" 3618 : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2) 3619 : [A]"r"(A), [B]"r"(B) 3620 : "xer" 3621 ); 3622 } 3623 3624 // As above, but add twice the double-length result into the 3625 // accumulator. 3626 inline void MACC2(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) { 3627 unsigned long hi, lo; 3628 __asm__ __volatile__ ( 3629 "mulld %[lo], %[A], %[B] \n" 3630 "mulhdu %[hi], %[A], %[B] \n" 3631 "addc %[T0], %[T0], %[lo] \n" 3632 "adde %[T1], %[T1], %[hi] \n" 3633 "addze %[T2], %[T2] \n" 3634 "addc %[T0], %[T0], %[lo] \n" 3635 "adde %[T1], %[T1], %[hi] \n" 3636 "addze %[T2], %[T2] \n" 3637 : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2) 3638 : [A]"r"(A), [B]"r"(B) 3639 : "xer" 3640 ); 3641 } 3642 3643 // Fast Montgomery multiplication. The derivation of the algorithm is 3644 // in "A Cryptographic Library for the Motorola DSP56000, 3645 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237". 3646 static void 3647 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], 3648 unsigned long m[], unsigned long inv, int len) { 3649 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator 3650 int i; 3651 3652 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); 3653 3654 for (i = 0; i < len; i++) { 3655 int j; 3656 for (j = 0; j < i; j++) { 3657 MACC(a[j], b[i-j], t0, t1, t2); 3658 MACC(m[j], n[i-j], t0, t1, t2); 3659 } 3660 MACC(a[i], b[0], t0, t1, t2); 3661 m[i] = t0 * inv; 3662 MACC(m[i], n[0], t0, t1, t2); 3663 3664 assert(t0 == 0, "broken Montgomery multiply"); 3665 3666 t0 = t1; t1 = t2; t2 = 0; 3667 } 3668 3669 for (i = len; i < 2*len; i++) { 3670 int j; 3671 for (j = i-len+1; j < len; j++) { 3672 MACC(a[j], b[i-j], t0, t1, t2); 3673 MACC(m[j], n[i-j], t0, t1, t2); 3674 } 3675 m[i-len] = t0; 3676 t0 = t1; t1 = t2; t2 = 0; 3677 } 3678 3679 while (t0) { 3680 t0 = sub(m, n, t0, len); 3681 } 3682 } 3683 3684 // Fast Montgomery squaring. This uses asymptotically 25% fewer 3685 // multiplies so it should be up to 25% faster than Montgomery 3686 // multiplication. However, its loop control is more complex and it 3687 // may actually run slower on some machines. 
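// The saving comes from the symmetry a[j]*a[i-j] == a[i-j]*a[j]: each
// off-diagonal product is computed once and accumulated twice via MACC2,
// while the diagonal term a[i/2]^2 is accumulated once when i is even.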
3688 static void
3689 montgomery_square(unsigned long a[], unsigned long n[],
3690 unsigned long m[], unsigned long inv, int len) {
3691 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3692 int i;
3693
3694 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3695
3696 for (i = 0; i < len; i++) {
3697 int j;
3698 int end = (i+1)/2;
3699 for (j = 0; j < end; j++) {
3700 MACC2(a[j], a[i-j], t0, t1, t2);
3701 MACC(m[j], n[i-j], t0, t1, t2);
3702 }
3703 if ((i & 1) == 0) {
3704 MACC(a[j], a[j], t0, t1, t2);
3705 }
3706 for (; j < i; j++) {
3707 MACC(m[j], n[i-j], t0, t1, t2);
3708 }
3709 m[i] = t0 * inv;
3710 MACC(m[i], n[0], t0, t1, t2);
3711
3712 assert(t0 == 0, "broken Montgomery square");
3713
3714 t0 = t1; t1 = t2; t2 = 0;
3715 }
3716
3717 for (i = len; i < 2*len; i++) {
3718 int start = i-len+1;
3719 int end = start + (len - start)/2;
3720 int j;
3721 for (j = start; j < end; j++) {
3722 MACC2(a[j], a[i-j], t0, t1, t2);
3723 MACC(m[j], n[i-j], t0, t1, t2);
3724 }
3725 if ((i & 1) == 0) {
3726 MACC(a[j], a[j], t0, t1, t2);
3727 }
3728 for (; j < len; j++) {
3729 MACC(m[j], n[i-j], t0, t1, t2);
3730 }
3731 m[i-len] = t0;
3732 t0 = t1; t1 = t2; t2 = 0;
3733 }
3734
3735 while (t0) {
3736 t0 = sub(m, n, t0, len);
3737 }
3738 }
3739
3740 // The threshold at which squaring is advantageous was determined
3741 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3742 // It doesn't seem to be relevant for Power8, so we use the same value.
3743 #define MONTGOMERY_SQUARING_THRESHOLD 64
3744
3745 // Copy len longwords from s to d, word-swapping as we go. The
3746 // destination array is reversed.
3747 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3748 d += len;
3749 while(len-- > 0) {
3750 d--;
3751 unsigned long s_val = *s;
3752 // Swap words in a longword on little-endian machines.
3753 #ifdef VM_LITTLE_ENDIAN
3754 s_val = (s_val << 32) | (s_val >> 32);
3755 #endif
3756 *d = s_val;
3757 s++;
3758 }
3759 }
3760
3761 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3762 jint len, jlong inv,
3763 jint *m_ints) {
3764 len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3765 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3766 int longwords = len/2;
3767
3768 // Make very sure we don't use so much space that the stack might
3769 // overflow. 512 jints correspond to a 16384-bit integer and
3770 // will use a total of 8k bytes of stack space here.
3771 int divisor = sizeof(unsigned long) * 4;
3772 guarantee(longwords <= 8192 / divisor, "must be");
3773 int total_allocation = longwords * sizeof (unsigned long) * 4;
3774 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3775
3776 // Local scratch arrays
3777 unsigned long
3778 *a = scratch + 0 * longwords,
3779 *b = scratch + 1 * longwords,
3780 *n = scratch + 2 * longwords,
3781 *m = scratch + 3 * longwords;
3782
3783 reverse_words((unsigned long *)a_ints, a, longwords);
3784 reverse_words((unsigned long *)b_ints, b, longwords);
3785 reverse_words((unsigned long *)n_ints, n, longwords);
3786
3787 ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3788
3789 reverse_words(m, (unsigned long *)m_ints, longwords);
3790 }
3791
3792 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3793 jint len, jlong inv,
3794 jint *m_ints) {
3795 len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3796 assert(len % 2 == 0, "array length in montgomery_square must be even");
3797 int longwords = len/2;
3798
3799 // Make very sure we don't use so much space that the stack might
3800 // overflow. 512 jints correspond to a 16384-bit integer and
3801 // will use a total of 6k bytes of stack space here.
3802 int divisor = sizeof(unsigned long) * 3;
3803 guarantee(longwords <= (8192 / divisor), "must be");
3804 int total_allocation = longwords * sizeof (unsigned long) * 3;
3805 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3806
3807 // Local scratch arrays
3808 unsigned long
3809 *a = scratch + 0 * longwords,
3810 *n = scratch + 1 * longwords,
3811 *m = scratch + 2 * longwords;
3812
3813 reverse_words((unsigned long *)a_ints, a, longwords);
3814 reverse_words((unsigned long *)n_ints, n, longwords);
3815
3816 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3817 ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3818 } else {
3819 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3820 }
3821
3822 reverse_words(m, (unsigned long *)m_ints, longwords);
3823 }
3824
3825 #if INCLUDE_JFR
3826
3827 // For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
3828 // It returns a jobject handle to the event writer.
3829 // The handle is dereferenced and the return value is the event writer oop.
3830 RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
3831 const char* name = SharedRuntime::stub_name(StubId::shared_jfr_write_checkpoint_id);
3832 CodeBuffer code(name, 512, 64);
3833 MacroAssembler* masm = new MacroAssembler(&code);
3834
3835 Register tmp1 = R10_ARG8;
3836 Register tmp2 = R9_ARG7;
3837
3838 int framesize = frame::native_abi_reg_args_size / VMRegImpl::stack_slot_size;
3839 address start = __ pc();
3840 __ mflr(tmp1);
3841 __ std(tmp1, _abi0(lr), R1_SP); // save return pc
3842 __ push_frame_reg_args(0, tmp1);
3843 int frame_complete = __ pc() - start;
3844 __ set_last_Java_frame(R1_SP, noreg);
3845 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), R16_thread);
3846 address calls_return_pc = __ last_calls_return_pc();
3847 __ reset_last_Java_frame();
3848 // The handle is dereferenced through a load barrier.
3849 __ resolve_global_jobject(R3_RET, tmp1, tmp2, MacroAssembler::PRESERVATION_NONE);
3850 __ pop_frame();
3851 __ ld(tmp1, _abi0(lr), R1_SP);
3852 __ mtlr(tmp1);
3853 __ blr();
3854
3855 OopMapSet* oop_maps = new OopMapSet();
3856 OopMap* map = new OopMap(framesize, 0);
3857 oop_maps->add_gc_map(calls_return_pc - start, map);
3858
3859 RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
3860 RuntimeStub::new_runtime_stub(name, &code, frame_complete,
3861 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3862 oop_maps, false);
3863 return stub;
3864 }
3865
3866 // For c2: call to return a leased buffer.
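// Unlike generate_jfr_write_checkpoint above, no returned handle needs to be
// resolved; the stub just saves LR, makes the leaf call, and returns.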
3867 RuntimeStub* SharedRuntime::generate_jfr_return_lease() { 3868 const char* name = SharedRuntime::stub_name(StubId::shared_jfr_return_lease_id); 3869 CodeBuffer code(name, 512, 64); 3870 MacroAssembler* masm = new MacroAssembler(&code); 3871 3872 Register tmp1 = R10_ARG8; 3873 Register tmp2 = R9_ARG7; 3874 3875 int framesize = frame::native_abi_reg_args_size / VMRegImpl::stack_slot_size; 3876 address start = __ pc(); 3877 __ mflr(tmp1); 3878 __ std(tmp1, _abi0(lr), R1_SP); // save return pc 3879 __ push_frame_reg_args(0, tmp1); 3880 int frame_complete = __ pc() - start; 3881 __ set_last_Java_frame(R1_SP, noreg); 3882 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), R16_thread); 3883 address calls_return_pc = __ last_calls_return_pc(); 3884 __ reset_last_Java_frame(); 3885 __ pop_frame(); 3886 __ ld(tmp1, _abi0(lr), R1_SP); 3887 __ mtlr(tmp1); 3888 __ blr(); 3889 3890 OopMapSet* oop_maps = new OopMapSet(); 3891 OopMap* map = new OopMap(framesize, 0); 3892 oop_maps->add_gc_map(calls_return_pc - start, map); 3893 3894 RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size) 3895 RuntimeStub::new_runtime_stub(name, &code, frame_complete, 3896 (framesize >> (LogBytesPerWord - LogBytesPerInt)), 3897 oop_maps, false); 3898 return stub; 3899 } 3900 3901 #endif // INCLUDE_JFR