1 /* 2 * Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2012, 2024 SAP SE. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #ifndef CPU_PPC_MACROASSEMBLER_PPC_HPP 27 #define CPU_PPC_MACROASSEMBLER_PPC_HPP 28 29 #include "asm/assembler.hpp" 30 #include "oops/accessDecorators.hpp" 31 #include "utilities/macros.hpp" 32 33 // MacroAssembler extends Assembler by a few frequently used macros. 34 35 class ciTypeArray; 36 class OopMap; 37 38 class MacroAssembler: public Assembler { 39 public: 40 MacroAssembler(CodeBuffer* code) : Assembler(code) {} 41 42 // Indicates whether and, if so, which registers must be preserved when calling runtime code. 
// NOTE(review): the enumerator names suggest increasingly thorough preservation
// (nothing; frame and LR; additionally GP registers; additionally FP registers).
// Confirm against the code that consumes this enum.
enum PreservationLevel {
  PRESERVATION_NONE,
  PRESERVATION_FRAME_LR,
  PRESERVATION_FRAME_LR_GP_REGS,
  PRESERVATION_FRAME_LR_GP_FP_REGS
};

//
// Optimized instruction emitters
//

// Split a signed offset into two halves such that
//   (hi << 16) + lo == si31
// with lo in the signed 16-bit range [-0x8000, 0x7fff]. Adding 1<<15 before
// shifting rounds hi so that the remainder fits into a sign-extended low half.
// Relies on '>>' of a negative int behaving as an arithmetic shift.
inline static int largeoffset_si16_si16_hi(int si31) { return (si31 + (1<<15)) >> 16; }
inline static int largeoffset_si16_si16_lo(int si31) { return si31 - (((si31 + (1<<15)) >> 16) << 16); }

// load d = *[a+si31]
// Emits several instructions if the offset is not encodable in one instruction.
void ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop);
void ld_largeoffset(Register d, int si31, Register a, int emit_filler_nop);
// Recognize / decode a sequence at address a.
// NOTE(review): presumably the sequence emitted by ld_largeoffset - confirm in the definitions.
inline static bool is_ld_largeoffset(address a);
inline static int get_ld_largeoffset_offset(address a);

inline void round_to(Register r, int modulus);

// Load/store with type given by parameter.
// size_in_bytes selects the access width; is_signed applies to loads only
// (store_sized_value has no such parameter).
void load_sized_value(Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes, bool is_signed);
void store_sized_value(Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes);

// Move register if destination register and target register are different
inline void mr_if_needed(Register rd, Register rs);
inline void fmr_if_needed(FloatRegister rd, FloatRegister rs);
// This is dedicated for emitting scheduled mach nodes. For better
// readability of the ad file I put it here.
// Endgroups are not needed if
//  - the scheduler is off
//  - the scheduler found that there is a natural group end, in that
//    case it reduced the size of the instruction used in the test
//    yielding 'needed'.
inline void endgroup_if_needed(bool needed);

// Memory barriers.
83 inline void membar(int bits); 84 inline void release(); 85 inline void acquire(); 86 inline void fence(); 87 88 // nop padding 89 void align(int modulus, int max = 252, int rem = 0); 90 91 // Align prefix opcode to make sure it's not on the last word of a 92 // 64-byte block. 93 // 94 // Note: do not call align_prefix() in a .ad file (e.g. ppc.ad). Instead 95 // add ins_alignment(2) to the instruct definition and implement the 96 // compute_padding() method of the instruct node to use 97 // compute_prefix_padding(). See loadConI32Node::compute_padding() in 98 // ppc.ad for an example. 99 void align_prefix(); 100 101 // 102 // Constants, loading constants, TOC support 103 // 104 105 // Address of the global TOC. 106 inline static address global_toc(); 107 // Offset of given address to the global TOC. 108 inline static int offset_to_global_toc(const address addr); 109 110 // Address of TOC of the current method. 111 inline address method_toc(); 112 // Offset of given address to TOC of the current method. 113 inline int offset_to_method_toc(const address addr); 114 115 // Global TOC. 116 void calculate_address_from_global_toc(Register dst, address addr, 117 bool hi16 = true, bool lo16 = true, 118 bool add_relocation = true, bool emit_dummy_addr = false); 119 inline void calculate_address_from_global_toc_hi16only(Register dst, address addr) { 120 calculate_address_from_global_toc(dst, addr, true, false); 121 }; 122 inline void calculate_address_from_global_toc_lo16only(Register dst, address addr) { 123 calculate_address_from_global_toc(dst, addr, false, true); 124 }; 125 126 inline static bool is_calculate_address_from_global_toc_at(address a, address bound); 127 // Returns address of first instruction in sequence. 128 static address patch_calculate_address_from_global_toc_at(address a, address bound, address addr); 129 static address get_address_of_calculate_address_from_global_toc_at(address a, address addr); 130 131 #ifdef _LP64 132 // Patch narrow oop constant. 
inline static bool is_set_narrow_oop(address a, address bound);
// Returns address of first instruction in sequence.
static address patch_set_narrow_oop(address a, address bound, narrowOop data);
static narrowOop get_narrow_oop(address a, address bound);
#endif

inline static bool is_load_const_at(address a);

// Emits an oop const to the constant pool, loads the constant, and
// sets a relocation info with address current_pc.
// Returns true if successful.
bool load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc, bool fixed_size = false);

static bool is_load_const_from_method_toc_at(address a);
static int get_offset_of_load_const_from_method_toc_at(address a);

// Get the 64 bit constant from a `load_const' sequence.
static long get_const(address load_const);

// Patch the 64 bit constant of a `load_const' sequence. This is a
// low level procedure. It neither flushes the instruction cache nor
// is it atomic.
static void patch_const(address load_const, long x);

// Metadata in code that we have to keep track of.
AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index
AddressLiteral constant_metadata_address(Metadata* obj); // find_index
// Oops used directly in compiled code are stored in the constant pool,
// and loaded from there.
// Allocate new entry for oop in constant pool. Generate relocation.
AddressLiteral allocate_oop_address(jobject obj);
// Find oop obj in constant pool. Return relocation with its index.
AddressLiteral constant_oop_address(jobject obj);

// Find oop in constant pool and emit instructions to load it.
// Uses constant_oop_address.
inline void set_oop_constant(jobject obj, Register d);
// Same as load_address.
inline void set_oop(AddressLiteral obj_addr, Register d);

//
// branch, jump
//
// set dst to -1, 0, +1 as follows: if CCR0bi is "greater than", dst is set to 1,
// if CCR0bi is "equal", dst is set to 0, otherwise it's set to -1.
void inline set_cmp3(Register dst);
// set dst to (treat_unordered_like_less ? -1 : +1)
void inline set_cmpu3(Register dst, bool treat_unordered_like_less);
// Branch-free implementation to convert !=0 to 1.
void inline normalize_bool(Register dst, Register temp = R0, bool is_64bit = false);

inline void pd_patch_instruction(address branch, address target, const char* file, int line);
// Declared through NOT_PRODUCT only.
NOT_PRODUCT(static void pd_print_patched_instruction(address branch);)

// Conditional far branch for destinations encodable in 24+2 bits.
// Same interface as bc, e.g. no inverse boint-field.
// Values accepted by the 'optimize' parameter of bc_far.
enum {
  bc_far_optimize_not         = 0,
  bc_far_optimize_on_relocate = 1
};
// optimize: flag for telling the conditional far branch to optimize
//           itself when relocated.
void bc_far(int boint, int biint, Label& dest, int optimize);
void bc_far_optimized(int boint, int biint, Label& dest); // 1 or 2 instructions
// Relocation of conditional far branches.
static bool    is_bc_far_at(address instruction_addr);
static address get_dest_of_bc_far_at(address instruction_addr);
static void    set_dest_of_bc_far_at(address instruction_addr, address dest);
private:
// Recognizers for the individual encodings of a far branch.
// NOTE(review): what distinguishes variants 1-3 is defined in the implementation, not here.
static bool inline is_bc_far_variant1_at(address instruction_addr);
static bool inline is_bc_far_variant2_at(address instruction_addr);
static bool inline is_bc_far_variant3_at(address instruction_addr);
public:

// Convenience bc_far versions.
// One declaration per condition mnemonic; each takes the condition register
// field, the target label and the same 'optimize' flag as bc_far.
inline void blt_far(ConditionRegister crx, Label& L, int optimize);
inline void bgt_far(ConditionRegister crx, Label& L, int optimize);
inline void beq_far(ConditionRegister crx, Label& L, int optimize);
inline void bso_far(ConditionRegister crx, Label& L, int optimize);
inline void bge_far(ConditionRegister crx, Label& L, int optimize);
inline void ble_far(ConditionRegister crx, Label& L, int optimize);
inline void bne_far(ConditionRegister crx, Label& L, int optimize);
inline void bns_far(ConditionRegister crx, Label& L, int optimize);

// Emit, identify and patch a NOT mt-safe patchable 64 bit absolute call/jump.
private:
// Length of the patchable sequence: 2 + 3 + 1 + 1 = 7 instruction words.
// The size in bytes is that count times BytesPerInstWord, and the return
// address offset is defined to be equal to the size.
enum {
  bxx64_patchable_instruction_count = (2/*load_codecache_const*/ + 3/*5load_const*/ + 1/*mtctr*/ + 1/*bctrl*/),
  bxx64_patchable_size              = bxx64_patchable_instruction_count * BytesPerInstWord,
  bxx64_patchable_ret_addr_offset   = bxx64_patchable_size
};
// 'link' distinguishes the call form (true) from the jump form (false); see
// the public bl64_* / b64_* wrappers.
void bxx64_patchable(address target, relocInfo::relocType rt, bool link);
static bool is_bxx64_patchable_at(address instruction_addr, bool link);
// Does the instruction use a pc-relative encoding of the destination?
static bool is_bxx64_patchable_pcrelative_at(address instruction_addr, bool link);
static bool is_bxx64_patchable_variant1_at(address instruction_addr, bool link);
// Load destination relative to global toc.
static bool is_bxx64_patchable_variant1b_at(address instruction_addr, bool link);
static bool is_bxx64_patchable_variant2_at(address instruction_addr, bool link);
static void set_dest_of_bxx64_patchable_at(address instruction_addr, address target, bool link);
static address get_dest_of_bxx64_patchable_at(address instruction_addr, bool link);

public:
// call
// Public aliases of the bxx64_patchable_* constants for the linking (call) form.
enum {
  bl64_patchable_instruction_count = bxx64_patchable_instruction_count,
  bl64_patchable_size              = bxx64_patchable_size,
  bl64_patchable_ret_addr_offset   = bxx64_patchable_ret_addr_offset
};
// The bl64_* functions forward to their bxx64_* counterparts with link == true.
inline void bl64_patchable(address target, relocInfo::relocType rt) {
  bxx64_patchable(target, rt, /*link=*/true);
}
inline static bool is_bl64_patchable_at(address instruction_addr) {
  return is_bxx64_patchable_at(instruction_addr, /*link=*/true);
}
inline static bool is_bl64_patchable_pcrelative_at(address instruction_addr) {
  return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/true);
}
inline static void set_dest_of_bl64_patchable_at(address instruction_addr, address target) {
  set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/true);
}
inline static address get_dest_of_bl64_patchable_at(address instruction_addr) {
  return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/true);
}
// jump
// Public aliases of the bxx64_patchable_* constants for the non-linking (jump)
// form. There is no return address offset for a jump.
enum {
  b64_patchable_instruction_count = bxx64_patchable_instruction_count,
  b64_patchable_size              = bxx64_patchable_size,
};
// The b64_* functions forward to their bxx64_* counterparts with link == false.
inline void b64_patchable(address target, relocInfo::relocType rt) {
  bxx64_patchable(target, rt, /*link=*/false);
}
inline static bool is_b64_patchable_at(address instruction_addr) {
  return is_bxx64_patchable_at(instruction_addr, /*link=*/false);
}
inline static bool is_b64_patchable_pcrelative_at(address instruction_addr) {
  return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/false);
}
inline static void set_dest_of_b64_patchable_at(address instruction_addr, address target) {
  set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/false);
}
inline static address get_dest_of_b64_patchable_at(address instruction_addr) {
  return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/false);
}

//
// Support for frame handling
//

// some ABI-related functions

// Clobbers all volatile, (non-floating-point) general-purpose registers for debugging purposes.
// This is especially useful for making calls to the JRT in places in which this hasn't been done before;
// e.g. with the introduction of LRBs (load reference barriers) for concurrent garbage collection.
void clobber_volatile_gprs(Register excluded_register = noreg);
void clobber_carg_stack_slots(Register tmp);

void save_nonvolatile_gprs(Register dst_base, int offset);
void restore_nonvolatile_gprs(Register src_base, int offset);

// Register counts: 11 general-purpose + 14 floating-point = 25 volatile registers.
enum {
  num_volatile_gp_regs = 11,
  num_volatile_fp_regs = 14,
  num_volatile_regs    = num_volatile_gp_regs + num_volatile_fp_regs
};

// By default the floating-point registers and R3_RET are included; pass false
// to leave them out. Save and restore take the same pair of flags.
void save_volatile_gprs(Register dst_base, int offset,
                        bool include_fp_regs = true, bool include_R3_RET_reg = true);
void restore_volatile_gprs(Register src_base, int offset,
                           bool include_fp_regs = true, bool include_R3_RET_reg = true);
void save_LR(Register tmp);
void restore_LR(Register tmp);
void save_LR_CR(Register tmp); // tmp contains LR on return.
void restore_LR_CR(Register tmp);

// Get current PC using bl-next-instruction trick.
address get_PC_trash_LR(Register result);

// Resize current frame either relatively wrt to current SP or absolute.
void resize_frame(Register offset, Register tmp);
void resize_frame(int offset, Register tmp);
void resize_frame_absolute(Register addr, Register tmp1, Register tmp2);

// Push a frame of size bytes.
void push_frame(Register bytes, Register tmp);

// Push a frame of size `bytes'. No abi space provided.
void push_frame(unsigned int bytes, Register tmp);

// Push a frame of size `bytes' plus native_abi_reg_args on top.
void push_frame_reg_args(unsigned int bytes, Register tmp);

// Set up a new C frame with a spill area for non-volatile GPRs and additional
// space for local variables.
void push_frame_reg_args_nonvolatiles(unsigned int bytes, Register tmp);

// pop current C frame
void pop_frame();

//
// Calls
//

private:
// Return pc of the most recently emitted call; read via last_calls_return_pc().
// NOTE(review): the inline constructor of this class does not initialize this
// member - confirm that no reader can run before a call has been emitted.
address _last_calls_return_pc;

#if defined(ABI_ELFv2)
// Generic version of a call to C function.
// Updates and returns _last_calls_return_pc.
address branch_to(Register function_entry, bool and_link);
#else
// Generic version of a call to C function via a function descriptor
// with variable support for C calling conventions (TOC, ENV, etc.).
// updates and returns _last_calls_return_pc.
address branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call,
                  bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee);
#endif

public:

// Get the pc where the last call will return to. returns _last_calls_return_pc.
inline address last_calls_return_pc();

#if defined(ABI_ELFv2)
// Call a C function via a function descriptor and use full C
// calling conventions. Updates and returns _last_calls_return_pc.
// NOTE(review): in this branch the parameter is a function entry, not a
// descriptor - the wording above looks copied from the #else branch; confirm.
address call_c(Register function_entry);
// For tail calls: only branch, don't link, so callee returns to caller of this function.
address call_c_and_return_to_caller(Register function_entry);
address call_c(address function_entry, relocInfo::relocType rt = relocInfo::none);
#else
// Call a C function via a function descriptor and use full C
// calling conventions. Updates and returns _last_calls_return_pc.
address call_c(Register function_descriptor);
// For tail calls: only branch, don't link, so callee returns to caller of this function.
address call_c_and_return_to_caller(Register function_descriptor);
address call_c(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt);
// Same signature as the ABI_ELFv2 overload: the given address is reinterpreted
// as a pointer to a function descriptor and forwarded to the overload above.
address call_c(address function_entry, relocInfo::relocType rt = relocInfo::none) {
  return call_c((const FunctionDescriptor*)function_entry, rt);
}
address call_c_using_toc(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt,
                         Register toc);
#endif

static int ic_check_size();
int ic_check(int end_alignment);

protected:

// It is imperative that all calls into the VM are handled via the
// call_VM macros. They make sure that the stack linkage is setup
// correctly. call_VM's correspond to ENTRY/ENTRY_X entry points
// while call_VM_leaf's correspond to LEAF entry points.
//
// This is the base routine called by the different versions of
// call_VM. The interpreter may customize this version by overriding
// it for its purposes (e.g., to save/restore additional registers
// when doing a VM call).
//
// If no last_java_sp is specified (noreg) then SP will be used instead.
virtual void call_VM_base(
  // where an oop-result ends up if any; use noreg otherwise
  Register oop_result,
  // to set up last_Java_frame in stubs; use noreg otherwise
  Register last_java_sp,
  // the entry point
  address entry_point,
  // flag which indicates if exception should be checked
  bool check_exception = true
);

// Support for VM calls. This is the base routine called by the
// different versions of call_VM_leaf. The interpreter may customize
// this version by overriding it for its purposes (e.g., to
// save/restore additional registers when doing a VM call).
408 void call_VM_leaf_base(address entry_point); 409 410 public: 411 // Call into the VM. 412 // Passes the thread pointer (in R3_ARG1) as a prepended argument. 413 // Makes sure oop return values are visible to the GC. 414 void call_VM(Register oop_result, address entry_point, bool check_exceptions = true); 415 void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true); 416 void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); 417 void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg3, bool check_exceptions = true); 418 void call_VM_leaf(address entry_point); 419 void call_VM_leaf(address entry_point, Register arg_1); 420 void call_VM_leaf(address entry_point, Register arg_1, Register arg_2); 421 void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); 422 423 // Call a stub function via a function descriptor, but don't save 424 // TOC before call, don't setup TOC and ENV for call, and don't 425 // restore TOC after call. Updates and returns _last_calls_return_pc. 426 inline address call_stub(Register function_entry); 427 inline void call_stub_and_return_to(Register function_entry, Register return_pc); 428 429 void post_call_nop(); 430 static bool is_post_call_nop(int instr_bits) { 431 const uint32_t nineth_bit = opp_u_field(1, 9, 9); 432 const uint32_t opcode_mask = 0b111110 << OPCODE_SHIFT; 433 const uint32_t pcn_mask = opcode_mask | nineth_bit; 434 return (instr_bits & pcn_mask) == (Assembler::CMPLI_OPCODE | nineth_bit); 435 } 436 437 // 438 // Java utilities 439 // 440 441 // Read from the polling page, its address is already in a register. 442 inline void load_from_polling_page(Register polling_page_address, int offset = 0); 443 // Check whether instruction is a read access to the polling page 444 // which was emitted by load_from_polling_page(..). 
static bool is_load_from_polling_page(int instruction, void* ucontext/*may be nullptr*/,
                                      address* polling_address_ptr = nullptr);

// Support for null-checks
//
// Generates code that causes a null OS exception if the content of reg is null.
// If the accessed location is M[reg + offset] and the offset is known, provide the
// offset. No explicit code generation is needed if the offset is within a certain
// range (0 <= offset <= page_size).
// NOTE(review): no declaration follows this paragraph; it presumably refers to
// null_check() / null_check_throw() declared further below - confirm.

// Stack overflow checking
void bang_stack_with_offset(int offset);

// If instruction is a stack bang of the form ld, stdu, or
// stdux, return the banged address. Otherwise, return 0.
static address get_stack_bang_address(int instruction, void* ucontext);

// Check for reserved stack access in method being exited. If the reserved
// stack area was accessed, protect it again and throw StackOverflowError.
void reserved_stack_check(Register return_pc);

// Atomics
// CmpxchgX sets condition register to cmpX(current, compare).
// (flag == ne) => (dest_current_value != compare_value), (!swapped)
// (flag == eq) => (dest_current_value == compare_value), ( swapped)
//
// Constant values for the 'cmpxchgx_hint' parameter of the atomic operations
// below: true for acquiring a lock, false for releasing a lock or for a
// plain atomic update.
static inline bool cmpxchgx_hint_acquire_lock() { return true; }
// The stxcx will probably not be succeeded by a releasing store.
static inline bool cmpxchgx_hint_release_lock() { return false; }
static inline bool cmpxchgx_hint_atomic_update() { return false; }

// Cmpxchg semantics
// Values for the 'semantics' parameter of the cmpxchg functions.
// NOTE(review): distinct powers of two, so presumably meant to be OR-ed together - confirm.
enum {
  MemBarNone       = 0,
  MemBarRel        = 1,
  MemBarAcq        = 2,
  MemBarFenceAfter = 4 // use powers of 2
};
private:
// Helper functions for word/sub-word atomics.
// 'size' is the operand size in bytes; the public wrappers pass 1, 2 or 4.
// 'is_add' selects get-and-add (true) versus get-and-set (false).
void atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value,
                                   Register addr_base, Register tmp1, Register tmp2, Register tmp3,
                                   bool cmpxchgx_hint, bool is_add, int size);
void cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value,
                       RegisterOrConstant compare_value, Register exchange_value,
                       Register addr_base, Register tmp1, Register tmp2,
                       Label &retry, Label &failed, bool cmpxchgx_hint, int size);
void cmpxchg_generic(ConditionRegister flag, Register dest_current_value,
                     RegisterOrConstant compare_value, Register exchange_value,
                     Register addr_base, Register tmp1, Register tmp2,
                     int semantics, bool cmpxchgx_hint, Register int_flag_success,
                     Label* failed_ext, bool contention_hint, bool weak, int size);
public:
// Temps and addr_base are killed if processor does not support Power 8 instructions.
// Result will be sign extended.
// Byte-sized get-and-set.
void getandsetb(Register dest_current_value, Register exchange_value, Register addr_base,
                Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
  atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, /*is_add=*/false, /*size=*/1);
}
// Temps and addr_base are killed if processor does not support Power 8 instructions.
// Result will be sign extended.
// Halfword-sized get-and-set.
void getandseth(Register dest_current_value, Register exchange_value, Register addr_base,
                Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
  atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, /*is_add=*/false, /*size=*/2);
}
// Word-sized get-and-set; passes noreg for all three temps.
void getandsetw(Register dest_current_value, Register exchange_value, Register addr_base,
                bool cmpxchgx_hint) {
  atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, noreg, noreg, noreg, cmpxchgx_hint, /*is_add=*/false, /*size=*/4);
}
// Doubleword variant; defined out of line (does not go through the generic helper here).
void getandsetd(Register dest_current_value, Register exchange_value, Register addr_base,
                bool cmpxchgx_hint);
// tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed).
// Result will be sign extended.
// Byte-sized get-and-add.
void getandaddb(Register dest_current_value, Register inc_value, Register addr_base,
                Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
  atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, /*is_add=*/true, /*size=*/1);
}
// tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed).
// Result will be sign extended.
// Halfword-sized get-and-add.
void getandaddh(Register dest_current_value, Register inc_value, Register addr_base,
                Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
  atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, /*is_add=*/true, /*size=*/2);
}
// Word-sized get-and-add; only tmp1 is passed on, tmp2/tmp3 are noreg.
void getandaddw(Register dest_current_value, Register inc_value, Register addr_base,
                Register tmp1, bool cmpxchgx_hint) {
  atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, noreg, noreg, cmpxchgx_hint, /*is_add=*/true, /*size=*/4);
}
// Doubleword variant; defined out of line.
// NOTE(review): the second parameter is named exchange_value here but inc_value
// in the other getandadd* functions - presumably the increment; confirm.
void getandaddd(Register dest_current_value, Register exchange_value, Register addr_base,
                Register tmp, bool cmpxchgx_hint);
// Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions.
// compare_value must be at least 32 bit sign extended. Result will be sign extended.
// Byte-sized compare-and-exchange.
void cmpxchgb(ConditionRegister flag, Register dest_current_value,
              RegisterOrConstant compare_value, Register exchange_value,
              Register addr_base, Register tmp1, Register tmp2,
              int semantics, bool cmpxchgx_hint = false, Register int_flag_success = noreg,
              Label* failed = nullptr, bool contention_hint = false, bool weak = false) {
  cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2,
                  semantics, cmpxchgx_hint, int_flag_success, failed, contention_hint, weak, /*size=*/1);
}
// Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions.
// compare_value must be at least 32 bit sign extended. Result will be sign extended.
// Halfword-sized compare-and-exchange.
void cmpxchgh(ConditionRegister flag, Register dest_current_value,
              RegisterOrConstant compare_value, Register exchange_value,
              Register addr_base, Register tmp1, Register tmp2,
              int semantics, bool cmpxchgx_hint = false, Register int_flag_success = noreg,
              Label* failed = nullptr, bool contention_hint = false, bool weak = false) {
  cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2,
                  semantics, cmpxchgx_hint, int_flag_success, failed, contention_hint, weak, /*size=*/2);
}
// Word-sized compare-and-exchange; takes no temps and passes noreg for both.
void cmpxchgw(ConditionRegister flag, Register dest_current_value,
              RegisterOrConstant compare_value, Register exchange_value,
              Register addr_base,
              int semantics, bool cmpxchgx_hint = false, Register int_flag_success = noreg,
              Label* failed = nullptr, bool contention_hint = false, bool weak = false) {
  cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, noreg, noreg,
                  semantics, cmpxchgx_hint, int_flag_success, failed, contention_hint, weak, /*size=*/4);
}
// Doubleword variant; same parameter list as cmpxchgw, defined out of line.
void cmpxchgd(ConditionRegister flag, Register dest_current_value,
              RegisterOrConstant compare_value, Register exchange_value,
              Register addr_base,
              int semantics, bool cmpxchgx_hint = false, Register int_flag_success = noreg,
              Label* failed = nullptr, bool contention_hint = false, bool weak = false);

// interface method calling
void lookup_interface_method(Register recv_klass,
                             Register intf_klass,
                             RegisterOrConstant itable_index,
                             Register method_result,
                             Register temp_reg, Register temp2_reg,
                             Label& no_such_interface,
                             bool return_method = true);

// virtual method calling
void lookup_virtual_method(Register recv_klass,
                           RegisterOrConstant vtable_index,
                           Register method_result);

// Test sub_klass against super_klass, with fast and slow paths.

// The fast path produces a tri-state answer: yes / no / maybe-slow.
// One of the three labels can be null, meaning take the fall-through.
// If super_check_offset is -1, the value is loaded up from super_klass.
// No registers are killed, except temp1_reg and temp2_reg.
// If super_check_offset is not -1, temp2_reg is not used and can be noreg.
void check_klass_subtype_fast_path(Register sub_klass,
                                   Register super_klass,
                                   Register temp1_reg,
                                   Register temp2_reg,
                                   Label* L_success,
                                   Label* L_failure,
                                   Label* L_slow_path = nullptr, // default fall through
                                   RegisterOrConstant super_check_offset = RegisterOrConstant(-1));

// The rest of the type check; must be wired to a corresponding fast path.
// It does not repeat the fast path logic, so don't use it standalone.
// The temp_reg can be noreg, if no temps are available.
// It can also be sub_klass or super_klass, meaning it's OK to kill that one.
// Updates the sub's secondary super cache as necessary.
// NOTE(review): "temp_reg" above does not match a parameter name; the signature
// has temp1_reg and temp2_reg - confirm which of them may be noreg.
void check_klass_subtype_slow_path(Register sub_klass,
                                   Register super_klass,
                                   Register temp1_reg,
                                   Register temp2_reg,
                                   Label* L_success = nullptr,
                                   Register result_reg = noreg);

// Simplified, combined version, good for typical uses.
// Falls through on failure.
void check_klass_subtype(Register sub_klass,
                         Register super_klass,
                         Register temp1_reg,
                         Register temp2_reg,
                         Label& L_success);

void repne_scan(Register addr, Register value, Register count, Register scratch);

// As above, but with a constant super_klass.
// The result is in Register result, not the condition codes.
void lookup_secondary_supers_table(Register r_sub_klass,
                                   Register r_super_klass,
                                   Register temp1,
                                   Register temp2,
                                   Register temp3,
                                   Register temp4,
                                   Register result,
                                   u1 super_klass_slot);

void verify_secondary_supers_table(Register r_sub_klass,
                                   Register r_super_klass,
                                   Register result,
                                   Register temp1,
                                   Register temp2,
                                   Register temp3);

void lookup_secondary_supers_table_slow_path(Register r_super_klass,
                                             Register r_array_base,
                                             Register r_array_index,
                                             Register r_bitmap,
                                             Register result,
                                             Register temp1);

// Both labels are optional (default nullptr).
void clinit_barrier(Register klass,
                    Register thread,
                    Label* L_fast_path = nullptr,
                    Label* L_slow_path = nullptr);

// Method handle support (JSR 292).
RegisterOrConstant argument_offset(RegisterOrConstant arg_slot, Register temp_reg, int extra_slot_offset = 0);

void push_cont_fastpath();
void pop_cont_fastpath();
void inc_held_monitor_count(Register tmp);
void dec_held_monitor_count(Register tmp);
void atomically_flip_locked_state(bool is_unlock, Register obj, Register tmp, Label& failed, int semantics);
void lightweight_lock(Register box, Register obj, Register t1, Register t2, Label& slow);
void lightweight_unlock(Register obj, Register t1, Label& slow);

// allocation (for C1)
void tlab_allocate(
  Register obj,               // result: pointer to object after successful allocation
  Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
  int con_size_in_bytes,      // object size in bytes if known at compile time
  Register t1,                // temp register
  Label& slow_case            // continuation point if fast allocation fails
);

// 6 * 4 = 24 bytes.
// NOTE(review): presumably six 4-byte instruction words - confirm in emit_trampoline_stub.
enum { trampoline_stub_size = 6 * 4 };
address emit_trampoline_stub(int destination_toc_offset, int insts_call_instruction_offset, Register Rtoc = noreg);

// Fast-path locking for compiled code. All four functions share one parameter
// list: a condition register, the object, the box and three temps.
void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
                               Register tmp1, Register tmp2, Register tmp3);

void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
                                 Register tmp1, Register tmp2, Register tmp3);

void compiler_fast_lock_lightweight_object(ConditionRegister flag, Register oop, Register box,
                                           Register tmp1, Register tmp2, Register tmp3);

void compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register oop, Register box,
                                             Register tmp1, Register tmp2, Register tmp3);

// Check if safepoint requested and if so branch
void safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod);

void resolve_jobject(Register value, Register tmp1, Register tmp2,
                     MacroAssembler::PreservationLevel preservation_level);
void resolve_global_jobject(Register value, Register tmp1, Register tmp2,
                            MacroAssembler::PreservationLevel preservation_level);

// Support for managing the JavaThread pointer (i.e., the reference to
// thread-local information).

// Support for last Java frame (but use call_VM instead where possible):
// access R16_thread->last_Java_sp.
void set_last_Java_frame(Register last_java_sp, Register last_Java_pc);
void reset_last_Java_frame(void);
void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1);

// Read vm result from thread: oop_result = R16_thread->result;
void get_vm_result(Register oop_result);
void get_vm_result_2(Register metadata_result);

static bool needs_explicit_null_check(intptr_t offset);
static bool uses_implicit_null_check(void* address);

// Trap-instruction-based checks.
// Range checks can be distinguished from zero checks as they check 32 bit,
// zero checks all 64 bits (tw, td).
711 inline void trap_null_check(Register a, trap_to_bits cmp = traptoEqual); 712 static bool is_trap_null_check(int x) { 713 return is_tdi(x, traptoEqual, -1/*any reg*/, 0) || 714 is_tdi(x, traptoGreaterThanUnsigned, -1/*any reg*/, 0); 715 } 716 717 inline void trap_ic_miss_check(Register a, Register b); 718 static bool is_trap_ic_miss_check(int x) { 719 return is_td(x, traptoGreaterThanUnsigned | traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/); 720 } 721 722 // Implicit or explicit null check, jumps to static address exception_entry. 723 inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry); 724 inline void null_check(Register a, int offset, Label *Lis_null); // implicit only if Lis_null not provided 725 726 // Access heap oop, handle encoding and GC barriers. 727 // Some GC barriers call C so use needs_frame = true if an extra frame is needed at the current call site. 728 inline void access_store_at(BasicType type, DecoratorSet decorators, 729 Register base, RegisterOrConstant ind_or_offs, Register val, 730 Register tmp1, Register tmp2, Register tmp3, 731 MacroAssembler::PreservationLevel preservation_level); 732 inline void access_load_at(BasicType type, DecoratorSet decorators, 733 Register base, RegisterOrConstant ind_or_offs, Register dst, 734 Register tmp1, Register tmp2, 735 MacroAssembler::PreservationLevel preservation_level, Label *L_handle_null = nullptr); 736 737 public: 738 // Specify tmp1 for better code in certain compressed oops cases. Specify Label to bail out on null oop. 739 // tmp1, tmp2 and needs_frame are used with decorators ON_PHANTOM_OOP_REF or ON_WEAK_OOP_REF. 
740 inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1, 741 Register tmp1, Register tmp2, 742 MacroAssembler::PreservationLevel preservation_level, 743 DecoratorSet decorators = 0, Label *L_handle_null = nullptr); 744 745 inline void store_heap_oop(Register d, RegisterOrConstant offs, Register s1, 746 Register tmp1, Register tmp2, Register tmp3, 747 MacroAssembler::PreservationLevel preservation_level, DecoratorSet decorators = 0); 748 749 // Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong. 750 // src == d allowed. 751 inline Register encode_heap_oop_not_null(Register d, Register src = noreg); 752 inline Register decode_heap_oop_not_null(Register d, Register src = noreg); 753 754 // Null allowed. 755 inline Register encode_heap_oop(Register d, Register src); // Prefer null check in GC barrier! 756 inline void decode_heap_oop(Register d); 757 758 // Load/Store klass oop from klass field. Compress. 759 void load_klass(Register dst, Register src); 760 void load_klass_check_null(Register dst, Register src, Label* is_null = nullptr); 761 void store_klass(Register dst_oop, Register klass, Register tmp = R0); 762 void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified. 763 764 void resolve_oop_handle(Register result, Register tmp1, Register tmp2, 765 MacroAssembler::PreservationLevel preservation_level); 766 void resolve_weak_handle(Register result, Register tmp1, Register tmp2, 767 MacroAssembler::PreservationLevel preservation_level); 768 void load_method_holder(Register holder, Register method); 769 770 static int instr_size_for_decode_klass_not_null(); 771 void decode_klass_not_null(Register dst, Register src = noreg); 772 Register encode_klass_not_null(Register dst, Register src = noreg); 773 774 // SIGTRAP-based range checks for arrays. 
775 inline void trap_range_check_l(Register a, Register b); 776 inline void trap_range_check_l(Register a, int si16); 777 static bool is_trap_range_check_l(int x) { 778 return (is_tw (x, traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/) || 779 is_twi(x, traptoLessThanUnsigned, -1/*any reg*/) ); 780 } 781 inline void trap_range_check_le(Register a, int si16); 782 static bool is_trap_range_check_le(int x) { 783 return is_twi(x, traptoEqual | traptoLessThanUnsigned, -1/*any reg*/); 784 } 785 inline void trap_range_check_g(Register a, int si16); 786 static bool is_trap_range_check_g(int x) { 787 return is_twi(x, traptoGreaterThanUnsigned, -1/*any reg*/); 788 } 789 inline void trap_range_check_ge(Register a, Register b); 790 inline void trap_range_check_ge(Register a, int si16); 791 static bool is_trap_range_check_ge(int x) { 792 return (is_tw (x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/, -1/*any reg*/) || 793 is_twi(x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/) ); 794 } 795 static bool is_trap_range_check(int x) { 796 return is_trap_range_check_l(x) || is_trap_range_check_le(x) || 797 is_trap_range_check_g(x) || is_trap_range_check_ge(x); 798 } 799 800 void clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp = R0, int offset = 0); 801 void clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp = R0); 802 void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0, long const_cnt = -1); 803 804 // Emitters for BigInteger.multiplyToLen intrinsic. 
805 inline void multiply64(Register dest_hi, Register dest_lo, 806 Register x, Register y); 807 void add2_with_carry(Register dest_hi, Register dest_lo, 808 Register src1, Register src2); 809 void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, 810 Register y, Register y_idx, Register z, 811 Register carry, Register product_high, Register product, 812 Register idx, Register kdx, Register tmp); 813 void multiply_add_128_x_128(Register x_xstart, Register y, Register z, 814 Register yz_idx, Register idx, Register carry, 815 Register product_high, Register product, Register tmp, 816 int offset); 817 void multiply_128_x_128_loop(Register x_xstart, 818 Register y, Register z, 819 Register yz_idx, Register idx, Register carry, 820 Register product_high, Register product, 821 Register carry2, Register tmp); 822 void muladd(Register out, Register in, Register offset, Register len, Register k, 823 Register tmp1, Register tmp2, Register carry); 824 void multiply_to_len(Register x, Register xlen, 825 Register y, Register ylen, 826 Register z, 827 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, 828 Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10, 829 Register tmp11, Register tmp12, Register tmp13); 830 831 // Emitters for CRC32 calculation. 832 // A note on invertCRC: 833 // Unfortunately, internal representation of crc differs between CRC32 and CRC32C. 834 // CRC32 holds it's current crc value in the externally visible representation. 835 // CRC32C holds it's current crc value in internal format, ready for updating. 836 // Thus, the crc value must be bit-flipped before updating it in the CRC32 case. 837 // In the CRC32C case, it must be bit-flipped when it is given to the outside world (getValue()). 838 // The bool invertCRC parameter indicates whether bit-flipping is required before updates. 
839 void load_reverse_32(Register dst, Register src); 840 int crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3); 841 void fold_byte_crc32(Register crc, Register val, Register table, Register tmp); 842 void update_byte_crc32(Register crc, Register val, Register table); 843 void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, 844 Register data, bool loopAlignment); 845 void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, 846 Register t0, Register t1, Register t2, Register t3, 847 Register tc0, Register tc1, Register tc2, Register tc3); 848 void kernel_crc32_1word(Register crc, Register buf, Register len, Register table, 849 Register t0, Register t1, Register t2, Register t3, 850 Register tc0, Register tc1, Register tc2, Register tc3, 851 bool invertCRC); 852 void kernel_crc32_vpmsum(Register crc, Register buf, Register len, Register constants, 853 Register t0, Register t1, Register t2, Register t3, Register t4, 854 Register t5, Register t6, bool invertCRC); 855 void kernel_crc32_vpmsum_aligned(Register crc, Register buf, Register len, Register constants, 856 Register t0, Register t1, Register t2, Register t3, Register t4, 857 Register t5, Register t6); 858 // Version which internally decides what to use. 
859 void crc32(Register crc, Register buf, Register len, Register t0, Register t1, Register t2, 860 Register t3, Register t4, Register t5, Register t6, Register t7, bool is_crc32c); 861 862 void kernel_crc32_singleByteReg(Register crc, Register val, Register table, 863 bool invertCRC); 864 865 // SHA-2 auxiliary functions and public interfaces 866 private: 867 void sha256_deque(const VectorRegister src, 868 const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3); 869 void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr); 870 void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw); 871 void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws, 872 const int total_ws, const Register k, const VectorRegister* kpws, 873 const int total_kpws); 874 void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1, 875 const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0, 876 const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3, 877 const Register j, const Register k); 878 void sha256_update_sha_state(const VectorRegister a, const VectorRegister b, 879 const VectorRegister c, const VectorRegister d, const VectorRegister e, 880 const VectorRegister f, const VectorRegister g, const VectorRegister h, 881 const Register hptr); 882 883 void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws); 884 void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs); 885 void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw); 886 void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs); 887 void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1, 888 const VectorRegister w2, const VectorRegister w3, 889 const VectorRegister w4, const 
VectorRegister w5, 890 const VectorRegister w6, const VectorRegister w7, 891 const VectorRegister kpw0, const VectorRegister kpw1, const Register j, 892 const VectorRegister vRb, const Register k); 893 894 public: 895 void sha256(bool multi_block); 896 void sha512(bool multi_block); 897 898 void cache_wb(Address line); 899 void cache_wbsync(bool is_presync); 900 901 // 902 // Debugging 903 // 904 905 // assert on cr0 906 void asm_assert(bool check_equal, const char* msg); 907 void asm_assert_eq(const char* msg) { asm_assert(true, msg); } 908 void asm_assert_ne(const char* msg) { asm_assert(false, msg); } 909 910 private: 911 void asm_assert_mems_zero(bool check_equal, int size, int mem_offset, Register mem_base, 912 const char* msg); 913 914 public: 915 916 void asm_assert_mem8_is_zero(int mem_offset, Register mem_base, const char* msg) { 917 asm_assert_mems_zero(true, 8, mem_offset, mem_base, msg); 918 } 919 void asm_assert_mem8_isnot_zero(int mem_offset, Register mem_base, const char* msg) { 920 asm_assert_mems_zero(false, 8, mem_offset, mem_base, msg); 921 } 922 923 // Calls verify_oop. If UseCompressedOops is on, decodes the oop. 924 // Preserves reg. 925 void verify_coop(Register reg, const char*); 926 // Emit code to verify that reg contains a valid oop if +VerifyOops is set. 927 void verify_oop(Register reg, const char* s = "broken oop"); 928 void verify_oop_addr(RegisterOrConstant offs, Register base, const char* s = "contains broken oop"); 929 930 // TODO: verify method and klass metadata (compare against vptr?) 931 void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} 932 void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {} 933 934 // Convenience method returning function entry. For the ELFv1 case 935 // creates function descriptor at the current address and returns 936 // the pointer to it. For the ELFv2 case returns the current address. 
937 inline address function_entry(); 938 939 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) 940 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) 941 942 private: 943 void stop(int type, const char* msg); 944 945 public: 946 enum { 947 stop_stop = 0, 948 stop_untested = 1, 949 stop_unimplemented = 2, 950 stop_shouldnotreachhere = 3, 951 stop_msg_present = -0x8000 952 }; 953 954 // Prints msg, dumps registers and stops execution. 955 void stop (const char* msg = nullptr) { stop(stop_stop, msg); } 956 void untested (const char* msg = nullptr) { stop(stop_untested, msg); } 957 void unimplemented (const char* msg = nullptr) { stop(stop_unimplemented, msg); } 958 void should_not_reach_here(const char* msg = nullptr) { stop(stop_shouldnotreachhere, msg); } 959 960 void zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) PRODUCT_RETURN; 961 }; 962 963 // class SkipIfEqualZero: 964 // 965 // Instantiating this class will result in assembly code being output that will 966 // jump around any code emitted between the creation of the instance and it's 967 // automatic destruction at the end of a scope block, depending on the value of 968 // the flag passed to the constructor, which will be checked at run-time. 969 class SkipIfEqualZero : public StackObj { 970 private: 971 MacroAssembler* _masm; 972 Label _label; 973 974 public: 975 // 'Temp' is a temp register that this object can use (and trash). 976 explicit SkipIfEqualZero(MacroAssembler*, Register temp, const bool* flag_addr); 977 static void skip_to_label_if_equal_zero(MacroAssembler*, Register temp, 978 const bool* flag_addr, Label& label); 979 ~SkipIfEqualZero(); 980 }; 981 982 #endif // CPU_PPC_MACROASSEMBLER_PPC_HPP