1 /*
2 * Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #ifndef CPU_PPC_MACROASSEMBLER_PPC_HPP
27 #define CPU_PPC_MACROASSEMBLER_PPC_HPP
28
29 #include "asm/assembler.hpp"
30 #include "oops/accessDecorators.hpp"
31 #include "utilities/macros.hpp"
32
33 // MacroAssembler extends Assembler by a few frequently used macros.
34
// Forward declarations; complete definitions are not required at this point.
class ciInlineKlass;
class ciTypeArray;
class OopMap;
class SigEntry;
class VMRegPair;
40
41 class MacroAssembler: public Assembler {
42 public:
43 MacroAssembler(CodeBuffer* code) : Assembler(code) {}
44
// Tells whether registers must be preserved when calling runtime code and,
// if so, which ones.
enum PreservationLevel {
  PRESERVATION_NONE,                 // nothing to preserve
  PRESERVATION_FRAME_LR,             // frame and LR
  PRESERVATION_FRAME_LR_GP_REGS,     // frame, LR and GP registers
  PRESERVATION_FRAME_LR_GP_FP_REGS   // frame, LR, GP and FP registers
};
52
53 //
54 // Optimized instruction emitters
55 //
56
// Split an offset into two signed 16 bit immediates such that
//   si31 == hi * 2^16 + lo   and   -0x8000 <= lo <= 0x7fff.
// (si31 is presumably a signed 31 bit value, as the name suggests; si31 + 2^15
// must not overflow int.)
//
// High part: si31 rounded to the nearest multiple of 2^16, divided by 2^16.
inline static int largeoffset_si16_si16_hi(int si31) { return (si31 + (1<<15)) >> 16; }
// Low part: the signed remainder. A multiplication is used instead of shifting
// the high part back, because left-shifting a negative value is undefined
// behaviour before C++20.
inline static int largeoffset_si16_si16_lo(int si31) { return si31 - largeoffset_si16_si16_hi(si31) * (1<<16); }
59
60 // load d = *[a+si31]
61 // Emits several instructions if the offset is not encodable in one instruction.
62 void ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop);
63 void ld_largeoffset (Register d, int si31, Register a, int emit_filler_nop);
64 inline static bool is_ld_largeoffset(address a);
65 inline static int get_ld_largeoffset_offset(address a);
66
67 inline void round_to(Register r, int modulus);
68
69 // Load/store with type given by parameter.
70 void load_sized_value( Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes, bool is_signed);
71 void store_sized_value(Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes);
72
73 // Move register if destination register and target register are different
74 inline void mr_if_needed(Register rd, Register rs, bool allow_invalid = false);
75 inline void fmr_if_needed(FloatRegister rd, FloatRegister rs);
76
// Memory barriers.
inline void membar(int bits);
inline void release();
inline void acquire();
inline void fence();

// Padding with nops.
void align(int modulus, int max = 252, int rem = 0);

// Align a prefix opcode so that it does not end up on the last word of a
// 64-byte block.
//
// Note: align_prefix() must not be called from a .ad file (e.g. ppc.ad).
// There, add ins_alignment(2) to the instruct definition and implement the
// instruct node's compute_padding() method in terms of
// compute_prefix_padding(). loadConI32Node::compute_padding() in ppc.ad
// serves as an example.
void align_prefix();
95
96 //
97 // Constants, loading constants, TOC support
98 //
99
100 // Address of the global TOC.
101 inline static address global_toc();
102 // Offset of given address to the global TOC.
103 inline static int offset_to_global_toc(const address addr);
104
105 // Address of TOC of the current method.
106 inline address method_toc();
107 // Offset of given address to TOC of the current method.
108 inline int offset_to_method_toc(const address addr);
109
110 // Global TOC.
111 void calculate_address_from_global_toc(Register dst, address addr,
112 bool hi16 = true, bool lo16 = true,
113 bool add_relocation = true, bool emit_dummy_addr = false,
114 bool add_addr_to_reloc = true);
115 void calculate_address_from_global_toc(Register dst, Label& addr,
116 bool hi16 = true, bool lo16 = true,
117 bool add_relocation = true, bool emit_dummy_addr = false) {
118 calculate_address_from_global_toc(dst, target(addr), hi16, lo16, add_relocation, emit_dummy_addr, false);
119 }
120 inline void calculate_address_from_global_toc_hi16only(Register dst, address addr) {
121 calculate_address_from_global_toc(dst, addr, true, false);
122 };
123 inline void calculate_address_from_global_toc_lo16only(Register dst, address addr) {
124 calculate_address_from_global_toc(dst, addr, false, true);
125 };
126
127 inline static bool is_calculate_address_from_global_toc_at(address a, address bound);
128 // Returns address of first instruction in sequence.
129 static address patch_calculate_address_from_global_toc_at(address a, address bound, address addr);
130 static address get_address_of_calculate_address_from_global_toc_at(address a, address addr);
131
132 #ifdef _LP64
133 // Patch narrow oop constant.
134 inline static bool is_set_narrow_oop(address a, address bound);
135 // Returns address of first instruction in sequence.
136 static address patch_set_narrow_oop(address a, address bound, narrowOop data);
137 static narrowOop get_narrow_oop(address a, address bound);
138 #endif
139
140 inline static bool is_load_const_at(address a);
141
142 // Emits an oop const to the constant pool, loads the constant, and
143 // sets a relocation info with address current_pc.
144 // Returns true if successful.
145 bool load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc, bool fixed_size = false);
146
147 static bool is_load_const_from_method_toc_at(address a);
148 static int get_offset_of_load_const_from_method_toc_at(address a);
149
150 // Get the 64 bit constant from a `load_const' sequence.
151 static long get_const(address load_const);
152
153 // Patch the 64 bit constant of a `load_const' sequence. This is a
154 // low level procedure. It neither flushes the instruction cache nor
155 // is it atomic.
156 static void patch_const(address load_const, long x);
157
158 // Metadata in code that we have to keep track of.
159 AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index
160 AddressLiteral constant_metadata_address(Metadata* obj); // find_index
161 // Oops used directly in compiled code are stored in the constant pool,
162 // and loaded from there.
163 // Allocate new entry for oop in constant pool. Generate relocation.
164 AddressLiteral allocate_oop_address(jobject obj);
165 // Find oop obj in constant pool. Return relocation with it's index.
166 AddressLiteral constant_oop_address(jobject obj);
167
168 // Find oop in constant pool and emit instructions to load it.
169 // Uses constant_oop_address.
170 inline void set_oop_constant(jobject obj, Register d);
171 // Same as load_address.
172 inline void set_oop (AddressLiteral obj_addr, Register d);
173
174 //
175 // branch, jump
176 //
177 // set dst to -1, 0, +1 as follows: if CR0bi is "greater than", dst is set to 1,
178 // if CR0bi is "equal", dst is set to 0, otherwise it's set to -1.
179 void inline set_cmp3(Register dst);
180 // set dst to (treat_unordered_like_less ? -1 : +1)
181 void inline set_cmpu3(Register dst, bool treat_unordered_like_less);
182 // Branch-free implementation to convert !=0 to 1.
183 void inline normalize_bool(Register dst, Register temp = R0, bool is_64bit = false);
184 // Convert between half precision float encoded into a short and a float in a FloatRegister.
185 void inline f2hf(Register dst, FloatRegister src, FloatRegister tmp);
186 void inline hf2f(FloatRegister dst, Register src);
187
188 inline void pd_patch_instruction(address branch, address target, const char* file, int line);
189 NOT_PRODUCT(static void pd_print_patched_instruction(address branch);)
190
191 // Conditional far branch for destinations encodable in 24+2 bits.
192 // Same interface as bc, e.g. no inverse boint-field.
193 enum {
194 bc_far_optimize_not = 0,
195 bc_far_optimize_on_relocate = 1
196 };
197 // optimize: flag for telling the conditional far branch to optimize
198 // itself when relocated.
199 void bc_far(int boint, int biint, Label& dest, int optimize);
200 void bc_far_optimized(int boint, int biint, Label& dest); // 1 or 2 instructions
201 // Relocation of conditional far branches.
202 static bool is_bc_far_at(address instruction_addr);
203 static address get_dest_of_bc_far_at(address instruction_addr);
204 static void set_dest_of_bc_far_at(address instruction_addr, address dest);
205 private:
206 static bool inline is_bc_far_variant1_at(address instruction_addr);
207 static bool inline is_bc_far_variant2_at(address instruction_addr);
208 static bool inline is_bc_far_variant3_at(address instruction_addr);
209 public:
210
211 // Convenience bc_far versions.
212 inline void blt_far(ConditionRegister crx, Label& L, int optimize);
213 inline void bgt_far(ConditionRegister crx, Label& L, int optimize);
214 inline void beq_far(ConditionRegister crx, Label& L, int optimize);
215 inline void bso_far(ConditionRegister crx, Label& L, int optimize);
216 inline void bge_far(ConditionRegister crx, Label& L, int optimize);
217 inline void ble_far(ConditionRegister crx, Label& L, int optimize);
218 inline void bne_far(ConditionRegister crx, Label& L, int optimize);
219 inline void bns_far(ConditionRegister crx, Label& L, int optimize);
220
221 // Emit, identify and patch a NOT mt-safe patchable 64 bit absolute call/jump.
222 private:
223 enum {
224 bxx64_patchable_instruction_count = (2/*load_codecache_const*/ + 3/*5load_const*/ + 1/*mtctr*/ + 1/*bctrl*/),
225 bxx64_patchable_size = bxx64_patchable_instruction_count * BytesPerInstWord,
226 bxx64_patchable_ret_addr_offset = bxx64_patchable_size
227 };
228 void bxx64_patchable(address target, relocInfo::relocType rt, bool link);
229 static bool is_bxx64_patchable_at( address instruction_addr, bool link);
230 // Does the instruction use a pc-relative encoding of the destination?
231 static bool is_bxx64_patchable_pcrelative_at( address instruction_addr, bool link);
232 static bool is_bxx64_patchable_variant1_at( address instruction_addr, bool link);
233 // Load destination relative to global toc.
234 static bool is_bxx64_patchable_variant1b_at( address instruction_addr, bool link);
235 static bool is_bxx64_patchable_variant2_at( address instruction_addr, bool link);
236 static void set_dest_of_bxx64_patchable_at( address instruction_addr, address target, bool link);
237 static address get_dest_of_bxx64_patchable_at(address instruction_addr, bool link);
238
239 public:
240 // call
241 enum {
242 bl64_patchable_instruction_count = bxx64_patchable_instruction_count,
243 bl64_patchable_size = bxx64_patchable_size,
244 bl64_patchable_ret_addr_offset = bxx64_patchable_ret_addr_offset
245 };
246 inline void bl64_patchable(address target, relocInfo::relocType rt) {
247 bxx64_patchable(target, rt, /*link=*/true);
248 }
249 inline static bool is_bl64_patchable_at(address instruction_addr) {
250 return is_bxx64_patchable_at(instruction_addr, /*link=*/true);
251 }
252 inline static bool is_bl64_patchable_pcrelative_at(address instruction_addr) {
253 return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/true);
254 }
255 inline static void set_dest_of_bl64_patchable_at(address instruction_addr, address target) {
256 set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/true);
257 }
258 inline static address get_dest_of_bl64_patchable_at(address instruction_addr) {
259 return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/true);
260 }
261 // jump
262 enum {
263 b64_patchable_instruction_count = bxx64_patchable_instruction_count,
264 b64_patchable_size = bxx64_patchable_size,
265 };
266 inline void b64_patchable(address target, relocInfo::relocType rt) {
267 bxx64_patchable(target, rt, /*link=*/false);
268 }
269 inline static bool is_b64_patchable_at(address instruction_addr) {
270 return is_bxx64_patchable_at(instruction_addr, /*link=*/false);
271 }
272 inline static bool is_b64_patchable_pcrelative_at(address instruction_addr) {
273 return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/false);
274 }
275 inline static void set_dest_of_b64_patchable_at(address instruction_addr, address target) {
276 set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/false);
277 }
278 inline static address get_dest_of_b64_patchable_at(address instruction_addr) {
279 return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/false);
280 }
281
282 //
283 // Support for frame handling
284 //
285
286 // some ABI-related functions
287
288 // Clobbers all volatile, (non-floating-point) general-purpose registers for debugging purposes.
289 // This is especially useful for making calls to the JRT in places in which this hasn't been done before;
290 // e.g. with the introduction of LRBs (load reference barriers) for concurrent garbage collection.
291 void clobber_volatile_gprs(Register excluded_register = noreg) NOT_DEBUG_RETURN;
292 // Load bad values into registers that are nonvolatile according to the ABI except R16_thread and R29_TOC.
293 // This is done after vthread preemption and before vthread resume.
294 void clobber_nonvolatile_registers() NOT_DEBUG_RETURN;
295 void clobber_carg_stack_slots(Register tmp);
296
// Size in bytes of the area used for saving nonvolatile registers.
// The GP part is always counted; the FP and vector parts only on request.
int save_nonvolatile_registers_size(bool include_fp_regs, bool include_vector_regs) {
  const int gp_bytes     = (32 - 14) * 8;                              // GP regs: 18 slots of 8 bytes
  const int fp_bytes     = include_fp_regs     ? (32 - 14) * 8  : 0;   // FP regs: 18 slots of 8 bytes
  const int vector_bytes = include_vector_regs ? (32 - 20) * 16 : 0;   // vector regs: 12 slots of 16 bytes
  return gp_bytes + fp_bytes + vector_bytes;
}
303 void save_nonvolatile_registers( Register dst_base, int offset, bool include_fp_regs, bool include_vector_regs);
304 void restore_nonvolatile_registers(Register src_base, int offset, bool include_fp_regs, bool include_vector_regs);
305
// Numbers of volatile GP and FP registers, and their sum.
enum {
  num_volatile_gp_regs = 11,
  num_volatile_fp_regs = 14,
  num_volatile_regs    = num_volatile_gp_regs + num_volatile_fp_regs
};
311
312 void save_volatile_gprs( Register dst_base, int offset,
313 bool include_fp_regs = true, bool include_R3_RET_reg = true);
314 void restore_volatile_gprs(Register src_base, int offset,
315 bool include_fp_regs = true, bool include_R3_RET_reg = true);
316 void save_LR(Register tmp);
317 void restore_LR(Register tmp);
318 void save_LR_CR(Register tmp); // tmp contains LR on return.
319 void restore_LR_CR(Register tmp);
320
321 // Get current PC using bl-next-instruction trick.
322 address get_PC_trash_LR(Register result);
323
324 // Resize current frame either relatively wrt to current SP or absolute.
325 void resize_frame(Register offset, Register tmp);
326 void resize_frame(int offset, Register tmp);
327 void resize_frame_absolute(Register addr, Register tmp1, Register tmp2);
328
329 // Push a frame of size bytes.
330 void push_frame(Register bytes, Register tmp);
331
332 // Push a frame of size `bytes'. No abi space provided.
333 void push_frame(unsigned int bytes, Register tmp);
334
335 // Push a frame of size `bytes' plus native_abi_reg_args on top.
336 void push_frame_reg_args(unsigned int bytes, Register tmp);
337
338 // pop current C frame
339 void pop_frame();
340
341 //
342 // Calls
343 //
344
345 private:
346 address _last_calls_return_pc;
347
348 #if defined(ABI_ELFv2)
349 // Generic version of a call to C function.
350 // Updates and returns _last_calls_return_pc.
351 address branch_to(Register function_entry, bool and_link);
352 #else
353 // Generic version of a call to C function via a function descriptor
354 // with variable support for C calling conventions (TOC, ENV, etc.).
355 // updates and returns _last_calls_return_pc.
356 address branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call,
357 bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee);
358 #endif
359
360 public:
361
362 // Get the pc where the last call will return to. returns _last_calls_return_pc.
363 inline address last_calls_return_pc();
364
365 #if defined(ABI_ELFv2)
366 // Call a C function via a function descriptor and use full C
367 // calling conventions. Updates and returns _last_calls_return_pc.
368 address call_c(Register function_entry);
369 // For tail calls: only branch, don't link, so callee returns to caller of this function.
370 address call_c_and_return_to_caller(Register function_entry);
371 address call_c(address function_entry, relocInfo::relocType rt = relocInfo::none);
372 #else
373 // Call a C function via a function descriptor and use full C
374 // calling conventions. Updates and returns _last_calls_return_pc.
375 address call_c(Register function_descriptor);
376 // For tail calls: only branch, don't link, so callee returns to caller of this function.
377 address call_c_and_return_to_caller(Register function_descriptor);
378 address call_c(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt);
379 address call_c(address function_entry, relocInfo::relocType rt = relocInfo::none) {
380 return call_c((const FunctionDescriptor*)function_entry, rt);
381 }
382 address call_c_using_toc(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt,
383 Register toc);
384 #endif
385
386 static int ic_check_size();
387 int ic_check(int end_alignment);
388
389 protected:
390
391 // It is imperative that all calls into the VM are handled via the
392 // call_VM macros. They make sure that the stack linkage is setup
393 // correctly. call_VM's correspond to ENTRY/ENTRY_X entry points
394 // while call_VM_leaf's correspond to LEAF entry points.
395 //
396 // This is the base routine called by the different versions of
397 // call_VM. The interpreter may customize this version by overriding
398 // it for its purposes (e.g., to save/restore additional registers
399 // when doing a VM call).
400 //
401 // If no last_java_sp is specified (noreg) then SP will be used instead.
402 virtual void call_VM_base(
403 // where an oop-result ends up if any; use noreg otherwise
404 Register oop_result,
405 // to set up last_Java_frame in stubs; use noreg otherwise
406 Register last_java_sp,
407 // the entry point
408 address entry_point,
409 // flag which indicates if exception should be checked
410 bool check_exception = true,
411 Label* last_java_pc = nullptr
412 );
413
414 // Support for VM calls. This is the base routine called by the
415 // different versions of call_VM_leaf. The interpreter may customize
416 // this version by overriding it for its purposes (e.g., to
417 // save/restore additional registers when doing a VM call).
418 void call_VM_leaf_base(address entry_point);
419
420 public:
421 // Call into the VM.
422 // Passes the thread pointer (in R3_ARG1) as a prepended argument.
423 // Makes sure oop return values are visible to the GC.
424 void call_VM(Register oop_result, address entry_point, bool check_exceptions = true, Label* last_java_pc = nullptr);
425 void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true);
426 void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
427 void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg3, bool check_exceptions = true);
428 void call_VM_leaf(address entry_point);
429 void call_VM_leaf(address entry_point, Register arg_1);
430 void call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
431 void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
432
433 // Call a stub function via a function descriptor, but don't save
434 // TOC before call, don't setup TOC and ENV for call, and don't
435 // restore TOC after call. Updates and returns _last_calls_return_pc.
436 inline address call_stub(Register function_entry);
437 inline void call_stub_and_return_to(Register function_entry, Register return_pc);
438
439 void post_call_nop();
440 static bool is_post_call_nop(int instr_bits) {
441 const uint32_t nineth_bit = opp_u_field(1, 9, 9);
442 const uint32_t opcode_mask = 0b111110 << OPCODE_SHIFT;
443 const uint32_t pcn_mask = opcode_mask | nineth_bit;
444 return (instr_bits & pcn_mask) == (Assembler::CMPLI_OPCODE | nineth_bit);
445 }
446
447 //
448 // Java utilities
449 //
450
451 // Read from the polling page, its address is already in a register.
452 inline void load_from_polling_page(Register polling_page_address, int offset = 0);
453 // Check whether instruction is a read access to the polling page
454 // which was emitted by load_from_polling_page(..).
455 static bool is_load_from_polling_page(int instruction, void* ucontext/*may be nullptr*/,
456 address* polling_address_ptr = nullptr);
457
458 // Support for null-checks
459 //
460 // Generates code that causes a null OS exception if the content of reg is null.
461 // If the accessed location is M[reg + offset] and the offset is known, provide the
462 // offset. No explicit code generation is needed if the offset is within a certain
463 // range (0 <= offset <= page_size).
464
465 // Stack overflow checking
466 void bang_stack_with_offset(int offset);
467
468 // If instruction is a stack bang of the form ld, stdu, or
469 // stdux, return the banged address. Otherwise, return 0.
470 static address get_stack_bang_address(int instruction, void* ucontext);
471
472 // Check for reserved stack access in method being exited. If the reserved
473 // stack area was accessed, protect it again and throw StackOverflowError.
474 void reserved_stack_check(Register return_pc);
475
476 // Atomics
477 // CmpxchgX sets condition register to cmpX(current, compare).
478 // (flag == ne) => (dest_current_value != compare_value), (!swapped)
479 // (flag == eq) => (dest_current_value == compare_value), ( swapped)
// Hint values for cmpxchgx; each function returns a fixed constant.
static inline bool cmpxchgx_hint_acquire_lock() {
  return true;
}
// The stxcx will probably not be succeeded by a releasing store.
static inline bool cmpxchgx_hint_release_lock() {
  return false;
}
static inline bool cmpxchgx_hint_atomic_update() {
  return false;
}
484
// Cmpxchg semantics. The values are powers of 2.
enum {
  MemBarNone       = 0,
  MemBarRel        = 1,
  MemBarAcq        = 2,
  MemBarFenceAfter = 4 // use powers of 2
};
492 private:
493 // Helper functions for word/sub-word atomics.
494 void atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value,
495 Register addr_base, Register tmp1, Register tmp2, Register tmp3,
496 bool cmpxchgx_hint, bool is_add, int size);
497 void cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value,
498 RegisterOrConstant compare_value, Register exchange_value,
499 Register addr_base,Label &retry, Label &failed, bool cmpxchgx_hint, int size);
500 void cmpxchg_generic(ConditionRegister flag, Register dest_current_value,
501 RegisterOrConstant compare_value, Register exchange_value,
502 Register addr_base, int semantics, bool cmpxchgx_hint, Register int_flag_success,
503 Label* failed_ext, bool contention_hint, bool weak, int size);
504 public:
505 // Temps and addr_base are killed if processor does not support Power 8 instructions.
506 // Result will be sign extended.
507 void getandsetb(Register dest_current_value, Register exchange_value, Register addr_base,
508 Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
509 atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 1);
510 }
511 // Temps and addr_base are killed if processor does not support Power 8 instructions.
512 // Result will be sign extended.
513 void getandseth(Register dest_current_value, Register exchange_value, Register addr_base,
514 Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
515 atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 2);
516 }
517 void getandsetw(Register dest_current_value, Register exchange_value, Register addr_base,
518 bool cmpxchgx_hint) {
519 atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, noreg, noreg, noreg, cmpxchgx_hint, false, 4);
520 }
521 void getandsetd(Register dest_current_value, Register exchange_value, Register addr_base,
522 bool cmpxchgx_hint);
523 // tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed).
524 // Result will be sign extended.
525 void getandaddb(Register dest_current_value, Register inc_value, Register addr_base,
526 Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
527 atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 1);
528 }
529 // tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed).
530 // Result will be sign extended.
531 void getandaddh(Register dest_current_value, Register inc_value, Register addr_base,
532 Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
533 atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 2);
534 }
535 void getandaddw(Register dest_current_value, Register inc_value, Register addr_base,
536 Register tmp1, bool cmpxchgx_hint) {
537 atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, noreg, noreg, cmpxchgx_hint, true, 4);
538 }
539 void getandaddd(Register dest_current_value, Register exchange_value, Register addr_base,
540 Register tmp, bool cmpxchgx_hint);
541 // Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions.
542 // compare_value must be at least 32 bit sign extended. Result will be sign extended.
543 void cmpxchgb(ConditionRegister flag, Register dest_current_value,
544 RegisterOrConstant compare_value, Register exchange_value,
545 Register addr_base, int semantics, bool cmpxchgx_hint = false,
546 Register int_flag_success = noreg, Label* failed = nullptr,
547 bool contention_hint = false, bool weak = false) {
548 cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, semantics,
549 cmpxchgx_hint, int_flag_success, failed, contention_hint, weak, 1);
550 }
551 // Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions.
552 // compare_value must be at least 32 bit sign extended. Result will be sign extended.
553 void cmpxchgh(ConditionRegister flag, Register dest_current_value,
554 RegisterOrConstant compare_value, Register exchange_value,
555 Register addr_base, int semantics, bool cmpxchgx_hint = false,
556 Register int_flag_success = noreg, Label* failed = nullptr,
557 bool contention_hint = false, bool weak = false) {
558 cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base,
559 semantics, cmpxchgx_hint, int_flag_success, failed, contention_hint, weak, 2);
560 }
561 void cmpxchgw(ConditionRegister flag, Register dest_current_value,
562 RegisterOrConstant compare_value, Register exchange_value,
563 Register addr_base,
564 int semantics, bool cmpxchgx_hint = false, Register int_flag_success = noreg,
565 Label* failed = nullptr, bool contention_hint = false, bool weak = false) {
566 cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base,
567 semantics, cmpxchgx_hint, int_flag_success, failed, contention_hint, weak, 4);
568 }
569 void cmpxchgd(ConditionRegister flag, Register dest_current_value,
570 RegisterOrConstant compare_value, Register exchange_value,
571 Register addr_base,
572 int semantics, bool cmpxchgx_hint = false, Register int_flag_success = noreg,
573 Label* failed = nullptr, bool contention_hint = false, bool weak = false);
574
575 // interface method calling
576 void lookup_interface_method(Register recv_klass,
577 Register intf_klass,
578 RegisterOrConstant itable_index,
579 Register method_result,
580 Register temp_reg, Register temp2_reg,
581 Label& no_such_interface,
582 bool return_method = true);
583
584 // virtual method calling
585 void lookup_virtual_method(Register recv_klass,
586 RegisterOrConstant vtable_index,
587 Register method_result);
588
589 // Test sub_klass against super_klass, with fast and slow paths.
590
591 // The fast path produces a tri-state answer: yes / no / maybe-slow.
592 // One of the three labels can be null, meaning take the fall-through.
593 // If super_check_offset is -1, the value is loaded up from super_klass.
594 // No registers are killed, except temp_reg and temp2_reg.
595 // If super_check_offset is not -1, temp2_reg is not used and can be noreg.
596 void check_klass_subtype_fast_path(Register sub_klass,
597 Register super_klass,
598 Register temp1_reg,
599 Register temp2_reg,
600 Label* L_success,
601 Label* L_failure,
602 Label* L_slow_path = nullptr, // default fall through
603 RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
604
605 // The rest of the type check; must be wired to a corresponding fast path.
606 // It does not repeat the fast path logic, so don't use it standalone.
607 // The temp_reg can be noreg, if no temps are available.
608 // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
609 // Updates the sub's secondary super cache as necessary.
610 void check_klass_subtype_slow_path_linear(Register sub_klass,
611 Register super_klass,
612 Register temp1_reg,
613 Register temp2_reg,
614 Label* L_success = nullptr,
615 Register result_reg = noreg);
616
617 void check_klass_subtype_slow_path_table(Register sub_klass,
618 Register super_klass,
619 Register temp1_reg,
620 Register temp2_reg,
621 Label* L_success = nullptr,
622 Register result_reg = noreg);
623
624 void check_klass_subtype_slow_path(Register sub_klass,
625 Register super_klass,
626 Register temp1_reg,
627 Register temp2_reg,
628 Label* L_success = nullptr,
629 Register result_reg = noreg);
630
631 void lookup_secondary_supers_table_var(Register sub_klass,
632 Register r_super_klass,
633 Register temp1,
634 Register temp2,
635 Register temp3,
636 Register temp4,
637 Register result);
638
639 // If r is valid, return r.
640 // If r is invalid, remove a register r2 from available_regs, add r2
641 // to regs_to_push, then return r2.
642 Register allocate_if_noreg(const Register r,
643 RegSetIterator<Register> &available_regs,
644 RegSet ®s_to_push);
645
646 // Frameless register spills (negative offset from SP)
647 void push_set(RegSet set);
648 void pop_set(RegSet set);
649
650 // Simplified, combined version, good for typical uses.
651 // Falls through on failure.
652 void check_klass_subtype(Register sub_klass,
653 Register super_klass,
654 Register temp1_reg,
655 Register temp2_reg,
656 Label& L_success);
657
658 void repne_scan(Register addr, Register value, Register count, Register scratch);
659
660 // As above, but with a constant super_klass.
661 // The result is in Register result, not the condition codes.
662 void lookup_secondary_supers_table_const(Register r_sub_klass,
663 Register r_super_klass,
664 Register temp1,
665 Register temp2,
666 Register temp3,
667 Register temp4,
668 Register result,
669 u1 super_klass_slot);
670
671 void verify_secondary_supers_table(Register r_sub_klass,
672 Register r_super_klass,
673 Register result,
674 Register temp1,
675 Register temp2,
676 Register temp3);
677
678 void lookup_secondary_supers_table_slow_path(Register r_super_klass,
679 Register r_array_base,
680 Register r_array_index,
681 Register r_bitmap,
682 Register result,
683 Register temp1);
684
685 void clinit_barrier(Register klass,
686 Register thread,
687 Label* L_fast_path = nullptr,
688 Label* L_slow_path = nullptr);
689
690 // Method handle support (JSR 292).
691 RegisterOrConstant argument_offset(RegisterOrConstant arg_slot, Register temp_reg, int extra_slot_offset = 0);
692
// Continuation fast-path bookkeeping.
// NOTE(review): undocumented here; semantics are defined in the .cpp file.
void push_cont_fastpath();
void pop_cont_fastpath ();
695 void atomically_flip_locked_state(bool is_unlock, Register obj, Register tmp, Label& failed, int semantics);
696 void fast_lock(Register box, Register obj, Register t1, Register t2, Label& slow);
697 void fast_unlock(Register obj, Register t1, Label& slow);
698
699 // allocation (for C1)
700 void tlab_allocate(
701 Register obj, // result: pointer to object after successful allocation
702 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
703 int con_size_in_bytes, // object size in bytes if known at compile time
704 Register t1, // temp register
705 Label& slow_case // continuation point if fast allocation fails
706 );
707
// Size constant for a trampoline stub (6 * 4).
// NOTE(review): presumably 6 instructions of 4 bytes each -- confirm.
enum {
  trampoline_stub_size = 6 * 4
};
709 address emit_trampoline_stub(int destination_toc_offset, int insts_call_instruction_offset, Register Rtoc = noreg);
710
711 void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
712 Register tmp1, Register tmp2, Register tmp3);
713
714 void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
715 Register tmp1, Register tmp2, Register tmp3);
716
717 // Check if safepoint requested and if so branch
718 void safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod);
719 void jump_to_polling_page_return_handler_blob(int safepoint_offset, bool fixed_size = false);
720
721 void resolve_jobject(Register value, Register tmp1, Register tmp2,
722 MacroAssembler::PreservationLevel preservation_level);
723 void resolve_global_jobject(Register value, Register tmp1, Register tmp2,
724 MacroAssembler::PreservationLevel preservation_level);
725
// Support for managing the JavaThread pointer (i.e., the reference to
// thread-local information).
728
729 // Support for last Java frame (but use call_VM instead where possible):
730 // access R16_thread->last_Java_sp.
731 void set_last_Java_frame(Register last_java_sp, Register last_Java_pc);
732 void reset_last_Java_frame(bool check_last_java_sp = true);
733 void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, Label* jpc = nullptr);
734
735 // Read vm result from thread: oop_result = R16_thread->result;
736 void get_vm_result_oop(Register oop_result);
737 void get_vm_result_metadata(Register metadata_result);
738
739 static bool needs_explicit_null_check(intptr_t offset);
740 static bool uses_implicit_null_check(void* address);
741
742 // Trap-instruction-based checks.
743 // Range checks can be distinguished from zero checks as they check 32 bit,
744 // zero checks all 64 bits (tw, td).
745 inline void trap_null_check(Register a, trap_to_bits cmp = traptoEqual);
746 static bool is_trap_null_check(int x) {
747 return is_tdi(x, traptoEqual, -1/*any reg*/, 0) ||
748 is_tdi(x, traptoGreaterThanUnsigned, -1/*any reg*/, 0);
749 }
750
751 inline void trap_ic_miss_check(Register a, Register b);
752 static bool is_trap_ic_miss_check(int x) {
753 return is_td(x, traptoGreaterThanUnsigned | traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/);
754 }
755
756 // Implicit or explicit null check, jumps to static address exception_entry.
757 inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry);
758 inline void null_check(Register a, int offset, Label *Lis_null); // implicit only if Lis_null not provided
759
760 // Access heap oop, handle encoding and GC barriers.
761 // Some GC barriers call C so use needs_frame = true if an extra frame is needed at the current call site.
762 inline void access_store_at(BasicType type, DecoratorSet decorators,
763 Register base, RegisterOrConstant ind_or_offs, Register val,
764 Register tmp1, Register tmp2, Register tmp3,
765 MacroAssembler::PreservationLevel preservation_level);
766 inline void access_load_at(BasicType type, DecoratorSet decorators,
767 Register base, RegisterOrConstant ind_or_offs, Register dst,
768 Register tmp1, Register tmp2,
769 MacroAssembler::PreservationLevel preservation_level, Label *L_handle_null = nullptr);
770
771 public:
772 // Specify tmp1 for better code in certain compressed oops cases. Specify Label to bail out on null oop.
773 // tmp1, tmp2 and needs_frame are used with decorators ON_PHANTOM_OOP_REF or ON_WEAK_OOP_REF.
774 inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1,
775 Register tmp1, Register tmp2,
776 MacroAssembler::PreservationLevel preservation_level,
777 DecoratorSet decorators = 0, Label *L_handle_null = nullptr);
778
779 inline void store_heap_oop(Register d, RegisterOrConstant offs, Register s1,
780 Register tmp1, Register tmp2, Register tmp3,
781 MacroAssembler::PreservationLevel preservation_level, DecoratorSet decorators = 0);
782
783 // Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong.
784 // src == d allowed.
785 inline Register encode_heap_oop_not_null(Register d, Register src = noreg);
786 inline Register decode_heap_oop_not_null(Register d, Register src = noreg);
787
788 // Null allowed.
789 inline Register encode_heap_oop(Register d, Register src); // Prefer null check in GC barrier!
790 inline void decode_heap_oop(Register d);
791
792 // Load/Store klass oop from klass field. Compress.
793 void load_klass_no_decode(Register dst, Register src);
794 void load_klass(Register dst, Register src);
795 void load_narrow_klass_compact(Register dst, Register src);
796 void cmp_klass(ConditionRegister dst, Register obj, Register klass, Register tmp, Register tmp2);
797 void cmp_klasses_from_objects(ConditionRegister dst, Register obj1, Register obj2, Register tmp1, Register tmp2);
798 void load_klass_check_null(Register dst, Register src, Label* is_null = nullptr);
799 void store_klass(Register dst_oop, Register klass, Register tmp = R0);
800 void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
801
802 void resolve_oop_handle(Register result, Register tmp1, Register tmp2,
803 MacroAssembler::PreservationLevel preservation_level);
804 void resolve_weak_handle(Register result, Register tmp1, Register tmp2,
805 MacroAssembler::PreservationLevel preservation_level);
806 void load_method_holder(Register holder, Register method);
807
808 static int instr_size_for_load_klass();
809 void decode_klass_not_null(Register dst, Register src = noreg);
810 Register encode_klass_not_null(Register dst, Register src = noreg);
811
812 // SIGTRAP-based range checks for arrays.
813 inline void trap_range_check_l(Register a, Register b);
814 inline void trap_range_check_l(Register a, int si16);
815 static bool is_trap_range_check_l(int x) {
816 return (is_tw (x, traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/) ||
817 is_twi(x, traptoLessThanUnsigned, -1/*any reg*/) );
818 }
819 inline void trap_range_check_le(Register a, int si16);
820 static bool is_trap_range_check_le(int x) {
821 return is_twi(x, traptoEqual | traptoLessThanUnsigned, -1/*any reg*/);
822 }
823 inline void trap_range_check_g(Register a, int si16);
824 static bool is_trap_range_check_g(int x) {
825 return is_twi(x, traptoGreaterThanUnsigned, -1/*any reg*/);
826 }
827 inline void trap_range_check_ge(Register a, Register b);
828 inline void trap_range_check_ge(Register a, int si16);
829 static bool is_trap_range_check_ge(int x) {
830 return (is_tw (x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/, -1/*any reg*/) ||
831 is_twi(x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/) );
832 }
833 static bool is_trap_range_check(int x) {
834 return is_trap_range_check_l(x) || is_trap_range_check_le(x) ||
835 is_trap_range_check_g(x) || is_trap_range_check_ge(x);
836 }
837
838 void clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp = R0, int offset = 0);
839 void clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp = R0);
840 void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0, long const_cnt = -1);
841
842 // Emitters for BigInteger.multiplyToLen intrinsic.
843 inline void multiply64(Register dest_hi, Register dest_lo,
844 Register x, Register y);
845 void add2_with_carry(Register dest_hi, Register dest_lo,
846 Register src1, Register src2);
847 void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
848 Register y, Register y_idx, Register z,
849 Register carry, Register product_high, Register product,
850 Register idx, Register kdx, Register tmp);
851 void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
852 Register yz_idx, Register idx, Register carry,
853 Register product_high, Register product, Register tmp,
854 int offset);
855 void multiply_128_x_128_loop(Register x_xstart,
856 Register y, Register z,
857 Register yz_idx, Register idx, Register carry,
858 Register product_high, Register product,
859 Register carry2, Register tmp);
860 void muladd(Register out, Register in, Register offset, Register len, Register k,
861 Register tmp1, Register tmp2, Register carry);
862 void multiply_to_len(Register x, Register xlen,
863 Register y, Register ylen,
864 Register z,
865 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
866 Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
867 Register tmp11, Register tmp12, Register tmp13);
868
// Emitters for CRC32 calculation.
// A note on invertCRC:
// Unfortunately, the internal representation of crc differs between CRC32 and CRC32C.
// CRC32 holds its current crc value in the externally visible representation.
// CRC32C holds its current crc value in internal format, ready for updating.
// Thus, the crc value must be bit-flipped before updating it in the CRC32 case.
// In the CRC32C case, it must be bit-flipped when it is given to the outside world (getValue()).
// The bool invertCRC parameter indicates whether bit-flipping is required before updates.
877 void load_reverse_32(Register dst, Register src);
878 int crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3);
879 void fold_byte_crc32(Register crc, Register val, Register table, Register tmp);
880 void update_byte_crc32(Register crc, Register val, Register table);
881 void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
882 Register data, bool loopAlignment);
883 void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
884 Register t0, Register t1, Register t2, Register t3,
885 Register tc0, Register tc1, Register tc2, Register tc3);
886 void kernel_crc32_vpmsum(Register crc, Register buf, Register len, Register constants,
887 Register t0, Register t1, Register t2, Register t3, Register t4,
888 Register t5, Register t6, bool invertCRC);
889 void kernel_crc32_vpmsum_aligned(Register crc, Register buf, Register len, Register constants,
890 Register t0, Register t1, Register t2, Register t3, Register t4,
891 Register t5, Register t6);
892 // Version which internally decides what to use.
893 void crc32(Register crc, Register buf, Register len, Register t0, Register t1, Register t2,
894 Register t3, Register t4, Register t5, Register t6, Register t7, bool is_crc32c);
895
896 void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
897 bool invertCRC);
898
899 // SHA-2 auxiliary functions and public interfaces
900 private:
901 void sha256_deque(const VectorRegister src,
902 const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3);
903 void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr);
904 void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
905 void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws,
906 const int total_ws, const Register k, const VectorRegister* kpws,
907 const int total_kpws);
908 void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1,
909 const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0,
910 const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3,
911 const Register j, const Register k);
912 void sha256_update_sha_state(const VectorRegister a, const VectorRegister b,
913 const VectorRegister c, const VectorRegister d, const VectorRegister e,
914 const VectorRegister f, const VectorRegister g, const VectorRegister h,
915 const Register hptr);
916
917 void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws);
918 void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs);
919 void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
920 void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs);
921 void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1,
922 const VectorRegister w2, const VectorRegister w3,
923 const VectorRegister w4, const VectorRegister w5,
924 const VectorRegister w6, const VectorRegister w7,
925 const VectorRegister kpw0, const VectorRegister kpw1, const Register j,
926 const VectorRegister vRb, const Register k);
927
928 public:
// Public SHA-2 emitters.
// NOTE(review): multi_block presumably selects the variant that processes
// several blocks per call -- confirm.
void sha256(bool multi_block);
void sha512(bool multi_block);
931
932 void cache_wb(Address line);
933 void cache_wbsync(bool is_presync);
934
//
// Debugging
//

// Conditions for asserting on cr0. The enumerators keep their implicit
// values, spelled out here.
enum AsmAssertCond {
  eq = 0,
  ne = 1,
  ge = 2,
  gt = 3,
  lt = 4,
  le = 5
};
948 void asm_assert(AsmAssertCond cond, const char* msg) PRODUCT_RETURN;
949 void asm_assert_eq(const char* msg) { asm_assert(eq, msg); }
950 void asm_assert_ne(const char* msg) { asm_assert(ne, msg); }
951
952 private:
953 void asm_assert_mems_zero(AsmAssertCond cond, int size, int mem_offset, Register mem_base,
954 const char* msg) NOT_DEBUG_RETURN;
955
956 public:
957
958 void asm_assert_mem8_is_zero(int mem_offset, Register mem_base, const char* msg) {
959 asm_assert_mems_zero(eq, 8, mem_offset, mem_base, msg);
960 }
961 void asm_assert_mem8_isnot_zero(int mem_offset, Register mem_base, const char* msg) {
962 asm_assert_mems_zero(ne, 8, mem_offset, mem_base, msg);
963 }
964
965 // Calls verify_oop. If UseCompressedOops is on, decodes the oop.
966 // Preserves reg.
967 void verify_coop(Register reg, const char*);
968 // Emit code to verify that reg contains a valid oop if +VerifyOops is set.
969 void verify_oop(Register reg, const char* s = "broken oop");
970 void verify_oop_addr(RegisterOrConstant offs, Register base, const char* s = "contains broken oop");
971
972 // TODO: verify method and klass metadata (compare against vptr?)
973 void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
974 void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {}
975
976 // Convenience method returning function entry. For the ELFv1 case
977 // creates function descriptor at the current address and returns
978 // the pointer to it. For the ELFv2 case returns the current address.
979 inline address function_entry();
980
// Wrappers that pass a message naming the register expression plus the
// file / line of the use site to _verify_method_ptr / _verify_klass_ptr.
#define verify_method_ptr(reg) \
  _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) \
  _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
983
984 private:
// Worker behind stop / untested / unimplemented / should_not_reach_here,
// which pass one of the stop_* constants as 'type'.
void stop(int type,
          const char* msg);
986
987 public:
// Values passed as 'type' to stop(int type, const char* msg).
// NOTE(review): stop_msg_present is presumably a marker combined with the
// type when a message is present -- confirm in the implementation.
enum {
  stop_stop               = 0,
  stop_untested           = 1,
  stop_unimplemented      = 2,
  stop_shouldnotreachhere = 3,
  stop_msg_present        = -0x8000
};
995
996 // Prints msg, dumps registers and stops execution.
997 void stop (const char* msg = nullptr) { stop(stop_stop, msg); }
998 void untested (const char* msg = nullptr) { stop(stop_untested, msg); }
999 void unimplemented (const char* msg = nullptr) { stop(stop_unimplemented, msg); }
1000 void should_not_reach_here(const char* msg = nullptr) { stop(stop_shouldnotreachhere, msg); }
1001
1002 void zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) PRODUCT_RETURN;
1003
1004 // Inline type specific methods
1005 #include "asm/macroAssembler_common.hpp"
1006 };
1007
1008 #endif // CPU_PPC_MACROASSEMBLER_PPC_HPP