9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "ci/ciEnv.hpp"
29 #include "code/compiledIC.hpp"
30 #include "compiler/compileTask.hpp"
31 #include "compiler/disassembler.hpp"
32 #include "compiler/oopMap.hpp"
33 #include "gc/shared/barrierSet.hpp"
34 #include "gc/shared/barrierSetAssembler.hpp"
35 #include "gc/shared/cardTableBarrierSet.hpp"
36 #include "gc/shared/cardTable.hpp"
37 #include "gc/shared/collectedHeap.hpp"
38 #include "gc/shared/tlab_globals.hpp"
39 #include "interpreter/bytecodeHistogram.hpp"
40 #include "interpreter/interpreter.hpp"
41 #include "interpreter/interpreterRuntime.hpp"
42 #include "jvm.h"
43 #include "memory/resourceArea.hpp"
44 #include "memory/universe.hpp"
45 #include "nativeInst_aarch64.hpp"
46 #include "oops/accessDecorators.hpp"
47 #include "oops/compressedKlass.inline.hpp"
48 #include "oops/compressedOops.inline.hpp"
49 #include "oops/klass.inline.hpp"
50 #include "runtime/continuation.hpp"
51 #include "runtime/icache.hpp"
52 #include "runtime/interfaceSupport.inline.hpp"
53 #include "runtime/javaThread.hpp"
54 #include "runtime/jniHandles.inline.hpp"
55 #include "runtime/sharedRuntime.hpp"
56 #include "runtime/stubRoutines.hpp"
57 #include "utilities/globalDefinitions.hpp"
58 #include "utilities/integerCast.hpp"
59 #include "utilities/powerOfTwo.hpp"
60 #ifdef COMPILER1
61 #include "c1/c1_LIRAssembler.hpp"
62 #endif
63 #ifdef COMPILER2
64 #include "oops/oop.hpp"
65 #include "opto/compile.hpp"
66 #include "opto/node.hpp"
67 #include "opto/output.hpp"
68 #endif
69
70 #include <sys/types.h>
71
72 #ifdef PRODUCT
73 #define BLOCK_COMMENT(str) /* nothing */
74 #else
75 #define BLOCK_COMMENT(str) block_comment(str)
76 #endif
77 #define STOP(str) stop(str);
78 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
79
488 // narrow OOPs by setting the upper 16 bits in the first
489 // instruction.
490 if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010101) {
491 // Move narrow OOP
492 uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
493 Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
494 Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
495 instructions = 2;
496 } else {
497 // Move wide OOP
498 assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch");
499 uintptr_t dest = (uintptr_t)o;
500 Instruction_aarch64::patch(insn_addr, 20, 5, dest & 0xffff);
501 Instruction_aarch64::patch(insn_addr+4, 20, 5, (dest >>= 16) & 0xffff);
502 Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff);
503 instructions = 3;
504 }
505 return instructions * NativeInstruction::instruction_size;
506 }
507
508 int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) {
509 // Metadata pointers are either narrow (32 bits) or wide (48 bits).
510 // We encode narrow ones by setting the upper 16 bits in the first
511 // instruction.
512 NativeInstruction *insn = nativeInstruction_at(insn_addr);
513 assert(Instruction_aarch64::extract(insn->encoding(), 31, 21) == 0b11010010101 &&
514 nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
515
516 MACOS_AARCH64_ONLY(os::thread_wx_enable_write());
517
518 Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
519 Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
520 return 2 * NativeInstruction::instruction_size;
521 }
522
523 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool in_nmethod, Register tmp) {
524 ldr(tmp, Address(rthread, JavaThread::polling_word_offset()));
525 if (at_return) {
526 // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore,
527 // we may safely use the sp instead to perform the stack watermark check.
528 cmp(in_nmethod ? sp : rfp, tmp);
529 br(Assembler::HI, slow_path);
530 } else {
531 tbnz(tmp, log2i_exact(SafepointMechanism::poll_bit()), slow_path);
532 }
533 }
534
535 void MacroAssembler::rt_call(address dest, Register tmp) {
536 CodeBlob *cb = CodeCache::find_blob(dest);
537 if (cb) {
538 far_call(RuntimeAddress(dest));
539 } else {
540 lea(tmp, RuntimeAddress(dest));
541 blr(tmp);
542 }
2006 ldarb(scratch, scratch);
2007 cmp(scratch, InstanceKlass::fully_initialized);
2008 br(Assembler::EQ, *L_fast_path);
2009
2010 // Fast path check: current thread is initializer thread
2011 ldr(scratch, Address(klass, InstanceKlass::init_thread_offset()));
2012 cmp(rthread, scratch);
2013
2014 if (L_slow_path == &L_fallthrough) {
2015 br(Assembler::EQ, *L_fast_path);
2016 bind(*L_slow_path);
2017 } else if (L_fast_path == &L_fallthrough) {
2018 br(Assembler::NE, *L_slow_path);
2019 bind(*L_fast_path);
2020 } else {
2021 Unimplemented();
2022 }
2023 }
2024
// Emits a call to the verify-oop subroutine for the value held in reg.
// Emits nothing unless VerifyOops is set. s, file and line are only used to
// build the diagnostic message handed to the subroutine.
2025 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
2026 if (!VerifyOops) return;
2027
2028 // Build the message string (register name, caller text, source location) passed to verify_oop_subroutine
2029 const char* b = nullptr;
2030 {
2031 ResourceMark rm;
2032 stringStream ss;
2033 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
// NOTE(review): b is used after rm goes out of scope, so code_string() presumably
// returns storage that outlives the ResourceMark - confirm.
2034 b = code_string(ss.as_string());
2035 }
2036 BLOCK_COMMENT("verify_oop {");
2037
2038 strip_return_address(); // This might happen within a stack frame.
2039 protect_return_address();
// Save the four registers this sequence overwrites: r0, rscratch1, rscratch2 and lr.
2040 stp(r0, rscratch1, Address(pre(sp, -2 * wordSize)));
2041 stp(rscratch2, lr, Address(pre(sp, -2 * wordSize)));
2042
// Value to verify goes in r0, address of the message string in rscratch1.
2043 mov(r0, reg);
2044 movptr(rscratch1, (uintptr_t)(address)b);
2045
2046 // call indirectly to solve generation ordering problem
// (the entry point is loaded from memory at run time rather than embedded as a direct target)
2047 lea(rscratch2, RuntimeAddress(StubRoutines::verify_oop_subroutine_entry_address()));
2048 ldr(rscratch2, Address(rscratch2));
2049 blr(rscratch2);
2050
// Restore in the reverse order of the saves above.
2051 ldp(rscratch2, lr, Address(post(sp, 2 * wordSize)));
2052 ldp(r0, rscratch1, Address(post(sp, 2 * wordSize)));
2053 authenticate_return_address();
2054
2055 BLOCK_COMMENT("} verify_oop");
2056 }
2057
2058 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
2059 if (!VerifyOops) return;
2060
2061 const char* b = nullptr;
2062 {
2063 ResourceMark rm;
2064 stringStream ss;
2065 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
2066 b = code_string(ss.as_string());
2067 }
2068 BLOCK_COMMENT("verify_oop_addr {");
2069
2070 strip_return_address(); // This might happen within a stack frame.
2071 protect_return_address();
2072 stp(r0, rscratch1, Address(pre(sp, -2 * wordSize)));
2073 stp(rscratch2, lr, Address(pre(sp, -2 * wordSize)));
2074
2075 // addr may contain sp so we will have to adjust it based on the
2076 // pushes that we just did.
2077 if (addr.uses(sp)) {
2078 lea(r0, addr);
2079 ldr(r0, Address(r0, 4 * wordSize));
2292 call_VM_leaf_base(entry_point, 1);
2293 }
2294
// Two-argument form: sets up arg_0 and arg_1 via pass_arg0/pass_arg1, then
// calls call_VM_leaf_base with an argument count of 2.
// arg_1 must not be c_rarg0 (asserted); presumably pass_arg0 writes c_rarg0
// before arg_1 is read - confirm against pass_argN.
2295 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
2296 assert_different_registers(arg_1, c_rarg0);
2297 pass_arg0(this, arg_0);
2298 pass_arg1(this, arg_1);
2299 call_VM_leaf_base(entry_point, 2);
2300 }
2301
// Three-argument form: sets up arg_0, arg_1, arg_2 in that order, then calls
// call_VM_leaf_base with an argument count of 3.
// Because arguments are passed in ascending order, a later argument must not
// live in a register that an earlier pass_argN targets (asserted below).
2302 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
2303 Register arg_1, Register arg_2) {
2304 assert_different_registers(arg_1, c_rarg0);
2305 assert_different_registers(arg_2, c_rarg0, c_rarg1);
2306 pass_arg0(this, arg_0);
2307 pass_arg1(this, arg_1);
2308 pass_arg2(this, arg_2);
2309 call_VM_leaf_base(entry_point, 3);
2310 }
2311
// One-argument leaf call that names MacroAssembler::call_VM_leaf_base
// explicitly (qualified call) instead of calling it unqualified.
2312 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
2313 pass_arg0(this, arg_0);
2314 MacroAssembler::call_VM_leaf_base(entry_point, 1);
2315 }
2316
// Two-argument leaf call through the explicitly qualified
// MacroAssembler::call_VM_leaf_base.
// Arguments are passed in descending order (arg_1, then arg_0), so arg_0
// must not live in c_rarg1 (asserted below).
2317 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
2318
2319 assert_different_registers(arg_0, c_rarg1);
2320 pass_arg1(this, arg_1);
2321 pass_arg0(this, arg_0);
2322 MacroAssembler::call_VM_leaf_base(entry_point, 2);
2323 }
2324
2325 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
2326 assert_different_registers(arg_0, c_rarg1, c_rarg2);
2327 assert_different_registers(arg_1, c_rarg2);
2328 pass_arg2(this, arg_2);
2329 pass_arg1(this, arg_1);
2330 pass_arg0(this, arg_0);
2331 MacroAssembler::call_VM_leaf_base(entry_point, 3);
2337 assert_different_registers(arg_2, c_rarg3);
2338 pass_arg3(this, arg_3);
2339 pass_arg2(this, arg_2);
2340 pass_arg1(this, arg_1);
2341 pass_arg0(this, arg_0);
2342 MacroAssembler::call_VM_leaf_base(entry_point, 4);
2343 }
2344
2345 void MacroAssembler::null_check(Register reg, int offset) {
2346 if (needs_explicit_null_check(offset)) {
2347 // provoke OS null exception if reg is null by
2348 // accessing M[reg] w/o changing any registers
2349 // NOTE: this is plenty to provoke a segv
2350 ldr(zr, Address(reg));
2351 } else {
2352 // nothing to do, (later) access of M[reg + offset]
2353 // will provoke OS null exception if reg is null
2354 }
2355 }
2356
2357 // MacroAssembler protected routines needed to implement
2358 // public methods
2359
2360 void MacroAssembler::mov(Register r, Address dest) {
2361 code_section()->relocate(pc(), dest.rspec());
2362 uint64_t imm64 = (uint64_t)dest.target();
2363 movptr(r, imm64);
2364 }
2365
2366 // Move a constant pointer into r. In AArch64 mode the virtual
2367 // address space is 48 bits in size, so we only need three
2368 // instructions to create a patchable instruction sequence that can
2369 // reach anywhere.
2370 void MacroAssembler::movptr(Register r, uintptr_t imm64) {
2371 #ifndef PRODUCT
2372 {
2373 char buffer[64];
2374 os::snprintf_checked(buffer, sizeof(buffer), "0x%" PRIX64, (uint64_t)imm64);
2375 block_comment(buffer);
2376 }
5071 adrp(rscratch1, src2, offset);
5072 ldr(rscratch1, Address(rscratch1, offset));
5073 cmp(src1, rscratch1);
5074 }
5075
// Compares two oop registers with a plain register-register cmp.
5076 void MacroAssembler::cmpoop(Register obj1, Register obj2) {
5077 cmp(obj1, obj2);
5078 }
5079
// Loads into rresult the class_loader_data field of the holder klass of the
// method in rmethod. rresult is used as the intermediate for the holder.
5080 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5081 load_method_holder(rresult, rmethod);
5082 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5083 }
5084
// Loads the holder klass of method into holder by following three pointers:
// Method -> ConstMethod -> ConstantPool -> pool holder.
// holder is reused as the intermediate register at every step.
5085 void MacroAssembler::load_method_holder(Register holder, Register method) {
5086 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
5087 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
5088 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
5089 }
5090
5091 // Loads the obj's narrow klass into dst (compact object headers only).
5092 // Preserves all registers other than dst (incl src, rscratch1 and rscratch2).
5093 // Input:
5094 // src - the oop we want to load the klass from.
5095 // dst - output narrow klass.
5096 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5097 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
// Read the mark word, then shift right by klass_shift so the klass bits end up at bit 0.
5098 ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
5099 lsr(dst, dst, markWord::klass_shift);
5100 }
5101
5102 void MacroAssembler::load_klass(Register dst, Register src) {
5103 if (UseCompactObjectHeaders) {
5104 load_narrow_klass_compact(dst, src);
5105 } else {
5106 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5107 }
5108 decode_klass_not_null(dst);
5109 }
5110
5162 } else {
5163 ldrw(tmp, Address(obj, oopDesc::klass_offset_in_bytes()));
5164 }
5165 if (CompressedKlassPointers::base() == nullptr) {
5166 cmp(klass, tmp, LSL, CompressedKlassPointers::shift());
5167 return;
5168 } else if (!AOTCodeCache::is_on_for_dump() &&
5169 ((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0
5170 && CompressedKlassPointers::shift() == 0) {
5171 // Only the bottom 32 bits matter
5172 cmpw(klass, tmp);
5173 return;
5174 }
5175 decode_klass_not_null(tmp);
5176 cmp(klass, tmp);
5177 }
5178
5179 void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Register tmp1, Register tmp2) {
5180 if (UseCompactObjectHeaders) {
5181 load_narrow_klass_compact(tmp1, obj1);
5182 load_narrow_klass_compact(tmp2, obj2);
5183 } else {
5184 ldrw(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes()));
5185 ldrw(tmp2, Address(obj2, oopDesc::klass_offset_in_bytes()));
5186 }
5187 cmpw(tmp1, tmp2);
5188 }
5189
// Stores the klass in src into the klass field of the object at dst.
// src is encoded in place before the 32-bit store, so its original value is
// not preserved. Not usable with compact object headers (asserted).
5190 void MacroAssembler::store_klass(Register dst, Register src) {
5191 // FIXME: Should this be a store release? concurrent gcs assumes
5192 // klass length is valid if klass field is not null.
5193 assert(!UseCompactObjectHeaders, "not with compact headers");
5194 encode_klass_not_null(src);
5195 strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
5196 }
5197
// Writes the low 32 bits of src into the klass gap of the object at dst.
// Not usable with compact object headers (asserted).
5198 void MacroAssembler::store_klass_gap(Register dst, Register src) {
5199 assert(!UseCompactObjectHeaders, "not with compact headers");
5200 // Store to klass gap in destination
5201 strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
5202 }
5203
5204 // Algorithm must match CompressedOops::encode.
5205 void MacroAssembler::encode_heap_oop(Register d, Register s) {
5206 #ifdef ASSERT
5207 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
5208 #endif
5209 verify_oop_msg(s, "broken oop in encode_heap_oop");
5559 if (as_raw) {
5560 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2);
5561 } else {
5562 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2);
5563 }
5564 }
5565
5566 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
5567 Address dst, Register val,
5568 Register tmp1, Register tmp2, Register tmp3) {
5569 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
5570 decorators = AccessInternal::decorator_fixup(decorators, type);
5571 bool as_raw = (decorators & AS_RAW) != 0;
5572 if (as_raw) {
5573 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
5574 } else {
5575 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
5576 }
5577 }
5578
// Loads an oop from src into dst via access_load_at, adding IN_HEAP to the
// caller-supplied decorators.
5579 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
5580 Register tmp2, DecoratorSet decorators) {
5581 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
5582 }
5583
// Same as load_heap_oop but additionally passes IS_NOT_NULL in the decorators.
5584 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
5585 Register tmp2, DecoratorSet decorators) {
5586 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, tmp2);
5587 }
5588
// Stores the oop in val to dst via access_store_at, adding IN_HEAP to the
// caller-supplied decorators.
5589 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
5590 Register tmp2, Register tmp3, DecoratorSet decorators) {
5591 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
5592 }
5593
5594 // Used for storing nulls: calls access_store_at with noreg as the value
// register and noreg for all three temporaries.
5595 void MacroAssembler::store_heap_oop_null(Address dst) {
5596 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
5597 }
5598
5673 ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
5674 cmp(rscratch2, rscratch1);
5675 br(Assembler::HS, next);
5676 STOP("assert(top >= start)");
5677 should_not_reach_here();
5678
5679 bind(next);
5680 ldr(rscratch2, Address(rthread, in_bytes(JavaThread::tlab_end_offset())));
5681 ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
5682 cmp(rscratch2, rscratch1);
5683 br(Assembler::HS, ok);
5684 STOP("assert(top <= end)");
5685 should_not_reach_here();
5686
5687 bind(ok);
5688 ldp(rscratch2, rscratch1, Address(post(sp, 16)));
5689 }
5690 #endif
5691 }
5692
5693 // Writes to stack successive pages until offset reached to check for
5694 // stack overflow + shadow pages. This clobbers tmp.
5695 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
5696 assert_different_registers(tmp, size, rscratch1);
5697 mov(tmp, sp);
5698 // Bang stack for total size given plus shadow page size.
5699 // Bang one page at a time because large size can bang beyond yellow and
5700 // red zones.
5701 Label loop;
5702 mov(rscratch1, (int)os::vm_page_size());
5703 bind(loop);
5704 lea(tmp, Address(tmp, -(int)os::vm_page_size()));
5705 subsw(size, size, rscratch1);
5706 str(size, Address(tmp));
5707 br(Assembler::GT, loop);
5708
5709 // Bang down shadow pages too.
5710 // At this point, (tmp-0) is the last address touched, so don't
5711 // touch it again. (It was touched as (tmp-pagesize) but then tmp
5712 // was post-decremented.) Skip this address by starting at i=1, and
5779
5780 // Strictly speaking the card table base isn't an address at all, and it might
5781 // even be negative. It is thus materialised as a constant.
5782 mov(reg, (uint64_t)ctbs->card_table_base_const());
5783 }
5784
5785 void MacroAssembler::load_aotrc_address(Register reg, address a) {
5786 #if INCLUDE_CDS
5787 assert(AOTRuntimeConstants::contains(a), "address out of range for data area");
5788 if (AOTCodeCache::is_on_for_dump()) {
5789 // all aotrc field addresses should be registered in the AOTCodeCache address table
5790 lea(reg, ExternalAddress(a));
5791 } else {
5792 mov(reg, (uint64_t)a);
5793 }
5794 #else
5795 ShouldNotReachHere();
5796 #endif
5797 }
5798
5799 void MacroAssembler::build_frame(int framesize) {
5800 assert(framesize >= 2 * wordSize, "framesize must include space for FP/LR");
5801 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
5802 protect_return_address();
5803 if (framesize < ((1 << 9) + 2 * wordSize)) {
5804 sub(sp, sp, framesize);
5805 stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
5806 if (PreserveFramePointer) add(rfp, sp, framesize - 2 * wordSize);
5807 } else {
5808 stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
5809 if (PreserveFramePointer) mov(rfp, sp);
5810 if (framesize < ((1 << 12) + 2 * wordSize))
5811 sub(sp, sp, framesize - 2 * wordSize);
5812 else {
5813 mov(rscratch1, framesize - 2 * wordSize);
5814 sub(sp, sp, rscratch1);
5815 }
5816 }
5817 verify_cross_modify_fence_not_required();
5818 }
5819
5820 void MacroAssembler::remove_frame(int framesize) {
5821 assert(framesize >= 2 * wordSize, "framesize must include space for FP/LR");
5822 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
5823 if (framesize < ((1 << 9) + 2 * wordSize)) {
5824 ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
5825 add(sp, sp, framesize);
5826 } else {
5827 if (framesize < ((1 << 12) + 2 * wordSize))
5828 add(sp, sp, framesize - 2 * wordSize);
5829 else {
5830 mov(rscratch1, framesize - 2 * wordSize);
5831 add(sp, sp, rscratch1);
5832 }
5833 ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
5834 }
5835 authenticate_return_address();
5836 }
5837
5838
5839 // This method counts leading positive bytes (highest bit not set) in provided byte array
5840 address MacroAssembler::count_positives(Register ary1, Register len, Register result) {
5841 // Simple and most common case of aligned small array which is not at the
5842 // end of memory page is placed here. All other cases are in stub.
5843 Label LOOP, END, STUB, STUB_LONG, SET_RESULT, DONE;
5844 const uint64_t UPPER_BIT_MASK=0x8080808080808080;
5845 assert_different_registers(ary1, len, result);
5846
5847 mov(result, len);
5848 cmpw(len, 0);
5849 br(LE, DONE);
5850 cmpw(len, 4 * wordSize);
5851 br(GE, STUB_LONG); // size > 32 then go to stub
5852
5853 int shift = 64 - exact_log2(os::vm_page_size());
5854 lsl(rscratch1, ary1, shift);
5855 mov(rscratch2, (size_t)(4 * wordSize) << shift);
5856 adds(rscratch2, rscratch1, rscratch2); // At end of page?
5857 br(CS, STUB); // at the end of page then go to stub
6741 // On other systems, the helper is a usual C function.
6742 //
// Emits a call to JavaThread::aarch64_get_thread_helper and leaves the value
// it returns in c_rarg0 in dst. The registers in saved_regs are pushed before
// and popped after the call; dst is excluded from that set so the result
// survives the pop.
6743 void MacroAssembler::get_thread(Register dst) {
// Saved set differs by platform: r0..r17 plus lr under BSD_ONLY, r0..r1 plus lr under NOT_BSD.
6744 RegSet saved_regs =
6745 BSD_ONLY(RegSet::range(r0, r17) + lr - dst)
6746 NOT_BSD (RegSet::range(r0, r1) + lr - dst);
6747
6748 protect_return_address();
6749 push(saved_regs, sp);
6750
// lr doubles as the call-target register here; it is part of saved_regs.
6751 mov(lr, ExternalAddress(CAST_FROM_FN_PTR(address, JavaThread::aarch64_get_thread_helper)));
6752 blr(lr);
6753 if (dst != c_rarg0) {
6754 mov(dst, c_rarg0);
6755 }
6756
6757 pop(saved_regs, sp);
6758 authenticate_return_address();
6759 }
6760
6761 void MacroAssembler::cache_wb(Address line) {
6762 assert(line.getMode() == Address::base_plus_offset, "mode should be base_plus_offset");
6763 assert(line.index() == noreg, "index should be noreg");
6764 assert(line.offset() == 0, "offset should be 0");
6765 // would like to assert this
6766 // assert(line._ext.shift == 0, "shift should be zero");
6767 if (VM_Version::supports_dcpop()) {
6768 // writeback using clear virtual address to point of persistence
6769 dc(Assembler::CVAP, line.base());
6770 } else {
6771 // no need to generate anything as Unsafe.writebackMemory should
6772 // never invoke this stub
6773 }
6774 }
6775
6776 void MacroAssembler::cache_wbsync(bool is_pre) {
6777 // we only need a barrier post sync
6778 if (!is_pre) {
6779 membar(Assembler::AnyAny);
6780 }
7176 }
7177
7178 // Check if the lock-stack is full.
7179 ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
7180 cmpw(top, (unsigned)LockStack::end_offset());
7181 br(Assembler::GE, slow);
7182
7183 // Check for recursion.
7184 subw(t, top, oopSize);
7185 ldr(t, Address(rthread, t));
7186 cmp(obj, t);
7187 br(Assembler::EQ, push);
7188
7189 // Check header for monitor (0b10).
7190 tst(mark, markWord::monitor_value);
7191 br(Assembler::NE, slow);
7192
7193 // Try to lock. Transition lock bits 0b01 => 0b00
7194 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
7195 orr(mark, mark, markWord::unlocked_value);
7196 eor(t, mark, markWord::unlocked_value);
7197 cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::xword,
7198 /*acquire*/ true, /*release*/ false, /*weak*/ false, noreg);
7199 br(Assembler::NE, slow);
7200
7201 bind(push);
7202 // After successful lock, push object on lock-stack.
7203 str(obj, Address(rthread, top));
7204 addw(top, top, oopSize);
7205 strw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
7206 }
7207
7208 // Implements fast-unlocking.
7209 //
7210 // - obj: the object to be unlocked
7211 // - t1, t2, t3: temporary registers
7212 // - slow: branched to if unlocking fails; its absolute offset may be larger than 32KB (imm14 encoding).
7213 void MacroAssembler::fast_unlock(Register obj, Register t1, Register t2, Register t3, Label& slow) {
7214 // cmpxchg clobbers rscratch1.
7215 assert_different_registers(obj, t1, t2, t3, rscratch1);
|
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "ci/ciEnv.hpp"
29 #include "ci/ciInlineKlass.hpp"
30 #include "code/compiledIC.hpp"
31 #include "compiler/compileTask.hpp"
32 #include "compiler/disassembler.hpp"
33 #include "compiler/oopMap.hpp"
34 #include "gc/shared/barrierSet.hpp"
35 #include "gc/shared/barrierSetAssembler.hpp"
36 #include "gc/shared/cardTableBarrierSet.hpp"
37 #include "gc/shared/cardTable.hpp"
38 #include "gc/shared/collectedHeap.hpp"
39 #include "gc/shared/tlab_globals.hpp"
40 #include "interpreter/bytecodeHistogram.hpp"
41 #include "interpreter/interpreter.hpp"
42 #include "interpreter/interpreterRuntime.hpp"
43 #include "jvm.h"
44 #include "memory/resourceArea.hpp"
45 #include "memory/universe.hpp"
46 #include "nativeInst_aarch64.hpp"
47 #include "oops/accessDecorators.hpp"
48 #include "oops/compressedKlass.inline.hpp"
49 #include "oops/compressedOops.inline.hpp"
50 #include "oops/klass.inline.hpp"
51 #include "oops/resolvedFieldEntry.hpp"
52 #include "runtime/arguments.hpp"
53 #include "runtime/continuation.hpp"
54 #include "runtime/globals.hpp"
55 #include "runtime/icache.hpp"
56 #include "runtime/interfaceSupport.inline.hpp"
57 #include "runtime/javaThread.hpp"
58 #include "runtime/jniHandles.inline.hpp"
59 #include "runtime/sharedRuntime.hpp"
60 #include "runtime/signature_cc.hpp"
61 #include "runtime/stubRoutines.hpp"
62 #include "utilities/globalDefinitions.hpp"
63 #include "utilities/integerCast.hpp"
64 #include "utilities/powerOfTwo.hpp"
65 #include "vmreg_aarch64.inline.hpp"
66 #ifdef COMPILER1
67 #include "c1/c1_LIRAssembler.hpp"
68 #endif
69 #ifdef COMPILER2
70 #include "oops/oop.hpp"
71 #include "opto/compile.hpp"
72 #include "opto/node.hpp"
73 #include "opto/output.hpp"
74 #endif
75
76 #include <sys/types.h>
77
78 #ifdef PRODUCT
79 #define BLOCK_COMMENT(str) /* nothing */
80 #else
81 #define BLOCK_COMMENT(str) block_comment(str)
82 #endif
83 #define STOP(str) stop(str);
84 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
85
494 // narrow OOPs by setting the upper 16 bits in the first
495 // instruction.
496 if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010101) {
497 // Move narrow OOP
498 uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
499 Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
500 Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
501 instructions = 2;
502 } else {
503 // Move wide OOP
504 assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch");
505 uintptr_t dest = (uintptr_t)o;
506 Instruction_aarch64::patch(insn_addr, 20, 5, dest & 0xffff);
507 Instruction_aarch64::patch(insn_addr+4, 20, 5, (dest >>= 16) & 0xffff);
508 Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff);
509 instructions = 3;
510 }
511 return instructions * NativeInstruction::instruction_size;
512 }
513
514 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool in_nmethod, Register tmp) {
515 ldr(tmp, Address(rthread, JavaThread::polling_word_offset()));
516 if (at_return) {
517 // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore,
518 // we may safely use the sp instead to perform the stack watermark check.
519 cmp(in_nmethod ? sp : rfp, tmp);
520 br(Assembler::HI, slow_path);
521 } else {
522 tbnz(tmp, log2i_exact(SafepointMechanism::poll_bit()), slow_path);
523 }
524 }
525
526 void MacroAssembler::rt_call(address dest, Register tmp) {
527 CodeBlob *cb = CodeCache::find_blob(dest);
528 if (cb) {
529 far_call(RuntimeAddress(dest));
530 } else {
531 lea(tmp, RuntimeAddress(dest));
532 blr(tmp);
533 }
1997 ldarb(scratch, scratch);
1998 cmp(scratch, InstanceKlass::fully_initialized);
1999 br(Assembler::EQ, *L_fast_path);
2000
2001 // Fast path check: current thread is initializer thread
2002 ldr(scratch, Address(klass, InstanceKlass::init_thread_offset()));
2003 cmp(rthread, scratch);
2004
2005 if (L_slow_path == &L_fallthrough) {
2006 br(Assembler::EQ, *L_fast_path);
2007 bind(*L_slow_path);
2008 } else if (L_fast_path == &L_fallthrough) {
2009 br(Assembler::NE, *L_slow_path);
2010 bind(*L_fast_path);
2011 } else {
2012 Unimplemented();
2013 }
2014 }
2015
// Emits a call to the verify-oop subroutine for the value held in reg.
// Emits nothing unless VerifyOops is set, and also nothing when
// VerifyAdapterSharing is set. s, file and line are only used to build the
// diagnostic message handed to the subroutine.
2016 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
2017 if (!VerifyOops || VerifyAdapterSharing) {
2018 // Below address of the code string confuses VerifyAdapterSharing
2019 // because it may differ between otherwise equivalent adapters.
2020 return;
2021 }
2022
2023 // Build the message string (register name, caller text, source location) passed to verify_oop_subroutine
2024 const char* b = nullptr;
2025 {
2026 ResourceMark rm;
2027 stringStream ss;
2028 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
// NOTE(review): b is used after rm goes out of scope, so code_string() presumably
// returns storage that outlives the ResourceMark - confirm.
2029 b = code_string(ss.as_string());
2030 }
2031 BLOCK_COMMENT("verify_oop {");
2032
2033 strip_return_address(); // This might happen within a stack frame.
2034 protect_return_address();
// Save the four registers this sequence overwrites: r0, rscratch1, rscratch2 and lr.
2035 stp(r0, rscratch1, Address(pre(sp, -2 * wordSize)));
2036 stp(rscratch2, lr, Address(pre(sp, -2 * wordSize)));
2037
// Value to verify goes in r0, address of the message string in rscratch1.
2038 mov(r0, reg);
2039 movptr(rscratch1, (uintptr_t)(address)b);
2040
2041 // call indirectly to solve generation ordering problem
// (the entry point is loaded from memory at run time rather than embedded as a direct target)
2042 lea(rscratch2, RuntimeAddress(StubRoutines::verify_oop_subroutine_entry_address()));
2043 ldr(rscratch2, Address(rscratch2));
2044 blr(rscratch2);
2045
// Restore in the reverse order of the saves above.
2046 ldp(rscratch2, lr, Address(post(sp, 2 * wordSize)));
2047 ldp(r0, rscratch1, Address(post(sp, 2 * wordSize)));
2048 authenticate_return_address();
2049
2050 BLOCK_COMMENT("} verify_oop");
2051 }
2052
2053 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
2054 if (!VerifyOops || VerifyAdapterSharing) {
2055 // Below address of the code string confuses VerifyAdapterSharing
2056 // because it may differ between otherwise equivalent adapters.
2057 return;
2058 }
2059
2060 const char* b = nullptr;
2061 {
2062 ResourceMark rm;
2063 stringStream ss;
2064 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
2065 b = code_string(ss.as_string());
2066 }
2067 BLOCK_COMMENT("verify_oop_addr {");
2068
2069 strip_return_address(); // This might happen within a stack frame.
2070 protect_return_address();
2071 stp(r0, rscratch1, Address(pre(sp, -2 * wordSize)));
2072 stp(rscratch2, lr, Address(pre(sp, -2 * wordSize)));
2073
2074 // addr may contain sp so we will have to adjust it based on the
2075 // pushes that we just did.
2076 if (addr.uses(sp)) {
2077 lea(r0, addr);
2078 ldr(r0, Address(r0, 4 * wordSize));
2291 call_VM_leaf_base(entry_point, 1);
2292 }
2293
// Two-argument form: sets up arg_0 and arg_1 via pass_arg0/pass_arg1, then
// calls call_VM_leaf_base with an argument count of 2.
// arg_1 must not be c_rarg0 (asserted); presumably pass_arg0 writes c_rarg0
// before arg_1 is read - confirm against pass_argN.
2294 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
2295 assert_different_registers(arg_1, c_rarg0);
2296 pass_arg0(this, arg_0);
2297 pass_arg1(this, arg_1);
2298 call_VM_leaf_base(entry_point, 2);
2299 }
2300
// Three-argument form: sets up arg_0, arg_1, arg_2 in that order, then calls
// call_VM_leaf_base with an argument count of 3.
// Because arguments are passed in ascending order, a later argument must not
// live in a register that an earlier pass_argN targets (asserted below).
2301 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
2302 Register arg_1, Register arg_2) {
2303 assert_different_registers(arg_1, c_rarg0);
2304 assert_different_registers(arg_2, c_rarg0, c_rarg1);
2305 pass_arg0(this, arg_0);
2306 pass_arg1(this, arg_1);
2307 pass_arg2(this, arg_2);
2308 call_VM_leaf_base(entry_point, 3);
2309 }
2310
// Leaf call with no register arguments, made through the explicitly
// qualified MacroAssembler::call_VM_leaf_base.
// NOTE(review): no argument is set up here, yet an argument count of 1 is
// passed - confirm whether 0 was intended or whether the count is ignored.
2311 void MacroAssembler::super_call_VM_leaf(address entry_point) {
2312 MacroAssembler::call_VM_leaf_base(entry_point, 1);
2313 }
2314
// One-argument leaf call that names MacroAssembler::call_VM_leaf_base
// explicitly (qualified call) instead of calling it unqualified.
2315 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
2316 pass_arg0(this, arg_0);
2317 MacroAssembler::call_VM_leaf_base(entry_point, 1);
2318 }
2319
// Two-argument leaf call through the explicitly qualified
// MacroAssembler::call_VM_leaf_base.
// Arguments are passed in descending order (arg_1, then arg_0), so arg_0
// must not live in c_rarg1 (asserted below).
2320 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
2321
2322 assert_different_registers(arg_0, c_rarg1);
2323 pass_arg1(this, arg_1);
2324 pass_arg0(this, arg_0);
2325 MacroAssembler::call_VM_leaf_base(entry_point, 2);
2326 }
2327
2328 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
2329 assert_different_registers(arg_0, c_rarg1, c_rarg2);
2330 assert_different_registers(arg_1, c_rarg2);
2331 pass_arg2(this, arg_2);
2332 pass_arg1(this, arg_1);
2333 pass_arg0(this, arg_0);
2334 MacroAssembler::call_VM_leaf_base(entry_point, 3);
2340 assert_different_registers(arg_2, c_rarg3);
2341 pass_arg3(this, arg_3);
2342 pass_arg2(this, arg_2);
2343 pass_arg1(this, arg_1);
2344 pass_arg0(this, arg_0);
2345 MacroAssembler::call_VM_leaf_base(entry_point, 4);
2346 }
2347
2348 void MacroAssembler::null_check(Register reg, int offset) {
2349 if (needs_explicit_null_check(offset)) {
2350 // provoke OS null exception if reg is null by
2351 // accessing M[reg] w/o changing any registers
2352 // NOTE: this is plenty to provoke a segv
2353 ldr(zr, Address(reg));
2354 } else {
2355 // nothing to do, (later) access of M[reg + offset]
2356 // will provoke OS null exception if reg is null
2357 }
2358 }
2359
2360 void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) {
2361 assert_different_registers(markword, rscratch2);
2362 mov(rscratch2, markWord::inline_type_pattern_mask);
2363 andr(markword, markword, rscratch2);
2364 mov(rscratch2, markWord::inline_type_pattern);
2365 cmp(markword, rscratch2);
2366 br(Assembler::EQ, is_inline_type);
2367 }
2368
2369 void MacroAssembler::test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type, bool can_be_null) {
2370 assert_different_registers(tmp, rscratch1);
2371 if (can_be_null) {
2372 cbz(object, not_inline_type);
2373 }
2374 const int is_inline_type_mask = markWord::inline_type_pattern;
2375 ldr(tmp, Address(object, oopDesc::mark_offset_in_bytes()));
2376 mov(rscratch1, is_inline_type_mask);
2377 andr(tmp, tmp, rscratch1);
2378 cmp(tmp, rscratch1);
2379 br(Assembler::NE, not_inline_type);
2380 }
2381
2382 void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type) {
2383 assert(temp_reg == noreg, "not needed"); // keep signature uniform with x86
2384 tbnz(flags, ResolvedFieldEntry::is_null_free_inline_type_shift, is_null_free_inline_type);
2385 }
2386
2387 void MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type) {
2388 assert(temp_reg == noreg, "not needed"); // keep signature uniform with x86
2389 tbz(flags, ResolvedFieldEntry::is_null_free_inline_type_shift, not_null_free_inline_type);
2390 }
2391
2392 void MacroAssembler::test_field_is_flat(Register flags, Register temp_reg, Label& is_flat) {
2393 assert(temp_reg == noreg, "not needed"); // keep signature uniform with x86
2394 tbnz(flags, ResolvedFieldEntry::is_flat_shift, is_flat);
2395 }
2396
2397 void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label) {
2398 Label test_mark_word;
2399 // load mark word
2400 ldr(temp_reg, Address(oop, oopDesc::mark_offset_in_bytes()));
2401 // check displaced
2402 tst(temp_reg, markWord::unlocked_value);
2403 br(Assembler::NE, test_mark_word);
2404 // slow path use klass prototype
2405 load_prototype_header(temp_reg, oop);
2406
2407 bind(test_mark_word);
2408 andr(temp_reg, temp_reg, test_bit);
2409 if (jmp_set) {
2410 cbnz(temp_reg, jmp_label);
2411 } else {
2412 cbz(temp_reg, jmp_label);
2413 }
2414 }
2415
2416 void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg, Label& is_flat_array) {
2417 test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, true, is_flat_array);
2418 }
2419
2420 void MacroAssembler::test_non_flat_array_oop(Register oop, Register temp_reg,
2421 Label&is_non_flat_array) {
2422 test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, false, is_non_flat_array);
2423 }
2424
2425 void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array) {
2426 test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, true, is_null_free_array);
2427 }
2428
2429 void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array) {
2430 test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, false, is_non_null_free_array);
2431 }
2432
2433 void MacroAssembler::test_flat_array_layout(Register lh, Label& is_flat_array) {
2434 tst(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
2435 br(Assembler::NE, is_flat_array);
2436 }
2437
2438 // MacroAssembler protected routines needed to implement
2439 // public methods
2440
2441 void MacroAssembler::mov(Register r, Address dest) {
2442 code_section()->relocate(pc(), dest.rspec());
2443 uint64_t imm64 = (uint64_t)dest.target();
2444 movptr(r, imm64);
2445 }
2446
2447 // Move a constant pointer into r. In AArch64 mode the virtual
2448 // address space is 48 bits in size, so we only need three
2449 // instructions to create a patchable instruction sequence that can
2450 // reach anywhere.
2451 void MacroAssembler::movptr(Register r, uintptr_t imm64) {
2452 #ifndef PRODUCT
2453 {
2454 char buffer[64];
2455 os::snprintf_checked(buffer, sizeof(buffer), "0x%" PRIX64, (uint64_t)imm64);
2456 block_comment(buffer);
2457 }
5152 adrp(rscratch1, src2, offset);
5153 ldr(rscratch1, Address(rscratch1, offset));
5154 cmp(src1, rscratch1);
5155 }
5156
5157 void MacroAssembler::cmpoop(Register obj1, Register obj2) {
5158 cmp(obj1, obj2);
5159 }
5160
5161 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5162 load_method_holder(rresult, rmethod);
5163 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5164 }
5165
5166 void MacroAssembler::load_method_holder(Register holder, Register method) {
5167 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
5168 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
5169 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
5170 }
5171
5172 void MacroAssembler::load_metadata(Register dst, Register src) {
5173 if (UseCompactObjectHeaders) {
5174 load_narrow_klass_compact(dst, src);
5175 } else {
5176 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5177 }
5178 }
5179
5180 // Loads the obj's Klass* into dst.
5181 // Preserves all registers (incl src, rscratch1 and rscratch2).
5182 // Input:
5183 // src - the oop we want to load the klass from.
5184 // dst - output narrow klass.
5185 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5186 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5187 ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
5188 lsr(dst, dst, markWord::klass_shift);
5189 }
5190
5191 void MacroAssembler::load_klass(Register dst, Register src) {
5192 if (UseCompactObjectHeaders) {
5193 load_narrow_klass_compact(dst, src);
5194 } else {
5195 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5196 }
5197 decode_klass_not_null(dst);
5198 }
5199
5251 } else {
5252 ldrw(tmp, Address(obj, oopDesc::klass_offset_in_bytes()));
5253 }
5254 if (CompressedKlassPointers::base() == nullptr) {
5255 cmp(klass, tmp, LSL, CompressedKlassPointers::shift());
5256 return;
5257 } else if (!AOTCodeCache::is_on_for_dump() &&
5258 ((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0
5259 && CompressedKlassPointers::shift() == 0) {
5260 // Only the bottom 32 bits matter
5261 cmpw(klass, tmp);
5262 return;
5263 }
5264 decode_klass_not_null(tmp);
5265 cmp(klass, tmp);
5266 }
5267
5268 void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Register tmp1, Register tmp2) {
5269 if (UseCompactObjectHeaders) {
5270 load_narrow_klass_compact(tmp1, obj1);
5271 load_narrow_klass_compact(tmp2, obj2);
5272 } else {
5273 ldrw(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes()));
5274 ldrw(tmp2, Address(obj2, oopDesc::klass_offset_in_bytes()));
5275 }
5276 cmpw(tmp1, tmp2);
5277 }
5278
5279 void MacroAssembler::load_prototype_header(Register dst, Register src) {
5280 load_klass(dst, src);
5281 ldr(dst, Address(dst, Klass::prototype_header_offset()));
5282 }
5283
5284 void MacroAssembler::store_klass(Register dst, Register src) {
5285 // FIXME: Should this be a store release? concurrent gcs assumes
5286 // klass length is valid if klass field is not null.
5287 assert(!UseCompactObjectHeaders, "not with compact headers");
5288 encode_klass_not_null(src);
5289 strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
5290 }
5291
5292 void MacroAssembler::store_klass_gap(Register dst, Register src) {
5293 assert(!UseCompactObjectHeaders, "not with compact headers");
5294 // Store to klass gap in destination
5295 strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
5296 }
5297
5298 // Algorithm must match CompressedOops::encode.
5299 void MacroAssembler::encode_heap_oop(Register d, Register s) {
5300 #ifdef ASSERT
5301 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
5302 #endif
5303 verify_oop_msg(s, "broken oop in encode_heap_oop");
5653 if (as_raw) {
5654 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2);
5655 } else {
5656 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2);
5657 }
5658 }
5659
5660 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
5661 Address dst, Register val,
5662 Register tmp1, Register tmp2, Register tmp3) {
5663 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
5664 decorators = AccessInternal::decorator_fixup(decorators, type);
5665 bool as_raw = (decorators & AS_RAW) != 0;
5666 if (as_raw) {
5667 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
5668 } else {
5669 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
5670 }
5671 }
5672
5673 void MacroAssembler::flat_field_copy(DecoratorSet decorators, Register src, Register dst,
5674 Register inline_layout_info) {
5675 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
5676 bs->flat_field_copy(this, decorators, src, dst, inline_layout_info);
5677 }
5678
5679 void MacroAssembler::payload_offset(Register inline_klass, Register offset) {
5680 ldr(offset, Address(inline_klass, InlineKlass::adr_members_offset()));
5681 ldrw(offset, Address(offset, InlineKlass::payload_offset_offset()));
5682 }
5683
5684 void MacroAssembler::payload_address(Register oop, Register data, Register inline_klass) {
5685 // ((address) (void*) o) + vk->payload_offset();
5686 Register offset = (data == oop) ? rscratch1 : data;
5687 payload_offset(inline_klass, offset);
5688 if (data == oop) {
5689 add(data, data, offset);
5690 } else {
5691 lea(data, Address(oop, offset));
5692 }
5693 }
5694
5695 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
5696 Register tmp2, DecoratorSet decorators) {
5697 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
5698 }
5699
5700 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
5701 Register tmp2, DecoratorSet decorators) {
5702 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, tmp2);
5703 }
5704
5705 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
5706 Register tmp2, Register tmp3, DecoratorSet decorators) {
5707 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
5708 }
5709
5710 // Used for storing nulls.
5711 void MacroAssembler::store_heap_oop_null(Address dst) {
5712 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
5713 }
5714
5789 ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
5790 cmp(rscratch2, rscratch1);
5791 br(Assembler::HS, next);
5792 STOP("assert(top >= start)");
5793 should_not_reach_here();
5794
5795 bind(next);
5796 ldr(rscratch2, Address(rthread, in_bytes(JavaThread::tlab_end_offset())));
5797 ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
5798 cmp(rscratch2, rscratch1);
5799 br(Assembler::HS, ok);
5800 STOP("assert(top <= end)");
5801 should_not_reach_here();
5802
5803 bind(ok);
5804 ldp(rscratch2, rscratch1, Address(post(sp, 16)));
5805 }
5806 #endif
5807 }
5808
5809 void MacroAssembler::inline_layout_info(Register holder_klass, Register index, Register layout_info) {
5810 assert_different_registers(holder_klass, index, layout_info);
5811 InlineLayoutInfo array[2];
5812 int size = (char*)&array[1] - (char*)&array[0]; // computing size of array elements
5813 if (is_power_of_2(size)) {
5814 lsl(index, index, log2i_exact(size)); // Scale index by power of 2
5815 } else {
5816 mov(layout_info, size);
5817 mul(index, index, layout_info); // Scale the index to be the entry index * array_element_size
5818 }
5819 ldr(layout_info, Address(holder_klass, InstanceKlass::inline_layout_info_array_offset()));
5820 add(layout_info, layout_info, Array<InlineLayoutInfo>::base_offset_in_bytes());
5821 lea(layout_info, Address(layout_info, index));
5822 }
5823
5824 // Writes to stack successive pages until offset reached to check for
5825 // stack overflow + shadow pages. This clobbers tmp.
5826 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
5827 assert_different_registers(tmp, size, rscratch1);
5828 mov(tmp, sp);
5829 // Bang stack for total size given plus shadow page size.
5830 // Bang one page at a time because large size can bang beyond yellow and
5831 // red zones.
5832 Label loop;
5833 mov(rscratch1, (int)os::vm_page_size());
5834 bind(loop);
5835 lea(tmp, Address(tmp, -(int)os::vm_page_size()));
5836 subsw(size, size, rscratch1);
5837 str(size, Address(tmp));
5838 br(Assembler::GT, loop);
5839
5840 // Bang down shadow pages too.
5841 // At this point, (tmp-0) is the last address touched, so don't
5842 // touch it again. (It was touched as (tmp-pagesize) but then tmp
5843 // was post-decremented.) Skip this address by starting at i=1, and
5910
5911 // Strictly speaking the card table base isn't an address at all, and it might
5912 // even be negative. It is thus materialised as a constant.
5913 mov(reg, (uint64_t)ctbs->card_table_base_const());
5914 }
5915
5916 void MacroAssembler::load_aotrc_address(Register reg, address a) {
5917 #if INCLUDE_CDS
5918 assert(AOTRuntimeConstants::contains(a), "address out of range for data area");
5919 if (AOTCodeCache::is_on_for_dump()) {
5920 // all aotrc field addresses should be registered in the AOTCodeCache address table
5921 lea(reg, ExternalAddress(a));
5922 } else {
5923 mov(reg, (uint64_t)a);
5924 }
5925 #else
5926 ShouldNotReachHere();
5927 #endif
5928 }
5929
#ifdef ASSERT
// Debug-build convenience overload: build a frame without zapping the
// FP/LR spill slots.
void MacroAssembler::build_frame(int framesize) {
  const bool zap_rfp_lr_spills = false;
  build_frame(framesize, zap_rfp_lr_spills);
}
#endif
5935
5936 void MacroAssembler::build_frame(int framesize DEBUG_ONLY(COMMA bool zap_rfp_lr_spills)) {
5937 assert(framesize >= 2 * wordSize, "framesize must include space for FP/LR");
5938 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
5939 protect_return_address();
5940 if (framesize < ((1 << 9) + 2 * wordSize)) {
5941 sub(sp, sp, framesize);
5942 if (DEBUG_ONLY(zap_rfp_lr_spills ||) false) {
5943 mov_immediate64(rscratch1, ((uint64_t)badRegWordVal) << 32 | (uint64_t)badRegWordVal);
5944 stp(rscratch1, rscratch1, Address(sp, framesize - 2 * wordSize));
5945 } else {
5946 stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
5947 }
5948 if (PreserveFramePointer) add(rfp, sp, framesize - 2 * wordSize);
5949 } else {
5950 if (DEBUG_ONLY(zap_rfp_lr_spills ||) false) {
5951 mov_immediate64(rscratch1, ((uint64_t)badRegWordVal) << 32 | (uint64_t)badRegWordVal);
5952 stp(rscratch1, rscratch1, Address(pre(sp, -2 * wordSize)));
5953 } else {
5954 stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
5955 }
5956 if (PreserveFramePointer) mov(rfp, sp);
5957 if (framesize < ((1 << 12) + 2 * wordSize))
5958 sub(sp, sp, framesize - 2 * wordSize);
5959 else {
5960 mov(rscratch1, framesize - 2 * wordSize);
5961 sub(sp, sp, rscratch1);
5962 }
5963 }
5964 verify_cross_modify_fence_not_required();
5965 }
5966
5967 void MacroAssembler::remove_frame(int framesize) {
5968 assert(framesize >= 2 * wordSize, "framesize must include space for FP/LR");
5969 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
5970 if (framesize < ((1 << 9) + 2 * wordSize)) {
5971 ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
5972 add(sp, sp, framesize);
5973 } else {
5974 if (framesize < ((1 << 12) + 2 * wordSize))
5975 add(sp, sp, framesize - 2 * wordSize);
5976 else {
5977 mov(rscratch1, framesize - 2 * wordSize);
5978 add(sp, sp, rscratch1);
5979 }
5980 ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
5981 }
5982 authenticate_return_address();
5983 }
5984
5985 void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) {
5986 if (needs_stack_repair) {
5987 // The method has a scalarized entry point (where fields of value object arguments
5988 // are passed through registers and stack), and a non-scalarized entry point (where
5989 // value object arguments are given as oops). The non-scalarized entry point will
5990 // first load each field of value object arguments and store them in registers and on
5991 // the stack in a way compatible with the scalarized entry point. To do so, some extra
5992 // stack space might be reserved (if argument registers are not enough). On leaving the
5993 // method, this space must be freed.
5994 //
5995 // In case we used the non-scalarized entry point the stack looks like this:
5996 //
5997 // | Arguments from caller |
5998 // |---------------------------| <-- caller's SP
5999 // | Saved LR #1 |
6000 // | Saved FP #1 |
6001 // |---------------------------|
6002 // | Extension space for |
6003 // | inline arg (un)packing |
6004 // |---------------------------| <-- start of this method's frame
6005 // | Saved LR #2 |
6006 // | Saved FP #2 |
6007 // |---------------------------| <-- FP (with -XX:+PreserveFramePointer)
6008 // | sp_inc |
6009 // | method locals |
6010 // |---------------------------| <-- SP
6011 //
6012 // There are two copies of FP and LR on the stack. They will be identical at
6013 // first, but that can change.
6014 // If the caller has been deoptimized, LR #1 will be patched to point at the
6015 // deopt blob, and LR #2 will still point into the old method.
6016 // If the saved FP (x29) was not used as the frame pointer, but to store an
6017 // oop, the GC will be aware only of FP #1 as the spilled location of x29 and
6018 // will fix only this one. Overall, FP/LR #2 are not reliable and are simply
6019 // needed to add space between the extension space and the locals, as there
6020 // would be between the real arguments and the locals if we don't need to
6021 // do unpacking (from the scalarized entry point).
6022 //
6023 // When restoring, one must then load FP #1 into x29, and LR #1 into x30,
6024 // while keeping in mind that from the scalarized entry point, there will be
6025 // only one copy of each. Indeed, in the case we used the scalarized calling
6026 // convention, the stack looks like this:
6027 //
6028 // | Arguments from caller |
6029 // |---------------------------| <-- caller's SP / start of this method's frame
6030 // | Saved LR |
6031 // | Saved FP |
6032 // |---------------------------| <-- FP (with -XX:+PreserveFramePointer)
6033 // | sp_inc |
6034 // | method locals |
6035 // |---------------------------| <-- SP
6036 //
6037 // The sp_inc stack slot holds the total size of the frame including the
6038 // extension space minus two words for the saved FP and LR. That is how to
6039 // find FP/LR #1. This size is expressed in bytes. Be careful when using it
6040 // from C++ in pointer arithmetic; you might need to divide it by wordSize.
6041 //
6042 // One can find sp_inc since the start the method's frame is SP + initial_framesize.
6043
6044 int sp_inc_offset = initial_framesize - 3 * wordSize; // Immediately below saved LR and FP
6045
6046 ldr(rscratch1, Address(sp, sp_inc_offset));
6047 add(sp, sp, rscratch1);
6048 ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
6049 } else {
6050 remove_frame(initial_framesize);
6051 }
6052 }
6053
6054 void MacroAssembler::save_stack_increment(int sp_inc, int frame_size) {
6055 int real_frame_size = frame_size + sp_inc;
6056 assert(sp_inc == 0 || sp_inc > 2*wordSize, "invalid sp_inc value");
6057 assert(real_frame_size >= 2*wordSize, "frame size must include FP/LR space");
6058 assert((real_frame_size & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
6059
6060 int sp_inc_offset = frame_size - 3 * wordSize; // Immediately below saved LR and FP
6061
6062 // Subtract two words for the saved FP and LR as these will be popped
6063 // separately. See remove_frame above.
6064 mov(rscratch1, real_frame_size - 2*wordSize);
6065 str(rscratch1, Address(sp, sp_inc_offset));
6066 }
6067
6068 // This method counts leading positive bytes (highest bit not set) in provided byte array
6069 address MacroAssembler::count_positives(Register ary1, Register len, Register result) {
6070 // Simple and most common case of aligned small array which is not at the
6071 // end of memory page is placed here. All other cases are in stub.
6072 Label LOOP, END, STUB, STUB_LONG, SET_RESULT, DONE;
6073 const uint64_t UPPER_BIT_MASK=0x8080808080808080;
6074 assert_different_registers(ary1, len, result);
6075
6076 mov(result, len);
6077 cmpw(len, 0);
6078 br(LE, DONE);
6079 cmpw(len, 4 * wordSize);
6080 br(GE, STUB_LONG); // size > 32 then go to stub
6081
6082 int shift = 64 - exact_log2(os::vm_page_size());
6083 lsl(rscratch1, ary1, shift);
6084 mov(rscratch2, (size_t)(4 * wordSize) << shift);
6085 adds(rscratch2, rscratch1, rscratch2); // At end of page?
6086 br(CS, STUB); // at the end of page then go to stub
6970 // On other systems, the helper is a usual C function.
6971 //
6972 void MacroAssembler::get_thread(Register dst) {
6973 RegSet saved_regs =
6974 BSD_ONLY(RegSet::range(r0, r17) + lr - dst)
6975 NOT_BSD (RegSet::range(r0, r1) + lr - dst);
6976
6977 protect_return_address();
6978 push(saved_regs, sp);
6979
6980 mov(lr, ExternalAddress(CAST_FROM_FN_PTR(address, JavaThread::aarch64_get_thread_helper)));
6981 blr(lr);
6982 if (dst != c_rarg0) {
6983 mov(dst, c_rarg0);
6984 }
6985
6986 pop(saved_regs, sp);
6987 authenticate_return_address();
6988 }
6989
#ifdef COMPILER2
// C2 compiled method's prolog code
// Moved here from aarch64.ad to support Valhalla code below
//
//   C      - the current compilation
//   sp_inc - size in bytes of the stack extension made for inline type
//            arguments (0 if none); stored in the frame when the method
//            needs stack repair
void MacroAssembler::verified_entry(Compile* C, int sp_inc) {
  // Optional class initialization barrier.
  if (C->clinit_barrier_on_entry()) {
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;

    mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
    clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
    // Barrier not passed: go to the handle-wrong-method stub.
    far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
    bind(L_skip_barrier);
  }

  if (C->max_vector_size() > 0) {
    reinitialize_ptrue();
  }

  // Stack overflow check, if this method needs one.
  int bangsize = C->output()->bang_size_in_bytes();
  if (C->output()->need_stack_bang(bangsize)) {
    generate_stack_overflow_check(bangsize);
  }

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->output()->frame_size_in_bytes();
  // In debug builds the FP/LR spill slots are zapped when the stack was extended.
  build_frame(framesize DEBUG_ONLY(COMMA sp_inc != 0));

  if (C->needs_stack_repair()) {
    save_stack_increment(sp_inc, framesize);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }
}
#endif // COMPILER2
7026
7027 int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) {
7028 assert(InlineTypeReturnedAsFields, "Inline types should never be returned as fields");
7029 // An inline type might be returned. If fields are in registers we
7030 // need to allocate an inline type instance and initialize it with
7031 // the value of the fields.
7032 Label skip;
7033 // We only need a new buffered inline type if a new one is not returned
7034 tbz(r0, 0, skip);
7035 int call_offset = -1;
7036
7037 // Be careful not to clobber r1-7 which hold returned fields
7038 // Also do not use callee-saved registers as these may be live in the interpreter
7039 Register tmp1 = r13, tmp2 = r14, klass = r15, r0_preserved = r12;
7040
7041 // The following code is similar to the instance allocation code in TemplateTable::_new
7042 // but has some slight differences,
7043 // e.g. object size is always not zero, sometimes it's constant; storing klass ptr after
7044 // allocating is not necessary if vk != nullptr, etc.
7045 Label slow_case;
7046 // 1. Try to allocate a new buffered inline instance either from TLAB or eden space
7047 mov(r0_preserved, r0); // save r0 for slow_case since *_allocate may corrupt it when allocation failed
7048
7049 if (vk != nullptr) {
7050 // Called from C1, where the return type is statically known.
7051 movptr(klass, (intptr_t)vk->get_InlineKlass());
7052 jint lh = vk->layout_helper();
7053 assert(lh != Klass::_lh_neutral_value, "inline class in return type must have been resolved");
7054 if (UseTLAB && !Klass::layout_helper_needs_slow_path(lh)) {
7055 tlab_allocate(r0, noreg, lh, tmp1, tmp2, slow_case);
7056 } else {
7057 b(slow_case);
7058 }
7059 } else {
7060 // Call from interpreter. R0 contains ((the InlineKlass* of the return type) | 0x01)
7061 andr(klass, r0, -2);
7062 if (UseTLAB) {
7063 ldrw(tmp2, Address(klass, Klass::layout_helper_offset()));
7064 tst(tmp2, Klass::_lh_instance_slow_path_bit);
7065 br(Assembler::NE, slow_case);
7066 tlab_allocate(r0, tmp2, 0, tmp1, tmp2, slow_case);
7067 } else {
7068 b(slow_case);
7069 }
7070 }
7071 if (UseTLAB) {
7072 // 2. Initialize buffered inline instance header
7073 Register buffer_obj = r0;
7074 if (UseCompactObjectHeaders) {
7075 ldr(rscratch1, Address(klass, Klass::prototype_header_offset()));
7076 str(rscratch1, Address(buffer_obj, oopDesc::mark_offset_in_bytes()));
7077 } else {
7078 mov(rscratch1, (intptr_t)markWord::inline_type_prototype().value());
7079 str(rscratch1, Address(buffer_obj, oopDesc::mark_offset_in_bytes()));
7080 store_klass_gap(buffer_obj, zr);
7081 if (vk == nullptr) {
7082 // store_klass corrupts klass, so save it for later use (interpreter case only).
7083 mov(tmp1, klass);
7084 }
7085 store_klass(buffer_obj, klass);
7086 klass = tmp1;
7087 }
7088 // 3. Initialize its fields with an inline class specific handler
7089 if (vk != nullptr) {
7090 far_call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint.
7091 } else {
7092 ldr(tmp1, Address(klass, InlineKlass::adr_members_offset()));
7093 ldr(tmp1, Address(tmp1, InlineKlass::pack_handler_offset()));
7094 blr(tmp1);
7095 }
7096
7097 membar(Assembler::StoreStore);
7098 b(skip);
7099 } else {
7100 // Must have already branched to slow_case above.
7101 DEBUG_ONLY(should_not_reach_here());
7102 }
7103 bind(slow_case);
7104 // We failed to allocate a new inline type, fall back to a runtime
7105 // call. Some oop field may be live in some registers but we can't
7106 // tell. That runtime call will take care of preserving them
7107 // across a GC if there's one.
7108 mov(r0, r0_preserved);
7109
7110 if (from_interpreter) {
7111 super_call_VM_leaf(StubRoutines::store_inline_type_fields_to_buf());
7112 } else {
7113 far_call(RuntimeAddress(StubRoutines::store_inline_type_fields_to_buf()));
7114 call_offset = offset();
7115 }
7116 membar(Assembler::StoreStore);
7117
7118 bind(skip);
7119 return call_offset;
7120 }
7121
7122 // Move a value between registers/stack slots and update the reg_state
7123 bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) {
7124 assert(from->is_valid() && to->is_valid(), "source and destination must be valid");
7125 if (reg_state[to->value()] == reg_written) {
7126 return true; // Already written
7127 }
7128
7129 if (from != to && bt != T_VOID) {
7130 if (reg_state[to->value()] == reg_readonly) {
7131 return false; // Not yet writable
7132 }
7133 if (from->is_reg()) {
7134 if (to->is_reg()) {
7135 if (from->is_Register() && to->is_Register()) {
7136 mov(to->as_Register(), from->as_Register());
7137 } else if (from->is_FloatRegister() && to->is_FloatRegister()) {
7138 fmovd(to->as_FloatRegister(), from->as_FloatRegister());
7139 } else {
7140 ShouldNotReachHere();
7141 }
7142 } else {
7143 int st_off = to->reg2stack() * VMRegImpl::stack_slot_size;
7144 Address to_addr = Address(sp, st_off);
7145 if (from->is_FloatRegister()) {
7146 if (bt == T_DOUBLE) {
7147 strd(from->as_FloatRegister(), to_addr);
7148 } else {
7149 assert(bt == T_FLOAT, "must be float");
7150 strs(from->as_FloatRegister(), to_addr);
7151 }
7152 } else {
7153 str(from->as_Register(), to_addr);
7154 }
7155 }
7156 } else {
7157 Address from_addr = Address(sp, from->reg2stack() * VMRegImpl::stack_slot_size);
7158 if (to->is_reg()) {
7159 if (to->is_FloatRegister()) {
7160 if (bt == T_DOUBLE) {
7161 ldrd(to->as_FloatRegister(), from_addr);
7162 } else {
7163 assert(bt == T_FLOAT, "must be float");
7164 ldrs(to->as_FloatRegister(), from_addr);
7165 }
7166 } else {
7167 ldr(to->as_Register(), from_addr);
7168 }
7169 } else {
7170 int st_off = to->reg2stack() * VMRegImpl::stack_slot_size;
7171 ldr(rscratch1, from_addr);
7172 str(rscratch1, Address(sp, st_off));
7173 }
7174 }
7175 }
7176
7177 // Update register states
7178 reg_state[from->value()] = reg_writable;
7179 reg_state[to->value()] = reg_written;
7180 return true;
7181 }
7182
7183 // Calculate the extra stack space required for packing or unpacking inline
7184 // args and adjust the stack pointer
7185 int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) {
7186 int sp_inc = args_on_stack * VMRegImpl::stack_slot_size;
7187 sp_inc = align_up(sp_inc, StackAlignmentInBytes);
7188 assert(sp_inc > 0, "sanity");
7189
7190 // Save a copy of the FP and LR here for deoptimization patching and frame walking
7191 stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
7192
7193 // Adjust the stack pointer. This will be repaired on return by MacroAssembler::remove_frame
7194 if (sp_inc < (1 << 9)) {
7195 sub(sp, sp, sp_inc); // Fits in an immediate
7196 } else {
7197 mov(rscratch1, sp_inc);
7198 sub(sp, sp, rscratch1);
7199 }
7200
7201 return sp_inc + 2 * wordSize; // Account for the FP/LR space
7202 }
7203
7204 // Read all fields from an inline type oop and store the values in registers/stack slots
7205 bool MacroAssembler::unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index,
7206 VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index,
7207 RegState reg_state[]) {
7208 assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
7209 assert(from->is_valid(), "source must be valid");
7210 bool progress = false;
7211 #ifdef ASSERT
7212 const int start_offset = offset();
7213 #endif
7214
7215 Label L_null, L_notNull;
7216 // Don't use r14 as tmp because it's used for spilling (see MacroAssembler::spill_reg_for)
7217 Register tmp1 = r10;
7218 Register tmp2 = r11;
7219
7220 #ifdef ASSERT
7221 RegSet clobbered_gp_regs = MacroAssembler::call_clobbered_gp_registers();
7222 assert(clobbered_gp_regs.contains(tmp1), "tmp1 must be saved explicitly if it's not a clobber");
7223 assert(clobbered_gp_regs.contains(tmp2), "tmp2 must be saved explicitly if it's not a clobber");
7224 assert(clobbered_gp_regs.contains(r14), "r14 must be saved explicitly if it's not a clobber");
7225 #endif
7226
7227 Register fromReg = noreg;
7228 ScalarizedInlineArgsStream stream(sig, sig_index, to, to_count, to_index, true);
7229 bool done = true;
7230 bool mark_done = true;
7231 VMReg toReg;
7232 BasicType bt;
7233 // Check if argument requires a null check
7234 bool null_check = false;
7235 VMReg nullCheckReg;
7236 while (stream.next(nullCheckReg, bt)) {
7237 if (sig->at(stream.sig_index())._offset == -1) {
7238 null_check = true;
7239 break;
7240 }
7241 }
7242 stream.reset(sig_index, to_index);
7243 while (stream.next(toReg, bt)) {
7244 assert(toReg->is_valid(), "destination must be valid");
7245 int idx = (int)toReg->value();
7246 if (reg_state[idx] == reg_readonly) {
7247 if (idx != from->value()) {
7248 mark_done = false;
7249 }
7250 done = false;
7251 continue;
7252 } else if (reg_state[idx] == reg_written) {
7253 continue;
7254 }
7255 assert(reg_state[idx] == reg_writable, "must be writable");
7256 reg_state[idx] = reg_written;
7257 progress = true;
7258
7259 if (fromReg == noreg) {
7260 if (from->is_reg()) {
7261 fromReg = from->as_Register();
7262 } else {
7263 int st_off = from->reg2stack() * VMRegImpl::stack_slot_size;
7264 ldr(tmp1, Address(sp, st_off));
7265 fromReg = tmp1;
7266 }
7267 if (null_check) {
7268 // Nullable inline type argument, emit null check
7269 cbz(fromReg, L_null);
7270 }
7271 }
7272 int off = sig->at(stream.sig_index())._offset;
7273 if (off == -1) {
7274 assert(null_check, "Missing null check at");
7275 if (toReg->is_stack()) {
7276 int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size;
7277 mov(tmp2, 1);
7278 str(tmp2, Address(sp, st_off));
7279 } else {
7280 mov(toReg->as_Register(), 1);
7281 }
7282 continue;
7283 }
7284 if (sig->at(stream.sig_index())._vt_oop) {
7285 if (toReg->is_stack()) {
7286 int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size;
7287 str(fromReg, Address(sp, st_off));
7288 } else {
7289 mov(toReg->as_Register(), fromReg);
7290 }
7291 continue;
7292 }
7293 assert(off > 0, "offset in object should be positive");
7294 Address fromAddr = Address(fromReg, off);
7295 if (!toReg->is_FloatRegister()) {
7296 Register dst = toReg->is_stack() ? tmp2 : toReg->as_Register();
7297 if (is_reference_type(bt)) {
7298 load_heap_oop(dst, fromAddr, rscratch1, rscratch2);
7299 } else {
7300 bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
7301 load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed);
7302 }
7303 if (toReg->is_stack()) {
7304 int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size;
7305 str(dst, Address(sp, st_off));
7306 }
7307 } else if (bt == T_DOUBLE) {
7308 ldrd(toReg->as_FloatRegister(), fromAddr);
7309 } else {
7310 assert(bt == T_FLOAT, "must be float");
7311 ldrs(toReg->as_FloatRegister(), fromAddr);
7312 }
7313 }
7314 if (progress && null_check) {
7315 if (done) {
7316 b(L_notNull);
7317 bind(L_null);
7318 // Set null marker to zero to signal that the argument is null.
7319 // Also set all fields to zero since the runtime requires a canonical
7320 // representation of a flat null.
7321 stream.reset(sig_index, to_index);
7322 while (stream.next(toReg, bt)) {
7323 if (toReg->is_stack()) {
7324 int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size;
7325 str(zr, Address(sp, st_off));
7326 } else if (toReg->is_FloatRegister()) {
7327 mov(toReg->as_FloatRegister(), T2S, 0);
7328 } else {
7329 mov(toReg->as_Register(), zr);
7330 }
7331 }
7332 bind(L_notNull);
7333 } else {
7334 bind(L_null);
7335 }
7336 }
7337
7338 sig_index = stream.sig_index();
7339 to_index = stream.regs_index();
7340
7341 if (mark_done && reg_state[from->value()] != reg_written) {
7342 // This is okay because no one else will write to that slot
7343 reg_state[from->value()] = reg_writable;
7344 }
7345 from_index--;
7346 assert(progress || (start_offset == offset()), "should not emit code");
7347 return done;
7348 }
7349
7350 // Pack fields back into an inline type oop
7351 bool MacroAssembler::pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
7352 VMRegPair* from, int from_count, int& from_index, VMReg to,
7353 RegState reg_state[], Register val_array) {
7354 assert(sig->at(sig_index)._bt == T_METADATA, "should be at delimiter");
7355 assert(to->is_valid(), "destination must be valid");
7356
7357 if (reg_state[to->value()] == reg_written) {
7358 skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
7359 return true; // Already written
7360 }
7361
7362 // The GC barrier expanded by store_heap_oop below may call into the
7363 // runtime so use callee-saved registers for any values that need to be
7364 // preserved. The GC barrier assembler should take care of saving the
7365 // Java argument registers.
7366 // Be careful with r14 because it's used for spilling (see MacroAssembler::spill_reg_for).
7367 Register val_obj_tmp = r21;
7368 Register from_reg_tmp = r22;
7369 Register tmp1 = r14;
7370 Register tmp2 = r13;
7371 Register tmp3 = r12;
7372 Register val_obj = to->is_stack() ? val_obj_tmp : to->as_Register();
7373
7374 assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array);
7375
7376 if (reg_state[to->value()] == reg_readonly) {
7377 if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) {
7378 skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
7379 return false; // Not yet writable
7380 }
7381 val_obj = val_obj_tmp;
7382 }
7383
7384 ScalarizedInlineArgsStream stream(sig, sig_index, from, from_count, from_index);
7385 VMReg fromReg;
7386 BasicType bt;
7387 Label L_null;
7388 while (stream.next(fromReg, bt)) {
7389 assert(fromReg->is_valid(), "source must be valid");
7390 reg_state[fromReg->value()] = reg_writable;
7391
7392 int off = sig->at(stream.sig_index())._offset;
7393 if (off == -1) {
7394 // Nullable inline type argument, emit null check
7395 Label L_notNull;
7396 if (fromReg->is_stack()) {
7397 int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size;
7398 ldrb(tmp2, Address(sp, ld_off));
7399 cbnz(tmp2, L_notNull);
7400 } else {
7401 cbnz(fromReg->as_Register(), L_notNull);
7402 }
7403 mov(val_obj, 0);
7404 b(L_null);
7405 bind(L_notNull);
7406 continue;
7407 }
7408 if (sig->at(stream.sig_index())._vt_oop) {
7409 if (fromReg->is_stack()) {
7410 int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size;
7411 ldr(val_obj, Address(sp, ld_off));
7412 } else {
7413 mov(val_obj, fromReg->as_Register());
7414 }
7415 cbnz(val_obj, L_null);
7416 // get the buffer from the just allocated pool of buffers
7417 int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * type2aelembytes(T_OBJECT);
7418 load_heap_oop(val_obj, Address(val_array, index), rscratch1, rscratch2);
7419 continue;
7420 }
7421
7422 assert(off > 0, "offset in object should be positive");
7423 size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
7424
7425 // Pack the scalarized field into the value object.
7426 Address dst(val_obj, off);
7427 if (!fromReg->is_FloatRegister()) {
7428 Register src;
7429 if (fromReg->is_stack()) {
7430 src = from_reg_tmp;
7431 int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size;
7432 load_sized_value(src, Address(sp, ld_off), size_in_bytes, /* is_signed */ false);
7433 } else {
7434 src = fromReg->as_Register();
7435 }
7436 assert_different_registers(dst.base(), src, tmp1, tmp2, tmp3, val_array);
7437 if (is_reference_type(bt)) {
7438 // store_heap_oop transitively calls oop_store_at which corrupts to.base(). We need to keep val_obj valid.
7439 mov(tmp3, val_obj);
7440 Address dst_with_tmp3(tmp3, off);
7441 store_heap_oop(dst_with_tmp3, src, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
7442 } else {
7443 store_sized_value(dst, src, size_in_bytes);
7444 }
7445 } else if (bt == T_DOUBLE) {
7446 strd(fromReg->as_FloatRegister(), dst);
7447 } else {
7448 assert(bt == T_FLOAT, "must be float");
7449 strs(fromReg->as_FloatRegister(), dst);
7450 }
7451 }
7452 bind(L_null);
7453 sig_index = stream.sig_index();
7454 from_index = stream.regs_index();
7455
7456 assert(reg_state[to->value()] == reg_writable, "must have already been read");
7457 bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state);
7458 assert(success, "to register must be writeable");
7459 return true;
7460 }
7461
7462 VMReg MacroAssembler::spill_reg_for(VMReg reg) {
7463 return (reg->is_FloatRegister()) ? v8->as_VMReg() : r14->as_VMReg();
7464 }
7465
7466 void MacroAssembler::cache_wb(Address line) {
7467 assert(line.getMode() == Address::base_plus_offset, "mode should be base_plus_offset");
7468 assert(line.index() == noreg, "index should be noreg");
7469 assert(line.offset() == 0, "offset should be 0");
7470 // would like to assert this
7471 // assert(line._ext.shift == 0, "shift should be zero");
7472 if (VM_Version::supports_dcpop()) {
7473 // writeback using clear virtual address to point of persistence
7474 dc(Assembler::CVAP, line.base());
7475 } else {
7476 // no need to generate anything as Unsafe.writebackMemory should
7477 // never invoke this stub
7478 }
7479 }
7480
7481 void MacroAssembler::cache_wbsync(bool is_pre) {
7482 // we only need a barrier post sync
7483 if (!is_pre) {
7484 membar(Assembler::AnyAny);
7485 }
7881 }
7882
7883 // Check if the lock-stack is full.
7884 ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
7885 cmpw(top, (unsigned)LockStack::end_offset());
7886 br(Assembler::GE, slow);
7887
7888 // Check for recursion.
7889 subw(t, top, oopSize);
7890 ldr(t, Address(rthread, t));
7891 cmp(obj, t);
7892 br(Assembler::EQ, push);
7893
7894 // Check header for monitor (0b10).
7895 tst(mark, markWord::monitor_value);
7896 br(Assembler::NE, slow);
7897
7898 // Try to lock. Transition lock bits 0b01 => 0b00
7899 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
7900 orr(mark, mark, markWord::unlocked_value);
7901 // Mask inline_type bit such that we go to the slow path if object is an inline type
7902 andr(mark, mark, ~((int) markWord::inline_type_bit_in_place));
7903
7904 eor(t, mark, markWord::unlocked_value);
7905 cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::xword,
7906 /*acquire*/ true, /*release*/ false, /*weak*/ false, noreg);
7907 br(Assembler::NE, slow);
7908
7909 bind(push);
7910 // After successful lock, push object on lock-stack.
7911 str(obj, Address(rthread, top));
7912 addw(top, top, oopSize);
7913 strw(top, Address(rthread, JavaThread::lock_stack_top_offset()));
7914 }
7915
7916 // Implements fast-unlocking.
7917 //
7918 // - obj: the object to be unlocked
7919 // - t1, t2, t3: temporary registers
7920 // - slow: branched to if unlocking fails, absolute offset may be larger than 32KB (imm14 encoding).
7921 void MacroAssembler::fast_unlock(Register obj, Register t1, Register t2, Register t3, Label& slow) {
7922 // cmpxchg clobbers rscratch1.
7923 assert_different_registers(obj, t1, t2, t3, rscratch1);
|