< prev index next >

src/cpu/aarch64/vm/macroAssembler_aarch64.cpp

Print this page




  16  * accompanied this code).
  17  *
  18  * You should have received a copy of the GNU General Public License version
  19  * 2 along with this work; if not, write to the Free Software Foundation,
  20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  21  *
  22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  23  * or visit www.oracle.com if you need additional information or have any
  24  * questions.
  25  *
  26  */
  27 
  28 #include <sys/types.h>
  29 
  30 #include "precompiled.hpp"
  31 #include "asm/assembler.hpp"
  32 #include "asm/assembler.inline.hpp"
  33 #include "interpreter/interpreter.hpp"
  34 
  35 #include "compiler/disassembler.hpp"





  36 #include "memory/resourceArea.hpp"
  37 #include "runtime/biasedLocking.hpp"
  38 #include "runtime/interfaceSupport.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 
  41 // #include "gc_interface/collectedHeap.inline.hpp"
  42 // #include "interpreter/interpreter.hpp"
  43 // #include "memory/cardTableModRefBS.hpp"
  44 // #include "prims/methodHandles.hpp"
  45 // #include "runtime/biasedLocking.hpp"
  46 // #include "runtime/interfaceSupport.hpp"
  47 // #include "runtime/objectMonitor.hpp"
  48 // #include "runtime/os.hpp"
  49 // #include "runtime/sharedRuntime.hpp"
  50 // #include "runtime/stubRoutines.hpp"
  51 
  52 #if INCLUDE_ALL_GCS
  53 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  54 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
  55 #include "gc_implementation/g1/heapRegion.hpp"


1630     // constant
1631     u_int32_t imm_h[2];
1632     imm_h[0] = imm32 & 0xffff;
1633     imm_h[1] = ((imm32 >> 16) & 0xffff);
1634     if (imm_h[0] == 0) {
1635       movzw(dst, imm_h[1], 16);
1636     } else if (imm_h[0] == 0xffff) {
1637       movnw(dst, imm_h[1] ^ 0xffff, 16);
1638     } else if (imm_h[1] == 0) {
1639       movzw(dst, imm_h[0], 0);
1640     } else if (imm_h[1] == 0xffff) {
1641       movnw(dst, imm_h[0] ^ 0xffff, 0);
1642     } else {
1643       // use a MOVZ and MOVK (makes it easier to debug)
1644       movzw(dst, imm_h[0], 0);
1645       movkw(dst, imm_h[1], 16);
1646     }
1647   }
1648 }
1649 






1650 // Form an address from base + offset in Rd.  Rd may or may
1651 // not actually be used: you must use the Address that is returned.
1652 // It is up to you to ensure that the shift provided matches the size
1653 // of your data.
1654 Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset, int shift) {
1655   if (Address::offset_ok_for_immed(byte_offset, shift))
1656     // It fits; no need for any heroics
1657     return Address(base, byte_offset);
1658 
1659   // Don't do anything clever with negative or misaligned offsets
1660   unsigned mask = (1 << shift) - 1;
1661   if (byte_offset < 0 || byte_offset & mask) {
1662     mov(Rd, byte_offset);
1663     add(Rd, base, Rd);
1664     return Address(Rd);
1665   }
1666 
1667   // See if we can do this with two 12-bit offsets
1668   {
1669     unsigned long word_offset = byte_offset >> shift;


2189     cmp(tmp, expected);
2190   } else {
2191     BLOCK_COMMENT("cmpxchg {");
2192     Label retry_load, done;
2193     if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
2194       prfm(Address(addr), PSTL1STRM);
2195     bind(retry_load);
2196     load_exclusive(tmp, addr, size, acquire);
2197     if (size == xword)
2198       cmp(tmp, expected);
2199     else
2200       cmpw(tmp, expected);
2201     br(Assembler::NE, done);
2202     store_exclusive(tmp, new_val, addr, size, release);
2203     cbnzw(tmp, retry_load);
2204     bind(done);
2205     BLOCK_COMMENT("} cmpxchg");
2206   }
2207 }
2208 























































2209 static bool different(Register a, RegisterOrConstant b, Register c) {
2210   if (b.is_constant())
2211     return a != c;
2212   else
2213     return a != b.as_register() && a != c && b.as_register() != c;
2214 }
2215 
2216 #define ATOMIC_OP(LDXR, OP, IOP, AOP, STXR, sz)                         \
2217 void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Register addr) { \
2218   if (UseLSE) {                                                         \
2219     prev = prev->is_valid() ? prev : zr;                                \
2220     if (incr.is_register()) {                                           \
2221       AOP(sz, incr.as_register(), prev, addr);                          \
2222     } else {                                                            \
2223       mov(rscratch2, incr.as_constant());                               \
2224       AOP(sz, rscratch2, prev, addr);                                   \
2225     }                                                                   \
2226     return;                                                             \
2227   }                                                                     \
2228   Register result = rscratch2;                                          \


2386   // printf("start = %lx\n", start);
2387   int byteCount =  aarch64_stub_prolog_size();
2388   // printf("byteCount = %x\n", byteCount);
2389   int instructionCount = (byteCount + 3)/ 4;
2390   // printf("instructionCount = %x\n", instructionCount);
2391   for (int i = 0; i < instructionCount; i++) {
2392     nop();
2393   }
2394 
2395   memcpy(start, (void*)aarch64_stub_prolog, byteCount);
2396 
2397   // write the address of the setup routine and the call format at the
2398   // end of into the copied code
2399   u_int64_t *patch_end = (u_int64_t *)(start + byteCount);
2400   if (prolog_ptr)
2401     patch_end[-2] = (u_int64_t)prolog_ptr;
2402   patch_end[-1] = calltype;
2403 }
2404 #endif
2405 
2406 void MacroAssembler::push_call_clobbered_registers() {
2407   push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
2408 
2409   // Push v0-v7, v16-v31.
2410   for (int i = 30; i >= 0; i -= 2) {
2411     if (i <= v7->encoding() || i >= v16->encoding()) {
2412         stpd(as_FloatRegister(i), as_FloatRegister(i+1),
2413              Address(pre(sp, -2 * wordSize)));
2414     }
2415   }
2416 }
2417 
2418 void MacroAssembler::pop_call_clobbered_registers() {
2419 
2420   for (int i = 0; i < 32; i += 2) {
2421     if (i <= v7->encoding() || i >= v16->encoding()) {
2422       ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
2423            Address(post(sp, 2 * wordSize)));
2424     }
2425   }











2426 
2427   pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
2428 }
2429 
2430 void MacroAssembler::push_CPU_state(bool save_vectors) {
2431   push(0x3fffffff, sp);         // integer registers except lr & sp
2432 
2433   if (!save_vectors) {
2434     for (int i = 30; i >= 0; i -= 2)
2435       stpd(as_FloatRegister(i), as_FloatRegister(i+1),
2436            Address(pre(sp, -2 * wordSize)));
2437   } else {
2438     for (int i = 30; i >= 0; i -= 2)
2439       stpq(as_FloatRegister(i), as_FloatRegister(i+1),
2440            Address(pre(sp, -4 * wordSize)));
2441   }
2442 }
2443 
2444 void MacroAssembler::pop_CPU_state(bool restore_vectors) {
2445   if (!restore_vectors) {


3136   }
3137   ldr(rscratch1, adr);
3138   add(rscratch1, rscratch1, src);
3139   str(rscratch1, adr);
3140 }
3141 
3142 void MacroAssembler::cmpptr(Register src1, Address src2) {
3143   unsigned long offset;
3144   adrp(rscratch1, src2, offset);
3145   ldr(rscratch1, Address(rscratch1, offset));
3146   cmp(src1, rscratch1);
3147 }
3148 
3149 void MacroAssembler::store_check(Register obj) {
3150   // Does a store check for the oop in register obj. The content of
3151   // register obj is destroyed afterwards.
3152   store_check_part_1(obj);
3153   store_check_part_2(obj);
3154 }
3155 





3156 void MacroAssembler::store_check(Register obj, Address dst) {
3157   store_check(obj);
3158 }
3159 
3160 
3161 // split the store check operation so that other instructions can be scheduled inbetween
3162 void MacroAssembler::store_check_part_1(Register obj) {
3163   BarrierSet* bs = Universe::heap()->barrier_set();
3164   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
3165   lsr(obj, obj, CardTableModRefBS::card_shift);
3166 }
3167 
3168 void MacroAssembler::store_check_part_2(Register obj) {
3169   BarrierSet* bs = Universe::heap()->barrier_set();
3170   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
3171   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
3172   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
3173 
3174   // The calculation for byte_map_base is as follows:
3175   // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);


3615     pass_arg0(this, pre_val);
3616     MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
3617   } else {
3618     call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
3619   }
3620 
3621   pop(r0->bit(tosca_live) | obj->bit(obj != noreg) | pre_val->bit(true), sp);
3622 
3623   bind(done);
3624 }
3625 
3626 void MacroAssembler::g1_write_barrier_post(Register store_addr,
3627                                            Register new_val,
3628                                            Register thread,
3629                                            Register tmp,
3630                                            Register tmp2) {
3631 #ifdef _LP64
3632   assert(thread == rthread, "must be");
3633 #endif // _LP64
3634 







3635   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
3636                                        PtrQueue::byte_offset_of_index()));
3637   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
3638                                        PtrQueue::byte_offset_of_buf()));
3639 
3640   BarrierSet* bs = Universe::heap()->barrier_set();
3641   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
3642   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
3643 
3644   Label done;
3645   Label runtime;
3646 
3647   // Does store cross heap regions?
3648 
3649   eor(tmp, store_addr, new_val);
3650   lsr(tmp, tmp, HeapRegion::LogOfHRGrainBytes);
3651   cbz(tmp, done);
3652 
3653   // crosses regions, storing NULL?
3654 


3682   strb(zr, Address(card_addr));
3683 
3684   ldr(rscratch1, queue_index);
3685   cbz(rscratch1, runtime);
3686   sub(rscratch1, rscratch1, wordSize);
3687   str(rscratch1, queue_index);
3688 
3689   ldr(tmp2, buffer);
3690   str(card_addr, Address(tmp2, rscratch1));
3691   b(done);
3692 
3693   bind(runtime);
3694   // save the live input values
3695   push(store_addr->bit(true) | new_val->bit(true), sp);
3696   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
3697   pop(store_addr->bit(true) | new_val->bit(true), sp);
3698 
3699   bind(done);
3700 }
3701 









































3702 #endif // INCLUDE_ALL_GCS
3703 
3704 Address MacroAssembler::allocate_metadata_address(Metadata* obj) {
3705   assert(oop_recorder() != NULL, "this assembler needs a Recorder");
3706   int index = oop_recorder()->allocate_metadata_index(obj);
3707   RelocationHolder rspec = metadata_Relocation::spec(index);
3708   return Address((address)obj, rspec);
3709 }
3710 
3711 // Move an oop into a register.  immediate is true if we want
3712 // immediate instructions, i.e. we are not going to patch this
3713 // instruction while the code is being executed by another thread.  In
3714 // that case we can use move immediates rather than the constant pool.
3715 void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
3716   int oop_index;
3717   if (obj == NULL) {
3718     oop_index = oop_recorder()->allocate_oop_index(obj);
3719   } else {
3720     oop_index = oop_recorder()->find_index(obj);
3721     assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");


3743 Address MacroAssembler::constant_oop_address(jobject obj) {
3744   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
3745   assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "not an oop");
3746   int oop_index = oop_recorder()->find_index(obj);
3747   return Address((address)obj, oop_Relocation::spec(oop_index));
3748 }
3749 
3750 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
3751 void MacroAssembler::tlab_allocate(Register obj,
3752                                    Register var_size_in_bytes,
3753                                    int con_size_in_bytes,
3754                                    Register t1,
3755                                    Register t2,
3756                                    Label& slow_case) {
3757   assert_different_registers(obj, t2);
3758   assert_different_registers(obj, var_size_in_bytes);
3759   Register end = t2;
3760 
3761   // verify_tlab();
3762 


3763   ldr(obj, Address(rthread, JavaThread::tlab_top_offset()));
3764   if (var_size_in_bytes == noreg) {
3765     lea(end, Address(obj, con_size_in_bytes));
3766   } else {



3767     lea(end, Address(obj, var_size_in_bytes));
3768   }
3769   ldr(rscratch1, Address(rthread, JavaThread::tlab_end_offset()));
3770   cmp(end, rscratch1);
3771   br(Assembler::HI, slow_case);
3772 
3773   // update the tlab top pointer
3774   str(end, Address(rthread, JavaThread::tlab_top_offset()));
3775 


3776   // recover var_size_in_bytes if necessary
3777   if (var_size_in_bytes == end) {
3778     sub(var_size_in_bytes, var_size_in_bytes, obj);
3779   }
3780   // verify_tlab();
3781 }
3782 
3783 // Preserves r19, and r3.
3784 Register MacroAssembler::tlab_refill(Label& retry,
3785                                      Label& try_eden,
3786                                      Label& slow_case) {
3787   Register top = r0;
3788   Register t1  = r2;
3789   Register t2  = r4;
3790   assert_different_registers(top, rthread, t1, t2, /* preserve: */ r19, r3);
3791   Label do_refill, discard_tlab;
3792 
3793   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
3794     // No allocation in the shared eden.
3795     b(slow_case);


4777 
4778 // Compare char[] arrays aligned to 4 bytes
4779 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
4780                                         Register result, Register tmp1)
4781 {
4782   Register cnt1 = rscratch1;
4783   Register cnt2 = rscratch2;
4784   Register tmp2 = rscratch2;
4785 
4786   Label SAME, DIFFER, NEXT, TAIL03, TAIL01;
4787 
4788   int length_offset  = arrayOopDesc::length_offset_in_bytes();
4789   int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
4790 
4791   BLOCK_COMMENT("char_arrays_equals  {");
4792 
4793     // different until proven equal
4794     mov(result, false);
4795 
4796     // same array?
4797     cmp(ary1, ary2);
4798     br(Assembler::EQ, SAME);
4799 
4800     // ne if either null
4801     cbz(ary1, DIFFER);
4802     cbz(ary2, DIFFER);
4803 
4804     // lengths ne?
4805     ldrw(cnt1, Address(ary1, length_offset));
4806     ldrw(cnt2, Address(ary2, length_offset));
4807     cmp(cnt1, cnt2);
4808     br(Assembler::NE, DIFFER);
4809 
4810     lea(ary1, Address(ary1, base_offset));
4811     lea(ary2, Address(ary2, base_offset));
4812 
4813     subs(cnt1, cnt1, 4);
4814     br(LT, TAIL03);
4815 
4816   BIND(NEXT);
4817     ldr(tmp1, Address(post(ary1, 8)));




  16  * accompanied this code).
  17  *
  18  * You should have received a copy of the GNU General Public License version
  19  * 2 along with this work; if not, write to the Free Software Foundation,
  20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  21  *
  22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  23  * or visit www.oracle.com if you need additional information or have any
  24  * questions.
  25  *
  26  */
  27 
  28 #include <sys/types.h>
  29 
  30 #include "precompiled.hpp"
  31 #include "asm/assembler.hpp"
  32 #include "asm/assembler.inline.hpp"
  33 #include "interpreter/interpreter.hpp"
  34 
  35 #include "compiler/disassembler.hpp"
  36 #include "gc_interface/collectedHeap.inline.hpp"
  37 #include "gc_implementation/shenandoah/shenandoahBrooksPointer.hpp"
  38 #include "gc_implementation/shenandoah/shenandoahHeap.hpp"
  39 #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp"
  40 #include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp"
  41 #include "memory/resourceArea.hpp"
  42 #include "runtime/biasedLocking.hpp"
  43 #include "runtime/interfaceSupport.hpp"
  44 #include "runtime/sharedRuntime.hpp"
  45 
  46 // #include "gc_interface/collectedHeap.inline.hpp"
  47 // #include "interpreter/interpreter.hpp"
  48 // #include "memory/cardTableModRefBS.hpp"
  49 // #include "prims/methodHandles.hpp"
  50 // #include "runtime/biasedLocking.hpp"
  51 // #include "runtime/interfaceSupport.hpp"
  52 // #include "runtime/objectMonitor.hpp"
  53 // #include "runtime/os.hpp"
  54 // #include "runtime/sharedRuntime.hpp"
  55 // #include "runtime/stubRoutines.hpp"
  56 
  57 #if INCLUDE_ALL_GCS
  58 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  59 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
  60 #include "gc_implementation/g1/heapRegion.hpp"


1635     // constant
1636     u_int32_t imm_h[2];
1637     imm_h[0] = imm32 & 0xffff;
1638     imm_h[1] = ((imm32 >> 16) & 0xffff);
1639     if (imm_h[0] == 0) {
1640       movzw(dst, imm_h[1], 16);
1641     } else if (imm_h[0] == 0xffff) {
1642       movnw(dst, imm_h[1] ^ 0xffff, 16);
1643     } else if (imm_h[1] == 0) {
1644       movzw(dst, imm_h[0], 0);
1645     } else if (imm_h[1] == 0xffff) {
1646       movnw(dst, imm_h[0] ^ 0xffff, 0);
1647     } else {
1648       // use a MOVZ and MOVK (makes it easier to debug)
1649       movzw(dst, imm_h[0], 0);
1650       movkw(dst, imm_h[1], 16);
1651     }
1652   }
1653 }
1654 
1655 void MacroAssembler::mov(Register dst, address addr) {
1656   assert(Universe::heap() == NULL
1657          || !Universe::heap()->is_in(addr), "use movptr for oop pointers");
1658     mov_immediate64(dst, (uintptr_t)addr);
1659 }
1660 
1661 // Form an address from base + offset in Rd.  Rd may or may
1662 // not actually be used: you must use the Address that is returned.
1663 // It is up to you to ensure that the shift provided matches the size
1664 // of your data.
1665 Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset, int shift) {
1666   if (Address::offset_ok_for_immed(byte_offset, shift))
1667     // It fits; no need for any heroics
1668     return Address(base, byte_offset);
1669 
1670   // Don't do anything clever with negative or misaligned offsets
1671   unsigned mask = (1 << shift) - 1;
1672   if (byte_offset < 0 || byte_offset & mask) {
1673     mov(Rd, byte_offset);
1674     add(Rd, base, Rd);
1675     return Address(Rd);
1676   }
1677 
1678   // See if we can do this with two 12-bit offsets
1679   {
1680     unsigned long word_offset = byte_offset >> shift;


2200     cmp(tmp, expected);
2201   } else {
2202     BLOCK_COMMENT("cmpxchg {");
2203     Label retry_load, done;
2204     if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH))
2205       prfm(Address(addr), PSTL1STRM);
2206     bind(retry_load);
2207     load_exclusive(tmp, addr, size, acquire);
2208     if (size == xword)
2209       cmp(tmp, expected);
2210     else
2211       cmpw(tmp, expected);
2212     br(Assembler::NE, done);
2213     store_exclusive(tmp, new_val, addr, size, release);
2214     cbnzw(tmp, retry_load);
2215     bind(done);
2216     BLOCK_COMMENT("} cmpxchg");
2217   }
2218 }
2219 
2220 void MacroAssembler::cmpxchg_oop_shenandoah(Register addr, Register expected,
2221                                             Register new_val,
2222                                             enum operand_size size,
2223                                             bool acquire, bool release,
2224                                             bool weak,
2225                                             Register result, Register tmp2) {
2226   assert(UseShenandoahGC, "only for shenandoah");
2227   bool is_cae = (result != noreg);
2228   bool is_narrow = (size == word);
2229 
2230   if (! is_cae) result = rscratch1;
2231 
2232   assert_different_registers(addr, expected, new_val, result, tmp2);
2233 
2234   Label retry, done, fail;
2235 
2236   // CAS, using LL/SC pair.
2237   bind(retry);
2238   load_exclusive(result, addr, size, acquire);
2239   if (is_narrow) {
2240     cmpw(result, expected);
2241   } else {
2242     cmp(result, expected);
2243   }
2244   br(Assembler::NE, fail);
2245   store_exclusive(tmp2, new_val, addr, size, release);
2246   if (weak) {
2247     cmpw(tmp2, 0u); // If the store fails, return NE to our caller
2248   } else {
2249     cbnzw(tmp2, retry);
2250   }
2251   b(done);
2252 
2253   bind(fail);
2254   // Check if rb(expected)==rb(result)
2255   // Shuffle registers so that we have memory value ready for next expected.
2256   mov(tmp2, expected);
2257   mov(expected, result);
2258   if (is_narrow) {
2259     decode_heap_oop(result, result);
2260     decode_heap_oop(tmp2, tmp2);
2261   }
2262   oopDesc::bs()->interpreter_read_barrier(this, result);
2263   oopDesc::bs()->interpreter_read_barrier(this, tmp2);
2264   cmp(result, tmp2);
2265   // Retry with expected now being the value we just loaded from addr.
2266   br(Assembler::EQ, retry);
2267   if (is_narrow && is_cae) {
2268     // For cmp-and-exchange and narrow oops, we need to restore
2269     // the compressed old-value. We moved it to 'expected' a few lines up.
2270     mov(result, expected);
2271   }
2272   bind(done);
2273 }
2274 
2275 static bool different(Register a, RegisterOrConstant b, Register c) {
2276   if (b.is_constant())
2277     return a != c;
2278   else
2279     return a != b.as_register() && a != c && b.as_register() != c;
2280 }
2281 
2282 #define ATOMIC_OP(LDXR, OP, IOP, AOP, STXR, sz)                         \
2283 void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Register addr) { \
2284   if (UseLSE) {                                                         \
2285     prev = prev->is_valid() ? prev : zr;                                \
2286     if (incr.is_register()) {                                           \
2287       AOP(sz, incr.as_register(), prev, addr);                          \
2288     } else {                                                            \
2289       mov(rscratch2, incr.as_constant());                               \
2290       AOP(sz, rscratch2, prev, addr);                                   \
2291     }                                                                   \
2292     return;                                                             \
2293   }                                                                     \
2294   Register result = rscratch2;                                          \


2452   // printf("start = %lx\n", start);
2453   int byteCount =  aarch64_stub_prolog_size();
2454   // printf("byteCount = %x\n", byteCount);
2455   int instructionCount = (byteCount + 3)/ 4;
2456   // printf("instructionCount = %x\n", instructionCount);
2457   for (int i = 0; i < instructionCount; i++) {
2458     nop();
2459   }
2460 
2461   memcpy(start, (void*)aarch64_stub_prolog, byteCount);
2462 
2463   // write the address of the setup routine and the call format at the
2464   // end of into the copied code
2465   u_int64_t *patch_end = (u_int64_t *)(start + byteCount);
2466   if (prolog_ptr)
2467     patch_end[-2] = (u_int64_t)prolog_ptr;
2468   patch_end[-1] = calltype;
2469 }
2470 #endif
2471 
2472 void MacroAssembler::push_call_clobbered_fp_registers() {


2473   // Push v0-v7, v16-v31.
2474   for (int i = 30; i >= 0; i -= 2) {
2475     if (i <= v7->encoding() || i >= v16->encoding()) {
2476       stpd(as_FloatRegister(i), as_FloatRegister(i+1),
2477            Address(pre(sp, -2 * wordSize)));
2478     }
2479   }
2480 }
2481 
2482 void MacroAssembler::pop_call_clobbered_fp_registers() {
2483 
2484   for (int i = 0; i < 32; i += 2) {
2485     if (i <= v7->encoding() || i >= v16->encoding()) {
2486       ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
2487            Address(post(sp, 2 * wordSize)));
2488     }
2489   }
2490 }
2491 
2492 void MacroAssembler::push_call_clobbered_registers() {
2493   push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
2494 
2495   push_call_clobbered_fp_registers();
2496 }
2497 
2498 void MacroAssembler::pop_call_clobbered_registers() {
2499 
2500   pop_call_clobbered_fp_registers();
2501 
2502   pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
2503 }
2504 
2505 void MacroAssembler::push_CPU_state(bool save_vectors) {
2506   push(0x3fffffff, sp);         // integer registers except lr & sp
2507 
2508   if (!save_vectors) {
2509     for (int i = 30; i >= 0; i -= 2)
2510       stpd(as_FloatRegister(i), as_FloatRegister(i+1),
2511            Address(pre(sp, -2 * wordSize)));
2512   } else {
2513     for (int i = 30; i >= 0; i -= 2)
2514       stpq(as_FloatRegister(i), as_FloatRegister(i+1),
2515            Address(pre(sp, -4 * wordSize)));
2516   }
2517 }
2518 
2519 void MacroAssembler::pop_CPU_state(bool restore_vectors) {
2520   if (!restore_vectors) {


3211   }
3212   ldr(rscratch1, adr);
3213   add(rscratch1, rscratch1, src);
3214   str(rscratch1, adr);
3215 }
3216 
3217 void MacroAssembler::cmpptr(Register src1, Address src2) {
3218   unsigned long offset;
3219   adrp(rscratch1, src2, offset);
3220   ldr(rscratch1, Address(rscratch1, offset));
3221   cmp(src1, rscratch1);
3222 }
3223 
3224 void MacroAssembler::store_check(Register obj) {
3225   // Does a store check for the oop in register obj. The content of
3226   // register obj is destroyed afterwards.
3227   store_check_part_1(obj);
3228   store_check_part_2(obj);
3229 }
3230 
3231 void MacroAssembler::cmpoops(Register src1, Register src2) {
3232   cmp(src1, src2);
3233   oopDesc::bs()->asm_acmp_barrier(this, src1, src2);
3234 }
3235 
3236 void MacroAssembler::store_check(Register obj, Address dst) {
3237   store_check(obj);
3238 }
3239 
3240 
3241 // split the store check operation so that other instructions can be scheduled inbetween
3242 void MacroAssembler::store_check_part_1(Register obj) {
3243   BarrierSet* bs = Universe::heap()->barrier_set();
3244   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
3245   lsr(obj, obj, CardTableModRefBS::card_shift);
3246 }
3247 
3248 void MacroAssembler::store_check_part_2(Register obj) {
3249   BarrierSet* bs = Universe::heap()->barrier_set();
3250   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
3251   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
3252   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
3253 
3254   // The calculation for byte_map_base is as follows:
3255   // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);


3695     pass_arg0(this, pre_val);
3696     MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
3697   } else {
3698     call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
3699   }
3700 
3701   pop(r0->bit(tosca_live) | obj->bit(obj != noreg) | pre_val->bit(true), sp);
3702 
3703   bind(done);
3704 }
3705 
3706 void MacroAssembler::g1_write_barrier_post(Register store_addr,
3707                                            Register new_val,
3708                                            Register thread,
3709                                            Register tmp,
3710                                            Register tmp2) {
3711 #ifdef _LP64
3712   assert(thread == rthread, "must be");
3713 #endif // _LP64
3714 
3715   if (UseShenandoahGC) {
3716     // No need for this in Shenandoah.
3717     return;
3718   }
3719 
3720   assert(UseG1GC, "expect G1 GC");
3721 
3722   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
3723                                        PtrQueue::byte_offset_of_index()));
3724   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
3725                                        PtrQueue::byte_offset_of_buf()));
3726 
3727   BarrierSet* bs = Universe::heap()->barrier_set();
3728   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
3729   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
3730 
3731   Label done;
3732   Label runtime;
3733 
3734   // Does store cross heap regions?
3735 
3736   eor(tmp, store_addr, new_val);
3737   lsr(tmp, tmp, HeapRegion::LogOfHRGrainBytes);
3738   cbz(tmp, done);
3739 
3740   // crosses regions, storing NULL?
3741 


3769   strb(zr, Address(card_addr));
3770 
3771   ldr(rscratch1, queue_index);
3772   cbz(rscratch1, runtime);
3773   sub(rscratch1, rscratch1, wordSize);
3774   str(rscratch1, queue_index);
3775 
3776   ldr(tmp2, buffer);
3777   str(card_addr, Address(tmp2, rscratch1));
3778   b(done);
3779 
3780   bind(runtime);
3781   // save the live input values
3782   push(store_addr->bit(true) | new_val->bit(true), sp);
3783   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
3784   pop(store_addr->bit(true) | new_val->bit(true), sp);
3785 
3786   bind(done);
3787 }
3788 
3789 void MacroAssembler::shenandoah_write_barrier(Register dst) {
3790   assert(UseShenandoahGC && ShenandoahWriteBarrier, "Should be enabled");
3791   assert(dst != rscratch1, "need rscratch1");
3792   assert(dst != rscratch2, "need rscratch2");
3793 
3794   Label done;
3795 
3796   Address gc_state(rthread, in_bytes(JavaThread::gc_state_offset()));
3797   ldrb(rscratch1, gc_state);
3798 
3799   // Check for heap stability
3800   mov(rscratch2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::EVACUATION);
3801   tst(rscratch1, rscratch2);
3802   br(Assembler::EQ, done);
3803 
3804   // Heap is unstable, need to perform the read-barrier even if WB is inactive
3805   ldr(dst, Address(dst, ShenandoahBrooksPointer::byte_offset()));
3806 
3807   // Check for evacuation-in-progress and jump to WB slow-path if needed
3808   mov(rscratch2, ShenandoahHeap::EVACUATION);
3809   tst(rscratch1, rscratch2);
3810   br(Assembler::EQ, done);
3811 
3812   RegSet to_save = RegSet::of(r0);
3813   if (dst != r0) {
3814     push(to_save, sp);
3815     mov(r0, dst);
3816   }
3817 
3818   assert(StubRoutines::aarch64::shenandoah_wb() != NULL, "need write barrier stub");
3819   far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::aarch64::shenandoah_wb())));
3820 
3821   if (dst != r0) {
3822     mov(dst, r0);
3823     pop(to_save, sp);
3824   }
3825   block_comment("} Shenandoah write barrier");
3826 
3827   bind(done);
3828 }
3829 
3830 #endif // INCLUDE_ALL_GCS
3831 
3832 Address MacroAssembler::allocate_metadata_address(Metadata* obj) {
3833   assert(oop_recorder() != NULL, "this assembler needs a Recorder");
3834   int index = oop_recorder()->allocate_metadata_index(obj);
3835   RelocationHolder rspec = metadata_Relocation::spec(index);
3836   return Address((address)obj, rspec);
3837 }
3838 
3839 // Move an oop into a register.  immediate is true if we want
3840 // immediate instructions, i.e. we are not going to patch this
3841 // instruction while the code is being executed by another thread.  In
3842 // that case we can use move immediates rather than the constant pool.
3843 void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
3844   int oop_index;
3845   if (obj == NULL) {
3846     oop_index = oop_recorder()->allocate_oop_index(obj);
3847   } else {
3848     oop_index = oop_recorder()->find_index(obj);
3849     assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");


3871 Address MacroAssembler::constant_oop_address(jobject obj) {
3872   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
3873   assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "not an oop");
3874   int oop_index = oop_recorder()->find_index(obj);
3875   return Address((address)obj, oop_Relocation::spec(oop_index));
3876 }
3877 
3878 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
3879 void MacroAssembler::tlab_allocate(Register obj,
3880                                    Register var_size_in_bytes,
3881                                    int con_size_in_bytes,
3882                                    Register t1,
3883                                    Register t2,
3884                                    Label& slow_case) {
3885   assert_different_registers(obj, t2);
3886   assert_different_registers(obj, var_size_in_bytes);
3887   Register end = t2;
3888 
3889   // verify_tlab();
3890 
3891   int oop_extra_words = Universe::heap()->oop_extra_words();
3892 
3893   ldr(obj, Address(rthread, JavaThread::tlab_top_offset()));
3894   if (var_size_in_bytes == noreg) {
3895     lea(end, Address(obj, con_size_in_bytes + oop_extra_words * HeapWordSize));
3896   } else {
3897     if (oop_extra_words > 0) {
3898       add(var_size_in_bytes, var_size_in_bytes, oop_extra_words * HeapWordSize);
3899     }
3900     lea(end, Address(obj, var_size_in_bytes));
3901   }
3902   ldr(rscratch1, Address(rthread, JavaThread::tlab_end_offset()));
3903   cmp(end, rscratch1);
3904   br(Assembler::HI, slow_case);
3905 
3906   // update the tlab top pointer
3907   str(end, Address(rthread, JavaThread::tlab_top_offset()));
3908 
3909   Universe::heap()->compile_prepare_oop(this, obj);
3910 
3911   // recover var_size_in_bytes if necessary
3912   if (var_size_in_bytes == end) {
3913     sub(var_size_in_bytes, var_size_in_bytes, obj);
3914   }
3915   // verify_tlab();
3916 }
3917 
3918 // Preserves r19, and r3.
3919 Register MacroAssembler::tlab_refill(Label& retry,
3920                                      Label& try_eden,
3921                                      Label& slow_case) {
3922   Register top = r0;
3923   Register t1  = r2;
3924   Register t2  = r4;
3925   assert_different_registers(top, rthread, t1, t2, /* preserve: */ r19, r3);
3926   Label do_refill, discard_tlab;
3927 
3928   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
3929     // No allocation in the shared eden.
3930     b(slow_case);


4912 
4913 // Compare char[] arrays aligned to 4 bytes
4914 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
4915                                         Register result, Register tmp1)
4916 {
4917   Register cnt1 = rscratch1;
4918   Register cnt2 = rscratch2;
4919   Register tmp2 = rscratch2;
4920 
4921   Label SAME, DIFFER, NEXT, TAIL03, TAIL01;
4922 
4923   int length_offset  = arrayOopDesc::length_offset_in_bytes();
4924   int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
4925 
4926   BLOCK_COMMENT("char_arrays_equals  {");
4927 
4928     // different until proven equal
4929     mov(result, false);
4930 
4931     // same array?
4932     cmpoops(ary1, ary2);
4933     br(Assembler::EQ, SAME);
4934 
4935     // ne if either null
4936     cbz(ary1, DIFFER);
4937     cbz(ary2, DIFFER);
4938 
4939     // lengths ne?
4940     ldrw(cnt1, Address(ary1, length_offset));
4941     ldrw(cnt2, Address(ary2, length_offset));
4942     cmp(cnt1, cnt2);
4943     br(Assembler::NE, DIFFER);
4944 
4945     lea(ary1, Address(ary1, base_offset));
4946     lea(ary2, Address(ary2, base_offset));
4947 
4948     subs(cnt1, cnt1, 4);
4949     br(LT, TAIL03);
4950 
4951   BIND(NEXT);
4952     ldr(tmp1, Address(post(ary1, 8)));


< prev index next >