< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page

5020 }
5021 
// Emits code that loads, into rresult, the field found at
// InstanceKlass::class_loader_data_offset() in the holder class of rmethod.
// Only rresult is written; it also serves as the intermediate pointer.
5022 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5023   load_method_holder(rresult, rmethod);  // rresult = holder InstanceKlass* of rmethod
5024   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));  // follow the class-loader-data field
5025 }
5026 
// Emits code that loads the holder class of `method` into `holder` by chasing
// Method -> ConstMethod -> ConstantPool -> pool holder. Each intermediate
// pointer is kept in `holder` itself, so the three loads must stay in this order.
5027 void MacroAssembler::load_method_holder(Register holder, Register method) {
5028   ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
5029   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
5030   ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
5031 }
5032 
5033 // Loads the obj's narrow klass (not a decoded Klass*) into dst.
5034 // Writes only dst; all other registers are preserved (incl src, rscratch1 and rscratch2).
5035 // Input:
5036 // src - the oop we want to load the klass from.
5037 // dst - output narrow klass.
5038 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5039   assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5040   ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));  // 64-bit load of the mark word
5041   lsr(dst, dst, markWord::klass_shift);                     // shift out the bits below klass_shift
5042 }
5043 
// Emits code that loads the Klass* of the object in src into dst. The sequence
// emitted depends on the VM flags in effect at code-generation time.
5044 void MacroAssembler::load_klass(Register dst, Register src) {
5045   if (UseCompactObjectHeaders) {
       // Narrow klass is taken from the mark word, then decoded.
5046     load_narrow_klass_compact(dst, src);
5047     decode_klass_not_null(dst);
5048   } else if (UseCompressedClassPointers) {
       // Narrow klass is a 32-bit field at klass_offset_in_bytes(), then decoded.
5049     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5050     decode_klass_not_null(dst);
5051   } else {
       // Uncompressed: the field at klass_offset_in_bytes() is loaded as-is, no decode step.
5052     ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5053   }
5054 }
5055 
5056 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5057   if (RestoreMXCSROnJNICalls) {
5058     Label OK;
5059     get_fpcr(tmp1);
5060     mov(tmp2, tmp1);
5061     // Set FPCR to the state we need. We do want Round to Nearest. We

5814     b(DONE);
5815 
5816   BIND(SET_RESULT);
5817 
5818     add(len, len, wordSize);
5819     sub(result, result, len);
5820 
5821   BIND(DONE);
5822   postcond(pc() != badAddress);
5823   return pc();
5824 }
5825 
// Emits code that compares the arrays a1 and a2, whose elements are elem_size
// bytes wide (1 or 2, asserted below), and leaves the outcome in result.
// a1, a2, cnt1, tmp3, tmp4 and tmp5 are overwritten on at least some paths.
// Returns pc() after the emitted code, or nullptr if the trampoline call to
// the large_array_equals stub could not be emitted.
5826 // Clobbers: rscratch1, rscratch2, rflags
5827 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5828 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5829                                       Register tmp4, Register tmp5, Register result,
5830                                       Register cnt1, int elem_size) {
5831   Label DONE, SAME;
5832   Register tmp1 = rscratch1;
5833   Register tmp2 = rscratch2;

5834   int elem_per_word = wordSize/elem_size;
5835   int log_elem_size = exact_log2(elem_size);
5836   int klass_offset  = arrayOopDesc::klass_offset_in_bytes();
5837   int length_offset = arrayOopDesc::length_offset_in_bytes();
5838   int base_offset
5839     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5840   // When the length offset is not aligned to 8 bytes,
5841   // then we align it down. This is valid because the new
5842   // offset will always be the klass which is the same
5843   // for type arrays.
5844   int start_offset = align_down(length_offset, BytesPerWord);
5845   int extra_length = base_offset - start_offset;
5846   assert(start_offset == length_offset || start_offset == klass_offset,
5847          "start offset must be 8-byte-aligned or be the klass offset");
5848   assert(base_offset != start_offset, "must include the length field");
5849   extra_length = extra_length / elem_size; // We count in elements, not bytes.
5850   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);  // cnt1 >= this selects the STUB path below
5851 
5852   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5853   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5854 
5855 #ifndef PRODUCT
5856   {
5857     const char kind = (elem_size == 2) ? 'U' : 'L';
5858     char comment[64];
5859     snprintf(comment, sizeof comment, "array_equals%c{", kind);
5860     BLOCK_COMMENT(comment);
5861   }
5862 #endif
5863 
5864   // if (a1 == a2)
5865   //     return true;
5866   cmpoop(a1, a2); // May have read barriers for a1 and a2.
5867   br(EQ, SAME);
5868 
5869   if (UseSimpleArrayEquals) {
5870     Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5871     // if (a1 == nullptr || a2 == nullptr)
5872     //     return false;
5873     // a1 & a2 == 0 means (some-pointer is null) or
5874     // (very-rare-or-even-probably-impossible-pointer-values)
5875     // so, we can save one branch in most cases
5876     tst(a1, a2);
5877     mov(result, false);
5878     br(EQ, A_MIGHT_BE_NULL);
5879     // if (a1.length != a2.length)
5880     //      return false;
5881     bind(A_IS_NOT_NULL);
       // Only a1's length is loaded here: the compared range starts at
       // start_offset, which covers the length field (see the assert above),
       // so differing lengths show up as a data mismatch in the first word.
5882     ldrw(cnt1, Address(a1, length_offset));
5883     // Increase loop counter by diff between base- and actual start-offset.
5884     addw(cnt1, cnt1, extra_length);
5885     lea(a1, Address(a1, start_offset));
5886     lea(a2, Address(a2, start_offset));

5887     // Check for short strings, i.e. smaller than wordSize.
5888     subs(cnt1, cnt1, elem_per_word);
5889     br(Assembler::LT, SHORT);
5890     // Main 8 byte comparison loop.
5891     bind(NEXT_WORD); {
5892       ldr(tmp1, Address(post(a1, wordSize)));
5893       ldr(tmp2, Address(post(a2, wordSize)));
5894       subs(cnt1, cnt1, elem_per_word);
5895       eor(tmp5, tmp1, tmp2);  // non-zero iff the two words differ
5896       cbnz(tmp5, DONE);       // mismatch: result is still false
5897     } br(GT, NEXT_WORD);      // flags come from the subs above; eor/cbnz do not set them
5898     // Last longword.  In the case where length == 4 we compare the
5899     // same longword twice, but that's still faster than another
5900     // conditional branch.
5901     // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5902     // length == 4.
5903     if (log_elem_size > 0)
5904       lsl(cnt1, cnt1, log_elem_size);
5905     ldr(tmp3, Address(a1, cnt1));
5906     ldr(tmp4, Address(a2, cnt1));
       // NOTE(review): listing lines 5907-5928 are not shown in this view.

5929       eorw(tmp5, tmp3, tmp4);
5930       cbnzw(tmp5, DONE);
5931     }
5932     bind(TAIL01);
5933     if (elem_size == 1) { // Only needed when comparing byte arrays.
5934       tbz(cnt1, 0, SAME); // 0-1 bytes left.
5935       {
5936         ldrb(tmp1, a1);
5937         ldrb(tmp2, a2);
5938         eorw(tmp5, tmp1, tmp2);
5939         cbnzw(tmp5, DONE);
5940       }
5941     }
       // No mismatch found: execution falls through to SAME below.
5942   } else {
5943     Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5944         CSET_EQ, LAST_CHECK;
5945     mov(result, false);  // value seen by every direct branch to DONE below
5946     cbz(a1, DONE);       // a1 == null
5947     ldrw(cnt1, Address(a1, length_offset));
5948     cbz(a2, DONE);       // a2 == null
5949     // Increase loop counter by diff between base- and actual start-offset.
5950     addw(cnt1, cnt1, extra_length);
5951 
5952     // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5953     // faster to perform another branch before comparing a1 and a2
5954     cmp(cnt1, (u1)elem_per_word);
5955     br(LE, SHORT); // short or same
5956     ldr(tmp3, Address(pre(a1, start_offset)));  // a1 += start_offset, then load its first word
5957     subs(zr, cnt1, stubBytesThreshold);
5958     br(GE, STUB);
5959     ldr(tmp4, Address(pre(a2, start_offset)));  // a2 += start_offset, then load its first word
       // tmp5 = -(cnt1 << (3 + log_elem_size)), i.e. the negated bit count. It is
       // the lslv shift amount in TAIL; presumably this shifts out the bytes of
       // the last word that lie past the end of the data -- TODO confirm.
5960     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);


5961 
5962     // Main 16 byte comparison loop with 2 exits
5963     bind(NEXT_DWORD); {
5964       ldr(tmp1, Address(pre(a1, wordSize)));
5965       ldr(tmp2, Address(pre(a2, wordSize)));
5966       subs(cnt1, cnt1, 2 * elem_per_word);
5967       br(LE, TAIL);
5968       eor(tmp4, tmp3, tmp4);
5969       cbnz(tmp4, DONE);
5970       ldr(tmp3, Address(pre(a1, wordSize)));
5971       ldr(tmp4, Address(pre(a2, wordSize)));
5972       cmp(cnt1, (u1)elem_per_word);
5973       br(LE, TAIL2);
5974       cmp(tmp1, tmp2);
5975     } br(EQ, NEXT_DWORD);
5976     b(DONE);  // tmp1 != tmp2: mismatch, result is still false
5977 
5978     bind(TAIL);
5979     eor(tmp4, tmp3, tmp4);   // difference bits of the earlier word pair
5980     eor(tmp2, tmp1, tmp2);   // difference bits of the last word pair
5981     lslv(tmp2, tmp2, tmp5);  // shift by the amount computed before the loop
5982     orr(tmp5, tmp4, tmp2);
5983     cmp(tmp5, zr);           // EQ iff no difference bit is left
5984     b(CSET_EQ);
5985 
5986     bind(TAIL2);
5987     eor(tmp2, tmp1, tmp2);
5988     cbnz(tmp2, DONE);
5989     b(LAST_CHECK);  // tmp3/tmp4 hold the final word pair, tmp5 the shift amount
5990 
5991     bind(STUB);
5992     ldr(tmp4, Address(pre(a2, start_offset)));  // tmp3 already holds a1's first word


5993     if (elem_size == 2) { // convert to byte counter
5994       lsl(cnt1, cnt1, 1);
5995     }
5996     eor(tmp5, tmp3, tmp4);
5997     cbnz(tmp5, DONE);  // first words differ: skip the stub call, result is false
5998     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5999     assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
6000     address tpc = trampoline_call(stub);
6001     if (tpc == nullptr) {
         // The call could not be emitted: give up and report failure to the caller.
6002       DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
6003       postcond(pc() == badAddress);
6004       return nullptr;
6005     }
6006     b(DONE);
6007 
       // NOTE(review): no label is bound between the b(DONE) above and the two
       // instructions below, so they look unreachable -- confirm before relying
       // on the comment that follows.
6008     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
6009     // so, if a2 == null => return false(0), else return true, so we can return a2
6010     mov(result, a2);
6011     b(DONE);
6012     bind(SHORT);



6013     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);  // same negated bit count as on the long path
6014     ldr(tmp3, Address(a1, start_offset));
6015     ldr(tmp4, Address(a2, start_offset));
6016     bind(LAST_CHECK);
6017     eor(tmp4, tmp3, tmp4);
6018     lslv(tmp5, tmp4, tmp5);
6019     cmp(tmp5, zr);
6020     bind(CSET_EQ);
6021     cset(result, EQ);  // result = 1 if the preceding cmp found zero, else 0
6022     b(DONE);
6023   }
6024 
6025   bind(SAME);
6026   mov(result, true);
6027   // That's it.
6028   bind(DONE);
6029 
6030   BLOCK_COMMENT("} array_equals");
6031   postcond(pc() != badAddress);
6032   return pc();
6033 }
6034 
6035 // Compare Strings

5020 }
5021 
// Emits code that loads, into rresult, the field found at
// InstanceKlass::class_loader_data_offset() in the holder class of rmethod.
// Only rresult is written; it also serves as the intermediate pointer.
5022 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5023   load_method_holder(rresult, rmethod);  // rresult = holder InstanceKlass* of rmethod
5024   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));  // follow the class-loader-data field
5025 }
5026 
// Emits code that loads the holder class of `method` into `holder` by chasing
// Method -> ConstMethod -> ConstantPool -> pool holder. Each intermediate
// pointer is kept in `holder` itself, so the three loads must stay in this order.
5027 void MacroAssembler::load_method_holder(Register holder, Register method) {
5028   ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
5029   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
5030   ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
5031 }
5032 
5033 // Loads the obj's narrow klass (not a decoded Klass*) into dst.
5034 // Writes only dst; all other registers are preserved (incl src, rscratch1 and rscratch2).
5035 // Input:
5036 // src - the oop we want to load the klass from.
5037 // dst - output narrow klass.
5038 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5039   assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5040   ldrw(dst, Address(src, oopDesc::mark_offset_in_bytes()));  // 32-bit load at the mark offset
5041   lsrw(dst, dst, markWord::klass_shift);                     // 32-bit shift: drop the bits below klass_shift
5042 }
5043 
// Emits code that loads the Klass* of the object in src into dst. The sequence
// emitted depends on the VM flags in effect at code-generation time.
5044 void MacroAssembler::load_klass(Register dst, Register src) {
5045   if (UseCompactObjectHeaders) {
       // Narrow klass is taken from the mark word, then decoded.
5046     load_narrow_klass_compact(dst, src);
5047     decode_klass_not_null(dst);
5048   } else if (UseCompressedClassPointers) {
       // Narrow klass is a 32-bit field at klass_offset_in_bytes(), then decoded.
5049     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5050     decode_klass_not_null(dst);
5051   } else {
       // Uncompressed: the field at klass_offset_in_bytes() is loaded as-is, no decode step.
5052     ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5053   }
5054 }
5055 
5056 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5057   if (RestoreMXCSROnJNICalls) {
5058     Label OK;
5059     get_fpcr(tmp1);
5060     mov(tmp2, tmp1);
5061     // Set FPCR to the state we need. We do want Round to Nearest. We

5814     b(DONE);
5815 
5816   BIND(SET_RESULT);
5817 
5818     add(len, len, wordSize);
5819     sub(result, result, len);
5820 
5821   BIND(DONE);
5822   postcond(pc() != badAddress);
5823   return pc();
5824 }
5825 
// Emits code that compares the arrays a1 and a2, whose elements are elem_size
// bytes wide (1 or 2, asserted below), and leaves the outcome in result.
// The two length fields are compared explicitly before any element data.
// a1, a2, cnt1, tmp3, tmp4 and tmp5 are overwritten on at least some paths.
// Returns pc() after the emitted code, or nullptr if the trampoline call to
// the large_array_equals stub could not be emitted.
5826 // Clobbers: rscratch1, rscratch2, rflags
5827 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5828 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5829                                       Register tmp4, Register tmp5, Register result,
5830                                       Register cnt1, int elem_size) {
5831   Label DONE, SAME;
5832   Register tmp1 = rscratch1;
5833   Register tmp2 = rscratch2;
5834   Register cnt2 = tmp2;  // cnt2 only used in array length compare
5835   int elem_per_word = wordSize/elem_size;
5836   int log_elem_size = exact_log2(elem_size);

5837   int length_offset = arrayOopDesc::length_offset_in_bytes();
5838   int base_offset
5839     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);










5840   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);  // cnt1 >= this selects the STUB path below
5841 
5842   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5843   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5844 
5845 #ifndef PRODUCT
5846   {
5847     const char kind = (elem_size == 2) ? 'U' : 'L';
5848     char comment[64];
5849     snprintf(comment, sizeof comment, "array_equals%c{", kind);
5850     BLOCK_COMMENT(comment);
5851   }
5852 #endif
5853 
5854   // if (a1 == a2)
5855   //     return true;
5856   cmpoop(a1, a2); // May have read barriers for a1 and a2.
5857   br(EQ, SAME);
5858 
5859   if (UseSimpleArrayEquals) {
5860     Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5861     // if (a1 == nullptr || a2 == nullptr)
5862     //     return false;
5863     // a1 & a2 == 0 means (some-pointer is null) or
5864     // (very-rare-or-even-probably-impossible-pointer-values)
5865     // so, we can save one branch in most cases
5866     tst(a1, a2);
5867     mov(result, false);
5868     br(EQ, A_MIGHT_BE_NULL);
5869     // if (a1.length != a2.length)
5870     //      return false;
5871     bind(A_IS_NOT_NULL);
5872     ldrw(cnt1, Address(a1, length_offset));
5873     ldrw(cnt2, Address(a2, length_offset));
5874     eorw(tmp5, cnt1, cnt2);
5875     cbnzw(tmp5, DONE);  // lengths differ: result is still false
5876     lea(a1, Address(a1, base_offset));  // a1/a2 now point at the first element
5877     lea(a2, Address(a2, base_offset));
5878     // Check for short strings, i.e. smaller than wordSize.
5879     subs(cnt1, cnt1, elem_per_word);
5880     br(Assembler::LT, SHORT);
5881     // Main 8 byte comparison loop.
5882     bind(NEXT_WORD); {
5883       ldr(tmp1, Address(post(a1, wordSize)));
5884       ldr(tmp2, Address(post(a2, wordSize)));  // overwrites cnt2, which is no longer needed
5885       subs(cnt1, cnt1, elem_per_word);
5886       eor(tmp5, tmp1, tmp2);  // non-zero iff the two words differ
5887       cbnz(tmp5, DONE);       // mismatch: result is still false
5888     } br(GT, NEXT_WORD);      // flags come from the subs above; eor/cbnz do not set them
5889     // Last longword.  In the case where length == 4 we compare the
5890     // same longword twice, but that's still faster than another
5891     // conditional branch.
5892     // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5893     // length == 4.
5894     if (log_elem_size > 0)
5895       lsl(cnt1, cnt1, log_elem_size);
5896     ldr(tmp3, Address(a1, cnt1));
5897     ldr(tmp4, Address(a2, cnt1));
       // NOTE(review): listing lines 5898-5919 are not shown in this view.

5920       eorw(tmp5, tmp3, tmp4);
5921       cbnzw(tmp5, DONE);
5922     }
5923     bind(TAIL01);
5924     if (elem_size == 1) { // Only needed when comparing byte arrays.
5925       tbz(cnt1, 0, SAME); // 0-1 bytes left.
5926       {
5927         ldrb(tmp1, a1);
5928         ldrb(tmp2, a2);
5929         eorw(tmp5, tmp1, tmp2);
5930         cbnzw(tmp5, DONE);
5931       }
5932     }
       // No mismatch found: execution falls through to SAME below.
5933   } else {
5934     Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5935         CSET_EQ, LAST_CHECK;
5936     mov(result, false);  // value seen by every direct branch to DONE below
5937     cbz(a1, DONE);       // a1 == null
5938     ldrw(cnt1, Address(a1, length_offset));
5939     cbz(a2, DONE);       // a2 == null
5940     ldrw(cnt2, Address(a2, length_offset));


5941     // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5942     // faster to perform another branch before comparing a1 and a2
5943     cmp(cnt1, (u1)elem_per_word);
5944     br(LE, SHORT); // short or same
5945     ldr(tmp3, Address(pre(a1, base_offset)));  // a1 += base_offset, then load its first word
5946     subs(zr, cnt1, stubBytesThreshold);
5947     br(GE, STUB);
5948     ldr(tmp4, Address(pre(a2, base_offset)));  // a2 += base_offset, then load its first word
       // tmp5 = -(cnt1 << (3 + log_elem_size)), i.e. the negated bit count. It is
       // the lslv shift amount in TAIL; presumably this shifts out the bytes of
       // the last word that lie past the end of the data -- TODO confirm.
5949     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5950     cmp(cnt2, cnt1);
5951     br(NE, DONE);  // lengths differ; must precede the loop because tmp2 (== cnt2) is reloaded there
5952 
5953     // Main 16 byte comparison loop with 2 exits
5954     bind(NEXT_DWORD); {
5955       ldr(tmp1, Address(pre(a1, wordSize)));
5956       ldr(tmp2, Address(pre(a2, wordSize)));
5957       subs(cnt1, cnt1, 2 * elem_per_word);
5958       br(LE, TAIL);
5959       eor(tmp4, tmp3, tmp4);
5960       cbnz(tmp4, DONE);
5961       ldr(tmp3, Address(pre(a1, wordSize)));
5962       ldr(tmp4, Address(pre(a2, wordSize)));
5963       cmp(cnt1, (u1)elem_per_word);
5964       br(LE, TAIL2);
5965       cmp(tmp1, tmp2);
5966     } br(EQ, NEXT_DWORD);
5967     b(DONE);  // tmp1 != tmp2: mismatch, result is still false
5968 
5969     bind(TAIL);
5970     eor(tmp4, tmp3, tmp4);   // difference bits of the earlier word pair
5971     eor(tmp2, tmp1, tmp2);   // difference bits of the last word pair
5972     lslv(tmp2, tmp2, tmp5);  // shift by the amount computed before the loop
5973     orr(tmp5, tmp4, tmp2);
5974     cmp(tmp5, zr);           // EQ iff no difference bit is left
5975     b(CSET_EQ);
5976 
5977     bind(TAIL2);
5978     eor(tmp2, tmp1, tmp2);
5979     cbnz(tmp2, DONE);
5980     b(LAST_CHECK);  // tmp3/tmp4 hold the final word pair, tmp5 the shift amount
5981 
5982     bind(STUB);
5983     ldr(tmp4, Address(pre(a2, base_offset)));  // tmp3 already holds a1's first word
5984     cmp(cnt2, cnt1);
5985     br(NE, DONE);  // lengths differ: result is still false
5986     if (elem_size == 2) { // convert to byte counter
5987       lsl(cnt1, cnt1, 1);
5988     }
5989     eor(tmp5, tmp3, tmp4);
5990     cbnz(tmp5, DONE);  // first words differ: skip the stub call, result is false
5991     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5992     assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5993     address tpc = trampoline_call(stub);
5994     if (tpc == nullptr) {
         // The call could not be emitted: give up and report failure to the caller.
5995       DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5996       postcond(pc() == badAddress);
5997       return nullptr;
5998     }
5999     b(DONE);
6000 
       // NOTE(review): no label is bound between the b(DONE) above and the two
       // instructions below, so they look unreachable -- confirm before relying
       // on the comment that follows.
6001     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
6002     // so, if a2 == null => return false(0), else return true, so we can return a2
6003     mov(result, a2);
6004     b(DONE);
6005     bind(SHORT);
6006     cmp(cnt2, cnt1);
6007     br(NE, DONE);     // lengths differ: result is still false
6008     cbz(cnt1, SAME);  // both lengths are zero: equal, and no element word is loaded
6009     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);  // same negated bit count as on the long path
6010     ldr(tmp3, Address(a1, base_offset));
6011     ldr(tmp4, Address(a2, base_offset));
6012     bind(LAST_CHECK);
6013     eor(tmp4, tmp3, tmp4);
6014     lslv(tmp5, tmp4, tmp5);
6015     cmp(tmp5, zr);
6016     bind(CSET_EQ);
6017     cset(result, EQ);  // result = 1 if the preceding cmp found zero, else 0
6018     b(DONE);
6019   }
6020 
6021   bind(SAME);
6022   mov(result, true);
6023   // That's it.
6024   bind(DONE);
6025 
6026   BLOCK_COMMENT("} array_equals");
6027   postcond(pc() != badAddress);
6028   return pc();
6029 }
6030 
6031 // Compare Strings
< prev index next >