< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page

5013 }
5014 
// Emits code that loads, into rresult, the field stored at
// InstanceKlass::class_loader_data_offset() of the class that holds rmethod.
// rresult also serves as the temporary for the intermediate holder pointer;
// rmethod is only read.
5015 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5016   load_method_holder(rresult, rmethod);                                        // rresult = holder InstanceKlass*
5017   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));   // rresult = holder's class-loader-data field
5018 }
5019 
// Emits three dependent loads that walk Method -> ConstMethod -> ConstantPool
// -> pool holder, leaving the holder InstanceKlass* in 'holder'.
// 'holder' is overwritten at every step; 'method' is only read.
5020 void MacroAssembler::load_method_holder(Register holder, Register method) {
5021   ldr(holder, Address(method, Method::const_offset()));                      // holder = ConstMethod*
5022   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // holder = ConstantPool*
5023   ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // holder = InstanceKlass* (the pool holder)
5024 }
5025 
5026 // Loads the obj's narrow klass (not a decoded Klass*) into dst, taken from the header word at the mark offset.
5027 // Preserves all registers other than dst (incl src, rscratch1 and rscratch2).
5028 // Input:
5029 // src - the oop we want to load the klass from.
5030 // dst - output narrow klass; load_klass() decodes it afterwards with decode_klass_not_null().
5031 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5032   assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5033   ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));   // dst = header word at the mark offset
5034   lsr(dst, dst, markWord::klass_shift);                      // shift out the bits below klass_shift; the narrow klass remains
5035 }
5036 
// Emits code that loads the klass of the object in src into dst.
// The C++ branches below run while the code is being generated, so exactly
// one of the three load sequences is emitted, selected by the header flags.
5037 void MacroAssembler::load_klass(Register dst, Register src) {
5038   if (UseCompactObjectHeaders) {
5039     load_narrow_klass_compact(dst, src);                         // narrow klass extracted from the mark word
5040     decode_klass_not_null(dst);                                  // decode dst in place (helper defined elsewhere)
5041   } else if (UseCompressedClassPointers) {
5042     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));   // narrow klass read from the klass field with ldrw
5043     decode_klass_not_null(dst);                                  // decode dst in place (helper defined elsewhere)
5044   } else {
5045     ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));    // klass field loaded as-is, no decode step
5046   }
5047 }
5048 
5049 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5050   if (RestoreMXCSROnJNICalls) {
5051     Label OK;
5052     get_fpcr(tmp1);
5053     mov(tmp2, tmp1);
5054     // Set FPCR to the state we need. We do want Round to Nearest. We

5808     b(DONE);
5809 
5810   BIND(SET_RESULT);
5811 
5812     add(len, len, wordSize);
5813     sub(result, result, len);
5814 
5815   BIND(DONE);
5816   postcond(pc() != badAddress);
5817   return pc();
5818 }
5819 
5820 // Clobbers: rscratch1, rscratch2, rflags
5821 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5822 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5823                                       Register tmp4, Register tmp5, Register result,
5824                                       Register cnt1, int elem_size) {
5825   Label DONE, SAME;
5826   Register tmp1 = rscratch1;
5827   Register tmp2 = rscratch2;

5828   int elem_per_word = wordSize/elem_size;
5829   int log_elem_size = exact_log2(elem_size);
5830   int klass_offset  = arrayOopDesc::klass_offset_in_bytes();
5831   int length_offset = arrayOopDesc::length_offset_in_bytes();
5832   int base_offset
5833     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5834   // When the length offset is not aligned to 8 bytes,
5835   // then we align it down. This is valid because the new
5836   // offset will always be the klass which is the same
5837   // for type arrays.
5838   int start_offset = align_down(length_offset, BytesPerWord);
5839   int extra_length = base_offset - start_offset;
5840   assert(start_offset == length_offset || start_offset == klass_offset,
5841          "start offset must be 8-byte-aligned or be the klass offset");
5842   assert(base_offset != start_offset, "must include the length field");
5843   extra_length = extra_length / elem_size; // We count in elements, not bytes.
5844   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5845 
5846   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5847   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5848 
5849 #ifndef PRODUCT
5850   {
5851     const char kind = (elem_size == 2) ? 'U' : 'L';
5852     char comment[64];
5853     snprintf(comment, sizeof comment, "array_equals%c{", kind);
5854     BLOCK_COMMENT(comment);
5855   }
5856 #endif
5857 
5858   // if (a1 == a2)
5859   //     return true;
5860   cmpoop(a1, a2); // May have read barriers for a1 and a2.
5861   br(EQ, SAME);
5862 
5863   if (UseSimpleArrayEquals) {
5864     Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5865     // if (a1 == nullptr || a2 == nullptr)
5866     //     return false;
5867     // a1 & a2 == 0 means (some-pointer is null) or
5868     // (very-rare-or-even-probably-impossible-pointer-values)
5869     // so, we can save one branch in most cases
5870     tst(a1, a2);
5871     mov(result, false);
5872     br(EQ, A_MIGHT_BE_NULL);
5873     // if (a1.length != a2.length)
5874     //      return false;
5875     bind(A_IS_NOT_NULL);
5876     ldrw(cnt1, Address(a1, length_offset));
5877     // Increase loop counter by diff between base- and actual start-offset.
5878     addw(cnt1, cnt1, extra_length);
5879     lea(a1, Address(a1, start_offset));
5880     lea(a2, Address(a2, start_offset));

5881     // Check for short strings, i.e. smaller than wordSize.
5882     subs(cnt1, cnt1, elem_per_word);
5883     br(Assembler::LT, SHORT);
5884     // Main 8 byte comparison loop.
5885     bind(NEXT_WORD); {
5886       ldr(tmp1, Address(post(a1, wordSize)));
5887       ldr(tmp2, Address(post(a2, wordSize)));
5888       subs(cnt1, cnt1, elem_per_word);
5889       eor(tmp5, tmp1, tmp2);
5890       cbnz(tmp5, DONE);
5891     } br(GT, NEXT_WORD);
5892     // Last longword.  In the case where length == 4 we compare the
5893     // same longword twice, but that's still faster than another
5894     // conditional branch.
5895     // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5896     // length == 4.
5897     if (log_elem_size > 0)
5898       lsl(cnt1, cnt1, log_elem_size);
5899     ldr(tmp3, Address(a1, cnt1));
5900     ldr(tmp4, Address(a2, cnt1));

5923       eorw(tmp5, tmp3, tmp4);
5924       cbnzw(tmp5, DONE);
5925     }
5926     bind(TAIL01);
5927     if (elem_size == 1) { // Only needed when comparing byte arrays.
5928       tbz(cnt1, 0, SAME); // 0-1 bytes left.
5929       {
5930         ldrb(tmp1, a1);
5931         ldrb(tmp2, a2);
5932         eorw(tmp5, tmp1, tmp2);
5933         cbnzw(tmp5, DONE);
5934       }
5935     }
5936   } else {
5937     Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5938         CSET_EQ, LAST_CHECK;
5939     mov(result, false);
5940     cbz(a1, DONE);
5941     ldrw(cnt1, Address(a1, length_offset));
5942     cbz(a2, DONE);
5943     // Increase loop counter by diff between base- and actual start-offset.
5944     addw(cnt1, cnt1, extra_length);
5945 
5946     // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5947     // faster to perform another branch before comparing a1 and a2
5948     cmp(cnt1, (u1)elem_per_word);
5949     br(LE, SHORT); // short or same
5950     ldr(tmp3, Address(pre(a1, start_offset)));
5951     subs(zr, cnt1, stubBytesThreshold);
5952     br(GE, STUB);
5953     ldr(tmp4, Address(pre(a2, start_offset)));
5954     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);


5955 
5956     // Main 16 byte comparison loop with 2 exits
5957     bind(NEXT_DWORD); {
5958       ldr(tmp1, Address(pre(a1, wordSize)));
5959       ldr(tmp2, Address(pre(a2, wordSize)));
5960       subs(cnt1, cnt1, 2 * elem_per_word);
5961       br(LE, TAIL);
5962       eor(tmp4, tmp3, tmp4);
5963       cbnz(tmp4, DONE);
5964       ldr(tmp3, Address(pre(a1, wordSize)));
5965       ldr(tmp4, Address(pre(a2, wordSize)));
5966       cmp(cnt1, (u1)elem_per_word);
5967       br(LE, TAIL2);
5968       cmp(tmp1, tmp2);
5969     } br(EQ, NEXT_DWORD);
5970     b(DONE);
5971 
5972     bind(TAIL);
5973     eor(tmp4, tmp3, tmp4);
5974     eor(tmp2, tmp1, tmp2);
5975     lslv(tmp2, tmp2, tmp5);
5976     orr(tmp5, tmp4, tmp2);
5977     cmp(tmp5, zr);
5978     b(CSET_EQ);
5979 
5980     bind(TAIL2);
5981     eor(tmp2, tmp1, tmp2);
5982     cbnz(tmp2, DONE);
5983     b(LAST_CHECK);
5984 
5985     bind(STUB);
5986     ldr(tmp4, Address(pre(a2, start_offset)));


5987     if (elem_size == 2) { // convert to byte counter
5988       lsl(cnt1, cnt1, 1);
5989     }
5990     eor(tmp5, tmp3, tmp4);
5991     cbnz(tmp5, DONE);
5992     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5993     assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5994     address tpc = trampoline_call(stub);
5995     if (tpc == nullptr) {
5996       DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5997       postcond(pc() == badAddress);
5998       return nullptr;
5999     }
6000     b(DONE);
6001 
6002     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
6003     // so, if a2 == null => return false(0), else return true, so we can return a2
6004     mov(result, a2);
6005     b(DONE);
6006     bind(SHORT);



6007     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
6008     ldr(tmp3, Address(a1, start_offset));
6009     ldr(tmp4, Address(a2, start_offset));
6010     bind(LAST_CHECK);
6011     eor(tmp4, tmp3, tmp4);
6012     lslv(tmp5, tmp4, tmp5);
6013     cmp(tmp5, zr);
6014     bind(CSET_EQ);
6015     cset(result, EQ);
6016     b(DONE);
6017   }
6018 
6019   bind(SAME);
6020   mov(result, true);
6021   // That's it.
6022   bind(DONE);
6023 
6024   BLOCK_COMMENT("} array_equals");
6025   postcond(pc() != badAddress);
6026   return pc();
6027 }
6028 
6029 // Compare Strings

5013 }
5014 
// Emits code that loads, into rresult, the field stored at
// InstanceKlass::class_loader_data_offset() of the class that holds rmethod.
// rresult also serves as the temporary for the intermediate holder pointer;
// rmethod is only read.
5015 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5016   load_method_holder(rresult, rmethod);                                        // rresult = holder InstanceKlass*
5017   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));   // rresult = holder's class-loader-data field
5018 }
5019 
// Emits three dependent loads that walk Method -> ConstMethod -> ConstantPool
// -> pool holder, leaving the holder InstanceKlass* in 'holder'.
// 'holder' is overwritten at every step; 'method' is only read.
5020 void MacroAssembler::load_method_holder(Register holder, Register method) {
5021   ldr(holder, Address(method, Method::const_offset()));                      // holder = ConstMethod*
5022   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // holder = ConstantPool*
5023   ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // holder = InstanceKlass* (the pool holder)
5024 }
5025 
5026 // Loads the obj's narrow klass (not a decoded Klass*) into dst, taken from the header word at the mark offset.
5027 // Preserves all registers other than dst (incl src, rscratch1 and rscratch2).
5028 // Input:
5029 // src - the oop we want to load the klass from.
5030 // dst - output narrow klass; load_klass() decodes it afterwards with decode_klass_not_null().
5031 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5032   assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5033   ldrw(dst, Address(src, oopDesc::mark_offset_in_bytes()));   // dst = word at the mark offset, read with the w-form load
5034   lsrw(dst, dst, markWord::klass_shift);                      // shift out the bits below klass_shift; the narrow klass remains
5035 }
5036 
// Emits code that loads the klass of the object in src into dst.
// The C++ branches below run while the code is being generated, so exactly
// one of the three load sequences is emitted, selected by the header flags.
5037 void MacroAssembler::load_klass(Register dst, Register src) {
5038   if (UseCompactObjectHeaders) {
5039     load_narrow_klass_compact(dst, src);                         // narrow klass extracted from the mark word
5040     decode_klass_not_null(dst);                                  // decode dst in place (helper defined elsewhere)
5041   } else if (UseCompressedClassPointers) {
5042     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));   // narrow klass read from the klass field with ldrw
5043     decode_klass_not_null(dst);                                  // decode dst in place (helper defined elsewhere)
5044   } else {
5045     ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));    // klass field loaded as-is, no decode step
5046   }
5047 }
5048 
5049 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5050   if (RestoreMXCSROnJNICalls) {
5051     Label OK;
5052     get_fpcr(tmp1);
5053     mov(tmp2, tmp1);
5054     // Set FPCR to the state we need. We do want Round to Nearest. We

5808     b(DONE);
5809 
5810   BIND(SET_RESULT);
5811 
5812     add(len, len, wordSize);
5813     sub(result, result, len);
5814 
5815   BIND(DONE);
5816   postcond(pc() != badAddress);
5817   return pc();
5818 }
5819 
5820 // Clobbers: rscratch1, rscratch2, rflags
5821 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5822 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5823                                       Register tmp4, Register tmp5, Register result,
5824                                       Register cnt1, int elem_size) {
5825   Label DONE, SAME;
5826   Register tmp1 = rscratch1;
5827   Register tmp2 = rscratch2;
5828   Register cnt2 = tmp2;  // cnt2 only used in array length compare
5829   int elem_per_word = wordSize/elem_size;
5830   int log_elem_size = exact_log2(elem_size);

5831   int length_offset = arrayOopDesc::length_offset_in_bytes();
5832   int base_offset
5833     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);










5834   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5835 
5836   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5837   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5838 
5839 #ifndef PRODUCT
5840   {
5841     const char kind = (elem_size == 2) ? 'U' : 'L';
5842     char comment[64];
5843     snprintf(comment, sizeof comment, "array_equals%c{", kind);
5844     BLOCK_COMMENT(comment);
5845   }
5846 #endif
5847 
5848   // if (a1 == a2)
5849   //     return true;
5850   cmpoop(a1, a2); // May have read barriers for a1 and a2.
5851   br(EQ, SAME);
5852 
5853   if (UseSimpleArrayEquals) {
5854     Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5855     // if (a1 == nullptr || a2 == nullptr)
5856     //     return false;
5857     // a1 & a2 == 0 means (some-pointer is null) or
5858     // (very-rare-or-even-probably-impossible-pointer-values)
5859     // so, we can save one branch in most cases
5860     tst(a1, a2);
5861     mov(result, false);
5862     br(EQ, A_MIGHT_BE_NULL);
5863     // if (a1.length != a2.length)
5864     //      return false;
5865     bind(A_IS_NOT_NULL);
5866     ldrw(cnt1, Address(a1, length_offset));
5867     ldrw(cnt2, Address(a2, length_offset));
5868     eorw(tmp5, cnt1, cnt2);
5869     cbnzw(tmp5, DONE);
5870     lea(a1, Address(a1, base_offset));
5871     lea(a2, Address(a2, base_offset));
5872     // Check for short strings, i.e. smaller than wordSize.
5873     subs(cnt1, cnt1, elem_per_word);
5874     br(Assembler::LT, SHORT);
5875     // Main 8 byte comparison loop.
5876     bind(NEXT_WORD); {
5877       ldr(tmp1, Address(post(a1, wordSize)));
5878       ldr(tmp2, Address(post(a2, wordSize)));
5879       subs(cnt1, cnt1, elem_per_word);
5880       eor(tmp5, tmp1, tmp2);
5881       cbnz(tmp5, DONE);
5882     } br(GT, NEXT_WORD);
5883     // Last longword.  In the case where length == 4 we compare the
5884     // same longword twice, but that's still faster than another
5885     // conditional branch.
5886     // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5887     // length == 4.
5888     if (log_elem_size > 0)
5889       lsl(cnt1, cnt1, log_elem_size);
5890     ldr(tmp3, Address(a1, cnt1));
5891     ldr(tmp4, Address(a2, cnt1));

5914       eorw(tmp5, tmp3, tmp4);
5915       cbnzw(tmp5, DONE);
5916     }
5917     bind(TAIL01);
5918     if (elem_size == 1) { // Only needed when comparing byte arrays.
5919       tbz(cnt1, 0, SAME); // 0-1 bytes left.
5920       {
5921         ldrb(tmp1, a1);
5922         ldrb(tmp2, a2);
5923         eorw(tmp5, tmp1, tmp2);
5924         cbnzw(tmp5, DONE);
5925       }
5926     }
5927   } else {
5928     Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5929         CSET_EQ, LAST_CHECK;
5930     mov(result, false);
5931     cbz(a1, DONE);
5932     ldrw(cnt1, Address(a1, length_offset));
5933     cbz(a2, DONE);
5934     ldrw(cnt2, Address(a2, length_offset));


5935     // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5936     // faster to perform another branch before comparing a1 and a2
5937     cmp(cnt1, (u1)elem_per_word);
5938     br(LE, SHORT); // short or same
5939     ldr(tmp3, Address(pre(a1, base_offset)));
5940     subs(zr, cnt1, stubBytesThreshold);
5941     br(GE, STUB);
5942     ldr(tmp4, Address(pre(a2, base_offset)));
5943     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5944     cmp(cnt2, cnt1);
5945     br(NE, DONE);
5946 
5947     // Main 16 byte comparison loop with 2 exits
5948     bind(NEXT_DWORD); {
5949       ldr(tmp1, Address(pre(a1, wordSize)));
5950       ldr(tmp2, Address(pre(a2, wordSize)));
5951       subs(cnt1, cnt1, 2 * elem_per_word);
5952       br(LE, TAIL);
5953       eor(tmp4, tmp3, tmp4);
5954       cbnz(tmp4, DONE);
5955       ldr(tmp3, Address(pre(a1, wordSize)));
5956       ldr(tmp4, Address(pre(a2, wordSize)));
5957       cmp(cnt1, (u1)elem_per_word);
5958       br(LE, TAIL2);
5959       cmp(tmp1, tmp2);
5960     } br(EQ, NEXT_DWORD);
5961     b(DONE);
5962 
5963     bind(TAIL);
5964     eor(tmp4, tmp3, tmp4);
5965     eor(tmp2, tmp1, tmp2);
5966     lslv(tmp2, tmp2, tmp5);
5967     orr(tmp5, tmp4, tmp2);
5968     cmp(tmp5, zr);
5969     b(CSET_EQ);
5970 
5971     bind(TAIL2);
5972     eor(tmp2, tmp1, tmp2);
5973     cbnz(tmp2, DONE);
5974     b(LAST_CHECK);
5975 
5976     bind(STUB);
5977     ldr(tmp4, Address(pre(a2, base_offset)));
5978     cmp(cnt2, cnt1);
5979     br(NE, DONE);
5980     if (elem_size == 2) { // convert to byte counter
5981       lsl(cnt1, cnt1, 1);
5982     }
5983     eor(tmp5, tmp3, tmp4);
5984     cbnz(tmp5, DONE);
5985     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5986     assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5987     address tpc = trampoline_call(stub);
5988     if (tpc == nullptr) {
5989       DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5990       postcond(pc() == badAddress);
5991       return nullptr;
5992     }
5993     b(DONE);
5994 
5995     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5996     // so, if a2 == null => return false(0), else return true, so we can return a2
5997     mov(result, a2);
5998     b(DONE);
5999     bind(SHORT);
6000     cmp(cnt2, cnt1);
6001     br(NE, DONE);
6002     cbz(cnt1, SAME);
6003     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
6004     ldr(tmp3, Address(a1, base_offset));
6005     ldr(tmp4, Address(a2, base_offset));
6006     bind(LAST_CHECK);
6007     eor(tmp4, tmp3, tmp4);
6008     lslv(tmp5, tmp4, tmp5);
6009     cmp(tmp5, zr);
6010     bind(CSET_EQ);
6011     cset(result, EQ);
6012     b(DONE);
6013   }
6014 
6015   bind(SAME);
6016   mov(result, true);
6017   // That's it.
6018   bind(DONE);
6019 
6020   BLOCK_COMMENT("} array_equals");
6021   postcond(pc() != badAddress);
6022   return pc();
6023 }
6024 
6025 // Compare Strings
< prev index next >