< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page

5014 }
5015 
5016 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
       // Emits code that first loads rmethod's holder into rresult and then
       // replaces it with the value stored in that holder at
       // InstanceKlass::class_loader_data_offset().
5017   load_method_holder(rresult, rmethod);
5018   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5019 }
5020 
5021 void MacroAssembler::load_method_holder(Register holder, Register method) {
       // Emits a chain of three dependent loads that starts at `method`.
       // Every load targets `holder`, so it is the only register written here.
5022   ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
5023   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
5024   ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
5025 }
5026 
5027 // Loads the narrow klass of the oop in src into dst.
5028 // Writes only dst; src (when distinct from dst), rscratch1 and rscratch2 are preserved.
5029 // Input:
5030 // src - the oop we want to load the klass from.
5031 // dst - output narrow klass.
5032 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5033   assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
       // Read the full 64-bit word at the mark offset, then logically shift it
       // right by markWord::klass_shift.
5034   ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
5035   lsr(dst, dst, markWord::klass_shift);
5036 }
5037 
5038 void MacroAssembler::load_klass(Register dst, Register src) {
       // Emits code that loads the klass of the oop in src into dst. The load
       // sequence depends on which of the two header flags below is enabled.
5039   if (UseCompactObjectHeaders) {
         // Narrow klass is extracted from the mark word, then decoded in dst.
5040     load_narrow_klass_compact(dst, src);
5041     decode_klass_not_null(dst);
5042   } else if (UseCompressedClassPointers) {
         // 32-bit load from the klass field, then decoded in dst.
5043     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5044     decode_klass_not_null(dst);
5045   } else {
         // Full-width load from the klass field; no decode step is emitted.
5046     ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5047   }
5048 }
5049 
5050 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5051   if (RestoreMXCSROnJNICalls) {
5052     Label OK;
5053     get_fpcr(tmp1);
5054     mov(tmp2, tmp1);
5055     // Set FPCR to the state we need. We do want Round to Nearest. We

5794     b(DONE);
5795 
5796   BIND(SET_RESULT);
5797 
5798     add(len, len, wordSize);
5799     sub(result, result, len);
5800 
5801   BIND(DONE);
5802   postcond(pc() != badAddress);
5803   return pc();
5804 }
5805 
5806 // Clobbers: rscratch1, rscratch2, rflags
5807 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
     //
     // Emits code that compares two byte (elem_size == 1) or char (elem_size == 2)
     // arrays and leaves true/false in result.
     //   a1, a2     - the array references; either may be null. Both registers
     //                may be modified by the emitted code.
     //   tmp3..tmp5 - scratch registers.
     //   result     - receives the outcome.
     //   cnt1       - scratch; holds the (remaining) element count.
     // Returns the pc following the emitted code, or nullptr when the trampoline
     // call to the large-array stub could not be emitted.
5808 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5809                                       Register tmp4, Register tmp5, Register result,
5810                                       Register cnt1, int elem_size) {
5811   Label DONE, SAME;
5812   Register tmp1 = rscratch1;
5813   Register tmp2 = rscratch2;

5814   int elem_per_word = wordSize/elem_size;
5815   int log_elem_size = exact_log2(elem_size);
5816   int klass_offset  = arrayOopDesc::klass_offset_in_bytes();
5817   int length_offset = arrayOopDesc::length_offset_in_bytes();
5818   int base_offset
5819     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5820   // When the length offset is not aligned to 8 bytes,
5821   // then we align it down. This is valid because the new
5822   // offset will always be the klass which is the same
5823   // for type arrays.
5824   int start_offset = align_down(length_offset, BytesPerWord);
5825   int extra_length = base_offset - start_offset;
5826   assert(start_offset == length_offset || start_offset == klass_offset,
5827          "start offset must be 8-byte-aligned or be the klass offset");
5828   assert(base_offset != start_offset, "must include the length field");
5829   extra_length = extra_length / elem_size; // We count in elements, not bytes.
       // NOTE(review): despite the "Bytes" in its name, this threshold is compared
       // against cnt1 while cnt1 still counts elements; the conversion to a byte
       // count only happens afterwards, under the STUB label.
5830   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5831 
5832   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5833   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5834 
5835 #ifndef PRODUCT
5836   {
5837     const char kind = (elem_size == 2) ? 'U' : 'L';
5838     char comment[64];
5839     snprintf(comment, sizeof comment, "array_equals%c{", kind);
5840     BLOCK_COMMENT(comment);
5841   }
5842 #endif
5843 
5844   // if (a1 == a2)
5845   //     return true;
5846   cmpoop(a1, a2); // May have read barriers for a1 and a2.
5847   br(EQ, SAME);
5848 
5849   if (UseSimpleArrayEquals) {
5850     Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5851     // if (a1 == nullptr || a2 == nullptr)
5852     //     return false;
5853     // a1 & a2 == 0 means (some-pointer is null) or
5854     // (very-rare-or-even-probably-impossible-pointer-values)
5855     // so, we can save one branch in most cases
5856     tst(a1, a2);
         // Preload "false" so that every mismatch exit can branch straight to DONE.
         // This sits between tst and br, so the flags from tst must survive it.
5857     mov(result, false);
5858     br(EQ, A_MIGHT_BE_NULL);
5859     // if (a1.length != a2.length)
5860     //      return false;
         // Only a1's length is loaded. The compared range starts at start_offset
         // and therefore covers the length field itself (see the assert above).
5861     bind(A_IS_NOT_NULL);
5862     ldrw(cnt1, Address(a1, length_offset));
5863     // Increase loop counter by diff between base- and actual start-offset.
5864     addw(cnt1, cnt1, extra_length);
5865     lea(a1, Address(a1, start_offset));
5866     lea(a2, Address(a2, start_offset));

5867     // Check for short strings, i.e. smaller than wordSize.
5868     subs(cnt1, cnt1, elem_per_word);
5869     br(Assembler::LT, SHORT);
5870     // Main 8 byte comparison loop.
         // The post-indexed loads advance a1/a2 by wordSize. The flags set by subs
         // are the ones tested by the br(GT) that closes the loop.
5871     bind(NEXT_WORD); {
5872       ldr(tmp1, Address(post(a1, wordSize)));
5873       ldr(tmp2, Address(post(a2, wordSize)));
5874       subs(cnt1, cnt1, elem_per_word);
5875       eor(tmp5, tmp1, tmp2);
5876       cbnz(tmp5, DONE);
5877     } br(GT, NEXT_WORD);
5878     // Last longword.  In the case where length == 4 we compare the
5879     // same longword twice, but that's still faster than another
5880     // conditional branch.
5881     // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5882     // length == 4.
         // cnt1 is scaled from elements to bytes and used as a register offset.
5883     if (log_elem_size > 0)
5884       lsl(cnt1, cnt1, log_elem_size);
5885     ldr(tmp3, Address(a1, cnt1));
5886     ldr(tmp4, Address(a2, cnt1));

5909       eorw(tmp5, tmp3, tmp4);
5910       cbnzw(tmp5, DONE);
5911     }
5912     bind(TAIL01);
5913     if (elem_size == 1) { // Only needed when comparing byte arrays.
5914       tbz(cnt1, 0, SAME); // 0-1 bytes left.
5915       {
5916         ldrb(tmp1, a1);
5917         ldrb(tmp2, a2);
5918         eorw(tmp5, tmp1, tmp2);
5919         cbnzw(tmp5, DONE);
5920       }
5921     }
         // Nothing else is emitted for this variant, so the code above falls
         // through to the SAME label bound after this if/else.
5922   } else {
5923     Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5924         CSET_EQ, LAST_CHECK;
         // Preload "false"; a null a1 or a2 exits through DONE with that value.
5925     mov(result, false);
5926     cbz(a1, DONE);
5927     ldrw(cnt1, Address(a1, length_offset));
5928     cbz(a2, DONE);
5929     // Increase loop counter by diff between base- and actual start-offset.
5930     addw(cnt1, cnt1, extra_length);
5931 
5932     // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5933     // faster to perform another branch before comparing a1 and a2
5934     cmp(cnt1, (u1)elem_per_word);
5935     br(LE, SHORT); // short or same
5936     ldr(tmp3, Address(pre(a1, start_offset))); // pre-indexed: a1 is updated to a1 + start_offset
5937     subs(zr, cnt1, stubBytesThreshold);
5938     br(GE, STUB);
5939     ldr(tmp4, Address(pre(a2, start_offset))); // pre-indexed: a2 is updated to a2 + start_offset
         // tmp5 = -(cnt1 << (3 + log_elem_size)), i.e. the negated length in bits.
         // It is the shift amount of the lslv instructions below; presumably that
         // shift discards the bytes of the final word that lie past the end.
5940     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);


5941 
5942     // Main 16 byte comparison loop with 2 exits
5943     bind(NEXT_DWORD); {
5944       ldr(tmp1, Address(pre(a1, wordSize)));
5945       ldr(tmp2, Address(pre(a2, wordSize)));
5946       subs(cnt1, cnt1, 2 * elem_per_word);
5947       br(LE, TAIL);
5948       eor(tmp4, tmp3, tmp4);
5949       cbnz(tmp4, DONE);
5950       ldr(tmp3, Address(pre(a1, wordSize)));
5951       ldr(tmp4, Address(pre(a2, wordSize)));
5952       cmp(cnt1, (u1)elem_per_word);
5953       br(LE, TAIL2);
5954       cmp(tmp1, tmp2);
5955     } br(EQ, NEXT_DWORD);
5956     b(DONE);
5957 
         // TAIL: tmp3/tmp4 hold an unchecked full pair and tmp1/tmp2 the final
         // pair. Both differences are OR-ed together (the final one after the
         // tmp5 shift) and the result is tested once.
5958     bind(TAIL);
5959     eor(tmp4, tmp3, tmp4);
5960     eor(tmp2, tmp1, tmp2);
5961     lslv(tmp2, tmp2, tmp5);
5962     orr(tmp5, tmp4, tmp2);
5963     cmp(tmp5, zr);
5964     b(CSET_EQ);
5965 
         // TAIL2: check the tmp1/tmp2 pair in full, then let LAST_CHECK handle
         // the final tmp3/tmp4 pair.
5966     bind(TAIL2);
5967     eor(tmp2, tmp1, tmp2);
5968     cbnz(tmp2, DONE);
5969     b(LAST_CHECK);
5970 
         // STUB: cnt1 reached stubBytesThreshold. The first words are compared
         // inline and the rest is handed to the large_array_equals stub.
5971     bind(STUB);
5972     ldr(tmp4, Address(pre(a2, start_offset)));


5973     if (elem_size == 2) { // convert to byte counter
5974       lsl(cnt1, cnt1, 1);
5975     }
5976     eor(tmp5, tmp3, tmp4);
5977     cbnz(tmp5, DONE);
5978     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5979     assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5980     address tpc = trampoline_call(stub);
5981     if (tpc == nullptr) {
           // The call could not be emitted: report failure by returning nullptr.
5982       DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5983       postcond(pc() == badAddress);
5984       return nullptr;
5985     }
5986     b(DONE);
5987 
         // NOTE(review): no label is bound between the unconditional b(DONE) above
         // and bind(SHORT) below, so the next two instructions look unreachable.
5988     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5989     // so, if a2 == null => return false(0), else return true, so we can return a2
5990     mov(result, a2);
5991     b(DONE);
         // SHORT: at most one word to compare. Plain offset loads are used, so
         // a1 and a2 are left unmodified on this path.
5992     bind(SHORT);



5993     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5994     ldr(tmp3, Address(a1, start_offset));
5995     ldr(tmp4, Address(a2, start_offset));
5996     bind(LAST_CHECK);
5997     eor(tmp4, tmp3, tmp4);
5998     lslv(tmp5, tmp4, tmp5);
5999     cmp(tmp5, zr);
6000     bind(CSET_EQ);
6001     cset(result, EQ); // result = 1 if the preceding compare found no difference, else 0
6002     b(DONE);
6003   }
6004 
6005   bind(SAME);
6006   mov(result, true);
6007   // That's it.
6008   bind(DONE);
6009 
6010   BLOCK_COMMENT("} array_equals");
6011   postcond(pc() != badAddress);
6012   return pc();
6013 }
6014 
6015 // Compare Strings

5014 }
5015 
5016 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
       // Emits code that first loads rmethod's holder into rresult and then
       // replaces it with the value stored in that holder at
       // InstanceKlass::class_loader_data_offset().
5017   load_method_holder(rresult, rmethod);
5018   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5019 }
5020 
5021 void MacroAssembler::load_method_holder(Register holder, Register method) {
       // Emits a chain of three dependent loads that starts at `method`.
       // Every load targets `holder`, so it is the only register written here.
5022   ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
5023   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
5024   ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
5025 }
5026 
5027 // Loads the narrow klass of the oop in src into dst.
5028 // Writes only dst; src (when distinct from dst), rscratch1 and rscratch2 are preserved.
5029 // Input:
5030 // src - the oop we want to load the klass from.
5031 // dst - output narrow klass.
5032 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5033   assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
       // Read only 32 bits at the mark offset, then do a 32-bit logical shift
       // right by markWord::klass_shift.
5034   ldrw(dst, Address(src, oopDesc::mark_offset_in_bytes()));
5035   lsrw(dst, dst, markWord::klass_shift);
5036 }
5037 
5038 void MacroAssembler::load_klass(Register dst, Register src) {
       // Emits code that loads the klass of the oop in src into dst. The load
       // sequence depends on which of the two header flags below is enabled.
5039   if (UseCompactObjectHeaders) {
         // Narrow klass is extracted from the mark word, then decoded in dst.
5040     load_narrow_klass_compact(dst, src);
5041     decode_klass_not_null(dst);
5042   } else if (UseCompressedClassPointers) {
         // 32-bit load from the klass field, then decoded in dst.
5043     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5044     decode_klass_not_null(dst);
5045   } else {
         // Full-width load from the klass field; no decode step is emitted.
5046     ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5047   }
5048 }
5049 
5050 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5051   if (RestoreMXCSROnJNICalls) {
5052     Label OK;
5053     get_fpcr(tmp1);
5054     mov(tmp2, tmp1);
5055     // Set FPCR to the state we need. We do want Round to Nearest. We

5794     b(DONE);
5795 
5796   BIND(SET_RESULT);
5797 
5798     add(len, len, wordSize);
5799     sub(result, result, len);
5800 
5801   BIND(DONE);
5802   postcond(pc() != badAddress);
5803   return pc();
5804 }
5805 
5806 // Clobbers: rscratch1, rscratch2, rflags
5807 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
     //
     // Emits code that compares two byte (elem_size == 1) or char (elem_size == 2)
     // arrays and leaves true/false in result.
     //   a1, a2     - the array references; either may be null. Both registers
     //                may be modified by the emitted code.
     //   tmp3..tmp5 - scratch registers.
     //   result     - receives the outcome.
     //   cnt1       - scratch; holds the (remaining) element count.
     // Returns the pc following the emitted code, or nullptr when the trampoline
     // call to the large-array stub could not be emitted.
5808 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5809                                       Register tmp4, Register tmp5, Register result,
5810                                       Register cnt1, int elem_size) {
5811   Label DONE, SAME;
5812   Register tmp1 = rscratch1;
5813   Register tmp2 = rscratch2;
5814   Register cnt2 = tmp2;  // cnt2 only used in array length compare
5815   int elem_per_word = wordSize/elem_size;
5816   int log_elem_size = exact_log2(elem_size);

5817   int length_offset = arrayOopDesc::length_offset_in_bytes();
5818   int base_offset
5819     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);










       // NOTE(review): despite the "Bytes" in its name, this threshold is compared
       // against cnt1 while cnt1 still counts elements; the conversion to a byte
       // count only happens afterwards, under the STUB label.
5820   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5821 
5822   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5823   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5824 
5825 #ifndef PRODUCT
5826   {
5827     const char kind = (elem_size == 2) ? 'U' : 'L';
5828     char comment[64];
5829     snprintf(comment, sizeof comment, "array_equals%c{", kind);
5830     BLOCK_COMMENT(comment);
5831   }
5832 #endif
5833 
5834   // if (a1 == a2)
5835   //     return true;
5836   cmpoop(a1, a2); // May have read barriers for a1 and a2.
5837   br(EQ, SAME);
5838 
5839   if (UseSimpleArrayEquals) {
5840     Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5841     // if (a1 == nullptr || a2 == nullptr)
5842     //     return false;
5843     // a1 & a2 == 0 means (some-pointer is null) or
5844     // (very-rare-or-even-probably-impossible-pointer-values)
5845     // so, we can save one branch in most cases
5846     tst(a1, a2);
         // Preload "false" so that every mismatch exit can branch straight to DONE.
         // This sits between tst and br, so the flags from tst must survive it.
5847     mov(result, false);
5848     br(EQ, A_MIGHT_BE_NULL);
5849     // if (a1.length != a2.length)
5850     //      return false;
5851     bind(A_IS_NOT_NULL);
5852     ldrw(cnt1, Address(a1, length_offset));
5853     ldrw(cnt2, Address(a2, length_offset));
5854     eorw(tmp5, cnt1, cnt2);
5855     cbnzw(tmp5, DONE);
         // Lengths match: point a1/a2 at the first element.
5856     lea(a1, Address(a1, base_offset));
5857     lea(a2, Address(a2, base_offset));
5858     // Check for short strings, i.e. smaller than wordSize.
5859     subs(cnt1, cnt1, elem_per_word);
5860     br(Assembler::LT, SHORT);
5861     // Main 8 byte comparison loop.
         // The post-indexed loads advance a1/a2 by wordSize. The flags set by subs
         // are the ones tested by the br(GT) that closes the loop.
5862     bind(NEXT_WORD); {
5863       ldr(tmp1, Address(post(a1, wordSize)));
5864       ldr(tmp2, Address(post(a2, wordSize)));
5865       subs(cnt1, cnt1, elem_per_word);
5866       eor(tmp5, tmp1, tmp2);
5867       cbnz(tmp5, DONE);
5868     } br(GT, NEXT_WORD);
5869     // Last longword.  In the case where length == 4 we compare the
5870     // same longword twice, but that's still faster than another
5871     // conditional branch.
5872     // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5873     // length == 4.
         // cnt1 is scaled from elements to bytes and used as a register offset.
5874     if (log_elem_size > 0)
5875       lsl(cnt1, cnt1, log_elem_size);
5876     ldr(tmp3, Address(a1, cnt1));
5877     ldr(tmp4, Address(a2, cnt1));

5900       eorw(tmp5, tmp3, tmp4);
5901       cbnzw(tmp5, DONE);
5902     }
5903     bind(TAIL01);
5904     if (elem_size == 1) { // Only needed when comparing byte arrays.
5905       tbz(cnt1, 0, SAME); // 0-1 bytes left.
5906       {
5907         ldrb(tmp1, a1);
5908         ldrb(tmp2, a2);
5909         eorw(tmp5, tmp1, tmp2);
5910         cbnzw(tmp5, DONE);
5911       }
5912     }
         // Nothing else is emitted for this variant, so the code above falls
         // through to the SAME label bound after this if/else.
5913   } else {
5914     Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5915         CSET_EQ, LAST_CHECK;
         // Preload "false"; a null a1 or a2 exits through DONE with that value.
5916     mov(result, false);
5917     cbz(a1, DONE);
5918     ldrw(cnt1, Address(a1, length_offset));
5919     cbz(a2, DONE);
         // cnt2 aliases tmp2 (rscratch2), so each path below compares it against
         // cnt1 before tmp2 is reused for array data.
5920     ldrw(cnt2, Address(a2, length_offset));


5921     // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5922     // faster to perform another branch before comparing a1 and a2
5923     cmp(cnt1, (u1)elem_per_word);
5924     br(LE, SHORT); // short or same
5925     ldr(tmp3, Address(pre(a1, base_offset))); // pre-indexed: a1 is updated to a1 + base_offset
5926     subs(zr, cnt1, stubBytesThreshold);
5927     br(GE, STUB);
5928     ldr(tmp4, Address(pre(a2, base_offset))); // pre-indexed: a2 is updated to a2 + base_offset
         // tmp5 = -(cnt1 << (3 + log_elem_size)), i.e. the negated length in bits.
         // It is the shift amount of the lslv instructions below; presumably that
         // shift discards the bytes of the final word that lie past the end.
5929     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5930     cmp(cnt2, cnt1);
5931     br(NE, DONE); // different lengths => false
5932 
5933     // Main 16 byte comparison loop with 2 exits
5934     bind(NEXT_DWORD); {
5935       ldr(tmp1, Address(pre(a1, wordSize)));
5936       ldr(tmp2, Address(pre(a2, wordSize)));
5937       subs(cnt1, cnt1, 2 * elem_per_word);
5938       br(LE, TAIL);
5939       eor(tmp4, tmp3, tmp4);
5940       cbnz(tmp4, DONE);
5941       ldr(tmp3, Address(pre(a1, wordSize)));
5942       ldr(tmp4, Address(pre(a2, wordSize)));
5943       cmp(cnt1, (u1)elem_per_word);
5944       br(LE, TAIL2);
5945       cmp(tmp1, tmp2);
5946     } br(EQ, NEXT_DWORD);
5947     b(DONE);
5948 
         // TAIL: tmp3/tmp4 hold an unchecked full pair and tmp1/tmp2 the final
         // pair. Both differences are OR-ed together (the final one after the
         // tmp5 shift) and the result is tested once.
5949     bind(TAIL);
5950     eor(tmp4, tmp3, tmp4);
5951     eor(tmp2, tmp1, tmp2);
5952     lslv(tmp2, tmp2, tmp5);
5953     orr(tmp5, tmp4, tmp2);
5954     cmp(tmp5, zr);
5955     b(CSET_EQ);
5956 
         // TAIL2: check the tmp1/tmp2 pair in full, then let LAST_CHECK handle
         // the final tmp3/tmp4 pair.
5957     bind(TAIL2);
5958     eor(tmp2, tmp1, tmp2);
5959     cbnz(tmp2, DONE);
5960     b(LAST_CHECK);
5961 
         // STUB: cnt1 reached stubBytesThreshold. Lengths and first words are
         // compared inline and the rest is handed to the large_array_equals stub.
5962     bind(STUB);
5963     ldr(tmp4, Address(pre(a2, base_offset)));
5964     cmp(cnt2, cnt1);
5965     br(NE, DONE);
5966     if (elem_size == 2) { // convert to byte counter
5967       lsl(cnt1, cnt1, 1);
5968     }
5969     eor(tmp5, tmp3, tmp4);
5970     cbnz(tmp5, DONE);
5971     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5972     assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5973     address tpc = trampoline_call(stub);
5974     if (tpc == nullptr) {
           // The call could not be emitted: report failure by returning nullptr.
5975       DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5976       postcond(pc() == badAddress);
5977       return nullptr;
5978     }
5979     b(DONE);
5980 
         // NOTE(review): no label is bound between the unconditional b(DONE) above
         // and bind(SHORT) below, so the next two instructions look unreachable.
5981     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5982     // so, if a2 == null => return false(0), else return true, so we can return a2
5983     mov(result, a2);
5984     b(DONE);
         // SHORT: at most one word to compare. Plain offset loads are used, so
         // a1 and a2 are left unmodified on this path.
5985     bind(SHORT);
5986     cmp(cnt2, cnt1);
5987     br(NE, DONE);
5988     cbz(cnt1, SAME); // both lengths are zero => equal, nothing to load
5989     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5990     ldr(tmp3, Address(a1, base_offset));
5991     ldr(tmp4, Address(a2, base_offset));
5992     bind(LAST_CHECK);
5993     eor(tmp4, tmp3, tmp4);
5994     lslv(tmp5, tmp4, tmp5);
5995     cmp(tmp5, zr);
5996     bind(CSET_EQ);
5997     cset(result, EQ); // result = 1 if the preceding compare found no difference, else 0
5998     b(DONE);
5999   }
6000 
6001   bind(SAME);
6002   mov(result, true);
6003   // That's it.
6004   bind(DONE);
6005 
6006   BLOCK_COMMENT("} array_equals");
6007   postcond(pc() != badAddress);
6008   return pc();
6009 }
6010 
6011 // Compare Strings
< prev index next >