< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page

5023 }
5024 
// Loads into rresult the word stored at InstanceKlass::class_loader_data_offset()
// in the holder class of rmethod. rresult also carries the intermediate
// holder pointer produced by load_method_holder().
5025 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5026   load_method_holder(rresult, rmethod);
5027   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5028 }
5029 
// Loads the holder class of `method` into `holder` by chasing three pointers:
// Method -> ConstMethod -> ConstantPool -> pool holder. Every step uses
// `holder` as both base and destination, so no additional register is needed.
5030 void MacroAssembler::load_method_holder(Register holder, Register method) {
5031   ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
5032   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
5033   ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
5034 }
5035 
5036 // Loads the obj's narrow klass (not a decoded Klass*) into dst.
     // The value is obtained by loading the mark word of src and shifting it
     // right by markWord::klass_shift; load_klass() decodes it afterwards.
     // Only valid with UseCompactObjectHeaders (asserted).
5037 // Preserves all registers (incl src, rscratch1 and rscratch2).
5038 // Input:
5039 // src - the oop we want to load the klass from.
5040 // dst - output narrow klass.
5041 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5042   assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5043   ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
5044   lsr(dst, dst, markWord::klass_shift);
5045 }
5046 
// Loads the Klass* of the object in src into dst. The header layout is
// selected by VM flags:
//  - UseCompactObjectHeaders:    narrow klass is taken from the mark word via
//                                load_narrow_klass_compact(), then decoded;
//  - UseCompressedClassPointers: narrow klass is read with ldrw from the klass
//                                field, then decoded;
//  - otherwise:                  the klass field is read with ldr and used as is.
5047 void MacroAssembler::load_klass(Register dst, Register src) {
5048   if (UseCompactObjectHeaders) {
5049     load_narrow_klass_compact(dst, src);
5050     decode_klass_not_null(dst);
5051   } else if (UseCompressedClassPointers) {
5052     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5053     decode_klass_not_null(dst);
5054   } else {
5055     ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5056   }
5057 }
5058 
5059 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5060   if (RestoreMXCSROnJNICalls) {
5061     Label OK;
5062     get_fpcr(tmp1);
5063     mov(tmp2, tmp1);
5064     // Set FPCR to the state we need. We do want Round to Nearest. We

5878     b(DONE);
5879 
5880   BIND(SET_RESULT);
5881 
5882     add(len, len, wordSize);
5883     sub(result, result, len);
5884 
5885   BIND(DONE);
5886   postcond(pc() != badAddress);
5887   return pc();
5888 }
5889 
// Emits code that compares two arrays with 1- or 2-byte elements for equality
// and leaves a boolean in `result`.
//   a1, a2    - array references; null checks are emitted on both code paths.
//               Both registers are modified by the emitted code (lea and
//               pre/post-indexed loads).
//   tmp3-tmp5 - temporaries for the compared words.
//   cnt1      - element counter, loaded from the length field of a1.
//   elem_size - element size in bytes; asserted to be 1 or 2.
// Returns pc() after the emitted sequence, or nullptr when the trampoline call
// to the large_array_equals stub could not be emitted.
// In this version the comparison starts at start_offset (the length offset
// aligned down to 8 bytes), so the length field is compared together with
// the element data instead of by a separate length check.
5890 // Clobbers: rscratch1, rscratch2, rflags
5891 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5892 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5893                                       Register tmp4, Register tmp5, Register result,
5894                                       Register cnt1, int elem_size) {
5895   Label DONE, SAME;
5896   Register tmp1 = rscratch1;
5897   Register tmp2 = rscratch2;

5898   int elem_per_word = wordSize/elem_size;
5899   int log_elem_size = exact_log2(elem_size);
5900   int klass_offset  = arrayOopDesc::klass_offset_in_bytes();
5901   int length_offset = arrayOopDesc::length_offset_in_bytes();
5902   int base_offset
5903     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5904   // When the length offset is not aligned to 8 bytes,
5905   // then we align it down. This is valid because the new
5906   // offset will always be the klass which is the same
5907   // for type arrays.
5908   int start_offset = align_down(length_offset, BytesPerWord);
5909   int extra_length = base_offset - start_offset;
5910   assert(start_offset == length_offset || start_offset == klass_offset,
5911          "start offset must be 8-byte-aligned or be the klass offset");
5912   assert(base_offset != start_offset, "must include the length field");
5913   extra_length = extra_length / elem_size; // We count in elements, not bytes.
       // Counts at or above this threshold are handed to the large_array_equals
       // stub (see the STUB label below).
5914   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5915 
5916   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5917   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5918 
5919 #ifndef PRODUCT
5920   {
5921     const char kind = (elem_size == 2) ? 'U' : 'L';
5922     char comment[64];
5923     snprintf(comment, sizeof comment, "array_equals%c{", kind);
5924     BLOCK_COMMENT(comment);
5925   }
5926 #endif
5927 
5928   // if (a1 == a2)
5929   //     return true;
5930   cmpoop(a1, a2); // May have read barriers for a1 and a2.
5931   br(EQ, SAME);
5932 
5933   if (UseSimpleArrayEquals) {
5934     Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5935     // if (a1 == nullptr || a2 == nullptr)
5936     //     return false;
5937     // a1 & a2 == 0 means (some-pointer is null) or
5938     // (very-rare-or-even-probably-impossible-pointer-values)
5939     // so, we can save one branch in most cases
5940     tst(a1, a2);
5941     mov(result, false);
5942     br(EQ, A_MIGHT_BE_NULL);
5943     // if (a1.length != a2.length)
5944     //      return false;
         // NOTE: no explicit length compare is emitted here. Both pointers are
         // moved to start_offset, which lies at or before the length field, so
         // the lengths are compared as part of the first word.
5945     bind(A_IS_NOT_NULL);
5946     ldrw(cnt1, Address(a1, length_offset));
5947     // Increase loop counter by diff between base- and actual start-offset.
5948     addw(cnt1, cnt1, extra_length);
5949     lea(a1, Address(a1, start_offset));
5950     lea(a2, Address(a2, start_offset));

5951     // Check for short strings, i.e. smaller than wordSize.
5952     subs(cnt1, cnt1, elem_per_word);
5953     br(Assembler::LT, SHORT);
5954     // Main 8 byte comparison loop.
5955     bind(NEXT_WORD); {
5956       ldr(tmp1, Address(post(a1, wordSize)));
5957       ldr(tmp2, Address(post(a2, wordSize)));
5958       subs(cnt1, cnt1, elem_per_word);
5959       eor(tmp5, tmp1, tmp2);
5960       cbnz(tmp5, DONE);
5961     } br(GT, NEXT_WORD);
5962     // Last longword.  In the case where length == 4 we compare the
5963     // same longword twice, but that's still faster than another
5964     // conditional branch.
5965     // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5966     // length == 4.
5967     if (log_elem_size > 0)
5968       lsl(cnt1, cnt1, log_elem_size);
5969     ldr(tmp3, Address(a1, cnt1));
5970     ldr(tmp4, Address(a2, cnt1));

         // (Page lines 5971-5992 are not shown in this view.)
5993       eorw(tmp5, tmp3, tmp4);
5994       cbnzw(tmp5, DONE);
5995     }
5996     bind(TAIL01);
5997     if (elem_size == 1) { // Only needed when comparing byte arrays.
5998       tbz(cnt1, 0, SAME); // 0-1 bytes left.
5999       {
6000         ldrb(tmp1, a1);
6001         ldrb(tmp2, a2);
6002         eorw(tmp5, tmp1, tmp2);
6003         cbnzw(tmp5, DONE);
6004       }
6005     }
6006   } else {
6007     Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
6008         CSET_EQ, LAST_CHECK;
         // Preset "not equal": every early branch to DONE below reports false.
6009     mov(result, false);
6010     cbz(a1, DONE);
6011     ldrw(cnt1, Address(a1, length_offset));
6012     cbz(a2, DONE);
6013     // Increase loop counter by diff between base- and actual start-offset.
6014     addw(cnt1, cnt1, extra_length);
6015 
6016     // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
6017     // faster to perform another branch before comparing a1 and a2
6018     cmp(cnt1, (u1)elem_per_word);
6019     br(LE, SHORT); // short or same
         // Pre-indexed load: fetches the first word and leaves a1 at start_offset.
6020     ldr(tmp3, Address(pre(a1, start_offset)));
         // NOTE(review): cnt1 is still an element count here (it is converted
         // to bytes only after STUB), so for elem_size == 2 the "bytes"
         // threshold is applied to chars - confirm this is intended.
6021     subs(zr, cnt1, stubBytesThreshold);
6022     br(GE, STUB);
6023     ldr(tmp4, Address(pre(a2, start_offset)));
         // tmp5 = 0 - (cnt1 << (3 + log_elem_size)); consumed as the lslv shift
         // amount in TAIL and LAST_CHECK.
6024     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);


6025 
6026     // Main 16 byte comparison loop with 2 exits
6027     bind(NEXT_DWORD); {
6028       ldr(tmp1, Address(pre(a1, wordSize)));
6029       ldr(tmp2, Address(pre(a2, wordSize)));
6030       subs(cnt1, cnt1, 2 * elem_per_word);
6031       br(LE, TAIL);
6032       eor(tmp4, tmp3, tmp4);
6033       cbnz(tmp4, DONE);
6034       ldr(tmp3, Address(pre(a1, wordSize)));
6035       ldr(tmp4, Address(pre(a2, wordSize)));
6036       cmp(cnt1, (u1)elem_per_word);
6037       br(LE, TAIL2);
6038       cmp(tmp1, tmp2);
6039     } br(EQ, NEXT_DWORD);
6040     b(DONE);
6041 
         // TAIL: two word pairs are outstanding (tmp3/tmp4 and tmp1/tmp2). Their
         // differences are combined, with the last pair shifted by tmp5 first.
6042     bind(TAIL);
6043     eor(tmp4, tmp3, tmp4);
6044     eor(tmp2, tmp1, tmp2);
6045     lslv(tmp2, tmp2, tmp5);
6046     orr(tmp5, tmp4, tmp2);
6047     cmp(tmp5, zr);
6048     b(CSET_EQ);
6049 
         // TAIL2: tmp1/tmp2 must match fully; tmp3/tmp4 go through LAST_CHECK.
6050     bind(TAIL2);
6051     eor(tmp2, tmp1, tmp2);
6052     cbnz(tmp2, DONE);
6053     b(LAST_CHECK);
6054 
6055     bind(STUB);
6056     ldr(tmp4, Address(pre(a2, start_offset)));


6057     if (elem_size == 2) { // convert to byte counter
6058       lsl(cnt1, cnt1, 1);
6059     }
         // Compare the first word inline before calling the stub.
6060     eor(tmp5, tmp3, tmp4);
6061     cbnz(tmp5, DONE);
6062     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
6063     assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
6064     address tpc = trampoline_call(stub);
6065     if (tpc == nullptr) {
           // The call could not be emitted: reset the labels that are not bound
           // yet (debug builds only) and report failure to the caller.
6066       DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
6067       postcond(pc() == badAddress);
6068       return nullptr;
6069     }
6070     b(DONE);
6071 
         // NOTE(review): no label is bound between the unconditional b(DONE)
         // above and the mov below, so the next two instructions appear to be
         // unreachable - confirm and consider removing them.
6072     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
6073     // so, if a2 == null => return false(0), else return true, so we can return a2
6074     mov(result, a2);
6075     b(DONE);
6076     bind(SHORT);



         // SHORT: at most one word to compare; load it from start_offset without
         // modifying a1/a2 and fall into the shared final check.
6077     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
6078     ldr(tmp3, Address(a1, start_offset));
6079     ldr(tmp4, Address(a2, start_offset));
6080     bind(LAST_CHECK);
6081     eor(tmp4, tmp3, tmp4);
6082     lslv(tmp5, tmp4, tmp5);
6083     cmp(tmp5, zr);
6084     bind(CSET_EQ);
         // result = 1 when the preceding compare found no difference, else 0.
6085     cset(result, EQ);
6086     b(DONE);
6087   }
6088 
6089   bind(SAME);
6090   mov(result, true);
6091   // That's it.
6092   bind(DONE);
6093 
6094   BLOCK_COMMENT("} array_equals");
6095   postcond(pc() != badAddress);
6096   return pc();
6097 }
6098 
6099 // Compare Strings

5023 }
5024 
// Loads into rresult the word stored at InstanceKlass::class_loader_data_offset()
// in the holder class of rmethod. rresult also carries the intermediate
// holder pointer produced by load_method_holder().
5025 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5026   load_method_holder(rresult, rmethod);
5027   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5028 }
5029 
// Loads the holder class of `method` into `holder` by chasing three pointers:
// Method -> ConstMethod -> ConstantPool -> pool holder. Every step uses
// `holder` as both base and destination, so no additional register is needed.
5030 void MacroAssembler::load_method_holder(Register holder, Register method) {
5031   ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
5032   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
5033   ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
5034 }
5035 
5036 // Loads the obj's narrow klass (not a decoded Klass*) into dst.
     // The value is obtained with the w-form instructions: ldrw reads the word
     // at the mark offset of src and lsrw shifts it right by
     // markWord::klass_shift; load_klass() decodes it afterwards.
     // NOTE(review): this relies on the klass bits being contained in the
     // 32 bits read at mark_offset_in_bytes() - confirm against the markWord
     // layout in use.
     // Only valid with UseCompactObjectHeaders (asserted).
5037 // Preserves all registers (incl src, rscratch1 and rscratch2).
5038 // Input:
5039 // src - the oop we want to load the klass from.
5040 // dst - output narrow klass.
5041 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5042   assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5043   ldrw(dst, Address(src, oopDesc::mark_offset_in_bytes()));
5044   lsrw(dst, dst, markWord::klass_shift);
5045 }
5046 
// Loads the Klass* of the object in src into dst. The header layout is
// selected by VM flags:
//  - UseCompactObjectHeaders:    narrow klass is taken from the mark word via
//                                load_narrow_klass_compact(), then decoded;
//  - UseCompressedClassPointers: narrow klass is read with ldrw from the klass
//                                field, then decoded;
//  - otherwise:                  the klass field is read with ldr and used as is.
5047 void MacroAssembler::load_klass(Register dst, Register src) {
5048   if (UseCompactObjectHeaders) {
5049     load_narrow_klass_compact(dst, src);
5050     decode_klass_not_null(dst);
5051   } else if (UseCompressedClassPointers) {
5052     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5053     decode_klass_not_null(dst);
5054   } else {
5055     ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5056   }
5057 }
5058 
5059 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5060   if (RestoreMXCSROnJNICalls) {
5061     Label OK;
5062     get_fpcr(tmp1);
5063     mov(tmp2, tmp1);
5064     // Set FPCR to the state we need. We do want Round to Nearest. We

5878     b(DONE);
5879 
5880   BIND(SET_RESULT);
5881 
5882     add(len, len, wordSize);
5883     sub(result, result, len);
5884 
5885   BIND(DONE);
5886   postcond(pc() != badAddress);
5887   return pc();
5888 }
5889 
// Emits code that compares two arrays with 1- or 2-byte elements for equality
// and leaves a boolean in `result`.
//   a1, a2    - array references; null checks are emitted on both code paths.
//               Both registers are modified by the emitted code (lea and
//               pre/post-indexed loads).
//   tmp3-tmp5 - temporaries for the compared words.
//   cnt1      - element counter, loaded from the length field of a1.
//   elem_size - element size in bytes; asserted to be 1 or 2.
// Returns pc() after the emitted sequence, or nullptr when the trampoline call
// to the large_array_equals stub could not be emitted.
// In this version the two length fields are compared explicitly (cnt1 vs cnt2)
// and the element data is compared starting at base_offset.
5890 // Clobbers: rscratch1, rscratch2, rflags
5891 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5892 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5893                                       Register tmp4, Register tmp5, Register result,
5894                                       Register cnt1, int elem_size) {
5895   Label DONE, SAME;
5896   Register tmp1 = rscratch1;
5897   Register tmp2 = rscratch2;
       // cnt2 aliases tmp2 (rscratch2), which the comparison loops reload with
       // array data; the length compare therefore has to be emitted before tmp2
       // is overwritten on each path.
5898   Register cnt2 = tmp2;  // cnt2 only used in array length compare
5899   int elem_per_word = wordSize/elem_size;
5900   int log_elem_size = exact_log2(elem_size);

5901   int length_offset = arrayOopDesc::length_offset_in_bytes();
5902   int base_offset
5903     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);










       // Counts at or above this threshold are handed to the large_array_equals
       // stub (see the STUB label below).
5904   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5905 
5906   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5907   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5908 
5909 #ifndef PRODUCT
5910   {
5911     const char kind = (elem_size == 2) ? 'U' : 'L';
5912     char comment[64];
5913     snprintf(comment, sizeof comment, "array_equals%c{", kind);
5914     BLOCK_COMMENT(comment);
5915   }
5916 #endif
5917 
5918   // if (a1 == a2)
5919   //     return true;
5920   cmpoop(a1, a2); // May have read barriers for a1 and a2.
5921   br(EQ, SAME);
5922 
5923   if (UseSimpleArrayEquals) {
5924     Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5925     // if (a1 == nullptr || a2 == nullptr)
5926     //     return false;
5927     // a1 & a2 == 0 means (some-pointer is null) or
5928     // (very-rare-or-even-probably-impossible-pointer-values)
5929     // so, we can save one branch in most cases
5930     tst(a1, a2);
5931     mov(result, false);
5932     br(EQ, A_MIGHT_BE_NULL);
5933     // if (a1.length != a2.length)
5934     //      return false;
5935     bind(A_IS_NOT_NULL);
5936     ldrw(cnt1, Address(a1, length_offset));
5937     ldrw(cnt2, Address(a2, length_offset));
5938     eorw(tmp5, cnt1, cnt2);
5939     cbnzw(tmp5, DONE);
         // Lengths match: advance both pointers to the first element.
5940     lea(a1, Address(a1, base_offset));
5941     lea(a2, Address(a2, base_offset));
5942     // Check for short strings, i.e. smaller than wordSize.
5943     subs(cnt1, cnt1, elem_per_word);
5944     br(Assembler::LT, SHORT);
5945     // Main 8 byte comparison loop.
5946     bind(NEXT_WORD); {
5947       ldr(tmp1, Address(post(a1, wordSize)));
5948       ldr(tmp2, Address(post(a2, wordSize)));
5949       subs(cnt1, cnt1, elem_per_word);
5950       eor(tmp5, tmp1, tmp2);
5951       cbnz(tmp5, DONE);
5952     } br(GT, NEXT_WORD);
5953     // Last longword.  In the case where length == 4 we compare the
5954     // same longword twice, but that's still faster than another
5955     // conditional branch.
5956     // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5957     // length == 4.
5958     if (log_elem_size > 0)
5959       lsl(cnt1, cnt1, log_elem_size);
5960     ldr(tmp3, Address(a1, cnt1));
5961     ldr(tmp4, Address(a2, cnt1));

         // (Page lines 5962-5983 are not shown in this view.)
5984       eorw(tmp5, tmp3, tmp4);
5985       cbnzw(tmp5, DONE);
5986     }
5987     bind(TAIL01);
5988     if (elem_size == 1) { // Only needed when comparing byte arrays.
5989       tbz(cnt1, 0, SAME); // 0-1 bytes left.
5990       {
5991         ldrb(tmp1, a1);
5992         ldrb(tmp2, a2);
5993         eorw(tmp5, tmp1, tmp2);
5994         cbnzw(tmp5, DONE);
5995       }
5996     }
5997   } else {
5998     Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5999         CSET_EQ, LAST_CHECK;
         // Preset "not equal": every early branch to DONE below reports false.
6000     mov(result, false);
6001     cbz(a1, DONE);
6002     ldrw(cnt1, Address(a1, length_offset));
6003     cbz(a2, DONE);
6004     ldrw(cnt2, Address(a2, length_offset));


6005     // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
6006     // faster to perform another branch before comparing a1 and a2
6007     cmp(cnt1, (u1)elem_per_word);
6008     br(LE, SHORT); // short or same
         // Pre-indexed load: fetches the first data word and leaves a1 at base_offset.
6009     ldr(tmp3, Address(pre(a1, base_offset)));
         // NOTE(review): cnt1 is still an element count here (it is converted
         // to bytes only after STUB), so for elem_size == 2 the "bytes"
         // threshold is applied to chars - confirm this is intended.
6010     subs(zr, cnt1, stubBytesThreshold);
6011     br(GE, STUB);
6012     ldr(tmp4, Address(pre(a2, base_offset)));
         // tmp5 = 0 - (cnt1 << (3 + log_elem_size)); consumed as the lslv shift
         // amount in TAIL and LAST_CHECK.
6013     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
         // Length check for the main-loop path, before tmp2 (== cnt2) is reused.
6014     cmp(cnt2, cnt1);
6015     br(NE, DONE);
6016 
6017     // Main 16 byte comparison loop with 2 exits
6018     bind(NEXT_DWORD); {
6019       ldr(tmp1, Address(pre(a1, wordSize)));
6020       ldr(tmp2, Address(pre(a2, wordSize)));
6021       subs(cnt1, cnt1, 2 * elem_per_word);
6022       br(LE, TAIL);
6023       eor(tmp4, tmp3, tmp4);
6024       cbnz(tmp4, DONE);
6025       ldr(tmp3, Address(pre(a1, wordSize)));
6026       ldr(tmp4, Address(pre(a2, wordSize)));
6027       cmp(cnt1, (u1)elem_per_word);
6028       br(LE, TAIL2);
6029       cmp(tmp1, tmp2);
6030     } br(EQ, NEXT_DWORD);
6031     b(DONE);
6032 
         // TAIL: two word pairs are outstanding (tmp3/tmp4 and tmp1/tmp2). Their
         // differences are combined, with the last pair shifted by tmp5 first.
6033     bind(TAIL);
6034     eor(tmp4, tmp3, tmp4);
6035     eor(tmp2, tmp1, tmp2);
6036     lslv(tmp2, tmp2, tmp5);
6037     orr(tmp5, tmp4, tmp2);
6038     cmp(tmp5, zr);
6039     b(CSET_EQ);
6040 
         // TAIL2: tmp1/tmp2 must match fully; tmp3/tmp4 go through LAST_CHECK.
6041     bind(TAIL2);
6042     eor(tmp2, tmp1, tmp2);
6043     cbnz(tmp2, DONE);
6044     b(LAST_CHECK);
6045 
6046     bind(STUB);
6047     ldr(tmp4, Address(pre(a2, base_offset)));
         // Length check for the stub path.
6048     cmp(cnt2, cnt1);
6049     br(NE, DONE);
6050     if (elem_size == 2) { // convert to byte counter
6051       lsl(cnt1, cnt1, 1);
6052     }
         // Compare the first word inline before calling the stub.
6053     eor(tmp5, tmp3, tmp4);
6054     cbnz(tmp5, DONE);
6055     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
6056     assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
6057     address tpc = trampoline_call(stub);
6058     if (tpc == nullptr) {
           // The call could not be emitted: reset the labels that are not bound
           // yet (debug builds only) and report failure to the caller.
6059       DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
6060       postcond(pc() == badAddress);
6061       return nullptr;
6062     }
6063     b(DONE);
6064 
         // NOTE(review): no label is bound between the unconditional b(DONE)
         // above and the mov below, so the next two instructions appear to be
         // unreachable - confirm and consider removing them.
6065     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
6066     // so, if a2 == null => return false(0), else return true, so we can return a2
6067     mov(result, a2);
6068     b(DONE);
6069     bind(SHORT);
         // SHORT: different lengths -> false; two empty arrays -> true; otherwise
         // a single word loaded from base_offset goes through the final check.
6070     cmp(cnt2, cnt1);
6071     br(NE, DONE);
6072     cbz(cnt1, SAME);
6073     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
6074     ldr(tmp3, Address(a1, base_offset));
6075     ldr(tmp4, Address(a2, base_offset));
6076     bind(LAST_CHECK);
6077     eor(tmp4, tmp3, tmp4);
6078     lslv(tmp5, tmp4, tmp5);
6079     cmp(tmp5, zr);
6080     bind(CSET_EQ);
         // result = 1 when the preceding compare found no difference, else 0.
6081     cset(result, EQ);
6082     b(DONE);
6083   }
6084 
6085   bind(SAME);
6086   mov(result, true);
6087   // That's it.
6088   bind(DONE);
6089 
6090   BLOCK_COMMENT("} array_equals");
6091   postcond(pc() != badAddress);
6092   return pc();
6093 }
6094 
6095 // Compare Strings
< prev index next >