< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page

4894 }
4895 
4896 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
       // Emits two dependent steps: first the holder of rmethod is loaded into
       // rresult (via load_method_holder), then rresult is overwritten with the
       // word stored at InstanceKlass::class_loader_data_offset() inside that
       // holder. rmethod is only passed on as the starting point; rresult carries
       // both the intermediate holder pointer and the final value.
4897   load_method_holder(rresult, rmethod);
4898   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
4899 }
4900 
4901 void MacroAssembler::load_method_holder(Register holder, Register method) {
       // Emits a chain of three loads that starts at `method` and ends with the
       // constant pool's holder in `holder`. `holder` is reused for every
       // intermediate pointer, so it is the only register this sequence writes.
4902   ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
4903   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
4904   ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
4905 }
4906 
4907 // Loads the obj's narrow klass (not yet decoded to a Klass*) into dst.
4908 // Preserves all other registers (incl src, rscratch1 and rscratch2); only dst is written.
4909 // Input:
4910 // src - the oop we want to load the klass from.
4911 // dst - output narrow klass.
4912 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
4913   assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
       // Fetch the header word found at the mark offset of the object, then shift
       // it right by markWord::klass_shift so that the klass bits start at bit 0.
       // load_klass() below passes the result on to decode_klass_not_null().
4914   ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
4915   lsr(dst, dst, markWord::klass_shift);
4916 }
4917 
4918 void MacroAssembler::load_klass(Register dst, Register src) {
       // Emits code that loads the klass of the object in src into dst. Which
       // header field is read, and whether the value needs decoding, depends on
       // the UseCompactObjectHeaders / UseCompressedClassPointers flags.
4919   if (UseCompactObjectHeaders) {
         // The narrow klass is extracted from the mark word and then decoded.
4920     load_narrow_klass_compact(dst, src);
4921     decode_klass_not_null(dst);
4922   } else if (UseCompressedClassPointers) {
         // The narrow klass is read with ldrw from the klass field and then decoded.
4923     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4924     decode_klass_not_null(dst);
4925   } else {
         // The klass field is read with ldr and used as is; no decode step.
4926     ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4927   }
4928 }
4929 
4930 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
4931   if (RestoreMXCSROnJNICalls) {
4932     Label OK;
4933     get_fpcr(tmp1);
4934     mov(tmp2, tmp1);
4935     // Set FPCR to the state we need. We do want Round to Nearest. We

5717     b(DONE);
5718 
5719   BIND(SET_RESULT);
5720 
5721     add(len, len, wordSize);
5722     sub(result, result, len);
5723 
5724   BIND(DONE);
5725   postcond(pc() != badAddress);
5726   return pc();
5727 }
5728 
5729 // Clobbers: rscratch1, rscratch2, rflags
5730 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
     //
     // Emits code that compares the two arrays a1 and a2 and leaves true or false
     // in result.
     //   a1, a2     - the arrays to compare; the emitted code checks them for null
     //                and advances both registers while comparing.
     //   tmp3..tmp5 - temporaries (rscratch1/rscratch2 are used as tmp1/tmp2).
     //   result     - output: true when the arrays are equal, false otherwise.
     //   cnt1       - temporary holding the number of elements still to compare.
     //   elem_size  - element size in bytes; must be 1 or 2 (asserted below).
     // Returns pc() after the emitted sequence, or nullptr when trampoline_call()
     // for the large-array stub fails.
5731 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5732                                       Register tmp4, Register tmp5, Register result,
5733                                       Register cnt1, int elem_size) {
5734   Label DONE, SAME;
5735   Register tmp1 = rscratch1;
5736   Register tmp2 = rscratch2;

5737   int elem_per_word = wordSize/elem_size;
5738   int log_elem_size = exact_log2(elem_size);
5739   int klass_offset  = arrayOopDesc::klass_offset_in_bytes();
5740   int length_offset = arrayOopDesc::length_offset_in_bytes();
5741   int base_offset
5742     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5743   // When the length offset is not aligned to 8 bytes,
5744   // then we align it down. This is valid because the new
5745   // offset will always be the klass which is the same
5746   // for type arrays.
5747   int start_offset = align_down(length_offset, BytesPerWord);
       // Number of header bytes, from start_offset up to the first element, that
       // are compared together with the payload. Since this range contains the
       // length field (asserted below), no separate length comparison is emitted.
5748   int extra_length = base_offset - start_offset;
5749   assert(start_offset == length_offset || start_offset == klass_offset,
5750          "start offset must be 8-byte-aligned or be the klass offset");
5751   assert(base_offset != start_offset, "must include the length field");
5752   extra_length = extra_length / elem_size; // We count in elements, not bytes.
       // NOTE(review): despite its name this value is compared against cnt1 while
       // cnt1 still counts elements (the conversion to bytes only happens later,
       // at STUB) - confirm the intended unit for elem_size == 2.
5753   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5754 
5755   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5756   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5757 
5758 #ifndef PRODUCT
5759   {
5760     const char kind = (elem_size == 2) ? 'U' : 'L';
5761     char comment[64];
5762     os::snprintf_checked(comment, sizeof comment, "array_equals%c{", kind);
5763     BLOCK_COMMENT(comment);
5764   }
5765 #endif
5766 
5767   // if (a1 == a2)
5768   //     return true;
5769   cmpoop(a1, a2); // May have read barriers for a1 and a2.
5770   br(EQ, SAME);
5771 
5772   if (UseSimpleArrayEquals) {
5773     Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5774     // if (a1 == nullptr || a2 == nullptr)
5775     //     return false;
5776     // a1 & a2 == 0 means (some-pointer is null) or
5777     // (very-rare-or-even-probably-impossible-pointer-values)
5778     // so, we can save one branch in most cases
5779     tst(a1, a2);
5780     mov(result, false);
5781     br(EQ, A_MIGHT_BE_NULL);
5782     // if (a1.length != a2.length)
5783     //      return false;
5784     bind(A_IS_NOT_NULL);
5785     ldrw(cnt1, Address(a1, length_offset));
5786     // Increase loop counter by diff between base- and actual start-offset.
5787     addw(cnt1, cnt1, extra_length);
         // Point both registers at start_offset, i.e. at the header word that
         // contains the length, so the loop below compares it like array data.
5788     lea(a1, Address(a1, start_offset));
5789     lea(a2, Address(a2, start_offset));

5790     // Check for short strings, i.e. smaller than wordSize.
5791     subs(cnt1, cnt1, elem_per_word);
5792     br(Assembler::LT, SHORT);
5793     // Main 8 byte comparison loop.
5794     bind(NEXT_WORD); {
5795       ldr(tmp1, Address(post(a1, wordSize)));
5796       ldr(tmp2, Address(post(a2, wordSize)));
5797       subs(cnt1, cnt1, elem_per_word);
5798       eor(tmp5, tmp1, tmp2);
5799       cbnz(tmp5, DONE);
5800     } br(GT, NEXT_WORD);
5801     // Last longword.  In the case where length == 4 we compare the
5802     // same longword twice, but that's still faster than another
5803     // conditional branch.
5804     // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5805     // length == 4.
5806     if (log_elem_size > 0)
5807       lsl(cnt1, cnt1, log_elem_size);
5808     ldr(tmp3, Address(a1, cnt1));
5809     ldr(tmp4, Address(a2, cnt1));

5832       eorw(tmp5, tmp3, tmp4);
5833       cbnzw(tmp5, DONE);
5834     }
5835     bind(TAIL01);
5836     if (elem_size == 1) { // Only needed when comparing byte arrays.
5837       tbz(cnt1, 0, SAME); // 0-1 bytes left.
5838       {
5839         ldrb(tmp1, a1);
5840         ldrb(tmp2, a2);
5841         eorw(tmp5, tmp1, tmp2);
5842         cbnzw(tmp5, DONE);
5843       }
5844     }
5845   } else {
5846     Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5847         CSET_EQ, LAST_CHECK;
         // result is preset to false so that every early exit to DONE reports
         // "not equal" without further work.
5848     mov(result, false);
5849     cbz(a1, DONE);
5850     ldrw(cnt1, Address(a1, length_offset));
5851     cbz(a2, DONE);
5852     // Increase loop counter by diff between base- and actual start-offset.
5853     addw(cnt1, cnt1, extra_length);
5854 
5855     // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5856     // faster to perform another branch before comparing a1 and a2
5857     cmp(cnt1, (u1)elem_per_word);
5858     br(LE, SHORT); // short or same
5859     ldr(tmp3, Address(pre(a1, start_offset)));
5860     subs(zr, cnt1, stubBytesThreshold);
5861     br(GE, STUB);
5862     ldr(tmp4, Address(pre(a2, start_offset)));
         // tmp5 = -(cnt1 << (3 + log_elem_size)), i.e. the negated bit count. It is
         // the shift amount of the lslv at TAIL / LAST_CHECK; presumably only its
         // low bits are used there, which drops the bytes loaded from beyond the
         // end of the data in the final word - confirm against the ISA reference.
5863     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);


5864 
5865     // Main 16 byte comparison loop with 2 exits
5866     bind(NEXT_DWORD); {
5867       ldr(tmp1, Address(pre(a1, wordSize)));
5868       ldr(tmp2, Address(pre(a2, wordSize)));
5869       subs(cnt1, cnt1, 2 * elem_per_word);
5870       br(LE, TAIL);
5871       eor(tmp4, tmp3, tmp4);
5872       cbnz(tmp4, DONE);
5873       ldr(tmp3, Address(pre(a1, wordSize)));
5874       ldr(tmp4, Address(pre(a2, wordSize)));
5875       cmp(cnt1, (u1)elem_per_word);
5876       br(LE, TAIL2);
5877       cmp(tmp1, tmp2);
5878     } br(EQ, NEXT_DWORD);
5879     b(DONE);
5880 
         // Exit 1: tmp3/tmp4 hold a full word pair and tmp1/tmp2 the final pair.
         // Both differences are merged and tested with a single compare.
5881     bind(TAIL);
5882     eor(tmp4, tmp3, tmp4);
5883     eor(tmp2, tmp1, tmp2);
5884     lslv(tmp2, tmp2, tmp5);
5885     orr(tmp5, tmp4, tmp2);
5886     cmp(tmp5, zr);
5887     b(CSET_EQ);
5888 
         // Exit 2: tmp1/tmp2 hold a full word pair that is checked here; the final
         // pair in tmp3/tmp4 is left to LAST_CHECK.
5889     bind(TAIL2);
5890     eor(tmp2, tmp1, tmp2);
5891     cbnz(tmp2, DONE);
5892     b(LAST_CHECK);
5893 
         // Large arrays: the first word pair is compared inline, the remainder is
         // handed to the large_array_equals stub.
5894     bind(STUB);
5895     ldr(tmp4, Address(pre(a2, start_offset)));


5896     if (elem_size == 2) { // convert to byte counter
5897       lsl(cnt1, cnt1, 1);
5898     }
5899     eor(tmp5, tmp3, tmp4);
5900     cbnz(tmp5, DONE);
5901     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5902     assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5903     address tpc = trampoline_call(stub);
5904     if (tpc == nullptr) {
           // Emitting the call failed: report the failure to the caller as nullptr.
5905       DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5906       postcond(pc() == badAddress);
5907       return nullptr;
5908     }
5909     b(DONE);
5910 
         // NOTE(review): no label is bound between the unconditional b(DONE) above
         // and the two instructions below in the code shown here, so they look
         // unreachable - confirm whether they can be removed.
5911     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5912     // so, if a2 == null => return false(0), else return true, so we can return a2
5913     mov(result, a2);
5914     b(DONE);
5915     bind(SHORT);



5916     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5917     ldr(tmp3, Address(a1, start_offset));
5918     ldr(tmp4, Address(a2, start_offset));
5919     bind(LAST_CHECK);
5920     eor(tmp4, tmp3, tmp4);
5921     lslv(tmp5, tmp4, tmp5);
5922     cmp(tmp5, zr);
5923     bind(CSET_EQ);
5924     cset(result, EQ);
5925     b(DONE);
5926   }
5927 
5928   bind(SAME);
5929   mov(result, true);
5930   // That's it.
5931   bind(DONE);
5932 
5933   BLOCK_COMMENT("} array_equals");
5934   postcond(pc() != badAddress);
5935   return pc();
5936 }
5937 
5938 // Compare Strings

4894 }
4895 
4896 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
       // Emits two dependent steps: first the holder of rmethod is loaded into
       // rresult (via load_method_holder), then rresult is overwritten with the
       // word stored at InstanceKlass::class_loader_data_offset() inside that
       // holder. rmethod is only passed on as the starting point; rresult carries
       // both the intermediate holder pointer and the final value.
4897   load_method_holder(rresult, rmethod);
4898   ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
4899 }
4900 
4901 void MacroAssembler::load_method_holder(Register holder, Register method) {
       // Emits a chain of three loads that starts at `method` and ends with the
       // constant pool's holder in `holder`. `holder` is reused for every
       // intermediate pointer, so it is the only register this sequence writes.
4902   ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
4903   ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
4904   ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
4905 }
4906 
4907 // Loads the obj's narrow klass (not yet decoded to a Klass*) into dst.
4908 // Preserves all other registers (incl src, rscratch1 and rscratch2); only dst is written.
4909 // Input:
4910 // src - the oop we want to load the klass from.
4911 // dst - output narrow klass.
4912 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
4913   assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
       // Fetch the header at the mark offset of the object with ldrw and shift it
       // right by markWord::klass_shift with lsrw, so that the klass bits start at
       // bit 0. NOTE(review): the w-forms presumably operate on 32 bits only, which
       // requires the klass bits to live in the word read here - confirm against
       // the markWord layout in use.
4914   ldrw(dst, Address(src, oopDesc::mark_offset_in_bytes()));
4915   lsrw(dst, dst, markWord::klass_shift);
4916 }
4917 
4918 void MacroAssembler::load_klass(Register dst, Register src) {
       // Emits code that loads the klass of the object in src into dst. Which
       // header field is read, and whether the value needs decoding, depends on
       // the UseCompactObjectHeaders / UseCompressedClassPointers flags.
4919   if (UseCompactObjectHeaders) {
         // The narrow klass is extracted from the mark word and then decoded.
4920     load_narrow_klass_compact(dst, src);
4921     decode_klass_not_null(dst);
4922   } else if (UseCompressedClassPointers) {
         // The narrow klass is read with ldrw from the klass field and then decoded.
4923     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4924     decode_klass_not_null(dst);
4925   } else {
         // The klass field is read with ldr and used as is; no decode step.
4926     ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4927   }
4928 }
4929 
4930 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
4931   if (RestoreMXCSROnJNICalls) {
4932     Label OK;
4933     get_fpcr(tmp1);
4934     mov(tmp2, tmp1);
4935     // Set FPCR to the state we need. We do want Round to Nearest. We

5717     b(DONE);
5718 
5719   BIND(SET_RESULT);
5720 
5721     add(len, len, wordSize);
5722     sub(result, result, len);
5723 
5724   BIND(DONE);
5725   postcond(pc() != badAddress);
5726   return pc();
5727 }
5728 
5729 // Clobbers: rscratch1, rscratch2, rflags
5730 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
     //
     // Emits code that compares the two arrays a1 and a2 and leaves true or false
     // in result.
     //   a1, a2     - the arrays to compare; the emitted code checks them for null
     //                and advances both registers while comparing.
     //   tmp3..tmp5 - temporaries (rscratch1/rscratch2 are used as tmp1/tmp2).
     //   result     - output: true when the arrays are equal, false otherwise.
     //   cnt1       - temporary holding the number of elements still to compare.
     //   elem_size  - element size in bytes; must be 1 or 2 (asserted below).
     // Returns pc() after the emitted sequence, or nullptr when trampoline_call()
     // for the large-array stub fails.
5731 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5732                                       Register tmp4, Register tmp5, Register result,
5733                                       Register cnt1, int elem_size) {
5734   Label DONE, SAME;
5735   Register tmp1 = rscratch1;
5736   Register tmp2 = rscratch2;
       // cnt2 shares rscratch2 with tmp2, so every length comparison below is
       // emitted before tmp2 is loaded with array data.
5737   Register cnt2 = tmp2;  // cnt2 only used in array length compare
5738   int elem_per_word = wordSize/elem_size;
5739   int log_elem_size = exact_log2(elem_size);

5740   int length_offset = arrayOopDesc::length_offset_in_bytes();
5741   int base_offset
5742     = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);










       // NOTE(review): despite its name this value is compared against cnt1 while
       // cnt1 still counts elements (the conversion to bytes only happens later,
       // at STUB) - confirm the intended unit for elem_size == 2.
5743   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5744 
5745   assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5746   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5747 
5748 #ifndef PRODUCT
5749   {
5750     const char kind = (elem_size == 2) ? 'U' : 'L';
5751     char comment[64];
5752     os::snprintf_checked(comment, sizeof comment, "array_equals%c{", kind);
5753     BLOCK_COMMENT(comment);
5754   }
5755 #endif
5756 
5757   // if (a1 == a2)
5758   //     return true;
5759   cmpoop(a1, a2); // May have read barriers for a1 and a2.
5760   br(EQ, SAME);
5761 
5762   if (UseSimpleArrayEquals) {
5763     Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5764     // if (a1 == nullptr || a2 == nullptr)
5765     //     return false;
5766     // a1 & a2 == 0 means (some-pointer is null) or
5767     // (very-rare-or-even-probably-impossible-pointer-values)
5768     // so, we can save one branch in most cases
5769     tst(a1, a2);
5770     mov(result, false);
5771     br(EQ, A_MIGHT_BE_NULL);
5772     // if (a1.length != a2.length)
5773     //      return false;
5774     bind(A_IS_NOT_NULL);
5775     ldrw(cnt1, Address(a1, length_offset));
5776     ldrw(cnt2, Address(a2, length_offset));
5777     eorw(tmp5, cnt1, cnt2);
5778     cbnzw(tmp5, DONE);
         // Lengths match: move both registers to the first element.
5779     lea(a1, Address(a1, base_offset));
5780     lea(a2, Address(a2, base_offset));
5781     // Check for short strings, i.e. smaller than wordSize.
5782     subs(cnt1, cnt1, elem_per_word);
5783     br(Assembler::LT, SHORT);
5784     // Main 8 byte comparison loop.
5785     bind(NEXT_WORD); {
5786       ldr(tmp1, Address(post(a1, wordSize)));
5787       ldr(tmp2, Address(post(a2, wordSize)));
5788       subs(cnt1, cnt1, elem_per_word);
5789       eor(tmp5, tmp1, tmp2);
5790       cbnz(tmp5, DONE);
5791     } br(GT, NEXT_WORD);
5792     // Last longword.  In the case where length == 4 we compare the
5793     // same longword twice, but that's still faster than another
5794     // conditional branch.
5795     // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5796     // length == 4.
5797     if (log_elem_size > 0)
5798       lsl(cnt1, cnt1, log_elem_size);
5799     ldr(tmp3, Address(a1, cnt1));
5800     ldr(tmp4, Address(a2, cnt1));

5823       eorw(tmp5, tmp3, tmp4);
5824       cbnzw(tmp5, DONE);
5825     }
5826     bind(TAIL01);
5827     if (elem_size == 1) { // Only needed when comparing byte arrays.
5828       tbz(cnt1, 0, SAME); // 0-1 bytes left.
5829       {
5830         ldrb(tmp1, a1);
5831         ldrb(tmp2, a2);
5832         eorw(tmp5, tmp1, tmp2);
5833         cbnzw(tmp5, DONE);
5834       }
5835     }
5836   } else {
5837     Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5838         CSET_EQ, LAST_CHECK;
         // result is preset to false so that every early exit to DONE reports
         // "not equal" without further work.
5839     mov(result, false);
5840     cbz(a1, DONE);
5841     ldrw(cnt1, Address(a1, length_offset));
5842     cbz(a2, DONE);
5843     ldrw(cnt2, Address(a2, length_offset));


5844     // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5845     // faster to perform another branch before comparing a1 and a2
5846     cmp(cnt1, (u1)elem_per_word);
5847     br(LE, SHORT); // short or same
5848     ldr(tmp3, Address(pre(a1, base_offset)));
5849     subs(zr, cnt1, stubBytesThreshold);
5850     br(GE, STUB);
5851     ldr(tmp4, Address(pre(a2, base_offset)));
         // tmp5 = -(cnt1 << (3 + log_elem_size)), i.e. the negated bit count. It is
         // the shift amount of the lslv at TAIL / LAST_CHECK; presumably only its
         // low bits are used there, which drops the bytes loaded from beyond the
         // end of the data in the final word - confirm against the ISA reference.
5852     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
         // Different lengths => not equal (result is already false).
5853     cmp(cnt2, cnt1);
5854     br(NE, DONE);
5855 
5856     // Main 16 byte comparison loop with 2 exits
5857     bind(NEXT_DWORD); {
5858       ldr(tmp1, Address(pre(a1, wordSize)));
5859       ldr(tmp2, Address(pre(a2, wordSize)));
5860       subs(cnt1, cnt1, 2 * elem_per_word);
5861       br(LE, TAIL);
5862       eor(tmp4, tmp3, tmp4);
5863       cbnz(tmp4, DONE);
5864       ldr(tmp3, Address(pre(a1, wordSize)));
5865       ldr(tmp4, Address(pre(a2, wordSize)));
5866       cmp(cnt1, (u1)elem_per_word);
5867       br(LE, TAIL2);
5868       cmp(tmp1, tmp2);
5869     } br(EQ, NEXT_DWORD);
5870     b(DONE);
5871 
         // Exit 1: tmp3/tmp4 hold a full word pair and tmp1/tmp2 the final pair.
         // Both differences are merged and tested with a single compare.
5872     bind(TAIL);
5873     eor(tmp4, tmp3, tmp4);
5874     eor(tmp2, tmp1, tmp2);
5875     lslv(tmp2, tmp2, tmp5);
5876     orr(tmp5, tmp4, tmp2);
5877     cmp(tmp5, zr);
5878     b(CSET_EQ);
5879 
         // Exit 2: tmp1/tmp2 hold a full word pair that is checked here; the final
         // pair in tmp3/tmp4 is left to LAST_CHECK.
5880     bind(TAIL2);
5881     eor(tmp2, tmp1, tmp2);
5882     cbnz(tmp2, DONE);
5883     b(LAST_CHECK);
5884 
         // Large arrays: lengths and the first word pair are compared inline, the
         // remainder is handed to the large_array_equals stub.
5885     bind(STUB);
5886     ldr(tmp4, Address(pre(a2, base_offset)));
5887     cmp(cnt2, cnt1);
5888     br(NE, DONE);
5889     if (elem_size == 2) { // convert to byte counter
5890       lsl(cnt1, cnt1, 1);
5891     }
5892     eor(tmp5, tmp3, tmp4);
5893     cbnz(tmp5, DONE);
5894     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5895     assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5896     address tpc = trampoline_call(stub);
5897     if (tpc == nullptr) {
           // Emitting the call failed: report the failure to the caller as nullptr.
5898       DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5899       postcond(pc() == badAddress);
5900       return nullptr;
5901     }
5902     b(DONE);
5903 
         // NOTE(review): no label is bound between the unconditional b(DONE) above
         // and the two instructions below in the code shown here, so they look
         // unreachable - confirm whether they can be removed.
5904     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5905     // so, if a2 == null => return false(0), else return true, so we can return a2
5906     mov(result, a2);
5907     b(DONE);
5908     bind(SHORT);
         // At most one word of data: different lengths => false, two empty arrays
         // => true, otherwise a single masked word compare decides.
5909     cmp(cnt2, cnt1);
5910     br(NE, DONE);
5911     cbz(cnt1, SAME);
5912     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5913     ldr(tmp3, Address(a1, base_offset));
5914     ldr(tmp4, Address(a2, base_offset));
5915     bind(LAST_CHECK);
5916     eor(tmp4, tmp3, tmp4);
5917     lslv(tmp5, tmp4, tmp5);
5918     cmp(tmp5, zr);
5919     bind(CSET_EQ);
5920     cset(result, EQ);
5921     b(DONE);
5922   }
5923 
5924   bind(SAME);
5925   mov(result, true);
5926   // That's it.
5927   bind(DONE);
5928 
5929   BLOCK_COMMENT("} array_equals");
5930   postcond(pc() != badAddress);
5931   return pc();
5932 }
5933 
5934 // Compare Strings
< prev index next >