4894 }
4895
4896 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
4897 load_method_holder(rresult, rmethod);
4898 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
4899 }
4900
4901 void MacroAssembler::load_method_holder(Register holder, Register method) {
4902 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
4903 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
4904 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
4905 }
4906
4907 // Loads the obj's Klass* into dst.
4908 // Preserves all registers (incl src, rscratch1 and rscratch2).
4909 // Input:
4910 // src - the oop we want to load the klass from.
4911 // dst - output narrow klass.
4912 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
4913 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
4914 ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
4915 lsr(dst, dst, markWord::klass_shift);
4916 }
4917
4918 void MacroAssembler::load_klass(Register dst, Register src) {
4919 if (UseCompactObjectHeaders) {
4920 load_narrow_klass_compact(dst, src);
4921 decode_klass_not_null(dst);
4922 } else if (UseCompressedClassPointers) {
4923 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4924 decode_klass_not_null(dst);
4925 } else {
4926 ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4927 }
4928 }
4929
4930 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
4931 if (RestoreMXCSROnJNICalls) {
4932 Label OK;
4933 get_fpcr(tmp1);
4934 mov(tmp2, tmp1);
4935 // Set FPCR to the state we need. We do want Round to Nearest. We
5717 b(DONE);
5718
5719 BIND(SET_RESULT);
5720
5721 add(len, len, wordSize);
5722 sub(result, result, len);
5723
5724 BIND(DONE);
5725 postcond(pc() != badAddress);
5726 return pc();
5727 }
5728
5729 // Clobbers: rscratch1, rscratch2, rflags
5730 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5731 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5732 Register tmp4, Register tmp5, Register result,
5733 Register cnt1, int elem_size) {
5734 Label DONE, SAME;
5735 Register tmp1 = rscratch1;
5736 Register tmp2 = rscratch2;
5737 int elem_per_word = wordSize/elem_size;
5738 int log_elem_size = exact_log2(elem_size);
5739 int klass_offset = arrayOopDesc::klass_offset_in_bytes();
5740 int length_offset = arrayOopDesc::length_offset_in_bytes();
5741 int base_offset
5742 = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5743 // When the length offset is not aligned to 8 bytes,
5744 // then we align it down. This is valid because the new
5745 // offset will always be the klass which is the same
5746 // for type arrays.
5747 int start_offset = align_down(length_offset, BytesPerWord);
5748 int extra_length = base_offset - start_offset;
5749 assert(start_offset == length_offset || start_offset == klass_offset,
5750 "start offset must be 8-byte-aligned or be the klass offset");
5751 assert(base_offset != start_offset, "must include the length field");
5752 extra_length = extra_length / elem_size; // We count in elements, not bytes.
5753 int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5754
5755 assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5756 assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5757
5758 #ifndef PRODUCT
5759 {
5760 const char kind = (elem_size == 2) ? 'U' : 'L';
5761 char comment[64];
5762 os::snprintf_checked(comment, sizeof comment, "array_equals%c{", kind);
5763 BLOCK_COMMENT(comment);
5764 }
5765 #endif
5766
5767 // if (a1 == a2)
5768 // return true;
5769 cmpoop(a1, a2); // May have read barriers for a1 and a2.
5770 br(EQ, SAME);
5771
5772 if (UseSimpleArrayEquals) {
5773 Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5774 // if (a1 == nullptr || a2 == nullptr)
5775 // return false;
5776 // a1 & a2 == 0 means (some-pointer is null) or
5777 // (very-rare-or-even-probably-impossible-pointer-values)
5778 // so, we can save one branch in most cases
5779 tst(a1, a2);
5780 mov(result, false);
5781 br(EQ, A_MIGHT_BE_NULL);
5782 // if (a1.length != a2.length)
5783 // return false;
5784 bind(A_IS_NOT_NULL);
5785 ldrw(cnt1, Address(a1, length_offset));
5786 // Increase loop counter by diff between base- and actual start-offset.
5787 addw(cnt1, cnt1, extra_length);
5788 lea(a1, Address(a1, start_offset));
5789 lea(a2, Address(a2, start_offset));
5790 // Check for short strings, i.e. smaller than wordSize.
5791 subs(cnt1, cnt1, elem_per_word);
5792 br(Assembler::LT, SHORT);
5793 // Main 8 byte comparison loop.
5794 bind(NEXT_WORD); {
5795 ldr(tmp1, Address(post(a1, wordSize)));
5796 ldr(tmp2, Address(post(a2, wordSize)));
5797 subs(cnt1, cnt1, elem_per_word);
5798 eor(tmp5, tmp1, tmp2);
5799 cbnz(tmp5, DONE);
5800 } br(GT, NEXT_WORD);
5801 // Last longword. In the case where length == 4 we compare the
5802 // same longword twice, but that's still faster than another
5803 // conditional branch.
5804 // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5805 // length == 4.
5806 if (log_elem_size > 0)
5807 lsl(cnt1, cnt1, log_elem_size);
5808 ldr(tmp3, Address(a1, cnt1));
5809 ldr(tmp4, Address(a2, cnt1));
5832 eorw(tmp5, tmp3, tmp4);
5833 cbnzw(tmp5, DONE);
5834 }
5835 bind(TAIL01);
5836 if (elem_size == 1) { // Only needed when comparing byte arrays.
5837 tbz(cnt1, 0, SAME); // 0-1 bytes left.
5838 {
5839 ldrb(tmp1, a1);
5840 ldrb(tmp2, a2);
5841 eorw(tmp5, tmp1, tmp2);
5842 cbnzw(tmp5, DONE);
5843 }
5844 }
5845 } else {
5846 Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5847 CSET_EQ, LAST_CHECK;
5848 mov(result, false);
5849 cbz(a1, DONE);
5850 ldrw(cnt1, Address(a1, length_offset));
5851 cbz(a2, DONE);
5852 // Increase loop counter by diff between base- and actual start-offset.
5853 addw(cnt1, cnt1, extra_length);
5854
5855 // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5856 // faster to perform another branch before comparing a1 and a2
5857 cmp(cnt1, (u1)elem_per_word);
5858 br(LE, SHORT); // short or same
5859 ldr(tmp3, Address(pre(a1, start_offset)));
5860 subs(zr, cnt1, stubBytesThreshold);
5861 br(GE, STUB);
5862 ldr(tmp4, Address(pre(a2, start_offset)));
5863 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5864
5865 // Main 16 byte comparison loop with 2 exits
5866 bind(NEXT_DWORD); {
5867 ldr(tmp1, Address(pre(a1, wordSize)));
5868 ldr(tmp2, Address(pre(a2, wordSize)));
5869 subs(cnt1, cnt1, 2 * elem_per_word);
5870 br(LE, TAIL);
5871 eor(tmp4, tmp3, tmp4);
5872 cbnz(tmp4, DONE);
5873 ldr(tmp3, Address(pre(a1, wordSize)));
5874 ldr(tmp4, Address(pre(a2, wordSize)));
5875 cmp(cnt1, (u1)elem_per_word);
5876 br(LE, TAIL2);
5877 cmp(tmp1, tmp2);
5878 } br(EQ, NEXT_DWORD);
5879 b(DONE);
5880
5881 bind(TAIL);
5882 eor(tmp4, tmp3, tmp4);
5883 eor(tmp2, tmp1, tmp2);
5884 lslv(tmp2, tmp2, tmp5);
5885 orr(tmp5, tmp4, tmp2);
5886 cmp(tmp5, zr);
5887 b(CSET_EQ);
5888
5889 bind(TAIL2);
5890 eor(tmp2, tmp1, tmp2);
5891 cbnz(tmp2, DONE);
5892 b(LAST_CHECK);
5893
5894 bind(STUB);
5895 ldr(tmp4, Address(pre(a2, start_offset)));
5896 if (elem_size == 2) { // convert to byte counter
5897 lsl(cnt1, cnt1, 1);
5898 }
5899 eor(tmp5, tmp3, tmp4);
5900 cbnz(tmp5, DONE);
5901 RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5902 assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5903 address tpc = trampoline_call(stub);
5904 if (tpc == nullptr) {
5905 DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5906 postcond(pc() == badAddress);
5907 return nullptr;
5908 }
5909 b(DONE);
5910
5911 // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5912 // so, if a2 == null => return false(0), else return true, so we can return a2
5913 mov(result, a2);
5914 b(DONE);
5915 bind(SHORT);
5916 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5917 ldr(tmp3, Address(a1, start_offset));
5918 ldr(tmp4, Address(a2, start_offset));
5919 bind(LAST_CHECK);
5920 eor(tmp4, tmp3, tmp4);
5921 lslv(tmp5, tmp4, tmp5);
5922 cmp(tmp5, zr);
5923 bind(CSET_EQ);
5924 cset(result, EQ);
5925 b(DONE);
5926 }
5927
5928 bind(SAME);
5929 mov(result, true);
5930 // That's it.
5931 bind(DONE);
5932
5933 BLOCK_COMMENT("} array_equals");
5934 postcond(pc() != badAddress);
5935 return pc();
5936 }
5937
5938 // Compare Strings
|
4894 }
4895
4896 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
4897 load_method_holder(rresult, rmethod);
4898 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
4899 }
4900
4901 void MacroAssembler::load_method_holder(Register holder, Register method) {
4902 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
4903 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
4904 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
4905 }
4906
4907 // Loads the obj's Klass* into dst.
4908 // Preserves all registers (incl src, rscratch1 and rscratch2).
4909 // Input:
4910 // src - the oop we want to load the klass from.
4911 // dst - output narrow klass.
4912 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
4913 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
4914 ldrw(dst, Address(src, oopDesc::mark_offset_in_bytes()));
4915 lsrw(dst, dst, markWord::klass_shift);
4916 }
4917
4918 void MacroAssembler::load_klass(Register dst, Register src) {
4919 if (UseCompactObjectHeaders) {
4920 load_narrow_klass_compact(dst, src);
4921 decode_klass_not_null(dst);
4922 } else if (UseCompressedClassPointers) {
4923 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4924 decode_klass_not_null(dst);
4925 } else {
4926 ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4927 }
4928 }
4929
4930 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
4931 if (RestoreMXCSROnJNICalls) {
4932 Label OK;
4933 get_fpcr(tmp1);
4934 mov(tmp2, tmp1);
4935 // Set FPCR to the state we need. We do want Round to Nearest. We
5717 b(DONE);
5718
5719 BIND(SET_RESULT);
5720
5721 add(len, len, wordSize);
5722 sub(result, result, len);
5723
5724 BIND(DONE);
5725 postcond(pc() != badAddress);
5726 return pc();
5727 }
5728
5729 // Clobbers: rscratch1, rscratch2, rflags
5730 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5731 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5732 Register tmp4, Register tmp5, Register result,
5733 Register cnt1, int elem_size) {
5734 Label DONE, SAME;
5735 Register tmp1 = rscratch1;
5736 Register tmp2 = rscratch2;
5737 Register cnt2 = tmp2; // cnt2 only used in array length compare
5738 int elem_per_word = wordSize/elem_size;
5739 int log_elem_size = exact_log2(elem_size);
5740 int length_offset = arrayOopDesc::length_offset_in_bytes();
5741 int base_offset
5742 = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5743 int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5744
5745 assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5746 assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5747
5748 #ifndef PRODUCT
5749 {
5750 const char kind = (elem_size == 2) ? 'U' : 'L';
5751 char comment[64];
5752 os::snprintf_checked(comment, sizeof comment, "array_equals%c{", kind);
5753 BLOCK_COMMENT(comment);
5754 }
5755 #endif
5756
5757 // if (a1 == a2)
5758 // return true;
5759 cmpoop(a1, a2); // May have read barriers for a1 and a2.
5760 br(EQ, SAME);
5761
5762 if (UseSimpleArrayEquals) {
5763 Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5764 // if (a1 == nullptr || a2 == nullptr)
5765 // return false;
5766 // a1 & a2 == 0 means (some-pointer is null) or
5767 // (very-rare-or-even-probably-impossible-pointer-values)
5768 // so, we can save one branch in most cases
5769 tst(a1, a2);
5770 mov(result, false);
5771 br(EQ, A_MIGHT_BE_NULL);
5772 // if (a1.length != a2.length)
5773 // return false;
5774 bind(A_IS_NOT_NULL);
5775 ldrw(cnt1, Address(a1, length_offset));
5776 ldrw(cnt2, Address(a2, length_offset));
5777 eorw(tmp5, cnt1, cnt2);
5778 cbnzw(tmp5, DONE);
5779 lea(a1, Address(a1, base_offset));
5780 lea(a2, Address(a2, base_offset));
5781 // Check for short strings, i.e. smaller than wordSize.
5782 subs(cnt1, cnt1, elem_per_word);
5783 br(Assembler::LT, SHORT);
5784 // Main 8 byte comparison loop.
5785 bind(NEXT_WORD); {
5786 ldr(tmp1, Address(post(a1, wordSize)));
5787 ldr(tmp2, Address(post(a2, wordSize)));
5788 subs(cnt1, cnt1, elem_per_word);
5789 eor(tmp5, tmp1, tmp2);
5790 cbnz(tmp5, DONE);
5791 } br(GT, NEXT_WORD);
5792 // Last longword. In the case where length == 4 we compare the
5793 // same longword twice, but that's still faster than another
5794 // conditional branch.
5795 // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5796 // length == 4.
5797 if (log_elem_size > 0)
5798 lsl(cnt1, cnt1, log_elem_size);
5799 ldr(tmp3, Address(a1, cnt1));
5800 ldr(tmp4, Address(a2, cnt1));
5823 eorw(tmp5, tmp3, tmp4);
5824 cbnzw(tmp5, DONE);
5825 }
5826 bind(TAIL01);
5827 if (elem_size == 1) { // Only needed when comparing byte arrays.
5828 tbz(cnt1, 0, SAME); // 0-1 bytes left.
5829 {
5830 ldrb(tmp1, a1);
5831 ldrb(tmp2, a2);
5832 eorw(tmp5, tmp1, tmp2);
5833 cbnzw(tmp5, DONE);
5834 }
5835 }
5836 } else {
5837 Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5838 CSET_EQ, LAST_CHECK;
5839 mov(result, false);
5840 cbz(a1, DONE);
5841 ldrw(cnt1, Address(a1, length_offset));
5842 cbz(a2, DONE);
5843 ldrw(cnt2, Address(a2, length_offset));
5844 // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5845 // faster to perform another branch before comparing a1 and a2
5846 cmp(cnt1, (u1)elem_per_word);
5847 br(LE, SHORT); // short or same
5848 ldr(tmp3, Address(pre(a1, base_offset)));
5849 subs(zr, cnt1, stubBytesThreshold);
5850 br(GE, STUB);
5851 ldr(tmp4, Address(pre(a2, base_offset)));
5852 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5853 cmp(cnt2, cnt1);
5854 br(NE, DONE);
5855
5856 // Main 16 byte comparison loop with 2 exits
5857 bind(NEXT_DWORD); {
5858 ldr(tmp1, Address(pre(a1, wordSize)));
5859 ldr(tmp2, Address(pre(a2, wordSize)));
5860 subs(cnt1, cnt1, 2 * elem_per_word);
5861 br(LE, TAIL);
5862 eor(tmp4, tmp3, tmp4);
5863 cbnz(tmp4, DONE);
5864 ldr(tmp3, Address(pre(a1, wordSize)));
5865 ldr(tmp4, Address(pre(a2, wordSize)));
5866 cmp(cnt1, (u1)elem_per_word);
5867 br(LE, TAIL2);
5868 cmp(tmp1, tmp2);
5869 } br(EQ, NEXT_DWORD);
5870 b(DONE);
5871
5872 bind(TAIL);
5873 eor(tmp4, tmp3, tmp4);
5874 eor(tmp2, tmp1, tmp2);
5875 lslv(tmp2, tmp2, tmp5);
5876 orr(tmp5, tmp4, tmp2);
5877 cmp(tmp5, zr);
5878 b(CSET_EQ);
5879
5880 bind(TAIL2);
5881 eor(tmp2, tmp1, tmp2);
5882 cbnz(tmp2, DONE);
5883 b(LAST_CHECK);
5884
5885 bind(STUB);
5886 ldr(tmp4, Address(pre(a2, base_offset)));
5887 cmp(cnt2, cnt1);
5888 br(NE, DONE);
5889 if (elem_size == 2) { // convert to byte counter
5890 lsl(cnt1, cnt1, 1);
5891 }
5892 eor(tmp5, tmp3, tmp4);
5893 cbnz(tmp5, DONE);
5894 RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5895 assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5896 address tpc = trampoline_call(stub);
5897 if (tpc == nullptr) {
5898 DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5899 postcond(pc() == badAddress);
5900 return nullptr;
5901 }
5902 b(DONE);
5903
5904 // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5905 // so, if a2 == null => return false(0), else return true, so we can return a2
5906 mov(result, a2);
5907 b(DONE);
5908 bind(SHORT);
5909 cmp(cnt2, cnt1);
5910 br(NE, DONE);
5911 cbz(cnt1, SAME);
5912 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5913 ldr(tmp3, Address(a1, base_offset));
5914 ldr(tmp4, Address(a2, base_offset));
5915 bind(LAST_CHECK);
5916 eor(tmp4, tmp3, tmp4);
5917 lslv(tmp5, tmp4, tmp5);
5918 cmp(tmp5, zr);
5919 bind(CSET_EQ);
5920 cset(result, EQ);
5921 b(DONE);
5922 }
5923
5924 bind(SAME);
5925 mov(result, true);
5926 // That's it.
5927 bind(DONE);
5928
5929 BLOCK_COMMENT("} array_equals");
5930 postcond(pc() != badAddress);
5931 return pc();
5932 }
5933
5934 // Compare Strings
|