5014 }
5015
// Loads into rresult the word stored at
// InstanceKlass::class_loader_data_offset() inside the holder class of
// rmethod. rresult also serves as the temporary that first receives the
// holder pointer.
5016 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5017 load_method_holder(rresult, rmethod); // rresult = holder InstanceKlass*
5018 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5019 }
5020
// Loads the InstanceKlass* that holds `method` into `holder` by following
// Method -> ConstMethod -> ConstantPool -> pool holder.
// `holder` is overwritten by every step of the chain; `method` is only read
// by the first load.
5021 void MacroAssembler::load_method_holder(Register holder, Register method) {
5022 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
5023 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
5024 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
5025 }
5026
5027 // Loads the obj's narrow klass (taken from the mark word, not yet decoded) into dst.
5028 // Preserves all registers other than dst (incl src, rscratch1 and rscratch2).
5029 // Input:
5030 // src - the oop we want to load the klass from.
5031 // dst - output narrow klass.
// Only valid with UseCompactObjectHeaders (asserted below).
5032 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5033 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5034 ldr(dst, Address(src, oopDesc::mark_offset_in_bytes())); // read the mark word
5035 lsr(dst, dst, markWord::klass_shift); // shift right by klass_shift; result is the narrow klass
5036 }
5037
// Loads the klass of the object in src into dst. The way it is obtained
// depends on the VM flags:
//  - UseCompactObjectHeaders: narrow klass extracted from the mark word,
//    then decoded;
//  - UseCompressedClassPointers: narrow klass read from the klass field with
//    ldrw, then decoded;
//  - otherwise: the klass field is read with ldr and used as loaded.
5038 void MacroAssembler::load_klass(Register dst, Register src) {
5039 if (UseCompactObjectHeaders) {
5040 load_narrow_klass_compact(dst, src);
5041 decode_klass_not_null(dst);
5042 } else if (UseCompressedClassPointers) {
5043 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5044 decode_klass_not_null(dst);
5045 } else {
5046 ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5047 }
5048 }
5049
5050 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5051 if (RestoreMXCSROnJNICalls) {
5052 Label OK;
5053 get_fpcr(tmp1);
5054 mov(tmp2, tmp1);
5055 // Set FPCR to the state we need. We do want Round to Nearest. We
5794 b(DONE);
5795
5796 BIND(SET_RESULT);
5797
5798 add(len, len, wordSize);
5799 sub(result, result, len);
5800
5801 BIND(DONE);
5802 postcond(pc() != badAddress);
5803 return pc();
5804 }
5805
5806 // Clobbers: rscratch1, rscratch2, rflags
5807 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
//
// Emits code that compares the arrays a1 and a2 and leaves true (equal) or
// false (different) in result.
//   a1, a2      - the arrays to compare; the registers may be advanced.
//   tmp3..tmp5  - temporaries, overwritten.
//   result      - receives the outcome.
//   cnt1        - overwritten; used as the element counter.
//   elem_size   - element size in bytes; must be 1 or 2 (asserted).
// The comparison starts at start_offset rather than base_offset, so the
// length field is compared together with the elements and no separate
// length check is emitted.
// Returns pc() after the emitted code, or nullptr when trampoline_call()
// for the large-array stub returns nullptr.
5808 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5809 Register tmp4, Register tmp5, Register result,
5810 Register cnt1, int elem_size) {
5811 Label DONE, SAME;
5812 Register tmp1 = rscratch1;
5813 Register tmp2 = rscratch2;
5814 int elem_per_word = wordSize/elem_size;
5815 int log_elem_size = exact_log2(elem_size);
5816 int klass_offset = arrayOopDesc::klass_offset_in_bytes();
5817 int length_offset = arrayOopDesc::length_offset_in_bytes();
5818 int base_offset
5819 = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5820 // When the length offset is not aligned to 8 bytes,
5821 // then we align it down. This is valid because the new
5822 // offset will always be the klass which is the same
5823 // for type arrays.
5824 int start_offset = align_down(length_offset, BytesPerWord);
5825 int extra_length = base_offset - start_offset;
5826 assert(start_offset == length_offset || start_offset == klass_offset,
5827 "start offset must be 8-byte-aligned or be the klass offset");
5828 assert(base_offset != start_offset, "must include the length field");
5829 extra_length = extra_length / elem_size; // We count in elements, not bytes.
// cnt1 values at or above this threshold branch to STUB in the non-simple path.
5830 int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5831
5832 assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5833 assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5834
5835 #ifndef PRODUCT
5836 {
// Block comment is "array_equalsU{" for 2-byte elements, "array_equalsL{" otherwise.
5837 const char kind = (elem_size == 2) ? 'U' : 'L';
5838 char comment[64];
5839 snprintf(comment, sizeof comment, "array_equals%c{", kind);
5840 BLOCK_COMMENT(comment);
5841 }
5842 #endif
5843
5844 // if (a1 == a2)
5845 // return true;
5846 cmpoop(a1, a2); // May have read barriers for a1 and a2.
5847 br(EQ, SAME);
5848
5849 if (UseSimpleArrayEquals) {
5850 Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5851 // if (a1 == nullptr || a2 == nullptr)
5852 // return false;
5853 // a1 & a2 == 0 means (some-pointer is null) or
5854 // (very-rare-or-even-probably-impossible-pointer-values)
5855 // so, we can save one branch in most cases
5856 tst(a1, a2);
5857 mov(result, false);
5858 br(EQ, A_MIGHT_BE_NULL);
5859 // if (a1.length != a2.length)
5860 // return false;
5861 bind(A_IS_NOT_NULL);
5862 ldrw(cnt1, Address(a1, length_offset));
5863 // Increase loop counter by diff between base- and actual start-offset.
5864 addw(cnt1, cnt1, extra_length);
5865 lea(a1, Address(a1, start_offset));
5866 lea(a2, Address(a2, start_offset));
5867 // Check for short strings, i.e. smaller than wordSize.
5868 subs(cnt1, cnt1, elem_per_word);
5869 br(Assembler::LT, SHORT);
5870 // Main 8 byte comparison loop.
5871 bind(NEXT_WORD); {
5872 ldr(tmp1, Address(post(a1, wordSize)));
5873 ldr(tmp2, Address(post(a2, wordSize)));
5874 subs(cnt1, cnt1, elem_per_word);
5875 eor(tmp5, tmp1, tmp2);
5876 cbnz(tmp5, DONE);
5877 } br(GT, NEXT_WORD);
5878 // Last longword. In the case where length == 4 we compare the
5879 // same longword twice, but that's still faster than another
5880 // conditional branch.
5881 // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5882 // length == 4.
5883 if (log_elem_size > 0)
5884 lsl(cnt1, cnt1, log_elem_size);
5885 ldr(tmp3, Address(a1, cnt1));
5886 ldr(tmp4, Address(a2, cnt1));
// NOTE(review): the embedded line numbers jump from 5886 to 5909 here, so
// the code binding SHORT, TAIL03 and A_MIGHT_BE_NULL is not part of this
// listing — confirm against the complete file.
5909 eorw(tmp5, tmp3, tmp4);
5910 cbnzw(tmp5, DONE);
5911 }
5912 bind(TAIL01);
5913 if (elem_size == 1) { // Only needed when comparing byte arrays.
5914 tbz(cnt1, 0, SAME); // 0-1 bytes left.
5915 {
5916 ldrb(tmp1, a1);
5917 ldrb(tmp2, a2);
5918 eorw(tmp5, tmp1, tmp2);
5919 cbnzw(tmp5, DONE);
5920 }
5921 }
5922 } else {
5923 Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5924 CSET_EQ, LAST_CHECK;
// result stays false on every early exit to DONE below.
5925 mov(result, false);
5926 cbz(a1, DONE);
5927 ldrw(cnt1, Address(a1, length_offset));
5928 cbz(a2, DONE);
5929 // Increase loop counter by diff between base- and actual start-offset.
5930 addw(cnt1, cnt1, extra_length);
5931
5932 // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5933 // faster to perform another branch before comparing a1 and a2
5934 cmp(cnt1, (u1)elem_per_word);
5935 br(LE, SHORT); // short or same
5936 ldr(tmp3, Address(pre(a1, start_offset)));
5937 subs(zr, cnt1, stubBytesThreshold);
5938 br(GE, STUB);
5939 ldr(tmp4, Address(pre(a2, start_offset)));
// tmp5 = -(cnt1 << (3 + log_elem_size)); used as the lslv shift amount in TAIL / LAST_CHECK.
5940 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5941
5942 // Main 16 byte comparison loop with 2 exits
5943 bind(NEXT_DWORD); {
5944 ldr(tmp1, Address(pre(a1, wordSize)));
5945 ldr(tmp2, Address(pre(a2, wordSize)));
5946 subs(cnt1, cnt1, 2 * elem_per_word);
5947 br(LE, TAIL);
5948 eor(tmp4, tmp3, tmp4);
5949 cbnz(tmp4, DONE);
5950 ldr(tmp3, Address(pre(a1, wordSize)));
5951 ldr(tmp4, Address(pre(a2, wordSize)));
5952 cmp(cnt1, (u1)elem_per_word);
5953 br(LE, TAIL2);
5954 cmp(tmp1, tmp2);
5955 } br(EQ, NEXT_DWORD);
5956 b(DONE);
5957
5958 bind(TAIL);
5959 eor(tmp4, tmp3, tmp4);
5960 eor(tmp2, tmp1, tmp2);
5961 lslv(tmp2, tmp2, tmp5);
5962 orr(tmp5, tmp4, tmp2);
5963 cmp(tmp5, zr);
5964 b(CSET_EQ);
5965
5966 bind(TAIL2);
5967 eor(tmp2, tmp1, tmp2);
5968 cbnz(tmp2, DONE);
5969 b(LAST_CHECK);
5970
5971 bind(STUB);
5972 ldr(tmp4, Address(pre(a2, start_offset)));
5973 if (elem_size == 2) { // convert to byte counter
5974 lsl(cnt1, cnt1, 1);
5975 }
5976 eor(tmp5, tmp3, tmp4);
5977 cbnz(tmp5, DONE);
5978 RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5979 assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5980 address tpc = trampoline_call(stub);
// trampoline_call() returned nullptr: reset the labels (debug builds only) and report failure.
5981 if (tpc == nullptr) {
5982 DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5983 postcond(pc() == badAddress);
5984 return nullptr;
5985 }
5986 b(DONE);
5987
5988 // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5989 // so, if a2 == null => return false(0), else return true, so we can return a2
// NOTE(review): no label is bound between the unconditional b(DONE) above
// and this mov, so the next two instructions look unreachable — confirm.
5990 mov(result, a2);
5991 b(DONE);
5992 bind(SHORT);
5993 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5994 ldr(tmp3, Address(a1, start_offset));
5995 ldr(tmp4, Address(a2, start_offset));
5996 bind(LAST_CHECK);
5997 eor(tmp4, tmp3, tmp4);
5998 lslv(tmp5, tmp4, tmp5);
5999 cmp(tmp5, zr);
6000 bind(CSET_EQ);
6001 cset(result, EQ);
6002 b(DONE);
6003 }
6004
6005 bind(SAME);
6006 mov(result, true);
6007 // That's it.
6008 bind(DONE);
6009
6010 BLOCK_COMMENT("} array_equals");
6011 postcond(pc() != badAddress);
6012 return pc();
6013 }
6014
6015 // Compare Strings
|
5014 }
5015
// Loads into rresult the word stored at
// InstanceKlass::class_loader_data_offset() inside the holder class of
// rmethod. rresult also serves as the temporary that first receives the
// holder pointer.
5016 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5017 load_method_holder(rresult, rmethod); // rresult = holder InstanceKlass*
5018 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5019 }
5020
// Loads the InstanceKlass* that holds `method` into `holder` by following
// Method -> ConstMethod -> ConstantPool -> pool holder.
// `holder` is overwritten by every step of the chain; `method` is only read
// by the first load.
5021 void MacroAssembler::load_method_holder(Register holder, Register method) {
5022 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
5023 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
5024 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
5025 }
5026
5027 // Loads the obj's narrow klass (taken from the mark word, not yet decoded) into dst.
5028 // Preserves all registers other than dst (incl src, rscratch1 and rscratch2).
5029 // Input:
5030 // src - the oop we want to load the klass from.
5031 // dst - output narrow klass.
// Only valid with UseCompactObjectHeaders (asserted below).
// NOTE(review): this variant uses the 'w' forms (ldrw/lsrw); that presumes
// markWord::klass_shift fits the width they operate on — confirm.
5032 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5033 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5034 ldrw(dst, Address(src, oopDesc::mark_offset_in_bytes())); // read from the mark word offset
5035 lsrw(dst, dst, markWord::klass_shift); // shift right by klass_shift; result is the narrow klass
5036 }
5037
// Loads the klass of the object in src into dst. The way it is obtained
// depends on the VM flags:
//  - UseCompactObjectHeaders: narrow klass extracted from the mark word,
//    then decoded;
//  - UseCompressedClassPointers: narrow klass read from the klass field with
//    ldrw, then decoded;
//  - otherwise: the klass field is read with ldr and used as loaded.
5038 void MacroAssembler::load_klass(Register dst, Register src) {
5039 if (UseCompactObjectHeaders) {
5040 load_narrow_klass_compact(dst, src);
5041 decode_klass_not_null(dst);
5042 } else if (UseCompressedClassPointers) {
5043 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5044 decode_klass_not_null(dst);
5045 } else {
5046 ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5047 }
5048 }
5049
5050 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5051 if (RestoreMXCSROnJNICalls) {
5052 Label OK;
5053 get_fpcr(tmp1);
5054 mov(tmp2, tmp1);
5055 // Set FPCR to the state we need. We do want Round to Nearest. We
5794 b(DONE);
5795
5796 BIND(SET_RESULT);
5797
5798 add(len, len, wordSize);
5799 sub(result, result, len);
5800
5801 BIND(DONE);
5802 postcond(pc() != badAddress);
5803 return pc();
5804 }
5805
5806 // Clobbers: rscratch1, rscratch2, rflags
5807 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
//
// Emits code that compares the arrays a1 and a2 and leaves true (equal) or
// false (different) in result.
//   a1, a2      - the arrays to compare; the registers may be advanced.
//   tmp3..tmp5  - temporaries, overwritten.
//   result      - receives the outcome.
//   cnt1        - overwritten; used as the element counter.
//   elem_size   - element size in bytes; must be 1 or 2 (asserted).
// The two lengths are loaded into cnt1 and cnt2 and compared explicitly;
// element data is then compared starting at base_offset.
// Returns pc() after the emitted code, or nullptr when trampoline_call()
// for the large-array stub returns nullptr.
5808 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5809 Register tmp4, Register tmp5, Register result,
5810 Register cnt1, int elem_size) {
5811 Label DONE, SAME;
5812 Register tmp1 = rscratch1;
5813 Register tmp2 = rscratch2;
5814 Register cnt2 = tmp2; // cnt2 only used in array length compare
5815 int elem_per_word = wordSize/elem_size;
5816 int log_elem_size = exact_log2(elem_size);
5817 int length_offset = arrayOopDesc::length_offset_in_bytes();
5818 int base_offset
5819 = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
// cnt1 values at or above this threshold branch to STUB in the non-simple path.
5820 int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5821
5822 assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5823 assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5824
5825 #ifndef PRODUCT
5826 {
// Block comment is "array_equalsU{" for 2-byte elements, "array_equalsL{" otherwise.
5827 const char kind = (elem_size == 2) ? 'U' : 'L';
5828 char comment[64];
5829 snprintf(comment, sizeof comment, "array_equals%c{", kind);
5830 BLOCK_COMMENT(comment);
5831 }
5832 #endif
5833
5834 // if (a1 == a2)
5835 // return true;
5836 cmpoop(a1, a2); // May have read barriers for a1 and a2.
5837 br(EQ, SAME);
5838
5839 if (UseSimpleArrayEquals) {
5840 Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5841 // if (a1 == nullptr || a2 == nullptr)
5842 // return false;
5843 // a1 & a2 == 0 means (some-pointer is null) or
5844 // (very-rare-or-even-probably-impossible-pointer-values)
5845 // so, we can save one branch in most cases
5846 tst(a1, a2);
5847 mov(result, false);
5848 br(EQ, A_MIGHT_BE_NULL);
5849 // if (a1.length != a2.length)
5850 // return false;
5851 bind(A_IS_NOT_NULL);
5852 ldrw(cnt1, Address(a1, length_offset));
5853 ldrw(cnt2, Address(a2, length_offset));
5854 eorw(tmp5, cnt1, cnt2);
5855 cbnzw(tmp5, DONE);
5856 lea(a1, Address(a1, base_offset));
5857 lea(a2, Address(a2, base_offset));
5858 // Check for short strings, i.e. smaller than wordSize.
5859 subs(cnt1, cnt1, elem_per_word);
5860 br(Assembler::LT, SHORT);
5861 // Main 8 byte comparison loop.
5862 bind(NEXT_WORD); {
5863 ldr(tmp1, Address(post(a1, wordSize)));
5864 ldr(tmp2, Address(post(a2, wordSize)));
5865 subs(cnt1, cnt1, elem_per_word);
5866 eor(tmp5, tmp1, tmp2);
5867 cbnz(tmp5, DONE);
5868 } br(GT, NEXT_WORD);
5869 // Last longword. In the case where length == 4 we compare the
5870 // same longword twice, but that's still faster than another
5871 // conditional branch.
5872 // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5873 // length == 4.
5874 if (log_elem_size > 0)
5875 lsl(cnt1, cnt1, log_elem_size);
5876 ldr(tmp3, Address(a1, cnt1));
5877 ldr(tmp4, Address(a2, cnt1));
// NOTE(review): the embedded line numbers jump from 5877 to 5900 here, so
// the code binding SHORT, TAIL03 and A_MIGHT_BE_NULL is not part of this
// listing — confirm against the complete file.
5900 eorw(tmp5, tmp3, tmp4);
5901 cbnzw(tmp5, DONE);
5902 }
5903 bind(TAIL01);
5904 if (elem_size == 1) { // Only needed when comparing byte arrays.
5905 tbz(cnt1, 0, SAME); // 0-1 bytes left.
5906 {
5907 ldrb(tmp1, a1);
5908 ldrb(tmp2, a2);
5909 eorw(tmp5, tmp1, tmp2);
5910 cbnzw(tmp5, DONE);
5911 }
5912 }
5913 } else {
5914 Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5915 CSET_EQ, LAST_CHECK;
// result stays false on every early exit to DONE below.
5916 mov(result, false);
5917 cbz(a1, DONE);
5918 ldrw(cnt1, Address(a1, length_offset));
5919 cbz(a2, DONE);
5920 ldrw(cnt2, Address(a2, length_offset));
5921 // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5922 // faster to perform another branch before comparing a1 and a2
5923 cmp(cnt1, (u1)elem_per_word);
5924 br(LE, SHORT); // short or same
5925 ldr(tmp3, Address(pre(a1, base_offset)));
5926 subs(zr, cnt1, stubBytesThreshold);
5927 br(GE, STUB);
5928 ldr(tmp4, Address(pre(a2, base_offset)));
// tmp5 = -(cnt1 << (3 + log_elem_size)); used as the lslv shift amount in TAIL / LAST_CHECK.
5929 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
// Length check; done before the loop because cnt2 aliases tmp2, which the loop overwrites.
5930 cmp(cnt2, cnt1);
5931 br(NE, DONE);
5932
5933 // Main 16 byte comparison loop with 2 exits
5934 bind(NEXT_DWORD); {
5935 ldr(tmp1, Address(pre(a1, wordSize)));
5936 ldr(tmp2, Address(pre(a2, wordSize)));
5937 subs(cnt1, cnt1, 2 * elem_per_word);
5938 br(LE, TAIL);
5939 eor(tmp4, tmp3, tmp4);
5940 cbnz(tmp4, DONE);
5941 ldr(tmp3, Address(pre(a1, wordSize)));
5942 ldr(tmp4, Address(pre(a2, wordSize)));
5943 cmp(cnt1, (u1)elem_per_word);
5944 br(LE, TAIL2);
5945 cmp(tmp1, tmp2);
5946 } br(EQ, NEXT_DWORD);
5947 b(DONE);
5948
5949 bind(TAIL);
5950 eor(tmp4, tmp3, tmp4);
5951 eor(tmp2, tmp1, tmp2);
5952 lslv(tmp2, tmp2, tmp5);
5953 orr(tmp5, tmp4, tmp2);
5954 cmp(tmp5, zr);
5955 b(CSET_EQ);
5956
5957 bind(TAIL2);
5958 eor(tmp2, tmp1, tmp2);
5959 cbnz(tmp2, DONE);
5960 b(LAST_CHECK);
5961
5962 bind(STUB);
5963 ldr(tmp4, Address(pre(a2, base_offset)));
5964 cmp(cnt2, cnt1);
5965 br(NE, DONE);
5966 if (elem_size == 2) { // convert to byte counter
5967 lsl(cnt1, cnt1, 1);
5968 }
5969 eor(tmp5, tmp3, tmp4);
5970 cbnz(tmp5, DONE);
5971 RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5972 assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5973 address tpc = trampoline_call(stub);
// trampoline_call() returned nullptr: reset the labels (debug builds only) and report failure.
5974 if (tpc == nullptr) {
5975 DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5976 postcond(pc() == badAddress);
5977 return nullptr;
5978 }
5979 b(DONE);
5980
5981 // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5982 // so, if a2 == null => return false(0), else return true, so we can return a2
// NOTE(review): no label is bound between the unconditional b(DONE) above
// and this mov, so the next two instructions look unreachable — confirm.
5983 mov(result, a2);
5984 b(DONE);
5985 bind(SHORT);
5986 cmp(cnt2, cnt1);
5987 br(NE, DONE);
5988 cbz(cnt1, SAME); // equal lengths of zero: nothing to compare
5989 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5990 ldr(tmp3, Address(a1, base_offset));
5991 ldr(tmp4, Address(a2, base_offset));
5992 bind(LAST_CHECK);
5993 eor(tmp4, tmp3, tmp4);
5994 lslv(tmp5, tmp4, tmp5);
5995 cmp(tmp5, zr);
5996 bind(CSET_EQ);
5997 cset(result, EQ);
5998 b(DONE);
5999 }
6000
6001 bind(SAME);
6002 mov(result, true);
6003 // That's it.
6004 bind(DONE);
6005
6006 BLOCK_COMMENT("} array_equals");
6007 postcond(pc() != badAddress);
6008 return pc();
6009 }
6010
6011 // Compare Strings
|