5020 }
5021
5022 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5023 load_method_holder(rresult, rmethod);
5024 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5025 }
5026
5027 void MacroAssembler::load_method_holder(Register holder, Register method) {
5028 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
5029 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
5030 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
5031 }
5032
5033 // Loads the obj's Klass* into dst.
5034 // Preserves all registers (incl src, rscratch1 and rscratch2).
5035 // Input:
5036 // src - the oop we want to load the klass from.
5037 // dst - output narrow klass.
5038 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5039 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5040 ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
5041 lsr(dst, dst, markWord::klass_shift);
5042 }
5043
5044 void MacroAssembler::load_klass(Register dst, Register src) {
5045 if (UseCompactObjectHeaders) {
5046 load_narrow_klass_compact(dst, src);
5047 decode_klass_not_null(dst);
5048 } else if (UseCompressedClassPointers) {
5049 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5050 decode_klass_not_null(dst);
5051 } else {
5052 ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5053 }
5054 }
5055
5056 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5057 if (RestoreMXCSROnJNICalls) {
5058 Label OK;
5059 get_fpcr(tmp1);
5060 mov(tmp2, tmp1);
5061 // Set FPCR to the state we need. We do want Round to Nearest. We
5814 b(DONE);
5815
5816 BIND(SET_RESULT);
5817
5818 add(len, len, wordSize);
5819 sub(result, result, len);
5820
5821 BIND(DONE);
5822 postcond(pc() != badAddress);
5823 return pc();
5824 }
5825
5826 // Clobbers: rscratch1, rscratch2, rflags
5827 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
//
// Emits code that compares two byte arrays (elem_size == 1) or char arrays
// (elem_size == 2) and leaves true or false in result.
//   a1, a2      - the arrays to compare; the emitted code may modify both.
//   tmp3 - tmp5 - temporaries, used in addition to rscratch1 and rscratch2.
//   result      - receives the outcome of the comparison.
//   cnt1        - temporary holding the remaining element count.
//   elem_size   - size of one element in bytes; must be 1 or 2.
// Returns the pc following the emitted code, or nullptr when trampoline_call()
// for the large_array_equals stub returns nullptr.
5828 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5829 Register tmp4, Register tmp5, Register result,
5830 Register cnt1, int elem_size) {
5831 Label DONE, SAME;
5832 Register tmp1 = rscratch1;
5833 Register tmp2 = rscratch2;
5834 int elem_per_word = wordSize/elem_size;
5835 int log_elem_size = exact_log2(elem_size);
5836 int klass_offset = arrayOopDesc::klass_offset_in_bytes();
5837 int length_offset = arrayOopDesc::length_offset_in_bytes();
5838 int base_offset
5839 = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5840 // When the length offset is not aligned to 8 bytes,
5841 // then we align it down. This is valid because the new
5842 // offset will always be the klass which is the same
5843 // for type arrays.
5844 int start_offset = align_down(length_offset, BytesPerWord);
// Number of header bytes between start_offset and the first element. They are
// compared together with the elements, so no separate length check is emitted.
5845 int extra_length = base_offset - start_offset;
5846 assert(start_offset == length_offset || start_offset == klass_offset,
5847 "start offset must be 8-byte-aligned or be the klass offset");
5848 assert(base_offset != start_offset, "must include the length field");
5849 extra_length = extra_length / elem_size; // We count in elements, not bytes.
// Limit at or above which the non-simple path branches to STUB and calls the
// large_array_equals stub.
// NOTE(review): it is compared against cnt1 while cnt1 still counts elements
// (cnt1 is converted to bytes only at STUB) — confirm the intended unit.
5850 int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5851
5852 assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5853 assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5854
5855 #ifndef PRODUCT
5856 {
5857 const char kind = (elem_size == 2) ? 'U' : 'L';
5858 char comment[64];
5859 snprintf(comment, sizeof comment, "array_equals%c{", kind);
5860 BLOCK_COMMENT(comment);
5861 }
5862 #endif
5863
5864 // if (a1 == a2)
5865 // return true;
5866 cmpoop(a1, a2); // May have read barriers for a1 and a2.
5867 br(EQ, SAME);
5868
5869 if (UseSimpleArrayEquals) {
5870 Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5871 // if (a1 == nullptr || a2 == nullptr)
5872 // return false;
5873 // a1 & a2 == 0 means (some-pointer is null) or
5874 // (very-rare-or-even-probably-impossible-pointer-values)
5875 // so, we can save one branch in most cases
5876 tst(a1, a2);
5877 mov(result, false);
5878 br(EQ, A_MIGHT_BE_NULL);
5879 // if (a1.length != a2.length)
5880 // return false;
5881 bind(A_IS_NOT_NULL);
5882 ldrw(cnt1, Address(a1, length_offset));
5883 // Increase loop counter by diff between base- and actual start-offset.
5884 addw(cnt1, cnt1, extra_length);
5885 lea(a1, Address(a1, start_offset));
5886 lea(a2, Address(a2, start_offset));
5887 // Check for short strings, i.e. smaller than wordSize.
5888 subs(cnt1, cnt1, elem_per_word);
5889 br(Assembler::LT, SHORT);
5890 // Main 8 byte comparison loop.
5891 bind(NEXT_WORD); {
5892 ldr(tmp1, Address(post(a1, wordSize)));
5893 ldr(tmp2, Address(post(a2, wordSize)));
5894 subs(cnt1, cnt1, elem_per_word);
5895 eor(tmp5, tmp1, tmp2);
5896 cbnz(tmp5, DONE);
5897 } br(GT, NEXT_WORD);
5898 // Last longword. In the case where length == 4 we compare the
5899 // same longword twice, but that's still faster than another
5900 // conditional branch.
5901 // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5902 // length == 4.
5903 if (log_elem_size > 0)
5904 lsl(cnt1, cnt1, log_elem_size);
5905 ldr(tmp3, Address(a1, cnt1));
5906 ldr(tmp4, Address(a2, cnt1));
// NOTE(review): the listing jumps from 5906 to 5929 here; the bind sites of
// SHORT, TAIL03 and A_MIGHT_BE_NULL are not visible in this excerpt.
5929 eorw(tmp5, tmp3, tmp4);
5930 cbnzw(tmp5, DONE);
5931 }
5932 bind(TAIL01);
5933 if (elem_size == 1) { // Only needed when comparing byte arrays.
5934 tbz(cnt1, 0, SAME); // 0-1 bytes left.
5935 {
5936 ldrb(tmp1, a1);
5937 ldrb(tmp2, a2);
5938 eorw(tmp5, tmp1, tmp2);
5939 cbnzw(tmp5, DONE);
5940 }
5941 }
5942 } else {
5943 Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5944 CSET_EQ, LAST_CHECK;
// Preset result to false so the null checks and the mismatch exits below can
// branch straight to DONE.
5945 mov(result, false);
5946 cbz(a1, DONE);
5947 ldrw(cnt1, Address(a1, length_offset));
5948 cbz(a2, DONE);
5949 // Increase loop counter by diff between base- and actual start-offset.
5950 addw(cnt1, cnt1, extra_length);
5951
5952 // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5953 // faster to perform another branch before comparing a1 and a2
5954 cmp(cnt1, (u1)elem_per_word);
5955 br(LE, SHORT); // short or same
5956 ldr(tmp3, Address(pre(a1, start_offset)));
5957 subs(zr, cnt1, stubBytesThreshold);
5958 br(GE, STUB);
5959 ldr(tmp4, Address(pre(a2, start_offset)));
// tmp5 = -(cnt1 << (3 + log_elem_size)); used below as the lslv shift amount
// for the last word (presumably to shift out bytes past the end — confirm).
5960 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5961
5962 // Main 16 byte comparison loop with 2 exits
5963 bind(NEXT_DWORD); {
5964 ldr(tmp1, Address(pre(a1, wordSize)));
5965 ldr(tmp2, Address(pre(a2, wordSize)));
5966 subs(cnt1, cnt1, 2 * elem_per_word);
5967 br(LE, TAIL);
5968 eor(tmp4, tmp3, tmp4);
5969 cbnz(tmp4, DONE);
5970 ldr(tmp3, Address(pre(a1, wordSize)));
5971 ldr(tmp4, Address(pre(a2, wordSize)));
5972 cmp(cnt1, (u1)elem_per_word);
5973 br(LE, TAIL2);
5974 cmp(tmp1, tmp2);
5975 } br(EQ, NEXT_DWORD);
5976 b(DONE);
5977
// Loop exit 1: tmp3/tmp4 hold a full word pair, tmp1/tmp2 the last pair.
5978 bind(TAIL);
5979 eor(tmp4, tmp3, tmp4);
5980 eor(tmp2, tmp1, tmp2);
5981 lslv(tmp2, tmp2, tmp5);
5982 orr(tmp5, tmp4, tmp2);
5983 cmp(tmp5, zr);
5984 b(CSET_EQ);
5985
// Loop exit 2: tmp1/tmp2 hold a full word pair, tmp3/tmp4 the last pair,
// which LAST_CHECK compares.
5986 bind(TAIL2);
5987 eor(tmp2, tmp1, tmp2);
5988 cbnz(tmp2, DONE);
5989 b(LAST_CHECK);
5990
// Large arrays: compare the first word pair inline, then call the stub.
5991 bind(STUB);
5992 ldr(tmp4, Address(pre(a2, start_offset)));
5993 if (elem_size == 2) { // convert to byte counter
5994 lsl(cnt1, cnt1, 1);
5995 }
5996 eor(tmp5, tmp3, tmp4);
5997 cbnz(tmp5, DONE);
5998 RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5999 assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
6000 address tpc = trampoline_call(stub);
6001 if (tpc == nullptr) {
6002 DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
6003 postcond(pc() == badAddress);
6004 return nullptr;
6005 }
6006 b(DONE);
6007
// NOTE(review): no label is bound between the b(DONE) above and bind(SHORT)
// below, so the following two instructions appear unreachable — confirm.
6008 // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
6009 // so, if a2 == null => return false(0), else return true, so we can return a2
6010 mov(result, a2);
6011 b(DONE);
6012 bind(SHORT);
6013 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
6014 ldr(tmp3, Address(a1, start_offset));
6015 ldr(tmp4, Address(a2, start_offset));
6016 bind(LAST_CHECK);
6017 eor(tmp4, tmp3, tmp4);
6018 lslv(tmp5, tmp4, tmp5);
6019 cmp(tmp5, zr);
6020 bind(CSET_EQ);
6021 cset(result, EQ);
6022 b(DONE);
6023 }
6024
6025 bind(SAME);
6026 mov(result, true);
6027 // That's it.
6028 bind(DONE);
6029
6030 BLOCK_COMMENT("} array_equals");
6031 postcond(pc() != badAddress);
6032 return pc();
6033 }
6034
6035 // Compare Strings
|
5020 }
5021
5022 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5023 load_method_holder(rresult, rmethod);
5024 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5025 }
5026
5027 void MacroAssembler::load_method_holder(Register holder, Register method) {
5028 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
5029 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
5030 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
5031 }
5032
5033 // Loads the obj's Klass* into dst.
5034 // Preserves all registers (incl src, rscratch1 and rscratch2).
5035 // Input:
5036 // src - the oop we want to load the klass from.
5037 // dst - output narrow klass.
5038 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5039 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5040 ldrw(dst, Address(src, oopDesc::mark_offset_in_bytes()));
5041 lsrw(dst, dst, markWord::klass_shift);
5042 }
5043
5044 void MacroAssembler::load_klass(Register dst, Register src) {
5045 if (UseCompactObjectHeaders) {
5046 load_narrow_klass_compact(dst, src);
5047 decode_klass_not_null(dst);
5048 } else if (UseCompressedClassPointers) {
5049 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5050 decode_klass_not_null(dst);
5051 } else {
5052 ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5053 }
5054 }
5055
5056 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5057 if (RestoreMXCSROnJNICalls) {
5058 Label OK;
5059 get_fpcr(tmp1);
5060 mov(tmp2, tmp1);
5061 // Set FPCR to the state we need. We do want Round to Nearest. We
5814 b(DONE);
5815
5816 BIND(SET_RESULT);
5817
5818 add(len, len, wordSize);
5819 sub(result, result, len);
5820
5821 BIND(DONE);
5822 postcond(pc() != badAddress);
5823 return pc();
5824 }
5825
5826 // Clobbers: rscratch1, rscratch2, rflags
5827 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
//
// Emits code that compares two byte arrays (elem_size == 1) or char arrays
// (elem_size == 2) and leaves true or false in result. The array lengths are
// compared explicitly before any element data.
//   a1, a2      - the arrays to compare; the emitted code may modify both.
//   tmp3 - tmp5 - temporaries, used in addition to rscratch1 and rscratch2.
//   result      - receives the outcome of the comparison.
//   cnt1        - temporary holding the remaining element count.
//   elem_size   - size of one element in bytes; must be 1 or 2.
// Returns the pc following the emitted code, or nullptr when trampoline_call()
// for the large_array_equals stub returns nullptr.
5828 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5829 Register tmp4, Register tmp5, Register result,
5830 Register cnt1, int elem_size) {
5831 Label DONE, SAME;
5832 Register tmp1 = rscratch1;
5833 Register tmp2 = rscratch2;
5834 Register cnt2 = tmp2; // cnt2 only used in array length compare
5835 int elem_per_word = wordSize/elem_size;
5836 int log_elem_size = exact_log2(elem_size);
5837 int length_offset = arrayOopDesc::length_offset_in_bytes();
5838 int base_offset
5839 = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
// Limit at or above which the non-simple path branches to STUB and calls the
// large_array_equals stub.
// NOTE(review): it is compared against cnt1 while cnt1 still counts elements
// (cnt1 is converted to bytes only at STUB) — confirm the intended unit.
5840 int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5841
5842 assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5843 assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5844
5845 #ifndef PRODUCT
5846 {
5847 const char kind = (elem_size == 2) ? 'U' : 'L';
5848 char comment[64];
5849 snprintf(comment, sizeof comment, "array_equals%c{", kind);
5850 BLOCK_COMMENT(comment);
5851 }
5852 #endif
5853
5854 // if (a1 == a2)
5855 // return true;
5856 cmpoop(a1, a2); // May have read barriers for a1 and a2.
5857 br(EQ, SAME);
5858
5859 if (UseSimpleArrayEquals) {
5860 Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5861 // if (a1 == nullptr || a2 == nullptr)
5862 // return false;
5863 // a1 & a2 == 0 means (some-pointer is null) or
5864 // (very-rare-or-even-probably-impossible-pointer-values)
5865 // so, we can save one branch in most cases
5866 tst(a1, a2);
5867 mov(result, false);
5868 br(EQ, A_MIGHT_BE_NULL);
5869 // if (a1.length != a2.length)
5870 // return false;
5871 bind(A_IS_NOT_NULL);
5872 ldrw(cnt1, Address(a1, length_offset));
5873 ldrw(cnt2, Address(a2, length_offset));
5874 eorw(tmp5, cnt1, cnt2);
5875 cbnzw(tmp5, DONE);
// Advance both pointers to the first element.
5876 lea(a1, Address(a1, base_offset));
5877 lea(a2, Address(a2, base_offset));
5878 // Check for short strings, i.e. smaller than wordSize.
5879 subs(cnt1, cnt1, elem_per_word);
5880 br(Assembler::LT, SHORT);
5881 // Main 8 byte comparison loop.
5882 bind(NEXT_WORD); {
5883 ldr(tmp1, Address(post(a1, wordSize)));
5884 ldr(tmp2, Address(post(a2, wordSize)));
5885 subs(cnt1, cnt1, elem_per_word);
5886 eor(tmp5, tmp1, tmp2);
5887 cbnz(tmp5, DONE);
5888 } br(GT, NEXT_WORD);
5889 // Last longword. In the case where length == 4 we compare the
5890 // same longword twice, but that's still faster than another
5891 // conditional branch.
5892 // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5893 // length == 4.
5894 if (log_elem_size > 0)
5895 lsl(cnt1, cnt1, log_elem_size);
5896 ldr(tmp3, Address(a1, cnt1));
5897 ldr(tmp4, Address(a2, cnt1));
// NOTE(review): the listing jumps from 5897 to 5920 here; the bind sites of
// SHORT, TAIL03 and A_MIGHT_BE_NULL are not visible in this excerpt.
5920 eorw(tmp5, tmp3, tmp4);
5921 cbnzw(tmp5, DONE);
5922 }
5923 bind(TAIL01);
5924 if (elem_size == 1) { // Only needed when comparing byte arrays.
5925 tbz(cnt1, 0, SAME); // 0-1 bytes left.
5926 {
5927 ldrb(tmp1, a1);
5928 ldrb(tmp2, a2);
5929 eorw(tmp5, tmp1, tmp2);
5930 cbnzw(tmp5, DONE);
5931 }
5932 }
5933 } else {
5934 Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5935 CSET_EQ, LAST_CHECK;
// Preset result to false so the null checks, the length checks and the
// mismatch exits below can branch straight to DONE.
5936 mov(result, false);
5937 cbz(a1, DONE);
5938 ldrw(cnt1, Address(a1, length_offset));
5939 cbz(a2, DONE);
5940 ldrw(cnt2, Address(a2, length_offset));
5941 // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5942 // faster to perform another branch before comparing a1 and a2
5943 cmp(cnt1, (u1)elem_per_word);
5944 br(LE, SHORT); // short or same
5945 ldr(tmp3, Address(pre(a1, base_offset)));
5946 subs(zr, cnt1, stubBytesThreshold);
5947 br(GE, STUB);
5948 ldr(tmp4, Address(pre(a2, base_offset)));
// tmp5 = -(cnt1 << (3 + log_elem_size)); used below as the lslv shift amount
// for the last word (presumably to shift out bytes past the end — confirm).
5949 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
// Different lengths: not equal (result is still false).
5950 cmp(cnt2, cnt1);
5951 br(NE, DONE);
5952
5953 // Main 16 byte comparison loop with 2 exits
5954 bind(NEXT_DWORD); {
5955 ldr(tmp1, Address(pre(a1, wordSize)));
5956 ldr(tmp2, Address(pre(a2, wordSize)));
5957 subs(cnt1, cnt1, 2 * elem_per_word);
5958 br(LE, TAIL);
5959 eor(tmp4, tmp3, tmp4);
5960 cbnz(tmp4, DONE);
5961 ldr(tmp3, Address(pre(a1, wordSize)));
5962 ldr(tmp4, Address(pre(a2, wordSize)));
5963 cmp(cnt1, (u1)elem_per_word);
5964 br(LE, TAIL2);
5965 cmp(tmp1, tmp2);
5966 } br(EQ, NEXT_DWORD);
5967 b(DONE);
5968
// Loop exit 1: tmp3/tmp4 hold a full word pair, tmp1/tmp2 the last pair.
5969 bind(TAIL);
5970 eor(tmp4, tmp3, tmp4);
5971 eor(tmp2, tmp1, tmp2);
5972 lslv(tmp2, tmp2, tmp5);
5973 orr(tmp5, tmp4, tmp2);
5974 cmp(tmp5, zr);
5975 b(CSET_EQ);
5976
// Loop exit 2: tmp1/tmp2 hold a full word pair, tmp3/tmp4 the last pair,
// which LAST_CHECK compares.
5977 bind(TAIL2);
5978 eor(tmp2, tmp1, tmp2);
5979 cbnz(tmp2, DONE);
5980 b(LAST_CHECK);
5981
// Large arrays: check the lengths and the first word pair inline, then call
// the stub.
5982 bind(STUB);
5983 ldr(tmp4, Address(pre(a2, base_offset)));
5984 cmp(cnt2, cnt1);
5985 br(NE, DONE);
5986 if (elem_size == 2) { // convert to byte counter
5987 lsl(cnt1, cnt1, 1);
5988 }
5989 eor(tmp5, tmp3, tmp4);
5990 cbnz(tmp5, DONE);
5991 RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5992 assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5993 address tpc = trampoline_call(stub);
5994 if (tpc == nullptr) {
5995 DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5996 postcond(pc() == badAddress);
5997 return nullptr;
5998 }
5999 b(DONE);
6000
// NOTE(review): no label is bound between the b(DONE) above and bind(SHORT)
// below, so the following two instructions appear unreachable — confirm.
6001 // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
6002 // so, if a2 == null => return false(0), else return true, so we can return a2
6003 mov(result, a2);
6004 b(DONE);
6005 bind(SHORT);
6006 cmp(cnt2, cnt1);
6007 br(NE, DONE);
// Equal lengths and both zero: the arrays are equal.
6008 cbz(cnt1, SAME);
6009 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
6010 ldr(tmp3, Address(a1, base_offset));
6011 ldr(tmp4, Address(a2, base_offset));
6012 bind(LAST_CHECK);
6013 eor(tmp4, tmp3, tmp4);
6014 lslv(tmp5, tmp4, tmp5);
6015 cmp(tmp5, zr);
6016 bind(CSET_EQ);
6017 cset(result, EQ);
6018 b(DONE);
6019 }
6020
6021 bind(SAME);
6022 mov(result, true);
6023 // That's it.
6024 bind(DONE);
6025
6026 BLOCK_COMMENT("} array_equals");
6027 postcond(pc() != badAddress);
6028 return pc();
6029 }
6030
6031 // Compare Strings
|