5013 }
5014
// Loads the class-loader-data field of the class that holds rmethod into rresult.
// rresult is used twice: it first receives the holder InstanceKlass* (via
// load_method_holder) and is then overwritten with the word read from that
// klass at InstanceKlass::class_loader_data_offset().
//   rresult - output register.
//   rmethod - register holding the Method* to start from.
5015 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5016 load_method_holder(rresult, rmethod); // rresult = InstanceKlass* holding rmethod
5017 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset())); // rresult = holder's class loader data
5018 }
5019
// Loads the InstanceKlass* that holds `method` into `holder` by chasing three
// pointers: Method -> ConstMethod -> ConstantPool -> pool holder.
// `holder` doubles as the temporary for every intermediate pointer, so each
// load below reads through the value produced by the previous one.
//   holder - output register (also used for the intermediate values).
//   method - register holding the Method*.
5020 void MacroAssembler::load_method_holder(Register holder, Register method) {
5021 ldr(holder, Address(method, Method::const_offset())); // holder = ConstMethod*
5022 ldr(holder, Address(holder, ConstMethod::constants_offset())); // holder = ConstantPool*
5023 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // holder = InstanceKlass*
5024 }
5025
5026 // Loads the obj's narrow (still encoded) klass value into dst.
5027 // Writes only dst; src, rscratch1 and rscratch2 are not named by this code.
5028 // Input:
5029 // src - the oop we want to load the klass from.
5030 // dst - output narrow klass.
// Only valid with UseCompactObjectHeaders (asserted below): the value is taken
// from the object's mark word rather than from a separate klass field.
// The result is not decoded; load_klass() follows this call with
// decode_klass_not_null() to obtain a usable Klass*.
5031 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5032 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
// Read the mark word located at mark_offset_in_bytes() inside the object.
5033 ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
// Drop the bits below markWord::klass_shift so only the bits above them remain.
5034 lsr(dst, dst, markWord::klass_shift);
5035 }
5036
// Loads the klass of the object in src into dst, as a decoded Klass*.
// The VM flags are tested here in C++, so each call runs exactly one of the
// three sequences below:
//   - UseCompactObjectHeaders:    narrow klass taken from the mark word, then decoded;
//   - UseCompressedClassPointers: 32-bit narrow klass read from the klass field, then decoded;
//   - otherwise:                  the klass field is read with a plain ldr and used as is.
//   dst - output register.
//   src - register holding the oop.
5037 void MacroAssembler::load_klass(Register dst, Register src) {
5038 if (UseCompactObjectHeaders) {
5039 load_narrow_klass_compact(dst, src); // dst = narrow klass extracted from the mark word
5040 decode_klass_not_null(dst); // narrow -> Klass*; per its name, assumes the value is not null
5041 } else if (UseCompressedClassPointers) {
5042 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes())); // 32-bit load of the narrow klass field
5043 decode_klass_not_null(dst); // narrow -> Klass*; per its name, assumes the value is not null
5044 } else {
5045 ldr(dst, Address(src, oopDesc::klass_offset_in_bytes())); // klass field used directly, no decode step
5046 }
5047 }
5048
5049 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5050 if (RestoreMXCSROnJNICalls) {
5051 Label OK;
5052 get_fpcr(tmp1);
5053 mov(tmp2, tmp1);
5054 // Set FPCR to the state we need. We do want Round to Nearest. We
5808 b(DONE);
5809
5810 BIND(SET_RESULT);
5811
5812 add(len, len, wordSize);
5813 sub(result, result, len);
5814
5815 BIND(DONE);
5816 postcond(pc() != badAddress);
5817 return pc();
5818 }
5819
5820 // Clobbers: rscratch1, rscratch2, rflags
5821 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5822 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5823 Register tmp4, Register tmp5, Register result,
5824 Register cnt1, int elem_size) {
5825 Label DONE, SAME;
5826 Register tmp1 = rscratch1;
5827 Register tmp2 = rscratch2;
5828 int elem_per_word = wordSize/elem_size;
5829 int log_elem_size = exact_log2(elem_size);
5830 int klass_offset = arrayOopDesc::klass_offset_in_bytes();
5831 int length_offset = arrayOopDesc::length_offset_in_bytes();
5832 int base_offset
5833 = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5834 // When the length offset is not aligned to 8 bytes,
5835 // then we align it down. This is valid because the new
5836 // offset will always be the klass which is the same
5837 // for type arrays.
5838 int start_offset = align_down(length_offset, BytesPerWord);
5839 int extra_length = base_offset - start_offset;
5840 assert(start_offset == length_offset || start_offset == klass_offset,
5841 "start offset must be 8-byte-aligned or be the klass offset");
5842 assert(base_offset != start_offset, "must include the length field");
5843 extra_length = extra_length / elem_size; // We count in elements, not bytes.
5844 int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5845
5846 assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5847 assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5848
5849 #ifndef PRODUCT
5850 {
5851 const char kind = (elem_size == 2) ? 'U' : 'L';
5852 char comment[64];
5853 snprintf(comment, sizeof comment, "array_equals%c{", kind);
5854 BLOCK_COMMENT(comment);
5855 }
5856 #endif
5857
5858 // if (a1 == a2)
5859 // return true;
5860 cmpoop(a1, a2); // May have read barriers for a1 and a2.
5861 br(EQ, SAME);
5862
5863 if (UseSimpleArrayEquals) {
5864 Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5865 // if (a1 == nullptr || a2 == nullptr)
5866 // return false;
5867 // a1 & a2 == 0 means (some-pointer is null) or
5868 // (very-rare-or-even-probably-impossible-pointer-values)
5869 // so, we can save one branch in most cases
5870 tst(a1, a2);
5871 mov(result, false);
5872 br(EQ, A_MIGHT_BE_NULL);
5873 // if (a1.length != a2.length)
5874 // return false;
5875 bind(A_IS_NOT_NULL);
5876 ldrw(cnt1, Address(a1, length_offset));
5877 // Increase loop counter by diff between base- and actual start-offset.
5878 addw(cnt1, cnt1, extra_length);
5879 lea(a1, Address(a1, start_offset));
5880 lea(a2, Address(a2, start_offset));
5881 // Check for short strings, i.e. smaller than wordSize.
5882 subs(cnt1, cnt1, elem_per_word);
5883 br(Assembler::LT, SHORT);
5884 // Main 8 byte comparison loop.
5885 bind(NEXT_WORD); {
5886 ldr(tmp1, Address(post(a1, wordSize)));
5887 ldr(tmp2, Address(post(a2, wordSize)));
5888 subs(cnt1, cnt1, elem_per_word);
5889 eor(tmp5, tmp1, tmp2);
5890 cbnz(tmp5, DONE);
5891 } br(GT, NEXT_WORD);
5892 // Last longword. In the case where length == 4 we compare the
5893 // same longword twice, but that's still faster than another
5894 // conditional branch.
5895 // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5896 // length == 4.
5897 if (log_elem_size > 0)
5898 lsl(cnt1, cnt1, log_elem_size);
5899 ldr(tmp3, Address(a1, cnt1));
5900 ldr(tmp4, Address(a2, cnt1));
5923 eorw(tmp5, tmp3, tmp4);
5924 cbnzw(tmp5, DONE);
5925 }
5926 bind(TAIL01);
5927 if (elem_size == 1) { // Only needed when comparing byte arrays.
5928 tbz(cnt1, 0, SAME); // 0-1 bytes left.
5929 {
5930 ldrb(tmp1, a1);
5931 ldrb(tmp2, a2);
5932 eorw(tmp5, tmp1, tmp2);
5933 cbnzw(tmp5, DONE);
5934 }
5935 }
5936 } else {
5937 Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5938 CSET_EQ, LAST_CHECK;
5939 mov(result, false);
5940 cbz(a1, DONE);
5941 ldrw(cnt1, Address(a1, length_offset));
5942 cbz(a2, DONE);
5943 // Increase loop counter by diff between base- and actual start-offset.
5944 addw(cnt1, cnt1, extra_length);
5945
5946 // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5947 // faster to perform another branch before comparing a1 and a2
5948 cmp(cnt1, (u1)elem_per_word);
5949 br(LE, SHORT); // short or same
5950 ldr(tmp3, Address(pre(a1, start_offset)));
5951 subs(zr, cnt1, stubBytesThreshold);
5952 br(GE, STUB);
5953 ldr(tmp4, Address(pre(a2, start_offset)));
5954 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5955
5956 // Main 16 byte comparison loop with 2 exits
5957 bind(NEXT_DWORD); {
5958 ldr(tmp1, Address(pre(a1, wordSize)));
5959 ldr(tmp2, Address(pre(a2, wordSize)));
5960 subs(cnt1, cnt1, 2 * elem_per_word);
5961 br(LE, TAIL);
5962 eor(tmp4, tmp3, tmp4);
5963 cbnz(tmp4, DONE);
5964 ldr(tmp3, Address(pre(a1, wordSize)));
5965 ldr(tmp4, Address(pre(a2, wordSize)));
5966 cmp(cnt1, (u1)elem_per_word);
5967 br(LE, TAIL2);
5968 cmp(tmp1, tmp2);
5969 } br(EQ, NEXT_DWORD);
5970 b(DONE);
5971
5972 bind(TAIL);
5973 eor(tmp4, tmp3, tmp4);
5974 eor(tmp2, tmp1, tmp2);
5975 lslv(tmp2, tmp2, tmp5);
5976 orr(tmp5, tmp4, tmp2);
5977 cmp(tmp5, zr);
5978 b(CSET_EQ);
5979
5980 bind(TAIL2);
5981 eor(tmp2, tmp1, tmp2);
5982 cbnz(tmp2, DONE);
5983 b(LAST_CHECK);
5984
5985 bind(STUB);
5986 ldr(tmp4, Address(pre(a2, start_offset)));
5987 if (elem_size == 2) { // convert to byte counter
5988 lsl(cnt1, cnt1, 1);
5989 }
5990 eor(tmp5, tmp3, tmp4);
5991 cbnz(tmp5, DONE);
5992 RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5993 assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5994 address tpc = trampoline_call(stub);
5995 if (tpc == nullptr) {
5996 DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5997 postcond(pc() == badAddress);
5998 return nullptr;
5999 }
6000 b(DONE);
6001
6002 // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
6003 // so, if a2 == null => return false(0), else return true, so we can return a2
6004 mov(result, a2);
6005 b(DONE);
6006 bind(SHORT);
6007 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
6008 ldr(tmp3, Address(a1, start_offset));
6009 ldr(tmp4, Address(a2, start_offset));
6010 bind(LAST_CHECK);
6011 eor(tmp4, tmp3, tmp4);
6012 lslv(tmp5, tmp4, tmp5);
6013 cmp(tmp5, zr);
6014 bind(CSET_EQ);
6015 cset(result, EQ);
6016 b(DONE);
6017 }
6018
6019 bind(SAME);
6020 mov(result, true);
6021 // That's it.
6022 bind(DONE);
6023
6024 BLOCK_COMMENT("} array_equals");
6025 postcond(pc() != badAddress);
6026 return pc();
6027 }
6028
6029 // Compare Strings
|
5013 }
5014
// Loads the class-loader-data field of the class that holds rmethod into rresult.
// rresult is used twice: it first receives the holder InstanceKlass* (via
// load_method_holder) and is then overwritten with the word read from that
// klass at InstanceKlass::class_loader_data_offset().
//   rresult - output register.
//   rmethod - register holding the Method* to start from.
5015 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5016 load_method_holder(rresult, rmethod); // rresult = InstanceKlass* holding rmethod
5017 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset())); // rresult = holder's class loader data
5018 }
5019
// Loads the InstanceKlass* that holds `method` into `holder` by chasing three
// pointers: Method -> ConstMethod -> ConstantPool -> pool holder.
// `holder` doubles as the temporary for every intermediate pointer, so each
// load below reads through the value produced by the previous one.
//   holder - output register (also used for the intermediate values).
//   method - register holding the Method*.
5020 void MacroAssembler::load_method_holder(Register holder, Register method) {
5021 ldr(holder, Address(method, Method::const_offset())); // holder = ConstMethod*
5022 ldr(holder, Address(holder, ConstMethod::constants_offset())); // holder = ConstantPool*
5023 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // holder = InstanceKlass*
5024 }
5025
5026 // Loads the obj's narrow (still encoded) klass value into dst.
5027 // Writes only dst; src, rscratch1 and rscratch2 are not named by this code.
5028 // Input:
5029 // src - the oop we want to load the klass from.
5030 // dst - output narrow klass.
// Only valid with UseCompactObjectHeaders (asserted below): the value is taken
// from the object's header word at mark_offset_in_bytes() rather than from a
// separate klass field.
// The result is not decoded; load_klass() follows this call with
// decode_klass_not_null() to obtain a usable Klass*.
// NOTE(review): this variant uses the 32-bit forms (ldrw/lsrw). A 32-bit LSR
// immediate can only shift by 0..31, so this presumes markWord::klass_shift < 32
// and that the klass bits live in the 32-bit word at mark_offset_in_bytes() —
// confirm against the markWord layout this revision is built with.
5031 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5032 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
// Read the 32-bit word located at mark_offset_in_bytes() inside the object.
5033 ldrw(dst, Address(src, oopDesc::mark_offset_in_bytes()));
// Drop the bits below markWord::klass_shift so only the bits above them remain.
5034 lsrw(dst, dst, markWord::klass_shift);
5035 }
5036
// Loads the klass of the object in src into dst, as a decoded Klass*.
// The VM flags are tested here in C++, so each call runs exactly one of the
// three sequences below:
//   - UseCompactObjectHeaders:    narrow klass taken from the header word, then decoded;
//   - UseCompressedClassPointers: 32-bit narrow klass read from the klass field, then decoded;
//   - otherwise:                  the klass field is read with a plain ldr and used as is.
//   dst - output register.
//   src - register holding the oop.
5037 void MacroAssembler::load_klass(Register dst, Register src) {
5038 if (UseCompactObjectHeaders) {
5039 load_narrow_klass_compact(dst, src); // dst = narrow klass extracted from the header word
5040 decode_klass_not_null(dst); // narrow -> Klass*; per its name, assumes the value is not null
5041 } else if (UseCompressedClassPointers) {
5042 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes())); // 32-bit load of the narrow klass field
5043 decode_klass_not_null(dst); // narrow -> Klass*; per its name, assumes the value is not null
5044 } else {
5045 ldr(dst, Address(src, oopDesc::klass_offset_in_bytes())); // klass field used directly, no decode step
5046 }
5047 }
5048
5049 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5050 if (RestoreMXCSROnJNICalls) {
5051 Label OK;
5052 get_fpcr(tmp1);
5053 mov(tmp2, tmp1);
5054 // Set FPCR to the state we need. We do want Round to Nearest. We
5808 b(DONE);
5809
5810 BIND(SET_RESULT);
5811
5812 add(len, len, wordSize);
5813 sub(result, result, len);
5814
5815 BIND(DONE);
5816 postcond(pc() != badAddress);
5817 return pc();
5818 }
5819
5820 // Clobbers: rscratch1, rscratch2, rflags
5821 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5822 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5823 Register tmp4, Register tmp5, Register result,
5824 Register cnt1, int elem_size) {
5825 Label DONE, SAME;
5826 Register tmp1 = rscratch1;
5827 Register tmp2 = rscratch2;
5828 Register cnt2 = tmp2; // cnt2 only used in array length compare
5829 int elem_per_word = wordSize/elem_size;
5830 int log_elem_size = exact_log2(elem_size);
5831 int length_offset = arrayOopDesc::length_offset_in_bytes();
5832 int base_offset
5833 = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5834 int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5835
5836 assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5837 assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5838
5839 #ifndef PRODUCT
5840 {
5841 const char kind = (elem_size == 2) ? 'U' : 'L';
5842 char comment[64];
5843 snprintf(comment, sizeof comment, "array_equals%c{", kind);
5844 BLOCK_COMMENT(comment);
5845 }
5846 #endif
5847
5848 // if (a1 == a2)
5849 // return true;
5850 cmpoop(a1, a2); // May have read barriers for a1 and a2.
5851 br(EQ, SAME);
5852
5853 if (UseSimpleArrayEquals) {
5854 Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5855 // if (a1 == nullptr || a2 == nullptr)
5856 // return false;
5857 // a1 & a2 == 0 means (some-pointer is null) or
5858 // (very-rare-or-even-probably-impossible-pointer-values)
5859 // so, we can save one branch in most cases
5860 tst(a1, a2);
5861 mov(result, false);
5862 br(EQ, A_MIGHT_BE_NULL);
5863 // if (a1.length != a2.length)
5864 // return false;
5865 bind(A_IS_NOT_NULL);
5866 ldrw(cnt1, Address(a1, length_offset));
5867 ldrw(cnt2, Address(a2, length_offset));
5868 eorw(tmp5, cnt1, cnt2);
5869 cbnzw(tmp5, DONE);
5870 lea(a1, Address(a1, base_offset));
5871 lea(a2, Address(a2, base_offset));
5872 // Check for short strings, i.e. smaller than wordSize.
5873 subs(cnt1, cnt1, elem_per_word);
5874 br(Assembler::LT, SHORT);
5875 // Main 8 byte comparison loop.
5876 bind(NEXT_WORD); {
5877 ldr(tmp1, Address(post(a1, wordSize)));
5878 ldr(tmp2, Address(post(a2, wordSize)));
5879 subs(cnt1, cnt1, elem_per_word);
5880 eor(tmp5, tmp1, tmp2);
5881 cbnz(tmp5, DONE);
5882 } br(GT, NEXT_WORD);
5883 // Last longword. In the case where length == 4 we compare the
5884 // same longword twice, but that's still faster than another
5885 // conditional branch.
5886 // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5887 // length == 4.
5888 if (log_elem_size > 0)
5889 lsl(cnt1, cnt1, log_elem_size);
5890 ldr(tmp3, Address(a1, cnt1));
5891 ldr(tmp4, Address(a2, cnt1));
5914 eorw(tmp5, tmp3, tmp4);
5915 cbnzw(tmp5, DONE);
5916 }
5917 bind(TAIL01);
5918 if (elem_size == 1) { // Only needed when comparing byte arrays.
5919 tbz(cnt1, 0, SAME); // 0-1 bytes left.
5920 {
5921 ldrb(tmp1, a1);
5922 ldrb(tmp2, a2);
5923 eorw(tmp5, tmp1, tmp2);
5924 cbnzw(tmp5, DONE);
5925 }
5926 }
5927 } else {
5928 Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5929 CSET_EQ, LAST_CHECK;
5930 mov(result, false);
5931 cbz(a1, DONE);
5932 ldrw(cnt1, Address(a1, length_offset));
5933 cbz(a2, DONE);
5934 ldrw(cnt2, Address(a2, length_offset));
5935 // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
5936 // faster to perform another branch before comparing a1 and a2
5937 cmp(cnt1, (u1)elem_per_word);
5938 br(LE, SHORT); // short or same
5939 ldr(tmp3, Address(pre(a1, base_offset)));
5940 subs(zr, cnt1, stubBytesThreshold);
5941 br(GE, STUB);
5942 ldr(tmp4, Address(pre(a2, base_offset)));
5943 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5944 cmp(cnt2, cnt1);
5945 br(NE, DONE);
5946
5947 // Main 16 byte comparison loop with 2 exits
5948 bind(NEXT_DWORD); {
5949 ldr(tmp1, Address(pre(a1, wordSize)));
5950 ldr(tmp2, Address(pre(a2, wordSize)));
5951 subs(cnt1, cnt1, 2 * elem_per_word);
5952 br(LE, TAIL);
5953 eor(tmp4, tmp3, tmp4);
5954 cbnz(tmp4, DONE);
5955 ldr(tmp3, Address(pre(a1, wordSize)));
5956 ldr(tmp4, Address(pre(a2, wordSize)));
5957 cmp(cnt1, (u1)elem_per_word);
5958 br(LE, TAIL2);
5959 cmp(tmp1, tmp2);
5960 } br(EQ, NEXT_DWORD);
5961 b(DONE);
5962
5963 bind(TAIL);
5964 eor(tmp4, tmp3, tmp4);
5965 eor(tmp2, tmp1, tmp2);
5966 lslv(tmp2, tmp2, tmp5);
5967 orr(tmp5, tmp4, tmp2);
5968 cmp(tmp5, zr);
5969 b(CSET_EQ);
5970
5971 bind(TAIL2);
5972 eor(tmp2, tmp1, tmp2);
5973 cbnz(tmp2, DONE);
5974 b(LAST_CHECK);
5975
5976 bind(STUB);
5977 ldr(tmp4, Address(pre(a2, base_offset)));
5978 cmp(cnt2, cnt1);
5979 br(NE, DONE);
5980 if (elem_size == 2) { // convert to byte counter
5981 lsl(cnt1, cnt1, 1);
5982 }
5983 eor(tmp5, tmp3, tmp4);
5984 cbnz(tmp5, DONE);
5985 RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
5986 assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
5987 address tpc = trampoline_call(stub);
5988 if (tpc == nullptr) {
5989 DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
5990 postcond(pc() == badAddress);
5991 return nullptr;
5992 }
5993 b(DONE);
5994
5995 // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
5996 // so, if a2 == null => return false(0), else return true, so we can return a2
5997 mov(result, a2);
5998 b(DONE);
5999 bind(SHORT);
6000 cmp(cnt2, cnt1);
6001 br(NE, DONE);
6002 cbz(cnt1, SAME);
6003 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
6004 ldr(tmp3, Address(a1, base_offset));
6005 ldr(tmp4, Address(a2, base_offset));
6006 bind(LAST_CHECK);
6007 eor(tmp4, tmp3, tmp4);
6008 lslv(tmp5, tmp4, tmp5);
6009 cmp(tmp5, zr);
6010 bind(CSET_EQ);
6011 cset(result, EQ);
6012 b(DONE);
6013 }
6014
6015 bind(SAME);
6016 mov(result, true);
6017 // That's it.
6018 bind(DONE);
6019
6020 BLOCK_COMMENT("} array_equals");
6021 postcond(pc() != badAddress);
6022 return pc();
6023 }
6024
6025 // Compare Strings
|