5023 }
5024
5025 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5026 load_method_holder(rresult, rmethod);
5027 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5028 }
5029
5030 void MacroAssembler::load_method_holder(Register holder, Register method) {
5031 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
5032 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
5033 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
5034 }
5035
5036 // Loads the obj's Klass* into dst.
5037 // Preserves all registers (incl src, rscratch1 and rscratch2).
5038 // Input:
5039 // src - the oop we want to load the klass from.
5040 // dst - output narrow klass.
5041 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5042 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5043 ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
5044 lsr(dst, dst, markWord::klass_shift);
5045 }
5046
5047 void MacroAssembler::load_klass(Register dst, Register src) {
5048 if (UseCompactObjectHeaders) {
5049 load_narrow_klass_compact(dst, src);
5050 decode_klass_not_null(dst);
5051 } else if (UseCompressedClassPointers) {
5052 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5053 decode_klass_not_null(dst);
5054 } else {
5055 ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5056 }
5057 }
5058
5059 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5060 if (RestoreMXCSROnJNICalls) {
5061 Label OK;
5062 get_fpcr(tmp1);
5063 mov(tmp2, tmp1);
5064 // Set FPCR to the state we need. We do want Round to Nearest. We
5878 b(DONE);
5879
5880 BIND(SET_RESULT);
5881
5882 add(len, len, wordSize);
5883 sub(result, result, len);
5884
5885 BIND(DONE);
5886 postcond(pc() != badAddress);
5887 return pc();
5888 }
5889
5890 // Clobbers: rscratch1, rscratch2, rflags
5891 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5892 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5893 Register tmp4, Register tmp5, Register result,
5894 Register cnt1, int elem_size) {
5895 Label DONE, SAME;
5896 Register tmp1 = rscratch1;
5897 Register tmp2 = rscratch2;
5898 int elem_per_word = wordSize/elem_size;
5899 int log_elem_size = exact_log2(elem_size);
5900 int klass_offset = arrayOopDesc::klass_offset_in_bytes();
5901 int length_offset = arrayOopDesc::length_offset_in_bytes();
5902 int base_offset
5903 = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5904 // When the length offset is not aligned to 8 bytes,
5905 // then we align it down. This is valid because the new
5906 // offset will always be the klass which is the same
5907 // for type arrays.
5908 int start_offset = align_down(length_offset, BytesPerWord);
5909 int extra_length = base_offset - start_offset;
5910 assert(start_offset == length_offset || start_offset == klass_offset,
5911 "start offset must be 8-byte-aligned or be the klass offset");
5912 assert(base_offset != start_offset, "must include the length field");
5913 extra_length = extra_length / elem_size; // We count in elements, not bytes.
5914 int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5915
5916 assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5917 assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5918
5919 #ifndef PRODUCT
5920 {
5921 const char kind = (elem_size == 2) ? 'U' : 'L';
5922 char comment[64];
5923 snprintf(comment, sizeof comment, "array_equals%c{", kind);
5924 BLOCK_COMMENT(comment);
5925 }
5926 #endif
5927
5928 // if (a1 == a2)
5929 // return true;
5930 cmpoop(a1, a2); // May have read barriers for a1 and a2.
5931 br(EQ, SAME);
5932
5933 if (UseSimpleArrayEquals) {
5934 Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5935 // if (a1 == nullptr || a2 == nullptr)
5936 // return false;
5937 // a1 & a2 == 0 means (some-pointer is null) or
5938 // (very-rare-or-even-probably-impossible-pointer-values)
5939 // so, we can save one branch in most cases
5940 tst(a1, a2);
5941 mov(result, false);
5942 br(EQ, A_MIGHT_BE_NULL);
5943 // if (a1.length != a2.length)
5944 // return false;
5945 bind(A_IS_NOT_NULL);
5946 ldrw(cnt1, Address(a1, length_offset));
5947 // Increase loop counter by diff between base- and actual start-offset.
5948 addw(cnt1, cnt1, extra_length);
5949 lea(a1, Address(a1, start_offset));
5950 lea(a2, Address(a2, start_offset));
5951 // Check for short strings, i.e. smaller than wordSize.
5952 subs(cnt1, cnt1, elem_per_word);
5953 br(Assembler::LT, SHORT);
5954 // Main 8 byte comparison loop.
5955 bind(NEXT_WORD); {
5956 ldr(tmp1, Address(post(a1, wordSize)));
5957 ldr(tmp2, Address(post(a2, wordSize)));
5958 subs(cnt1, cnt1, elem_per_word);
5959 eor(tmp5, tmp1, tmp2);
5960 cbnz(tmp5, DONE);
5961 } br(GT, NEXT_WORD);
5962 // Last longword. In the case where length == 4 we compare the
5963 // same longword twice, but that's still faster than another
5964 // conditional branch.
5965 // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5966 // length == 4.
5967 if (log_elem_size > 0)
5968 lsl(cnt1, cnt1, log_elem_size);
5969 ldr(tmp3, Address(a1, cnt1));
5970 ldr(tmp4, Address(a2, cnt1));
5993 eorw(tmp5, tmp3, tmp4);
5994 cbnzw(tmp5, DONE);
5995 }
5996 bind(TAIL01);
5997 if (elem_size == 1) { // Only needed when comparing byte arrays.
5998 tbz(cnt1, 0, SAME); // 0-1 bytes left.
5999 {
6000 ldrb(tmp1, a1);
6001 ldrb(tmp2, a2);
6002 eorw(tmp5, tmp1, tmp2);
6003 cbnzw(tmp5, DONE);
6004 }
6005 }
6006 } else {
6007 Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
6008 CSET_EQ, LAST_CHECK;
6009 mov(result, false);
6010 cbz(a1, DONE);
6011 ldrw(cnt1, Address(a1, length_offset));
6012 cbz(a2, DONE);
6013 // Increase loop counter by diff between base- and actual start-offset.
6014 addw(cnt1, cnt1, extra_length);
6015
6016 // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
6017 // faster to perform another branch before comparing a1 and a2
6018 cmp(cnt1, (u1)elem_per_word);
6019 br(LE, SHORT); // short or same
6020 ldr(tmp3, Address(pre(a1, start_offset)));
6021 subs(zr, cnt1, stubBytesThreshold);
6022 br(GE, STUB);
6023 ldr(tmp4, Address(pre(a2, start_offset)));
6024 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
6025
6026 // Main 16 byte comparison loop with 2 exits
6027 bind(NEXT_DWORD); {
6028 ldr(tmp1, Address(pre(a1, wordSize)));
6029 ldr(tmp2, Address(pre(a2, wordSize)));
6030 subs(cnt1, cnt1, 2 * elem_per_word);
6031 br(LE, TAIL);
6032 eor(tmp4, tmp3, tmp4);
6033 cbnz(tmp4, DONE);
6034 ldr(tmp3, Address(pre(a1, wordSize)));
6035 ldr(tmp4, Address(pre(a2, wordSize)));
6036 cmp(cnt1, (u1)elem_per_word);
6037 br(LE, TAIL2);
6038 cmp(tmp1, tmp2);
6039 } br(EQ, NEXT_DWORD);
6040 b(DONE);
6041
6042 bind(TAIL);
6043 eor(tmp4, tmp3, tmp4);
6044 eor(tmp2, tmp1, tmp2);
6045 lslv(tmp2, tmp2, tmp5);
6046 orr(tmp5, tmp4, tmp2);
6047 cmp(tmp5, zr);
6048 b(CSET_EQ);
6049
6050 bind(TAIL2);
6051 eor(tmp2, tmp1, tmp2);
6052 cbnz(tmp2, DONE);
6053 b(LAST_CHECK);
6054
6055 bind(STUB);
6056 ldr(tmp4, Address(pre(a2, start_offset)));
6057 if (elem_size == 2) { // convert to byte counter
6058 lsl(cnt1, cnt1, 1);
6059 }
6060 eor(tmp5, tmp3, tmp4);
6061 cbnz(tmp5, DONE);
6062 RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
6063 assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
6064 address tpc = trampoline_call(stub);
6065 if (tpc == nullptr) {
6066 DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
6067 postcond(pc() == badAddress);
6068 return nullptr;
6069 }
6070 b(DONE);
6071
6072 // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
6073 // so, if a2 == null => return false(0), else return true, so we can return a2
6074 mov(result, a2);
6075 b(DONE);
6076 bind(SHORT);
6077 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
6078 ldr(tmp3, Address(a1, start_offset));
6079 ldr(tmp4, Address(a2, start_offset));
6080 bind(LAST_CHECK);
6081 eor(tmp4, tmp3, tmp4);
6082 lslv(tmp5, tmp4, tmp5);
6083 cmp(tmp5, zr);
6084 bind(CSET_EQ);
6085 cset(result, EQ);
6086 b(DONE);
6087 }
6088
6089 bind(SAME);
6090 mov(result, true);
6091 // That's it.
6092 bind(DONE);
6093
6094 BLOCK_COMMENT("} array_equals");
6095 postcond(pc() != badAddress);
6096 return pc();
6097 }
6098
6099 // Compare Strings
|
5023 }
5024
5025 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
5026 load_method_holder(rresult, rmethod);
5027 ldr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
5028 }
5029
5030 void MacroAssembler::load_method_holder(Register holder, Register method) {
5031 ldr(holder, Address(method, Method::const_offset())); // ConstMethod*
5032 ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
5033 ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
5034 }
5035
5036 // Loads the obj's Klass* into dst.
5037 // Preserves all registers (incl src, rscratch1 and rscratch2).
5038 // Input:
5039 // src - the oop we want to load the klass from.
5040 // dst - output narrow klass.
5041 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
5042 assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");
5043 ldrw(dst, Address(src, oopDesc::mark_offset_in_bytes()));
5044 lsrw(dst, dst, markWord::klass_shift);
5045 }
5046
5047 void MacroAssembler::load_klass(Register dst, Register src) {
5048 if (UseCompactObjectHeaders) {
5049 load_narrow_klass_compact(dst, src);
5050 decode_klass_not_null(dst);
5051 } else if (UseCompressedClassPointers) {
5052 ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5053 decode_klass_not_null(dst);
5054 } else {
5055 ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5056 }
5057 }
5058
5059 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
5060 if (RestoreMXCSROnJNICalls) {
5061 Label OK;
5062 get_fpcr(tmp1);
5063 mov(tmp2, tmp1);
5064 // Set FPCR to the state we need. We do want Round to Nearest. We
5878 b(DONE);
5879
5880 BIND(SET_RESULT);
5881
5882 add(len, len, wordSize);
5883 sub(result, result, len);
5884
5885 BIND(DONE);
5886 postcond(pc() != badAddress);
5887 return pc();
5888 }
5889
5890 // Clobbers: rscratch1, rscratch2, rflags
5891 // May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
5892 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
5893 Register tmp4, Register tmp5, Register result,
5894 Register cnt1, int elem_size) {
5895 Label DONE, SAME;
5896 Register tmp1 = rscratch1;
5897 Register tmp2 = rscratch2;
5898 Register cnt2 = tmp2; // cnt2 only used in array length compare
5899 int elem_per_word = wordSize/elem_size;
5900 int log_elem_size = exact_log2(elem_size);
5901 int length_offset = arrayOopDesc::length_offset_in_bytes();
5902 int base_offset
5903 = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5904 int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
5905
5906 assert(elem_size == 1 || elem_size == 2, "must be char or byte");
5907 assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
5908
5909 #ifndef PRODUCT
5910 {
5911 const char kind = (elem_size == 2) ? 'U' : 'L';
5912 char comment[64];
5913 snprintf(comment, sizeof comment, "array_equals%c{", kind);
5914 BLOCK_COMMENT(comment);
5915 }
5916 #endif
5917
5918 // if (a1 == a2)
5919 // return true;
5920 cmpoop(a1, a2); // May have read barriers for a1 and a2.
5921 br(EQ, SAME);
5922
5923 if (UseSimpleArrayEquals) {
5924 Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
5925 // if (a1 == nullptr || a2 == nullptr)
5926 // return false;
5927 // a1 & a2 == 0 means (some-pointer is null) or
5928 // (very-rare-or-even-probably-impossible-pointer-values)
5929 // so, we can save one branch in most cases
5930 tst(a1, a2);
5931 mov(result, false);
5932 br(EQ, A_MIGHT_BE_NULL);
5933 // if (a1.length != a2.length)
5934 // return false;
5935 bind(A_IS_NOT_NULL);
5936 ldrw(cnt1, Address(a1, length_offset));
5937 ldrw(cnt2, Address(a2, length_offset));
5938 eorw(tmp5, cnt1, cnt2);
5939 cbnzw(tmp5, DONE);
5940 lea(a1, Address(a1, base_offset));
5941 lea(a2, Address(a2, base_offset));
5942 // Check for short strings, i.e. smaller than wordSize.
5943 subs(cnt1, cnt1, elem_per_word);
5944 br(Assembler::LT, SHORT);
5945 // Main 8 byte comparison loop.
5946 bind(NEXT_WORD); {
5947 ldr(tmp1, Address(post(a1, wordSize)));
5948 ldr(tmp2, Address(post(a2, wordSize)));
5949 subs(cnt1, cnt1, elem_per_word);
5950 eor(tmp5, tmp1, tmp2);
5951 cbnz(tmp5, DONE);
5952 } br(GT, NEXT_WORD);
5953 // Last longword. In the case where length == 4 we compare the
5954 // same longword twice, but that's still faster than another
5955 // conditional branch.
5956 // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
5957 // length == 4.
5958 if (log_elem_size > 0)
5959 lsl(cnt1, cnt1, log_elem_size);
5960 ldr(tmp3, Address(a1, cnt1));
5961 ldr(tmp4, Address(a2, cnt1));
5984 eorw(tmp5, tmp3, tmp4);
5985 cbnzw(tmp5, DONE);
5986 }
5987 bind(TAIL01);
5988 if (elem_size == 1) { // Only needed when comparing byte arrays.
5989 tbz(cnt1, 0, SAME); // 0-1 bytes left.
5990 {
5991 ldrb(tmp1, a1);
5992 ldrb(tmp2, a2);
5993 eorw(tmp5, tmp1, tmp2);
5994 cbnzw(tmp5, DONE);
5995 }
5996 }
5997 } else {
5998 Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
5999 CSET_EQ, LAST_CHECK;
6000 mov(result, false);
6001 cbz(a1, DONE);
6002 ldrw(cnt1, Address(a1, length_offset));
6003 cbz(a2, DONE);
6004 ldrw(cnt2, Address(a2, length_offset));
6005 // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
6006 // faster to perform another branch before comparing a1 and a2
6007 cmp(cnt1, (u1)elem_per_word);
6008 br(LE, SHORT); // short or same
6009 ldr(tmp3, Address(pre(a1, base_offset)));
6010 subs(zr, cnt1, stubBytesThreshold);
6011 br(GE, STUB);
6012 ldr(tmp4, Address(pre(a2, base_offset)));
6013 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
6014 cmp(cnt2, cnt1);
6015 br(NE, DONE);
6016
6017 // Main 16 byte comparison loop with 2 exits
6018 bind(NEXT_DWORD); {
6019 ldr(tmp1, Address(pre(a1, wordSize)));
6020 ldr(tmp2, Address(pre(a2, wordSize)));
6021 subs(cnt1, cnt1, 2 * elem_per_word);
6022 br(LE, TAIL);
6023 eor(tmp4, tmp3, tmp4);
6024 cbnz(tmp4, DONE);
6025 ldr(tmp3, Address(pre(a1, wordSize)));
6026 ldr(tmp4, Address(pre(a2, wordSize)));
6027 cmp(cnt1, (u1)elem_per_word);
6028 br(LE, TAIL2);
6029 cmp(tmp1, tmp2);
6030 } br(EQ, NEXT_DWORD);
6031 b(DONE);
6032
6033 bind(TAIL);
6034 eor(tmp4, tmp3, tmp4);
6035 eor(tmp2, tmp1, tmp2);
6036 lslv(tmp2, tmp2, tmp5);
6037 orr(tmp5, tmp4, tmp2);
6038 cmp(tmp5, zr);
6039 b(CSET_EQ);
6040
6041 bind(TAIL2);
6042 eor(tmp2, tmp1, tmp2);
6043 cbnz(tmp2, DONE);
6044 b(LAST_CHECK);
6045
6046 bind(STUB);
6047 ldr(tmp4, Address(pre(a2, base_offset)));
6048 cmp(cnt2, cnt1);
6049 br(NE, DONE);
6050 if (elem_size == 2) { // convert to byte counter
6051 lsl(cnt1, cnt1, 1);
6052 }
6053 eor(tmp5, tmp3, tmp4);
6054 cbnz(tmp5, DONE);
6055 RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
6056 assert(stub.target() != nullptr, "array_equals_long stub has not been generated");
6057 address tpc = trampoline_call(stub);
6058 if (tpc == nullptr) {
6059 DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
6060 postcond(pc() == badAddress);
6061 return nullptr;
6062 }
6063 b(DONE);
6064
6065 // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
6066 // so, if a2 == null => return false(0), else return true, so we can return a2
6067 mov(result, a2);
6068 b(DONE);
6069 bind(SHORT);
6070 cmp(cnt2, cnt1);
6071 br(NE, DONE);
6072 cbz(cnt1, SAME);
6073 sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
6074 ldr(tmp3, Address(a1, base_offset));
6075 ldr(tmp4, Address(a2, base_offset));
6076 bind(LAST_CHECK);
6077 eor(tmp4, tmp3, tmp4);
6078 lslv(tmp5, tmp4, tmp5);
6079 cmp(tmp5, zr);
6080 bind(CSET_EQ);
6081 cset(result, EQ);
6082 b(DONE);
6083 }
6084
6085 bind(SAME);
6086 mov(result, true);
6087 // That's it.
6088 bind(DONE);
6089
6090 BLOCK_COMMENT("} array_equals");
6091 postcond(pc() != badAddress);
6092 return pc();
6093 }
6094
6095 // Compare Strings
|