/*
 * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2023, Red Hat, Inc.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.hpp"
#include "ci/ciEnv.hpp"
#include "code/compiledIC.hpp"
#include "code/nativeInst.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "metaprogramming/primitiveConversions.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"

// Implementation of AddressLiteral

void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(_target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(_target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}


// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           Register vtable_index,
                                           Register method_result) {
  const ByteSize base_offset = Klass::vtable_start_offset() + vtableEntry::method_offset();
  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
  ldr(method_result, Address(recv_klass, base_offset));
}
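
// Illustrative arithmetic for the lookup above: the method is loaded from
// recv_klass + vtable_index * wordSize + in_bytes(base_offset); e.g. with
// vtable_index == 2 on 32-bit ARM this reads from
// recv_klass + 8 + in_bytes(base_offset). Note that recv_klass is clobbered
// by the add.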


// Simplified, combined version, good for typical uses.
// Falls through on failure.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Register temp_reg2,
                                         Register temp_reg3,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, nullptr);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, nullptr);
  bind(L_failure);
}

void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp_reg2,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path) {

  assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
  const Register super_check_offset = temp_reg2;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == nullptr)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == nullptr)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one null in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmp(sub_klass, super_klass);
  b(*L_success, eq);

  // Check the supertype display:
  ldr_u32(super_check_offset, super_check_offset_addr);

  Address super_check_addr(sub_klass, super_check_offset);
  ldr(temp_reg, super_check_addr); // load displayed supertype
  cmp(super_klass, temp_reg);

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  b(*L_success, eq);
  cmp_32(super_check_offset, sc_offset);
  if (L_failure == &L_fallthrough) {
    b(*L_slow_path, eq);
  } else {
    b(*L_failure, ne);
    if (L_slow_path != &L_fallthrough) {
      b(*L_slow_path);
    }
  }

  bind(L_fallthrough);
}


void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Register temp3_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  // Note: if used by code that expects a register to be 0 on success,
  // this register must be temp_reg and set_cond_codes must be true

  Register saved_reg = noreg;

  // get additional tmp registers
  if (temp3_reg == noreg) {
    saved_reg = temp3_reg = LR;
    push(saved_reg);
  }

  assert(temp2_reg != noreg, "need all the temporary registers");
  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);

  Register cmp_temp = temp_reg;
  Register scan_temp = temp3_reg;
  Register count_temp = temp2_reg;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one null in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(sub_klass, sc_offset);

#ifndef PRODUCT
  inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
#endif

  // We will consult the secondary-super array.
  ldr(scan_temp, Address(sub_klass, ss_offset));

  // Compressed oops are never used on 32-bit ARM, so the (uncompressed)
  // super_klass pointer itself serves as the search key.
  assert(!UseCompressedOops, "search_key must be the compressed super_klass");
  Register search_key = super_klass;

  // Load the array length.
  ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
  add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());

  add(count_temp, count_temp, 1);

  Label L_loop, L_fail;

  // Top of search loop
  bind(L_loop);
  // Notes:
  //  scan_temp starts at the array elements
  //  count_temp is 1+size
  subs(count_temp, count_temp, 1);
  if ((L_failure != &L_fallthrough) && (!set_cond_codes) && (saved_reg == noreg)) {
    // direct jump to L_failure if failed and no cleanup needed
    b(*L_failure, eq); // not found in the array
  } else {
    b(L_fail, eq); // not found in the array
  }

  // Load next super to check
  // In the array of super classes elements are pointer sized.
  int element_size = wordSize;
  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));

  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  subs(cmp_temp, cmp_temp, search_key);

  // A miss means we are NOT a subtype and need to keep looping
  b(L_loop, ne);

  // Falling out the bottom means we found a hit; we ARE a subtype

  // Note: temp_reg/cmp_temp is already 0 and flag Z is set

  // Success. Cache the super we found and proceed in triumph.
  str(super_klass, Address(sub_klass, sc_offset));

  if (saved_reg != noreg) {
    // Return success
    pop(saved_reg);
  }

  b(*L_success);

  bind(L_fail);
  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
  if (set_cond_codes) {
    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
  }
  if (saved_reg != noreg) {
    pop(saved_reg);
  }
  if (L_failure != &L_fallthrough) {
    b(*L_failure);
  }

  bind(L_fallthrough);
}

// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
  assert_different_registers(params_base, params_count);
  add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
  return Address(tmp, -Interpreter::stackElementSize);
}

void MacroAssembler::align(int modulus, int target) {
  int delta = target - offset();
  while ((offset() + delta) % modulus != 0) nop();
}
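
// Illustrative example: align(8) at offset() == 4 has delta == 0 and emits one
// 4-byte nop, advancing offset() to 8. More generally, nops are emitted until
// the position `delta` bytes ahead of the current offset is a multiple of
// `modulus`.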

void MacroAssembler::align(int modulus) {
  align(modulus, offset());
}

int MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                        Register last_java_fp,
                                        bool save_last_java_pc,
                                        Register tmp) {
  int pc_offset;
  if (last_java_fp != noreg) {
    // optional
    str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
    _fp_saved = true;
  } else {
    _fp_saved = false;
  }
  if (save_last_java_pc) {
    str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
    pc_offset = offset() + VM_Version::stored_pc_adjustment();
    _pc_saved = true;
  } else {
    _pc_saved = false;
    pc_offset = -1;
  }
  // According to the comment in javaFrameAnchor, SP must be saved last, so that
  // the other entries are valid when SP is set.

  // However, this is probably not a strong constraint since, for instance, PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order, but we have not added a StoreStore
  // barrier.

  // XXX: if the ordering is really important, PC should always be saved (without forgetting
  // to update oop_map offsets) and a StoreStore barrier might be needed.

  if (last_java_sp == noreg) {
    last_java_sp = SP; // always saved
  }
  str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));

  return pc_offset; // for oopmaps
}

void MacroAssembler::reset_last_Java_frame(Register tmp) {
  const Register Rzero = zero_register(tmp);
  str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
  if (_fp_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
  }
  if (_pc_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
  }
}


// Implementation of call_VM versions

void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 4, "cannot have more than 4 arguments");

  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.
  push(RegisterSet(R4) | R9ifScratched);
  mov(R4, SP);
  bic(SP, SP, StackAlignmentInBytes - 1);
  call(entry_point, relocInfo::runtime_call_type);
  mov(SP, R4);
  pop(RegisterSet(R4) | R9ifScratched);
}


void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 3, "cannot have more than 3 arguments");

  const Register tmp = Rtemp;
  assert_different_registers(oop_result, tmp);

  set_last_Java_frame(SP, FP, true, tmp);

#if R9_IS_SCRATCHED
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.

  // Note: cannot save R9 above the saved SP (some callers expect, for
  // instance, the Java stack top at the saved SP)
  // => once saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9.
  sub(SP, SP, 4);
  bic(SP, SP, StackAlignmentInBytes - 1);
  str(R9, Address(SP, 0));
#else
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // R9_IS_SCRATCHED

  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);

#if R9_IS_SCRATCHED
  ldr(R9, Address(SP, 0));
#endif
  ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));

  reset_last_Java_frame(tmp);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe();
  check_and_handle_earlyret();

  if (check_exceptions) {
    // check for pending exceptions
    ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
    cmp(tmp, 0);
    mov(Rexception_pc, PC, ne);
    b(StubRoutines::forward_exception_entry(), ne);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result_oop(oop_result, tmp);
  }
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  assert (arg_3 == R3, "fixed register for arg_3");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}
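
// Illustrative usage sketch (not from the original source): arguments must
// already be in their fixed registers, since the Register parameters above
// only assert the convention (R0 is reserved for Rthread inside
// call_VM_helper). `some_entry` is a hypothetical runtime entry point:
//   mov(R1, obj);                                           // arg_1 must be R1
//   call_VM(R0, CAST_FROM_FN_PTR(address, some_entry), R1); // oop result in R0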


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}

// Raw call, without saving/restoring registers, exception handling, etc.
// Mainly used from various stubs.
void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
  const Register tmp = Rtemp; // Rtemp free since scratched by call
  set_last_Java_frame(SP, FP, true, tmp);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    // Note: Saving also R10 for alignment.
    push(RegisterSet(R9, R10));
  }
#endif
  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    pop(RegisterSet(R9, R10));
  }
#endif
  reset_last_Java_frame(tmp);
}

void MacroAssembler::call_VM_leaf(address entry_point) {
  call_VM_leaf_helper(entry_point, 0);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  assert (arg_1 == R0, "fixed register for arg_1");
  call_VM_leaf_helper(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  call_VM_leaf_helper(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  call_VM_leaf_helper(entry_point, 3);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  assert (arg_4 == R3, "fixed register for arg_4");
  call_VM_leaf_helper(entry_point, 4);
}

void MacroAssembler::get_vm_result_oop(Register oop_result, Register tmp) {
  assert_different_registers(oop_result, tmp);
  ldr(oop_result, Address(Rthread, JavaThread::vm_result_oop_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_oop_offset()));
  verify_oop(oop_result);
}

void MacroAssembler::get_vm_result_metadata(Register metadata_result, Register tmp) {
  assert_different_registers(metadata_result, tmp);
  ldr(metadata_result, Address(Rthread, JavaThread::vm_result_metadata_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_metadata_offset()));
}

void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
  if (arg2.is_register()) {
    add(dst, arg1, arg2.as_register());
  } else {
    add(dst, arg1, arg2.as_constant());
  }
}

void MacroAssembler::add_slow(Register rd, Register rn, int c) {
  // This function is used in compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return sub(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    add(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
    add(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
}
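
// Worked example (illustrative): add_slow(rd, rn, 0x1234) splits the constant
// into low = 0x1234 & 0x3fc = 0x234 and the remainder 0x1000 (a valid rotated
// immediate), emitting add(rd, rn, 0x234) followed by add(rd, rd, 0x1000).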

void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
  // This function is used in compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return add(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    sub(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
    sub(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
}

void MacroAssembler::mov_slow(Register rd, address addr) {
  // do *not* call the non relocated mov_related_address
  mov_slow(rd, (intptr_t)addr);
}

void MacroAssembler::mov_slow(Register rd, const char *str) {
  mov_slow(rd, (intptr_t)str);
}


void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
  if (AsmOperand::is_rotated_imm(c)) {
    mov(rd, c, cond);
  } else if (AsmOperand::is_rotated_imm(~c)) {
    mvn(rd, ~c, cond);
  } else if (VM_Version::supports_movw()) {
    movw(rd, c & 0xffff, cond);
    if ((unsigned int)c >> 16) {
      movt(rd, (unsigned int)c >> 16, cond);
    }
  } else {
    // Find first non-zero bit
    int shift = 0;
    while ((c & (3 << shift)) == 0) {
      shift += 2;
    }
    // Put the least significant part of the constant
    int mask = 0xff << shift;
    mov(rd, c & mask, cond);
    // Add up to 3 other parts of the constant;
    // each of them can be represented as rotated_imm
    if (c & (mask << 8)) {
      orr(rd, rd, c & (mask << 8), cond);
    }
    if (c & (mask << 16)) {
      orr(rd, rd, c & (mask << 16), cond);
    }
    if (c & (mask << 24)) {
      orr(rd, rd, c & (mask << 24), cond);
    }
  }
}
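
// Worked example (illustrative): without movw/movt support,
// mov_slow(rd, 0x00ff00ff), where neither the constant nor its complement is
// a rotated immediate, finds shift == 0 and mask == 0xff, emitting:
//   mov(rd, 0x000000ff);
//   orr(rd, rd, 0x00ff0000);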


void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
                             AsmCondition cond) {

  if (o == nullptr) {
    mov(rd, 0, cond);
    return;
  }

  if (oop_index == 0) {
    oop_index = oop_recorder()->allocate_oop_index(o);
  }
  relocate(oop_Relocation::spec(oop_index));

  if (VM_Version::supports_movw()) {
    movw(rd, 0, cond);
    movt(rd, 0, cond);
  } else {
    ldr(rd, Address(PC), cond);
    // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
    nop();
  }
}

void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index) {
  if (o == nullptr) {
    mov(rd, 0);
    return;
  }

  if (metadata_index == 0) {
    metadata_index = oop_recorder()->allocate_metadata_index(o);
  }
  relocate(metadata_Relocation::spec(metadata_index));

  if (VM_Version::supports_movw()) {
    movw(rd, ((int)o) & 0xffff);
    movt(rd, (unsigned int)o >> 16);
  } else {
    ldr(rd, Address(PC));
    // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
    nop();
  }
}

void MacroAssembler::mov_float(FloatRegister fd, jfloat c, AsmCondition cond) {
  Label skip_constant;
  jint float_bits = PrimitiveConversions::cast<jint>(c);

  flds(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(float_bits);
  bind(skip_constant);
}

void MacroAssembler::mov_double(FloatRegister fd, jdouble c, AsmCondition cond) {
  Label skip_constant;
  union {
    jdouble d;
    jint i[2];
  } accessor;
  accessor.d = c;

  fldd(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
}

void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t)address_of_global;
  mov_slow(reg, addr & ~0xfff);
  ldr(reg, Address(reg, addr & 0xfff));
}

void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
  ldr_global_s32(reg, address_of_global);
}

void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t)address_of_global;
  mov_slow(reg, addr & ~0xfff);
  ldrb(reg, Address(reg, addr & 0xfff));
}

void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
  if (bits <= 8) {
    andr(rd, rn, (1 << bits) - 1);
  } else if (bits >= 24) {
    bic(rd, rn, -1 << bits);
  } else {
    mov(rd, AsmOperand(rn, lsl, 32 - bits));
    mov(rd, AsmOperand(rd, lsr, 32 - bits));
  }
}
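
// For instance (illustrative): zero_extend(rd, rn, 16) takes the shift-pair
// path (lsl #16 then lsr #16), zero_extend(rd, rn, 8) is a single and with
// 0xff, and zero_extend(rd, rn, 24) is a single bic.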

void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
  mov(rd, AsmOperand(rn, lsl, 32 - bits));
  mov(rd, AsmOperand(rd, asr, 32 - bits));
}


void MacroAssembler::cmpoop(Register obj1, Register obj2) {
  cmp(obj1, obj2);
}

void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
                               Register rn_lo, Register rn_hi,
                               AsmCondition cond) {
  if (rd_lo != rn_hi) {
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
  } else if (rd_hi != rn_lo) {
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
  } else {
    eor(rd_lo, rd_hi, rd_lo, cond);
    eor(rd_hi, rd_lo, rd_hi, cond);
    eor(rd_lo, rd_hi, rd_lo, cond);
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, Register count) {
  Register tmp;
  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
    tmp = rd_lo;
  } else {
    tmp = rd_hi;
  }
  assert_different_registers(tmp, count, rn_lo, rn_hi);

  subs(tmp, count, 32);
  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    assert_different_registers(count, rd_hi);
    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_hi == rn_hi) {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
    } else {
      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
    }
    mov(rd_lo, AsmOperand(rn_lo, shift, count));
  } else {
    assert_different_registers(rd_lo, rn_hi);
    assert_different_registers(rd_lo, count);
    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_lo == rn_lo) {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
    } else {
      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
    }
    mov(rd_hi, AsmOperand(rn_hi, shift, count));
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, int count) {
  assert(count != 0 && (count & ~63) == 0, "must be");

  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    if (count >= 32) {
      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
      mov(rd_lo, 0);
    } else {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
    }
  } else {
    assert_different_registers(rd_lo, rn_hi);
    if (count >= 32) {
      if (count == 32) {
        mov(rd_lo, rn_hi);
      } else {
        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
      }
      if (shift == asr) {
        mov(rd_hi, AsmOperand(rn_hi, asr, 0));
      } else {
        mov(rd_hi, 0);
      }
    } else {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
      mov(rd_hi, AsmOperand(rn_hi, shift, count));
    }
  }
}
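
// Worked example (illustrative): a 64-bit logical left shift by the constant
// 40 takes the count >= 32 path and emits mov(rd_hi, rn_lo << 8) followed by
// mov(rd_lo, 0); all surviving bits come from the low input word.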

void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it when making modifications.
  if (!VerifyOops) return;

  char buffer[64];
#ifdef COMPILER1
  if (CommentedAssembly) {
    os::snprintf_checked(buffer, sizeof(buffer), "verify_oop at %d", offset());
    block_comment(buffer);
  }
#endif
  const char* msg_buffer = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
    msg_buffer = code_string(ss.as_string());
  }

  save_all_registers();

  if (reg != R2) {
    mov(R2, reg); // oop to verify
  }
  mov(R1, SP);    // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg); // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
#ifdef COMPILER2
  int off = offset();
#endif
  bind_literal(Lmsg);
#ifdef COMPILER2
  if (offset() - off == 1 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
  bind(done);
}

void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
  if (!VerifyOops) return;

  const char* msg_buffer = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    if ((addr.base() == SP) && (addr.index() == noreg)) {
      ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
    } else {
      ss.print("verify_oop_addr: %s", s);
    }
    ss.print(" (%s:%d)", file, line);
    msg_buffer = code_string(ss.as_string());
  }

  int push_size = save_all_registers();

  if (addr.base() == SP) {
    // computes an addr that takes into account the push
    if (addr.index() != noreg) {
      Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
      add(new_base, SP, push_size);
      addr = addr.rebase(new_base);
    } else {
      addr = addr.plus_disp(push_size);
    }
  }

  ldr(R2, addr); // oop to verify
  mov(R1, SP);   // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg); // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
  bind_literal(Lmsg);
  bind(done);
}

void MacroAssembler::c2bool(Register x) {
  tst(x, 0xff); // Only look at the lowest byte
  mov(x, 1, ne);
}

void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
  if (needs_explicit_null_check(offset)) {
    assert_different_registers(reg, tmp);
    if (tmp == noreg) {
      tmp = Rtemp;
      assert((!Thread::current()->is_Compiler_thread()) ||
             (!(ciEnv::current()->task() == nullptr)) ||
             (!(ciEnv::current()->comp_level() == CompLevel_full_optimization)),
             "Rtemp not available in C2"); // explicit tmp register required
      // XXX: could we mark the code buffer as not compatible with C2 ?
    }
    ldr(tmp, Address(reg));
  }
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bs->tlab_allocate(this, obj, obj_end, tmp1, size_expression, slow_case);
}

// Fills the memory region [start, end) with zeroes (the store is conditional on ptr < end). Clobbers `start` and `tmp` registers.
void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
  Label loop;
  const Register ptr = start;

  mov(tmp, 0);
  bind(loop);
  cmp(ptr, end);
  str(tmp, Address(ptr, wordSize, post_indexed), lo);
  b(loop, lo);
}

void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
  // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
  const int page_size = (int)os::vm_page_size();

  sub_slow(tmp, SP, StackOverflow::stack_shadow_zone_size());
  strb(R0, Address(tmp));
  for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
    strb(R0, Address(tmp, -0xff0, pre_indexed));
  }
}

void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
  Label loop;

  mov(tmp, SP);
  add_slow(Rsize, Rsize, StackOverflow::stack_shadow_zone_size() - os::vm_page_size());
  bind(loop);
  subs(Rsize, Rsize, 0xff0);
  strb(R0, Address(tmp, -0xff0, pre_indexed));
  b(loop, hi);
}

void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it when making modifications.
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("stop");
  }
#endif

  InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
  InlinedString Lmsg(msg);

  // save all registers for further inspection
  save_all_registers();

  ldr_literal(R0, Lmsg); // message
  mov(R1, SP);           // register save area

  ldr_literal(PC, Ldebug); // call MacroAssembler::debug

  bind_literal(Lmsg);
  bind_literal(Ldebug);
}

void MacroAssembler::warn(const char* msg) {
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("warn");
  }
#endif

  InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
  InlinedString Lmsg(msg);
  Label done;

  int push_size = save_caller_save_registers();

  ldr_literal(R0, Lmsg);  // message
  ldr_literal(LR, Lwarn); // call warning

  call(LR);

  restore_caller_save_registers();

  b(done);
  bind_literal(Lmsg);
  bind_literal(Lwarn);
  bind(done);
}


int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it when making modifications.
  push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
  return 15*wordSize;
}

void MacroAssembler::restore_all_registers() {
  pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers
  add(SP, SP, wordSize);                       // discard saved PC
}

int MacroAssembler::save_caller_save_registers() {
#if R9_IS_SCRATCHED
  // Save also R10 to preserve alignment
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
  return 8*wordSize;
#else
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
  return 6*wordSize;
#endif
}

void MacroAssembler::restore_caller_save_registers() {
#if R9_IS_SCRATCHED
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
#else
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
#endif
}

void MacroAssembler::debug(const char* msg, const intx* registers) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  thread->set_thread_state(_thread_in_vm);

  if (ShowMessageBoxOnError) {
    ttyLocker ttyl;
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      BytecodeCounter::print();
    }
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      // saved registers: R0-R12, LR, PC
      const int nregs = 15;
      const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};

      for (int i = 0; i < nregs; i++) {
        tty->print_cr("%s = " INTPTR_FORMAT, regs[i]->name(), registers[i]);
      }

      // derive original SP value from the address of register save area
      tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
    }
    BREAKPOINT;
  } else {
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  assert(false, "DEBUG MESSAGE: %s", msg);
  fatal("%s", msg); // returning from MacroAssembler::debug is not supported
}

void MacroAssembler::unimplemented(const char* what) {
  const char* buf = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("unimplemented: %s", what);
    buf = code_string(ss.as_string());
  }
  stop(buf);
}


// Implementation of FixedSizeCodeBlock

FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
  _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
}

FixedSizeCodeBlock::~FixedSizeCodeBlock() {
  if (_enabled) {
    address curr_pc = _masm->pc();

    assert(_start < curr_pc, "invalid current pc");
    guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");

    int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
    for (int i = 0; i < nops_count; i++) {
      _masm->nop();
    }
  }
}


// Serializes memory. Potentially blows flags and reg.
// tmp is a scratch register for the v6 co-processor write op (can be noreg for other architecture versions)
// preserve_flags takes a longer path in the LoadStore case (a dmb rather than a control dependency) to preserve status flags. Optional.
// load_tgt is an ordered load target, used in the LoadStore case only to create a dependency between the load operation and the conditional branch. Optional.
void MacroAssembler::membar(Membar_mask_bits order_constraint,
                            Register tmp,
                            bool preserve_flags,
                            Register load_tgt) {

  if (order_constraint == StoreStore) {
    dmb(DMB_st, tmp);
  } else if ((order_constraint & StoreLoad)  ||
             (order_constraint & LoadLoad)   ||
             (order_constraint & StoreStore) ||
             (load_tgt == noreg)             ||
             preserve_flags) {
    dmb(DMB_all, tmp);
  } else {
    // LoadStore: reordering of speculative stores is prohibited

    // By providing an ordered load target register, we avoid an extra memory load reference
    Label not_taken;
    bind(not_taken);
    cmp(load_tgt, load_tgt);
    b(not_taken, ne);
  }
}


// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
// on failure, so fall-through can only mean success.
// "one_shot" controls whether we loop and retry to mitigate spurious failures.
// This is only needed for C2, which for some reason does not retry,
// while C1/interpreter does.
// TODO: measure if it makes a difference

void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot) {

  bool fallthrough_is_success = false;

  // ARM Litmus Test example does prefetching here.
  // TODO: investigate if it helps performance

  // The last store was to the displaced header, so to prevent
  // reordering we must issue a StoreStore or Release barrier before
  // the CAS store.

  membar(MacroAssembler::StoreStore, noreg);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }

  // Here, on success, EQ is set, NE otherwise

  // MemBarAcquireLock barrier
  // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
  // but that doesn't prevent a load or store from floating up between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  // Note: we preserve flags here.
  // Todo: Do we really need this also for the CAS fail case?
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }
}

void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot) {
  bool fallthrough_is_success = false;

  assert_different_registers(oldval, newval, base, tmp);

  // MemBarReleaseLock barrier
  // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
  // but that doesn't prevent a load or store from floating down between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }

  // ExitEnter
  // According to JSR-133 Cookbook, this should be StoreLoad, the same
  // barrier that follows volatile store.
  // TODO: Should be able to remove on armv8 if volatile loads
  // use the load-acquire instruction.
  membar(StoreLoad, noreg);
}

#ifndef PRODUCT

// Preserves flags and all registers.
// On SMP the updated value might not be visible to external observers without a synchronization barrier
void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
  if (counter_addr != nullptr) {
    InlinedAddress counter_addr_literal((address)counter_addr);
    Label done, retry;
    if (cond != al) {
      b(done, inverse(cond));
    }

    push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
    ldr_literal(R0, counter_addr_literal);

    mrs(CPSR, Rtemp);

    bind(retry);
    ldr_s32(R1, Address(R0));
    add(R2, R1, 1);
    atomic_cas_bool(R1, R2, R0, 0, R3);
    b(retry, ne);

    msr(CPSR_fsxc, Rtemp);

    pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));

    b(done);
    bind_literal(counter_addr_literal);

    bind(done);
  }
}

#endif // !PRODUCT

void MacroAssembler::resolve_jobject(Register value,
                                     Register tmp1,
                                     Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done, tagged, weak_tagged;

  cbz(value, done);                 // Use null as-is.
  tst(value, JNIHandles::tag_mask); // Test for tag.
  b(tagged, ne);

  // Resolve local handle
  access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, Address(value, 0), value, tmp1, tmp2, noreg);
  verify_oop(value);
  b(done);

  bind(tagged);
  tst(value, JNIHandles::TypeTag::weak_global); // Test for weak tag.
  b(weak_tagged, ne);

  // Resolve global handle
  access_load_at(T_OBJECT, IN_NATIVE, Address(value, -JNIHandles::TypeTag::global), value, tmp1, tmp2, noreg);
  verify_oop(value);
  b(done);

  bind(weak_tagged);
  // Resolve jweak.
  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
                 Address(value, -JNIHandles::TypeTag::weak_global), value, tmp1, tmp2, noreg);
  verify_oop(value);

  bind(done);
}

void MacroAssembler::resolve_global_jobject(Register value,
                                            Register tmp1,
                                            Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done;

  cbz(value, done); // Use null as-is.

#ifdef ASSERT
  {
    Label valid_global_tag;
    tst(value, JNIHandles::TypeTag::global); // Test for global tag.
    b(valid_global_tag, ne);
    stop("non global jobject using resolve_global_jobject");
    bind(valid_global_tag);
  }
#endif

  // Resolve global handle
  access_load_at(T_OBJECT, IN_NATIVE, Address(value, -JNIHandles::TypeTag::global), value, tmp1, tmp2, noreg);
  verify_oop(value);

  bind(done);
}


//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::load_sized_value(Register dst, Address src,
                                      size_t size_in_bytes, bool is_signed, AsmCondition cond) {
  switch (size_in_bytes) {
  case 4: ldr(dst, src, cond); break;
  case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
  case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
  default: ShouldNotReachHere();
  }
}


void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
  switch (size_in_bytes) {
  case 4: str(src, dst, cond); break;
  case 2: strh(src, dst, cond); break;
  case 1: strb(src, dst, cond); break;
  default: ShouldNotReachHere();
  }
}

// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <Rinterf, Rindex>.
// The receiver klass is in Rklass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register Rklass,
                                             Register Rintf,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register Rscan,
                                             Register Rtmp,
                                             Label& L_no_such_interface) {

  assert_different_registers(Rklass, Rintf, Rscan, Rtmp);

  const int entry_size = itableOffsetEntry::size() * HeapWordSize;
  assert(itableOffsetEntry::interface_offset() == 0, "not added for convenience");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  const int base = in_bytes(Klass::vtable_start_offset());
  const int scale = exact_log2(vtableEntry::size_in_bytes());
  ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
  add(Rscan, Rklass, base);
  add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));

  // Search through the itable for an interface equal to incoming Rintf
  // itable looks like [intface][offset][intface][offset][intface][offset]

  Label loop;
  bind(loop);
  ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
  cmp(Rtmp, Rintf); // set ZF and CF if interface is found
  cmn(Rtmp, 0, ne); // check if tmp == 0 and clear CF if it is
  b(loop, ne);

  // CF == 0 means we reached the end of the itable without finding the interface
  b(L_no_such_interface, cc);

  if (method_result != noreg) {
    // Interface found at previous position of Rscan, now load the method
    ldr_s32(Rtmp, Address(Rscan, in_bytes(itableOffsetEntry::offset_offset()) - entry_size));
    if (itable_index.is_register()) {
      add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
      assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
      assert(itableMethodEntry::method_offset() == 0, "adjust the offset in the code below");
      ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
    } else {
      int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
                          in_bytes(itableMethodEntry::method_offset());
      add_slow(method_result, Rklass, method_offset);
      ldr(method_result, Address(method_result, Rtmp));
    }
  }
}


void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
  mov_slow(tmpreg1, counter_addr);
  ldr_s32(tmpreg2, tmpreg1);
  add_32(tmpreg2, tmpreg2, 1);
  str_32(tmpreg2, tmpreg1);
}

void MacroAssembler::floating_cmp(Register dst) {
  vmrs(dst, FPSCR);
  orr(dst, dst, 0x08000000);
  eor(dst, dst, AsmOperand(dst, lsl, 3));
  mov(dst, AsmOperand(dst, asr, 30));
}
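
// Sketch of the trick above (derived from the FPSCR layout, N/Z/C/V in bits
// 31..28): setting bit 27 and folding with dst ^ (dst << 3) leaves N^V in
// bit 31 and Z^1 in bit 30, so asr #30 yields -1 for less (or unordered),
// 0 for equal, and +1 for greater.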

void MacroAssembler::restore_default_fp_mode() {
#ifndef __SOFTFP__
  // Round to Near mode, IEEE compatible, masked exceptions
  mov(Rtemp, 0);
  vmsr(FPSCR, Rtemp);
#endif // !__SOFTFP__
}

// 24-bit word range == 26-bit byte range
bool check26(int offset) {
  // this could be simplified, but it mimics encoding and decoding
  // an actual branch instruction
  int off1 = offset << 6 >> 8;
  int encoded = off1 & ((1 << 24) - 1);
  int decoded = encoded << 8 >> 6;
  return offset == decoded;
}
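
// Worked example (illustrative): check26(0x01fffffc) survives the round trip
// (encoded 0x7fffff, decoded 0x01fffffc), while check26(0x02000000) decodes
// to -0x02000000 and is rejected; the reachable range is about +/-32MB.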

// Perform some slight adjustments so the default 32MB code cache
// is fully reachable.
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}
static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}


// Can we reach target using unconditional branch or call from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::_reachable_from_cache(address target) {
#ifdef __thumb__
  if ((1 & (intptr_t)target) != 0) {
    // Return false to avoid 'b' if we need switching to THUMB mode.
    return false;
  }
#endif

  address cl = first_cache_address();
  address ch = last_cache_address();

  if (ForceUnreachable) {
    // Only addresses from CodeCache can be treated as reachable.
    if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
      return false;
    }
  }

  intptr_t loffset = (intptr_t)target - (intptr_t)cl;
  intptr_t hoffset = (intptr_t)target - (intptr_t)ch;

  return check26(loffset - 8) && check26(hoffset - 8);
}

bool MacroAssembler::reachable_from_cache(address target) {
  assert(CodeCache::contains(pc()), "not supported");
  return _reachable_from_cache(target);
}

// Can we reach the entire code cache from anywhere else in the code cache?
bool MacroAssembler::_cache_fully_reachable() {
  address cl = first_cache_address();
  address ch = last_cache_address();
  return _reachable_from_cache(cl) && _reachable_from_cache(ch);
}

bool MacroAssembler::cache_fully_reachable() {
  assert(CodeCache::contains(pc()), "not supported");
  return _cache_fully_reachable();
}

void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch, AsmCondition cond) {
  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
  if (reachable_from_cache(target)) {
    relocate(rtype);
    b(target, cond);
    return;
  }

  // Note: relocate is not needed for the code below,
  // which encodes targets in absolute format.
  if (ignore_non_patchable_relocations()) {
    rtype = relocInfo::none;
  }

  if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
    // Note: this version cannot be (atomically) patched
    mov_slow(scratch, (intptr_t)target, cond);
    bx(scratch, cond);
  } else {
    Label skip;
    InlinedAddress address_literal(target);
    if (cond != al) {
      b(skip, inverse(cond));
    }
    relocate(rtype);
    ldr_literal(PC, address_literal);
    bind_literal(address_literal);
    bind(skip);
  }
}

// Similar to jump except that:
// - near calls are valid only if any destination in the cache is near
// - no movt/movw (not atomically patchable)
void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch, AsmCondition cond) {
  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
  if (cache_fully_reachable()) {
    // Note: this assumes that all possible targets (the initial one
    // and the addresses patched to) are all in the code cache.
    assert(CodeCache::contains(target), "target might be too far");
    relocate(rtype);
    b(target, cond);
    return;
  }

  // Discard the relocation information if not needed for CacheCompiledCode
  // since the next encodings are all in absolute format.
  if (ignore_non_patchable_relocations()) {
    rtype = relocInfo::none;
  }

  {
    Label skip;
    InlinedAddress address_literal(target);
    if (cond != al) {
      b(skip, inverse(cond));
    }
    relocate(rtype);
    ldr_literal(PC, address_literal);
    bind_literal(address_literal);
    bind(skip);
  }
}

void MacroAssembler::call(address target, RelocationHolder rspec, AsmCondition cond) {
  Register scratch = LR;
  assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
  if (reachable_from_cache(target)) {
    relocate(rspec);
    bl(target, cond);
    return;
  }

  // Note: relocate is not needed for the code below,
  // which encodes targets in absolute format.
1585 if (ignore_non_patchable_relocations()) {
1586 // This assumes the information was needed only for relocating the code.
1587 rspec = RelocationHolder::none;
1588 }
1589
1590 if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
1591 // Note: this version cannot be (atomically) patched
1592 mov_slow(scratch, (intptr_t)target, cond);
1593 blx(scratch, cond);
1594 return;
1595 }
1596
1597 {
1598 Label ret_addr;
1599 if (cond != al) {
1600 b(ret_addr, inverse(cond));
1601 }
1602
1603
1604 InlinedAddress address_literal(target);
1605 relocate(rspec);
1606 adr(LR, ret_addr);
1607 ldr_literal(PC, address_literal);
1608
1609 bind_literal(address_literal);
1610 bind(ret_addr);
1611 }
1612 }
1613
1614
1615 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
1616 assert(rspec.type() == relocInfo::static_call_type ||
1617 rspec.type() == relocInfo::none ||
1618 rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
1619
1620 // Always generate the relocation information, needed for patching
1621 relocate(rspec); // used by NativeCall::is_call_before()
1622 if (cache_fully_reachable()) {
1623 // Note: this assumes that all possible targets (the initial one
1624 // and the addresses patched to) are all in the code cache.
1625 assert(CodeCache::contains(target), "target might be too far");
1626 bl(target);
1627 } else {
1628 Label ret_addr;
1629 InlinedAddress address_literal(target);
1630 adr(LR, ret_addr);
1631 ldr_literal(PC, address_literal);
1632 bind_literal(address_literal);
1633 bind(ret_addr);
1634 }
1635 return offset();
1636 }
1637
1638 // ((OopHandle)result).resolve();
1639 void MacroAssembler::resolve_oop_handle(Register result) {
1640 // OopHandle::resolve is an indirection.
1641 ldr(result, Address(result, 0));
1642 }
1643
1644 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
1645 const int mirror_offset = in_bytes(Klass::java_mirror_offset());
1646 ldr(tmp, Address(method, Method::const_offset()));
1647 ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
1648 ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset()));
1649 ldr(mirror, Address(tmp, mirror_offset));
1650 resolve_oop_handle(mirror);
1651 }
1652
1653
1654 ///////////////////////////////////////////////////////////////////////////////
1655
1656 // Compressed pointers
1657
1658
void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
  ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
}

// Blows src_klass.
void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
  str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
}



void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
  access_load_at(T_OBJECT, IN_HEAP | decorators, src, dst, tmp1, tmp2, tmp3);
}

// Blows src and flags.
void MacroAssembler::store_heap_oop(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
  access_store_at(T_OBJECT, IN_HEAP | decorators, obj, new_val, tmp1, tmp2, tmp3, false);
}

void MacroAssembler::store_heap_oop_null(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
  access_store_at(T_OBJECT, IN_HEAP, obj, new_val, tmp1, tmp2, tmp3, true);
}

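// Common access path for heap loads and stores: after decorator fixup,
// AS_RAW accesses call the base BarrierSetAssembler directly (no GC
// barriers), while all other accesses dispatch virtually to the active
// GC's barrier set assembler.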
void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
                                    Address src, Register dst, Register tmp1, Register tmp2, Register tmp3) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  decorators = AccessInternal::decorator_fixup(decorators, type);
  bool as_raw = (decorators & AS_RAW) != 0;
  if (as_raw) {
    bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
  } else {
    bs->load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
  }
}

void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
                                     Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  decorators = AccessInternal::decorator_fixup(decorators, type);
  bool as_raw = (decorators & AS_RAW) != 0;
  if (as_raw) {
    bs->BarrierSetAssembler::store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
  } else {
    bs->store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
  }
}

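// Checks the thread-local poll word and branches to slow_path when a
// safepoint or handshake is pending.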
void MacroAssembler::safepoint_poll(Register tmp1, Label& slow_path) {
  ldr_u32(tmp1, Address(Rthread, JavaThread::polling_word_offset()));
  tst(tmp1, SafepointMechanism::poll_bit());
  b(slow_path, ne); // poll bit set -> take the slow path
}

void MacroAssembler::get_polling_page(Register dest) {
  ldr(dest, Address(Rthread, JavaThread::polling_page_offset()));
}

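// Loads from the polling page; the preceding relocation (a poll_type or
// poll_return_type rtype is expected here) lets the VM recognize the load
// as a safepoint poll.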
void MacroAssembler::read_polling_page(Register dest, relocInfo::relocType rtype) {
  get_polling_page(dest);
  relocate(rtype);
  ldr(dest, Address(dest));
}

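// Helper macros for fast_lock/fast_unlock below. Bits 0..2 of 'mask' select
// t1..t3: PUSH_REGS/POP_REGS save and restore the selected registers around
// the fast path, and POISON_REGS overwrites the unselected (blown) ones with
// a marker value in debug builds to catch stale uses.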
#define PUSH_REG(mask, bit, Reg) \
  if (mask & ((unsigned)1 << bit)) { \
    push(Reg); \
  }

#define POP_REG(mask, bit, Reg, condition) \
  if (mask & ((unsigned)1 << bit)) { \
    pop(Reg, condition); \
  }

#define PUSH_REGS(mask, R1, R2, R3) \
  PUSH_REG(mask, 0, R1) \
  PUSH_REG(mask, 1, R2) \
  PUSH_REG(mask, 2, R3)

// Pop in the reverse order of PUSH_REGS, so that each register
// is restored with the value it pushed.
#define POP_REGS(mask, R1, R2, R3, condition) \
  POP_REG(mask, 2, R3, condition) \
  POP_REG(mask, 1, R2, condition) \
  POP_REG(mask, 0, R1, condition)

#define POISON_REG(mask, bit, Reg, poison) \
  if (mask & ((unsigned)1 << bit)) { \
    mov(Reg, poison); \
  }

#define POISON_REGS(mask, R1, R2, R3, poison) \
  POISON_REG(mask, 0, R1, poison) \
  POISON_REG(mask, 1, R2, poison) \
  POISON_REG(mask, 2, R3, poison)

// Attempt to fast-lock an object
// Registers:
//  - obj: the object to be locked
//  - t1, t2, t3: temp registers. If corresponding bit in savemask is set, they get saved, otherwise blown.
// Result:
//  - Success: fallthrough
//  - Error:   break to slow, Z cleared.
void MacroAssembler::fast_lock(Register obj, Register t1, Register t2, Register t3, unsigned savemask, Label& slow) {
  assert_different_registers(obj, t1, t2, t3);

#ifdef ASSERT
  // Poison scratch regs
  POISON_REGS((~savemask), t1, t2, t3, 0x10000001);
#endif

  PUSH_REGS(savemask, t1, t2, t3);

  // Check if we would have space on lock-stack for the object.
  ldr(t1, Address(Rthread, JavaThread::lock_stack_top_offset()));
  // cmp(t1, (unsigned)LockStack::end_offset()); // constant 1132 (0x46c) is not encodable as an ARM immediate
  movw(t2, LockStack::end_offset() - 1);
  cmp(t1, t2); // lock-stack is full if top offset > end_offset() - 1
  POP_REGS(savemask, t1, t2, t3, gt);
  b(slow, gt); // Z is cleared

  // Prepare old, new header
  Register old_hdr = t1;
  Register new_hdr = t2;
  ldr(new_hdr, Address(obj, oopDesc::mark_offset_in_bytes()));
  bic(new_hdr, new_hdr, markWord::lock_mask_in_place); // new header (00, locked)
  orr(old_hdr, new_hdr, markWord::unlocked_value);     // old header (01, unlocked)

  Label dummy;

  cas_for_lock_acquire(old_hdr /* old */, new_hdr /* new */,
                       obj /* location */, t3 /* scratch */, dummy,
                       true /* allow_fallthrough_on_failure */, true /* one_shot */);

  POP_REGS(savemask, t1, t2, t3, ne); // CAS failed -> slow
  b(slow, ne);                        // CAS failed -> slow

  // After successful lock, push object onto lock-stack
  ldr(t1, Address(Rthread, JavaThread::lock_stack_top_offset()));
  str(obj, Address(Rthread, t1));
  add(t1, t1, oopSize);
  str(t1, Address(Rthread, JavaThread::lock_stack_top_offset()));

  POP_REGS(savemask, t1, t2, t3, al);

#ifdef ASSERT
  // Poison scratch regs
  POISON_REGS((~savemask), t1, t2, t3, 0x20000002);
#endif

  // Success: fall through
}

// Attempt to fast-unlock an object
// Registers:
//  - obj: the object to be unlocked
//  - t1, t2, t3: temp registers. If corresponding bit in savemask is set, they get saved, otherwise blown.
// Result:
//  - Success: fallthrough
//  - Error:   break to slow, Z cleared.
void MacroAssembler::fast_unlock(Register obj, Register t1, Register t2, Register t3, unsigned savemask, Label& slow) {
  assert_different_registers(obj, t1, t2, t3);

#ifdef ASSERT
  // Poison scratch regs
  POISON_REGS((~savemask), t1, t2, t3, 0x30000003);
#endif

  PUSH_REGS(savemask, t1, t2, t3);

  // Prepare old, new header
  Register old_hdr = t1;
  Register new_hdr = t2;
  ldr(old_hdr, Address(obj, oopDesc::mark_offset_in_bytes()));
  bic(old_hdr, old_hdr, markWord::lock_mask_in_place); // old header (00, locked)
  orr(new_hdr, old_hdr, markWord::unlocked_value);     // new header (01, unlocked)

  // Try to swing header from locked to unlocked
  Label dummy;
  cas_for_lock_release(old_hdr /* old */, new_hdr /* new */,
                       obj /* location */, t3 /* scratch */, dummy,
                       true /* allow_fallthrough_on_failure */, true /* one_shot */);

  POP_REGS(savemask, t1, t2, t3, ne); // CAS failed -> slow
  b(slow, ne);                        // CAS failed -> slow

  // After successful unlock, pop object from lock-stack
  ldr(t1, Address(Rthread, JavaThread::lock_stack_top_offset()));
  sub(t1, t1, oopSize);
  str(t1, Address(Rthread, JavaThread::lock_stack_top_offset()));

#ifdef ASSERT
  // zero out popped slot
  mov(t2, 0);
  str(t2, Address(Rthread, t1));
#endif

  POP_REGS(savemask, t1, t2, t3, al);

#ifdef ASSERT
  // Poison scratch regs
  POISON_REGS((~savemask), t1, t2, t3, 0x40000004);
#endif

  // Fallthrough: success
}

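// Upper bound in bytes on the code emitted by ic_check(); ic_check() uses
// it to pad in front of the check so the verified entry point that follows
// ends up aligned.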
int MacroAssembler::ic_check_size() {
  return NativeInstruction::instruction_size * 7;
}

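// Inline cache check: compares the receiver's klass against the klass
// speculated by the inline cache and jumps to the IC miss stub on a
// mismatch. Returns the offset of the unverified entry point (UEP).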
int MacroAssembler::ic_check(int end_alignment) {
  Register receiver = j_rarg0;
  Register tmp1 = R4;
  Register tmp2 = R5;

  // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed
  // before the inline cache check, so we don't have to execute any nop instructions when dispatching
  // through the UEP, yet we can ensure that the VEP is aligned appropriately. That's why we align
  // before the inline cache check here, and not after it.
  align(end_alignment, offset() + ic_check_size());

  int uep_offset = offset();

  ldr(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
  ldr(tmp2, Address(Ricklass, CompiledICData::speculated_klass_offset()));
  cmp(tmp1, tmp2);

  Label dont;
  b(dont, eq);
  jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
  bind(dont);
  return uep_offset;
}