1 /*
2 * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
4 * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 *
7 * This code is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "precompiled.hpp"
28 #include "asm/assembler.hpp"
29 #include "asm/assembler.inline.hpp"
30 #include "compiler/disassembler.hpp"
31 #include "gc/shared/barrierSet.hpp"
32 #include "gc/shared/barrierSetAssembler.hpp"
33 #include "gc/shared/cardTable.hpp"
34 #include "gc/shared/cardTableBarrierSet.hpp"
35 #include "gc/shared/collectedHeap.hpp"
36 #include "interpreter/bytecodeHistogram.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "memory/resourceArea.hpp"
39 #include "memory/universe.hpp"
40 #include "nativeInst_riscv.hpp"
41 #include "oops/accessDecorators.hpp"
42 #include "oops/compressedOops.inline.hpp"
43 #include "oops/klass.inline.hpp"
44 #include "oops/oop.hpp"
45 #include "runtime/interfaceSupport.inline.hpp"
46 #include "runtime/javaThread.hpp"
47 #include "runtime/jniHandles.inline.hpp"
48 #include "runtime/sharedRuntime.hpp"
49 #include "runtime/stubRoutines.hpp"
50 #include "utilities/globalDefinitions.hpp"
51 #include "utilities/powerOfTwo.hpp"
52 #ifdef COMPILER2
53 #include "opto/compile.hpp"
54 #include "opto/node.hpp"
55 #include "opto/output.hpp"
56 #endif
57
58 #ifdef PRODUCT
59 #define BLOCK_COMMENT(str) /* nothing */
60 #else
61 #define BLOCK_COMMENT(str) block_comment(str)
62 #endif
63 #define STOP(str) stop(str);
64 #define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":")
65
66 static void pass_arg0(MacroAssembler* masm, Register arg) {
67 if (c_rarg0 != arg) {
68 masm->mv(c_rarg0, arg);
69 }
70 }
71
72 static void pass_arg1(MacroAssembler* masm, Register arg) {
73 if (c_rarg1 != arg) {
74 masm->mv(c_rarg1, arg);
75 }
76 }
77
78 static void pass_arg2(MacroAssembler* masm, Register arg) {
79 if (c_rarg2 != arg) {
80 masm->mv(c_rarg2, arg);
81 }
82 }
83
84 static void pass_arg3(MacroAssembler* masm, Register arg) {
85 if (c_rarg3 != arg) {
86 masm->mv(c_rarg3, arg);
87 }
88 }
89
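// Continuation fastpath watermark (sketch of the two helpers below): push_cont_fastpath
// records the current sp in JavaThread::cont_fastpath when sp is above the stored value;
// pop_cont_fastpath clears the field again once sp is at or above it.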
90 void MacroAssembler::push_cont_fastpath(Register java_thread) {
91 if (!Continuations::enabled()) return;
92 Label done;
93 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset()));
94 bleu(sp, t0, done);
95 sd(sp, Address(java_thread, JavaThread::cont_fastpath_offset()));
96 bind(done);
97 }
98
99 void MacroAssembler::pop_cont_fastpath(Register java_thread) {
100 if (!Continuations::enabled()) return;
101 Label done;
102 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset()));
103 bltu(sp, t0, done);
104 sd(zr, Address(java_thread, JavaThread::cont_fastpath_offset()));
105 bind(done);
106 }
107
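// Example (illustrative): align(8, 2) emits nops until (offset() + 2) % 8 == 0 and
// returns the number of padding bytes that were emitted.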
108 int MacroAssembler::align(int modulus, int extra_offset) {
109 CompressibleRegion cr(this);
110 intptr_t before = offset();
111 while ((offset() + extra_offset) % modulus != 0) { nop(); }
112 return (int)(offset() - before);
113 }
114
115 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
116 call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
117 }
118
119 // Implementation of call_VM versions
120
121 void MacroAssembler::call_VM(Register oop_result,
122 address entry_point,
123 bool check_exceptions) {
124 call_VM_helper(oop_result, entry_point, 0, check_exceptions);
125 }
126
127 void MacroAssembler::call_VM(Register oop_result,
128 address entry_point,
129 Register arg_1,
130 bool check_exceptions) {
131 pass_arg1(this, arg_1);
132 call_VM_helper(oop_result, entry_point, 1, check_exceptions);
133 }
134
135 void MacroAssembler::call_VM(Register oop_result,
136 address entry_point,
137 Register arg_1,
138 Register arg_2,
139 bool check_exceptions) {
140 assert(arg_1 != c_rarg2, "smashed arg");
141 pass_arg2(this, arg_2);
142 pass_arg1(this, arg_1);
143 call_VM_helper(oop_result, entry_point, 2, check_exceptions);
144 }
145
146 void MacroAssembler::call_VM(Register oop_result,
147 address entry_point,
148 Register arg_1,
149 Register arg_2,
150 Register arg_3,
151 bool check_exceptions) {
152 assert(arg_1 != c_rarg3, "smashed arg");
153 assert(arg_2 != c_rarg3, "smashed arg");
154 pass_arg3(this, arg_3);
155
156 assert(arg_1 != c_rarg2, "smashed arg");
157 pass_arg2(this, arg_2);
158
159 pass_arg1(this, arg_1);
160 call_VM_helper(oop_result, entry_point, 3, check_exceptions);
161 }
162
163 void MacroAssembler::call_VM(Register oop_result,
164 Register last_java_sp,
165 address entry_point,
166 int number_of_arguments,
167 bool check_exceptions) {
168 call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
169 }
170
171 void MacroAssembler::call_VM(Register oop_result,
172 Register last_java_sp,
173 address entry_point,
174 Register arg_1,
175 bool check_exceptions) {
176 pass_arg1(this, arg_1);
177 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
178 }
179
180 void MacroAssembler::call_VM(Register oop_result,
181 Register last_java_sp,
182 address entry_point,
183 Register arg_1,
184 Register arg_2,
185 bool check_exceptions) {
186
187 assert(arg_1 != c_rarg2, "smashed arg");
188 pass_arg2(this, arg_2);
189 pass_arg1(this, arg_1);
190 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
191 }
192
193 void MacroAssembler::call_VM(Register oop_result,
194 Register last_java_sp,
195 address entry_point,
196 Register arg_1,
197 Register arg_2,
198 Register arg_3,
199 bool check_exceptions) {
200 assert(arg_1 != c_rarg3, "smashed arg");
201 assert(arg_2 != c_rarg3, "smashed arg");
202 pass_arg3(this, arg_3);
203 assert(arg_1 != c_rarg2, "smashed arg");
204 pass_arg2(this, arg_2);
205 pass_arg1(this, arg_1);
206 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
207 }
208
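// post_call_nop() emits a fixed-length, side-effect-free marker after call sites
// (one nop plus the two instructions of li32(zr, 0)) under a post_call_nop relocation,
// so the runtime can later locate (and, if needed, patch) the call site when
// continuations are enabled.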
209 void MacroAssembler::post_call_nop() {
210 if (!Continuations::enabled()) {
211 return;
212 }
213 relocate(post_call_nop_Relocation::spec(), [&] {
214 InlineSkippedInstructionsCounter skipCounter(this);
215 nop();
216 li32(zr, 0);
217 });
218 }
219
220 // these are no-ops overridden by InterpreterMacroAssembler
221 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
222 void MacroAssembler::check_and_handle_popframe(Register java_thread) {}
223
224 // Calls to C land
225 //
226 // When entering C land, the fp and esp of the last Java frame have to be recorded
227 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
228 // has to be reset to 0. This is required to allow proper stack traversal.
229 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
230 Register last_java_fp,
231 Register last_java_pc,
232 Register tmp) {
233
234 if (last_java_pc->is_valid()) {
235 sd(last_java_pc, Address(xthread,
236 JavaThread::frame_anchor_offset() +
237 JavaFrameAnchor::last_Java_pc_offset()));
238 }
239
240 // determine last_java_sp register
241 if (last_java_sp == sp) {
242 mv(tmp, sp);
243 last_java_sp = tmp;
244 } else if (!last_java_sp->is_valid()) {
245 last_java_sp = esp;
246 }
247
248 sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset()));
249
250 // last_java_fp is optional
251 if (last_java_fp->is_valid()) {
252 sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset()));
253 }
254 }
255
256 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
257 Register last_java_fp,
258 address last_java_pc,
259 Register tmp) {
260 assert(last_java_pc != nullptr, "must provide a valid PC");
261
262 la(tmp, last_java_pc);
263 sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
264
265 set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp);
266 }
267
268 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
269 Register last_java_fp,
270 Label &L,
271 Register tmp) {
272 if (L.is_bound()) {
273 set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp);
274 } else {
275 L.add_patch_at(code(), locator());
276 IncompressibleRegion ir(this); // the label address will be patched back.
277 set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp);
278 }
279 }
280
281 void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
282 // we must set sp to zero to clear frame
283 sd(zr, Address(xthread, JavaThread::last_Java_sp_offset()));
284
285 // must clear fp, so that compiled frames are not confused; it is
286 // possible that we need it only for debugging
287 if (clear_fp) {
288 sd(zr, Address(xthread, JavaThread::last_Java_fp_offset()));
289 }
290
291 // Always clear the pc because it could have been set by make_walkable()
292 sd(zr, Address(xthread, JavaThread::last_Java_pc_offset()));
293 }
294
295 void MacroAssembler::call_VM_base(Register oop_result,
296 Register java_thread,
297 Register last_java_sp,
298 address entry_point,
299 int number_of_arguments,
300 bool check_exceptions) {
301 // determine java_thread register
302 if (!java_thread->is_valid()) {
303 java_thread = xthread;
304 }
305 // determine last_java_sp register
306 if (!last_java_sp->is_valid()) {
307 last_java_sp = esp;
308 }
309
310 // debugging support
311 assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
312 assert(java_thread == xthread, "unexpected register");
313
314 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
315 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
316
317 // push java thread (becomes first argument of C function)
318 mv(c_rarg0, java_thread);
319
320 // set last Java frame before call
321 assert(last_java_sp != fp, "can't use fp");
322
323 Label l;
324 set_last_Java_frame(last_java_sp, fp, l, t0);
325
326 // do the call, remove parameters
327 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);
328
329 // reset last Java frame
330 // Only interpreter should have to clear fp
331 reset_last_Java_frame(true);
332
333 // C++ interp handles this in the interpreter
334 check_and_handle_popframe(java_thread);
335 check_and_handle_earlyret(java_thread);
336
337 if (check_exceptions) {
338 // check for pending exceptions (java_thread is set upon return)
339 ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
340 Label ok;
341 beqz(t0, ok);
342 RuntimeAddress target(StubRoutines::forward_exception_entry());
343 relocate(target.rspec(), [&] {
344 int32_t offset;
345 la_patchable(t0, target, offset);
346 jalr(x0, t0, offset);
347 });
348 bind(ok);
349 }
350
351 // get oop result if there is one and reset the value in the thread
352 if (oop_result->is_valid()) {
353 get_vm_result(oop_result, java_thread);
354 }
355 }
356
357 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
358 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
359 sd(zr, Address(java_thread, JavaThread::vm_result_offset()));
360 verify_oop_msg(oop_result, "broken oop in call_VM_base");
361 }
362
363 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
364 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
365 sd(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
366 }
367
368 void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) {
369 assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required");
370 assert_different_registers(klass, xthread, tmp);
371
372 Label L_fallthrough, L_tmp;
373 if (L_fast_path == nullptr) {
374 L_fast_path = &L_fallthrough;
375 } else if (L_slow_path == nullptr) {
376 L_slow_path = &L_fallthrough;
377 }
378
379 // Fast path check: class is fully initialized
380 lbu(tmp, Address(klass, InstanceKlass::init_state_offset()));
381 sub(tmp, tmp, InstanceKlass::fully_initialized);
382 beqz(tmp, *L_fast_path);
383
384 // Fast path check: current thread is initializer thread
385 ld(tmp, Address(klass, InstanceKlass::init_thread_offset()));
386
387 if (L_slow_path == &L_fallthrough) {
388 beq(xthread, tmp, *L_fast_path);
389 bind(*L_slow_path);
390 } else if (L_fast_path == &L_fallthrough) {
391 bne(xthread, tmp, *L_slow_path);
392 bind(*L_fast_path);
393 } else {
394 Unimplemented();
395 }
396 }
397
398 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
399 if (!VerifyOops) { return; }
400
401 // Pass register number to verify_oop_subroutine
402 const char* b = nullptr;
403 {
404 ResourceMark rm;
405 stringStream ss;
406 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
407 b = code_string(ss.as_string());
408 }
409 BLOCK_COMMENT("verify_oop {");
410
411 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
412
413 mv(c_rarg0, reg); // c_rarg0 : x10
414 {
415 // The length of the instruction sequence emitted should not depend
416 // on the address of the char buffer so that the size of mach nodes for
417 // scratch emit and normal emit matches.
418 IncompressibleRegion ir(this); // Fixed length
419 movptr(t0, (address) b);
420 }
421
422 // call indirectly to solve generation ordering problem
423 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address());
424 relocate(target.rspec(), [&] {
425 int32_t offset;
426 la_patchable(t1, target, offset);
427 ld(t1, Address(t1, offset));
428 });
429 jalr(t1);
430
431 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
432
433 BLOCK_COMMENT("} verify_oop");
434 }
435
436 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
437 if (!VerifyOops) {
438 return;
439 }
440
441 const char* b = nullptr;
442 {
443 ResourceMark rm;
444 stringStream ss;
445 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
446 b = code_string(ss.as_string());
447 }
448 BLOCK_COMMENT("verify_oop_addr {");
449
450 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
451
452 if (addr.uses(sp)) {
453 la(x10, addr);
454 ld(x10, Address(x10, 4 * wordSize));
455 } else {
456 ld(x10, addr);
457 }
458
459 {
460 // The length of the instruction sequence emitted should not depend
461 // on the address of the char buffer so that the size of mach nodes for
462 // scratch emit and normal emit matches.
463 IncompressibleRegion ir(this); // Fixed length
464 movptr(t0, (address) b);
465 }
466
467 // call indirectly to solve generation ordering problem
468 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address());
469 relocate(target.rspec(), [&] {
470 int32_t offset;
471 la_patchable(t1, target, offset);
472 ld(t1, Address(t1, offset));
473 });
474 jalr(t1);
475
476 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
477
478 BLOCK_COMMENT("} verify_oop_addr");
479 }
480
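// Example (illustrative): with arg_slot == 2 and extra_slot_offset == 0 this yields
// Address(esp, 2 * Interpreter::stackElementSize + Interpreter::expr_offset_in_bytes(0)),
// i.e. the expression-stack slot at index 2.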
481 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
482 int extra_slot_offset) {
483 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
484 int stackElementSize = Interpreter::stackElementSize;
485 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
486 #ifdef ASSERT
487 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
488 assert(offset1 - offset == stackElementSize, "correct arithmetic");
489 #endif
490 if (arg_slot.is_constant()) {
491 return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
492 } else {
493 assert_different_registers(t0, arg_slot.as_register());
494 shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
495 return Address(t0, offset);
496 }
497 }
498
499 #ifndef PRODUCT
500 extern "C" void findpc(intptr_t x);
501 #endif
502
503 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
504 {
505 // In order to get locks to work, we need to fake an in_VM state
506 if (ShowMessageBoxOnError) {
507 JavaThread* thread = JavaThread::current();
508 JavaThreadState saved_state = thread->thread_state();
509 thread->set_thread_state(_thread_in_vm);
510 #ifndef PRODUCT
511 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
512 ttyLocker ttyl;
513 BytecodeCounter::print();
514 }
515 #endif
516 if (os::message_box(msg, "Execution stopped, print registers?")) {
517 ttyLocker ttyl;
518 tty->print_cr(" pc = 0x%016lx", pc);
519 #ifndef PRODUCT
520 tty->cr();
521 findpc(pc);
522 tty->cr();
523 #endif
524 tty->print_cr(" x0 = 0x%016lx", regs[0]);
525 tty->print_cr(" x1 = 0x%016lx", regs[1]);
526 tty->print_cr(" x2 = 0x%016lx", regs[2]);
527 tty->print_cr(" x3 = 0x%016lx", regs[3]);
528 tty->print_cr(" x4 = 0x%016lx", regs[4]);
529 tty->print_cr(" x5 = 0x%016lx", regs[5]);
530 tty->print_cr(" x6 = 0x%016lx", regs[6]);
531 tty->print_cr(" x7 = 0x%016lx", regs[7]);
532 tty->print_cr(" x8 = 0x%016lx", regs[8]);
533 tty->print_cr(" x9 = 0x%016lx", regs[9]);
534 tty->print_cr("x10 = 0x%016lx", regs[10]);
535 tty->print_cr("x11 = 0x%016lx", regs[11]);
536 tty->print_cr("x12 = 0x%016lx", regs[12]);
537 tty->print_cr("x13 = 0x%016lx", regs[13]);
538 tty->print_cr("x14 = 0x%016lx", regs[14]);
539 tty->print_cr("x15 = 0x%016lx", regs[15]);
540 tty->print_cr("x16 = 0x%016lx", regs[16]);
541 tty->print_cr("x17 = 0x%016lx", regs[17]);
542 tty->print_cr("x18 = 0x%016lx", regs[18]);
543 tty->print_cr("x19 = 0x%016lx", regs[19]);
544 tty->print_cr("x20 = 0x%016lx", regs[20]);
545 tty->print_cr("x21 = 0x%016lx", regs[21]);
546 tty->print_cr("x22 = 0x%016lx", regs[22]);
547 tty->print_cr("x23 = 0x%016lx", regs[23]);
548 tty->print_cr("x24 = 0x%016lx", regs[24]);
549 tty->print_cr("x25 = 0x%016lx", regs[25]);
550 tty->print_cr("x26 = 0x%016lx", regs[26]);
551 tty->print_cr("x27 = 0x%016lx", regs[27]);
552 tty->print_cr("x28 = 0x%016lx", regs[28]);
553 tty->print_cr("x30 = 0x%016lx", regs[30]);
554 tty->print_cr("x31 = 0x%016lx", regs[31]);
555 BREAKPOINT;
556 }
557 }
558 fatal("DEBUG MESSAGE: %s", msg);
559 }
560
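// jobject tag scheme assumed by the two resolvers below (see JNIHandles::TypeTag):
// an untagged value is a local handle, tag 0b10 marks a global handle, tag 0b1 marks
// a weak global handle; the tag is subtracted from the handle before loading through it.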
561 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
562 assert_different_registers(value, tmp1, tmp2);
563 Label done, tagged, weak_tagged;
564
565 beqz(value, done); // Use null as-is.
566 // Test for tag.
567 andi(tmp1, value, JNIHandles::tag_mask);
568 bnez(tmp1, tagged);
569
570 // Resolve local handle
571 access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, value, Address(value, 0), tmp1, tmp2);
572 verify_oop(value);
573 j(done);
574
575 bind(tagged);
576 // Test for jweak tag.
577 STATIC_ASSERT(JNIHandles::TypeTag::weak_global == 0b1);
578 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::weak_global));
579 bnez(tmp1, weak_tagged);
580
581 // Resolve global handle
582 access_load_at(T_OBJECT, IN_NATIVE, value,
583 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2);
584 verify_oop(value);
585 j(done);
586
587 bind(weak_tagged);
588 // Resolve jweak.
589 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
590 Address(value, -JNIHandles::TypeTag::weak_global), tmp1, tmp2);
591 verify_oop(value);
592
593 bind(done);
594 }
595
596 void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) {
597 assert_different_registers(value, tmp1, tmp2);
598 Label done;
599
600 beqz(value, done); // Use null as-is.
601
602 #ifdef ASSERT
603 {
604 STATIC_ASSERT(JNIHandles::TypeTag::global == 0b10);
605 Label valid_global_tag;
606 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::global)); // Test for global tag.
607 bnez(tmp1, valid_global_tag);
608 stop("non global jobject using resolve_global_jobject");
609 bind(valid_global_tag);
610 }
611 #endif
612
613 // Resolve global handle
614 access_load_at(T_OBJECT, IN_NATIVE, value,
615 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2);
616 verify_oop(value);
617
618 bind(done);
619 }
620
621 void MacroAssembler::stop(const char* msg) {
622 BLOCK_COMMENT(msg);
623 illegal_instruction(Assembler::csr::time);
624 emit_int64((uintptr_t)msg);
625 }
626
627 void MacroAssembler::unimplemented(const char* what) {
628 const char* buf = nullptr;
629 {
630 ResourceMark rm;
631 stringStream ss;
632 ss.print("unimplemented: %s", what);
633 buf = code_string(ss.as_string());
634 }
635 stop(buf);
636 }
637
638 void MacroAssembler::emit_static_call_stub() {
639 IncompressibleRegion ir(this); // Fixed length: see CompiledStaticCall::to_interp_stub_size().
640 // CompiledDirectStaticCall::set_to_interpreted knows the
641 // exact layout of this stub.
642
643 mov_metadata(xmethod, (Metadata*)nullptr);
644
645 // Jump to the entry point of the c2i stub.
646 int32_t offset = 0;
647 movptr(t0, 0, offset);
648 jalr(x0, t0, offset);
649 }
650
651 void MacroAssembler::call_VM_leaf_base(address entry_point,
652 int number_of_arguments,
653 Label *retaddr) {
654 push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp
655 call(entry_point);
656 if (retaddr != nullptr) {
657 bind(*retaddr);
658 }
659 pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp
660 }
661
662 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
663 call_VM_leaf_base(entry_point, number_of_arguments);
664 }
665
666 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
667 pass_arg0(this, arg_0);
668 call_VM_leaf_base(entry_point, 1);
669 }
670
671 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
672 pass_arg0(this, arg_0);
673 pass_arg1(this, arg_1);
674 call_VM_leaf_base(entry_point, 2);
675 }
676
677 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
678 Register arg_1, Register arg_2) {
679 pass_arg0(this, arg_0);
680 pass_arg1(this, arg_1);
681 pass_arg2(this, arg_2);
682 call_VM_leaf_base(entry_point, 3);
683 }
684
685 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
686 pass_arg0(this, arg_0);
687 MacroAssembler::call_VM_leaf_base(entry_point, 1);
688 }
689
690 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
691
692 assert(arg_0 != c_rarg1, "smashed arg");
693 pass_arg1(this, arg_1);
694 pass_arg0(this, arg_0);
695 MacroAssembler::call_VM_leaf_base(entry_point, 2);
696 }
697
698 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
699 assert(arg_0 != c_rarg2, "smashed arg");
700 assert(arg_1 != c_rarg2, "smashed arg");
701 pass_arg2(this, arg_2);
702 assert(arg_0 != c_rarg1, "smashed arg");
703 pass_arg1(this, arg_1);
704 pass_arg0(this, arg_0);
705 MacroAssembler::call_VM_leaf_base(entry_point, 3);
706 }
707
708 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
709 assert(arg_0 != c_rarg3, "smashed arg");
710 assert(arg_1 != c_rarg3, "smashed arg");
711 assert(arg_2 != c_rarg3, "smashed arg");
712 pass_arg3(this, arg_3);
713 assert(arg_0 != c_rarg2, "smashed arg");
714 assert(arg_1 != c_rarg2, "smashed arg");
715 pass_arg2(this, arg_2);
716 assert(arg_0 != c_rarg1, "smashed arg");
717 pass_arg1(this, arg_1);
718 pass_arg0(this, arg_0);
719 MacroAssembler::call_VM_leaf_base(entry_point, 4);
720 }
721
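// Worked example (illustrative) for the auipc/addi pair below: offset = 0x12345FFF.
// addi sign-extends the low 12 bits to -1, so auipc must contribute 0x12346000; adding
// 0x800 before taking the upper 20 bits gives exactly that:
// ((0x12345FFF + 0x800) & 0xFFFFF000) + (-1) == 0x12345FFF.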
722 void MacroAssembler::la(Register Rd, const address dest) {
723 int64_t offset = dest - pc();
724 if (is_valid_32bit_offset(offset)) {
725 auipc(Rd, (int32_t)offset + 0x800); // 0x800 compensates for the sign extension of the low 12 bits (bit 11) done by the addi below
726 addi(Rd, Rd, ((int64_t)offset << 52) >> 52);
727 } else {
728 movptr(Rd, dest);
729 }
730 }
731
732 void MacroAssembler::la(Register Rd, const Address &adr) {
733 switch (adr.getMode()) {
734 case Address::literal: {
735 relocInfo::relocType rtype = adr.rspec().reloc()->type();
736 if (rtype == relocInfo::none) {
737 mv(Rd, (intptr_t)(adr.target()));
738 } else {
739 relocate(adr.rspec(), [&] {
740 movptr(Rd, adr.target());
741 });
742 }
743 break;
744 }
745 case Address::base_plus_offset: {
746 Address new_adr = legitimize_address(Rd, adr);
747 if (!(new_adr.base() == Rd && new_adr.offset() == 0)) {
748 addi(Rd, new_adr.base(), new_adr.offset());
749 }
750 break;
751 }
752 default:
753 ShouldNotReachHere();
754 }
755 }
756
757 void MacroAssembler::la(Register Rd, Label &label) {
758 IncompressibleRegion ir(this); // the label address may be patched back.
759 wrap_label(Rd, label, &MacroAssembler::la);
760 }
761
762 void MacroAssembler::li16u(Register Rd, uint16_t imm) {
763 lui(Rd, (uint32_t)imm << 12);
764 srli(Rd, Rd, 12);
765 }
766
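// Worked example (illustrative) for li32 below: imm = 0x12345FFF. lower = sign-extended
// low 12 bits = -1, upper = imm - lower = 0x12346000, so lui(Rd, 0x12346000) followed by
// addiw(Rd, Rd, -1) rebuilds the 32-bit value with correct sign extension.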
767 void MacroAssembler::li32(Register Rd, int32_t imm) {
768 // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit
769 int64_t upper = imm, lower = imm;
770 lower = (imm << 20) >> 20;
771 upper -= lower;
772 upper = (int32_t)upper;
773 // lui Rd, imm[31:12] + imm[11]
774 lui(Rd, upper);
775 // use addiw to distinguish li32 from li64
776 addiw(Rd, Rd, lower);
777 }
778
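// Sketch of the li64 sequence below: the first lui/addi pair materializes imm[63:32]
// (adjusted when imm[31] would sign-extend), then the slli/addi pairs append imm[31:20],
// imm[19:8] and imm[7:0] in 12-, 12- and 8-bit groups, with carry adjustments where
// addi would sign-extend.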
779 void MacroAssembler::li64(Register Rd, int64_t imm) {
780 // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or
781 // (imm[31:20] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1.
782 int64_t lower = imm & 0xffffffff;
783 lower -= ((lower << 44) >> 44);
784 int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower;
785 int32_t upper = (tmp_imm - (int32_t)lower) >> 32;
786
787 // Load upper 32 bits
788 int64_t up = upper, lo = upper;
789 lo = (lo << 52) >> 52;
790 up -= lo;
791 up = (int32_t)up;
792 lui(Rd, up);
793 addi(Rd, Rd, lo);
794
795 // Load the remaining 32 bits.
796 slli(Rd, Rd, 12);
797 addi(Rd, Rd, (int32_t)lower >> 20);
798 slli(Rd, Rd, 12);
799 lower = ((int32_t)imm << 12) >> 20;
800 addi(Rd, Rd, lower);
801 slli(Rd, Rd, 8);
802 lower = imm & 0xff;
803 addi(Rd, Rd, lower);
804 }
805
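// General strategy (sketch): simm6 immediates become a single c.li and 32-bit values a
// lui/addiw pair; for a full 64-bit immediate, li strips the trailing zero bits of the
// upper part, materializes the reduced constant recursively, shifts it back into place
// and adds the low 12 bits, which often takes fewer instructions than the fixed li64
// sequence.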
806 void MacroAssembler::li(Register Rd, int64_t imm) {
807 // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff
808 // li -> c.li
809 if (do_compress() && (is_simm6(imm) && Rd != x0)) {
810 c_li(Rd, imm);
811 return;
812 }
813
814 int shift = 12;
815 int64_t upper = imm, lower = imm;
816 // Split imm to a lower 12-bit sign-extended part and the remainder,
817 // because addi will sign-extend the lower imm.
818 lower = ((int32_t)imm << 20) >> 20;
819 upper -= lower;
820
821 // Test whether imm is a 32-bit integer.
822 if (!(((imm) & ~(int64_t)0x7fffffff) == 0 ||
823 (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) {
824 while (((upper >> shift) & 1) == 0) { shift++; }
825 upper >>= shift;
826 li(Rd, upper);
827 slli(Rd, Rd, shift);
828 if (lower != 0) {
829 addi(Rd, Rd, lower);
830 }
831 } else {
832 // 32-bit integer
833 Register hi_Rd = zr;
834 if (upper != 0) {
835 lui(Rd, (int32_t)upper);
836 hi_Rd = Rd;
837 }
838 if (lower != 0 || hi_Rd == zr) {
839 addiw(Rd, hi_Rd, lower);
840 }
841 }
842 }
843
844 #define INSN(NAME, REGISTER) \
845 void MacroAssembler::NAME(const address dest, Register temp) { \
846 assert_cond(dest != nullptr); \
847 int64_t distance = dest - pc(); \
848 if (is_simm21(distance) && ((distance % 2) == 0)) { \
849 Assembler::jal(REGISTER, distance); \
850 } else { \
851 assert(temp != noreg, "expecting a register"); \
852 int32_t offset = 0; \
853 movptr(temp, dest, offset); \
854 Assembler::jalr(REGISTER, temp, offset); \
855 } \
856 } \
857
858 INSN(j, x0);
859 INSN(jal, x1);
860
861 #undef INSN
862
863 #define INSN(NAME, REGISTER) \
864 void MacroAssembler::NAME(const Address &adr, Register temp) { \
865 switch (adr.getMode()) { \
866 case Address::literal: { \
867 relocate(adr.rspec(), [&] { \
868 NAME(adr.target(), temp); \
869 }); \
870 break; \
871 } \
872 case Address::base_plus_offset: { \
873 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
874 la(temp, Address(adr.base(), adr.offset() - offset)); \
875 Assembler::jalr(REGISTER, temp, offset); \
876 break; \
877 } \
878 default: \
879 ShouldNotReachHere(); \
880 } \
881 }
882
883 INSN(j, x0);
884 INSN(jal, x1);
885
886 #undef INSN
887
888 #define INSN(NAME) \
889 void MacroAssembler::NAME(Register Rd, const address dest, Register temp) { \
890 assert_cond(dest != nullptr); \
891 int64_t distance = dest - pc(); \
892 if (is_simm21(distance) && ((distance % 2) == 0)) { \
893 Assembler::NAME(Rd, distance); \
894 } else { \
895 assert_different_registers(Rd, temp); \
896 int32_t offset = 0; \
897 movptr(temp, dest, offset); \
898 jalr(Rd, temp, offset); \
899 } \
900 } \
901 void MacroAssembler::NAME(Register Rd, Label &L, Register temp) { \
902 assert_different_registers(Rd, temp); \
903 wrap_label(Rd, L, temp, &MacroAssembler::NAME); \
904 }
905
906 INSN(jal);
907
908 #undef INSN
909
910 #define INSN(NAME, REGISTER) \
911 void MacroAssembler::NAME(Label &l, Register temp) { \
912 jal(REGISTER, l, temp); \
913 } \
914
915 INSN(j, x0);
916 INSN(jal, x1);
917
918 #undef INSN
919
920 void MacroAssembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) {
921 if (L.is_bound()) {
922 (this->*insn)(Rt, target(L), tmp);
923 } else {
924 L.add_patch_at(code(), locator());
925 (this->*insn)(Rt, pc(), tmp);
926 }
927 }
928
929 void MacroAssembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) {
930 if (L.is_bound()) {
931 (this->*insn)(Rt, target(L));
932 } else {
933 L.add_patch_at(code(), locator());
934 (this->*insn)(Rt, pc());
935 }
936 }
937
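// Far branches (sketch): a conditional branch only reaches +/-4 KiB, so when is_far is
// set the condition is inverted to skip over an unconditional j, which reaches +/-1 MiB
// and is itself relaxed further by MacroAssembler::j when even that is not enough.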
938 void MacroAssembler::wrap_label(Register r1, Register r2, Label &L,
939 compare_and_branch_insn insn,
940 compare_and_branch_label_insn neg_insn, bool is_far) {
941 if (is_far) {
942 Label done;
943 (this->*neg_insn)(r1, r2, done, /* is_far */ false);
944 j(L);
945 bind(done);
946 } else {
947 if (L.is_bound()) {
948 (this->*insn)(r1, r2, target(L));
949 } else {
950 L.add_patch_at(code(), locator());
951 (this->*insn)(r1, r2, pc());
952 }
953 }
954 }
955
956 #define INSN(NAME, NEG_INSN) \
957 void MacroAssembler::NAME(Register Rs1, Register Rs2, Label &L, bool is_far) { \
958 wrap_label(Rs1, Rs2, L, &MacroAssembler::NAME, &MacroAssembler::NEG_INSN, is_far); \
959 }
960
961 INSN(beq, bne);
962 INSN(bne, beq);
963 INSN(blt, bge);
964 INSN(bge, blt);
965 INSN(bltu, bgeu);
966 INSN(bgeu, bltu);
967
968 #undef INSN
969
970 #define INSN(NAME) \
971 void MacroAssembler::NAME##z(Register Rs, const address dest) { \
972 NAME(Rs, zr, dest); \
973 } \
974 void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \
975 NAME(Rs, zr, l, is_far); \
976 } \
977
978 INSN(beq);
979 INSN(bne);
980 INSN(blt);
981 INSN(ble);
982 INSN(bge);
983 INSN(bgt);
984
985 #undef INSN
986
987 #define INSN(NAME, NEG_INSN) \
988 void MacroAssembler::NAME(Register Rs, Register Rt, const address dest) { \
989 NEG_INSN(Rt, Rs, dest); \
990 } \
991 void MacroAssembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \
992 NEG_INSN(Rt, Rs, l, is_far); \
993 }
994
995 INSN(bgt, blt);
996 INSN(ble, bge);
997 INSN(bgtu, bltu);
998 INSN(bleu, bgeu);
999
1000 #undef INSN
1001
1002 // Float compare branch instructions
1003
1004 #define INSN(NAME, FLOATCMP, BRANCH) \
1005 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
1006 FLOATCMP##_s(t0, Rs1, Rs2); \
1007 BRANCH(t0, l, is_far); \
1008 } \
1009 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
1010 FLOATCMP##_d(t0, Rs1, Rs2); \
1011 BRANCH(t0, l, is_far); \
1012 }
1013
1014 INSN(beq, feq, bnez);
1015 INSN(bne, feq, beqz);
1016
1017 #undef INSN
1018
1019
1020 #define INSN(NAME, FLOATCMP1, FLOATCMP2) \
1021 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
1022 bool is_far, bool is_unordered) { \
1023 if (is_unordered) { \
1024 /* jump if either source is NaN or condition is expected */ \
1025 FLOATCMP2##_s(t0, Rs2, Rs1); \
1026 beqz(t0, l, is_far); \
1027 } else { \
1028 /* jump if no NaN in source and condition is expected */ \
1029 FLOATCMP1##_s(t0, Rs1, Rs2); \
1030 bnez(t0, l, is_far); \
1031 } \
1032 } \
1033 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
1034 bool is_far, bool is_unordered) { \
1035 if (is_unordered) { \
1036 /* jump if either source is NaN or condition is expected */ \
1037 FLOATCMP2##_d(t0, Rs2, Rs1); \
1038 beqz(t0, l, is_far); \
1039 } else { \
1040 /* jump if no NaN in source and condition is expected */ \
1041 FLOATCMP1##_d(t0, Rs1, Rs2); \
1042 bnez(t0, l, is_far); \
1043 } \
1044 }
1045
1046 INSN(ble, fle, flt);
1047 INSN(blt, flt, fle);
1048
1049 #undef INSN
1050
1051 #define INSN(NAME, CMP) \
1052 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
1053 bool is_far, bool is_unordered) { \
1054 float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \
1055 } \
1056 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
1057 bool is_far, bool is_unordered) { \
1058 double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \
1059 }
1060
1061 INSN(bgt, blt);
1062 INSN(bge, ble);
1063
1064 #undef INSN
1065
1066
1067 #define INSN(NAME, CSR) \
1068 void MacroAssembler::NAME(Register Rd) { \
1069 csrr(Rd, CSR); \
1070 }
1071
1072 INSN(rdinstret, CSR_INSTRET);
1073 INSN(rdcycle, CSR_CYCLE);
1074 INSN(rdtime, CSR_TIME);
1075 INSN(frcsr, CSR_FCSR);
1076 INSN(frrm, CSR_FRM);
1077 INSN(frflags, CSR_FFLAGS);
1078
1079 #undef INSN
1080
1081 void MacroAssembler::csrr(Register Rd, unsigned csr) {
1082 csrrs(Rd, csr, x0);
1083 }
1084
1085 #define INSN(NAME, OPFUN) \
1086 void MacroAssembler::NAME(unsigned csr, Register Rs) { \
1087 OPFUN(x0, csr, Rs); \
1088 }
1089
1090 INSN(csrw, csrrw);
1091 INSN(csrs, csrrs);
1092 INSN(csrc, csrrc);
1093
1094 #undef INSN
1095
1096 #define INSN(NAME, OPFUN) \
1097 void MacroAssembler::NAME(unsigned csr, unsigned imm) { \
1098 OPFUN(x0, csr, imm); \
1099 }
1100
1101 INSN(csrwi, csrrwi);
1102 INSN(csrsi, csrrsi);
1103 INSN(csrci, csrrci);
1104
1105 #undef INSN
1106
1107 #define INSN(NAME, CSR) \
1108 void MacroAssembler::NAME(Register Rd, Register Rs) { \
1109 csrrw(Rd, CSR, Rs); \
1110 }
1111
1112 INSN(fscsr, CSR_FCSR);
1113 INSN(fsrm, CSR_FRM);
1114 INSN(fsflags, CSR_FFLAGS);
1115
1116 #undef INSN
1117
1118 #define INSN(NAME) \
1119 void MacroAssembler::NAME(Register Rs) { \
1120 NAME(x0, Rs); \
1121 }
1122
1123 INSN(fscsr);
1124 INSN(fsrm);
1125 INSN(fsflags);
1126
1127 #undef INSN
1128
1129 void MacroAssembler::fsrmi(Register Rd, unsigned imm) {
1130 guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register");
1131 csrrwi(Rd, CSR_FRM, imm);
1132 }
1133
1134 void MacroAssembler::fsflagsi(Register Rd, unsigned imm) {
1135 csrrwi(Rd, CSR_FFLAGS, imm);
1136 }
1137
1138 #define INSN(NAME) \
1139 void MacroAssembler::NAME(unsigned imm) { \
1140 NAME(x0, imm); \
1141 }
1142
1143 INSN(fsrmi);
1144 INSN(fsflagsi);
1145
1146 #undef INSN
1147
1148 void MacroAssembler::push_reg(Register Rs)
1149 {
1150 addi(esp, esp, 0 - wordSize);
1151 sd(Rs, Address(esp, 0));
1152 }
1153
1154 void MacroAssembler::pop_reg(Register Rd)
1155 {
1156 ld(Rd, Address(esp, 0));
1157 addi(esp, esp, wordSize);
1158 }
1159
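// Example (illustrative): bitset = 0b1010 selects x1 and x3; the scan below fills regs[]
// from the highest-numbered register downwards, so regs = { 3, 1 } and the return value is 2.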
1160 int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
1161 int count = 0;
1162 // Scan bitset to accumulate registers
1163 for (int reg = 31; reg >= 0; reg--) {
1164 if ((1U << 31) & bitset) {
1165 regs[count++] = reg;
1166 }
1167 bitset <<= 1;
1168 }
1169 return count;
1170 }
1171
1172 // Push integer registers in the bitset supplied. Don't push sp.
1173 // Return the number of words pushed
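// For an odd register count one padding slot is reserved at the lowest address so that
// the total stack adjustment stays 16-byte aligned (wordSize == 8).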
1174 int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
1175 DEBUG_ONLY(int words_pushed = 0;)
1176 unsigned char regs[32];
1177 int count = bitset_to_regs(bitset, regs);
1178 // reserve one slot to align for odd count
1179 int offset = is_even(count) ? 0 : wordSize;
1180
1181 if (count) {
1182 addi(stack, stack, -count * wordSize - offset);
1183 }
1184 for (int i = count - 1; i >= 0; i--) {
1185 sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
1186 DEBUG_ONLY(words_pushed++;)
1187 }
1188
1189 assert(words_pushed == count, "oops, pushed != count");
1190
1191 return count;
1192 }
1193
1194 int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
1195 DEBUG_ONLY(int words_popped = 0;)
1196 unsigned char regs[32];
1197 int count = bitset_to_regs(bitset, regs);
1198 // reserve one slot to align for odd count
1199 int offset = is_even(count) ? 0 : wordSize;
1200
1201 for (int i = count - 1; i >= 0; i--) {
1202 ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
1203 DEBUG_ONLY(words_popped++;)
1204 }
1205
1206 if (count) {
1207 addi(stack, stack, count * wordSize + offset);
1208 }
1209 assert(words_popped == count, "oops, popped != count");
1210
1211 return count;
1212 }
1213
1214 // Push floating-point registers in the bitset supplied.
1215 // Return the number of words pushed
1216 int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
1217 DEBUG_ONLY(int words_pushed = 0;)
1218 unsigned char regs[32];
1219 int count = bitset_to_regs(bitset, regs);
1220 int push_slots = count + (count & 1);
1221
1222 if (count) {
1223 addi(stack, stack, -push_slots * wordSize);
1224 }
1225
1226 for (int i = count - 1; i >= 0; i--) {
1227 fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize));
1228 DEBUG_ONLY(words_pushed++;)
1229 }
1230
1231 assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
1232
1233 return count;
1234 }
1235
1236 int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
1237 DEBUG_ONLY(int words_popped = 0;)
1238 unsigned char regs[32];
1239 int count = bitset_to_regs(bitset, regs);
1240 int pop_slots = count + (count & 1);
1241
1242 for (int i = count - 1; i >= 0; i--) {
1243 fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize));
1244 DEBUG_ONLY(words_popped++;)
1245 }
1246
1247 if (count) {
1248 addi(stack, stack, pop_slots * wordSize);
1249 }
1250
1251 assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count);
1252
1253 return count;
1254 }
1255
1256 #ifdef COMPILER2
1257 // Push vector registers in the bitset supplied.
1258 // Return the number of words pushed
1259 int MacroAssembler::push_v(unsigned int bitset, Register stack) {
1260 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
1261
1262 // Scan bitset to accumulate registers
1263 unsigned char regs[32];
1264 int count = bitset_to_regs(bitset, regs);
1265
1266 for (int i = 0; i < count; i++) {
1267 sub(stack, stack, vector_size_in_bytes);
1268 vs1r_v(as_VectorRegister(regs[i]), stack);
1269 }
1270
1271 return count * vector_size_in_bytes / wordSize;
1272 }
1273
1274 int MacroAssembler::pop_v(unsigned int bitset, Register stack) {
1275 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
1276
1277 // Scan bitset to accumulate registers
1278 unsigned char regs[32];
1279 int count = bitset_to_regs(bitset, regs);
1280
1281 for (int i = count - 1; i >= 0; i--) {
1282 vl1r_v(as_VectorRegister(regs[i]), stack);
1283 add(stack, stack, vector_size_in_bytes);
1284 }
1285
1286 return count * vector_size_in_bytes / wordSize;
1287 }
1288 #endif // COMPILER2
1289
1290 void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
1291 // Push integer registers x7, x10-x17, x28-x31.
1292 push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
1293
1294 // Push float registers f0-f7, f10-f17, f28-f31.
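// 20 slots: 8 (f0-f7) + 8 (f10-f17) + 4 (f28-f31).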
1295 addi(sp, sp, - wordSize * 20);
1296 int offset = 0;
1297 for (int i = 0; i < 32; i++) {
1298 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
1299 fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
1300 }
1301 }
1302 }
1303
1304 void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
1305 int offset = 0;
1306 for (int i = 0; i < 32; i++) {
1307 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
1308 fld(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
1309 }
1310 }
1311 addi(sp, sp, wordSize * 20);
1312
1313 pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
1314 }
1315
1316 void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) {
1317 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
1318 push_reg(RegSet::range(x5, x31), sp);
1319
1320 // float registers
1321 addi(sp, sp, - 32 * wordSize);
1322 for (int i = 0; i < 32; i++) {
1323 fsd(as_FloatRegister(i), Address(sp, i * wordSize));
1324 }
1325
1326 // vector registers
1327 if (save_vectors) {
1328 sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers);
1329 vsetvli(t0, x0, Assembler::e64, Assembler::m8);
1330 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
1331 add(t0, sp, vector_size_in_bytes * i);
1332 vse64_v(as_VectorRegister(i), t0);
1333 }
1334 }
1335 }
1336
1337 void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) {
1338 // vector registers
1339 if (restore_vectors) {
1340 vsetvli(t0, x0, Assembler::e64, Assembler::m8);
1341 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
1342 vle64_v(as_VectorRegister(i), sp);
1343 add(sp, sp, vector_size_in_bytes * 8);
1344 }
1345 }
1346
1347 // float registers
1348 for (int i = 0; i < 32; i++) {
1349 fld(as_FloatRegister(i), Address(sp, i * wordSize));
1350 }
1351 addi(sp, sp, 32 * wordSize);
1352
1353 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
1354 pop_reg(RegSet::range(x5, x31), sp);
1355 }
1356
1357 static int patch_offset_in_jal(address branch, int64_t offset) {
1358 assert(Assembler::is_simm21(offset) && ((offset % 2) == 0),
1359 "offset is too large to be patched in one jal instruction!\n");
1360 Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31]
1361 Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21]
1362 Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20]
1363 Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12]
1364 return NativeInstruction::instruction_size; // only one instruction
1365 }
1366
1367 static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
1368 assert(Assembler::is_simm13(offset) && ((offset % 2) == 0),
1369 "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
1370 Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31]
1371 Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25]
1372 Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7]
1373 Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8]
1374 return NativeInstruction::instruction_size; // only one instruction
1375 }
1376
1377 static int patch_offset_in_pc_relative(address branch, int64_t offset) {
1378 const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load
1379 Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12]
1380 Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20]
1381 return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
1382 }
1383
1384 static int patch_addr_in_movptr(address branch, address target) {
1385 const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load
1386 int32_t lower = ((intptr_t)target << 35) >> 35;
1387 int64_t upper = ((intptr_t)target - lower) >> 29;
1388 Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12]
1389 Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20]
1390 Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20]
1391 Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20]
1392 return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
1393 }
1394
1395 static int patch_imm_in_li64(address branch, address target) {
1396 const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi
1397 int64_t lower = (intptr_t)target & 0xffffffff;
1398 lower = lower - ((lower << 44) >> 44);
1399 int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower;
1400 int32_t upper = (tmp_imm - (int32_t)lower) >> 32;
1401 int64_t tmp_upper = upper, tmp_lower = upper;
1402 tmp_lower = (tmp_lower << 52) >> 52;
1403 tmp_upper -= tmp_lower;
1404 tmp_upper >>= 12;
1405 // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:20] == 0x7ff && target[19] == 1),
1406 // upper = target[63:32] + 1.
1407 Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui.
1408 Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi.
1409 // Load the remaining 32 bits.
1410 Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi.
1411 Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi.
1412 Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi.
1413 return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
1414 }
1415
1416 static int patch_imm_in_li16u(address branch, uint16_t target) {
1417 Assembler::patch(branch, 31, 12, target); // patch lui only
1418 return NativeInstruction::instruction_size;
1419 }
1420
1421 int MacroAssembler::patch_imm_in_li32(address branch, int32_t target) {
1422 const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw
1423 int64_t upper = (intptr_t)target;
1424 int32_t lower = (((int32_t)target) << 20) >> 20;
1425 upper -= lower;
1426 upper = (int32_t)upper;
1427 Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui.
1428 Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw.
1429 return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
1430 }
1431
1432 static long get_offset_of_jal(address insn_addr) {
1433 assert_cond(insn_addr != nullptr);
1434 long offset = 0;
1435 unsigned insn = Assembler::ld_instr(insn_addr);
1436 long val = (long)Assembler::sextract(insn, 31, 12);
1437 offset |= ((val >> 19) & 0x1) << 20;
1438 offset |= (val & 0xff) << 12;
1439 offset |= ((val >> 8) & 0x1) << 11;
1440 offset |= ((val >> 9) & 0x3ff) << 1;
1441 offset = (offset << 43) >> 43;
1442 return offset;
1443 }
1444
1445 static long get_offset_of_conditional_branch(address insn_addr) {
1446 long offset = 0;
1447 assert_cond(insn_addr != nullptr);
1448 unsigned insn = Assembler::ld_instr(insn_addr);
1449 offset = (long)Assembler::sextract(insn, 31, 31);
1450 offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);
1451 offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);
1452 offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);
1453 offset = (offset << 41) >> 41;
1454 return offset;
1455 }
1456
1457 static long get_offset_of_pc_relative(address insn_addr) {
1458 long offset = 0;
1459 assert_cond(insn_addr != nullptr);
1460 offset = ((long)(Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12))) << 12; // Auipc.
1461 offset += ((long)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addi/Jalr/Load.
1462 offset = (offset << 32) >> 32;
1463 return offset;
1464 }
1465
1466 static address get_target_of_movptr(address insn_addr) {
1467 assert_cond(insn_addr != nullptr);
1468 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 29; // Lui.
1469 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 17; // Addi.
1470 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 6; // Addi.
1471 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)); // Addi/Jalr/Load.
1472 return (address) target_address;
1473 }
1474
1475 static address get_target_of_li64(address insn_addr) {
1476 assert_cond(insn_addr != nullptr);
1477 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 44; // Lui.
1478 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 32; // Addi.
1479 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 20; // Addi.
1480 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)) << 8; // Addi.
1481 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 28), 31, 20)); // Addi.
1482 return (address)target_address;
1483 }
1484
1485 address MacroAssembler::get_target_of_li32(address insn_addr) {
1486 assert_cond(insn_addr != nullptr);
1487 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 12; // Lui.
1488 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addiw.
1489 return (address)target_address;
1490 }
1491
1492 // Patch any kind of instruction; there may be several instructions.
1493 // Return the total length (in bytes) of the instructions.
1494 int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
1495 assert_cond(branch != nullptr);
1496 int64_t offset = target - branch;
1497 if (NativeInstruction::is_jal_at(branch)) { // jal
1498 return patch_offset_in_jal(branch, offset);
1499 } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne
1500 return patch_offset_in_conditional_branch(branch, offset);
1501 } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load
1502 return patch_offset_in_pc_relative(branch, offset);
1503 } else if (NativeInstruction::is_movptr_at(branch)) { // movptr
1504 return patch_addr_in_movptr(branch, target);
1505 } else if (NativeInstruction::is_li64_at(branch)) { // li64
1506 return patch_imm_in_li64(branch, target);
1507 } else if (NativeInstruction::is_li32_at(branch)) { // li32
1508 int64_t imm = (intptr_t)target;
1509 return patch_imm_in_li32(branch, (int32_t)imm);
1510 } else if (NativeInstruction::is_li16u_at(branch)) {
1511 int64_t imm = (intptr_t)target;
1512 return patch_imm_in_li16u(branch, (uint16_t)imm);
1513 } else {
1514 #ifdef ASSERT
1515 tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n",
1516 Assembler::ld_instr(branch), p2i(branch));
1517 Disassembler::decode(branch - 16, branch + 16);
1518 #endif
1519 ShouldNotReachHere();
1520 return -1;
1521 }
1522 }
1523
1524 address MacroAssembler::target_addr_for_insn(address insn_addr) {
1525 long offset = 0;
1526 assert_cond(insn_addr != nullptr);
1527 if (NativeInstruction::is_jal_at(insn_addr)) { // jal
1528 offset = get_offset_of_jal(insn_addr);
1529 } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne
1530 offset = get_offset_of_conditional_branch(insn_addr);
1531 } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load
1532 offset = get_offset_of_pc_relative(insn_addr);
1533 } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr
1534 return get_target_of_movptr(insn_addr);
1535 } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64
1536 return get_target_of_li64(insn_addr);
1537 } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32
1538 return get_target_of_li32(insn_addr);
1539 } else {
1540 ShouldNotReachHere();
1541 }
1542 return address(((uintptr_t)insn_addr + offset));
1543 }
1544
1545 int MacroAssembler::patch_oop(address insn_addr, address o) {
1546 // OOPs are either narrow (32 bits) or wide (48 bits). We encode
1547 // narrow OOPs by setting the upper 16 bits in the first
1548 // instruction.
1549 if (NativeInstruction::is_li32_at(insn_addr)) {
1550 // Move narrow OOP
1551 uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
1552 return patch_imm_in_li32(insn_addr, (int32_t)n);
1553 } else if (NativeInstruction::is_movptr_at(insn_addr)) {
1554 // Move wide OOP
1555 return patch_addr_in_movptr(insn_addr, o);
1556 }
1557 ShouldNotReachHere();
1558 return -1;
1559 }
1560
1561 void MacroAssembler::reinit_heapbase() {
1562 if (UseCompressedOops) {
1563 if (Universe::is_fully_initialized()) {
1564 mv(xheapbase, CompressedOops::ptrs_base());
1565 } else {
1566 ExternalAddress target(CompressedOops::ptrs_base_addr());
1567 relocate(target.rspec(), [&] {
1568 int32_t offset;
1569 la_patchable(xheapbase, target, offset);
1570 ld(xheapbase, Address(xheapbase, offset));
1571 });
1572 }
1573 }
1574 }
1575
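// Sketch of the encoding below for a 48-bit address A: lui/addi materialize A[47:17],
// slli(11) + addi append A[16:6], and slli(6) makes room for A[5:0], which is returned
// in 'offset' so the caller's trailing addi/jalr/load completes the fixed 6-instruction
// movptr pattern expected by patch_addr_in_movptr.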
1576 void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset) {
1577 int64_t imm64 = (int64_t)addr;
1578 #ifndef PRODUCT
1579 {
1580 char buffer[64];
1581 snprintf(buffer, sizeof(buffer), "0x%" PRIx64, imm64);
1582 block_comment(buffer);
1583 }
1584 #endif
1585 assert((uintptr_t)imm64 < (1ull << 48), "48-bit overflow in address constant");
1586 // Load upper 31 bits
1587 int64_t imm = imm64 >> 17;
1588 int64_t upper = imm, lower = imm;
1589 lower = (lower << 52) >> 52;
1590 upper -= lower;
1591 upper = (int32_t)upper;
1592 lui(Rd, upper);
1593 addi(Rd, Rd, lower);
1594
1595 // Load the remaining 17 bits.
1596 slli(Rd, Rd, 11);
1597 addi(Rd, Rd, (imm64 >> 6) & 0x7ff);
1598 slli(Rd, Rd, 6);
1599
1600 // This offset will be used by following jalr/ld.
1601 offset = imm64 & 0x3f;
1602 }
1603
1604 void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register temp) {
1605 if (is_simm12(increment)) {
1606 addi(Rd, Rn, increment);
1607 } else {
1608 assert_different_registers(Rn, temp);
1609 li(temp, increment);
1610 add(Rd, Rn, temp);
1611 }
1612 }
1613
1614 void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) {
1615 if (is_simm12(increment)) {
1616 addiw(Rd, Rn, increment);
1617 } else {
1618 assert_different_registers(Rn, temp);
1619 li(temp, increment);
1620 addw(Rd, Rn, temp);
1621 }
1622 }
1623
1624 void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) {
1625 if (is_simm12(-decrement)) {
1626 addi(Rd, Rn, -decrement);
1627 } else {
1628 assert_different_registers(Rn, temp);
1629 li(temp, decrement);
1630 sub(Rd, Rn, temp);
1631 }
1632 }
1633
1634 void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) {
1635 if (is_simm12(-decrement)) {
1636 addiw(Rd, Rn, -decrement);
1637 } else {
1638 assert_different_registers(Rn, temp);
1639 li(temp, decrement);
1640 subw(Rd, Rn, temp);
1641 }
1642 }
1643
1644 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
1645 andr(Rd, Rs1, Rs2);
1646 sign_extend(Rd, Rd, 32);
1647 }
1648
1649 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) {
1650 orr(Rd, Rs1, Rs2);
1651 sign_extend(Rd, Rd, 32);
1652 }
1653
1654 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) {
1655 xorr(Rd, Rs1, Rs2);
1656 sign_extend(Rd, Rd, 32);
1657 }
1658
1659 // Rd = Rs1 & (~Rs2)
1660 void MacroAssembler::andn(Register Rd, Register Rs1, Register Rs2) {
1661 if (UseZbb) {
1662 Assembler::andn(Rd, Rs1, Rs2);
1663 return;
1664 }
1665
1666 notr(Rd, Rs2);
1667 andr(Rd, Rs1, Rd);
1668 }
1669
1670 // Rd = Rs1 | (~Rs2)
1671 void MacroAssembler::orn(Register Rd, Register Rs1, Register Rs2) {
1672 if (UseZbb) {
1673 Assembler::orn(Rd, Rs1, Rs2);
1674 return;
1675 }
1676
1677 notr(Rd, Rs2);
1678 orr(Rd, Rs1, Rd);
1679 }
1680
1681 // Note: load_unsigned_short used to be called load_unsigned_word.
1682 int MacroAssembler::load_unsigned_short(Register dst, Address src) {
1683 int off = offset();
1684 lhu(dst, src);
1685 return off;
1686 }
1687
1688 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
1689 int off = offset();
1690 lbu(dst, src);
1691 return off;
1692 }
1693
1694 int MacroAssembler::load_signed_short(Register dst, Address src) {
1695 int off = offset();
1696 lh(dst, src);
1697 return off;
1698 }
1699
1700 int MacroAssembler::load_signed_byte(Register dst, Address src) {
1701 int off = offset();
1702 lb(dst, src);
1703 return off;
1704 }
1705
1706 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
1707 switch (size_in_bytes) {
1708 case 8: ld(dst, src); break;
1709 case 4: is_signed ? lw(dst, src) : lwu(dst, src); break;
1710 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
1711 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
1712 default: ShouldNotReachHere();
1713 }
1714 }
1715
1716 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes) {
1717 switch (size_in_bytes) {
1718 case 8: sd(src, dst); break;
1719 case 4: sw(src, dst); break;
1720 case 2: sh(src, dst); break;
1721 case 1: sb(src, dst); break;
1722 default: ShouldNotReachHere();
1723 }
1724 }
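
// The *_misaligned helpers below assemble a value from narrower loads when
// AvoidUnalignedAccesses is set: the pieces are loaded in little-endian order,
// shifted into position and added together. Only the most significant piece is
// loaded with the signed variant (when a signed result is requested), so it
// alone determines sign/zero extension.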
1725
1726 // granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register
1727 void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) {
1728 if (granularity != 1 && granularity != 2) {
1729 ShouldNotReachHere();
1730 }
1731 if (AvoidUnalignedAccesses && (granularity != 2)) {
1732 assert_different_registers(dst, tmp);
1733 assert_different_registers(tmp, src.base());
1734 is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1));
1735 slli(tmp, tmp, 8);
1736 lbu(dst, src);
1737 add(dst, dst, tmp);
1738 } else {
1739 is_signed ? lh(dst, src) : lhu(dst, src);
1740 }
1741 }
1742
1743 // granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register
1744 void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) {
1745 if (AvoidUnalignedAccesses && (granularity != 4)) {
1746 switch(granularity) {
1747 case 1:
1748 assert_different_registers(dst, tmp, src.base());
1749 lbu(dst, src);
1750 lbu(tmp, Address(src.base(), src.offset() + 1));
1751 slli(tmp, tmp, 8);
1752 add(dst, dst, tmp);
1753 lbu(tmp, Address(src.base(), src.offset() + 2));
1754 slli(tmp, tmp, 16);
1755 add(dst, dst, tmp);
1756 is_signed ? lb(tmp, Address(src.base(), src.offset() + 3)) : lbu(tmp, Address(src.base(), src.offset() + 3));
1757 slli(tmp, tmp, 24);
1758 add(dst, dst, tmp);
1759 break;
1760 case 2:
1761 assert_different_registers(dst, tmp);
1762 assert_different_registers(tmp, src.base());
1763 is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2));
1764 slli(tmp, tmp, 16);
1765 lhu(dst, src);
1766 add(dst, dst, tmp);
1767 break;
1768 default:
1769 ShouldNotReachHere();
1770 }
1771 } else {
1772 is_signed ? lw(dst, src) : lwu(dst, src);
1773 }
1774 }
1775
1776 // granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register
1777 void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) {
1778 if (AvoidUnalignedAccesses && (granularity != 8)) {
1779 switch(granularity){
1780 case 1:
1781 assert_different_registers(dst, tmp, src.base());
1782 lbu(dst, src);
1783 lbu(tmp, Address(src.base(), src.offset() + 1));
1784 slli(tmp, tmp, 8);
1785 add(dst, dst, tmp);
1786 lbu(tmp, Address(src.base(), src.offset() + 2));
1787 slli(tmp, tmp, 16);
1788 add(dst, dst, tmp);
1789 lbu(tmp, Address(src.base(), src.offset() + 3));
1790 slli(tmp, tmp, 24);
1791 add(dst, dst, tmp);
1792 lbu(tmp, Address(src.base(), src.offset() + 4));
1793 slli(tmp, tmp, 32);
1794 add(dst, dst, tmp);
1795 lbu(tmp, Address(src.base(), src.offset() + 5));
1796 slli(tmp, tmp, 40);
1797 add(dst, dst, tmp);
1798 lbu(tmp, Address(src.base(), src.offset() + 6));
1799 slli(tmp, tmp, 48);
1800 add(dst, dst, tmp);
1801 lbu(tmp, Address(src.base(), src.offset() + 7));
1802 slli(tmp, tmp, 56);
1803 add(dst, dst, tmp);
1804 break;
1805 case 2:
1806 assert_different_registers(dst, tmp, src.base());
1807 lhu(dst, src);
1808 lhu(tmp, Address(src.base(), src.offset() + 2));
1809 slli(tmp, tmp, 16);
1810 add(dst, dst, tmp);
1811 lhu(tmp, Address(src.base(), src.offset() + 4));
1812 slli(tmp, tmp, 32);
1813 add(dst, dst, tmp);
1814 lhu(tmp, Address(src.base(), src.offset() + 6));
1815 slli(tmp, tmp, 48);
1816 add(dst, dst, tmp);
1817 break;
1818 case 4:
1819 assert_different_registers(dst, tmp);
1820 assert_different_registers(tmp, src.base());
1821 lwu(tmp, Address(src.base(), src.offset() + 4));
1822 slli(tmp, tmp, 32);
1823 lwu(dst, src);
1824 add(dst, dst, tmp);
1825 break;
1826 default:
1827 ShouldNotReachHere();
1828 }
1829 } else {
1830 ld(dst, src);
1831 }
1832 }
1833
1834
1835 // reverse bytes in halfword in lower 16 bits and sign-extend
1836 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
1837 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) {
1838 if (UseZbb) {
1839 rev8(Rd, Rs);
1840 srai(Rd, Rd, 48);
1841 return;
1842 }
1843 assert_different_registers(Rs, tmp);
1844 assert_different_registers(Rd, tmp);
1845 srli(tmp, Rs, 8);
1846 andi(tmp, tmp, 0xFF);
1847 slli(Rd, Rs, 56);
1848 srai(Rd, Rd, 48); // sign-extend
1849 orr(Rd, Rd, tmp);
1850 }
1851
1852 // reverse bytes in lower word and sign-extend
1853 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
1854 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1855 if (UseZbb) {
1856 rev8(Rd, Rs);
1857 srai(Rd, Rd, 32);
1858 return;
1859 }
1860 assert_different_registers(Rs, tmp1, tmp2);
1861 assert_different_registers(Rd, tmp1, tmp2);
1862 revb_h_w_u(Rd, Rs, tmp1, tmp2);
1863 slli(tmp2, Rd, 48);
1864 srai(tmp2, tmp2, 32); // sign-extend
1865 srli(Rd, Rd, 16);
1866 orr(Rd, Rd, tmp2);
1867 }
1868
1869 // reverse bytes in halfword in lower 16 bits and zero-extend
1870 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1871 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) {
1872 if (UseZbb) {
1873 rev8(Rd, Rs);
1874 srli(Rd, Rd, 48);
1875 return;
1876 }
1877 assert_different_registers(Rs, tmp);
1878 assert_different_registers(Rd, tmp);
1879 srli(tmp, Rs, 8);
1880 andi(tmp, tmp, 0xFF);
1881 andi(Rd, Rs, 0xFF);
1882 slli(Rd, Rd, 8);
1883 orr(Rd, Rd, tmp);
1884 }
1885
1886 // reverse bytes in halfwords in lower 32 bits and zero-extend
1887 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1888 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1889 if (UseZbb) {
1890 rev8(Rd, Rs);
1891 rori(Rd, Rd, 32);
1892 roriw(Rd, Rd, 16);
1893 zero_extend(Rd, Rd, 32);
1894 return;
1895 }
1896 assert_different_registers(Rs, tmp1, tmp2);
1897 assert_different_registers(Rd, tmp1, tmp2);
1898 srli(tmp2, Rs, 16);
1899 revb_h_h_u(tmp2, tmp2, tmp1);
1900 revb_h_h_u(Rd, Rs, tmp1);
1901 slli(tmp2, tmp2, 16);
1902 orr(Rd, Rd, tmp2);
1903 }
1904
1905 // This method is only used for revb_h
1906 // Rd = Rs[47:0] Rs[55:48] Rs[63:56]
1907 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1908 assert_different_registers(Rs, tmp1, tmp2);
1909 assert_different_registers(Rd, tmp1);
1910 srli(tmp1, Rs, 48);
1911 andi(tmp2, tmp1, 0xFF);
1912 slli(tmp2, tmp2, 8);
1913 srli(tmp1, tmp1, 8);
1914 orr(tmp1, tmp1, tmp2);
1915 slli(Rd, Rs, 16);
1916 orr(Rd, Rd, tmp1);
1917 }
1918
1919 // reverse bytes in each halfword
1920 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
1921 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1922 if (UseZbb) {
1923 assert_different_registers(Rs, tmp1);
1924 assert_different_registers(Rd, tmp1);
1925 rev8(Rd, Rs);
1926 zero_extend(tmp1, Rd, 32);
1927 roriw(tmp1, tmp1, 16);
1928 slli(tmp1, tmp1, 32);
1929 srli(Rd, Rd, 32);
1930 roriw(Rd, Rd, 16);
1931 zero_extend(Rd, Rd, 32);
1932 orr(Rd, Rd, tmp1);
1933 return;
1934 }
1935 assert_different_registers(Rs, tmp1, tmp2);
1936 assert_different_registers(Rd, tmp1, tmp2);
1937 revb_h_helper(Rd, Rs, tmp1, tmp2);
1938 for (int i = 0; i < 3; ++i) {
1939 revb_h_helper(Rd, Rd, tmp1, tmp2);
1940 }
1941 }
1942
1943 // reverse bytes in each word
1944 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24]
1945 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1946 if (UseZbb) {
1947 rev8(Rd, Rs);
1948 rori(Rd, Rd, 32);
1949 return;
1950 }
1951 assert_different_registers(Rs, tmp1, tmp2);
1952 assert_different_registers(Rd, tmp1, tmp2);
1953 revb(Rd, Rs, tmp1, tmp2);
1954 ror_imm(Rd, Rd, 32);
1955 }
1956
1957 // reverse bytes in doubleword
1958 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56]
1959 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1960 if (UseZbb) {
1961 rev8(Rd, Rs);
1962 return;
1963 }
1964 assert_different_registers(Rs, tmp1, tmp2);
1965 assert_different_registers(Rd, tmp1, tmp2);
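// Without Zbb, reverse by accumulating the bytes of Rs from least to most
// significant into tmp1, shifting the accumulator left by 8 after each byte,
// so the lowest byte of Rs ends up in the highest byte of the result.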
1966 andi(tmp1, Rs, 0xFF);
1967 slli(tmp1, tmp1, 8);
1968 for (int step = 8; step < 56; step += 8) {
1969 srli(tmp2, Rs, step);
1970 andi(tmp2, tmp2, 0xFF);
1971 orr(tmp1, tmp1, tmp2);
1972 slli(tmp1, tmp1, 8);
1973 }
1974 srli(Rd, Rs, 56);
1975 andi(Rd, Rd, 0xFF);
1976 orr(Rd, tmp1, Rd);
1977 }
1978
1979 // rotate right by shift bits
1980 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp)
1981 {
1982 if (UseZbb) {
1983 rori(dst, src, shift);
1984 return;
1985 }
1986
1987 assert_different_registers(dst, tmp);
1988 assert_different_registers(src, tmp);
1989 assert(shift < 64, "shift amount must be < 64");
1990 slli(tmp, src, 64 - shift);
1991 srli(dst, src, shift);
1992 orr(dst, dst, tmp);
1993 }
1994
1995 // rotate left by shift bits, 32-bit version
1996 void MacroAssembler::rolw_imm(Register dst, Register src, uint32_t shift, Register tmp) {
1997 if (UseZbb) {
1998 // no roliw available
1999 roriw(dst, src, 32 - shift);
2000 return;
2001 }
2002
2003 assert_different_registers(dst, tmp);
2004 assert_different_registers(src, tmp);
2005 assert(shift < 32, "shift amount must be < 32");
2006 srliw(tmp, src, 32 - shift);
2007 slliw(dst, src, shift);
2008 orr(dst, dst, tmp);
2009 }
2010
2011 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) {
2012 if (is_simm12(imm)) {
2013 and_imm12(Rd, Rn, imm);
2014 } else {
2015 assert_different_registers(Rn, tmp);
2016 mv(tmp, imm);
2017 andr(Rd, Rn, tmp);
2018 }
2019 }
2020
2021 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) {
2022 ld(tmp1, adr);
2023 if (src.is_register()) {
2024 orr(tmp1, tmp1, src.as_register());
2025 } else {
2026 if (is_simm12(src.as_constant())) {
2027 ori(tmp1, tmp1, src.as_constant());
2028 } else {
2029 assert_different_registers(tmp1, tmp2);
2030 mv(tmp2, src.as_constant());
2031 orr(tmp1, tmp1, tmp2);
2032 }
2033 }
2034 sd(tmp1, adr);
2035 }
2036
2037 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L) {
2038 assert_different_registers(oop, trial_klass, tmp1, tmp2);
2039 if (UseCompressedClassPointers) {
2040 lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes()));
2041 if (CompressedKlassPointers::base() == nullptr) {
2042 slli(tmp1, tmp1, CompressedKlassPointers::shift());
2043 beq(trial_klass, tmp1, L);
2044 return;
2045 }
2046 decode_klass_not_null(tmp1, tmp2);
2047 } else {
2048 ld(tmp1, Address(oop, oopDesc::klass_offset_in_bytes()));
2049 }
2050 beq(trial_klass, tmp1, L);
2051 }
2052
2053 // Move an oop into a register.
2054 void MacroAssembler::movoop(Register dst, jobject obj) {
2055 int oop_index;
2056 if (obj == nullptr) {
2057 oop_index = oop_recorder()->allocate_oop_index(obj);
2058 } else {
2059 #ifdef ASSERT
2060 {
2061 ThreadInVMfromUnknown tiv;
2062 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
2063 }
2064 #endif
2065 oop_index = oop_recorder()->find_index(obj);
2066 }
2067 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2068
2069 if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) {
2070 mv(dst, Address((address)obj, rspec));
2071 } else {
2072 address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
2073 ld_constant(dst, Address(dummy, rspec));
2074 }
2075 }
2076
2077 // Move a metadata address into a register.
2078 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
2079 int oop_index;
2080 if (obj == nullptr) {
2081 oop_index = oop_recorder()->allocate_metadata_index(obj);
2082 } else {
2083 oop_index = oop_recorder()->find_index(obj);
2084 }
2085 RelocationHolder rspec = metadata_Relocation::spec(oop_index);
2086 mv(dst, Address((address)obj, rspec));
2087 }
2088
2089 // Writes to successive stack pages, until the given size is reached, to check for
2090 // stack overflow + shadow pages. This clobbers tmp.
2091 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
2092 assert_different_registers(tmp, size, t0);
2093 // Bang stack for total size given plus shadow page size.
2094 // Bang one page at a time because large size can bang beyond yellow and
2095 // red zones.
2096 mv(t0, (int)os::vm_page_size());
2097 Label loop;
2098 bind(loop);
2099 sub(tmp, sp, t0);
2100 subw(size, size, t0);
2101 sd(size, Address(tmp));
2102 bgtz(size, loop);
2103
2104 // Bang down shadow pages too.
2105 // At this point, (tmp-0) is the last address touched, so don't
2106 // touch it again. (It was touched as (tmp-pagesize) but then tmp
2107 // was post-decremented.) Skip this address by starting at i=1, and
2108 // touch a few more pages below. N.B. It is important to touch all
2109 // the way down to and including i=StackShadowPages.
2110 for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / (int)os::vm_page_size()) - 1; i++) {
2111 // this could be any sized move, but since it can serve as a debugging crumb
2112 // the bigger the better.
2113 sub(tmp, tmp, (int)os::vm_page_size());
2114 sd(size, Address(tmp, 0));
2115 }
2116 }
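
// SkipIfEqual conditionally skips the code emitted between its construction and
// destruction: the constructor loads the byte at flag_addr and branches past
// that code when it equals 'value'; the destructor binds the branch target.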
2117
2118 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) {
2120 _masm = masm;
2121 ExternalAddress target((address)flag_addr);
2122 _masm->relocate(target.rspec(), [&] {
2123 int32_t offset;
2124 _masm->la_patchable(t0, target, offset);
2125 _masm->lbu(t0, Address(t0, offset));
2126 });
2127 if (value) {
2128 _masm->bnez(t0, _label);
2129 } else {
2130 _masm->beqz(t0, _label);
2131 }
2132 }
2133
2134 SkipIfEqual::~SkipIfEqual() {
2135 _masm->bind(_label);
2136 _masm = nullptr;
2137 }
2138
2139 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) {
2140 const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2141 ld(dst, Address(xmethod, Method::const_offset()));
2142 ld(dst, Address(dst, ConstMethod::constants_offset()));
2143 ld(dst, Address(dst, ConstantPool::pool_holder_offset()));
2144 ld(dst, Address(dst, mirror_offset));
2145 resolve_oop_handle(dst, tmp1, tmp2);
2146 }
2147
2148 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) {
2149 // OopHandle::resolve is an indirection.
2150 assert_different_registers(result, tmp1, tmp2);
2151 access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2);
2152 }
2153
2154 // ((WeakHandle)result).resolve()
2155 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) {
2156 assert_different_registers(result, tmp1, tmp2);
2157 Label resolved;
2158
2159 // A null weak handle resolves to null.
2160 beqz(result, resolved);
2161
2162 // Only 64 bit platforms support GCs that require a tmp register
2163 // Only IN_HEAP loads require a thread_tmp register
2164 // WeakHandle::resolve is an indirection like jweak.
2165 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
2166 result, Address(result), tmp1, tmp2);
2167 bind(resolved);
2168 }
2169
2170 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
2171 Register dst, Address src,
2172 Register tmp1, Register tmp2) {
2173 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2174 decorators = AccessInternal::decorator_fixup(decorators, type);
2175 bool as_raw = (decorators & AS_RAW) != 0;
2176 if (as_raw) {
2177 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2);
2178 } else {
2179 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2);
2180 }
2181 }
2182
2183 void MacroAssembler::null_check(Register reg, int offset) {
2184 if (needs_explicit_null_check(offset)) {
2185 // provoke OS null exception if reg is null by
2186 // accessing M[reg] w/o changing any registers
2187 // NOTE: this is plenty to provoke a segv
2188 ld(zr, Address(reg, 0));
2189 } else {
2190 // nothing to do, (later) access of M[reg + offset]
2191 // will provoke OS null exception if reg is null
2192 }
2193 }
2194
2195 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
2196 Address dst, Register val,
2197 Register tmp1, Register tmp2, Register tmp3) {
2198 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2199 decorators = AccessInternal::decorator_fixup(decorators, type);
2200 bool as_raw = (decorators & AS_RAW) != 0;
2201 if (as_raw) {
2202 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
2203 } else {
2204 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
2205 }
2206 }
2207
2208 // Algorithm must match CompressedOops::encode.
2209 void MacroAssembler::encode_heap_oop(Register d, Register s) {
2210 verify_oop_msg(s, "broken oop in encode_heap_oop");
2211 if (CompressedOops::base() == nullptr) {
2212 if (CompressedOops::shift() != 0) {
2213 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
2214 srli(d, s, LogMinObjAlignmentInBytes);
2215 } else {
2216 mv(d, s);
2217 }
2218 } else {
2219 Label notNull;
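// A null oop must encode to zero: since every non-null heap oop is at or above
// the narrow-oop base, s - xheapbase is negative only for null, in which case
// d is replaced by zero before the shift.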
2220 sub(d, s, xheapbase);
2221 bgez(d, notNull);
2222 mv(d, zr);
2223 bind(notNull);
2224 if (CompressedOops::shift() != 0) {
2225 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
2226 srli(d, d, CompressedOops::shift());
2227 }
2228 }
2229 }
2230
2231 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
2232 assert_different_registers(dst, tmp);
2233 assert_different_registers(src, tmp);
2234 if (UseCompressedClassPointers) {
2235 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
2236 decode_klass_not_null(dst, tmp);
2237 } else {
2238 ld(dst, Address(src, oopDesc::klass_offset_in_bytes()));
2239 }
2240 }
2241
2242 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
2243 // FIXME: Should this be a store release? Concurrent GCs assume
2244 // the klass length is valid if the klass field is not null.
2245 if (UseCompressedClassPointers) {
2246 encode_klass_not_null(src, tmp);
2247 sw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
2248 } else {
2249 sd(src, Address(dst, oopDesc::klass_offset_in_bytes()));
2250 }
2251 }
2252
2253 void MacroAssembler::store_klass_gap(Register dst, Register src) {
2254 if (UseCompressedClassPointers) {
2255 // Store to klass gap in destination
2256 sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
2257 }
2258 }
2259
2260 void MacroAssembler::decode_klass_not_null(Register r, Register tmp) {
2261 assert_different_registers(r, tmp);
2262 decode_klass_not_null(r, r, tmp);
2263 }
2264
2265 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) {
2266 assert(UseCompressedClassPointers, "should only be used for compressed headers");
2267
2268 if (CompressedKlassPointers::base() == nullptr) {
2269 if (CompressedKlassPointers::shift() != 0) {
2270 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
2271 slli(dst, src, LogKlassAlignmentInBytes);
2272 } else {
2273 mv(dst, src);
2274 }
2275 return;
2276 }
2277
2278 Register xbase = dst;
2279 if (dst == src) {
2280 xbase = tmp;
2281 }
2282
2283 assert_different_registers(src, xbase);
2284 mv(xbase, (uintptr_t)CompressedKlassPointers::base());
2285
2286 if (CompressedKlassPointers::shift() != 0) {
2287 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
2288 assert_different_registers(t0, xbase);
2289 shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes);
2290 } else {
2291 add(dst, xbase, src);
2292 }
2293 }
2294
2295 void MacroAssembler::encode_klass_not_null(Register r, Register tmp) {
2296 assert_different_registers(r, tmp);
2297 encode_klass_not_null(r, r, tmp);
2298 }
2299
2300 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) {
2301 assert(UseCompressedClassPointers, "should only be used for compressed headers");
2302
2303 if (CompressedKlassPointers::base() == nullptr) {
2304 if (CompressedKlassPointers::shift() != 0) {
2305 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
2306 srli(dst, src, LogKlassAlignmentInBytes);
2307 } else {
2308 mv(dst, src);
2309 }
2310 return;
2311 }
2312
2313 if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 &&
2314 CompressedKlassPointers::shift() == 0) {
2315 zero_extend(dst, src, 32);
2316 return;
2317 }
2318
2319 Register xbase = dst;
2320 if (dst == src) {
2321 xbase = tmp;
2322 }
2323
2324 assert_different_registers(src, xbase);
2325 mv(xbase, (uintptr_t)CompressedKlassPointers::base());
2326 sub(dst, src, xbase);
2327 if (CompressedKlassPointers::shift() != 0) {
2328 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
2329 srli(dst, dst, LogKlassAlignmentInBytes);
2330 }
2331 }
2332
2333 void MacroAssembler::decode_heap_oop_not_null(Register r) {
2334 decode_heap_oop_not_null(r, r);
2335 }
2336
2337 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
2338 assert(UseCompressedOops, "should only be used for compressed headers");
2339 assert(Universe::heap() != nullptr, "java heap should be initialized");
2340 // Cannot assert, unverified entry point counts instructions (see .ad file)
2341 // vtableStubs also counts instructions in pd_code_size_limit.
2342 // Also do not verify_oop as this is called by verify_oop.
2343 if (CompressedOops::shift() != 0) {
2344 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
2345 slli(dst, src, LogMinObjAlignmentInBytes);
2346 if (CompressedOops::base() != nullptr) {
2347 add(dst, xheapbase, dst);
2348 }
2349 } else {
2350 assert(CompressedOops::base() == nullptr, "sanity");
2351 mv(dst, src);
2352 }
2353 }
2354
2355 void MacroAssembler::decode_heap_oop(Register d, Register s) {
2356 if (CompressedOops::base() == nullptr) {
2357 if (CompressedOops::shift() != 0 || d != s) {
2358 slli(d, s, CompressedOops::shift());
2359 }
2360 } else {
2361 Label done;
2362 mv(d, s);
2363 beqz(s, done);
2364 shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes);
2365 bind(done);
2366 }
2367 verify_oop_msg(d, "broken oop in decode_heap_oop");
2368 }
2369
2370 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
2371 Register tmp2, Register tmp3, DecoratorSet decorators) {
2372 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
2373 }
2374
2375 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
2376 Register tmp2, DecoratorSet decorators) {
2377 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
2378 }
2379
2380 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
2381 Register tmp2, DecoratorSet decorators) {
2382 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2);
2383 }
2384
2385 // Used for storing nulls.
2386 void MacroAssembler::store_heap_oop_null(Address dst) {
2387 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
2388 }
2389
2390 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
2391 bool want_remainder)
2392 {
2393 // Full implementation of Java idiv and irem. The function
2394 // returns the (pc) offset of the div instruction - may be needed
2395 // for implicit exceptions.
2396 //
2397 // input : rs1: dividend
2398 // rs2: divisor
2399 //
2400 // result: either
2401 // quotient (= rs1 idiv rs2)
2402 // remainder (= rs1 irem rs2)
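//
// Note: RISC-V's divw/remw already produce the Java-specified result for
// MIN_VALUE / -1 (quotient MIN_VALUE, remainder 0), so unlike x86 no explicit
// correction sequence is needed here.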
2403
2404
2405 int idivl_offset = offset();
2406 if (!want_remainder) {
2407 divw(result, rs1, rs2);
2408 } else {
2409 remw(result, rs1, rs2); // result = rs1 % rs2;
2410 }
2411 return idivl_offset;
2412 }
2413
2414 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
2415 bool want_remainder)
2416 {
2417 // Full implementation of Java ldiv and lrem. The function
2418 // returns the (pc) offset of the div instruction - may be needed
2419 // for implicit exceptions.
2420 //
2421 // input : rs1: dividend
2422 // rs2: divisor
2423 //
2424 // result: either
2425 // quotient (= rs1 ldiv rs2)
2426 // remainder (= rs1 lrem rs2)
2427
2428 int idivq_offset = offset();
2429 if (!want_remainder) {
2430 div(result, rs1, rs2);
2431 } else {
2432 rem(result, rs1, rs2); // result = rs1 % rs2;
2433 }
2434 return idivq_offset;
2435 }
2436
2437 // Look up the method for a megamorphic invokeinterface call.
2438 // The target method is determined by <intf_klass, itable_index>.
2439 // The receiver klass is in recv_klass.
2440 // On success, the result will be in method_result, and execution falls through.
2441 // On failure, execution transfers to the given label.
2442 void MacroAssembler::lookup_interface_method(Register recv_klass,
2443 Register intf_klass,
2444 RegisterOrConstant itable_index,
2445 Register method_result,
2446 Register scan_tmp,
2447 Label& L_no_such_interface,
2448 bool return_method) {
2449 assert_different_registers(recv_klass, intf_klass, scan_tmp);
2450 assert_different_registers(method_result, intf_klass, scan_tmp);
2451 assert(recv_klass != method_result || !return_method,
2452 "recv_klass can be destroyed when method isn't needed");
2453 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
2454 "caller must be same register for non-constant itable index as for method");
2455
2456 // Compute start of first itableOffsetEntry (which is at the end of the vtable).
2457 int vtable_base = in_bytes(Klass::vtable_start_offset());
2458 int itentry_off = in_bytes(itableMethodEntry::method_offset());
2459 int scan_step = itableOffsetEntry::size() * wordSize;
2460 int vte_size = vtableEntry::size_in_bytes();
2461 assert(vte_size == wordSize, "else adjust times_vte_scale");
2462
2463 lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
2464
2465 // %%% Could store the aligned, prescaled offset in the klassoop.
2466 shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
2467 add(scan_tmp, scan_tmp, vtable_base);
2468
2469 if (return_method) {
2470 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
2471 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
2472 if (itable_index.is_register()) {
2473 slli(t0, itable_index.as_register(), 3);
2474 } else {
2475 mv(t0, itable_index.as_constant() << 3);
2476 }
2477 add(recv_klass, recv_klass, t0);
2478 if (itentry_off) {
2479 add(recv_klass, recv_klass, itentry_off);
2480 }
2481 }
2482
2483 Label search, found_method;
2484
2485 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset()));
2486 beq(intf_klass, method_result, found_method);
2487 bind(search);
2488 // Check that the previous entry is non-null. A null entry means that
2489 // the receiver class doesn't implement the interface, and wasn't the
2490 // same as when the caller was compiled.
2491 beqz(method_result, L_no_such_interface, /* is_far */ true);
2492 addi(scan_tmp, scan_tmp, scan_step);
2493 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset()));
2494 bne(intf_klass, method_result, search);
2495
2496 bind(found_method);
2497
2498 // Got a hit.
2499 if (return_method) {
2500 lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset()));
2501 add(method_result, recv_klass, scan_tmp);
2502 ld(method_result, Address(method_result));
2503 }
2504 }
2505
2506 // virtual method calling
2507 void MacroAssembler::lookup_virtual_method(Register recv_klass,
2508 RegisterOrConstant vtable_index,
2509 Register method_result) {
2510 const ByteSize base = Klass::vtable_start_offset();
2511 assert(vtableEntry::size() * wordSize == 8,
2512 "adjust the scaling in the code below");
2513 int vtable_offset_in_bytes = in_bytes(base + vtableEntry::method_offset());
2514
2515 if (vtable_index.is_register()) {
2516 shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
2517 ld(method_result, Address(method_result, vtable_offset_in_bytes));
2518 } else {
2519 vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
2520 ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
2521 }
2522 }
2523
2524 void MacroAssembler::membar(uint32_t order_constraint) {
2525 address prev = pc() - NativeMembar::instruction_size;
2526 address last = code()->last_insn();
2527
2528 if (last != nullptr && nativeInstruction_at(last)->is_membar() && prev == last) {
2529 NativeMembar *bar = NativeMembar_at(prev);
2530 // We are merging two memory barrier instructions. On RISCV we
2531 // can do this simply by ORing them together.
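// (A fence whose predecessor/successor sets are the union of the two original
// fences orders at least everything either fence ordered.)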
2532 bar->set_kind(bar->get_kind() | order_constraint);
2533 BLOCK_COMMENT("merged membar");
2534 } else {
2535 code()->set_last_insn(pc());
2536
2537 uint32_t predecessor = 0;
2538 uint32_t successor = 0;
2539
2540 membar_mask_to_pred_succ(order_constraint, predecessor, successor);
2541 fence(predecessor, successor);
2542 }
2543 }
2544
2545 // Form an address from base + offset in Rd. Rd may or may not
2546 // actually be used: you must use the Address that is returned. It
2547 // is up to you to ensure that the shift provided matches the size
2548 // of your data.
2549 Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset) {
2550 if (is_simm12(byte_offset)) { // 12: imm in range 2^12
2551 return Address(base, byte_offset);
2552 }
2553
2554 assert_different_registers(Rd, base, noreg);
2555
2556 // Do it the hard way
2557 mv(Rd, byte_offset);
2558 add(Rd, base, Rd);
2559 return Address(Rd);
2560 }
2561
2562 void MacroAssembler::check_klass_subtype(Register sub_klass,
2563 Register super_klass,
2564 Register tmp_reg,
2565 Label& L_success) {
2566 Label L_failure;
2567 check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, nullptr);
2568 check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, nullptr);
2569 bind(L_failure);
2570 }
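
// The per-thread polling word either holds a stack watermark, compared against
// sp/fp when polling at a method return, or is simply tested for the poll bit
// on an ordinary safepoint check.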
2571
2572 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
2573 ld(t0, Address(xthread, JavaThread::polling_word_offset()));
2574 if (acquire) {
2575 membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
2576 }
2577 if (at_return) {
2578 bgtu(in_nmethod ? sp : fp, t0, slow_path, /* is_far */ true);
2579 } else {
2580 test_bit(t0, t0, exact_log2(SafepointMechanism::poll_bit()));
2581 bnez(t0, slow_path, true /* is_far */);
2582 }
2583 }
2584
2585 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
2586 Label &succeed, Label *fail) {
2587 assert_different_registers(addr, tmp);
2588 assert_different_registers(newv, tmp);
2589 assert_different_registers(oldv, tmp);
2590
2591 // oldv holds comparison value
2592 // newv holds value to write in exchange
2593 // addr identifies memory word to compare against/update
2594 Label retry_load, nope;
2595 bind(retry_load);
2596 // Load reserved from the memory location
2597 lr_d(tmp, addr, Assembler::aqrl);
2598 // Fail and exit if it is not what we expect
2599 bne(tmp, oldv, nope);
2600 // If the store conditional succeeds, tmp will be zero
2601 sc_d(tmp, newv, addr, Assembler::rl);
2602 beqz(tmp, succeed);
2603 // Retry only when the store conditional failed
2604 j(retry_load);
2605
2606 bind(nope);
2607 membar(AnyAny);
2608 mv(oldv, tmp);
2609 if (fail != nullptr) {
2610 j(*fail);
2611 }
2612 }
2613
2614 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
2615 Label &succeed, Label *fail) {
2616 assert(oopDesc::mark_offset_in_bytes() == 0, "assumption");
2617 cmpxchgptr(oldv, newv, obj, tmp, succeed, fail);
2618 }
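
// load_reserved and store_conditional communicate through t0: load_reserved
// leaves the loaded value in t0, and store_conditional leaves the status of
// the sc instruction (zero on success) in t0.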
2619
2620 void MacroAssembler::load_reserved(Register addr,
2621 enum operand_size size,
2622 Assembler::Aqrl acquire) {
2623 switch (size) {
2624 case int64:
2625 lr_d(t0, addr, acquire);
2626 break;
2627 case int32:
2628 lr_w(t0, addr, acquire);
2629 break;
2630 case uint32:
2631 lr_w(t0, addr, acquire);
2632 zero_extend(t0, t0, 32);
2633 break;
2634 default:
2635 ShouldNotReachHere();
2636 }
2637 }
2638
2639 void MacroAssembler::store_conditional(Register addr,
2640 Register new_val,
2641 enum operand_size size,
2642 Assembler::Aqrl release) {
2643 switch (size) {
2644 case int64:
2645 sc_d(t0, new_val, addr, release);
2646 break;
2647 case int32:
2648 case uint32:
2649 sc_w(t0, new_val, addr, release);
2650 break;
2651 default:
2652 ShouldNotReachHere();
2653 }
2654 }
2655
2656
2657 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected,
2658 Register new_val,
2659 enum operand_size size,
2660 Register tmp1, Register tmp2, Register tmp3) {
2661 assert(size == int8 || size == int16, "unsupported operand size");
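
// This helper emulates a byte/halfword CAS with lr.w/sc.w on the enclosing
// 4-byte aligned word: it computes the operand's position within that word as
// a bit shift, builds a mask covering it, and pre-shifts 'expected' and
// 'new_val' into place so the caller's lr/sc loop can compare and merge within
// that word.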
2662
2663 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3;
2664
2665 andi(shift, addr, 3);
2666 slli(shift, shift, 3);
2667
2668 andi(aligned_addr, addr, ~3);
2669
2670 if (size == int8) {
2671 mv(mask, 0xff);
2672 } else {
2673 // size == int16 case
2674 mv(mask, -1);
2675 zero_extend(mask, mask, 16);
2676 }
2677 sll(mask, mask, shift);
2678
2679 xori(not_mask, mask, -1);
2680
2681 sll(expected, expected, shift);
2682 andr(expected, expected, mask);
2683
2684 sll(new_val, new_val, shift);
2685 andr(new_val, new_val, mask);
2686 }
2687
2688 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps.
2689 // It implements compare-and-swap of byte/boolean/char/short values via lr.w/sc.w,
2690 // which are forced to work with a 4-byte aligned address.
2691 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
2692 Register new_val,
2693 enum operand_size size,
2694 Assembler::Aqrl acquire, Assembler::Aqrl release,
2695 Register result, bool result_as_bool,
2696 Register tmp1, Register tmp2, Register tmp3) {
2697 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2698 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2699 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2700
2701 Label retry, fail, done;
2702
2703 bind(retry);
2704 lr_w(old, aligned_addr, acquire);
2705 andr(tmp, old, mask);
2706 bne(tmp, expected, fail);
2707
2708 andr(tmp, old, not_mask);
2709 orr(tmp, tmp, new_val);
2710 sc_w(tmp, tmp, aligned_addr, release);
2711 bnez(tmp, retry);
2712
2713 if (result_as_bool) {
2714 mv(result, 1);
2715 j(done);
2716
2717 bind(fail);
2718 mv(result, zr);
2719
2720 bind(done);
2721 } else {
2722 andr(tmp, old, mask);
2723
2724 bind(fail);
2725 srl(result, tmp, shift);
2726
2727 if (size == int8) {
2728 sign_extend(result, result, 8);
2729 } else {
2730 // size == int16 case
2731 sign_extend(result, result, 16);
2732 }
2733 }
2734 }
2735
2736 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, used to
2737 // implement weak CAS operations. The major difference is that it simply fails,
2738 // rather than retrying, when the store conditional fails.
2739 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
2740 Register new_val,
2741 enum operand_size size,
2742 Assembler::Aqrl acquire, Assembler::Aqrl release,
2743 Register result,
2744 Register tmp1, Register tmp2, Register tmp3) {
2745 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2746 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2747 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2748
2749 Label fail, done;
2750
2751 lr_w(old, aligned_addr, acquire);
2752 andr(tmp, old, mask);
2753 bne(tmp, expected, fail);
2754
2755 andr(tmp, old, not_mask);
2756 orr(tmp, tmp, new_val);
2757 sc_w(tmp, tmp, aligned_addr, release);
2758 bnez(tmp, fail);
2759
2760 // Success
2761 mv(result, 1);
2762 j(done);
2763
2764 // Fail
2765 bind(fail);
2766 mv(result, zr);
2767
2768 bind(done);
2769 }
2770
2771 void MacroAssembler::cmpxchg(Register addr, Register expected,
2772 Register new_val,
2773 enum operand_size size,
2774 Assembler::Aqrl acquire, Assembler::Aqrl release,
2775 Register result, bool result_as_bool) {
2776 assert(size != int8 && size != int16, "unsupported operand size");
2777 assert_different_registers(addr, t0);
2778 assert_different_registers(expected, t0);
2779 assert_different_registers(new_val, t0);
2780
2781 Label retry_load, done, ne_done;
2782 bind(retry_load);
2783 load_reserved(addr, size, acquire);
2784 bne(t0, expected, ne_done);
2785 store_conditional(addr, new_val, size, release);
2786 bnez(t0, retry_load);
2787
2788 // equal, succeed
2789 if (result_as_bool) {
2790 mv(result, 1);
2791 } else {
2792 mv(result, expected);
2793 }
2794 j(done);
2795
2796 // not equal, failed
2797 bind(ne_done);
2798 if (result_as_bool) {
2799 mv(result, zr);
2800 } else {
2801 mv(result, t0);
2802 }
2803
2804 bind(done);
2805 }
2806
2807 void MacroAssembler::cmpxchg_weak(Register addr, Register expected,
2808 Register new_val,
2809 enum operand_size size,
2810 Assembler::Aqrl acquire, Assembler::Aqrl release,
2811 Register result) {
2812 assert_different_registers(addr, t0);
2813 assert_different_registers(expected, t0);
2814 assert_different_registers(new_val, t0);
2815
2816 Label fail, done;
2817 load_reserved(addr, size, acquire);
2818 bne(t0, expected, fail);
2819 store_conditional(addr, new_val, size, release);
2820 bnez(t0, fail);
2821
2822 // Success
2823 mv(result, 1);
2824 j(done);
2825
2826 // Fail
2827 bind(fail);
2828 mv(result, zr);
2829
2830 bind(done);
2831 }
2832
2833 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \
2834 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
2835 prev = prev->is_valid() ? prev : zr; \
2836 if (incr.is_register()) { \
2837 AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
2838 } else { \
2839 mv(t0, incr.as_constant()); \
2840 AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
2841 } \
2842 return; \
2843 }
2844
2845 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed)
2846 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed)
2847 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl)
2848 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl)
2849
2850 #undef ATOMIC_OP
2851
2852 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \
2853 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \
2854 prev = prev->is_valid() ? prev : zr; \
2855 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
2856 return; \
2857 }
2858
2859 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed)
2860 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed)
2861 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl)
2862 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl)
2863
2864 #undef ATOMIC_XCHG
2865
2866 #define ATOMIC_XCHGU(OP1, OP2) \
2867 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \
2868 atomic_##OP2(prev, newv, addr); \
2869 zero_extend(prev, prev, 32); \
2870 return; \
2871 }
2872
2873 ATOMIC_XCHGU(xchgwu, xchgw)
2874 ATOMIC_XCHGU(xchgalwu, xchgalw)
2875
2876 #undef ATOMIC_XCHGU
2877
2878 void MacroAssembler::far_jump(Address entry, Register tmp) {
2879 assert(ReservedCodeCacheSize < 4*G, "branch out of range");
2880 assert(CodeCache::find_blob(entry.target()) != nullptr,
2881 "destination of far call not found in code cache");
2882 assert(entry.rspec().type() == relocInfo::external_word_type
2883 || entry.rspec().type() == relocInfo::runtime_call_type
2884 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
2885 IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size()
2886 if (far_branches()) {
2887 // We can use auipc + jalr here because we know that the total size of
2888 // the code cache cannot exceed 2Gb.
2889 relocate(entry.rspec(), [&] {
2890 int32_t offset;
2891 la_patchable(tmp, entry, offset);
2892 jalr(x0, tmp, offset);
2893 });
2894 } else {
2895 j(entry);
2896 }
2897 }
2898
2899 void MacroAssembler::far_call(Address entry, Register tmp) {
2900 assert(ReservedCodeCacheSize < 4*G, "branch out of range");
2901 assert(CodeCache::find_blob(entry.target()) != nullptr,
2902 "destination of far call not found in code cache");
2903 assert(entry.rspec().type() == relocInfo::external_word_type
2904 || entry.rspec().type() == relocInfo::runtime_call_type
2905 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
2906 IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size()
2907 if (far_branches()) {
2908 // We can use auipc + jalr here because we know that the total size of
2909 // the code cache cannot exceed 2Gb.
2910 relocate(entry.rspec(), [&] {
2911 int32_t offset;
2912 la_patchable(tmp, entry, offset);
2913 jalr(x1, tmp, offset); // link
2914 });
2915 } else {
2916 jal(entry); // link
2917 }
2918 }
2919
2920 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
2921 Register super_klass,
2922 Register tmp_reg,
2923 Label* L_success,
2924 Label* L_failure,
2925 Label* L_slow_path,
2926 Register super_check_offset) {
2927 assert_different_registers(sub_klass, super_klass, tmp_reg);
2928 bool must_load_sco = (super_check_offset == noreg);
2929 if (must_load_sco) {
2930 assert(tmp_reg != noreg, "supply either a temp or a register offset");
2931 } else {
2932 assert_different_registers(sub_klass, super_klass, super_check_offset);
2933 }
2934
2935 Label L_fallthrough;
2936 int label_nulls = 0;
2937 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
2938 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
2939 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; }
2940 assert(label_nulls <= 1, "at most one null in batch");
2941
2942 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
2943 int sco_offset = in_bytes(Klass::super_check_offset_offset());
2944 Address super_check_offset_addr(super_klass, sco_offset);
2945
2946 // Hacked jmp, which may only be used just before L_fallthrough.
2947 #define final_jmp(label) \
2948 if (&(label) == &L_fallthrough) { /*do nothing*/ } \
2949 else j(label) /*omit semi*/
2950
2951 // If the pointers are equal, we are done (e.g., String[] elements).
2952 // This self-check enables sharing of secondary supertype arrays among
2953 // non-primary types such as array-of-interface. Otherwise, each such
2954 // type would need its own customized SSA.
2955 // We move this check to the front of the fast path because many
2956 // type checks are in fact trivially successful in this manner,
2957 // so we get a nicely predicted branch right at the start of the check.
2958 beq(sub_klass, super_klass, *L_success);
2959
2960 // Check the supertype display:
2961 if (must_load_sco) {
2962 lwu(tmp_reg, super_check_offset_addr);
2963 super_check_offset = tmp_reg;
2964 }
2965 add(t0, sub_klass, super_check_offset);
2966 Address super_check_addr(t0);
2967 ld(t0, super_check_addr); // load displayed supertype
2968
2969 // This check has worked decisively for primary supers.
2970 // Secondary supers are sought in the super_cache ('super_cache_addr').
2971 // (Secondary supers are interfaces and very deeply nested subtypes.)
2972 // This works in the same check above because of a tricky aliasing
2973 // between the super_cache and the primary super display elements.
2974 // (The 'super_check_addr' can address either, as the case requires.)
2975 // Note that the cache is updated below if it does not help us find
2976 // what we need immediately.
2977 // So if it was a primary super, we can just fail immediately.
2978 // Otherwise, it's the slow path for us (no success at this point).
2979
2980 beq(super_klass, t0, *L_success);
2981 mv(t1, sc_offset);
2982 if (L_failure == &L_fallthrough) {
2983 beq(super_check_offset, t1, *L_slow_path);
2984 } else {
2985 bne(super_check_offset, t1, *L_failure, /* is_far */ true);
2986 final_jmp(*L_slow_path);
2987 }
2988
2989 bind(L_fallthrough);
2990
2991 #undef final_jmp
2992 }
2993
2994 // Scans 'count' pointer-sized words at [addr] for an occurrence of 'value',
2995 // generic
2996 void MacroAssembler::repne_scan(Register addr, Register value, Register count,
2997 Register tmp) {
2998 Label Lloop, Lexit;
2999 beqz(count, Lexit);
3000 bind(Lloop);
3001 ld(tmp, addr);
3002 beq(value, tmp, Lexit);
3003 add(addr, addr, wordSize);
3004 sub(count, count, 1);
3005 bnez(count, Lloop);
3006 bind(Lexit);
3007 }
3008
3009 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3010 Register super_klass,
3011 Register tmp1_reg,
3012 Register tmp2_reg,
3013 Label* L_success,
3014 Label* L_failure) {
3015 assert_different_registers(sub_klass, super_klass, tmp1_reg);
3016 if (tmp2_reg != noreg) {
3017 assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0);
3018 }
3019 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg)
3020
3021 Label L_fallthrough;
3022 int label_nulls = 0;
3023 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
3024 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
3025
3026 assert(label_nulls <= 1, "at most one null in the batch");
3027
3028 // A couple of useful fields in sub_klass:
3029 int ss_offset = in_bytes(Klass::secondary_supers_offset());
3030 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3031 Address secondary_supers_addr(sub_klass, ss_offset);
3032 Address super_cache_addr( sub_klass, sc_offset);
3033
3034 BLOCK_COMMENT("check_klass_subtype_slow_path");
3035
3036 // Do a linear scan of the secondary super-klass chain.
3037 // This code is rarely used, so simplicity is a virtue here.
3038 // The repne_scan instruction uses fixed registers, which we must spill.
3039 // Don't worry too much about pre-existing connections with the input regs.
3040
3041 assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
3042 assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter)
3043
3044 RegSet pushed_registers;
3045 if (!IS_A_TEMP(x12)) {
3046 pushed_registers += x12;
3047 }
3048 if (!IS_A_TEMP(x15)) {
3049 pushed_registers += x15;
3050 }
3051
3052 if (super_klass != x10) {
3053 if (!IS_A_TEMP(x10)) {
3054 pushed_registers += x10;
3055 }
3056 }
3057
3058 push_reg(pushed_registers, sp);
3059
3060 // Get super_klass value into x10 (even if it was in x15 or x12)
3061 mv(x10, super_klass);
3062
3063 #ifndef PRODUCT
3064 mv(t1, (address)&SharedRuntime::_partial_subtype_ctr);
3065 Address pst_counter_addr(t1);
3066 ld(t0, pst_counter_addr);
3067 add(t0, t0, 1);
3068 sd(t0, pst_counter_addr);
3069 #endif // PRODUCT
3070
3071 // We will consult the secondary-super array.
3072 ld(x15, secondary_supers_addr);
3073 // Load the array length.
3074 lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes()));
3075 // Skip to start of data.
3076 add(x15, x15, Array<Klass*>::base_offset_in_bytes());
3077
3078 // Set t0 to an obvious invalid value, falling through by default
3079 mv(t0, -1);
3080 // Scan X12 words at [X15] for an occurrence of X10.
3081 repne_scan(x15, x10, x12, t0);
3082
3083 // pop will restore x10, so we should use a temp register to keep its value
3084 mv(t1, x10);
3085
3086 // Unspill the temp registers:
3087 pop_reg(pushed_registers, sp);
3088
3089 bne(t1, t0, *L_failure);
3090
3091 // Success. Cache the super we found and proceed in triumph.
3092 sd(super_klass, super_cache_addr);
3093
3094 if (L_success != &L_fallthrough) {
3095 j(*L_success);
3096 }
3097
3098 #undef IS_A_TEMP
3099
3100 bind(L_fallthrough);
3101 }
3102
3103 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
3104 void MacroAssembler::tlab_allocate(Register obj,
3105 Register var_size_in_bytes,
3106 int con_size_in_bytes,
3107 Register tmp1,
3108 Register tmp2,
3109 Label& slow_case,
3110 bool is_far) {
3111 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
3112 bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far);
3113 }
3114
3115 // get_thread() can be called anywhere inside generated code so we
3116 // need to save whatever non-callee save context might get clobbered
3117 // by the call to Thread::current() or, indeed, the call setup code.
3118 void MacroAssembler::get_thread(Register thread) {
3119 // save all call-clobbered regs except thread
3120 RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) +
3121 RegSet::range(x28, x31) + ra - thread;
3122 push_reg(saved_regs, sp);
3123
3124 mv(ra, CAST_FROM_FN_PTR(address, Thread::current));
3125 jalr(ra);
3126 if (thread != c_rarg0) {
3127 mv(thread, c_rarg0);
3128 }
3129
3130 // restore pushed registers
3131 pop_reg(saved_regs, sp);
3132 }
3133
3134 void MacroAssembler::load_byte_map_base(Register reg) {
3135 CardTable::CardValue* byte_map_base =
3136 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
3137 mv(reg, (uint64_t)byte_map_base);
3138 }
3139
3140 void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) {
3141 unsigned long low_address = (uintptr_t)CodeCache::low_bound();
3142 unsigned long high_address = (uintptr_t)CodeCache::high_bound();
3143 unsigned long dest_address = (uintptr_t)dest.target();
3144 long offset_low = dest_address - low_address;
3145 long offset_high = dest_address - high_address;
3146
3147 assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address");
3148 assert((uintptr_t)dest.target() < (1ull << 48), "bad address");
3149
3150 // RISC-V doesn't compute a page-aligned address, in order to partially
3151 // compensate for the use of *signed* offsets in its base+disp12
3152 // addressing mode (RISC-V's PC-relative reach remains asymmetric:
3153 // [-(2G + 2K), 2G - 2K)).
3154 if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
3155 int64_t distance = dest.target() - pc();
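// Adding 0x800 bumps the upper 20-bit immediate by one whenever the low 12
// bits of the distance are negative as a signed value, so auipc plus the
// sign-extended 12-bit 'offset' computed below reproduces the exact distance.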
3156 auipc(reg1, (int32_t)distance + 0x800);
3157 offset = ((int32_t)distance << 20) >> 20;
3158 } else {
3159 movptr(reg1, dest.target(), offset);
3160 }
3161 }
3162
3163 void MacroAssembler::build_frame(int framesize) {
3164 assert(framesize >= 2, "framesize must include space for FP/RA");
3165 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
3166 sub(sp, sp, framesize);
3167 sd(fp, Address(sp, framesize - 2 * wordSize));
3168 sd(ra, Address(sp, framesize - wordSize));
3169 if (PreserveFramePointer) { add(fp, sp, framesize); }
3170 }
3171
3172 void MacroAssembler::remove_frame(int framesize) {
3173 assert(framesize >= 2, "framesize must include space for FP/RA");
3174 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
3175 ld(fp, Address(sp, framesize - 2 * wordSize));
3176 ld(ra, Address(sp, framesize - wordSize));
3177 add(sp, sp, framesize);
3178 }
3179
3180 void MacroAssembler::reserved_stack_check() {
3181 // testing if reserved zone needs to be enabled
3182 Label no_reserved_zone_enabling;
3183
3184 ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
3185 bltu(sp, t0, no_reserved_zone_enabling);
3186
3187 enter(); // RA and FP are live.
3188 mv(c_rarg0, xthread);
3189 RuntimeAddress target(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone));
3190 relocate(target.rspec(), [&] {
3191 int32_t offset;
3192 la_patchable(t0, target, offset);
3193 jalr(x1, t0, offset);
3194 });
3195 leave();
3196
3197 // We have already removed our own frame.
3198 // throw_delayed_StackOverflowError will think that it's been
3199 // called by our caller.
3200 target = RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry());
3201 relocate(target.rspec(), [&] {
3202 int32_t offset;
3203 la_patchable(t0, target, offset);
3204 jalr(x0, t0, offset);
3205 });
3206 should_not_reach_here();
3207
3208 bind(no_reserved_zone_enabling);
3209 }
3210
3211 // Move the address of the polling page into dest.
3212 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
3213 ld(dest, Address(xthread, JavaThread::polling_page_offset()));
3214 }
3215
3216 // Read the polling page. The address of the polling page must
3217 // already be in r.
3218 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) {
3219 relocate(rtype, [&] {
3220 lwu(zr, Address(r, offset));
3221 });
3222 }
3223
3224 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
3225 #ifdef ASSERT
3226 {
3227 ThreadInVMfromUnknown tiv;
3228 assert (UseCompressedOops, "should only be used for compressed oops");
3229 assert (Universe::heap() != nullptr, "java heap should be initialized");
3230 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
3231 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
3232 }
3233 #endif
3234 int oop_index = oop_recorder()->find_index(obj);
3235 relocate(oop_Relocation::spec(oop_index), [&] {
3236 li32(dst, 0xDEADBEEF);
3237 });
3238 zero_extend(dst, dst, 32);
3239 }
3240
3241 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
3242 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3243 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
3244 int index = oop_recorder()->find_index(k);
3245 assert(!Universe::heap()->is_in(k), "should not be an oop");
3246
3247 narrowKlass nk = CompressedKlassPointers::encode(k);
3248 relocate(metadata_Relocation::spec(index), [&] {
3249 li32(dst, nk);
3250 });
3251 zero_extend(dst, dst, 32);
3252 }
3253
3254 // Maybe emit a call via a trampoline. If the code cache is small
3255 // trampolines won't be emitted.
3256 address MacroAssembler::trampoline_call(Address entry) {
3257 assert(entry.rspec().type() == relocInfo::runtime_call_type ||
3258 entry.rspec().type() == relocInfo::opt_virtual_call_type ||
3259 entry.rspec().type() == relocInfo::static_call_type ||
3260 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
3261
3262 address target = entry.target();
3263
3264 // We need a trampoline if branches are far.
3265 if (far_branches()) {
3266 if (!in_scratch_emit_size()) {
3267 if (entry.rspec().type() == relocInfo::runtime_call_type) {
3268 assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs");
3269 code()->share_trampoline_for(entry.target(), offset());
3270 } else {
3271 address stub = emit_trampoline_stub(offset(), target);
3272 if (stub == nullptr) {
3273 postcond(pc() == badAddress);
3274 return nullptr; // CodeCache is full
3275 }
3276 }
3277 }
3278 target = pc();
3279 }
3280
3281 address call_pc = pc();
3282 #ifdef ASSERT
3283 if (entry.rspec().type() != relocInfo::runtime_call_type) {
3284 assert_alignment(call_pc);
3285 }
3286 #endif
3287 relocate(entry.rspec(), [&] {
3288 jal(target);
3289 });
3290
3291 postcond(pc() != badAddress);
3292 return call_pc;
3293 }
3294
3295 address MacroAssembler::ic_call(address entry, jint method_index) {
3296 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
3297 IncompressibleRegion ir(this); // relocations
3298 movptr(t1, (address)Universe::non_oop_word());
3299 assert_cond(entry != nullptr);
3300 return trampoline_call(Address(entry, rh));
3301 }
3302
3303 // Emit a trampoline stub for a call to a target which is too far away.
3304 //
3305 // code sequences:
3306 //
3307 // call-site:
3308 // branch-and-link to <destination> or <trampoline stub>
3309 //
3310 // Related trampoline stub for this call site in the stub section:
3311 // load the call target from the constant pool
3312 // branch (RA still points to the call site above)
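//
// Roughly, the emitted stub looks like this (pseudo-assembly; the exact
// encodings and offsets are determined by the code below, this is only an
// illustration):
//
//   auipc t0, %pcrel_hi(data)      // ld(t0, target) expands to auipc + ld
//   ld    t0, %pcrel_lo(data)(t0)
//   jalr  zr, 0(t0)                // jr(t0)
// data:
//   .8byte <destination address>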
3313
3314 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
3315 address dest) {
3316 // Max stub size: alignment nop, TrampolineStub.
3317 address stub = start_a_stub(max_trampoline_stub_size());
3318 if (stub == nullptr) {
3319 return nullptr; // CodeBuffer::expand failed
3320 }
3321
3322 // We are always 4-byte aligned here.
3323 assert_alignment(pc());
3324
3325 // Create a trampoline stub relocation which relates this trampoline stub
3326 // with the call instruction at insts_call_instruction_offset in the
3327 // instructions code-section.
3328
  // Make sure the address of the destination is 8-byte aligned after 3 instructions.
3330 align(wordSize, NativeCallTrampolineStub::data_offset);
3331
3332 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() +
3333 insts_call_instruction_offset);
3334 const int stub_start_offset = offset();
3335 relocate(rh, [&] {
3336 // Now, create the trampoline stub's code:
    // - load the call target from the data word below
    // - branch to it (RA still points to the original call site)
3339 Label target;
3340 ld(t0, target); // auipc + ld
3341 jr(t0); // jalr
3342 bind(target);
3343 assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
3344 "should be");
3345 assert(offset() % wordSize == 0, "bad alignment");
3346 emit_int64((int64_t)dest);
3347 });
3348
3349 const address stub_start_addr = addr_at(stub_start_offset);
3350
3351 assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
3352
3353 end_a_stub();
3354 return stub_start_addr;
3355 }
3356
3357 int MacroAssembler::max_trampoline_stub_size() {
3358 // Max stub size: alignment nop, TrampolineStub.
3359 return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size;
3360 }
3361
3362 int MacroAssembler::static_call_stub_size() {
3363 // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr
3364 return 12 * NativeInstruction::instruction_size;
3365 }
3366
3367 Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) {
3368 switch (dst.getMode()) {
3369 case Address::base_plus_offset:
3370 // This is the expected mode, although we allow all the other
3371 // forms below.
3372 return form_address(tmp, dst.base(), dst.offset());
3373 default:
3374 la(tmp, dst);
3375 return Address(tmp);
3376 }
3377 }
3378
3379 void MacroAssembler::increment(const Address dst, int64_t value, Register tmp1, Register tmp2) {
3380 assert(((dst.getMode() == Address::base_plus_offset &&
3381 is_simm12(dst.offset())) || is_simm12(value)),
3382 "invalid value and address mode combination");
3383 Address adr = add_memory_helper(dst, tmp2);
3384 assert(!adr.uses(tmp1), "invalid dst for address increment");
3385 ld(tmp1, adr);
3386 add(tmp1, tmp1, value, tmp2);
3387 sd(tmp1, adr);
3388 }
3389
3390 void MacroAssembler::incrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) {
3391 assert(((dst.getMode() == Address::base_plus_offset &&
3392 is_simm12(dst.offset())) || is_simm12(value)),
3393 "invalid value and address mode combination");
3394 Address adr = add_memory_helper(dst, tmp2);
3395 assert(!adr.uses(tmp1), "invalid dst for address increment");
3396 lwu(tmp1, adr);
3397 addw(tmp1, tmp1, value, tmp2);
3398 sw(tmp1, adr);
3399 }
3400
3401 void MacroAssembler::decrement(const Address dst, int64_t value, Register tmp1, Register tmp2) {
3402 assert(((dst.getMode() == Address::base_plus_offset &&
3403 is_simm12(dst.offset())) || is_simm12(value)),
3404 "invalid value and address mode combination");
3405 Address adr = add_memory_helper(dst, tmp2);
3406 assert(!adr.uses(tmp1), "invalid dst for address decrement");
3407 ld(tmp1, adr);
3408 sub(tmp1, tmp1, value, tmp2);
3409 sd(tmp1, adr);
3410 }
3411
3412 void MacroAssembler::decrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) {
3413 assert(((dst.getMode() == Address::base_plus_offset &&
3414 is_simm12(dst.offset())) || is_simm12(value)),
3415 "invalid value and address mode combination");
3416 Address adr = add_memory_helper(dst, tmp2);
3417 assert(!adr.uses(tmp1), "invalid dst for address decrement");
3418 lwu(tmp1, adr);
3419 subw(tmp1, tmp1, value, tmp2);
3420 sw(tmp1, adr);
3421 }
3422
3423 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) {
3424 assert_different_registers(src1, t0);
3425 relocate(src2.rspec(), [&] {
3426 int32_t offset;
3427 la_patchable(t0, src2, offset);
3428 ld(t0, Address(t0, offset));
3429 });
3430 beq(src1, t0, equal);
3431 }
3432
3433 void MacroAssembler::load_method_holder_cld(Register result, Register method) {
3434 load_method_holder(result, method);
3435 ld(result, Address(result, InstanceKlass::class_loader_data_offset()));
3436 }
3437
3438 void MacroAssembler::load_method_holder(Register holder, Register method) {
3439 ld(holder, Address(method, Method::const_offset())); // ConstMethod*
3440 ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
3441 ld(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
3442 }
3443
// string indexof
// Compute the match index from the number of trailing zeros in the match mask.
3446 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros,
3447 Register match_mask, Register result,
3448 Register ch2, Register tmp,
3449 bool haystack_isL) {
3450 int haystack_chr_shift = haystack_isL ? 0 : 1;
3451 srl(match_mask, match_mask, trailing_zeros);
3452 srli(match_mask, match_mask, 1);
3453 srli(tmp, trailing_zeros, LogBitsPerByte);
3454 if (!haystack_isL) andi(tmp, tmp, 0xE);
3455 add(haystack, haystack, tmp);
3456 ld(ch2, Address(haystack));
3457 if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift);
3458 add(result, result, tmp);
3459 }
3460
// string indexof
// Find the pattern element in src and compute the match mask;
// only the first occurrence of 0x80/0x8000 at the low bits is a valid match index.
3464 // match mask patterns and corresponding indices would be like:
3465 // - 0x8080808080808080 (Latin1)
3466 // - 7 6 5 4 3 2 1 0 (match index)
3467 // - 0x8000800080008000 (UTF16)
3468 // - 3 2 1 0 (match index)
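//
// This is the usual SWAR zero-element trick: after the xor below an element
// that matches the pattern becomes zero, and (x - mask1) & ~(x | mask2)
// leaves the high bit set at (at least) the lowest zero element, which is why
// only the first 0x80/0x8000 occurrence above is a valid match index
// (assuming the callers pass mask1 = 0x0101.../0x00010001... and
// mask2 = 0x7f7f.../0x7fff..., the standard constants for this trick).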
3469 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask,
3470 Register mask1, Register mask2) {
3471 xorr(src, pattern, src);
3472 sub(match_mask, src, mask1);
3473 orr(src, src, mask2);
3474 notr(src, src);
3475 andr(match_mask, match_mask, src);
3476 }
3477
3478 #ifdef COMPILER2
3479 // Code for BigInteger::mulAdd intrinsic
3480 // out = x10
3481 // in = x11
3482 // offset = x12 (already out.length-offset)
3483 // len = x13
3484 // k = x14
3485 // tmp = x28
3486 //
3487 // pseudo code from java implementation:
3488 // long kLong = k & LONG_MASK;
3489 // carry = 0;
3490 // offset = out.length-offset - 1;
3491 // for (int j = len - 1; j >= 0; j--) {
3492 // product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
3493 // out[offset--] = (int)product;
3494 // carry = product >>> 32;
3495 // }
3496 // return (int)carry;
3497 void MacroAssembler::mul_add(Register out, Register in, Register offset,
3498 Register len, Register k, Register tmp) {
3499 Label L_tail_loop, L_unroll, L_end;
3500 mv(tmp, out);
3501 mv(out, zr);
3502 blez(len, L_end);
3503 zero_extend(k, k, 32);
3504 slliw(t0, offset, LogBytesPerInt);
3505 add(offset, tmp, t0);
3506 slliw(t0, len, LogBytesPerInt);
3507 add(in, in, t0);
3508
3509 const int unroll = 8;
3510 mv(tmp, unroll);
3511 blt(len, tmp, L_tail_loop);
3512 bind(L_unroll);
3513 for (int i = 0; i < unroll; i++) {
3514 sub(in, in, BytesPerInt);
3515 lwu(t0, Address(in, 0));
3516 mul(t1, t0, k);
3517 add(t0, t1, out);
3518 sub(offset, offset, BytesPerInt);
3519 lwu(t1, Address(offset, 0));
3520 add(t0, t0, t1);
3521 sw(t0, Address(offset, 0));
3522 srli(out, t0, 32);
3523 }
3524 subw(len, len, tmp);
3525 bge(len, tmp, L_unroll);
3526
3527 bind(L_tail_loop);
3528 blez(len, L_end);
3529 sub(in, in, BytesPerInt);
3530 lwu(t0, Address(in, 0));
3531 mul(t1, t0, k);
3532 add(t0, t1, out);
3533 sub(offset, offset, BytesPerInt);
3534 lwu(t1, Address(offset, 0));
3535 add(t0, t0, t1);
3536 sw(t0, Address(offset, 0));
3537 srli(out, t0, 32);
3538 subw(len, len, 1);
3539 j(L_tail_loop);
3540
3541 bind(L_end);
3542 }
3543
// Add two unsigned inputs and output the carry.
3545 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry)
3546 {
3547 assert_different_registers(dst, carry);
3548 assert_different_registers(dst, src2);
3549 add(dst, src1, src2);
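  // If the unsigned addition wrapped around, the result is (mod 2^64) smaller
  // than either addend, so comparing it against src2 recovers the carry bit.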
3550 sltu(carry, dst, src2);
3551 }
3552
// Add two inputs and a carry-in.
3554 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) {
3555 assert_different_registers(dst, carry);
3556 add(dst, src1, src2);
3557 add(dst, dst, carry);
3558 }
3559
// Add two unsigned inputs and a carry-in, and output the carry.
3561 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) {
3562 assert_different_registers(dst, src2);
3563 adc(dst, src1, src2, carry);
3564 sltu(carry, dst, src2);
3565 }
3566
3567 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
3568 Register src1, Register src2, Register carry) {
3569 cad(dest_lo, dest_lo, src1, carry);
3570 add(dest_hi, dest_hi, carry);
3571 cad(dest_lo, dest_lo, src2, carry);
3572 add(final_dest_hi, dest_hi, carry);
3573 }
3574
3575 /**
3576 * Multiply 32 bit by 32 bit first loop.
3577 */
3578 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
3579 Register y, Register y_idx, Register z,
3580 Register carry, Register product,
3581 Register idx, Register kdx) {
3582 // jlong carry, x[], y[], z[];
3583 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3584 // long product = y[idx] * x[xstart] + carry;
3585 // z[kdx] = (int)product;
3586 // carry = product >>> 32;
3587 // }
3588 // z[xstart] = (int)carry;
3589
3590 Label L_first_loop, L_first_loop_exit;
3591 blez(idx, L_first_loop_exit);
3592
3593 shadd(t0, xstart, x, t0, LogBytesPerInt);
3594 lwu(x_xstart, Address(t0, 0));
3595
3596 bind(L_first_loop);
3597 subw(idx, idx, 1);
3598 shadd(t0, idx, y, t0, LogBytesPerInt);
3599 lwu(y_idx, Address(t0, 0));
3600 mul(product, x_xstart, y_idx);
3601 add(product, product, carry);
3602 srli(carry, product, 32);
3603 subw(kdx, kdx, 1);
3604 shadd(t0, kdx, z, t0, LogBytesPerInt);
3605 sw(product, Address(t0, 0));
3606 bgtz(idx, L_first_loop);
3607
3608 bind(L_first_loop_exit);
3609 }
3610
3611 /**
3612 * Multiply 64 bit by 64 bit first loop.
3613 */
3614 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
3615 Register y, Register y_idx, Register z,
3616 Register carry, Register product,
3617 Register idx, Register kdx) {
3618 //
3619 // jlong carry, x[], y[], z[];
3620 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3621 // huge_128 product = y[idx] * x[xstart] + carry;
3622 // z[kdx] = (jlong)product;
3623 // carry = (jlong)(product >>> 64);
3624 // }
3625 // z[xstart] = carry;
3626 //
3627
3628 Label L_first_loop, L_first_loop_exit;
3629 Label L_one_x, L_one_y, L_multiply;
3630
3631 subw(xstart, xstart, 1);
3632 bltz(xstart, L_one_x);
3633
3634 shadd(t0, xstart, x, t0, LogBytesPerInt);
3635 ld(x_xstart, Address(t0, 0));
3636 ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian
3637
3638 bind(L_first_loop);
3639 subw(idx, idx, 1);
3640 bltz(idx, L_first_loop_exit);
3641 subw(idx, idx, 1);
3642 bltz(idx, L_one_y);
3643
3644 shadd(t0, idx, y, t0, LogBytesPerInt);
3645 ld(y_idx, Address(t0, 0));
3646 ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian
3647 bind(L_multiply);
3648
3649 mulhu(t0, x_xstart, y_idx);
3650 mul(product, x_xstart, y_idx);
3651 cad(product, product, carry, t1);
3652 adc(carry, t0, zr, t1);
3653
3654 subw(kdx, kdx, 2);
3655 ror_imm(product, product, 32); // back to big-endian
3656 shadd(t0, kdx, z, t0, LogBytesPerInt);
3657 sd(product, Address(t0, 0));
3658
3659 j(L_first_loop);
3660
3661 bind(L_one_y);
3662 lwu(y_idx, Address(y, 0));
3663 j(L_multiply);
3664
3665 bind(L_one_x);
3666 lwu(x_xstart, Address(x, 0));
3667 j(L_first_loop);
3668
3669 bind(L_first_loop_exit);
3670 }
3671
3672 /**
3673 * Multiply 128 bit by 128 bit. Unrolled inner loop.
3674 *
3675 */
3676 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
3677 Register carry, Register carry2,
3678 Register idx, Register jdx,
3679 Register yz_idx1, Register yz_idx2,
3680 Register tmp, Register tmp3, Register tmp4,
3681 Register tmp6, Register product_hi) {
3682 // jlong carry, x[], y[], z[];
3683 // int kdx = xstart+1;
3684 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
3685 // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry;
3686 // jlong carry2 = (jlong)(tmp3 >>> 64);
3687 // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2;
3688 // carry = (jlong)(tmp4 >>> 64);
3689 // z[kdx+idx+1] = (jlong)tmp3;
3690 // z[kdx+idx] = (jlong)tmp4;
3691 // }
3692 // idx += 2;
3693 // if (idx > 0) {
3694 // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry;
3695 // z[kdx+idx] = (jlong)yz_idx1;
3696 // carry = (jlong)(yz_idx1 >>> 64);
3697 // }
3698 //
3699
3700 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
3701
3702 srliw(jdx, idx, 2);
3703
3704 bind(L_third_loop);
3705
3706 subw(jdx, jdx, 1);
3707 bltz(jdx, L_third_loop_exit);
3708 subw(idx, idx, 4);
3709
3710 shadd(t0, idx, y, t0, LogBytesPerInt);
3711 ld(yz_idx2, Address(t0, 0));
3712 ld(yz_idx1, Address(t0, wordSize));
3713
3714 shadd(tmp6, idx, z, t0, LogBytesPerInt);
3715
3716 ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
3717 ror_imm(yz_idx2, yz_idx2, 32);
3718
3719 ld(t1, Address(tmp6, 0));
3720 ld(t0, Address(tmp6, wordSize));
3721
3722 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3
3723 mulhu(tmp4, product_hi, yz_idx1);
3724
3725 ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian
3726 ror_imm(t1, t1, 32, tmp);
3727
3728 mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp
3729 mulhu(carry2, product_hi, yz_idx2);
3730
3731 cad(tmp3, tmp3, carry, carry);
3732 adc(tmp4, tmp4, zr, carry);
3733 cad(tmp3, tmp3, t0, t0);
3734 cadc(tmp4, tmp4, tmp, t0);
3735 adc(carry, carry2, zr, t0);
3736 cad(tmp4, tmp4, t1, carry2);
3737 adc(carry, carry, zr, carry2);
3738
3739 ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian
3740 ror_imm(tmp4, tmp4, 32);
3741 sd(tmp4, Address(tmp6, 0));
3742 sd(tmp3, Address(tmp6, wordSize));
3743
3744 j(L_third_loop);
3745
3746 bind(L_third_loop_exit);
3747
3748 andi(idx, idx, 0x3);
3749 beqz(idx, L_post_third_loop_done);
3750
3751 Label L_check_1;
3752 subw(idx, idx, 2);
3753 bltz(idx, L_check_1);
3754
3755 shadd(t0, idx, y, t0, LogBytesPerInt);
3756 ld(yz_idx1, Address(t0, 0));
3757 ror_imm(yz_idx1, yz_idx1, 32);
3758
3759 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3
3760 mulhu(tmp4, product_hi, yz_idx1);
3761
3762 shadd(t0, idx, z, t0, LogBytesPerInt);
3763 ld(yz_idx2, Address(t0, 0));
3764 ror_imm(yz_idx2, yz_idx2, 32, tmp);
3765
3766 add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp);
3767
3768 ror_imm(tmp3, tmp3, 32, tmp);
3769 sd(tmp3, Address(t0, 0));
3770
3771 bind(L_check_1);
3772
3773 andi(idx, idx, 0x1);
3774 subw(idx, idx, 1);
3775 bltz(idx, L_post_third_loop_done);
3776 shadd(t0, idx, y, t0, LogBytesPerInt);
3777 lwu(tmp4, Address(t0, 0));
3778 mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3
3779 mulhu(carry2, tmp4, product_hi);
3780
3781 shadd(t0, idx, z, t0, LogBytesPerInt);
3782 lwu(tmp4, Address(t0, 0));
3783
3784 add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0);
3785
3786 shadd(t0, idx, z, t0, LogBytesPerInt);
3787 sw(tmp3, Address(t0, 0));
3788
3789 slli(t0, carry2, 32);
3790 srli(carry, tmp3, 32);
3791 orr(carry, carry, t0);
3792
3793 bind(L_post_third_loop_done);
3794 }
3795
3796 /**
3797 * Code for BigInteger::multiplyToLen() intrinsic.
3798 *
3799 * x10: x
3800 * x11: xlen
3801 * x12: y
3802 * x13: ylen
3803 * x14: z
3804 * x15: zlen
3805 * x16: tmp1
3806 * x17: tmp2
3807 * x7: tmp3
3808 * x28: tmp4
3809 * x29: tmp5
3810 * x30: tmp6
3811 * x31: tmp7
3812 */
3813 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
3814 Register z, Register zlen,
3815 Register tmp1, Register tmp2, Register tmp3, Register tmp4,
3816 Register tmp5, Register tmp6, Register product_hi) {
3817 assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
3818
3819 const Register idx = tmp1;
3820 const Register kdx = tmp2;
3821 const Register xstart = tmp3;
3822
3823 const Register y_idx = tmp4;
3824 const Register carry = tmp5;
3825 const Register product = xlen;
3826 const Register x_xstart = zlen; // reuse register
3827
3828 mv(idx, ylen); // idx = ylen;
3829 mv(kdx, zlen); // kdx = xlen+ylen;
3830 mv(carry, zr); // carry = 0;
3831
3832 Label L_multiply_64_x_64_loop, L_done;
3833
3834 subw(xstart, xlen, 1);
3835 bltz(xstart, L_done);
3836
3837 const Register jdx = tmp1;
3838
3839 if (AvoidUnalignedAccesses) {
    // Use the 64x64 loop only if both xlen and ylen are even; an odd length
    // would make the 8-byte accesses below unaligned.
3841 orr(t0, xlen, ylen);
3842 test_bit(t0, t0, 0);
3843 beqz(t0, L_multiply_64_x_64_loop);
3844
3845 multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3846 shadd(t0, xstart, z, t0, LogBytesPerInt);
3847 sw(carry, Address(t0, 0));
3848
3849 Label L_second_loop_unaligned;
3850 bind(L_second_loop_unaligned);
3851 mv(carry, zr);
3852 mv(jdx, ylen);
3853 subw(xstart, xstart, 1);
3854 bltz(xstart, L_done);
3855 sub(sp, sp, 2 * wordSize);
3856 sd(z, Address(sp, 0));
3857 sd(zr, Address(sp, wordSize));
3858 shadd(t0, xstart, z, t0, LogBytesPerInt);
3859 addi(z, t0, 4);
3860 shadd(t0, xstart, x, t0, LogBytesPerInt);
3861 lwu(product, Address(t0, 0));
3862 Label L_third_loop, L_third_loop_exit;
3863
3864 blez(jdx, L_third_loop_exit);
3865
3866 bind(L_third_loop);
3867 subw(jdx, jdx, 1);
3868 shadd(t0, jdx, y, t0, LogBytesPerInt);
3869 lwu(t0, Address(t0, 0));
3870 mul(t1, t0, product);
3871 add(t0, t1, carry);
3872 shadd(tmp6, jdx, z, t1, LogBytesPerInt);
3873 lwu(t1, Address(tmp6, 0));
3874 add(t0, t0, t1);
3875 sw(t0, Address(tmp6, 0));
3876 srli(carry, t0, 32);
3877 bgtz(jdx, L_third_loop);
3878
3879 bind(L_third_loop_exit);
3880 ld(z, Address(sp, 0));
3881 addi(sp, sp, 2 * wordSize);
3882 shadd(t0, xstart, z, t0, LogBytesPerInt);
3883 sw(carry, Address(t0, 0));
3884
3885 j(L_second_loop_unaligned);
3886 }
3887
3888 bind(L_multiply_64_x_64_loop);
3889 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3890
3891 Label L_second_loop_aligned;
3892 beqz(kdx, L_second_loop_aligned);
3893
3894 Label L_carry;
3895 subw(kdx, kdx, 1);
3896 beqz(kdx, L_carry);
3897
3898 shadd(t0, kdx, z, t0, LogBytesPerInt);
3899 sw(carry, Address(t0, 0));
3900 srli(carry, carry, 32);
3901 subw(kdx, kdx, 1);
3902
3903 bind(L_carry);
3904 shadd(t0, kdx, z, t0, LogBytesPerInt);
3905 sw(carry, Address(t0, 0));
3906
3907 // Second and third (nested) loops.
3908 //
3909 // for (int i = xstart-1; i >= 0; i--) { // Second loop
3910 // carry = 0;
3911 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
3912 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
3913 // (z[k] & LONG_MASK) + carry;
3914 // z[k] = (int)product;
3915 // carry = product >>> 32;
3916 // }
3917 // z[i] = (int)carry;
3918 // }
3919 //
3920 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
3921
3922 bind(L_second_loop_aligned);
3923 mv(carry, zr); // carry = 0;
3924 mv(jdx, ylen); // j = ystart+1
3925
3926 subw(xstart, xstart, 1); // i = xstart-1;
3927 bltz(xstart, L_done);
3928
3929 sub(sp, sp, 4 * wordSize);
3930 sd(z, Address(sp, 0));
3931
3932 Label L_last_x;
3933 shadd(t0, xstart, z, t0, LogBytesPerInt);
3934 addi(z, t0, 4);
3935 subw(xstart, xstart, 1); // i = xstart-1;
3936 bltz(xstart, L_last_x);
3937
3938 shadd(t0, xstart, x, t0, LogBytesPerInt);
3939 ld(product_hi, Address(t0, 0));
3940 ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian
3941
3942 Label L_third_loop_prologue;
3943 bind(L_third_loop_prologue);
3944
3945 sd(ylen, Address(sp, wordSize));
3946 sd(x, Address(sp, 2 * wordSize));
3947 sd(xstart, Address(sp, 3 * wordSize));
3948 multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product,
3949 tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi);
3950 ld(z, Address(sp, 0));
3951 ld(ylen, Address(sp, wordSize));
3952 ld(x, Address(sp, 2 * wordSize));
3953 ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen
3954 addi(sp, sp, 4 * wordSize);
3955
3956 addiw(tmp3, xlen, 1);
3957 shadd(t0, tmp3, z, t0, LogBytesPerInt);
3958 sw(carry, Address(t0, 0));
3959
3960 subw(tmp3, tmp3, 1);
3961 bltz(tmp3, L_done);
3962
3963 srli(carry, carry, 32);
3964 shadd(t0, tmp3, z, t0, LogBytesPerInt);
3965 sw(carry, Address(t0, 0));
3966 j(L_second_loop_aligned);
3967
3968 // Next infrequent code is moved outside loops.
3969 bind(L_last_x);
3970 lwu(product_hi, Address(x, 0));
3971 j(L_third_loop_prologue);
3972
3973 bind(L_done);
3974 }
3975 #endif
3976
// Count the trailing zero bits from the lsb up to the first non-zero element.
// In the LL case each element is one byte, so we step 8 bits at a time;
// otherwise each element is two bytes and we step 16 bits at a time.
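// For example, with 16-bit elements a raw trailing-zero count of 19 is rounded
// down to 16, the bit offset of the first non-zero element.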
3980 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) {
3981 if (UseZbb) {
3982 assert_different_registers(Rd, Rs, tmp1);
3983 int step = isLL ? 8 : 16;
3984 ctz(Rd, Rs);
3985 andi(tmp1, Rd, step - 1);
3986 sub(Rd, Rd, tmp1);
3987 return;
3988 }
3989
3990 assert_different_registers(Rd, Rs, tmp1, tmp2);
3991 Label Loop;
3992 int step = isLL ? 8 : 16;
3993 mv(Rd, -step);
3994 mv(tmp2, Rs);
3995
3996 bind(Loop);
3997 addi(Rd, Rd, step);
3998 andi(tmp1, tmp2, ((1 << step) - 1));
3999 srli(tmp2, tmp2, step);
4000 beqz(tmp1, Loop);
4001 }
4002
// Read 4 adjacent bytes from the lower half of the source register and
// inflate them into the destination register, for example:
4005 // Rs: A7A6A5A4A3A2A1A0
4006 // Rd: 00A300A200A100A0
4007 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) {
4008 assert_different_registers(Rd, Rs, tmp1, tmp2);
4009
4010 mv(tmp1, 0xFF000000); // first byte mask at lower word
4011 andr(Rd, Rs, tmp1);
4012 for (int i = 0; i < 2; i++) {
4013 slli(Rd, Rd, wordSize);
4014 srli(tmp1, tmp1, wordSize);
4015 andr(tmp2, Rs, tmp1);
4016 orr(Rd, Rd, tmp2);
4017 }
4018 slli(Rd, Rd, wordSize);
4019 andi(tmp2, Rs, 0xFF); // last byte mask at lower word
4020 orr(Rd, Rd, tmp2);
4021 }
4022
// Read 4 adjacent bytes from the upper half of the source register and
// inflate them into the destination register, for example:
4025 // Rs: A7A6A5A4A3A2A1A0
4026 // Rd: 00A700A600A500A4
4027 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) {
4028 assert_different_registers(Rd, Rs, tmp1, tmp2);
4029 srli(Rs, Rs, 32); // only upper 32 bits are needed
4030 inflate_lo32(Rd, Rs, tmp1, tmp2);
4031 }
4032
4033 // The size of the blocks erased by the zero_blocks stub. We must
4034 // handle anything smaller than this ourselves in zero_words().
4035 const int MacroAssembler::zero_words_block_size = 8;
4036
4037 // zero_words() is used by C2 ClearArray patterns. It is as small as
4038 // possible, handling small word counts locally and delegating
4039 // anything larger to the zero_blocks stub. It is expanded many times
4040 // in compiled code, so it is important to keep it short.
4041
4042 // ptr: Address of a buffer to be zeroed.
4043 // cnt: Count in HeapWords.
4044 //
4045 // ptr, cnt, and t0 are clobbered.
4046 address MacroAssembler::zero_words(Register ptr, Register cnt) {
4047 assert(is_power_of_2(zero_words_block_size), "adjust this");
4048 assert(ptr == x28 && cnt == x29, "mismatch in register usage");
4049 assert_different_registers(cnt, t0);
4050
4051 BLOCK_COMMENT("zero_words {");
4052
4053 mv(t0, zero_words_block_size);
4054 Label around, done, done16;
4055 bltu(cnt, t0, around);
4056 {
4057 RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks());
4058 assert(zero_blocks.target() != nullptr, "zero_blocks stub has not been generated");
4059 if (StubRoutines::riscv::complete()) {
4060 address tpc = trampoline_call(zero_blocks);
4061 if (tpc == nullptr) {
4062 DEBUG_ONLY(reset_labels(around));
4063 postcond(pc() == badAddress);
4064 return nullptr;
4065 }
4066 } else {
4067 jal(zero_blocks);
4068 }
4069 }
4070 bind(around);
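  // Zero the remaining (cnt % block_size) words: for each power of two from
  // block_size/2 down to 2, test that bit of cnt and store that many words
  // if it is set; a possible final single word is handled just below.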
4071 for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
4072 Label l;
4073 test_bit(t0, cnt, exact_log2(i));
4074 beqz(t0, l);
4075 for (int j = 0; j < i; j++) {
4076 sd(zr, Address(ptr, j * wordSize));
4077 }
4078 addi(ptr, ptr, i * wordSize);
4079 bind(l);
4080 }
4081 {
4082 Label l;
4083 test_bit(t0, cnt, 0);
4084 beqz(t0, l);
4085 sd(zr, Address(ptr, 0));
4086 bind(l);
4087 }
4088
4089 BLOCK_COMMENT("} zero_words");
4090 postcond(pc() != badAddress);
4091 return pc();
4092 }
4093
4094 #define SmallArraySize (18 * BytesPerLong)
4095
4096 // base: Address of a buffer to be zeroed, 8 bytes aligned.
4097 // cnt: Immediate count in HeapWords.
4098 void MacroAssembler::zero_words(Register base, uint64_t cnt) {
4099 assert_different_registers(base, t0, t1);
4100
4101 BLOCK_COMMENT("zero_words {");
4102
4103 if (cnt <= SmallArraySize / BytesPerLong) {
4104 for (int i = 0; i < (int)cnt; i++) {
4105 sd(zr, Address(base, i * wordSize));
4106 }
4107 } else {
    const int unroll = 8; // Number of sd(zr, adr) instructions we'll unroll
4109 int remainder = cnt % unroll;
4110 for (int i = 0; i < remainder; i++) {
4111 sd(zr, Address(base, i * wordSize));
4112 }
4113
4114 Label loop;
4115 Register cnt_reg = t0;
4116 Register loop_base = t1;
4117 cnt = cnt - remainder;
4118 mv(cnt_reg, cnt);
4119 add(loop_base, base, remainder * wordSize);
4120 bind(loop);
4121 sub(cnt_reg, cnt_reg, unroll);
4122 for (int i = 0; i < unroll; i++) {
4123 sd(zr, Address(loop_base, i * wordSize));
4124 }
4125 add(loop_base, loop_base, unroll * wordSize);
4126 bnez(cnt_reg, loop);
4127 }
4128
4129 BLOCK_COMMENT("} zero_words");
4130 }
4131
4132 // base: Address of a buffer to be filled, 8 bytes aligned.
// cnt: Count in 8-byte units.
4134 // value: Value to be filled with.
4135 // base will point to the end of the buffer after filling.
4136 void MacroAssembler::fill_words(Register base, Register cnt, Register value) {
4137 // Algorithm:
4138 //
4139 // t0 = cnt & 7
4140 // cnt -= t0
4141 // p += t0
4142 // switch (t0):
4143 // switch start:
4144 // do while cnt
4145 // cnt -= 8
4146 // p[-8] = value
4147 // case 7:
4148 // p[-7] = value
4149 // case 6:
4150 // p[-6] = value
4151 // // ...
4152 // case 1:
4153 // p[-1] = value
4154 // case 0:
4155 // p += 8
4156 // do-while end
4157 // switch end
4158
4159 assert_different_registers(base, cnt, value, t0, t1);
4160
4161 Label fini, skip, entry, loop;
4162 const int unroll = 8; // Number of sd instructions we'll unroll
4163
4164 beqz(cnt, fini);
4165
4166 andi(t0, cnt, unroll - 1);
4167 sub(cnt, cnt, t0);
  // Advance base past the first (cnt % 8) words; the computed jump below stores
  // them, and the loop then stores 8 words per iteration.
4169 shadd(base, t0, base, t1, 3);
4170 la(t1, entry);
  slli(t0, t0, 2); // jump back (cnt % 8) * 4 bytes: one 4-byte sd instruction per remaining word
4172 sub(t1, t1, t0);
4173 jr(t1);
4174
4175 bind(loop);
4176 add(base, base, unroll * 8);
4177 for (int i = -unroll; i < 0; i++) {
4178 sd(value, Address(base, i * 8));
4179 }
4180 bind(entry);
4181 sub(cnt, cnt, unroll);
4182 bgez(cnt, loop);
4183
4184 bind(fini);
4185 }
4186
4187 // Zero blocks of memory by using CBO.ZERO.
4188 //
4189 // Aligns the base address first sufficiently for CBO.ZERO, then uses
4190 // CBO.ZERO repeatedly for every full block. cnt is the size to be
4191 // zeroed in HeapWords. Returns the count of words left to be zeroed
4192 // in cnt.
4193 //
4194 // NOTE: This is intended to be used in the zero_blocks() stub. If
4195 // you want to use it elsewhere, note that cnt must be >= CacheLineSize.
4196 void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) {
4197 Label initial_table_end, loop;
4198
4199 // Align base with cache line size.
4200 neg(tmp1, base);
4201 andi(tmp1, tmp1, CacheLineSize - 1);
4202
4203 // tmp1: the number of bytes to be filled to align the base with cache line size.
4204 add(base, base, tmp1);
4205 srai(tmp2, tmp1, 3);
4206 sub(cnt, cnt, tmp2);
4207 srli(tmp2, tmp1, 1);
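  // One 4-byte sd per word to fill: (tmp1 / 8) instructions * 4 bytes
  // = tmp1 / 2 bytes, the distance to jump back from initial_table_end.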
4208 la(tmp1, initial_table_end);
4209 sub(tmp2, tmp1, tmp2);
4210 jr(tmp2);
4211 for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) {
4212 sd(zr, Address(base, i));
4213 }
4214 bind(initial_table_end);
4215
4216 mv(tmp1, CacheLineSize / wordSize);
4217 bind(loop);
4218 cbo_zero(base);
4219 sub(cnt, cnt, tmp1);
4220 add(base, base, CacheLineSize);
4221 bge(cnt, tmp1, loop);
4222 }
4223
4224 // java.lang.Math.round(float a)
4225 // Returns the closest int to the argument, with ties rounding to positive infinity.
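// For example, round(2.5f) == 3 while round(-2.5f) == -2: the tie is resolved
// by adding 0.5f and then rounding the sum towards negative infinity.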
4226 void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
  // This instruction sequence provides a performance improvement on all tested devices;
  // don't change it without re-verification.
4229 Label done;
4230 mv(t0, jint_cast(0.5f));
4231 fmv_w_x(ftmp, t0);
4232
4233 // dst = 0 if NaN
4234 feq_s(t0, src, src); // replacing fclass with feq as performance optimization
4235 mv(dst, zr);
4236 beqz(t0, done);
4237
4238 // dst = (src + 0.5f) rounded down towards negative infinity
4239 // Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
4240 // RDN is required for fadd_s, RNE gives incorrect results:
4241 // --------------------------------------------------------------------
4242 // fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
4243 // fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
4244 // --------------------------------------------------------------------
4245 // fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
4246 // fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
4247 // --------------------------------------------------------------------
4248 fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
4249 fcvt_w_s(dst, ftmp, RoundingMode::rdn);
4250
4251 bind(done);
4252 }
4253
4254 // java.lang.Math.round(double a)
4255 // Returns the closest long to the argument, with ties rounding to positive infinity.
4256 void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
  // This instruction sequence provides a performance improvement on all tested devices;
  // don't change it without re-verification.
4259 Label done;
4260 mv(t0, julong_cast(0.5));
4261 fmv_d_x(ftmp, t0);
4262
4263 // dst = 0 if NaN
4264 feq_d(t0, src, src); // replacing fclass with feq as performance optimization
4265 mv(dst, zr);
4266 beqz(t0, done);
4267
4268 // dst = (src + 0.5) rounded down towards negative infinity
4269 fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results
4270 fcvt_l_d(dst, ftmp, RoundingMode::rdn);
4271
4272 bind(done);
4273 }
4274
4275 #define FCVT_SAFE(FLOATCVT, FLOATSIG) \
4276 void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
4277 Label done; \
4278 assert_different_registers(dst, tmp); \
4279 fclass_##FLOATSIG(tmp, src); \
4280 mv(dst, zr); \
  /* check if src is NaN (fclass bits 8/9: signaling/quiet NaN) */              \
4282 andi(tmp, tmp, 0b1100000000); \
4283 bnez(tmp, done); \
4284 FLOATCVT(dst, src); \
4285 bind(done); \
4286 }
4287
4288 FCVT_SAFE(fcvt_w_s, s);
4289 FCVT_SAFE(fcvt_l_s, s);
4290 FCVT_SAFE(fcvt_w_d, d);
4291 FCVT_SAFE(fcvt_l_d, d);
4292
4293 #undef FCVT_SAFE
4294
4295 #define FCMP(FLOATTYPE, FLOATSIG) \
4296 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \
4297 FloatRegister Rs2, int unordered_result) { \
4298 Label Ldone; \
4299 if (unordered_result < 0) { \
4300 /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \
4301 /* installs 1 if gt else 0 */ \
4302 flt_##FLOATSIG(result, Rs2, Rs1); \
4303 /* Rs1 > Rs2, install 1 */ \
4304 bgtz(result, Ldone); \
4305 feq_##FLOATSIG(result, Rs1, Rs2); \
4306 addi(result, result, -1); \
4307 /* Rs1 = Rs2, install 0 */ \
4308 /* NaN or Rs1 < Rs2, install -1 */ \
4309 bind(Ldone); \
4310 } else { \
4311 /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \
4312 /* installs 1 if gt or unordered else 0 */ \
4313 flt_##FLOATSIG(result, Rs1, Rs2); \
4314 /* Rs1 < Rs2, install -1 */ \
4315 bgtz(result, Ldone); \
4316 feq_##FLOATSIG(result, Rs1, Rs2); \
4317 addi(result, result, -1); \
4318 /* Rs1 = Rs2, install 0 */ \
4319 /* NaN or Rs1 > Rs2, install 1 */ \
4320 bind(Ldone); \
4321 neg(result, result); \
4322 } \
4323 }
4324
4325 FCMP(float, s);
4326 FCMP(double, d);
4327
4328 #undef FCMP
4329
4330 // Zero words; len is in bytes
4331 // Destroys all registers except addr
4332 // len must be a nonzero multiple of wordSize
4333 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) {
4334 assert_different_registers(addr, len, tmp, t0, t1);
4335
4336 #ifdef ASSERT
4337 {
4338 Label L;
4339 andi(t0, len, BytesPerWord - 1);
4340 beqz(t0, L);
4341 stop("len is not a multiple of BytesPerWord");
4342 bind(L);
4343 }
4344 #endif // ASSERT
4345
4346 #ifndef PRODUCT
4347 block_comment("zero memory");
4348 #endif // PRODUCT
4349
4350 Label loop;
4351 Label entry;
4352
4353 // Algorithm:
4354 //
4355 // t0 = cnt & 7
4356 // cnt -= t0
4357 // p += t0
4358 // switch (t0) {
4359 // do {
4360 // cnt -= 8
4361 // p[-8] = 0
4362 // case 7:
4363 // p[-7] = 0
4364 // case 6:
4365 // p[-6] = 0
4366 // ...
4367 // case 1:
4368 // p[-1] = 0
4369 // case 0:
4370 // p += 8
4371 // } while (cnt)
4372 // }
4373
4374 const int unroll = 8; // Number of sd(zr) instructions we'll unroll
4375
4376 srli(len, len, LogBytesPerWord);
4377 andi(t0, len, unroll - 1); // t0 = cnt % unroll
4378 sub(len, len, t0); // cnt -= unroll
4379 // tmp always points to the end of the region we're about to zero
4380 shadd(tmp, t0, addr, t1, LogBytesPerWord);
4381 la(t1, entry);
4382 slli(t0, t0, 2);
4383 sub(t1, t1, t0);
4384 jr(t1);
4385 bind(loop);
4386 sub(len, len, unroll);
4387 for (int i = -unroll; i < 0; i++) {
4388 sd(zr, Address(tmp, i * wordSize));
4389 }
4390 bind(entry);
4391 add(tmp, tmp, unroll * wordSize);
4392 bnez(len, loop);
4393 }
4394
4395 // shift left by shamt and add
4396 // Rd = (Rs1 << shamt) + Rs2
4397 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) {
4398 if (UseZba) {
4399 if (shamt == 1) {
4400 sh1add(Rd, Rs1, Rs2);
4401 return;
4402 } else if (shamt == 2) {
4403 sh2add(Rd, Rs1, Rs2);
4404 return;
4405 } else if (shamt == 3) {
4406 sh3add(Rd, Rs1, Rs2);
4407 return;
4408 }
4409 }
4410
4411 if (shamt != 0) {
4412 slli(tmp, Rs1, shamt);
4413 add(Rd, Rs2, tmp);
4414 } else {
4415 add(Rd, Rs1, Rs2);
4416 }
4417 }
4418
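// Zero-extend the low 'bits' bits of src into dst. Without the Zba/Zbb forms
// this is done with a left shift followed by a logical right shift by
// (XLEN - bits); e.g. for bits == 16 on RV64 the two shifts by 48 clear the
// upper 48 bits.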
4419 void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
4420 if (UseZba && bits == 32) {
4421 zext_w(dst, src);
4422 return;
4423 }
4424
4425 if (UseZbb && bits == 16) {
4426 zext_h(dst, src);
4427 return;
4428 }
4429
4430 if (bits == 8) {
4431 zext_b(dst, src);
4432 } else {
4433 slli(dst, src, XLEN - bits);
4434 srli(dst, dst, XLEN - bits);
4435 }
4436 }
4437
4438 void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
4439 if (UseZbb) {
4440 if (bits == 8) {
4441 sext_b(dst, src);
4442 return;
4443 } else if (bits == 16) {
4444 sext_h(dst, src);
4445 return;
4446 }
4447 }
4448
4449 if (bits == 32) {
4450 sext_w(dst, src);
4451 } else {
4452 slli(dst, src, XLEN - bits);
4453 srai(dst, dst, XLEN - bits);
4454 }
4455 }
4456
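// Three-way signed compare of two 64-bit values (the semantics of
// Long.compare): dst is set to 1 if src1 > src2, -1 if src1 < src2, and 0 if
// they are equal.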
4457 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp)
4458 {
4459 if (src1 == src2) {
4460 mv(dst, zr);
4461 return;
4462 }
4463 Label done;
4464 Register left = src1;
4465 Register right = src2;
4466 if (dst == src1) {
4467 assert_different_registers(dst, src2, tmp);
4468 mv(tmp, src1);
4469 left = tmp;
4470 } else if (dst == src2) {
4471 assert_different_registers(dst, src1, tmp);
4472 mv(tmp, src2);
4473 right = tmp;
4474 }
4475
4476 // installs 1 if gt else 0
4477 slt(dst, right, left);
4478 bnez(dst, done);
4479 slt(dst, left, right);
4480 // dst = -1 if lt; else if eq , dst = 0
4481 neg(dst, dst);
4482 bind(done);
4483 }
4484
4485 // The java_calling_convention describes stack locations as ideal slots on
4486 // a frame with no abi restrictions. Since we must observe abi restrictions
4487 // (like the placement of the register window) the slots must be biased by
4488 // the following value.
4489 static int reg2offset_in(VMReg r) {
4490 // Account for saved fp and ra
4491 // This should really be in_preserve_stack_slots
4492 return r->reg2stack() * VMRegImpl::stack_slot_size;
4493 }
4494
4495 static int reg2offset_out(VMReg r) {
4496 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
4497 }
4498
// On 64-bit we store integer-like items to the stack as 64-bit items
// (riscv64 ABI) even though Java only uses 32 bits for such a parameter.
// On 32-bit it would simply be 32 bits, so this routine does 32->32 on
// 32-bit and 32->64 on 64-bit.
4503 void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) {
4504 if (src.first()->is_stack()) {
4505 if (dst.first()->is_stack()) {
4506 // stack to stack
4507 ld(tmp, Address(fp, reg2offset_in(src.first())));
4508 sd(tmp, Address(sp, reg2offset_out(dst.first())));
4509 } else {
4510 // stack to reg
4511 lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4512 }
4513 } else if (dst.first()->is_stack()) {
4514 // reg to stack
4515 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
4516 } else {
4517 if (dst.first() != src.first()) {
4518 sign_extend(dst.first()->as_Register(), src.first()->as_Register(), 32);
4519 }
4520 }
4521 }
4522
// An oop arg. Must pass a handle, not the oop itself.
4524 void MacroAssembler::object_move(OopMap* map,
4525 int oop_handle_offset,
4526 int framesize_in_slots,
4527 VMRegPair src,
4528 VMRegPair dst,
4529 bool is_receiver,
4530 int* receiver_offset) {
4531 assert_cond(map != nullptr && receiver_offset != nullptr);
4532
4533 // must pass a handle. First figure out the location we use as a handle
4534 Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();
4535
  // See if the oop is null; if it is, we need no handle.
4537
4538 if (src.first()->is_stack()) {
4539 // Oop is already on the stack as an argument
4540 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
4541 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
4542 if (is_receiver) {
4543 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
4544 }
4545
4546 ld(t0, Address(fp, reg2offset_in(src.first())));
4547 la(rHandle, Address(fp, reg2offset_in(src.first())));
4548 // conditionally move a null
4549 Label notZero1;
4550 bnez(t0, notZero1);
4551 mv(rHandle, zr);
4552 bind(notZero1);
4553 } else {
4554
    // Oop is in a register; we must store it to the space we reserve on the
    // stack for oop handles, and pass a handle if the oop is non-null.
4557
4558 const Register rOop = src.first()->as_Register();
4559 int oop_slot = -1;
4560 if (rOop == j_rarg0) {
4561 oop_slot = 0;
4562 } else if (rOop == j_rarg1) {
4563 oop_slot = 1;
4564 } else if (rOop == j_rarg2) {
4565 oop_slot = 2;
4566 } else if (rOop == j_rarg3) {
4567 oop_slot = 3;
4568 } else if (rOop == j_rarg4) {
4569 oop_slot = 4;
4570 } else if (rOop == j_rarg5) {
4571 oop_slot = 5;
4572 } else if (rOop == j_rarg6) {
4573 oop_slot = 6;
4574 } else {
4575 assert(rOop == j_rarg7, "wrong register");
4576 oop_slot = 7;
4577 }
4578
4579 oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
4580 int offset = oop_slot * VMRegImpl::stack_slot_size;
4581
4582 map->set_oop(VMRegImpl::stack2reg(oop_slot));
4583 // Store oop in handle area, may be null
4584 sd(rOop, Address(sp, offset));
4585 if (is_receiver) {
4586 *receiver_offset = offset;
4587 }
4588
    // rOop may be the same as rHandle
4590 if (rOop == rHandle) {
4591 Label isZero;
4592 beqz(rOop, isZero);
4593 la(rHandle, Address(sp, offset));
4594 bind(isZero);
4595 } else {
4596 Label notZero2;
4597 la(rHandle, Address(sp, offset));
4598 bnez(rOop, notZero2);
4599 mv(rHandle, zr);
4600 bind(notZero2);
4601 }
4602 }
4603
  // If the arg is on the stack then place it; otherwise it is already in the correct reg.
4605 if (dst.first()->is_stack()) {
4606 sd(rHandle, Address(sp, reg2offset_out(dst.first())));
4607 }
4608 }
4609
// A float arg may have to be moved between a float reg and an int reg.
4611 void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) {
4612 assert(src.first()->is_stack() && dst.first()->is_stack() ||
4613 src.first()->is_reg() && dst.first()->is_reg() ||
4614 src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
4615 if (src.first()->is_stack()) {
4616 if (dst.first()->is_stack()) {
4617 lwu(tmp, Address(fp, reg2offset_in(src.first())));
4618 sw(tmp, Address(sp, reg2offset_out(dst.first())));
4619 } else if (dst.first()->is_Register()) {
4620 lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4621 } else {
4622 ShouldNotReachHere();
4623 }
4624 } else if (src.first() != dst.first()) {
4625 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
4626 fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
4627 } else {
4628 ShouldNotReachHere();
4629 }
4630 }
4631 }
4632
4633 // A long move
4634 void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) {
4635 if (src.first()->is_stack()) {
4636 if (dst.first()->is_stack()) {
4637 // stack to stack
4638 ld(tmp, Address(fp, reg2offset_in(src.first())));
4639 sd(tmp, Address(sp, reg2offset_out(dst.first())));
4640 } else {
4641 // stack to reg
4642 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4643 }
4644 } else if (dst.first()->is_stack()) {
4645 // reg to stack
4646 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
4647 } else {
4648 if (dst.first() != src.first()) {
4649 mv(dst.first()->as_Register(), src.first()->as_Register());
4650 }
4651 }
4652 }
4653
4654 // A double move
4655 void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) {
4656 assert(src.first()->is_stack() && dst.first()->is_stack() ||
4657 src.first()->is_reg() && dst.first()->is_reg() ||
4658 src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
4659 if (src.first()->is_stack()) {
4660 if (dst.first()->is_stack()) {
4661 ld(tmp, Address(fp, reg2offset_in(src.first())));
4662 sd(tmp, Address(sp, reg2offset_out(dst.first())));
    } else if (dst.first()->is_Register()) {
4664 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4665 } else {
4666 ShouldNotReachHere();
4667 }
4668 } else if (src.first() != dst.first()) {
4669 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
4670 fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
4671 } else {
4672 ShouldNotReachHere();
4673 }
4674 }
4675 }
4676
4677 void MacroAssembler::rt_call(address dest, Register tmp) {
4678 CodeBlob *cb = CodeCache::find_blob(dest);
4679 RuntimeAddress target(dest);
4680 if (cb) {
4681 far_call(target);
4682 } else {
4683 relocate(target.rspec(), [&] {
4684 int32_t offset;
4685 la_patchable(tmp, target, offset);
4686 jalr(x1, tmp, offset);
4687 });
4688 }
4689 }
4690
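// Leave a non-zero value in Rd if and only if bit 'bit_pos' of Rs is set;
// callers test the result with beqz/bnez. With Zbs this is a single bexti;
// otherwise an and-immediate (or a shift plus and-immediate) is used.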
4691 void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) {
4692 assert(bit_pos < 64, "invalid bit range");
4693 if (UseZbs) {
4694 bexti(Rd, Rs, bit_pos);
4695 return;
4696 }
4697 int64_t imm = (int64_t)(1UL << bit_pos);
4698 if (is_simm12(imm)) {
4699 and_imm12(Rd, Rs, imm);
4700 } else {
4701 srli(Rd, Rs, bit_pos);
4702 and_imm12(Rd, Rd, 1);
4703 }
4704 }
4705
4706 // Implements lightweight-locking.
4707 //
4708 // - obj: the object to be locked
4709 // - tmp1, tmp2, tmp3: temporary registers, will be destroyed
4710 // - slow: branched to if locking fails
4711 void MacroAssembler::lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
4712 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4713 assert_different_registers(obj, tmp1, tmp2, tmp3, t0);
4714
4715 Label push;
4716 const Register top = tmp1;
4717 const Register mark = tmp2;
4718 const Register t = tmp3;
4719
4720 // Preload the markWord. It is important that this is the first
4721 // instruction emitted as it is part of C1's null check semantics.
4722 ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
4723
4724 // Check if the lock-stack is full.
4725 lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
4726 mv(t, (unsigned)LockStack::end_offset());
4727 bge(top, t, slow, /* is_far */ true);
4728
4729 // Check for recursion.
4730 add(t, xthread, top);
4731 ld(t, Address(t, -oopSize));
4732 beq(obj, t, push);
4733
4734 // Check header for monitor (0b10).
4735 test_bit(t, mark, exact_log2(markWord::monitor_value));
4736 bnez(t, slow, /* is_far */ true);
4737
4738 // Try to lock. Transition lock-bits 0b01 => 0b00
4739 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a la");
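  // ori produces the expected value with the unlocked bit (0b01) set and xori
  // produces the new value with it cleared, so a successful cmpxchg performs
  // the 0b01 => 0b00 transition.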
4740 ori(mark, mark, markWord::unlocked_value);
4741 xori(t, mark, markWord::unlocked_value);
4742 cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
4743 /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t);
4744 bne(mark, t, slow, /* is_far */ true);
4745
4746 bind(push);
4747 // After successful lock, push object on lock-stack.
4748 add(t, xthread, top);
4749 sd(obj, Address(t));
4750 addw(top, top, oopSize);
4751 sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
4752 }
4753
// Implements lightweight-unlocking.
4755 //
4756 // - obj: the object to be unlocked
4757 // - tmp1, tmp2, tmp3: temporary registers
4758 // - slow: branched to if unlocking fails
4759 void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
4760 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4761 assert_different_registers(obj, tmp1, tmp2, tmp3, t0);
4762
4763 #ifdef ASSERT
4764 {
4765 // Check for lock-stack underflow.
4766 Label stack_ok;
4767 lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
4768 mv(tmp2, (unsigned)LockStack::start_offset());
4769 bge(tmp1, tmp2, stack_ok);
4770 STOP("Lock-stack underflow");
4771 bind(stack_ok);
4772 }
4773 #endif
4774
4775 Label unlocked, push_and_slow;
4776 const Register top = tmp1;
4777 const Register mark = tmp2;
4778 const Register t = tmp3;
4779
4780 // Check if obj is top of lock-stack.
4781 lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
4782 subw(top, top, oopSize);
4783 add(t, xthread, top);
4784 ld(t, Address(t));
4785 bne(obj, t, slow, /* is_far */ true);
4786
4787 // Pop lock-stack.
4788 DEBUG_ONLY(add(t, xthread, top);)
4789 DEBUG_ONLY(sd(zr, Address(t));)
4790 sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
4791
4792 // Check if recursive.
4793 add(t, xthread, top);
4794 ld(t, Address(t, -oopSize));
4795 beq(obj, t, unlocked);
4796
4797 // Not recursive. Check header for monitor (0b10).
4798 ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
4799 test_bit(t, mark, exact_log2(markWord::monitor_value));
4800 bnez(t, push_and_slow);
4801
4802 #ifdef ASSERT
4803 // Check header not unlocked (0b01).
4804 Label not_unlocked;
4805 test_bit(t, mark, exact_log2(markWord::unlocked_value));
4806 beqz(t, not_unlocked);
4807 stop("lightweight_unlock already unlocked");
4808 bind(not_unlocked);
4809 #endif
4810
4811 // Try to unlock. Transition lock bits 0b00 => 0b01
4812 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
4813 ori(t, mark, markWord::unlocked_value);
4814 cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
4815 /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ t);
4816 beq(mark, t, unlocked);
4817
4818 bind(push_and_slow);
4819 // Restore lock-stack and handle the unlock in runtime.
4820 DEBUG_ONLY(add(t, xthread, top);)
4821 DEBUG_ONLY(sd(obj, Address(t));)
4822 addw(top, top, oopSize);
4823 sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
4824 j(slow);
4825
4826 bind(unlocked);
4827 }