1 /*
2 * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
4 * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 *
7 * This code is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "precompiled.hpp"
28 #include "asm/assembler.hpp"
29 #include "asm/assembler.inline.hpp"
30 #include "compiler/disassembler.hpp"
31 #include "gc/shared/barrierSet.hpp"
32 #include "gc/shared/barrierSetAssembler.hpp"
33 #include "gc/shared/cardTable.hpp"
34 #include "gc/shared/cardTableBarrierSet.hpp"
35 #include "gc/shared/collectedHeap.hpp"
36 #include "interpreter/bytecodeHistogram.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "memory/resourceArea.hpp"
39 #include "memory/universe.hpp"
40 #include "nativeInst_riscv.hpp"
41 #include "oops/accessDecorators.hpp"
42 #include "oops/compressedOops.inline.hpp"
43 #include "oops/klass.inline.hpp"
44 #include "oops/oop.hpp"
45 #include "runtime/interfaceSupport.inline.hpp"
46 #include "runtime/javaThread.hpp"
47 #include "runtime/jniHandles.inline.hpp"
48 #include "runtime/sharedRuntime.hpp"
49 #include "runtime/stubRoutines.hpp"
50 #include "utilities/powerOfTwo.hpp"
51 #ifdef COMPILER2
52 #include "opto/compile.hpp"
53 #include "opto/node.hpp"
54 #include "opto/output.hpp"
55 #endif
56
57 #ifdef PRODUCT
58 #define BLOCK_COMMENT(str) /* nothing */
59 #else
60 #define BLOCK_COMMENT(str) block_comment(str)
61 #endif
62 #define STOP(str) stop(str);
63 #define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":")
64
65 static void pass_arg0(MacroAssembler* masm, Register arg) {
66 if (c_rarg0 != arg) {
67 masm->mv(c_rarg0, arg);
68 }
69 }
70
71 static void pass_arg1(MacroAssembler* masm, Register arg) {
72 if (c_rarg1 != arg) {
73 masm->mv(c_rarg1, arg);
74 }
75 }
76
77 static void pass_arg2(MacroAssembler* masm, Register arg) {
78 if (c_rarg2 != arg) {
79 masm->mv(c_rarg2, arg);
80 }
81 }
82
83 static void pass_arg3(MacroAssembler* masm, Register arg) {
84 if (c_rarg3 != arg) {
85 masm->mv(c_rarg3, arg);
86 }
87 }
88
89 void MacroAssembler::push_cont_fastpath(Register java_thread) {
90 if (!Continuations::enabled()) return;
91 Label done;
92 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset()));
93 bleu(sp, t0, done);
94 sd(sp, Address(java_thread, JavaThread::cont_fastpath_offset()));
95 bind(done);
96 }
97
98 void MacroAssembler::pop_cont_fastpath(Register java_thread) {
99 if (!Continuations::enabled()) return;
100 Label done;
101 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset()));
102 bltu(sp, t0, done);
103 sd(zr, Address(java_thread, JavaThread::cont_fastpath_offset()));
104 bind(done);
105 }
106
107 int MacroAssembler::align(int modulus, int extra_offset) {
108 CompressibleRegion cr(this);
109 intptr_t before = offset();
110 while ((offset() + extra_offset) % modulus != 0) { nop(); }
111 return (int)(offset() - before);
112 }
113
114 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
115 call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
116 }
117
118 // Implementation of call_VM versions
119
120 void MacroAssembler::call_VM(Register oop_result,
121 address entry_point,
122 bool check_exceptions) {
123 call_VM_helper(oop_result, entry_point, 0, check_exceptions);
124 }
125
126 void MacroAssembler::call_VM(Register oop_result,
127 address entry_point,
128 Register arg_1,
129 bool check_exceptions) {
130 pass_arg1(this, arg_1);
131 call_VM_helper(oop_result, entry_point, 1, check_exceptions);
132 }
133
134 void MacroAssembler::call_VM(Register oop_result,
135 address entry_point,
136 Register arg_1,
137 Register arg_2,
138 bool check_exceptions) {
139 assert(arg_1 != c_rarg2, "smashed arg");
140 pass_arg2(this, arg_2);
141 pass_arg1(this, arg_1);
142 call_VM_helper(oop_result, entry_point, 2, check_exceptions);
143 }
144
145 void MacroAssembler::call_VM(Register oop_result,
146 address entry_point,
147 Register arg_1,
148 Register arg_2,
149 Register arg_3,
150 bool check_exceptions) {
151 assert(arg_1 != c_rarg3, "smashed arg");
152 assert(arg_2 != c_rarg3, "smashed arg");
153 pass_arg3(this, arg_3);
154
155 assert(arg_1 != c_rarg2, "smashed arg");
156 pass_arg2(this, arg_2);
157
158 pass_arg1(this, arg_1);
159 call_VM_helper(oop_result, entry_point, 3, check_exceptions);
160 }
161
162 void MacroAssembler::call_VM(Register oop_result,
163 Register last_java_sp,
164 address entry_point,
165 int number_of_arguments,
166 bool check_exceptions) {
167 call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
168 }
169
170 void MacroAssembler::call_VM(Register oop_result,
171 Register last_java_sp,
172 address entry_point,
173 Register arg_1,
174 bool check_exceptions) {
175 pass_arg1(this, arg_1);
176 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
177 }
178
179 void MacroAssembler::call_VM(Register oop_result,
180 Register last_java_sp,
181 address entry_point,
182 Register arg_1,
183 Register arg_2,
184 bool check_exceptions) {
185
186 assert(arg_1 != c_rarg2, "smashed arg");
187 pass_arg2(this, arg_2);
188 pass_arg1(this, arg_1);
189 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
190 }
191
192 void MacroAssembler::call_VM(Register oop_result,
193 Register last_java_sp,
194 address entry_point,
195 Register arg_1,
196 Register arg_2,
197 Register arg_3,
198 bool check_exceptions) {
199 assert(arg_1 != c_rarg3, "smashed arg");
200 assert(arg_2 != c_rarg3, "smashed arg");
201 pass_arg3(this, arg_3);
202 assert(arg_1 != c_rarg2, "smashed arg");
203 pass_arg2(this, arg_2);
204 pass_arg1(this, arg_1);
205 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
206 }
207
208 void MacroAssembler::post_call_nop() {
209 if (!Continuations::enabled()) {
210 return;
211 }
212 relocate(post_call_nop_Relocation::spec(), [&] {
213 InlineSkippedInstructionsCounter skipCounter(this);
214 nop();
215 li32(zr, 0);
216 });
217 }
218
219 // these are no-ops overridden by InterpreterMacroAssembler
220 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
221 void MacroAssembler::check_and_handle_popframe(Register java_thread) {}
222
223 // Calls to C land
224 //
225 // When entering C land, the fp, & esp of the last Java frame have to be recorded
226 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
227 // has to be reset to 0. This is required to allow proper stack traversal.
228 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
229 Register last_java_fp,
230 Register last_java_pc,
231 Register tmp) {
232
233 if (last_java_pc->is_valid()) {
234 sd(last_java_pc, Address(xthread,
235 JavaThread::frame_anchor_offset() +
236 JavaFrameAnchor::last_Java_pc_offset()));
237 }
238
239 // determine last_java_sp register
240 if (last_java_sp == sp) {
241 mv(tmp, sp);
242 last_java_sp = tmp;
243 } else if (!last_java_sp->is_valid()) {
244 last_java_sp = esp;
245 }
246
247 sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset()));
248
249 // last_java_fp is optional
250 if (last_java_fp->is_valid()) {
251 sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset()));
252 }
253 }
254
255 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
256 Register last_java_fp,
257 address last_java_pc,
258 Register tmp) {
259 assert(last_java_pc != nullptr, "must provide a valid PC");
260
261 la(tmp, last_java_pc);
262 sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
263
264 set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp);
265 }
266
267 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
268 Register last_java_fp,
269 Label &L,
270 Register tmp) {
271 if (L.is_bound()) {
272 set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp);
273 } else {
274 L.add_patch_at(code(), locator());
275 IncompressibleRegion ir(this); // the label address will be patched back.
276 set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp);
277 }
278 }
279
280 void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
281 // we must set sp to zero to clear frame
282 sd(zr, Address(xthread, JavaThread::last_Java_sp_offset()));
283
284 // must clear fp, so that compiled frames are not confused; it is
285 // possible that we need it only for debugging
286 if (clear_fp) {
287 sd(zr, Address(xthread, JavaThread::last_Java_fp_offset()));
288 }
289
290 // Always clear the pc because it could have been set by make_walkable()
291 sd(zr, Address(xthread, JavaThread::last_Java_pc_offset()));
292 }
293
294 void MacroAssembler::call_VM_base(Register oop_result,
295 Register java_thread,
296 Register last_java_sp,
297 address entry_point,
298 int number_of_arguments,
299 bool check_exceptions) {
300 // determine java_thread register
301 if (!java_thread->is_valid()) {
302 java_thread = xthread;
303 }
304 // determine last_java_sp register
305 if (!last_java_sp->is_valid()) {
306 last_java_sp = esp;
307 }
308
309 // debugging support
310 assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
311 assert(java_thread == xthread, "unexpected register");
312
313 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
314 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
315
316 // push java thread (becomes first argument of C function)
317 mv(c_rarg0, java_thread);
318
319 // set last Java frame before call
320 assert(last_java_sp != fp, "can't use fp");
321
322 Label l;
323 set_last_Java_frame(last_java_sp, fp, l, t0);
324
325 // do the call, remove parameters
326 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);
327
328 // reset last Java frame
329 // Only interpreter should have to clear fp
330 reset_last_Java_frame(true);
331
332 // C++ interp handles this in the interpreter
333 check_and_handle_popframe(java_thread);
334 check_and_handle_earlyret(java_thread);
335
336 if (check_exceptions) {
337 // check for pending exceptions (java_thread is set upon return)
338 ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
339 Label ok;
340 beqz(t0, ok);
341 RuntimeAddress target(StubRoutines::forward_exception_entry());
342 relocate(target.rspec(), [&] {
343 int32_t offset;
344 la_patchable(t0, target, offset);
345 jalr(x0, t0, offset);
346 });
347 bind(ok);
348 }
349
350 // get oop result if there is one and reset the value in the thread
351 if (oop_result->is_valid()) {
352 get_vm_result(oop_result, java_thread);
353 }
354 }
355
356 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
357 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
358 sd(zr, Address(java_thread, JavaThread::vm_result_offset()));
359 verify_oop_msg(oop_result, "broken oop in call_VM_base");
360 }
361
362 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
363 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
364 sd(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
365 }
366
367 void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) {
368 assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required");
369 assert_different_registers(klass, xthread, tmp);
370
371 Label L_fallthrough, L_tmp;
372 if (L_fast_path == nullptr) {
373 L_fast_path = &L_fallthrough;
374 } else if (L_slow_path == nullptr) {
375 L_slow_path = &L_fallthrough;
376 }
377
378 // Fast path check: class is fully initialized
379 lbu(tmp, Address(klass, InstanceKlass::init_state_offset()));
380 sub(tmp, tmp, InstanceKlass::fully_initialized);
381 beqz(tmp, *L_fast_path);
382
383 // Fast path check: current thread is initializer thread
384 ld(tmp, Address(klass, InstanceKlass::init_thread_offset()));
385
386 if (L_slow_path == &L_fallthrough) {
387 beq(xthread, tmp, *L_fast_path);
388 bind(*L_slow_path);
389 } else if (L_fast_path == &L_fallthrough) {
390 bne(xthread, tmp, *L_slow_path);
391 bind(*L_fast_path);
392 } else {
393 Unimplemented();
394 }
395 }
396
397 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
398 if (!VerifyOops) { return; }
399
400 // Pass register number to verify_oop_subroutine
401 const char* b = nullptr;
402 {
403 ResourceMark rm;
404 stringStream ss;
405 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
406 b = code_string(ss.as_string());
407 }
408 BLOCK_COMMENT("verify_oop {");
409
410 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
411
412 mv(c_rarg0, reg); // c_rarg0 : x10
413 {
414 // The length of the instruction sequence emitted should not depend
415 // on the address of the char buffer so that the size of mach nodes for
416 // scratch emit and normal emit matches.
417 IncompressibleRegion ir(this); // Fixed length
418 movptr(t0, (address) b);
419 }
420
421 // call indirectly to solve generation ordering problem
422 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address());
423 relocate(target.rspec(), [&] {
424 int32_t offset;
425 la_patchable(t1, target, offset);
426 ld(t1, Address(t1, offset));
427 });
428 jalr(t1);
429
430 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
431
432 BLOCK_COMMENT("} verify_oop");
433 }
434
435 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
436 if (!VerifyOops) {
437 return;
438 }
439
440 const char* b = nullptr;
441 {
442 ResourceMark rm;
443 stringStream ss;
444 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
445 b = code_string(ss.as_string());
446 }
447 BLOCK_COMMENT("verify_oop_addr {");
448
449 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
450
451 if (addr.uses(sp)) {
452 la(x10, addr);
453 ld(x10, Address(x10, 4 * wordSize));
454 } else {
455 ld(x10, addr);
456 }
457
458 {
459 // The length of the instruction sequence emitted should not depend
460 // on the address of the char buffer so that the size of mach nodes for
461 // scratch emit and normal emit matches.
462 IncompressibleRegion ir(this); // Fixed length
463 movptr(t0, (address) b);
464 }
465
466 // call indirectly to solve generation ordering problem
467 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address());
468 relocate(target.rspec(), [&] {
469 int32_t offset;
470 la_patchable(t1, target, offset);
471 ld(t1, Address(t1, offset));
472 });
473 jalr(t1);
474
475 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
476
477 BLOCK_COMMENT("} verify_oop_addr");
478 }
479
480 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
481 int extra_slot_offset) {
482 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
483 int stackElementSize = Interpreter::stackElementSize;
484 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
485 #ifdef ASSERT
486 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
487 assert(offset1 - offset == stackElementSize, "correct arithmetic");
488 #endif
489 if (arg_slot.is_constant()) {
490 return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
491 } else {
492 assert_different_registers(t0, arg_slot.as_register());
493 shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
494 return Address(t0, offset);
495 }
496 }
497
498 #ifndef PRODUCT
499 extern "C" void findpc(intptr_t x);
500 #endif
501
502 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
503 {
504 // In order to get locks to work, we need to fake a in_VM state
505 if (ShowMessageBoxOnError) {
506 JavaThread* thread = JavaThread::current();
507 JavaThreadState saved_state = thread->thread_state();
508 thread->set_thread_state(_thread_in_vm);
509 #ifndef PRODUCT
510 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
511 ttyLocker ttyl;
512 BytecodeCounter::print();
513 }
514 #endif
515 if (os::message_box(msg, "Execution stopped, print registers?")) {
516 ttyLocker ttyl;
517 tty->print_cr(" pc = 0x%016lx", pc);
518 #ifndef PRODUCT
519 tty->cr();
520 findpc(pc);
521 tty->cr();
522 #endif
523 tty->print_cr(" x0 = 0x%016lx", regs[0]);
524 tty->print_cr(" x1 = 0x%016lx", regs[1]);
525 tty->print_cr(" x2 = 0x%016lx", regs[2]);
526 tty->print_cr(" x3 = 0x%016lx", regs[3]);
527 tty->print_cr(" x4 = 0x%016lx", regs[4]);
528 tty->print_cr(" x5 = 0x%016lx", regs[5]);
529 tty->print_cr(" x6 = 0x%016lx", regs[6]);
530 tty->print_cr(" x7 = 0x%016lx", regs[7]);
531 tty->print_cr(" x8 = 0x%016lx", regs[8]);
532 tty->print_cr(" x9 = 0x%016lx", regs[9]);
533 tty->print_cr("x10 = 0x%016lx", regs[10]);
534 tty->print_cr("x11 = 0x%016lx", regs[11]);
535 tty->print_cr("x12 = 0x%016lx", regs[12]);
536 tty->print_cr("x13 = 0x%016lx", regs[13]);
537 tty->print_cr("x14 = 0x%016lx", regs[14]);
538 tty->print_cr("x15 = 0x%016lx", regs[15]);
539 tty->print_cr("x16 = 0x%016lx", regs[16]);
540 tty->print_cr("x17 = 0x%016lx", regs[17]);
541 tty->print_cr("x18 = 0x%016lx", regs[18]);
542 tty->print_cr("x19 = 0x%016lx", regs[19]);
543 tty->print_cr("x20 = 0x%016lx", regs[20]);
544 tty->print_cr("x21 = 0x%016lx", regs[21]);
545 tty->print_cr("x22 = 0x%016lx", regs[22]);
546 tty->print_cr("x23 = 0x%016lx", regs[23]);
547 tty->print_cr("x24 = 0x%016lx", regs[24]);
548 tty->print_cr("x25 = 0x%016lx", regs[25]);
549 tty->print_cr("x26 = 0x%016lx", regs[26]);
550 tty->print_cr("x27 = 0x%016lx", regs[27]);
551 tty->print_cr("x28 = 0x%016lx", regs[28]);
552 tty->print_cr("x30 = 0x%016lx", regs[30]);
553 tty->print_cr("x31 = 0x%016lx", regs[31]);
554 BREAKPOINT;
555 }
556 }
557 fatal("DEBUG MESSAGE: %s", msg);
558 }
559
560 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
561 assert_different_registers(value, tmp1, tmp2);
562 Label done, tagged, weak_tagged;
563
564 beqz(value, done); // Use null as-is.
565 // Test for tag.
566 andi(tmp1, value, JNIHandles::tag_mask);
567 bnez(tmp1, tagged);
568
569 // Resolve local handle
570 access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, value, Address(value, 0), tmp1, tmp2);
571 verify_oop(value);
572 j(done);
573
574 bind(tagged);
575 // Test for jweak tag.
576 STATIC_ASSERT(JNIHandles::TypeTag::weak_global == 0b1);
577 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::weak_global));
578 bnez(tmp1, weak_tagged);
579
580 // Resolve global handle
581 access_load_at(T_OBJECT, IN_NATIVE, value,
582 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2);
583 verify_oop(value);
584 j(done);
585
586 bind(weak_tagged);
587 // Resolve jweak.
588 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
589 Address(value, -JNIHandles::TypeTag::weak_global), tmp1, tmp2);
590 verify_oop(value);
591
592 bind(done);
593 }
594
595 void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) {
596 assert_different_registers(value, tmp1, tmp2);
597 Label done;
598
599 beqz(value, done); // Use null as-is.
600
601 #ifdef ASSERT
602 {
603 STATIC_ASSERT(JNIHandles::TypeTag::global == 0b10);
604 Label valid_global_tag;
605 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::global)); // Test for global tag.
606 bnez(tmp1, valid_global_tag);
607 stop("non global jobject using resolve_global_jobject");
608 bind(valid_global_tag);
609 }
610 #endif
611
612 // Resolve global handle
613 access_load_at(T_OBJECT, IN_NATIVE, value,
614 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2);
615 verify_oop(value);
616
617 bind(done);
618 }
619
620 void MacroAssembler::stop(const char* msg) {
621 BLOCK_COMMENT(msg);
622 illegal_instruction(Assembler::csr::time);
623 emit_int64((uintptr_t)msg);
624 }
625
626 void MacroAssembler::unimplemented(const char* what) {
627 const char* buf = nullptr;
628 {
629 ResourceMark rm;
630 stringStream ss;
631 ss.print("unimplemented: %s", what);
632 buf = code_string(ss.as_string());
633 }
634 stop(buf);
635 }
636
637 void MacroAssembler::emit_static_call_stub() {
638 IncompressibleRegion ir(this); // Fixed length: see CompiledStaticCall::to_interp_stub_size().
639 // CompiledDirectStaticCall::set_to_interpreted knows the
640 // exact layout of this stub.
641
642 mov_metadata(xmethod, (Metadata*)nullptr);
643
644 // Jump to the entry point of the c2i stub.
645 int32_t offset = 0;
646 movptr(t0, 0, offset);
647 jalr(x0, t0, offset);
648 }
649
650 void MacroAssembler::call_VM_leaf_base(address entry_point,
651 int number_of_arguments,
652 Label *retaddr) {
653 push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp
654 call(entry_point);
655 if (retaddr != nullptr) {
656 bind(*retaddr);
657 }
658 pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp
659 }
660
661 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
662 call_VM_leaf_base(entry_point, number_of_arguments);
663 }
664
665 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
666 pass_arg0(this, arg_0);
667 call_VM_leaf_base(entry_point, 1);
668 }
669
670 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
671 pass_arg0(this, arg_0);
672 pass_arg1(this, arg_1);
673 call_VM_leaf_base(entry_point, 2);
674 }
675
676 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
677 Register arg_1, Register arg_2) {
678 pass_arg0(this, arg_0);
679 pass_arg1(this, arg_1);
680 pass_arg2(this, arg_2);
681 call_VM_leaf_base(entry_point, 3);
682 }
683
684 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
685 pass_arg0(this, arg_0);
686 MacroAssembler::call_VM_leaf_base(entry_point, 1);
687 }
688
689 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
690
691 assert(arg_0 != c_rarg1, "smashed arg");
692 pass_arg1(this, arg_1);
693 pass_arg0(this, arg_0);
694 MacroAssembler::call_VM_leaf_base(entry_point, 2);
695 }
696
697 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
698 assert(arg_0 != c_rarg2, "smashed arg");
699 assert(arg_1 != c_rarg2, "smashed arg");
700 pass_arg2(this, arg_2);
701 assert(arg_0 != c_rarg1, "smashed arg");
702 pass_arg1(this, arg_1);
703 pass_arg0(this, arg_0);
704 MacroAssembler::call_VM_leaf_base(entry_point, 3);
705 }
706
707 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
708 assert(arg_0 != c_rarg3, "smashed arg");
709 assert(arg_1 != c_rarg3, "smashed arg");
710 assert(arg_2 != c_rarg3, "smashed arg");
711 pass_arg3(this, arg_3);
712 assert(arg_0 != c_rarg2, "smashed arg");
713 assert(arg_1 != c_rarg2, "smashed arg");
714 pass_arg2(this, arg_2);
715 assert(arg_0 != c_rarg1, "smashed arg");
716 pass_arg1(this, arg_1);
717 pass_arg0(this, arg_0);
718 MacroAssembler::call_VM_leaf_base(entry_point, 4);
719 }
720
721 void MacroAssembler::la(Register Rd, const address dest) {
722 int64_t offset = dest - pc();
723 if (is_valid_32bit_offset(offset)) {
724 auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit
725 addi(Rd, Rd, ((int64_t)offset << 52) >> 52);
726 } else {
727 movptr(Rd, dest);
728 }
729 }
730
731 void MacroAssembler::la(Register Rd, const Address &adr) {
732 switch (adr.getMode()) {
733 case Address::literal: {
734 relocInfo::relocType rtype = adr.rspec().reloc()->type();
735 if (rtype == relocInfo::none) {
736 mv(Rd, (intptr_t)(adr.target()));
737 } else {
738 relocate(adr.rspec(), [&] {
739 movptr(Rd, adr.target());
740 });
741 }
742 break;
743 }
744 case Address::base_plus_offset: {
745 Address new_adr = legitimize_address(Rd, adr);
746 if (!(new_adr.base() == Rd && new_adr.offset() == 0)) {
747 addi(Rd, new_adr.base(), new_adr.offset());
748 }
749 break;
750 }
751 default:
752 ShouldNotReachHere();
753 }
754 }
755
756 void MacroAssembler::la(Register Rd, Label &label) {
757 IncompressibleRegion ir(this); // the label address may be patched back.
758 wrap_label(Rd, label, &MacroAssembler::la);
759 }
760
761 void MacroAssembler::li16u(Register Rd, uint16_t imm) {
762 lui(Rd, (uint32_t)imm << 12);
763 srli(Rd, Rd, 12);
764 }
765
766 void MacroAssembler::li32(Register Rd, int32_t imm) {
767 // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit
768 int64_t upper = imm, lower = imm;
769 lower = (imm << 20) >> 20;
770 upper -= lower;
771 upper = (int32_t)upper;
772 // lui Rd, imm[31:12] + imm[11]
773 lui(Rd, upper);
774 // use addiw to distinguish li32 to li64
775 addiw(Rd, Rd, lower);
776 }
777
778 void MacroAssembler::li64(Register Rd, int64_t imm) {
779 // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or
780 // (imm[31:20] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1.
781 int64_t lower = imm & 0xffffffff;
782 lower -= ((lower << 44) >> 44);
783 int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower;
784 int32_t upper = (tmp_imm - (int32_t)lower) >> 32;
785
786 // Load upper 32 bits
787 int64_t up = upper, lo = upper;
788 lo = (lo << 52) >> 52;
789 up -= lo;
790 up = (int32_t)up;
791 lui(Rd, up);
792 addi(Rd, Rd, lo);
793
794 // Load the rest 32 bits.
795 slli(Rd, Rd, 12);
796 addi(Rd, Rd, (int32_t)lower >> 20);
797 slli(Rd, Rd, 12);
798 lower = ((int32_t)imm << 12) >> 20;
799 addi(Rd, Rd, lower);
800 slli(Rd, Rd, 8);
801 lower = imm & 0xff;
802 addi(Rd, Rd, lower);
803 }
804
805 void MacroAssembler::li(Register Rd, int64_t imm) {
806 // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff
807 // li -> c.li
808 if (do_compress() && (is_simm6(imm) && Rd != x0)) {
809 c_li(Rd, imm);
810 return;
811 }
812
813 int shift = 12;
814 int64_t upper = imm, lower = imm;
815 // Split imm to a lower 12-bit sign-extended part and the remainder,
816 // because addi will sign-extend the lower imm.
817 lower = ((int32_t)imm << 20) >> 20;
818 upper -= lower;
819
820 // Test whether imm is a 32-bit integer.
821 if (!(((imm) & ~(int64_t)0x7fffffff) == 0 ||
822 (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) {
823 while (((upper >> shift) & 1) == 0) { shift++; }
824 upper >>= shift;
825 li(Rd, upper);
826 slli(Rd, Rd, shift);
827 if (lower != 0) {
828 addi(Rd, Rd, lower);
829 }
830 } else {
831 // 32-bit integer
832 Register hi_Rd = zr;
833 if (upper != 0) {
834 lui(Rd, (int32_t)upper);
835 hi_Rd = Rd;
836 }
837 if (lower != 0 || hi_Rd == zr) {
838 addiw(Rd, hi_Rd, lower);
839 }
840 }
841 }
842
843 #define INSN(NAME, REGISTER) \
844 void MacroAssembler::NAME(const address dest, Register temp) { \
845 assert_cond(dest != nullptr); \
846 int64_t distance = dest - pc(); \
847 if (is_simm21(distance) && ((distance % 2) == 0)) { \
848 Assembler::jal(REGISTER, distance); \
849 } else { \
850 assert(temp != noreg, "expecting a register"); \
851 int32_t offset = 0; \
852 movptr(temp, dest, offset); \
853 Assembler::jalr(REGISTER, temp, offset); \
854 } \
855 } \
856
857 INSN(j, x0);
858 INSN(jal, x1);
859
860 #undef INSN
861
862 #define INSN(NAME, REGISTER) \
863 void MacroAssembler::NAME(const Address &adr, Register temp) { \
864 switch (adr.getMode()) { \
865 case Address::literal: { \
866 relocate(adr.rspec(), [&] { \
867 NAME(adr.target(), temp); \
868 }); \
869 break; \
870 } \
871 case Address::base_plus_offset: { \
872 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
873 la(temp, Address(adr.base(), adr.offset() - offset)); \
874 Assembler::jalr(REGISTER, temp, offset); \
875 break; \
876 } \
877 default: \
878 ShouldNotReachHere(); \
879 } \
880 }
881
882 INSN(j, x0);
883 INSN(jal, x1);
884
885 #undef INSN
886
887 #define INSN(NAME) \
888 void MacroAssembler::NAME(Register Rd, const address dest, Register temp) { \
889 assert_cond(dest != nullptr); \
890 int64_t distance = dest - pc(); \
891 if (is_simm21(distance) && ((distance % 2) == 0)) { \
892 Assembler::NAME(Rd, distance); \
893 } else { \
894 assert_different_registers(Rd, temp); \
895 int32_t offset = 0; \
896 movptr(temp, dest, offset); \
897 jalr(Rd, temp, offset); \
898 } \
899 } \
900 void MacroAssembler::NAME(Register Rd, Label &L, Register temp) { \
901 assert_different_registers(Rd, temp); \
902 wrap_label(Rd, L, temp, &MacroAssembler::NAME); \
903 }
904
905 INSN(jal);
906
907 #undef INSN
908
909 #define INSN(NAME, REGISTER) \
910 void MacroAssembler::NAME(Label &l, Register temp) { \
911 jal(REGISTER, l, temp); \
912 } \
913
914 INSN(j, x0);
915 INSN(jal, x1);
916
917 #undef INSN
918
919 void MacroAssembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) {
920 if (L.is_bound()) {
921 (this->*insn)(Rt, target(L), tmp);
922 } else {
923 L.add_patch_at(code(), locator());
924 (this->*insn)(Rt, pc(), tmp);
925 }
926 }
927
928 void MacroAssembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) {
929 if (L.is_bound()) {
930 (this->*insn)(Rt, target(L));
931 } else {
932 L.add_patch_at(code(), locator());
933 (this->*insn)(Rt, pc());
934 }
935 }
936
937 void MacroAssembler::wrap_label(Register r1, Register r2, Label &L,
938 compare_and_branch_insn insn,
939 compare_and_branch_label_insn neg_insn, bool is_far) {
940 if (is_far) {
941 Label done;
942 (this->*neg_insn)(r1, r2, done, /* is_far */ false);
943 j(L);
944 bind(done);
945 } else {
946 if (L.is_bound()) {
947 (this->*insn)(r1, r2, target(L));
948 } else {
949 L.add_patch_at(code(), locator());
950 (this->*insn)(r1, r2, pc());
951 }
952 }
953 }
954
955 #define INSN(NAME, NEG_INSN) \
956 void MacroAssembler::NAME(Register Rs1, Register Rs2, Label &L, bool is_far) { \
957 wrap_label(Rs1, Rs2, L, &MacroAssembler::NAME, &MacroAssembler::NEG_INSN, is_far); \
958 }
959
960 INSN(beq, bne);
961 INSN(bne, beq);
962 INSN(blt, bge);
963 INSN(bge, blt);
964 INSN(bltu, bgeu);
965 INSN(bgeu, bltu);
966
967 #undef INSN
968
969 #define INSN(NAME) \
970 void MacroAssembler::NAME##z(Register Rs, const address dest) { \
971 NAME(Rs, zr, dest); \
972 } \
973 void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \
974 NAME(Rs, zr, l, is_far); \
975 } \
976
977 INSN(beq);
978 INSN(bne);
979 INSN(blt);
980 INSN(ble);
981 INSN(bge);
982 INSN(bgt);
983
984 #undef INSN
985
986 #define INSN(NAME, NEG_INSN) \
987 void MacroAssembler::NAME(Register Rs, Register Rt, const address dest) { \
988 NEG_INSN(Rt, Rs, dest); \
989 } \
990 void MacroAssembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \
991 NEG_INSN(Rt, Rs, l, is_far); \
992 }
993
994 INSN(bgt, blt);
995 INSN(ble, bge);
996 INSN(bgtu, bltu);
997 INSN(bleu, bgeu);
998
999 #undef INSN
1000
1001 // Float compare branch instructions
1002
1003 #define INSN(NAME, FLOATCMP, BRANCH) \
1004 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
1005 FLOATCMP##_s(t0, Rs1, Rs2); \
1006 BRANCH(t0, l, is_far); \
1007 } \
1008 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
1009 FLOATCMP##_d(t0, Rs1, Rs2); \
1010 BRANCH(t0, l, is_far); \
1011 }
1012
1013 INSN(beq, feq, bnez);
1014 INSN(bne, feq, beqz);
1015
1016 #undef INSN
1017
1018
1019 #define INSN(NAME, FLOATCMP1, FLOATCMP2) \
1020 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
1021 bool is_far, bool is_unordered) { \
1022 if (is_unordered) { \
1023 /* jump if either source is NaN or condition is expected */ \
1024 FLOATCMP2##_s(t0, Rs2, Rs1); \
1025 beqz(t0, l, is_far); \
1026 } else { \
1027 /* jump if no NaN in source and condition is expected */ \
1028 FLOATCMP1##_s(t0, Rs1, Rs2); \
1029 bnez(t0, l, is_far); \
1030 } \
1031 } \
1032 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
1033 bool is_far, bool is_unordered) { \
1034 if (is_unordered) { \
1035 /* jump if either source is NaN or condition is expected */ \
1036 FLOATCMP2##_d(t0, Rs2, Rs1); \
1037 beqz(t0, l, is_far); \
1038 } else { \
1039 /* jump if no NaN in source and condition is expected */ \
1040 FLOATCMP1##_d(t0, Rs1, Rs2); \
1041 bnez(t0, l, is_far); \
1042 } \
1043 }
1044
1045 INSN(ble, fle, flt);
1046 INSN(blt, flt, fle);
1047
1048 #undef INSN
1049
1050 #define INSN(NAME, CMP) \
1051 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
1052 bool is_far, bool is_unordered) { \
1053 float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \
1054 } \
1055 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
1056 bool is_far, bool is_unordered) { \
1057 double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \
1058 }
1059
1060 INSN(bgt, blt);
1061 INSN(bge, ble);
1062
1063 #undef INSN
1064
1065
1066 #define INSN(NAME, CSR) \
1067 void MacroAssembler::NAME(Register Rd) { \
1068 csrr(Rd, CSR); \
1069 }
1070
1071 INSN(rdinstret, CSR_INSTRET);
1072 INSN(rdcycle, CSR_CYCLE);
1073 INSN(rdtime, CSR_TIME);
1074 INSN(frcsr, CSR_FCSR);
1075 INSN(frrm, CSR_FRM);
1076 INSN(frflags, CSR_FFLAGS);
1077
1078 #undef INSN
1079
1080 void MacroAssembler::csrr(Register Rd, unsigned csr) {
1081 csrrs(Rd, csr, x0);
1082 }
1083
1084 #define INSN(NAME, OPFUN) \
1085 void MacroAssembler::NAME(unsigned csr, Register Rs) { \
1086 OPFUN(x0, csr, Rs); \
1087 }
1088
1089 INSN(csrw, csrrw);
1090 INSN(csrs, csrrs);
1091 INSN(csrc, csrrc);
1092
1093 #undef INSN
1094
1095 #define INSN(NAME, OPFUN) \
1096 void MacroAssembler::NAME(unsigned csr, unsigned imm) { \
1097 OPFUN(x0, csr, imm); \
1098 }
1099
1100 INSN(csrwi, csrrwi);
1101 INSN(csrsi, csrrsi);
1102 INSN(csrci, csrrci);
1103
1104 #undef INSN
1105
1106 #define INSN(NAME, CSR) \
1107 void MacroAssembler::NAME(Register Rd, Register Rs) { \
1108 csrrw(Rd, CSR, Rs); \
1109 }
1110
1111 INSN(fscsr, CSR_FCSR);
1112 INSN(fsrm, CSR_FRM);
1113 INSN(fsflags, CSR_FFLAGS);
1114
1115 #undef INSN
1116
1117 #define INSN(NAME) \
1118 void MacroAssembler::NAME(Register Rs) { \
1119 NAME(x0, Rs); \
1120 }
1121
1122 INSN(fscsr);
1123 INSN(fsrm);
1124 INSN(fsflags);
1125
1126 #undef INSN
1127
1128 void MacroAssembler::fsrmi(Register Rd, unsigned imm) {
1129 guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register");
1130 csrrwi(Rd, CSR_FRM, imm);
1131 }
1132
1133 void MacroAssembler::fsflagsi(Register Rd, unsigned imm) {
1134 csrrwi(Rd, CSR_FFLAGS, imm);
1135 }
1136
1137 #define INSN(NAME) \
1138 void MacroAssembler::NAME(unsigned imm) { \
1139 NAME(x0, imm); \
1140 }
1141
1142 INSN(fsrmi);
1143 INSN(fsflagsi);
1144
1145 #undef INSN
1146
1147 void MacroAssembler::push_reg(Register Rs)
1148 {
1149 addi(esp, esp, 0 - wordSize);
1150 sd(Rs, Address(esp, 0));
1151 }
1152
1153 void MacroAssembler::pop_reg(Register Rd)
1154 {
1155 ld(Rd, Address(esp, 0));
1156 addi(esp, esp, wordSize);
1157 }
1158
1159 int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
1160 int count = 0;
1161 // Scan bitset to accumulate register pairs
1162 for (int reg = 31; reg >= 0; reg--) {
1163 if ((1U << 31) & bitset) {
1164 regs[count++] = reg;
1165 }
1166 bitset <<= 1;
1167 }
1168 return count;
1169 }
1170
1171 // Push integer registers in the bitset supplied. Don't push sp.
1172 // Return the number of words pushed
1173 int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
1174 DEBUG_ONLY(int words_pushed = 0;)
1175 unsigned char regs[32];
1176 int count = bitset_to_regs(bitset, regs);
1177 // reserve one slot to align for odd count
1178 int offset = is_even(count) ? 0 : wordSize;
1179
1180 if (count) {
1181 addi(stack, stack, -count * wordSize - offset);
1182 }
1183 for (int i = count - 1; i >= 0; i--) {
1184 sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
1185 DEBUG_ONLY(words_pushed++;)
1186 }
1187
1188 assert(words_pushed == count, "oops, pushed != count");
1189
1190 return count;
1191 }
1192
1193 int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
1194 DEBUG_ONLY(int words_popped = 0;)
1195 unsigned char regs[32];
1196 int count = bitset_to_regs(bitset, regs);
1197 // reserve one slot to align for odd count
1198 int offset = is_even(count) ? 0 : wordSize;
1199
1200 for (int i = count - 1; i >= 0; i--) {
1201 ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
1202 DEBUG_ONLY(words_popped++;)
1203 }
1204
1205 if (count) {
1206 addi(stack, stack, count * wordSize + offset);
1207 }
1208 assert(words_popped == count, "oops, popped != count");
1209
1210 return count;
1211 }
1212
1213 // Push floating-point registers in the bitset supplied.
1214 // Return the number of words pushed
1215 int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
1216 DEBUG_ONLY(int words_pushed = 0;)
1217 unsigned char regs[32];
1218 int count = bitset_to_regs(bitset, regs);
1219 int push_slots = count + (count & 1);
1220
1221 if (count) {
1222 addi(stack, stack, -push_slots * wordSize);
1223 }
1224
1225 for (int i = count - 1; i >= 0; i--) {
1226 fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize));
1227 DEBUG_ONLY(words_pushed++;)
1228 }
1229
1230 assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
1231
1232 return count;
1233 }
1234
1235 int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
1236 DEBUG_ONLY(int words_popped = 0;)
1237 unsigned char regs[32];
1238 int count = bitset_to_regs(bitset, regs);
1239 int pop_slots = count + (count & 1);
1240
1241 for (int i = count - 1; i >= 0; i--) {
1242 fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize));
1243 DEBUG_ONLY(words_popped++;)
1244 }
1245
1246 if (count) {
1247 addi(stack, stack, pop_slots * wordSize);
1248 }
1249
1250 assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count);
1251
1252 return count;
1253 }
1254
1255 #ifdef COMPILER2
1256 // Push vector registers in the bitset supplied.
1257 // Return the number of words pushed
1258 int MacroAssembler::push_v(unsigned int bitset, Register stack) {
1259 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
1260
1261 // Scan bitset to accumulate register pairs
1262 unsigned char regs[32];
1263 int count = bitset_to_regs(bitset, regs);
1264
1265 for (int i = 0; i < count; i++) {
1266 sub(stack, stack, vector_size_in_bytes);
1267 vs1r_v(as_VectorRegister(regs[i]), stack);
1268 }
1269
1270 return count * vector_size_in_bytes / wordSize;
1271 }
1272
1273 int MacroAssembler::pop_v(unsigned int bitset, Register stack) {
1274 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
1275
1276 // Scan bitset to accumulate register pairs
1277 unsigned char regs[32];
1278 int count = bitset_to_regs(bitset, regs);
1279
1280 for (int i = count - 1; i >= 0; i--) {
1281 vl1r_v(as_VectorRegister(regs[i]), stack);
1282 add(stack, stack, vector_size_in_bytes);
1283 }
1284
1285 return count * vector_size_in_bytes / wordSize;
1286 }
1287 #endif // COMPILER2
1288
1289 void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
1290 // Push integer registers x7, x10-x17, x28-x31.
1291 push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
1292
1293 // Push float registers f0-f7, f10-f17, f28-f31.
1294 addi(sp, sp, - wordSize * 20);
1295 int offset = 0;
1296 for (int i = 0; i < 32; i++) {
1297 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
1298 fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
1299 }
1300 }
1301 }
1302
1303 void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
1304 int offset = 0;
1305 for (int i = 0; i < 32; i++) {
1306 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
1307 fld(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
1308 }
1309 }
1310 addi(sp, sp, wordSize * 20);
1311
1312 pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
1313 }
1314
1315 void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) {
1316 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
1317 push_reg(RegSet::range(x5, x31), sp);
1318
1319 // float registers
1320 addi(sp, sp, - 32 * wordSize);
1321 for (int i = 0; i < 32; i++) {
1322 fsd(as_FloatRegister(i), Address(sp, i * wordSize));
1323 }
1324
1325 // vector registers
1326 if (save_vectors) {
1327 sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers);
1328 vsetvli(t0, x0, Assembler::e64, Assembler::m8);
1329 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
1330 add(t0, sp, vector_size_in_bytes * i);
1331 vse64_v(as_VectorRegister(i), t0);
1332 }
1333 }
1334 }
1335
1336 void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) {
1337 // vector registers
1338 if (restore_vectors) {
1339 vsetvli(t0, x0, Assembler::e64, Assembler::m8);
1340 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
1341 vle64_v(as_VectorRegister(i), sp);
1342 add(sp, sp, vector_size_in_bytes * 8);
1343 }
1344 }
1345
1346 // float registers
1347 for (int i = 0; i < 32; i++) {
1348 fld(as_FloatRegister(i), Address(sp, i * wordSize));
1349 }
1350 addi(sp, sp, 32 * wordSize);
1351
1352 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
1353 pop_reg(RegSet::range(x5, x31), sp);
1354 }
1355
1356 static int patch_offset_in_jal(address branch, int64_t offset) {
1357 assert(Assembler::is_simm21(offset) && ((offset % 2) == 0),
1358 "offset is too large to be patched in one jal instruction!\n");
1359 Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31]
1360 Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21]
1361 Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20]
1362 Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12]
1363 return NativeInstruction::instruction_size; // only one instruction
1364 }
1365
1366 static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
1367 assert(Assembler::is_simm13(offset) && ((offset % 2) == 0),
1368 "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
1369 Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31]
1370 Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25]
1371 Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7]
1372 Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8]
1373 return NativeInstruction::instruction_size; // only one instruction
1374 }
1375
1376 static int patch_offset_in_pc_relative(address branch, int64_t offset) {
1377 const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load
1378 Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12]
1379 Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20]
1380 return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
1381 }
1382
1383 static int patch_addr_in_movptr(address branch, address target) {
1384 const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load
1385 int32_t lower = ((intptr_t)target << 35) >> 35;
1386 int64_t upper = ((intptr_t)target - lower) >> 29;
1387 Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12]
1388 Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20]
1389 Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20]
1390 Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20]
1391 return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
1392 }
1393
1394 static int patch_imm_in_li64(address branch, address target) {
1395 const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi
1396 int64_t lower = (intptr_t)target & 0xffffffff;
1397 lower = lower - ((lower << 44) >> 44);
1398 int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower;
1399 int32_t upper = (tmp_imm - (int32_t)lower) >> 32;
1400 int64_t tmp_upper = upper, tmp_lower = upper;
1401 tmp_lower = (tmp_lower << 52) >> 52;
1402 tmp_upper -= tmp_lower;
1403 tmp_upper >>= 12;
1404 // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:20] == 0x7ff && target[19] == 1),
1405 // upper = target[63:32] + 1.
1406 Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui.
1407 Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi.
1408 // Load the rest 32 bits.
1409 Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi.
1410 Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi.
1411 Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi.
1412 return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
1413 }
1414
1415 static int patch_imm_in_li16u(address branch, uint16_t target) {
1416 Assembler::patch(branch, 31, 12, target); // patch lui only
1417 return NativeInstruction::instruction_size;
1418 }
1419
1420 int MacroAssembler::patch_imm_in_li32(address branch, int32_t target) {
1421 const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw
1422 int64_t upper = (intptr_t)target;
1423 int32_t lower = (((int32_t)target) << 20) >> 20;
1424 upper -= lower;
1425 upper = (int32_t)upper;
1426 Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui.
1427 Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw.
1428 return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
1429 }
1430
1431 static long get_offset_of_jal(address insn_addr) {
1432 assert_cond(insn_addr != nullptr);
1433 long offset = 0;
1434 unsigned insn = Assembler::ld_instr(insn_addr);
1435 long val = (long)Assembler::sextract(insn, 31, 12);
1436 offset |= ((val >> 19) & 0x1) << 20;
1437 offset |= (val & 0xff) << 12;
1438 offset |= ((val >> 8) & 0x1) << 11;
1439 offset |= ((val >> 9) & 0x3ff) << 1;
1440 offset = (offset << 43) >> 43;
1441 return offset;
1442 }
1443
1444 static long get_offset_of_conditional_branch(address insn_addr) {
1445 long offset = 0;
1446 assert_cond(insn_addr != nullptr);
1447 unsigned insn = Assembler::ld_instr(insn_addr);
1448 offset = (long)Assembler::sextract(insn, 31, 31);
1449 offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);
1450 offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);
1451 offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);
1452 offset = (offset << 41) >> 41;
1453 return offset;
1454 }
1455
1456 static long get_offset_of_pc_relative(address insn_addr) {
1457 long offset = 0;
1458 assert_cond(insn_addr != nullptr);
1459 offset = ((long)(Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12))) << 12; // Auipc.
1460 offset += ((long)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addi/Jalr/Load.
1461 offset = (offset << 32) >> 32;
1462 return offset;
1463 }
1464
1465 static address get_target_of_movptr(address insn_addr) {
1466 assert_cond(insn_addr != nullptr);
1467 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 29; // Lui.
1468 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 17; // Addi.
1469 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 6; // Addi.
1470 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)); // Addi/Jalr/Load.
1471 return (address) target_address;
1472 }
1473
1474 static address get_target_of_li64(address insn_addr) {
1475 assert_cond(insn_addr != nullptr);
1476 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 44; // Lui.
1477 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 32; // Addi.
1478 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 20; // Addi.
1479 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)) << 8; // Addi.
1480 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 28), 31, 20)); // Addi.
1481 return (address)target_address;
1482 }
1483
1484 address MacroAssembler::get_target_of_li32(address insn_addr) {
1485 assert_cond(insn_addr != nullptr);
1486 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 12; // Lui.
1487 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addiw.
1488 return (address)target_address;
1489 }
1490
1491 // Patch any kind of instruction; there may be several instructions.
1492 // Return the total length (in bytes) of the instructions.
1493 int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
1494 assert_cond(branch != nullptr);
1495 int64_t offset = target - branch;
1496 if (NativeInstruction::is_jal_at(branch)) { // jal
1497 return patch_offset_in_jal(branch, offset);
1498 } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne
1499 return patch_offset_in_conditional_branch(branch, offset);
1500 } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load
1501 return patch_offset_in_pc_relative(branch, offset);
1502 } else if (NativeInstruction::is_movptr_at(branch)) { // movptr
1503 return patch_addr_in_movptr(branch, target);
1504 } else if (NativeInstruction::is_li64_at(branch)) { // li64
1505 return patch_imm_in_li64(branch, target);
1506 } else if (NativeInstruction::is_li32_at(branch)) { // li32
1507 int64_t imm = (intptr_t)target;
1508 return patch_imm_in_li32(branch, (int32_t)imm);
1509 } else if (NativeInstruction::is_li16u_at(branch)) {
1510 int64_t imm = (intptr_t)target;
1511 return patch_imm_in_li16u(branch, (uint16_t)imm);
1512 } else {
1513 #ifdef ASSERT
1514 tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n",
1515 Assembler::ld_instr(branch), p2i(branch));
1516 Disassembler::decode(branch - 16, branch + 16);
1517 #endif
1518 ShouldNotReachHere();
1519 return -1;
1520 }
1521 }
1522
1523 address MacroAssembler::target_addr_for_insn(address insn_addr) {
1524 long offset = 0;
1525 assert_cond(insn_addr != nullptr);
1526 if (NativeInstruction::is_jal_at(insn_addr)) { // jal
1527 offset = get_offset_of_jal(insn_addr);
1528 } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne
1529 offset = get_offset_of_conditional_branch(insn_addr);
1530 } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load
1531 offset = get_offset_of_pc_relative(insn_addr);
1532 } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr
1533 return get_target_of_movptr(insn_addr);
1534 } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64
1535 return get_target_of_li64(insn_addr);
1536 } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32
1537 return get_target_of_li32(insn_addr);
1538 } else {
1539 ShouldNotReachHere();
1540 }
1541 return address(((uintptr_t)insn_addr + offset));
1542 }
1543
1544 int MacroAssembler::patch_oop(address insn_addr, address o) {
1545 // OOPs are either narrow (32 bits) or wide (48 bits). We encode
1546 // narrow OOPs by setting the upper 16 bits in the first
1547 // instruction.
1548 if (NativeInstruction::is_li32_at(insn_addr)) {
1549 // Move narrow OOP
1550 uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
1551 return patch_imm_in_li32(insn_addr, (int32_t)n);
1552 } else if (NativeInstruction::is_movptr_at(insn_addr)) {
1553 // Move wide OOP
1554 return patch_addr_in_movptr(insn_addr, o);
1555 }
1556 ShouldNotReachHere();
1557 return -1;
1558 }
1559
1560 void MacroAssembler::reinit_heapbase() {
1561 if (UseCompressedOops) {
1562 if (Universe::is_fully_initialized()) {
1563 mv(xheapbase, CompressedOops::ptrs_base());
1564 } else {
1565 ExternalAddress target(CompressedOops::ptrs_base_addr());
1566 relocate(target.rspec(), [&] {
1567 int32_t offset;
1568 la_patchable(xheapbase, target, offset);
1569 ld(xheapbase, Address(xheapbase, offset));
1570 });
1571 }
1572 }
1573 }
1574
1575 void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset) {
1576 int64_t imm64 = (int64_t)addr;
1577 #ifndef PRODUCT
1578 {
1579 char buffer[64];
1580 snprintf(buffer, sizeof(buffer), "0x%" PRIx64, imm64);
1581 block_comment(buffer);
1582 }
1583 #endif
1584 assert((uintptr_t)imm64 < (1ull << 48), "48-bit overflow in address constant");
1585 // Load upper 31 bits
1586 int64_t imm = imm64 >> 17;
1587 int64_t upper = imm, lower = imm;
1588 lower = (lower << 52) >> 52;
1589 upper -= lower;
1590 upper = (int32_t)upper;
1591 lui(Rd, upper);
1592 addi(Rd, Rd, lower);
1593
1594 // Load the rest 17 bits.
1595 slli(Rd, Rd, 11);
1596 addi(Rd, Rd, (imm64 >> 6) & 0x7ff);
1597 slli(Rd, Rd, 6);
1598
1599 // This offset will be used by following jalr/ld.
1600 offset = imm64 & 0x3f;
1601 }
1602
1603 void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register temp) {
1604 if (is_simm12(increment)) {
1605 addi(Rd, Rn, increment);
1606 } else {
1607 assert_different_registers(Rn, temp);
1608 li(temp, increment);
1609 add(Rd, Rn, temp);
1610 }
1611 }
1612
1613 void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) {
1614 if (is_simm12(increment)) {
1615 addiw(Rd, Rn, increment);
1616 } else {
1617 assert_different_registers(Rn, temp);
1618 li(temp, increment);
1619 addw(Rd, Rn, temp);
1620 }
1621 }
1622
1623 void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) {
1624 if (is_simm12(-decrement)) {
1625 addi(Rd, Rn, -decrement);
1626 } else {
1627 assert_different_registers(Rn, temp);
1628 li(temp, decrement);
1629 sub(Rd, Rn, temp);
1630 }
1631 }
1632
1633 void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) {
1634 if (is_simm12(-decrement)) {
1635 addiw(Rd, Rn, -decrement);
1636 } else {
1637 assert_different_registers(Rn, temp);
1638 li(temp, decrement);
1639 subw(Rd, Rn, temp);
1640 }
1641 }
1642
1643 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
1644 andr(Rd, Rs1, Rs2);
1645 sign_extend(Rd, Rd, 32);
1646 }
1647
1648 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) {
1649 orr(Rd, Rs1, Rs2);
1650 sign_extend(Rd, Rd, 32);
1651 }
1652
1653 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) {
1654 xorr(Rd, Rs1, Rs2);
1655 sign_extend(Rd, Rd, 32);
1656 }
1657
1658 // Rd = Rs1 & (~Rd2)
1659 void MacroAssembler::andn(Register Rd, Register Rs1, Register Rs2) {
1660 if (UseZbb) {
1661 Assembler::andn(Rd, Rs1, Rs2);
1662 return;
1663 }
1664
1665 notr(Rd, Rs2);
1666 andr(Rd, Rs1, Rd);
1667 }
1668
1669 // Rd = Rs1 | (~Rd2)
1670 void MacroAssembler::orn(Register Rd, Register Rs1, Register Rs2) {
1671 if (UseZbb) {
1672 Assembler::orn(Rd, Rs1, Rs2);
1673 return;
1674 }
1675
1676 notr(Rd, Rs2);
1677 orr(Rd, Rs1, Rd);
1678 }
1679
1680 // Note: load_unsigned_short used to be called load_unsigned_word.
1681 int MacroAssembler::load_unsigned_short(Register dst, Address src) {
1682 int off = offset();
1683 lhu(dst, src);
1684 return off;
1685 }
1686
1687 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
1688 int off = offset();
1689 lbu(dst, src);
1690 return off;
1691 }
1692
1693 int MacroAssembler::load_signed_short(Register dst, Address src) {
1694 int off = offset();
1695 lh(dst, src);
1696 return off;
1697 }
1698
1699 int MacroAssembler::load_signed_byte(Register dst, Address src) {
1700 int off = offset();
1701 lb(dst, src);
1702 return off;
1703 }
1704
1705 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
1706 switch (size_in_bytes) {
1707 case 8: ld(dst, src); break;
1708 case 4: is_signed ? lw(dst, src) : lwu(dst, src); break;
1709 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
1710 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
1711 default: ShouldNotReachHere();
1712 }
1713 }
1714
1715 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes) {
1716 switch (size_in_bytes) {
1717 case 8: sd(src, dst); break;
1718 case 4: sw(src, dst); break;
1719 case 2: sh(src, dst); break;
1720 case 1: sb(src, dst); break;
1721 default: ShouldNotReachHere();
1722 }
1723 }
1724
1725 // granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register
1726 void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) {
1727 if (granularity != 1 && granularity != 2) {
1728 ShouldNotReachHere();
1729 }
1730 if (AvoidUnalignedAccesses && (granularity != 2)) {
1731 assert_different_registers(dst, tmp);
1732 assert_different_registers(tmp, src.base());
1733 is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1));
1734 slli(tmp, tmp, 8);
1735 lbu(dst, src);
1736 add(dst, dst, tmp);
1737 } else {
1738 is_signed ? lh(dst, src) : lhu(dst, src);
1739 }
1740 }
1741
1742 // granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register
1743 void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) {
1744 if (AvoidUnalignedAccesses && (granularity != 4)) {
1745 switch(granularity) {
1746 case 1:
1747 assert_different_registers(dst, tmp, src.base());
1748 lbu(dst, src);
1749 lbu(tmp, Address(src.base(), src.offset() + 1));
1750 slli(tmp, tmp, 8);
1751 add(dst, dst, tmp);
1752 lbu(tmp, Address(src.base(), src.offset() + 2));
1753 slli(tmp, tmp, 16);
1754 add(dst, dst, tmp);
1755 is_signed ? lb(tmp, Address(src.base(), src.offset() + 3)) : lbu(tmp, Address(src.base(), src.offset() + 3));
1756 slli(tmp, tmp, 24);
1757 add(dst, dst, tmp);
1758 break;
1759 case 2:
1760 assert_different_registers(dst, tmp);
1761 assert_different_registers(tmp, src.base());
1762 is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2));
1763 slli(tmp, tmp, 16);
1764 lhu(dst, src);
1765 add(dst, dst, tmp);
1766 break;
1767 default:
1768 ShouldNotReachHere();
1769 }
1770 } else {
1771 is_signed ? lw(dst, src) : lwu(dst, src);
1772 }
1773 }
1774
1775 // granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register
1776 void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) {
1777 if (AvoidUnalignedAccesses && (granularity != 8)) {
1778 switch(granularity){
1779 case 1:
1780 assert_different_registers(dst, tmp, src.base());
1781 lbu(dst, src);
1782 lbu(tmp, Address(src.base(), src.offset() + 1));
1783 slli(tmp, tmp, 8);
1784 add(dst, dst, tmp);
1785 lbu(tmp, Address(src.base(), src.offset() + 2));
1786 slli(tmp, tmp, 16);
1787 add(dst, dst, tmp);
1788 lbu(tmp, Address(src.base(), src.offset() + 3));
1789 slli(tmp, tmp, 24);
1790 add(dst, dst, tmp);
1791 lbu(tmp, Address(src.base(), src.offset() + 4));
1792 slli(tmp, tmp, 32);
1793 add(dst, dst, tmp);
1794 lbu(tmp, Address(src.base(), src.offset() + 5));
1795 slli(tmp, tmp, 40);
1796 add(dst, dst, tmp);
1797 lbu(tmp, Address(src.base(), src.offset() + 6));
1798 slli(tmp, tmp, 48);
1799 add(dst, dst, tmp);
1800 lbu(tmp, Address(src.base(), src.offset() + 7));
1801 slli(tmp, tmp, 56);
1802 add(dst, dst, tmp);
1803 break;
1804 case 2:
1805 assert_different_registers(dst, tmp, src.base());
1806 lhu(dst, src);
1807 lhu(tmp, Address(src.base(), src.offset() + 2));
1808 slli(tmp, tmp, 16);
1809 add(dst, dst, tmp);
1810 lhu(tmp, Address(src.base(), src.offset() + 4));
1811 slli(tmp, tmp, 32);
1812 add(dst, dst, tmp);
1813 lhu(tmp, Address(src.base(), src.offset() + 6));
1814 slli(tmp, tmp, 48);
1815 add(dst, dst, tmp);
1816 break;
1817 case 4:
1818 assert_different_registers(dst, tmp);
1819 assert_different_registers(tmp, src.base());
1820 lwu(tmp, Address(src.base(), src.offset() + 4));
1821 slli(tmp, tmp, 32);
1822 lwu(dst, src);
1823 add(dst, dst, tmp);
1824 break;
1825 default:
1826 ShouldNotReachHere();
1827 }
1828 } else {
1829 ld(dst, src);
1830 }
1831 }
1832
1833
1834 // reverse bytes in halfword in lower 16 bits and sign-extend
1835 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
1836 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) {
1837 if (UseZbb) {
1838 rev8(Rd, Rs);
1839 srai(Rd, Rd, 48);
1840 return;
1841 }
1842 assert_different_registers(Rs, tmp);
1843 assert_different_registers(Rd, tmp);
1844 srli(tmp, Rs, 8);
1845 andi(tmp, tmp, 0xFF);
1846 slli(Rd, Rs, 56);
1847 srai(Rd, Rd, 48); // sign-extend
1848 orr(Rd, Rd, tmp);
1849 }
1850
1851 // reverse bytes in lower word and sign-extend
1852 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
1853 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1854 if (UseZbb) {
1855 rev8(Rd, Rs);
1856 srai(Rd, Rd, 32);
1857 return;
1858 }
1859 assert_different_registers(Rs, tmp1, tmp2);
1860 assert_different_registers(Rd, tmp1, tmp2);
1861 revb_h_w_u(Rd, Rs, tmp1, tmp2);
1862 slli(tmp2, Rd, 48);
1863 srai(tmp2, tmp2, 32); // sign-extend
1864 srli(Rd, Rd, 16);
1865 orr(Rd, Rd, tmp2);
1866 }
1867
1868 // reverse bytes in halfword in lower 16 bits and zero-extend
1869 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1870 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) {
1871 if (UseZbb) {
1872 rev8(Rd, Rs);
1873 srli(Rd, Rd, 48);
1874 return;
1875 }
1876 assert_different_registers(Rs, tmp);
1877 assert_different_registers(Rd, tmp);
1878 srli(tmp, Rs, 8);
1879 andi(tmp, tmp, 0xFF);
1880 andi(Rd, Rs, 0xFF);
1881 slli(Rd, Rd, 8);
1882 orr(Rd, Rd, tmp);
1883 }
1884
1885 // reverse bytes in halfwords in lower 32 bits and zero-extend
1886 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1887 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1888 if (UseZbb) {
1889 rev8(Rd, Rs);
1890 rori(Rd, Rd, 32);
1891 roriw(Rd, Rd, 16);
1892 zero_extend(Rd, Rd, 32);
1893 return;
1894 }
1895 assert_different_registers(Rs, tmp1, tmp2);
1896 assert_different_registers(Rd, tmp1, tmp2);
1897 srli(tmp2, Rs, 16);
1898 revb_h_h_u(tmp2, tmp2, tmp1);
1899 revb_h_h_u(Rd, Rs, tmp1);
1900 slli(tmp2, tmp2, 16);
1901 orr(Rd, Rd, tmp2);
1902 }
1903
1904 // This method is only used for revb_h
1905 // Rd = Rs[47:0] Rs[55:48] Rs[63:56]
1906 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1907 assert_different_registers(Rs, tmp1, tmp2);
1908 assert_different_registers(Rd, tmp1);
1909 srli(tmp1, Rs, 48);
1910 andi(tmp2, tmp1, 0xFF);
1911 slli(tmp2, tmp2, 8);
1912 srli(tmp1, tmp1, 8);
1913 orr(tmp1, tmp1, tmp2);
1914 slli(Rd, Rs, 16);
1915 orr(Rd, Rd, tmp1);
1916 }
1917
1918 // reverse bytes in each halfword
1919 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
1920 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1921 if (UseZbb) {
1922 assert_different_registers(Rs, tmp1);
1923 assert_different_registers(Rd, tmp1);
1924 rev8(Rd, Rs);
1925 zero_extend(tmp1, Rd, 32);
1926 roriw(tmp1, tmp1, 16);
1927 slli(tmp1, tmp1, 32);
1928 srli(Rd, Rd, 32);
1929 roriw(Rd, Rd, 16);
1930 zero_extend(Rd, Rd, 32);
1931 orr(Rd, Rd, tmp1);
1932 return;
1933 }
1934 assert_different_registers(Rs, tmp1, tmp2);
1935 assert_different_registers(Rd, tmp1, tmp2);
1936 revb_h_helper(Rd, Rs, tmp1, tmp2);
1937 for (int i = 0; i < 3; ++i) {
1938 revb_h_helper(Rd, Rd, tmp1, tmp2);
1939 }
1940 }
1941
1942 // reverse bytes in each word
1943 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24]
1944 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1945 if (UseZbb) {
1946 rev8(Rd, Rs);
1947 rori(Rd, Rd, 32);
1948 return;
1949 }
1950 assert_different_registers(Rs, tmp1, tmp2);
1951 assert_different_registers(Rd, tmp1, tmp2);
1952 revb(Rd, Rs, tmp1, tmp2);
1953 ror_imm(Rd, Rd, 32);
1954 }
1955
1956 // reverse bytes in doubleword
1957 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56]
1958 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1959 if (UseZbb) {
1960 rev8(Rd, Rs);
1961 return;
1962 }
1963 assert_different_registers(Rs, tmp1, tmp2);
1964 assert_different_registers(Rd, tmp1, tmp2);
1965 andi(tmp1, Rs, 0xFF);
1966 slli(tmp1, tmp1, 8);
1967 for (int step = 8; step < 56; step += 8) {
1968 srli(tmp2, Rs, step);
1969 andi(tmp2, tmp2, 0xFF);
1970 orr(tmp1, tmp1, tmp2);
1971 slli(tmp1, tmp1, 8);
1972 }
1973 srli(Rd, Rs, 56);
1974 andi(Rd, Rd, 0xFF);
1975 orr(Rd, tmp1, Rd);
1976 }
1977
1978 // rotate right with shift bits
1979 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp)
1980 {
1981 if (UseZbb) {
1982 rori(dst, src, shift);
1983 return;
1984 }
1985
1986 assert_different_registers(dst, tmp);
1987 assert_different_registers(src, tmp);
1988 assert(shift < 64, "shift amount must be < 64");
1989 slli(tmp, src, 64 - shift);
1990 srli(dst, src, shift);
1991 orr(dst, dst, tmp);
1992 }
1993
1994 // rotate left with shift bits, 32-bit version
1995 void MacroAssembler::rolw_imm(Register dst, Register src, uint32_t shift, Register tmp) {
1996 if (UseZbb) {
1997 // no roliw available
1998 roriw(dst, src, 32 - shift);
1999 return;
2000 }
2001
2002 assert_different_registers(dst, tmp);
2003 assert_different_registers(src, tmp);
2004 assert(shift < 32, "shift amount must be < 32");
2005 srliw(tmp, src, 32 - shift);
2006 slliw(dst, src, shift);
2007 orr(dst, dst, tmp);
2008 }
2009
2010 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) {
2011 if (is_simm12(imm)) {
2012 and_imm12(Rd, Rn, imm);
2013 } else {
2014 assert_different_registers(Rn, tmp);
2015 mv(tmp, imm);
2016 andr(Rd, Rn, tmp);
2017 }
2018 }
2019
2020 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) {
2021 ld(tmp1, adr);
2022 if (src.is_register()) {
2023 orr(tmp1, tmp1, src.as_register());
2024 } else {
2025 if (is_simm12(src.as_constant())) {
2026 ori(tmp1, tmp1, src.as_constant());
2027 } else {
2028 assert_different_registers(tmp1, tmp2);
2029 mv(tmp2, src.as_constant());
2030 orr(tmp1, tmp1, tmp2);
2031 }
2032 }
2033 sd(tmp1, adr);
2034 }
2035
2036 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L) {
2037 assert_different_registers(oop, trial_klass, tmp1, tmp2);
2038 if (UseCompressedClassPointers) {
2039 lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes()));
2040 if (CompressedKlassPointers::base() == nullptr) {
2041 slli(tmp1, tmp1, CompressedKlassPointers::shift());
2042 beq(trial_klass, tmp1, L);
2043 return;
2044 }
2045 decode_klass_not_null(tmp1, tmp2);
2046 } else {
2047 ld(tmp1, Address(oop, oopDesc::klass_offset_in_bytes()));
2048 }
2049 beq(trial_klass, tmp1, L);
2050 }
2051
2052 // Move an oop into a register.
2053 void MacroAssembler::movoop(Register dst, jobject obj) {
2054 int oop_index;
2055 if (obj == nullptr) {
2056 oop_index = oop_recorder()->allocate_oop_index(obj);
2057 } else {
2058 #ifdef ASSERT
2059 {
2060 ThreadInVMfromUnknown tiv;
2061 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
2062 }
2063 #endif
2064 oop_index = oop_recorder()->find_index(obj);
2065 }
2066 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2067
2068 if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) {
2069 mv(dst, Address((address)obj, rspec));
2070 } else {
2071 address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
2072 ld_constant(dst, Address(dummy, rspec));
2073 }
2074 }
2075
2076 // Move a metadata address into a register.
2077 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
2078 int oop_index;
2079 if (obj == nullptr) {
2080 oop_index = oop_recorder()->allocate_metadata_index(obj);
2081 } else {
2082 oop_index = oop_recorder()->find_index(obj);
2083 }
2084 RelocationHolder rspec = metadata_Relocation::spec(oop_index);
2085 mv(dst, Address((address)obj, rspec));
2086 }
2087
2088 // Writes to stack successive pages until offset reached to check for
2089 // stack overflow + shadow pages. This clobbers tmp.
2090 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
2091 assert_different_registers(tmp, size, t0);
2092 // Bang stack for total size given plus shadow page size.
2093 // Bang one page at a time because large size can bang beyond yellow and
2094 // red zones.
2095 mv(t0, (int)os::vm_page_size());
2096 Label loop;
2097 bind(loop);
2098 sub(tmp, sp, t0);
2099 subw(size, size, t0);
2100 sd(size, Address(tmp));
2101 bgtz(size, loop);
2102
2103 // Bang down shadow pages too.
2104 // At this point, (tmp-0) is the last address touched, so don't
2105 // touch it again. (It was touched as (tmp-pagesize) but then tmp
2106 // was post-decremented.) Skip this address by starting at i=1, and
2107 // touch a few more pages below. N.B. It is important to touch all
2108 // the way down to and including i=StackShadowPages.
2109 for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / (int)os::vm_page_size()) - 1; i++) {
2110 // this could be any sized move but this is can be a debugging crumb
2111 // so the bigger the better.
2112 sub(tmp, tmp, (int)os::vm_page_size());
2113 sd(size, Address(tmp, 0));
2114 }
2115 }
2116
2117 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) {
2118 int32_t offset = 0;
2119 _masm = masm;
2120 ExternalAddress target((address)flag_addr);
2121 _masm->relocate(target.rspec(), [&] {
2122 int32_t offset;
2123 _masm->la_patchable(t0, target, offset);
2124 _masm->lbu(t0, Address(t0, offset));
2125 });
2126 if (value) {
2127 _masm->bnez(t0, _label);
2128 } else {
2129 _masm->beqz(t0, _label);
2130 }
2131 }
2132
2133 SkipIfEqual::~SkipIfEqual() {
2134 _masm->bind(_label);
2135 _masm = nullptr;
2136 }
2137
2138 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) {
2139 const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2140 ld(dst, Address(xmethod, Method::const_offset()));
2141 ld(dst, Address(dst, ConstMethod::constants_offset()));
2142 ld(dst, Address(dst, ConstantPool::pool_holder_offset()));
2143 ld(dst, Address(dst, mirror_offset));
2144 resolve_oop_handle(dst, tmp1, tmp2);
2145 }
2146
2147 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) {
2148 // OopHandle::resolve is an indirection.
2149 assert_different_registers(result, tmp1, tmp2);
2150 access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2);
2151 }
2152
2153 // ((WeakHandle)result).resolve()
2154 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) {
2155 assert_different_registers(result, tmp1, tmp2);
2156 Label resolved;
2157
2158 // A null weak handle resolves to null.
2159 beqz(result, resolved);
2160
2161 // Only 64 bit platforms support GCs that require a tmp register
2162 // Only IN_HEAP loads require a thread_tmp register
2163 // WeakHandle::resolve is an indirection like jweak.
2164 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
2165 result, Address(result), tmp1, tmp2);
2166 bind(resolved);
2167 }
2168
2169 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
2170 Register dst, Address src,
2171 Register tmp1, Register tmp2) {
2172 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2173 decorators = AccessInternal::decorator_fixup(decorators, type);
2174 bool as_raw = (decorators & AS_RAW) != 0;
2175 if (as_raw) {
2176 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2);
2177 } else {
2178 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2);
2179 }
2180 }
2181
2182 void MacroAssembler::null_check(Register reg, int offset) {
2183 if (needs_explicit_null_check(offset)) {
2184 // provoke OS null exception if reg is null by
2185 // accessing M[reg] w/o changing any registers
2186 // NOTE: this is plenty to provoke a segv
2187 ld(zr, Address(reg, 0));
2188 } else {
2189 // nothing to do, (later) access of M[reg + offset]
2190 // will provoke OS null exception if reg is null
2191 }
2192 }
2193
2194 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
2195 Address dst, Register val,
2196 Register tmp1, Register tmp2, Register tmp3) {
2197 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2198 decorators = AccessInternal::decorator_fixup(decorators, type);
2199 bool as_raw = (decorators & AS_RAW) != 0;
2200 if (as_raw) {
2201 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
2202 } else {
2203 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
2204 }
2205 }
2206
2207 // Algorithm must match CompressedOops::encode.
2208 void MacroAssembler::encode_heap_oop(Register d, Register s) {
2209 verify_oop_msg(s, "broken oop in encode_heap_oop");
2210 if (CompressedOops::base() == nullptr) {
2211 if (CompressedOops::shift() != 0) {
2212 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
2213 srli(d, s, LogMinObjAlignmentInBytes);
2214 } else {
2215 mv(d, s);
2216 }
2217 } else {
2218 Label notNull;
2219 sub(d, s, xheapbase);
2220 bgez(d, notNull);
2221 mv(d, zr);
2222 bind(notNull);
2223 if (CompressedOops::shift() != 0) {
2224 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
2225 srli(d, d, CompressedOops::shift());
2226 }
2227 }
2228 }
2229
2230 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
2231 assert_different_registers(dst, tmp);
2232 assert_different_registers(src, tmp);
2233 if (UseCompressedClassPointers) {
2234 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
2235 decode_klass_not_null(dst, tmp);
2236 } else {
2237 ld(dst, Address(src, oopDesc::klass_offset_in_bytes()));
2238 }
2239 }
2240
2241 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
2242 // FIXME: Should this be a store release? concurrent gcs assumes
2243 // klass length is valid if klass field is not null.
2244 if (UseCompressedClassPointers) {
2245 encode_klass_not_null(src, tmp);
2246 sw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
2247 } else {
2248 sd(src, Address(dst, oopDesc::klass_offset_in_bytes()));
2249 }
2250 }
2251
2252 void MacroAssembler::store_klass_gap(Register dst, Register src) {
2253 if (UseCompressedClassPointers) {
2254 // Store to klass gap in destination
2255 sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
2256 }
2257 }
2258
2259 void MacroAssembler::decode_klass_not_null(Register r, Register tmp) {
2260 assert_different_registers(r, tmp);
2261 decode_klass_not_null(r, r, tmp);
2262 }
2263
2264 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) {
2265 assert(UseCompressedClassPointers, "should only be used for compressed headers");
2266
2267 if (CompressedKlassPointers::base() == nullptr) {
2268 if (CompressedKlassPointers::shift() != 0) {
2269 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
2270 slli(dst, src, LogKlassAlignmentInBytes);
2271 } else {
2272 mv(dst, src);
2273 }
2274 return;
2275 }
2276
2277 Register xbase = dst;
2278 if (dst == src) {
2279 xbase = tmp;
2280 }
2281
2282 assert_different_registers(src, xbase);
2283 mv(xbase, (uintptr_t)CompressedKlassPointers::base());
2284
2285 if (CompressedKlassPointers::shift() != 0) {
2286 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
2287 assert_different_registers(t0, xbase);
2288 shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes);
2289 } else {
2290 add(dst, xbase, src);
2291 }
2292 }
2293
2294 void MacroAssembler::encode_klass_not_null(Register r, Register tmp) {
2295 assert_different_registers(r, tmp);
2296 encode_klass_not_null(r, r, tmp);
2297 }
2298
2299 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) {
2300 assert(UseCompressedClassPointers, "should only be used for compressed headers");
2301
2302 if (CompressedKlassPointers::base() == nullptr) {
2303 if (CompressedKlassPointers::shift() != 0) {
2304 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
2305 srli(dst, src, LogKlassAlignmentInBytes);
2306 } else {
2307 mv(dst, src);
2308 }
2309 return;
2310 }
2311
2312 if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 &&
2313 CompressedKlassPointers::shift() == 0) {
2314 zero_extend(dst, src, 32);
2315 return;
2316 }
2317
2318 Register xbase = dst;
2319 if (dst == src) {
2320 xbase = tmp;
2321 }
2322
2323 assert_different_registers(src, xbase);
2324 mv(xbase, (uintptr_t)CompressedKlassPointers::base());
2325 sub(dst, src, xbase);
2326 if (CompressedKlassPointers::shift() != 0) {
2327 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
2328 srli(dst, dst, LogKlassAlignmentInBytes);
2329 }
2330 }
2331
2332 void MacroAssembler::decode_heap_oop_not_null(Register r) {
2333 decode_heap_oop_not_null(r, r);
2334 }
2335
2336 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
2337 assert(UseCompressedOops, "should only be used for compressed headers");
2338 assert(Universe::heap() != nullptr, "java heap should be initialized");
2339 // Cannot assert, unverified entry point counts instructions (see .ad file)
2340 // vtableStubs also counts instructions in pd_code_size_limit.
2341 // Also do not verify_oop as this is called by verify_oop.
2342 if (CompressedOops::shift() != 0) {
2343 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
2344 slli(dst, src, LogMinObjAlignmentInBytes);
2345 if (CompressedOops::base() != nullptr) {
2346 add(dst, xheapbase, dst);
2347 }
2348 } else {
2349 assert(CompressedOops::base() == nullptr, "sanity");
2350 mv(dst, src);
2351 }
2352 }
2353
2354 void MacroAssembler::decode_heap_oop(Register d, Register s) {
2355 if (CompressedOops::base() == nullptr) {
2356 if (CompressedOops::shift() != 0 || d != s) {
2357 slli(d, s, CompressedOops::shift());
2358 }
2359 } else {
2360 Label done;
2361 mv(d, s);
2362 beqz(s, done);
2363 shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes);
2364 bind(done);
2365 }
2366 verify_oop_msg(d, "broken oop in decode_heap_oop");
2367 }
2368
2369 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
2370 Register tmp2, Register tmp3, DecoratorSet decorators) {
2371 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
2372 }
2373
2374 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
2375 Register tmp2, DecoratorSet decorators) {
2376 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
2377 }
2378
2379 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
2380 Register tmp2, DecoratorSet decorators) {
2381 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2);
2382 }
2383
2384 // Used for storing nulls.
2385 void MacroAssembler::store_heap_oop_null(Address dst) {
2386 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
2387 }
2388
2389 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
2390 bool want_remainder)
2391 {
2392 // Full implementation of Java idiv and irem. The function
2393 // returns the (pc) offset of the div instruction - may be needed
2394 // for implicit exceptions.
2395 //
2396 // input : rs1: dividend
2397 // rs2: divisor
2398 //
2399 // result: either
2400 // quotient (= rs1 idiv rs2)
2401 // remainder (= rs1 irem rs2)
2402
2403
2404 int idivl_offset = offset();
2405 if (!want_remainder) {
2406 divw(result, rs1, rs2);
2407 } else {
2408 remw(result, rs1, rs2); // result = rs1 % rs2;
2409 }
2410 return idivl_offset;
2411 }
2412
2413 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
2414 bool want_remainder)
2415 {
2416 // Full implementation of Java ldiv and lrem. The function
2417 // returns the (pc) offset of the div instruction - may be needed
2418 // for implicit exceptions.
2419 //
2420 // input : rs1: dividend
2421 // rs2: divisor
2422 //
2423 // result: either
2424 // quotient (= rs1 idiv rs2)
2425 // remainder (= rs1 irem rs2)
2426
2427 int idivq_offset = offset();
2428 if (!want_remainder) {
2429 div(result, rs1, rs2);
2430 } else {
2431 rem(result, rs1, rs2); // result = rs1 % rs2;
2432 }
2433 return idivq_offset;
2434 }
2435
2436 // Look up the method for a megamorpic invkkeinterface call.
2437 // The target method is determined by <intf_klass, itable_index>.
2438 // The receiver klass is in recv_klass.
2439 // On success, the result will be in method_result, and execution falls through.
2440 // On failure, execution transfers to the given label.
2441 void MacroAssembler::lookup_interface_method(Register recv_klass,
2442 Register intf_klass,
2443 RegisterOrConstant itable_index,
2444 Register method_result,
2445 Register scan_tmp,
2446 Label& L_no_such_interface,
2447 bool return_method) {
2448 assert_different_registers(recv_klass, intf_klass, scan_tmp);
2449 assert_different_registers(method_result, intf_klass, scan_tmp);
2450 assert(recv_klass != method_result || !return_method,
2451 "recv_klass can be destroyed when mehtid isn't needed");
2452 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
2453 "caller must be same register for non-constant itable index as for method");
2454
2455 // Compute start of first itableOffsetEntry (which is at the end of the vtable).
2456 int vtable_base = in_bytes(Klass::vtable_start_offset());
2457 int itentry_off = in_bytes(itableMethodEntry::method_offset());
2458 int scan_step = itableOffsetEntry::size() * wordSize;
2459 int vte_size = vtableEntry::size_in_bytes();
2460 assert(vte_size == wordSize, "else adjust times_vte_scale");
2461
2462 lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
2463
2464 // %%% Could store the aligned, prescaled offset in the klassoop.
2465 shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
2466 add(scan_tmp, scan_tmp, vtable_base);
2467
2468 if (return_method) {
2469 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
2470 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
2471 if (itable_index.is_register()) {
2472 slli(t0, itable_index.as_register(), 3);
2473 } else {
2474 mv(t0, itable_index.as_constant() << 3);
2475 }
2476 add(recv_klass, recv_klass, t0);
2477 if (itentry_off) {
2478 add(recv_klass, recv_klass, itentry_off);
2479 }
2480 }
2481
2482 Label search, found_method;
2483
2484 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset()));
2485 beq(intf_klass, method_result, found_method);
2486 bind(search);
2487 // Check that the previous entry is non-null. A null entry means that
2488 // the receiver class doesn't implement the interface, and wasn't the
2489 // same as when the caller was compiled.
2490 beqz(method_result, L_no_such_interface, /* is_far */ true);
2491 addi(scan_tmp, scan_tmp, scan_step);
2492 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset()));
2493 bne(intf_klass, method_result, search);
2494
2495 bind(found_method);
2496
2497 // Got a hit.
2498 if (return_method) {
2499 lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset()));
2500 add(method_result, recv_klass, scan_tmp);
2501 ld(method_result, Address(method_result));
2502 }
2503 }
2504
2505 // virtual method calling
2506 void MacroAssembler::lookup_virtual_method(Register recv_klass,
2507 RegisterOrConstant vtable_index,
2508 Register method_result) {
2509 const ByteSize base = Klass::vtable_start_offset();
2510 assert(vtableEntry::size() * wordSize == 8,
2511 "adjust the scaling in the code below");
2512 int vtable_offset_in_bytes = in_bytes(base + vtableEntry::method_offset());
2513
2514 if (vtable_index.is_register()) {
2515 shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
2516 ld(method_result, Address(method_result, vtable_offset_in_bytes));
2517 } else {
2518 vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
2519 ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
2520 }
2521 }
2522
2523 void MacroAssembler::membar(uint32_t order_constraint) {
2524 address prev = pc() - NativeMembar::instruction_size;
2525 address last = code()->last_insn();
2526
2527 if (last != nullptr && nativeInstruction_at(last)->is_membar() && prev == last) {
2528 NativeMembar *bar = NativeMembar_at(prev);
2529 // We are merging two memory barrier instructions. On RISCV we
2530 // can do this simply by ORing them together.
2531 bar->set_kind(bar->get_kind() | order_constraint);
2532 BLOCK_COMMENT("merged membar");
2533 } else {
2534 code()->set_last_insn(pc());
2535
2536 uint32_t predecessor = 0;
2537 uint32_t successor = 0;
2538
2539 membar_mask_to_pred_succ(order_constraint, predecessor, successor);
2540 fence(predecessor, successor);
2541 }
2542 }
2543
2544 // Form an address from base + offset in Rd. Rd my or may not
2545 // actually be used: you must use the Address that is returned. It
2546 // is up to you to ensure that the shift provided matches the size
2547 // of your data.
2548 Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset) {
2549 if (is_simm12(byte_offset)) { // 12: imm in range 2^12
2550 return Address(base, byte_offset);
2551 }
2552
2553 assert_different_registers(Rd, base, noreg);
2554
2555 // Do it the hard way
2556 mv(Rd, byte_offset);
2557 add(Rd, base, Rd);
2558 return Address(Rd);
2559 }
2560
2561 void MacroAssembler::check_klass_subtype(Register sub_klass,
2562 Register super_klass,
2563 Register tmp_reg,
2564 Label& L_success) {
2565 Label L_failure;
2566 check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, nullptr);
2567 check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, nullptr);
2568 bind(L_failure);
2569 }
2570
2571 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
2572 ld(t0, Address(xthread, JavaThread::polling_word_offset()));
2573 if (acquire) {
2574 membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
2575 }
2576 if (at_return) {
2577 bgtu(in_nmethod ? sp : fp, t0, slow_path, /* is_far */ true);
2578 } else {
2579 test_bit(t0, t0, exact_log2(SafepointMechanism::poll_bit()));
2580 bnez(t0, slow_path, true /* is_far */);
2581 }
2582 }
2583
2584 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
2585 Label &succeed, Label *fail) {
2586 assert_different_registers(addr, tmp);
2587 assert_different_registers(newv, tmp);
2588 assert_different_registers(oldv, tmp);
2589
2590 // oldv holds comparison value
2591 // newv holds value to write in exchange
2592 // addr identifies memory word to compare against/update
2593 Label retry_load, nope;
2594 bind(retry_load);
2595 // Load reserved from the memory location
2596 lr_d(tmp, addr, Assembler::aqrl);
2597 // Fail and exit if it is not what we expect
2598 bne(tmp, oldv, nope);
2599 // If the store conditional succeeds, tmp will be zero
2600 sc_d(tmp, newv, addr, Assembler::rl);
2601 beqz(tmp, succeed);
2602 // Retry only when the store conditional failed
2603 j(retry_load);
2604
2605 bind(nope);
2606 membar(AnyAny);
2607 mv(oldv, tmp);
2608 if (fail != nullptr) {
2609 j(*fail);
2610 }
2611 }
2612
2613 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
2614 Label &succeed, Label *fail) {
2615 assert(oopDesc::mark_offset_in_bytes() == 0, "assumption");
2616 cmpxchgptr(oldv, newv, obj, tmp, succeed, fail);
2617 }
2618
2619 void MacroAssembler::load_reserved(Register addr,
2620 enum operand_size size,
2621 Assembler::Aqrl acquire) {
2622 switch (size) {
2623 case int64:
2624 lr_d(t0, addr, acquire);
2625 break;
2626 case int32:
2627 lr_w(t0, addr, acquire);
2628 break;
2629 case uint32:
2630 lr_w(t0, addr, acquire);
2631 zero_extend(t0, t0, 32);
2632 break;
2633 default:
2634 ShouldNotReachHere();
2635 }
2636 }
2637
2638 void MacroAssembler::store_conditional(Register addr,
2639 Register new_val,
2640 enum operand_size size,
2641 Assembler::Aqrl release) {
2642 switch (size) {
2643 case int64:
2644 sc_d(t0, new_val, addr, release);
2645 break;
2646 case int32:
2647 case uint32:
2648 sc_w(t0, new_val, addr, release);
2649 break;
2650 default:
2651 ShouldNotReachHere();
2652 }
2653 }
2654
2655
2656 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected,
2657 Register new_val,
2658 enum operand_size size,
2659 Register tmp1, Register tmp2, Register tmp3) {
2660 assert(size == int8 || size == int16, "unsupported operand size");
2661
2662 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3;
2663
2664 andi(shift, addr, 3);
2665 slli(shift, shift, 3);
2666
2667 andi(aligned_addr, addr, ~3);
2668
2669 if (size == int8) {
2670 mv(mask, 0xff);
2671 } else {
2672 // size == int16 case
2673 mv(mask, -1);
2674 zero_extend(mask, mask, 16);
2675 }
2676 sll(mask, mask, shift);
2677
2678 xori(not_mask, mask, -1);
2679
2680 sll(expected, expected, shift);
2681 andr(expected, expected, mask);
2682
2683 sll(new_val, new_val, shift);
2684 andr(new_val, new_val, mask);
2685 }
2686
2687 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps.
2688 // It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w,
2689 // which are forced to work with 4-byte aligned address.
2690 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
2691 Register new_val,
2692 enum operand_size size,
2693 Assembler::Aqrl acquire, Assembler::Aqrl release,
2694 Register result, bool result_as_bool,
2695 Register tmp1, Register tmp2, Register tmp3) {
2696 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2697 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2698 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2699
2700 Label retry, fail, done;
2701
2702 bind(retry);
2703 lr_w(old, aligned_addr, acquire);
2704 andr(tmp, old, mask);
2705 bne(tmp, expected, fail);
2706
2707 andr(tmp, old, not_mask);
2708 orr(tmp, tmp, new_val);
2709 sc_w(tmp, tmp, aligned_addr, release);
2710 bnez(tmp, retry);
2711
2712 if (result_as_bool) {
2713 mv(result, 1);
2714 j(done);
2715
2716 bind(fail);
2717 mv(result, zr);
2718
2719 bind(done);
2720 } else {
2721 andr(tmp, old, mask);
2722
2723 bind(fail);
2724 srl(result, tmp, shift);
2725
2726 if (size == int8) {
2727 sign_extend(result, result, 8);
2728 } else {
2729 // size == int16 case
2730 sign_extend(result, result, 16);
2731 }
2732 }
2733 }
2734
2735 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement
2736 // the weak CAS stuff. The major difference is that it just failed when store conditional
2737 // failed.
2738 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
2739 Register new_val,
2740 enum operand_size size,
2741 Assembler::Aqrl acquire, Assembler::Aqrl release,
2742 Register result,
2743 Register tmp1, Register tmp2, Register tmp3) {
2744 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2745 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2746 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2747
2748 Label fail, done;
2749
2750 lr_w(old, aligned_addr, acquire);
2751 andr(tmp, old, mask);
2752 bne(tmp, expected, fail);
2753
2754 andr(tmp, old, not_mask);
2755 orr(tmp, tmp, new_val);
2756 sc_w(tmp, tmp, aligned_addr, release);
2757 bnez(tmp, fail);
2758
2759 // Success
2760 mv(result, 1);
2761 j(done);
2762
2763 // Fail
2764 bind(fail);
2765 mv(result, zr);
2766
2767 bind(done);
2768 }
2769
2770 void MacroAssembler::cmpxchg(Register addr, Register expected,
2771 Register new_val,
2772 enum operand_size size,
2773 Assembler::Aqrl acquire, Assembler::Aqrl release,
2774 Register result, bool result_as_bool) {
2775 assert(size != int8 && size != int16, "unsupported operand size");
2776 assert_different_registers(addr, t0);
2777 assert_different_registers(expected, t0);
2778 assert_different_registers(new_val, t0);
2779
2780 Label retry_load, done, ne_done;
2781 bind(retry_load);
2782 load_reserved(addr, size, acquire);
2783 bne(t0, expected, ne_done);
2784 store_conditional(addr, new_val, size, release);
2785 bnez(t0, retry_load);
2786
2787 // equal, succeed
2788 if (result_as_bool) {
2789 mv(result, 1);
2790 } else {
2791 mv(result, expected);
2792 }
2793 j(done);
2794
2795 // not equal, failed
2796 bind(ne_done);
2797 if (result_as_bool) {
2798 mv(result, zr);
2799 } else {
2800 mv(result, t0);
2801 }
2802
2803 bind(done);
2804 }
2805
2806 void MacroAssembler::cmpxchg_weak(Register addr, Register expected,
2807 Register new_val,
2808 enum operand_size size,
2809 Assembler::Aqrl acquire, Assembler::Aqrl release,
2810 Register result) {
2811 assert_different_registers(addr, t0);
2812 assert_different_registers(expected, t0);
2813 assert_different_registers(new_val, t0);
2814
2815 Label fail, done;
2816 load_reserved(addr, size, acquire);
2817 bne(t0, expected, fail);
2818 store_conditional(addr, new_val, size, release);
2819 bnez(t0, fail);
2820
2821 // Success
2822 mv(result, 1);
2823 j(done);
2824
2825 // Fail
2826 bind(fail);
2827 mv(result, zr);
2828
2829 bind(done);
2830 }
2831
2832 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \
2833 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
2834 prev = prev->is_valid() ? prev : zr; \
2835 if (incr.is_register()) { \
2836 AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
2837 } else { \
2838 mv(t0, incr.as_constant()); \
2839 AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
2840 } \
2841 return; \
2842 }
2843
2844 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed)
2845 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed)
2846 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl)
2847 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl)
2848
2849 #undef ATOMIC_OP
2850
2851 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \
2852 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \
2853 prev = prev->is_valid() ? prev : zr; \
2854 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
2855 return; \
2856 }
2857
2858 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed)
2859 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed)
2860 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl)
2861 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl)
2862
2863 #undef ATOMIC_XCHG
2864
2865 #define ATOMIC_XCHGU(OP1, OP2) \
2866 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \
2867 atomic_##OP2(prev, newv, addr); \
2868 zero_extend(prev, prev, 32); \
2869 return; \
2870 }
2871
2872 ATOMIC_XCHGU(xchgwu, xchgw)
2873 ATOMIC_XCHGU(xchgalwu, xchgalw)
2874
2875 #undef ATOMIC_XCHGU
2876
2877 void MacroAssembler::far_jump(Address entry, Register tmp) {
2878 assert(ReservedCodeCacheSize < 4*G, "branch out of range");
2879 assert(CodeCache::find_blob(entry.target()) != nullptr,
2880 "destination of far call not found in code cache");
2881 assert(entry.rspec().type() == relocInfo::external_word_type
2882 || entry.rspec().type() == relocInfo::runtime_call_type
2883 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
2884 IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size()
2885 if (far_branches()) {
2886 // We can use auipc + jalr here because we know that the total size of
2887 // the code cache cannot exceed 2Gb.
2888 relocate(entry.rspec(), [&] {
2889 int32_t offset;
2890 la_patchable(tmp, entry, offset);
2891 jalr(x0, tmp, offset);
2892 });
2893 } else {
2894 j(entry);
2895 }
2896 }
2897
2898 void MacroAssembler::far_call(Address entry, Register tmp) {
2899 assert(ReservedCodeCacheSize < 4*G, "branch out of range");
2900 assert(CodeCache::find_blob(entry.target()) != nullptr,
2901 "destination of far call not found in code cache");
2902 assert(entry.rspec().type() == relocInfo::external_word_type
2903 || entry.rspec().type() == relocInfo::runtime_call_type
2904 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
2905 IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size()
2906 if (far_branches()) {
2907 // We can use auipc + jalr here because we know that the total size of
2908 // the code cache cannot exceed 2Gb.
2909 relocate(entry.rspec(), [&] {
2910 int32_t offset;
2911 la_patchable(tmp, entry, offset);
2912 jalr(x1, tmp, offset); // link
2913 });
2914 } else {
2915 jal(entry); // link
2916 }
2917 }
2918
2919 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
2920 Register super_klass,
2921 Register tmp_reg,
2922 Label* L_success,
2923 Label* L_failure,
2924 Label* L_slow_path,
2925 Register super_check_offset) {
2926 assert_different_registers(sub_klass, super_klass, tmp_reg);
2927 bool must_load_sco = (super_check_offset == noreg);
2928 if (must_load_sco) {
2929 assert(tmp_reg != noreg, "supply either a temp or a register offset");
2930 } else {
2931 assert_different_registers(sub_klass, super_klass, super_check_offset);
2932 }
2933
2934 Label L_fallthrough;
2935 int label_nulls = 0;
2936 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
2937 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
2938 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; }
2939 assert(label_nulls <= 1, "at most one null in batch");
2940
2941 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
2942 int sco_offset = in_bytes(Klass::super_check_offset_offset());
2943 Address super_check_offset_addr(super_klass, sco_offset);
2944
2945 // Hacked jmp, which may only be used just before L_fallthrough.
2946 #define final_jmp(label) \
2947 if (&(label) == &L_fallthrough) { /*do nothing*/ } \
2948 else j(label) /*omit semi*/
2949
2950 // If the pointers are equal, we are done (e.g., String[] elements).
2951 // This self-check enables sharing of secondary supertype arrays among
2952 // non-primary types such as array-of-interface. Otherwise, each such
2953 // type would need its own customized SSA.
2954 // We move this check to the front of the fast path because many
2955 // type checks are in fact trivially successful in this manner,
2956 // so we get a nicely predicted branch right at the start of the check.
2957 beq(sub_klass, super_klass, *L_success);
2958
2959 // Check the supertype display:
2960 if (must_load_sco) {
2961 lwu(tmp_reg, super_check_offset_addr);
2962 super_check_offset = tmp_reg;
2963 }
2964 add(t0, sub_klass, super_check_offset);
2965 Address super_check_addr(t0);
2966 ld(t0, super_check_addr); // load displayed supertype
2967
2968 // This check has worked decisively for primary supers.
2969 // Secondary supers are sought in the super_cache ('super_cache_addr').
2970 // (Secondary supers are interfaces and very deeply nested subtypes.)
2971 // This works in the same check above because of a tricky aliasing
2972 // between the super_Cache and the primary super display elements.
2973 // (The 'super_check_addr' can address either, as the case requires.)
2974 // Note that the cache is updated below if it does not help us find
2975 // what we need immediately.
2976 // So if it was a primary super, we can just fail immediately.
2977 // Otherwise, it's the slow path for us (no success at this point).
2978
2979 beq(super_klass, t0, *L_success);
2980 mv(t1, sc_offset);
2981 if (L_failure == &L_fallthrough) {
2982 beq(super_check_offset, t1, *L_slow_path);
2983 } else {
2984 bne(super_check_offset, t1, *L_failure, /* is_far */ true);
2985 final_jmp(*L_slow_path);
2986 }
2987
2988 bind(L_fallthrough);
2989
2990 #undef final_jmp
2991 }
2992
2993 // Scans count pointer sized words at [addr] for occurrence of value,
2994 // generic
2995 void MacroAssembler::repne_scan(Register addr, Register value, Register count,
2996 Register tmp) {
2997 Label Lloop, Lexit;
2998 beqz(count, Lexit);
2999 bind(Lloop);
3000 ld(tmp, addr);
3001 beq(value, tmp, Lexit);
3002 add(addr, addr, wordSize);
3003 sub(count, count, 1);
3004 bnez(count, Lloop);
3005 bind(Lexit);
3006 }
3007
3008 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3009 Register super_klass,
3010 Register tmp1_reg,
3011 Register tmp2_reg,
3012 Label* L_success,
3013 Label* L_failure) {
3014 assert_different_registers(sub_klass, super_klass, tmp1_reg);
3015 if (tmp2_reg != noreg) {
3016 assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0);
3017 }
3018 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg)
3019
3020 Label L_fallthrough;
3021 int label_nulls = 0;
3022 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
3023 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
3024
3025 assert(label_nulls <= 1, "at most one null in the batch");
3026
3027 // A couple of useful fields in sub_klass:
3028 int ss_offset = in_bytes(Klass::secondary_supers_offset());
3029 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3030 Address secondary_supers_addr(sub_klass, ss_offset);
3031 Address super_cache_addr( sub_klass, sc_offset);
3032
3033 BLOCK_COMMENT("check_klass_subtype_slow_path");
3034
3035 // Do a linear scan of the secondary super-klass chain.
3036 // This code is rarely used, so simplicity is a virtue here.
3037 // The repne_scan instruction uses fixed registers, which we must spill.
3038 // Don't worry too much about pre-existing connections with the input regs.
3039
3040 assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
3041 assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter)
3042
3043 RegSet pushed_registers;
3044 if (!IS_A_TEMP(x12)) {
3045 pushed_registers += x12;
3046 }
3047 if (!IS_A_TEMP(x15)) {
3048 pushed_registers += x15;
3049 }
3050
3051 if (super_klass != x10) {
3052 if (!IS_A_TEMP(x10)) {
3053 pushed_registers += x10;
3054 }
3055 }
3056
3057 push_reg(pushed_registers, sp);
3058
3059 // Get super_klass value into x10 (even if it was in x15 or x12)
3060 mv(x10, super_klass);
3061
3062 #ifndef PRODUCT
3063 mv(t1, (address)&SharedRuntime::_partial_subtype_ctr);
3064 Address pst_counter_addr(t1);
3065 ld(t0, pst_counter_addr);
3066 add(t0, t0, 1);
3067 sd(t0, pst_counter_addr);
3068 #endif // PRODUCT
3069
3070 // We will consult the secondary-super array.
3071 ld(x15, secondary_supers_addr);
3072 // Load the array length.
3073 lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes()));
3074 // Skip to start of data.
3075 add(x15, x15, Array<Klass*>::base_offset_in_bytes());
3076
3077 // Set t0 to an obvious invalid value, falling through by default
3078 mv(t0, -1);
3079 // Scan X12 words at [X15] for an occurrence of X10.
3080 repne_scan(x15, x10, x12, t0);
3081
3082 // pop will restore x10, so we should use a temp register to keep its value
3083 mv(t1, x10);
3084
3085 // Unspill the temp registers:
3086 pop_reg(pushed_registers, sp);
3087
3088 bne(t1, t0, *L_failure);
3089
3090 // Success. Cache the super we found an proceed in triumph.
3091 sd(super_klass, super_cache_addr);
3092
3093 if (L_success != &L_fallthrough) {
3094 j(*L_success);
3095 }
3096
3097 #undef IS_A_TEMP
3098
3099 bind(L_fallthrough);
3100 }
3101
3102 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
3103 void MacroAssembler::tlab_allocate(Register obj,
3104 Register var_size_in_bytes,
3105 int con_size_in_bytes,
3106 Register tmp1,
3107 Register tmp2,
3108 Label& slow_case,
3109 bool is_far) {
3110 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
3111 bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far);
3112 }
3113
3114 // get_thread() can be called anywhere inside generated code so we
3115 // need to save whatever non-callee save context might get clobbered
3116 // by the call to Thread::current() or, indeed, the call setup code.
3117 void MacroAssembler::get_thread(Register thread) {
3118 // save all call-clobbered regs except thread
3119 RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) +
3120 RegSet::range(x28, x31) + ra - thread;
3121 push_reg(saved_regs, sp);
3122
3123 mv(ra, CAST_FROM_FN_PTR(address, Thread::current));
3124 jalr(ra);
3125 if (thread != c_rarg0) {
3126 mv(thread, c_rarg0);
3127 }
3128
3129 // restore pushed registers
3130 pop_reg(saved_regs, sp);
3131 }
3132
3133 void MacroAssembler::load_byte_map_base(Register reg) {
3134 CardTable::CardValue* byte_map_base =
3135 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
3136 mv(reg, (uint64_t)byte_map_base);
3137 }
3138
3139 void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) {
3140 unsigned long low_address = (uintptr_t)CodeCache::low_bound();
3141 unsigned long high_address = (uintptr_t)CodeCache::high_bound();
3142 unsigned long dest_address = (uintptr_t)dest.target();
3143 long offset_low = dest_address - low_address;
3144 long offset_high = dest_address - high_address;
3145
3146 assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address");
3147 assert((uintptr_t)dest.target() < (1ull << 48), "bad address");
3148
3149 // RISC-V doesn't compute a page-aligned address, in order to partially
3150 // compensate for the use of *signed* offsets in its base+disp12
3151 // addressing mode (RISC-V's PC-relative reach remains asymmetric
3152 // [-(2G + 2K), 2G - 2K).
3153 if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
3154 int64_t distance = dest.target() - pc();
3155 auipc(reg1, (int32_t)distance + 0x800);
3156 offset = ((int32_t)distance << 20) >> 20;
3157 } else {
3158 movptr(reg1, dest.target(), offset);
3159 }
3160 }
3161
3162 void MacroAssembler::build_frame(int framesize) {
3163 assert(framesize >= 2, "framesize must include space for FP/RA");
3164 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
3165 sub(sp, sp, framesize);
3166 sd(fp, Address(sp, framesize - 2 * wordSize));
3167 sd(ra, Address(sp, framesize - wordSize));
3168 if (PreserveFramePointer) { add(fp, sp, framesize); }
3169 }
3170
3171 void MacroAssembler::remove_frame(int framesize) {
3172 assert(framesize >= 2, "framesize must include space for FP/RA");
3173 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
3174 ld(fp, Address(sp, framesize - 2 * wordSize));
3175 ld(ra, Address(sp, framesize - wordSize));
3176 add(sp, sp, framesize);
3177 }
3178
3179 void MacroAssembler::reserved_stack_check() {
3180 // testing if reserved zone needs to be enabled
3181 Label no_reserved_zone_enabling;
3182
3183 ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
3184 bltu(sp, t0, no_reserved_zone_enabling);
3185
3186 enter(); // RA and FP are live.
3187 mv(c_rarg0, xthread);
3188 RuntimeAddress target(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone));
3189 relocate(target.rspec(), [&] {
3190 int32_t offset;
3191 la_patchable(t0, target, offset);
3192 jalr(x1, t0, offset);
3193 });
3194 leave();
3195
3196 // We have already removed our own frame.
3197 // throw_delayed_StackOverflowError will think that it's been
3198 // called by our caller.
3199 target = RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry());
3200 relocate(target.rspec(), [&] {
3201 int32_t offset;
3202 la_patchable(t0, target, offset);
3203 jalr(x0, t0, offset);
3204 });
3205 should_not_reach_here();
3206
3207 bind(no_reserved_zone_enabling);
3208 }
3209
3210 // Move the address of the polling page into dest.
3211 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
3212 ld(dest, Address(xthread, JavaThread::polling_page_offset()));
3213 }
3214
3215 // Read the polling page. The address of the polling page must
3216 // already be in r.
3217 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) {
3218 relocate(rtype, [&] {
3219 lwu(zr, Address(r, offset));
3220 });
3221 }
3222
3223 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
3224 #ifdef ASSERT
3225 {
3226 ThreadInVMfromUnknown tiv;
3227 assert (UseCompressedOops, "should only be used for compressed oops");
3228 assert (Universe::heap() != nullptr, "java heap should be initialized");
3229 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
3230 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
3231 }
3232 #endif
3233 int oop_index = oop_recorder()->find_index(obj);
3234 relocate(oop_Relocation::spec(oop_index), [&] {
3235 li32(dst, 0xDEADBEEF);
3236 });
3237 zero_extend(dst, dst, 32);
3238 }
3239
3240 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
3241 assert (UseCompressedClassPointers, "should only be used for compressed headers");
3242 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
3243 int index = oop_recorder()->find_index(k);
3244 assert(!Universe::heap()->is_in(k), "should not be an oop");
3245
3246 narrowKlass nk = CompressedKlassPointers::encode(k);
3247 relocate(metadata_Relocation::spec(index), [&] {
3248 li32(dst, nk);
3249 });
3250 zero_extend(dst, dst, 32);
3251 }
3252
3253 // Maybe emit a call via a trampoline. If the code cache is small
3254 // trampolines won't be emitted.
3255 address MacroAssembler::trampoline_call(Address entry) {
3256 assert(entry.rspec().type() == relocInfo::runtime_call_type ||
3257 entry.rspec().type() == relocInfo::opt_virtual_call_type ||
3258 entry.rspec().type() == relocInfo::static_call_type ||
3259 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
3260
3261 address target = entry.target();
3262
3263 // We need a trampoline if branches are far.
3264 if (far_branches()) {
3265 if (!in_scratch_emit_size()) {
3266 if (entry.rspec().type() == relocInfo::runtime_call_type) {
3267 assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs");
3268 code()->share_trampoline_for(entry.target(), offset());
3269 } else {
3270 address stub = emit_trampoline_stub(offset(), target);
3271 if (stub == nullptr) {
3272 postcond(pc() == badAddress);
3273 return nullptr; // CodeCache is full
3274 }
3275 }
3276 }
3277 target = pc();
3278 }
3279
3280 address call_pc = pc();
3281 #ifdef ASSERT
3282 if (entry.rspec().type() != relocInfo::runtime_call_type) {
3283 assert_alignment(call_pc);
3284 }
3285 #endif
3286 relocate(entry.rspec(), [&] {
3287 jal(target);
3288 });
3289
3290 postcond(pc() != badAddress);
3291 return call_pc;
3292 }
3293
3294 address MacroAssembler::ic_call(address entry, jint method_index) {
3295 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
3296 IncompressibleRegion ir(this); // relocations
3297 movptr(t1, (address)Universe::non_oop_word());
3298 assert_cond(entry != nullptr);
3299 return trampoline_call(Address(entry, rh));
3300 }
3301
3302 // Emit a trampoline stub for a call to a target which is too far away.
3303 //
3304 // code sequences:
3305 //
3306 // call-site:
3307 // branch-and-link to <destination> or <trampoline stub>
3308 //
3309 // Related trampoline stub for this call site in the stub section:
3310 // load the call target from the constant pool
3311 // branch (RA still points to the call site above)
3312
3313 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
3314 address dest) {
3315 // Max stub size: alignment nop, TrampolineStub.
3316 address stub = start_a_stub(max_trampoline_stub_size());
3317 if (stub == nullptr) {
3318 return nullptr; // CodeBuffer::expand failed
3319 }
3320
3321 // We are always 4-byte aligned here.
3322 assert_alignment(pc());
3323
3324 // Create a trampoline stub relocation which relates this trampoline stub
3325 // with the call instruction at insts_call_instruction_offset in the
3326 // instructions code-section.
3327
3328 // Make sure the address of destination 8-byte aligned after 3 instructions.
3329 align(wordSize, NativeCallTrampolineStub::data_offset);
3330
3331 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() +
3332 insts_call_instruction_offset);
3333 const int stub_start_offset = offset();
3334 relocate(rh, [&] {
3335 // Now, create the trampoline stub's code:
3336 // - load the call
3337 // - call
3338 Label target;
3339 ld(t0, target); // auipc + ld
3340 jr(t0); // jalr
3341 bind(target);
3342 assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
3343 "should be");
3344 assert(offset() % wordSize == 0, "bad alignment");
3345 emit_int64((int64_t)dest);
3346 });
3347
3348 const address stub_start_addr = addr_at(stub_start_offset);
3349
3350 assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
3351
3352 end_a_stub();
3353 return stub_start_addr;
3354 }
3355
3356 int MacroAssembler::max_trampoline_stub_size() {
3357 // Max stub size: alignment nop, TrampolineStub.
3358 return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size;
3359 }
3360
3361 int MacroAssembler::static_call_stub_size() {
3362 // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr
3363 return 12 * NativeInstruction::instruction_size;
3364 }
3365
3366 Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) {
3367 switch (dst.getMode()) {
3368 case Address::base_plus_offset:
3369 // This is the expected mode, although we allow all the other
3370 // forms below.
3371 return form_address(tmp, dst.base(), dst.offset());
3372 default:
3373 la(tmp, dst);
3374 return Address(tmp);
3375 }
3376 }
3377
3378 void MacroAssembler::increment(const Address dst, int64_t value, Register tmp1, Register tmp2) {
3379 assert(((dst.getMode() == Address::base_plus_offset &&
3380 is_simm12(dst.offset())) || is_simm12(value)),
3381 "invalid value and address mode combination");
3382 Address adr = add_memory_helper(dst, tmp2);
3383 assert(!adr.uses(tmp1), "invalid dst for address increment");
3384 ld(tmp1, adr);
3385 add(tmp1, tmp1, value, tmp2);
3386 sd(tmp1, adr);
3387 }
3388
3389 void MacroAssembler::incrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) {
3390 assert(((dst.getMode() == Address::base_plus_offset &&
3391 is_simm12(dst.offset())) || is_simm12(value)),
3392 "invalid value and address mode combination");
3393 Address adr = add_memory_helper(dst, tmp2);
3394 assert(!adr.uses(tmp1), "invalid dst for address increment");
3395 lwu(tmp1, adr);
3396 addw(tmp1, tmp1, value, tmp2);
3397 sw(tmp1, adr);
3398 }
3399
3400 void MacroAssembler::decrement(const Address dst, int64_t value, Register tmp1, Register tmp2) {
3401 assert(((dst.getMode() == Address::base_plus_offset &&
3402 is_simm12(dst.offset())) || is_simm12(value)),
3403 "invalid value and address mode combination");
3404 Address adr = add_memory_helper(dst, tmp2);
3405 assert(!adr.uses(tmp1), "invalid dst for address decrement");
3406 ld(tmp1, adr);
3407 sub(tmp1, tmp1, value, tmp2);
3408 sd(tmp1, adr);
3409 }
3410
3411 void MacroAssembler::decrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) {
3412 assert(((dst.getMode() == Address::base_plus_offset &&
3413 is_simm12(dst.offset())) || is_simm12(value)),
3414 "invalid value and address mode combination");
3415 Address adr = add_memory_helper(dst, tmp2);
3416 assert(!adr.uses(tmp1), "invalid dst for address decrement");
3417 lwu(tmp1, adr);
3418 subw(tmp1, tmp1, value, tmp2);
3419 sw(tmp1, adr);
3420 }
3421
3422 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) {
3423 assert_different_registers(src1, t0);
3424 relocate(src2.rspec(), [&] {
3425 int32_t offset;
3426 la_patchable(t0, src2, offset);
3427 ld(t0, Address(t0, offset));
3428 });
3429 beq(src1, t0, equal);
3430 }
3431
3432 void MacroAssembler::load_method_holder_cld(Register result, Register method) {
3433 load_method_holder(result, method);
3434 ld(result, Address(result, InstanceKlass::class_loader_data_offset()));
3435 }
3436
3437 void MacroAssembler::load_method_holder(Register holder, Register method) {
3438 ld(holder, Address(method, Method::const_offset())); // ConstMethod*
3439 ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
3440 ld(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
3441 }
3442
3443 // string indexof
3444 // compute index by trailing zeros
3445 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros,
3446 Register match_mask, Register result,
3447 Register ch2, Register tmp,
3448 bool haystack_isL) {
3449 int haystack_chr_shift = haystack_isL ? 0 : 1;
3450 srl(match_mask, match_mask, trailing_zeros);
3451 srli(match_mask, match_mask, 1);
3452 srli(tmp, trailing_zeros, LogBitsPerByte);
3453 if (!haystack_isL) andi(tmp, tmp, 0xE);
3454 add(haystack, haystack, tmp);
3455 ld(ch2, Address(haystack));
3456 if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift);
3457 add(result, result, tmp);
3458 }
3459
3460 // string indexof
3461 // Find pattern element in src, compute match mask,
3462 // only the first occurrence of 0x80/0x8000 at low bits is the valid match index
3463 // match mask patterns and corresponding indices would be like:
3464 // - 0x8080808080808080 (Latin1)
3465 // - 7 6 5 4 3 2 1 0 (match index)
3466 // - 0x8000800080008000 (UTF16)
3467 // - 3 2 1 0 (match index)
3468 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask,
3469 Register mask1, Register mask2) {
3470 xorr(src, pattern, src);
3471 sub(match_mask, src, mask1);
3472 orr(src, src, mask2);
3473 notr(src, src);
3474 andr(match_mask, match_mask, src);
3475 }
3476
3477 #ifdef COMPILER2
3478 // Code for BigInteger::mulAdd intrinsic
3479 // out = x10
3480 // in = x11
3481 // offset = x12 (already out.length-offset)
3482 // len = x13
3483 // k = x14
3484 // tmp = x28
3485 //
3486 // pseudo code from java implementation:
3487 // long kLong = k & LONG_MASK;
3488 // carry = 0;
3489 // offset = out.length-offset - 1;
3490 // for (int j = len - 1; j >= 0; j--) {
3491 // product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
3492 // out[offset--] = (int)product;
3493 // carry = product >>> 32;
3494 // }
3495 // return (int)carry;
3496 void MacroAssembler::mul_add(Register out, Register in, Register offset,
3497 Register len, Register k, Register tmp) {
3498 Label L_tail_loop, L_unroll, L_end;
3499 mv(tmp, out);
3500 mv(out, zr);
3501 blez(len, L_end);
3502 zero_extend(k, k, 32);
3503 slliw(t0, offset, LogBytesPerInt);
3504 add(offset, tmp, t0);
3505 slliw(t0, len, LogBytesPerInt);
3506 add(in, in, t0);
3507
3508 const int unroll = 8;
3509 mv(tmp, unroll);
3510 blt(len, tmp, L_tail_loop);
3511 bind(L_unroll);
3512 for (int i = 0; i < unroll; i++) {
3513 sub(in, in, BytesPerInt);
3514 lwu(t0, Address(in, 0));
3515 mul(t1, t0, k);
3516 add(t0, t1, out);
3517 sub(offset, offset, BytesPerInt);
3518 lwu(t1, Address(offset, 0));
3519 add(t0, t0, t1);
3520 sw(t0, Address(offset, 0));
3521 srli(out, t0, 32);
3522 }
3523 subw(len, len, tmp);
3524 bge(len, tmp, L_unroll);
3525
3526 bind(L_tail_loop);
3527 blez(len, L_end);
3528 sub(in, in, BytesPerInt);
3529 lwu(t0, Address(in, 0));
3530 mul(t1, t0, k);
3531 add(t0, t1, out);
3532 sub(offset, offset, BytesPerInt);
3533 lwu(t1, Address(offset, 0));
3534 add(t0, t0, t1);
3535 sw(t0, Address(offset, 0));
3536 srli(out, t0, 32);
3537 subw(len, len, 1);
3538 j(L_tail_loop);
3539
3540 bind(L_end);
3541 }
3542
3543 // add two unsigned input and output carry
3544 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry)
3545 {
3546 assert_different_registers(dst, carry);
3547 assert_different_registers(dst, src2);
3548 add(dst, src1, src2);
3549 sltu(carry, dst, src2);
3550 }
3551
3552 // add two input with carry
3553 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) {
3554 assert_different_registers(dst, carry);
3555 add(dst, src1, src2);
3556 add(dst, dst, carry);
3557 }
3558
3559 // add two unsigned input with carry and output carry
3560 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) {
3561 assert_different_registers(dst, src2);
3562 adc(dst, src1, src2, carry);
3563 sltu(carry, dst, src2);
3564 }
3565
3566 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
3567 Register src1, Register src2, Register carry) {
3568 cad(dest_lo, dest_lo, src1, carry);
3569 add(dest_hi, dest_hi, carry);
3570 cad(dest_lo, dest_lo, src2, carry);
3571 add(final_dest_hi, dest_hi, carry);
3572 }
3573
3574 /**
3575 * Multiply 32 bit by 32 bit first loop.
3576 */
3577 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
3578 Register y, Register y_idx, Register z,
3579 Register carry, Register product,
3580 Register idx, Register kdx) {
3581 // jlong carry, x[], y[], z[];
3582 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3583 // long product = y[idx] * x[xstart] + carry;
3584 // z[kdx] = (int)product;
3585 // carry = product >>> 32;
3586 // }
3587 // z[xstart] = (int)carry;
3588
3589 Label L_first_loop, L_first_loop_exit;
3590 blez(idx, L_first_loop_exit);
3591
3592 shadd(t0, xstart, x, t0, LogBytesPerInt);
3593 lwu(x_xstart, Address(t0, 0));
3594
3595 bind(L_first_loop);
3596 subw(idx, idx, 1);
3597 shadd(t0, idx, y, t0, LogBytesPerInt);
3598 lwu(y_idx, Address(t0, 0));
3599 mul(product, x_xstart, y_idx);
3600 add(product, product, carry);
3601 srli(carry, product, 32);
3602 subw(kdx, kdx, 1);
3603 shadd(t0, kdx, z, t0, LogBytesPerInt);
3604 sw(product, Address(t0, 0));
3605 bgtz(idx, L_first_loop);
3606
3607 bind(L_first_loop_exit);
3608 }
3609
3610 /**
3611 * Multiply 64 bit by 64 bit first loop.
3612 */
3613 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
3614 Register y, Register y_idx, Register z,
3615 Register carry, Register product,
3616 Register idx, Register kdx) {
3617 //
3618 // jlong carry, x[], y[], z[];
3619 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3620 // huge_128 product = y[idx] * x[xstart] + carry;
3621 // z[kdx] = (jlong)product;
3622 // carry = (jlong)(product >>> 64);
3623 // }
3624 // z[xstart] = carry;
3625 //
3626
3627 Label L_first_loop, L_first_loop_exit;
3628 Label L_one_x, L_one_y, L_multiply;
3629
3630 subw(xstart, xstart, 1);
3631 bltz(xstart, L_one_x);
3632
3633 shadd(t0, xstart, x, t0, LogBytesPerInt);
3634 ld(x_xstart, Address(t0, 0));
3635 ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian
3636
3637 bind(L_first_loop);
3638 subw(idx, idx, 1);
3639 bltz(idx, L_first_loop_exit);
3640 subw(idx, idx, 1);
3641 bltz(idx, L_one_y);
3642
3643 shadd(t0, idx, y, t0, LogBytesPerInt);
3644 ld(y_idx, Address(t0, 0));
3645 ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian
3646 bind(L_multiply);
3647
3648 mulhu(t0, x_xstart, y_idx);
3649 mul(product, x_xstart, y_idx);
3650 cad(product, product, carry, t1);
3651 adc(carry, t0, zr, t1);
3652
3653 subw(kdx, kdx, 2);
3654 ror_imm(product, product, 32); // back to big-endian
3655 shadd(t0, kdx, z, t0, LogBytesPerInt);
3656 sd(product, Address(t0, 0));
3657
3658 j(L_first_loop);
3659
3660 bind(L_one_y);
3661 lwu(y_idx, Address(y, 0));
3662 j(L_multiply);
3663
3664 bind(L_one_x);
3665 lwu(x_xstart, Address(x, 0));
3666 j(L_first_loop);
3667
3668 bind(L_first_loop_exit);
3669 }
3670
3671 /**
3672 * Multiply 128 bit by 128 bit. Unrolled inner loop.
3673 *
3674 */
3675 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
3676 Register carry, Register carry2,
3677 Register idx, Register jdx,
3678 Register yz_idx1, Register yz_idx2,
3679 Register tmp, Register tmp3, Register tmp4,
3680 Register tmp6, Register product_hi) {
3681 // jlong carry, x[], y[], z[];
3682 // int kdx = xstart+1;
3683 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
3684 // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry;
3685 // jlong carry2 = (jlong)(tmp3 >>> 64);
3686 // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2;
3687 // carry = (jlong)(tmp4 >>> 64);
3688 // z[kdx+idx+1] = (jlong)tmp3;
3689 // z[kdx+idx] = (jlong)tmp4;
3690 // }
3691 // idx += 2;
3692 // if (idx > 0) {
3693 // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry;
3694 // z[kdx+idx] = (jlong)yz_idx1;
3695 // carry = (jlong)(yz_idx1 >>> 64);
3696 // }
3697 //
3698
3699 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
3700
3701 srliw(jdx, idx, 2);
3702
3703 bind(L_third_loop);
3704
3705 subw(jdx, jdx, 1);
3706 bltz(jdx, L_third_loop_exit);
3707 subw(idx, idx, 4);
3708
3709 shadd(t0, idx, y, t0, LogBytesPerInt);
3710 ld(yz_idx2, Address(t0, 0));
3711 ld(yz_idx1, Address(t0, wordSize));
3712
3713 shadd(tmp6, idx, z, t0, LogBytesPerInt);
3714
3715 ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
3716 ror_imm(yz_idx2, yz_idx2, 32);
3717
3718 ld(t1, Address(tmp6, 0));
3719 ld(t0, Address(tmp6, wordSize));
3720
3721 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3
3722 mulhu(tmp4, product_hi, yz_idx1);
3723
3724 ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian
3725 ror_imm(t1, t1, 32, tmp);
3726
3727 mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp
3728 mulhu(carry2, product_hi, yz_idx2);
3729
3730 cad(tmp3, tmp3, carry, carry);
3731 adc(tmp4, tmp4, zr, carry);
3732 cad(tmp3, tmp3, t0, t0);
3733 cadc(tmp4, tmp4, tmp, t0);
3734 adc(carry, carry2, zr, t0);
3735 cad(tmp4, tmp4, t1, carry2);
3736 adc(carry, carry, zr, carry2);
3737
3738 ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian
3739 ror_imm(tmp4, tmp4, 32);
3740 sd(tmp4, Address(tmp6, 0));
3741 sd(tmp3, Address(tmp6, wordSize));
3742
3743 j(L_third_loop);
3744
3745 bind(L_third_loop_exit);
3746
3747 andi(idx, idx, 0x3);
3748 beqz(idx, L_post_third_loop_done);
3749
3750 Label L_check_1;
3751 subw(idx, idx, 2);
3752 bltz(idx, L_check_1);
3753
3754 shadd(t0, idx, y, t0, LogBytesPerInt);
3755 ld(yz_idx1, Address(t0, 0));
3756 ror_imm(yz_idx1, yz_idx1, 32);
3757
3758 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3
3759 mulhu(tmp4, product_hi, yz_idx1);
3760
3761 shadd(t0, idx, z, t0, LogBytesPerInt);
3762 ld(yz_idx2, Address(t0, 0));
3763 ror_imm(yz_idx2, yz_idx2, 32, tmp);
3764
3765 add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp);
3766
3767 ror_imm(tmp3, tmp3, 32, tmp);
3768 sd(tmp3, Address(t0, 0));
3769
3770 bind(L_check_1);
3771
3772 andi(idx, idx, 0x1);
3773 subw(idx, idx, 1);
3774 bltz(idx, L_post_third_loop_done);
3775 shadd(t0, idx, y, t0, LogBytesPerInt);
3776 lwu(tmp4, Address(t0, 0));
3777 mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3
3778 mulhu(carry2, tmp4, product_hi);
3779
3780 shadd(t0, idx, z, t0, LogBytesPerInt);
3781 lwu(tmp4, Address(t0, 0));
3782
3783 add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0);
3784
3785 shadd(t0, idx, z, t0, LogBytesPerInt);
3786 sw(tmp3, Address(t0, 0));
3787
3788 slli(t0, carry2, 32);
3789 srli(carry, tmp3, 32);
3790 orr(carry, carry, t0);
3791
3792 bind(L_post_third_loop_done);
3793 }
3794
3795 /**
3796 * Code for BigInteger::multiplyToLen() intrinsic.
3797 *
3798 * x10: x
3799 * x11: xlen
3800 * x12: y
3801 * x13: ylen
3802 * x14: z
3803 * x15: zlen
3804 * x16: tmp1
3805 * x17: tmp2
3806 * x7: tmp3
3807 * x28: tmp4
3808 * x29: tmp5
3809 * x30: tmp6
3810 * x31: tmp7
3811 */
3812 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
3813 Register z, Register zlen,
3814 Register tmp1, Register tmp2, Register tmp3, Register tmp4,
3815 Register tmp5, Register tmp6, Register product_hi) {
3816 assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
3817
3818 const Register idx = tmp1;
3819 const Register kdx = tmp2;
3820 const Register xstart = tmp3;
3821
3822 const Register y_idx = tmp4;
3823 const Register carry = tmp5;
3824 const Register product = xlen;
3825 const Register x_xstart = zlen; // reuse register
3826
3827 mv(idx, ylen); // idx = ylen;
3828 mv(kdx, zlen); // kdx = xlen+ylen;
3829 mv(carry, zr); // carry = 0;
3830
3831 Label L_multiply_64_x_64_loop, L_done;
3832
3833 subw(xstart, xlen, 1);
3834 bltz(xstart, L_done);
3835
3836 const Register jdx = tmp1;
3837
3838 if (AvoidUnalignedAccesses) {
3839 // Check if x and y are both 8-byte aligned.
3840 orr(t0, xlen, ylen);
3841 test_bit(t0, t0, 0);
3842 beqz(t0, L_multiply_64_x_64_loop);
3843
3844 multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3845 shadd(t0, xstart, z, t0, LogBytesPerInt);
3846 sw(carry, Address(t0, 0));
3847
3848 Label L_second_loop_unaligned;
3849 bind(L_second_loop_unaligned);
3850 mv(carry, zr);
3851 mv(jdx, ylen);
3852 subw(xstart, xstart, 1);
3853 bltz(xstart, L_done);
3854 sub(sp, sp, 2 * wordSize);
3855 sd(z, Address(sp, 0));
3856 sd(zr, Address(sp, wordSize));
3857 shadd(t0, xstart, z, t0, LogBytesPerInt);
3858 addi(z, t0, 4);
3859 shadd(t0, xstart, x, t0, LogBytesPerInt);
3860 lwu(product, Address(t0, 0));
3861 Label L_third_loop, L_third_loop_exit;
3862
3863 blez(jdx, L_third_loop_exit);
3864
3865 bind(L_third_loop);
3866 subw(jdx, jdx, 1);
3867 shadd(t0, jdx, y, t0, LogBytesPerInt);
3868 lwu(t0, Address(t0, 0));
3869 mul(t1, t0, product);
3870 add(t0, t1, carry);
3871 shadd(tmp6, jdx, z, t1, LogBytesPerInt);
3872 lwu(t1, Address(tmp6, 0));
3873 add(t0, t0, t1);
3874 sw(t0, Address(tmp6, 0));
3875 srli(carry, t0, 32);
3876 bgtz(jdx, L_third_loop);
3877
3878 bind(L_third_loop_exit);
3879 ld(z, Address(sp, 0));
3880 addi(sp, sp, 2 * wordSize);
3881 shadd(t0, xstart, z, t0, LogBytesPerInt);
3882 sw(carry, Address(t0, 0));
3883
3884 j(L_second_loop_unaligned);
3885 }
3886
3887 bind(L_multiply_64_x_64_loop);
3888 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3889
3890 Label L_second_loop_aligned;
3891 beqz(kdx, L_second_loop_aligned);
3892
3893 Label L_carry;
3894 subw(kdx, kdx, 1);
3895 beqz(kdx, L_carry);
3896
3897 shadd(t0, kdx, z, t0, LogBytesPerInt);
3898 sw(carry, Address(t0, 0));
3899 srli(carry, carry, 32);
3900 subw(kdx, kdx, 1);
3901
3902 bind(L_carry);
3903 shadd(t0, kdx, z, t0, LogBytesPerInt);
3904 sw(carry, Address(t0, 0));
3905
3906 // Second and third (nested) loops.
3907 //
3908 // for (int i = xstart-1; i >= 0; i--) { // Second loop
3909 // carry = 0;
3910 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
3911 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
3912 // (z[k] & LONG_MASK) + carry;
3913 // z[k] = (int)product;
3914 // carry = product >>> 32;
3915 // }
3916 // z[i] = (int)carry;
3917 // }
3918 //
3919 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
3920
3921 bind(L_second_loop_aligned);
3922 mv(carry, zr); // carry = 0;
3923 mv(jdx, ylen); // j = ystart+1
3924
3925 subw(xstart, xstart, 1); // i = xstart-1;
3926 bltz(xstart, L_done);
3927
3928 sub(sp, sp, 4 * wordSize);
3929 sd(z, Address(sp, 0));
3930
3931 Label L_last_x;
3932 shadd(t0, xstart, z, t0, LogBytesPerInt);
3933 addi(z, t0, 4);
3934 subw(xstart, xstart, 1); // i = xstart-1;
3935 bltz(xstart, L_last_x);
3936
3937 shadd(t0, xstart, x, t0, LogBytesPerInt);
3938 ld(product_hi, Address(t0, 0));
3939 ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian
3940
3941 Label L_third_loop_prologue;
3942 bind(L_third_loop_prologue);
3943
3944 sd(ylen, Address(sp, wordSize));
3945 sd(x, Address(sp, 2 * wordSize));
3946 sd(xstart, Address(sp, 3 * wordSize));
3947 multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product,
3948 tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi);
3949 ld(z, Address(sp, 0));
3950 ld(ylen, Address(sp, wordSize));
3951 ld(x, Address(sp, 2 * wordSize));
3952 ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen
3953 addi(sp, sp, 4 * wordSize);
3954
3955 addiw(tmp3, xlen, 1);
3956 shadd(t0, tmp3, z, t0, LogBytesPerInt);
3957 sw(carry, Address(t0, 0));
3958
3959 subw(tmp3, tmp3, 1);
3960 bltz(tmp3, L_done);
3961
3962 srli(carry, carry, 32);
3963 shadd(t0, tmp3, z, t0, LogBytesPerInt);
3964 sw(carry, Address(t0, 0));
3965 j(L_second_loop_aligned);
3966
3967 // Next infrequent code is moved outside loops.
3968 bind(L_last_x);
3969 lwu(product_hi, Address(x, 0));
3970 j(L_third_loop_prologue);
3971
3972 bind(L_done);
3973 }
3974 #endif
3975
3976 // Count bits of trailing zero chars from lsb to msb until first non-zero element.
3977 // For LL case, one byte for one element, so shift 8 bits once, and for other case,
3978 // shift 16 bits once.
3979 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) {
3980 if (UseZbb) {
3981 assert_different_registers(Rd, Rs, tmp1);
3982 int step = isLL ? 8 : 16;
3983 ctz(Rd, Rs);
3984 andi(tmp1, Rd, step - 1);
3985 sub(Rd, Rd, tmp1);
3986 return;
3987 }
3988
3989 assert_different_registers(Rd, Rs, tmp1, tmp2);
3990 Label Loop;
3991 int step = isLL ? 8 : 16;
3992 mv(Rd, -step);
3993 mv(tmp2, Rs);
3994
3995 bind(Loop);
3996 addi(Rd, Rd, step);
3997 andi(tmp1, tmp2, ((1 << step) - 1));
3998 srli(tmp2, tmp2, step);
3999 beqz(tmp1, Loop);
4000 }
4001
4002 // This instruction reads adjacent 4 bytes from the lower half of source register,
4003 // inflate into a register, for example:
4004 // Rs: A7A6A5A4A3A2A1A0
4005 // Rd: 00A300A200A100A0
4006 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) {
4007 assert_different_registers(Rd, Rs, tmp1, tmp2);
4008
4009 mv(tmp1, 0xFF000000); // first byte mask at lower word
4010 andr(Rd, Rs, tmp1);
4011 for (int i = 0; i < 2; i++) {
4012 slli(Rd, Rd, wordSize);
4013 srli(tmp1, tmp1, wordSize);
4014 andr(tmp2, Rs, tmp1);
4015 orr(Rd, Rd, tmp2);
4016 }
4017 slli(Rd, Rd, wordSize);
4018 andi(tmp2, Rs, 0xFF); // last byte mask at lower word
4019 orr(Rd, Rd, tmp2);
4020 }
4021
4022 // This instruction reads adjacent 4 bytes from the upper half of source register,
4023 // inflate into a register, for example:
4024 // Rs: A7A6A5A4A3A2A1A0
4025 // Rd: 00A700A600A500A4
4026 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) {
4027 assert_different_registers(Rd, Rs, tmp1, tmp2);
4028 srli(Rs, Rs, 32); // only upper 32 bits are needed
4029 inflate_lo32(Rd, Rs, tmp1, tmp2);
4030 }
4031
4032 // The size of the blocks erased by the zero_blocks stub. We must
4033 // handle anything smaller than this ourselves in zero_words().
4034 const int MacroAssembler::zero_words_block_size = 8;
4035
4036 // zero_words() is used by C2 ClearArray patterns. It is as small as
4037 // possible, handling small word counts locally and delegating
4038 // anything larger to the zero_blocks stub. It is expanded many times
4039 // in compiled code, so it is important to keep it short.
4040
4041 // ptr: Address of a buffer to be zeroed.
4042 // cnt: Count in HeapWords.
4043 //
4044 // ptr, cnt, and t0 are clobbered.
4045 address MacroAssembler::zero_words(Register ptr, Register cnt) {
4046 assert(is_power_of_2(zero_words_block_size), "adjust this");
4047 assert(ptr == x28 && cnt == x29, "mismatch in register usage");
4048 assert_different_registers(cnt, t0);
4049
4050 BLOCK_COMMENT("zero_words {");
4051
4052 mv(t0, zero_words_block_size);
4053 Label around, done, done16;
4054 bltu(cnt, t0, around);
4055 {
4056 RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks());
4057 assert(zero_blocks.target() != nullptr, "zero_blocks stub has not been generated");
4058 if (StubRoutines::riscv::complete()) {
4059 address tpc = trampoline_call(zero_blocks);
4060 if (tpc == nullptr) {
4061 DEBUG_ONLY(reset_labels(around));
4062 postcond(pc() == badAddress);
4063 return nullptr;
4064 }
4065 } else {
4066 jal(zero_blocks);
4067 }
4068 }
4069 bind(around);
4070 for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
4071 Label l;
4072 test_bit(t0, cnt, exact_log2(i));
4073 beqz(t0, l);
4074 for (int j = 0; j < i; j++) {
4075 sd(zr, Address(ptr, j * wordSize));
4076 }
4077 addi(ptr, ptr, i * wordSize);
4078 bind(l);
4079 }
4080 {
4081 Label l;
4082 test_bit(t0, cnt, 0);
4083 beqz(t0, l);
4084 sd(zr, Address(ptr, 0));
4085 bind(l);
4086 }
4087
4088 BLOCK_COMMENT("} zero_words");
4089 postcond(pc() != badAddress);
4090 return pc();
4091 }
4092
4093 #define SmallArraySize (18 * BytesPerLong)
4094
4095 // base: Address of a buffer to be zeroed, 8 bytes aligned.
4096 // cnt: Immediate count in HeapWords.
4097 void MacroAssembler::zero_words(Register base, uint64_t cnt) {
4098 assert_different_registers(base, t0, t1);
4099
4100 BLOCK_COMMENT("zero_words {");
4101
4102 if (cnt <= SmallArraySize / BytesPerLong) {
4103 for (int i = 0; i < (int)cnt; i++) {
4104 sd(zr, Address(base, i * wordSize));
4105 }
4106 } else {
4107 const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll
4108 int remainder = cnt % unroll;
4109 for (int i = 0; i < remainder; i++) {
4110 sd(zr, Address(base, i * wordSize));
4111 }
4112
4113 Label loop;
4114 Register cnt_reg = t0;
4115 Register loop_base = t1;
4116 cnt = cnt - remainder;
4117 mv(cnt_reg, cnt);
4118 add(loop_base, base, remainder * wordSize);
4119 bind(loop);
4120 sub(cnt_reg, cnt_reg, unroll);
4121 for (int i = 0; i < unroll; i++) {
4122 sd(zr, Address(loop_base, i * wordSize));
4123 }
4124 add(loop_base, loop_base, unroll * wordSize);
4125 bnez(cnt_reg, loop);
4126 }
4127
4128 BLOCK_COMMENT("} zero_words");
4129 }
4130
4131 // base: Address of a buffer to be filled, 8 bytes aligned.
4132 // cnt: Count in 8-byte unit.
4133 // value: Value to be filled with.
4134 // base will point to the end of the buffer after filling.
4135 void MacroAssembler::fill_words(Register base, Register cnt, Register value) {
4136 // Algorithm:
4137 //
4138 // t0 = cnt & 7
4139 // cnt -= t0
4140 // p += t0
4141 // switch (t0):
4142 // switch start:
4143 // do while cnt
4144 // cnt -= 8
4145 // p[-8] = value
4146 // case 7:
4147 // p[-7] = value
4148 // case 6:
4149 // p[-6] = value
4150 // // ...
4151 // case 1:
4152 // p[-1] = value
4153 // case 0:
4154 // p += 8
4155 // do-while end
4156 // switch end
4157
4158 assert_different_registers(base, cnt, value, t0, t1);
4159
4160 Label fini, skip, entry, loop;
4161 const int unroll = 8; // Number of sd instructions we'll unroll
4162
4163 beqz(cnt, fini);
4164
4165 andi(t0, cnt, unroll - 1);
4166 sub(cnt, cnt, t0);
4167 // align 8, so first sd n % 8 = mod, next loop sd 8 * n.
4168 shadd(base, t0, base, t1, 3);
4169 la(t1, entry);
4170 slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst)
4171 sub(t1, t1, t0);
4172 jr(t1);
4173
4174 bind(loop);
4175 add(base, base, unroll * 8);
4176 for (int i = -unroll; i < 0; i++) {
4177 sd(value, Address(base, i * 8));
4178 }
4179 bind(entry);
4180 sub(cnt, cnt, unroll);
4181 bgez(cnt, loop);
4182
4183 bind(fini);
4184 }
4185
4186 // Zero blocks of memory by using CBO.ZERO.
4187 //
4188 // Aligns the base address first sufficiently for CBO.ZERO, then uses
4189 // CBO.ZERO repeatedly for every full block. cnt is the size to be
4190 // zeroed in HeapWords. Returns the count of words left to be zeroed
4191 // in cnt.
4192 //
4193 // NOTE: This is intended to be used in the zero_blocks() stub. If
4194 // you want to use it elsewhere, note that cnt must be >= CacheLineSize.
4195 void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) {
4196 Label initial_table_end, loop;
4197
4198 // Align base with cache line size.
4199 neg(tmp1, base);
4200 andi(tmp1, tmp1, CacheLineSize - 1);
4201
4202 // tmp1: the number of bytes to be filled to align the base with cache line size.
4203 add(base, base, tmp1);
4204 srai(tmp2, tmp1, 3);
4205 sub(cnt, cnt, tmp2);
4206 srli(tmp2, tmp1, 1);
4207 la(tmp1, initial_table_end);
4208 sub(tmp2, tmp1, tmp2);
4209 jr(tmp2);
4210 for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) {
4211 sd(zr, Address(base, i));
4212 }
4213 bind(initial_table_end);
4214
4215 mv(tmp1, CacheLineSize / wordSize);
4216 bind(loop);
4217 cbo_zero(base);
4218 sub(cnt, cnt, tmp1);
4219 add(base, base, CacheLineSize);
4220 bge(cnt, tmp1, loop);
4221 }
4222
4223 // java.lang.Math.round(float a)
4224 // Returns the closest int to the argument, with ties rounding to positive infinity.
4225 void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
4226 // this instructions calling sequence provides performance improvement on all tested devices;
4227 // don't change it without re-verification
4228 Label done;
4229 mv(t0, jint_cast(0.5f));
4230 fmv_w_x(ftmp, t0);
4231
4232 // dst = 0 if NaN
4233 feq_s(t0, src, src); // replacing fclass with feq as performance optimization
4234 mv(dst, zr);
4235 beqz(t0, done);
4236
4237 // dst = (src + 0.5f) rounded down towards negative infinity
4238 // Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
4239 // RDN is required for fadd_s, RNE gives incorrect results:
4240 // --------------------------------------------------------------------
4241 // fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
4242 // fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
4243 // --------------------------------------------------------------------
4244 // fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
4245 // fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
4246 // --------------------------------------------------------------------
4247 fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
4248 fcvt_w_s(dst, ftmp, RoundingMode::rdn);
4249
4250 bind(done);
4251 }
4252
4253 // java.lang.Math.round(double a)
4254 // Returns the closest long to the argument, with ties rounding to positive infinity.
4255 void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
4256 // this instructions calling sequence provides performance improvement on all tested devices;
4257 // don't change it without re-verification
4258 Label done;
4259 mv(t0, julong_cast(0.5));
4260 fmv_d_x(ftmp, t0);
4261
4262 // dst = 0 if NaN
4263 feq_d(t0, src, src); // replacing fclass with feq as performance optimization
4264 mv(dst, zr);
4265 beqz(t0, done);
4266
4267 // dst = (src + 0.5) rounded down towards negative infinity
4268 fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results
4269 fcvt_l_d(dst, ftmp, RoundingMode::rdn);
4270
4271 bind(done);
4272 }
4273
4274 #define FCVT_SAFE(FLOATCVT, FLOATSIG) \
4275 void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
4276 Label done; \
4277 assert_different_registers(dst, tmp); \
4278 fclass_##FLOATSIG(tmp, src); \
4279 mv(dst, zr); \
4280 /* check if src is NaN */ \
4281 andi(tmp, tmp, 0b1100000000); \
4282 bnez(tmp, done); \
4283 FLOATCVT(dst, src); \
4284 bind(done); \
4285 }
4286
4287 FCVT_SAFE(fcvt_w_s, s);
4288 FCVT_SAFE(fcvt_l_s, s);
4289 FCVT_SAFE(fcvt_w_d, d);
4290 FCVT_SAFE(fcvt_l_d, d);
4291
4292 #undef FCVT_SAFE
4293
4294 #define FCMP(FLOATTYPE, FLOATSIG) \
4295 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \
4296 FloatRegister Rs2, int unordered_result) { \
4297 Label Ldone; \
4298 if (unordered_result < 0) { \
4299 /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \
4300 /* installs 1 if gt else 0 */ \
4301 flt_##FLOATSIG(result, Rs2, Rs1); \
4302 /* Rs1 > Rs2, install 1 */ \
4303 bgtz(result, Ldone); \
4304 feq_##FLOATSIG(result, Rs1, Rs2); \
4305 addi(result, result, -1); \
4306 /* Rs1 = Rs2, install 0 */ \
4307 /* NaN or Rs1 < Rs2, install -1 */ \
4308 bind(Ldone); \
4309 } else { \
4310 /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \
4311 /* installs 1 if gt or unordered else 0 */ \
4312 flt_##FLOATSIG(result, Rs1, Rs2); \
4313 /* Rs1 < Rs2, install -1 */ \
4314 bgtz(result, Ldone); \
4315 feq_##FLOATSIG(result, Rs1, Rs2); \
4316 addi(result, result, -1); \
4317 /* Rs1 = Rs2, install 0 */ \
4318 /* NaN or Rs1 > Rs2, install 1 */ \
4319 bind(Ldone); \
4320 neg(result, result); \
4321 } \
4322 }
4323
4324 FCMP(float, s);
4325 FCMP(double, d);
4326
4327 #undef FCMP
4328
4329 // Zero words; len is in bytes
4330 // Destroys all registers except addr
4331 // len must be a nonzero multiple of wordSize
4332 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) {
4333 assert_different_registers(addr, len, tmp, t0, t1);
4334
4335 #ifdef ASSERT
4336 {
4337 Label L;
4338 andi(t0, len, BytesPerWord - 1);
4339 beqz(t0, L);
4340 stop("len is not a multiple of BytesPerWord");
4341 bind(L);
4342 }
4343 #endif // ASSERT
4344
4345 #ifndef PRODUCT
4346 block_comment("zero memory");
4347 #endif // PRODUCT
4348
4349 Label loop;
4350 Label entry;
4351
4352 // Algorithm:
4353 //
4354 // t0 = cnt & 7
4355 // cnt -= t0
4356 // p += t0
4357 // switch (t0) {
4358 // do {
4359 // cnt -= 8
4360 // p[-8] = 0
4361 // case 7:
4362 // p[-7] = 0
4363 // case 6:
4364 // p[-6] = 0
4365 // ...
4366 // case 1:
4367 // p[-1] = 0
4368 // case 0:
4369 // p += 8
4370 // } while (cnt)
4371 // }
4372
4373 const int unroll = 8; // Number of sd(zr) instructions we'll unroll
4374
4375 srli(len, len, LogBytesPerWord);
4376 andi(t0, len, unroll - 1); // t0 = cnt % unroll
4377 sub(len, len, t0); // cnt -= unroll
4378 // tmp always points to the end of the region we're about to zero
4379 shadd(tmp, t0, addr, t1, LogBytesPerWord);
4380 la(t1, entry);
4381 slli(t0, t0, 2);
4382 sub(t1, t1, t0);
4383 jr(t1);
4384 bind(loop);
4385 sub(len, len, unroll);
4386 for (int i = -unroll; i < 0; i++) {
4387 sd(zr, Address(tmp, i * wordSize));
4388 }
4389 bind(entry);
4390 add(tmp, tmp, unroll * wordSize);
4391 bnez(len, loop);
4392 }
4393
4394 // shift left by shamt and add
4395 // Rd = (Rs1 << shamt) + Rs2
4396 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) {
4397 if (UseZba) {
4398 if (shamt == 1) {
4399 sh1add(Rd, Rs1, Rs2);
4400 return;
4401 } else if (shamt == 2) {
4402 sh2add(Rd, Rs1, Rs2);
4403 return;
4404 } else if (shamt == 3) {
4405 sh3add(Rd, Rs1, Rs2);
4406 return;
4407 }
4408 }
4409
4410 if (shamt != 0) {
4411 slli(tmp, Rs1, shamt);
4412 add(Rd, Rs2, tmp);
4413 } else {
4414 add(Rd, Rs1, Rs2);
4415 }
4416 }
4417
4418 void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
4419 if (UseZba && bits == 32) {
4420 zext_w(dst, src);
4421 return;
4422 }
4423
4424 if (UseZbb && bits == 16) {
4425 zext_h(dst, src);
4426 return;
4427 }
4428
4429 if (bits == 8) {
4430 zext_b(dst, src);
4431 } else {
4432 slli(dst, src, XLEN - bits);
4433 srli(dst, dst, XLEN - bits);
4434 }
4435 }
4436
4437 void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
4438 if (UseZbb) {
4439 if (bits == 8) {
4440 sext_b(dst, src);
4441 return;
4442 } else if (bits == 16) {
4443 sext_h(dst, src);
4444 return;
4445 }
4446 }
4447
4448 if (bits == 32) {
4449 sext_w(dst, src);
4450 } else {
4451 slli(dst, src, XLEN - bits);
4452 srai(dst, dst, XLEN - bits);
4453 }
4454 }
4455
4456 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp)
4457 {
4458 if (src1 == src2) {
4459 mv(dst, zr);
4460 return;
4461 }
4462 Label done;
4463 Register left = src1;
4464 Register right = src2;
4465 if (dst == src1) {
4466 assert_different_registers(dst, src2, tmp);
4467 mv(tmp, src1);
4468 left = tmp;
4469 } else if (dst == src2) {
4470 assert_different_registers(dst, src1, tmp);
4471 mv(tmp, src2);
4472 right = tmp;
4473 }
4474
4475 // installs 1 if gt else 0
4476 slt(dst, right, left);
4477 bnez(dst, done);
4478 slt(dst, left, right);
4479 // dst = -1 if lt; else if eq , dst = 0
4480 neg(dst, dst);
4481 bind(done);
4482 }
4483
4484 // The java_calling_convention describes stack locations as ideal slots on
4485 // a frame with no abi restrictions. Since we must observe abi restrictions
4486 // (like the placement of the register window) the slots must be biased by
4487 // the following value.
4488 static int reg2offset_in(VMReg r) {
4489 // Account for saved fp and ra
4490 // This should really be in_preserve_stack_slots
4491 return r->reg2stack() * VMRegImpl::stack_slot_size;
4492 }
4493
4494 static int reg2offset_out(VMReg r) {
4495 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
4496 }
4497
4498 // On 64 bit we will store integer like items to the stack as
4499 // 64 bits items (riscv64 abi) even though java would only store
4500 // 32bits for a parameter. On 32bit it will simply be 32 bits
4501 // So this routine will do 32->32 on 32bit and 32->64 on 64bit
4502 void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) {
4503 if (src.first()->is_stack()) {
4504 if (dst.first()->is_stack()) {
4505 // stack to stack
4506 ld(tmp, Address(fp, reg2offset_in(src.first())));
4507 sd(tmp, Address(sp, reg2offset_out(dst.first())));
4508 } else {
4509 // stack to reg
4510 lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4511 }
4512 } else if (dst.first()->is_stack()) {
4513 // reg to stack
4514 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
4515 } else {
4516 if (dst.first() != src.first()) {
4517 sign_extend(dst.first()->as_Register(), src.first()->as_Register(), 32);
4518 }
4519 }
4520 }
4521
4522 // An oop arg. Must pass a handle not the oop itself
4523 void MacroAssembler::object_move(OopMap* map,
4524 int oop_handle_offset,
4525 int framesize_in_slots,
4526 VMRegPair src,
4527 VMRegPair dst,
4528 bool is_receiver,
4529 int* receiver_offset) {
4530 assert_cond(map != nullptr && receiver_offset != nullptr);
4531
4532 // must pass a handle. First figure out the location we use as a handle
4533 Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();
4534
4535 // See if oop is null if it is we need no handle
4536
4537 if (src.first()->is_stack()) {
4538 // Oop is already on the stack as an argument
4539 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
4540 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
4541 if (is_receiver) {
4542 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
4543 }
4544
4545 ld(t0, Address(fp, reg2offset_in(src.first())));
4546 la(rHandle, Address(fp, reg2offset_in(src.first())));
4547 // conditionally move a null
4548 Label notZero1;
4549 bnez(t0, notZero1);
4550 mv(rHandle, zr);
4551 bind(notZero1);
4552 } else {
4553
4554 // Oop is in a register we must store it to the space we reserve
4555 // on the stack for oop_handles and pass a handle if oop is non-null
4556
4557 const Register rOop = src.first()->as_Register();
4558 int oop_slot = -1;
4559 if (rOop == j_rarg0) {
4560 oop_slot = 0;
4561 } else if (rOop == j_rarg1) {
4562 oop_slot = 1;
4563 } else if (rOop == j_rarg2) {
4564 oop_slot = 2;
4565 } else if (rOop == j_rarg3) {
4566 oop_slot = 3;
4567 } else if (rOop == j_rarg4) {
4568 oop_slot = 4;
4569 } else if (rOop == j_rarg5) {
4570 oop_slot = 5;
4571 } else if (rOop == j_rarg6) {
4572 oop_slot = 6;
4573 } else {
4574 assert(rOop == j_rarg7, "wrong register");
4575 oop_slot = 7;
4576 }
4577
4578 oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
4579 int offset = oop_slot * VMRegImpl::stack_slot_size;
4580
4581 map->set_oop(VMRegImpl::stack2reg(oop_slot));
4582 // Store oop in handle area, may be null
4583 sd(rOop, Address(sp, offset));
4584 if (is_receiver) {
4585 *receiver_offset = offset;
4586 }
4587
4588 //rOop maybe the same as rHandle
4589 if (rOop == rHandle) {
4590 Label isZero;
4591 beqz(rOop, isZero);
4592 la(rHandle, Address(sp, offset));
4593 bind(isZero);
4594 } else {
4595 Label notZero2;
4596 la(rHandle, Address(sp, offset));
4597 bnez(rOop, notZero2);
4598 mv(rHandle, zr);
4599 bind(notZero2);
4600 }
4601 }
4602
4603 // If arg is on the stack then place it otherwise it is already in correct reg.
4604 if (dst.first()->is_stack()) {
4605 sd(rHandle, Address(sp, reg2offset_out(dst.first())));
4606 }
4607 }
4608
4609 // A float arg may have to do float reg int reg conversion
4610 void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) {
4611 assert(src.first()->is_stack() && dst.first()->is_stack() ||
4612 src.first()->is_reg() && dst.first()->is_reg() ||
4613 src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
4614 if (src.first()->is_stack()) {
4615 if (dst.first()->is_stack()) {
4616 lwu(tmp, Address(fp, reg2offset_in(src.first())));
4617 sw(tmp, Address(sp, reg2offset_out(dst.first())));
4618 } else if (dst.first()->is_Register()) {
4619 lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4620 } else {
4621 ShouldNotReachHere();
4622 }
4623 } else if (src.first() != dst.first()) {
4624 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
4625 fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
4626 } else {
4627 ShouldNotReachHere();
4628 }
4629 }
4630 }
4631
4632 // A long move
4633 void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) {
4634 if (src.first()->is_stack()) {
4635 if (dst.first()->is_stack()) {
4636 // stack to stack
4637 ld(tmp, Address(fp, reg2offset_in(src.first())));
4638 sd(tmp, Address(sp, reg2offset_out(dst.first())));
4639 } else {
4640 // stack to reg
4641 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4642 }
4643 } else if (dst.first()->is_stack()) {
4644 // reg to stack
4645 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
4646 } else {
4647 if (dst.first() != src.first()) {
4648 mv(dst.first()->as_Register(), src.first()->as_Register());
4649 }
4650 }
4651 }
4652
4653 // A double move
4654 void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) {
4655 assert(src.first()->is_stack() && dst.first()->is_stack() ||
4656 src.first()->is_reg() && dst.first()->is_reg() ||
4657 src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
4658 if (src.first()->is_stack()) {
4659 if (dst.first()->is_stack()) {
4660 ld(tmp, Address(fp, reg2offset_in(src.first())));
4661 sd(tmp, Address(sp, reg2offset_out(dst.first())));
4662 } else if (dst.first()-> is_Register()) {
4663 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4664 } else {
4665 ShouldNotReachHere();
4666 }
4667 } else if (src.first() != dst.first()) {
4668 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
4669 fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
4670 } else {
4671 ShouldNotReachHere();
4672 }
4673 }
4674 }
4675
4676 void MacroAssembler::rt_call(address dest, Register tmp) {
4677 CodeBlob *cb = CodeCache::find_blob(dest);
4678 RuntimeAddress target(dest);
4679 if (cb) {
4680 far_call(target);
4681 } else {
4682 relocate(target.rspec(), [&] {
4683 int32_t offset;
4684 la_patchable(tmp, target, offset);
4685 jalr(x1, tmp, offset);
4686 });
4687 }
4688 }
4689
4690 void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) {
4691 assert(bit_pos < 64, "invalid bit range");
4692 if (UseZbs) {
4693 bexti(Rd, Rs, bit_pos);
4694 return;
4695 }
4696 int64_t imm = (int64_t)(1UL << bit_pos);
4697 if (is_simm12(imm)) {
4698 and_imm12(Rd, Rs, imm);
4699 } else {
4700 srli(Rd, Rs, bit_pos);
4701 and_imm12(Rd, Rd, 1);
4702 }
4703 }
4704
4705 // Implements lightweight-locking.
4706 // Branches to slow upon failure to lock the object.
4707 // Falls through upon success.
4708 //
4709 // - obj: the object to be locked
4710 // - hdr: the header, already loaded from obj, will be destroyed
4711 // - tmp1, tmp2: temporary registers, will be destroyed
4712 void MacroAssembler::lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) {
4713 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4714 assert_different_registers(obj, hdr, tmp1, tmp2, t0);
4715
4716 // Check if we would have space on lock-stack for the object.
4717 lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
4718 mv(tmp2, (unsigned)LockStack::end_offset());
4719 bge(tmp1, tmp2, slow, /* is_far */ true);
4720
4721 // Load (object->mark() | 1) into hdr
4722 ori(hdr, hdr, markWord::unlocked_value);
4723 // Clear lock-bits, into tmp2
4724 xori(tmp2, hdr, markWord::unlocked_value);
4725
4726 // Try to swing header from unlocked to locked
4727 Label success;
4728 cmpxchgptr(hdr, tmp2, obj, tmp1, success, &slow);
4729 bind(success);
4730
4731 // After successful lock, push object on lock-stack
4732 lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
4733 add(tmp2, xthread, tmp1);
4734 sd(obj, Address(tmp2, 0));
4735 addw(tmp1, tmp1, oopSize);
4736 sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
4737 }
4738
4739 // Implements ligthweight-unlocking.
4740 // Branches to slow upon failure.
4741 // Falls through upon success.
4742 //
4743 // - obj: the object to be unlocked
4744 // - hdr: the (pre-loaded) header of the object
4745 // - tmp1, tmp2: temporary registers
4746 void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) {
4747 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4748 assert_different_registers(obj, hdr, tmp1, tmp2, t0);
4749
4750 #ifdef ASSERT
4751 {
4752 // The following checks rely on the fact that LockStack is only ever modified by
4753 // its owning thread, even if the lock got inflated concurrently; removal of LockStack
4754 // entries after inflation will happen delayed in that case.
4755
4756 // Check for lock-stack underflow.
4757 Label stack_ok;
4758 lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
4759 mv(tmp2, (unsigned)LockStack::start_offset());
4760 bgt(tmp1, tmp2, stack_ok);
4761 STOP("Lock-stack underflow");
4762 bind(stack_ok);
4763 }
4764 {
4765 // Check if the top of the lock-stack matches the unlocked object.
4766 Label tos_ok;
4767 subw(tmp1, tmp1, oopSize);
4768 add(tmp1, xthread, tmp1);
4769 ld(tmp1, Address(tmp1, 0));
4770 beq(tmp1, obj, tos_ok);
4771 STOP("Top of lock-stack does not match the unlocked object");
4772 bind(tos_ok);
4773 }
4774 {
4775 // Check that hdr is fast-locked.
4776 Label hdr_ok;
4777 andi(tmp1, hdr, markWord::lock_mask_in_place);
4778 beqz(tmp1, hdr_ok);
4779 STOP("Header is not fast-locked");
4780 bind(hdr_ok);
4781 }
4782 #endif
4783
4784 // Load the new header (unlocked) into tmp1
4785 ori(tmp1, hdr, markWord::unlocked_value);
4786
4787 // Try to swing header from locked to unlocked
4788 Label success;
4789 cmpxchgptr(hdr, tmp1, obj, tmp2, success, &slow);
4790 bind(success);
4791
4792 // After successful unlock, pop object from lock-stack
4793 lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
4794 subw(tmp1, tmp1, oopSize);
4795 #ifdef ASSERT
4796 add(tmp2, xthread, tmp1);
4797 sd(zr, Address(tmp2, 0));
4798 #endif
4799 sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
4800 }