1 /* 2 * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 4 * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 * 25 */ 26 27 #include "precompiled.hpp" 28 #include "asm/assembler.hpp" 29 #include "asm/assembler.inline.hpp" 30 #include "compiler/disassembler.hpp" 31 #include "gc/shared/barrierSet.hpp" 32 #include "gc/shared/barrierSetAssembler.hpp" 33 #include "gc/shared/cardTable.hpp" 34 #include "gc/shared/cardTableBarrierSet.hpp" 35 #include "gc/shared/collectedHeap.hpp" 36 #include "interpreter/bytecodeHistogram.hpp" 37 #include "interpreter/interpreter.hpp" 38 #include "memory/resourceArea.hpp" 39 #include "memory/universe.hpp" 40 #include "nativeInst_riscv.hpp" 41 #include "oops/accessDecorators.hpp" 42 #include "oops/compressedOops.inline.hpp" 43 #include "oops/klass.inline.hpp" 44 #include "oops/oop.hpp" 45 #include "runtime/interfaceSupport.inline.hpp" 46 #include "runtime/javaThread.hpp" 47 #include "runtime/jniHandles.inline.hpp" 48 #include "runtime/sharedRuntime.hpp" 49 #include "runtime/stubRoutines.hpp" 50 #include "utilities/globalDefinitions.hpp" 51 #include "utilities/powerOfTwo.hpp" 52 #ifdef COMPILER2 53 #include "opto/compile.hpp" 54 #include "opto/node.hpp" 55 #include "opto/output.hpp" 56 #endif 57 58 #ifdef PRODUCT 59 #define BLOCK_COMMENT(str) /* nothing */ 60 #else 61 #define BLOCK_COMMENT(str) block_comment(str) 62 #endif 63 #define STOP(str) stop(str); 64 #define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") 65 66 static void pass_arg0(MacroAssembler* masm, Register arg) { 67 if (c_rarg0 != arg) { 68 masm->mv(c_rarg0, arg); 69 } 70 } 71 72 static void pass_arg1(MacroAssembler* masm, Register arg) { 73 if (c_rarg1 != arg) { 74 masm->mv(c_rarg1, arg); 75 } 76 } 77 78 static void pass_arg2(MacroAssembler* masm, Register arg) { 79 if (c_rarg2 != arg) { 80 masm->mv(c_rarg2, arg); 81 } 82 } 83 84 static void pass_arg3(MacroAssembler* masm, Register arg) { 85 if (c_rarg3 != arg) { 86 masm->mv(c_rarg3, arg); 87 } 88 } 89 90 void MacroAssembler::push_cont_fastpath(Register java_thread) { 91 if (!Continuations::enabled()) return; 92 Label done; 93 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset())); 94 bleu(sp, t0, done); 95 sd(sp, Address(java_thread, JavaThread::cont_fastpath_offset())); 96 bind(done); 97 } 98 99 void MacroAssembler::pop_cont_fastpath(Register java_thread) { 100 if (!Continuations::enabled()) return; 101 Label done; 102 ld(t0, 
Address(java_thread, JavaThread::cont_fastpath_offset())); 103 bltu(sp, t0, done); 104 sd(zr, Address(java_thread, JavaThread::cont_fastpath_offset())); 105 bind(done); 106 } 107 108 int MacroAssembler::align(int modulus, int extra_offset) { 109 CompressibleRegion cr(this); 110 intptr_t before = offset(); 111 while ((offset() + extra_offset) % modulus != 0) { nop(); } 112 return (int)(offset() - before); 113 } 114 115 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 116 call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); 117 } 118 119 // Implementation of call_VM versions 120 121 void MacroAssembler::call_VM(Register oop_result, 122 address entry_point, 123 bool check_exceptions) { 124 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 125 } 126 127 void MacroAssembler::call_VM(Register oop_result, 128 address entry_point, 129 Register arg_1, 130 bool check_exceptions) { 131 pass_arg1(this, arg_1); 132 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 133 } 134 135 void MacroAssembler::call_VM(Register oop_result, 136 address entry_point, 137 Register arg_1, 138 Register arg_2, 139 bool check_exceptions) { 140 assert(arg_1 != c_rarg2, "smashed arg"); 141 pass_arg2(this, arg_2); 142 pass_arg1(this, arg_1); 143 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 144 } 145 146 void MacroAssembler::call_VM(Register oop_result, 147 address entry_point, 148 Register arg_1, 149 Register arg_2, 150 Register arg_3, 151 bool check_exceptions) { 152 assert(arg_1 != c_rarg3, "smashed arg"); 153 assert(arg_2 != c_rarg3, "smashed arg"); 154 pass_arg3(this, arg_3); 155 156 assert(arg_1 != c_rarg2, "smashed arg"); 157 pass_arg2(this, arg_2); 158 159 pass_arg1(this, arg_1); 160 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 161 } 162 163 void MacroAssembler::call_VM(Register oop_result, 164 Register last_java_sp, 165 address entry_point, 166 int number_of_arguments, 167 bool check_exceptions) { 168 call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 169 } 170 171 void MacroAssembler::call_VM(Register oop_result, 172 Register last_java_sp, 173 address entry_point, 174 Register arg_1, 175 bool check_exceptions) { 176 pass_arg1(this, arg_1); 177 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 178 } 179 180 void MacroAssembler::call_VM(Register oop_result, 181 Register last_java_sp, 182 address entry_point, 183 Register arg_1, 184 Register arg_2, 185 bool check_exceptions) { 186 187 assert(arg_1 != c_rarg2, "smashed arg"); 188 pass_arg2(this, arg_2); 189 pass_arg1(this, arg_1); 190 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 191 } 192 193 void MacroAssembler::call_VM(Register oop_result, 194 Register last_java_sp, 195 address entry_point, 196 Register arg_1, 197 Register arg_2, 198 Register arg_3, 199 bool check_exceptions) { 200 assert(arg_1 != c_rarg3, "smashed arg"); 201 assert(arg_2 != c_rarg3, "smashed arg"); 202 pass_arg3(this, arg_3); 203 assert(arg_1 != c_rarg2, "smashed arg"); 204 pass_arg2(this, arg_2); 205 pass_arg1(this, arg_1); 206 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 207 } 208 209 void MacroAssembler::post_call_nop() { 210 if (!Continuations::enabled()) { 211 return; 212 } 213 relocate(post_call_nop_Relocation::spec(), [&] { 214 InlineSkippedInstructionsCounter skipCounter(this); 215 nop(); 216 li32(zr, 0); 
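    // li32(zr, 0) expands to lui + addiw targeting the zero register, which the
    // hardware ignores; together with the preceding nop this just reserves a fixed,
    // side-effect-free instruction window at the call return site (the immediate
    // fields can be rewritten later without changing behavior).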
217 }); 218 } 219 220 // these are no-ops overridden by InterpreterMacroAssembler 221 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} 222 void MacroAssembler::check_and_handle_popframe(Register java_thread) {} 223 224 // Calls to C land 225 // 226 // When entering C land, the fp, & esp of the last Java frame have to be recorded 227 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 228 // has to be reset to 0. This is required to allow proper stack traversal. 229 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 230 Register last_java_fp, 231 Register last_java_pc, 232 Register tmp) { 233 234 if (last_java_pc->is_valid()) { 235 sd(last_java_pc, Address(xthread, 236 JavaThread::frame_anchor_offset() + 237 JavaFrameAnchor::last_Java_pc_offset())); 238 } 239 240 // determine last_java_sp register 241 if (last_java_sp == sp) { 242 mv(tmp, sp); 243 last_java_sp = tmp; 244 } else if (!last_java_sp->is_valid()) { 245 last_java_sp = esp; 246 } 247 248 sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); 249 250 // last_java_fp is optional 251 if (last_java_fp->is_valid()) { 252 sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); 253 } 254 } 255 256 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 257 Register last_java_fp, 258 address last_java_pc, 259 Register tmp) { 260 assert(last_java_pc != nullptr, "must provide a valid PC"); 261 262 la(tmp, last_java_pc); 263 sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); 264 265 set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); 266 } 267 268 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 269 Register last_java_fp, 270 Label &L, 271 Register tmp) { 272 if (L.is_bound()) { 273 set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); 274 } else { 275 L.add_patch_at(code(), locator()); 276 IncompressibleRegion ir(this); // the label address will be patched back. 
277 set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); 278 } 279 } 280 281 void MacroAssembler::reset_last_Java_frame(bool clear_fp) { 282 // we must set sp to zero to clear frame 283 sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); 284 285 // must clear fp, so that compiled frames are not confused; it is 286 // possible that we need it only for debugging 287 if (clear_fp) { 288 sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); 289 } 290 291 // Always clear the pc because it could have been set by make_walkable() 292 sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); 293 } 294 295 void MacroAssembler::call_VM_base(Register oop_result, 296 Register java_thread, 297 Register last_java_sp, 298 address entry_point, 299 int number_of_arguments, 300 bool check_exceptions) { 301 // determine java_thread register 302 if (!java_thread->is_valid()) { 303 java_thread = xthread; 304 } 305 // determine last_java_sp register 306 if (!last_java_sp->is_valid()) { 307 last_java_sp = esp; 308 } 309 310 // debugging support 311 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 312 assert(java_thread == xthread, "unexpected register"); 313 314 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 315 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 316 317 // push java thread (becomes first argument of C function) 318 mv(c_rarg0, java_thread); 319 320 // set last Java frame before call 321 assert(last_java_sp != fp, "can't use fp"); 322 323 Label l; 324 set_last_Java_frame(last_java_sp, fp, l, t0); 325 326 // do the call, remove parameters 327 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); 328 329 // reset last Java frame 330 // Only interpreter should have to clear fp 331 reset_last_Java_frame(true); 332 333 // C++ interp handles this in the interpreter 334 check_and_handle_popframe(java_thread); 335 check_and_handle_earlyret(java_thread); 336 337 if (check_exceptions) { 338 // check for pending exceptions (java_thread is set upon return) 339 ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); 340 Label ok; 341 beqz(t0, ok); 342 RuntimeAddress target(StubRoutines::forward_exception_entry()); 343 relocate(target.rspec(), [&] { 344 int32_t offset; 345 la_patchable(t0, target, offset); 346 jalr(x0, t0, offset); 347 }); 348 bind(ok); 349 } 350 351 // get oop result if there is one and reset the value in the thread 352 if (oop_result->is_valid()) { 353 get_vm_result(oop_result, java_thread); 354 } 355 } 356 357 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { 358 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 359 sd(zr, Address(java_thread, JavaThread::vm_result_offset())); 360 verify_oop_msg(oop_result, "broken oop in call_VM_base"); 361 } 362 363 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { 364 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); 365 sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); 366 } 367 368 void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { 369 assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); 370 assert_different_registers(klass, xthread, tmp); 371 372 Label L_fallthrough, L_tmp; 373 if (L_fast_path == nullptr) { 374 L_fast_path = 
&L_fallthrough; 375 } else if (L_slow_path == nullptr) { 376 L_slow_path = &L_fallthrough; 377 } 378 379 // Fast path check: class is fully initialized 380 lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); 381 sub(tmp, tmp, InstanceKlass::fully_initialized); 382 beqz(tmp, *L_fast_path); 383 384 // Fast path check: current thread is initializer thread 385 ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); 386 387 if (L_slow_path == &L_fallthrough) { 388 beq(xthread, tmp, *L_fast_path); 389 bind(*L_slow_path); 390 } else if (L_fast_path == &L_fallthrough) { 391 bne(xthread, tmp, *L_slow_path); 392 bind(*L_fast_path); 393 } else { 394 Unimplemented(); 395 } 396 } 397 398 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { 399 if (!VerifyOops) { return; } 400 401 // Pass register number to verify_oop_subroutine 402 const char* b = nullptr; 403 { 404 ResourceMark rm; 405 stringStream ss; 406 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line); 407 b = code_string(ss.as_string()); 408 } 409 BLOCK_COMMENT("verify_oop {"); 410 411 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 412 413 mv(c_rarg0, reg); // c_rarg0 : x10 414 { 415 // The length of the instruction sequence emitted should not depend 416 // on the address of the char buffer so that the size of mach nodes for 417 // scratch emit and normal emit matches. 418 IncompressibleRegion ir(this); // Fixed length 419 movptr(t0, (address) b); 420 } 421 422 // call indirectly to solve generation ordering problem 423 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 424 relocate(target.rspec(), [&] { 425 int32_t offset; 426 la_patchable(t1, target, offset); 427 ld(t1, Address(t1, offset)); 428 }); 429 jalr(t1); 430 431 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 432 433 BLOCK_COMMENT("} verify_oop"); 434 } 435 436 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { 437 if (!VerifyOops) { 438 return; 439 } 440 441 const char* b = nullptr; 442 { 443 ResourceMark rm; 444 stringStream ss; 445 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line); 446 b = code_string(ss.as_string()); 447 } 448 BLOCK_COMMENT("verify_oop_addr {"); 449 450 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 451 452 if (addr.uses(sp)) { 453 la(x10, addr); 454 ld(x10, Address(x10, 4 * wordSize)); 455 } else { 456 ld(x10, addr); 457 } 458 459 { 460 // The length of the instruction sequence emitted should not depend 461 // on the address of the char buffer so that the size of mach nodes for 462 // scratch emit and normal emit matches. 463 IncompressibleRegion ir(this); // Fixed length 464 movptr(t0, (address) b); 465 } 466 467 // call indirectly to solve generation ordering problem 468 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 469 relocate(target.rspec(), [&] { 470 int32_t offset; 471 la_patchable(t1, target, offset); 472 ld(t1, Address(t1, offset)); 473 }); 474 jalr(t1); 475 476 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 477 478 BLOCK_COMMENT("} verify_oop_addr"); 479 } 480 481 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 482 int extra_slot_offset) { 483 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
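  // The returned address is esp + arg_slot * stackElementSize + offset, where
  // offset = Interpreter::expr_offset_in_bytes(extra_slot_offset); the register
  // case below computes the same value with shadd instead of a constant fold.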
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  if (arg_slot.is_constant()) {
    return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
  } else {
    assert_different_registers(t0, arg_slot.as_register());
    shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
    return Address(t0, offset);
  }
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
{
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr(" pc = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr(" x0 = 0x%016lx", regs[0]);
      tty->print_cr(" x1 = 0x%016lx", regs[1]);
      tty->print_cr(" x2 = 0x%016lx", regs[2]);
      tty->print_cr(" x3 = 0x%016lx", regs[3]);
      tty->print_cr(" x4 = 0x%016lx", regs[4]);
      tty->print_cr(" x5 = 0x%016lx", regs[5]);
      tty->print_cr(" x6 = 0x%016lx", regs[6]);
      tty->print_cr(" x7 = 0x%016lx", regs[7]);
      tty->print_cr(" x8 = 0x%016lx", regs[8]);
      tty->print_cr(" x9 = 0x%016lx", regs[9]);
      tty->print_cr("x10 = 0x%016lx", regs[10]);
      tty->print_cr("x11 = 0x%016lx", regs[11]);
      tty->print_cr("x12 = 0x%016lx", regs[12]);
      tty->print_cr("x13 = 0x%016lx", regs[13]);
      tty->print_cr("x14 = 0x%016lx", regs[14]);
      tty->print_cr("x15 = 0x%016lx", regs[15]);
      tty->print_cr("x16 = 0x%016lx", regs[16]);
      tty->print_cr("x17 = 0x%016lx", regs[17]);
      tty->print_cr("x18 = 0x%016lx", regs[18]);
      tty->print_cr("x19 = 0x%016lx", regs[19]);
      tty->print_cr("x20 = 0x%016lx", regs[20]);
      tty->print_cr("x21 = 0x%016lx", regs[21]);
      tty->print_cr("x22 = 0x%016lx", regs[22]);
      tty->print_cr("x23 = 0x%016lx", regs[23]);
      tty->print_cr("x24 = 0x%016lx", regs[24]);
      tty->print_cr("x25 = 0x%016lx", regs[25]);
      tty->print_cr("x26 = 0x%016lx", regs[26]);
      tty->print_cr("x27 = 0x%016lx", regs[27]);
      tty->print_cr("x28 = 0x%016lx", regs[28]);
      tty->print_cr("x29 = 0x%016lx", regs[29]);
      tty->print_cr("x30 = 0x%016lx", regs[30]);
      tty->print_cr("x31 = 0x%016lx", regs[31]);
      BREAKPOINT;
    }
  }
  fatal("DEBUG MESSAGE: %s", msg);
}

void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done, tagged, weak_tagged;

  beqz(value, done);           // Use null as-is.
  // Test for tag.
  andi(tmp1, value, JNIHandles::tag_mask);
  bnez(tmp1, tagged);

  // Resolve local handle
  access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, value, Address(value, 0), tmp1, tmp2);
  verify_oop(value);
  j(done);

  bind(tagged);
  // Test for jweak tag.
577 STATIC_ASSERT(JNIHandles::TypeTag::weak_global == 0b1); 578 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::weak_global)); 579 bnez(tmp1, weak_tagged); 580 581 // Resolve global handle 582 access_load_at(T_OBJECT, IN_NATIVE, value, 583 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 584 verify_oop(value); 585 j(done); 586 587 bind(weak_tagged); 588 // Resolve jweak. 589 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, 590 Address(value, -JNIHandles::TypeTag::weak_global), tmp1, tmp2); 591 verify_oop(value); 592 593 bind(done); 594 } 595 596 void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) { 597 assert_different_registers(value, tmp1, tmp2); 598 Label done; 599 600 beqz(value, done); // Use null as-is. 601 602 #ifdef ASSERT 603 { 604 STATIC_ASSERT(JNIHandles::TypeTag::global == 0b10); 605 Label valid_global_tag; 606 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::global)); // Test for global tag. 607 bnez(tmp1, valid_global_tag); 608 stop("non global jobject using resolve_global_jobject"); 609 bind(valid_global_tag); 610 } 611 #endif 612 613 // Resolve global handle 614 access_load_at(T_OBJECT, IN_NATIVE, value, 615 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 616 verify_oop(value); 617 618 bind(done); 619 } 620 621 void MacroAssembler::stop(const char* msg) { 622 BLOCK_COMMENT(msg); 623 illegal_instruction(Assembler::csr::time); 624 emit_int64((uintptr_t)msg); 625 } 626 627 void MacroAssembler::unimplemented(const char* what) { 628 const char* buf = nullptr; 629 { 630 ResourceMark rm; 631 stringStream ss; 632 ss.print("unimplemented: %s", what); 633 buf = code_string(ss.as_string()); 634 } 635 stop(buf); 636 } 637 638 void MacroAssembler::emit_static_call_stub() { 639 IncompressibleRegion ir(this); // Fixed length: see CompiledStaticCall::to_interp_stub_size(). 640 // CompiledDirectStaticCall::set_to_interpreted knows the 641 // exact layout of this stub. 642 643 mov_metadata(xmethod, (Metadata*)nullptr); 644 645 // Jump to the entry point of the c2i stub. 
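  // movptr(t0, 0, offset) emits a fixed-length sequence loading a placeholder
  // address; the real c2i entry is patched into it later, which is why the stub
  // layout must stay in sync with CompiledDirectStaticCall::set_to_interpreted
  // (see the comments above).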
646 int32_t offset = 0; 647 movptr(t0, 0, offset); 648 jalr(x0, t0, offset); 649 } 650 651 void MacroAssembler::call_VM_leaf_base(address entry_point, 652 int number_of_arguments, 653 Label *retaddr) { 654 push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp 655 call(entry_point); 656 if (retaddr != nullptr) { 657 bind(*retaddr); 658 } 659 pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp 660 } 661 662 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 663 call_VM_leaf_base(entry_point, number_of_arguments); 664 } 665 666 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 667 pass_arg0(this, arg_0); 668 call_VM_leaf_base(entry_point, 1); 669 } 670 671 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 672 pass_arg0(this, arg_0); 673 pass_arg1(this, arg_1); 674 call_VM_leaf_base(entry_point, 2); 675 } 676 677 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, 678 Register arg_1, Register arg_2) { 679 pass_arg0(this, arg_0); 680 pass_arg1(this, arg_1); 681 pass_arg2(this, arg_2); 682 call_VM_leaf_base(entry_point, 3); 683 } 684 685 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 686 pass_arg0(this, arg_0); 687 MacroAssembler::call_VM_leaf_base(entry_point, 1); 688 } 689 690 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 691 692 assert(arg_0 != c_rarg1, "smashed arg"); 693 pass_arg1(this, arg_1); 694 pass_arg0(this, arg_0); 695 MacroAssembler::call_VM_leaf_base(entry_point, 2); 696 } 697 698 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 699 assert(arg_0 != c_rarg2, "smashed arg"); 700 assert(arg_1 != c_rarg2, "smashed arg"); 701 pass_arg2(this, arg_2); 702 assert(arg_0 != c_rarg1, "smashed arg"); 703 pass_arg1(this, arg_1); 704 pass_arg0(this, arg_0); 705 MacroAssembler::call_VM_leaf_base(entry_point, 3); 706 } 707 708 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 709 assert(arg_0 != c_rarg3, "smashed arg"); 710 assert(arg_1 != c_rarg3, "smashed arg"); 711 assert(arg_2 != c_rarg3, "smashed arg"); 712 pass_arg3(this, arg_3); 713 assert(arg_0 != c_rarg2, "smashed arg"); 714 assert(arg_1 != c_rarg2, "smashed arg"); 715 pass_arg2(this, arg_2); 716 assert(arg_0 != c_rarg1, "smashed arg"); 717 pass_arg1(this, arg_1); 718 pass_arg0(this, arg_0); 719 MacroAssembler::call_VM_leaf_base(entry_point, 4); 720 } 721 722 void MacroAssembler::la(Register Rd, const address dest) { 723 int64_t offset = dest - pc(); 724 if (is_valid_32bit_offset(offset)) { 725 auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit 726 addi(Rd, Rd, ((int64_t)offset << 52) >> 52); 727 } else { 728 movptr(Rd, dest); 729 } 730 } 731 732 void MacroAssembler::la(Register Rd, const Address &adr) { 733 switch (adr.getMode()) { 734 case Address::literal: { 735 relocInfo::relocType rtype = adr.rspec().reloc()->type(); 736 if (rtype == relocInfo::none) { 737 mv(Rd, (intptr_t)(adr.target())); 738 } else { 739 relocate(adr.rspec(), [&] { 740 movptr(Rd, adr.target()); 741 }); 742 } 743 break; 744 } 745 case Address::base_plus_offset: { 746 Address new_adr = legitimize_address(Rd, adr); 747 if (!(new_adr.base() == Rd && new_adr.offset() == 0)) { 748 addi(Rd, new_adr.base(), new_adr.offset()); 749 } 750 break; 751 } 752 default: 753 ShouldNotReachHere(); 754 } 
}

void MacroAssembler::la(Register Rd, Label &label) {
  IncompressibleRegion ir(this);   // the label address may be patched back.
  wrap_label(Rd, label, &MacroAssembler::la);
}

void MacroAssembler::li16u(Register Rd, uint16_t imm) {
  lui(Rd, (uint32_t)imm << 12);
  srli(Rd, Rd, 12);
}

void MacroAssembler::li32(Register Rd, int32_t imm) {
  // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit
  int64_t upper = imm, lower = imm;
  lower = (imm << 20) >> 20;
  upper -= lower;
  upper = (int32_t)upper;
  // lui Rd, imm[31:12] + imm[11]
  lui(Rd, upper);
  // use addiw to distinguish li32 from li64
  addiw(Rd, Rd, lower);
}

void MacroAssembler::li64(Register Rd, int64_t imm) {
  // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or
  // (imm[31:20] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1.
  int64_t lower = imm & 0xffffffff;
  lower -= ((lower << 44) >> 44);
  int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower;
  int32_t upper = (tmp_imm - (int32_t)lower) >> 32;

  // Load upper 32 bits
  int64_t up = upper, lo = upper;
  lo = (lo << 52) >> 52;
  up -= lo;
  up = (int32_t)up;
  lui(Rd, up);
  addi(Rd, Rd, lo);

  // Load the rest 32 bits.
  slli(Rd, Rd, 12);
  addi(Rd, Rd, (int32_t)lower >> 20);
  slli(Rd, Rd, 12);
  lower = ((int32_t)imm << 12) >> 20;
  addi(Rd, Rd, lower);
  slli(Rd, Rd, 8);
  lower = imm & 0xff;
  addi(Rd, Rd, lower);
}

void MacroAssembler::li(Register Rd, int64_t imm) {
  // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff
  // li -> c.li
  if (do_compress() && (is_simm6(imm) && Rd != x0)) {
    c_li(Rd, imm);
    return;
  }

  int shift = 12;
  int64_t upper = imm, lower = imm;
  // Split imm to a lower 12-bit sign-extended part and the remainder,
  // because addi will sign-extend the lower imm.
  lower = ((int32_t)imm << 20) >> 20;
  upper -= lower;

  // Test whether imm is a 32-bit integer.
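  // The test below checks whether bits 63..31 of imm are all zeros or all ones,
  // i.e. whether imm is a sign-extended 32-bit value. If it is not, the non-zero
  // remainder is materialized recursively and shifted into place; e.g. (illustration)
  // imm = 0x123456789ab gives lower = -0x655, upper = 0x12345679000, shift = 12,
  // so the emitted sequence is li(Rd, 0x12345679); slli(Rd, Rd, 12); addi(Rd, Rd, -0x655).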
822 if (!(((imm) & ~(int64_t)0x7fffffff) == 0 || 823 (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) { 824 while (((upper >> shift) & 1) == 0) { shift++; } 825 upper >>= shift; 826 li(Rd, upper); 827 slli(Rd, Rd, shift); 828 if (lower != 0) { 829 addi(Rd, Rd, lower); 830 } 831 } else { 832 // 32-bit integer 833 Register hi_Rd = zr; 834 if (upper != 0) { 835 lui(Rd, (int32_t)upper); 836 hi_Rd = Rd; 837 } 838 if (lower != 0 || hi_Rd == zr) { 839 addiw(Rd, hi_Rd, lower); 840 } 841 } 842 } 843 844 #define INSN(NAME, REGISTER) \ 845 void MacroAssembler::NAME(const address dest, Register temp) { \ 846 assert_cond(dest != nullptr); \ 847 int64_t distance = dest - pc(); \ 848 if (is_simm21(distance) && ((distance % 2) == 0)) { \ 849 Assembler::jal(REGISTER, distance); \ 850 } else { \ 851 assert(temp != noreg, "expecting a register"); \ 852 int32_t offset = 0; \ 853 movptr(temp, dest, offset); \ 854 Assembler::jalr(REGISTER, temp, offset); \ 855 } \ 856 } \ 857 858 INSN(j, x0); 859 INSN(jal, x1); 860 861 #undef INSN 862 863 #define INSN(NAME, REGISTER) \ 864 void MacroAssembler::NAME(const Address &adr, Register temp) { \ 865 switch (adr.getMode()) { \ 866 case Address::literal: { \ 867 relocate(adr.rspec(), [&] { \ 868 NAME(adr.target(), temp); \ 869 }); \ 870 break; \ 871 } \ 872 case Address::base_plus_offset: { \ 873 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \ 874 la(temp, Address(adr.base(), adr.offset() - offset)); \ 875 Assembler::jalr(REGISTER, temp, offset); \ 876 break; \ 877 } \ 878 default: \ 879 ShouldNotReachHere(); \ 880 } \ 881 } 882 883 INSN(j, x0); 884 INSN(jal, x1); 885 886 #undef INSN 887 888 #define INSN(NAME) \ 889 void MacroAssembler::NAME(Register Rd, const address dest, Register temp) { \ 890 assert_cond(dest != nullptr); \ 891 int64_t distance = dest - pc(); \ 892 if (is_simm21(distance) && ((distance % 2) == 0)) { \ 893 Assembler::NAME(Rd, distance); \ 894 } else { \ 895 assert_different_registers(Rd, temp); \ 896 int32_t offset = 0; \ 897 movptr(temp, dest, offset); \ 898 jalr(Rd, temp, offset); \ 899 } \ 900 } \ 901 void MacroAssembler::NAME(Register Rd, Label &L, Register temp) { \ 902 assert_different_registers(Rd, temp); \ 903 wrap_label(Rd, L, temp, &MacroAssembler::NAME); \ 904 } 905 906 INSN(jal); 907 908 #undef INSN 909 910 #define INSN(NAME, REGISTER) \ 911 void MacroAssembler::NAME(Label &l, Register temp) { \ 912 jal(REGISTER, l, temp); \ 913 } \ 914 915 INSN(j, x0); 916 INSN(jal, x1); 917 918 #undef INSN 919 920 void MacroAssembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) { 921 if (L.is_bound()) { 922 (this->*insn)(Rt, target(L), tmp); 923 } else { 924 L.add_patch_at(code(), locator()); 925 (this->*insn)(Rt, pc(), tmp); 926 } 927 } 928 929 void MacroAssembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) { 930 if (L.is_bound()) { 931 (this->*insn)(Rt, target(L)); 932 } else { 933 L.add_patch_at(code(), locator()); 934 (this->*insn)(Rt, pc()); 935 } 936 } 937 938 void MacroAssembler::wrap_label(Register r1, Register r2, Label &L, 939 compare_and_branch_insn insn, 940 compare_and_branch_label_insn neg_insn, bool is_far) { 941 if (is_far) { 942 Label done; 943 (this->*neg_insn)(r1, r2, done, /* is_far */ false); 944 j(L); 945 bind(done); 946 } else { 947 if (L.is_bound()) { 948 (this->*insn)(r1, r2, target(L)); 949 } else { 950 L.add_patch_at(code(), locator()); 951 (this->*insn)(r1, r2, pc()); 952 } 953 } 954 } 955 956 #define INSN(NAME, NEG_INSN) \ 957 void MacroAssembler::NAME(Register Rs1, 
Register Rs2, Label &L, bool is_far) { \ 958 wrap_label(Rs1, Rs2, L, &MacroAssembler::NAME, &MacroAssembler::NEG_INSN, is_far); \ 959 } 960 961 INSN(beq, bne); 962 INSN(bne, beq); 963 INSN(blt, bge); 964 INSN(bge, blt); 965 INSN(bltu, bgeu); 966 INSN(bgeu, bltu); 967 968 #undef INSN 969 970 #define INSN(NAME) \ 971 void MacroAssembler::NAME##z(Register Rs, const address dest) { \ 972 NAME(Rs, zr, dest); \ 973 } \ 974 void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ 975 NAME(Rs, zr, l, is_far); \ 976 } \ 977 978 INSN(beq); 979 INSN(bne); 980 INSN(blt); 981 INSN(ble); 982 INSN(bge); 983 INSN(bgt); 984 985 #undef INSN 986 987 #define INSN(NAME, NEG_INSN) \ 988 void MacroAssembler::NAME(Register Rs, Register Rt, const address dest) { \ 989 NEG_INSN(Rt, Rs, dest); \ 990 } \ 991 void MacroAssembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \ 992 NEG_INSN(Rt, Rs, l, is_far); \ 993 } 994 995 INSN(bgt, blt); 996 INSN(ble, bge); 997 INSN(bgtu, bltu); 998 INSN(bleu, bgeu); 999 1000 #undef INSN 1001 1002 // Float compare branch instructions 1003 1004 #define INSN(NAME, FLOATCMP, BRANCH) \ 1005 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ 1006 FLOATCMP##_s(t0, Rs1, Rs2); \ 1007 BRANCH(t0, l, is_far); \ 1008 } \ 1009 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ 1010 FLOATCMP##_d(t0, Rs1, Rs2); \ 1011 BRANCH(t0, l, is_far); \ 1012 } 1013 1014 INSN(beq, feq, bnez); 1015 INSN(bne, feq, beqz); 1016 1017 #undef INSN 1018 1019 1020 #define INSN(NAME, FLOATCMP1, FLOATCMP2) \ 1021 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1022 bool is_far, bool is_unordered) { \ 1023 if (is_unordered) { \ 1024 /* jump if either source is NaN or condition is expected */ \ 1025 FLOATCMP2##_s(t0, Rs2, Rs1); \ 1026 beqz(t0, l, is_far); \ 1027 } else { \ 1028 /* jump if no NaN in source and condition is expected */ \ 1029 FLOATCMP1##_s(t0, Rs1, Rs2); \ 1030 bnez(t0, l, is_far); \ 1031 } \ 1032 } \ 1033 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1034 bool is_far, bool is_unordered) { \ 1035 if (is_unordered) { \ 1036 /* jump if either source is NaN or condition is expected */ \ 1037 FLOATCMP2##_d(t0, Rs2, Rs1); \ 1038 beqz(t0, l, is_far); \ 1039 } else { \ 1040 /* jump if no NaN in source and condition is expected */ \ 1041 FLOATCMP1##_d(t0, Rs1, Rs2); \ 1042 bnez(t0, l, is_far); \ 1043 } \ 1044 } 1045 1046 INSN(ble, fle, flt); 1047 INSN(blt, flt, fle); 1048 1049 #undef INSN 1050 1051 #define INSN(NAME, CMP) \ 1052 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1053 bool is_far, bool is_unordered) { \ 1054 float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ 1055 } \ 1056 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1057 bool is_far, bool is_unordered) { \ 1058 double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ 1059 } 1060 1061 INSN(bgt, blt); 1062 INSN(bge, ble); 1063 1064 #undef INSN 1065 1066 1067 #define INSN(NAME, CSR) \ 1068 void MacroAssembler::NAME(Register Rd) { \ 1069 csrr(Rd, CSR); \ 1070 } 1071 1072 INSN(rdinstret, CSR_INSTRET); 1073 INSN(rdcycle, CSR_CYCLE); 1074 INSN(rdtime, CSR_TIME); 1075 INSN(frcsr, CSR_FCSR); 1076 INSN(frrm, CSR_FRM); 1077 INSN(frflags, CSR_FFLAGS); 1078 1079 #undef INSN 1080 1081 void MacroAssembler::csrr(Register Rd, unsigned csr) { 1082 csrrs(Rd, csr, x0); 1083 } 
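// The CSR accessors above and below follow the standard RISC-V pseudo-instruction
// expansions, for example (illustrative):
//   csrr  Rd, csr   -> csrrs  Rd, csr, x0    (read CSR, write nothing back)
//   csrw  csr, Rs   -> csrrw  x0, csr, Rs    (write CSR, discard old value)
//   csrwi csr, imm  -> csrrwi x0, csr, imm
//   frcsr Rd        -> csrr   Rd, CSR_FCSR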
1084 1085 #define INSN(NAME, OPFUN) \ 1086 void MacroAssembler::NAME(unsigned csr, Register Rs) { \ 1087 OPFUN(x0, csr, Rs); \ 1088 } 1089 1090 INSN(csrw, csrrw); 1091 INSN(csrs, csrrs); 1092 INSN(csrc, csrrc); 1093 1094 #undef INSN 1095 1096 #define INSN(NAME, OPFUN) \ 1097 void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ 1098 OPFUN(x0, csr, imm); \ 1099 } 1100 1101 INSN(csrwi, csrrwi); 1102 INSN(csrsi, csrrsi); 1103 INSN(csrci, csrrci); 1104 1105 #undef INSN 1106 1107 #define INSN(NAME, CSR) \ 1108 void MacroAssembler::NAME(Register Rd, Register Rs) { \ 1109 csrrw(Rd, CSR, Rs); \ 1110 } 1111 1112 INSN(fscsr, CSR_FCSR); 1113 INSN(fsrm, CSR_FRM); 1114 INSN(fsflags, CSR_FFLAGS); 1115 1116 #undef INSN 1117 1118 #define INSN(NAME) \ 1119 void MacroAssembler::NAME(Register Rs) { \ 1120 NAME(x0, Rs); \ 1121 } 1122 1123 INSN(fscsr); 1124 INSN(fsrm); 1125 INSN(fsflags); 1126 1127 #undef INSN 1128 1129 void MacroAssembler::fsrmi(Register Rd, unsigned imm) { 1130 guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); 1131 csrrwi(Rd, CSR_FRM, imm); 1132 } 1133 1134 void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { 1135 csrrwi(Rd, CSR_FFLAGS, imm); 1136 } 1137 1138 #define INSN(NAME) \ 1139 void MacroAssembler::NAME(unsigned imm) { \ 1140 NAME(x0, imm); \ 1141 } 1142 1143 INSN(fsrmi); 1144 INSN(fsflagsi); 1145 1146 #undef INSN 1147 1148 void MacroAssembler::push_reg(Register Rs) 1149 { 1150 addi(esp, esp, 0 - wordSize); 1151 sd(Rs, Address(esp, 0)); 1152 } 1153 1154 void MacroAssembler::pop_reg(Register Rd) 1155 { 1156 ld(Rd, Address(esp, 0)); 1157 addi(esp, esp, wordSize); 1158 } 1159 1160 int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { 1161 int count = 0; 1162 // Scan bitset to accumulate register pairs 1163 for (int reg = 31; reg >= 0; reg--) { 1164 if ((1U << 31) & bitset) { 1165 regs[count++] = reg; 1166 } 1167 bitset <<= 1; 1168 } 1169 return count; 1170 } 1171 1172 // Push integer registers in the bitset supplied. Don't push sp. 1173 // Return the number of words pushed 1174 int MacroAssembler::push_reg(unsigned int bitset, Register stack) { 1175 DEBUG_ONLY(int words_pushed = 0;) 1176 unsigned char regs[32]; 1177 int count = bitset_to_regs(bitset, regs); 1178 // reserve one slot to align for odd count 1179 int offset = is_even(count) ? 0 : wordSize; 1180 1181 if (count) { 1182 addi(stack, stack, -count * wordSize - offset); 1183 } 1184 for (int i = count - 1; i >= 0; i--) { 1185 sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); 1186 DEBUG_ONLY(words_pushed++;) 1187 } 1188 1189 assert(words_pushed == count, "oops, pushed != count"); 1190 1191 return count; 1192 } 1193 1194 int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { 1195 DEBUG_ONLY(int words_popped = 0;) 1196 unsigned char regs[32]; 1197 int count = bitset_to_regs(bitset, regs); 1198 // reserve one slot to align for odd count 1199 int offset = is_even(count) ? 0 : wordSize; 1200 1201 for (int i = count - 1; i >= 0; i--) { 1202 ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); 1203 DEBUG_ONLY(words_popped++;) 1204 } 1205 1206 if (count) { 1207 addi(stack, stack, count * wordSize + offset); 1208 } 1209 assert(words_popped == count, "oops, popped != count"); 1210 1211 return count; 1212 } 1213 1214 // Push floating-point registers in the bitset supplied. 
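// As in push_reg above, the stack adjustment is rounded up to an even number of
// words (push_slots = count + (count & 1)) so that sp keeps the 16-byte alignment
// required by the RISC-V ABI.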
1215 // Return the number of words pushed 1216 int MacroAssembler::push_fp(unsigned int bitset, Register stack) { 1217 DEBUG_ONLY(int words_pushed = 0;) 1218 unsigned char regs[32]; 1219 int count = bitset_to_regs(bitset, regs); 1220 int push_slots = count + (count & 1); 1221 1222 if (count) { 1223 addi(stack, stack, -push_slots * wordSize); 1224 } 1225 1226 for (int i = count - 1; i >= 0; i--) { 1227 fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); 1228 DEBUG_ONLY(words_pushed++;) 1229 } 1230 1231 assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); 1232 1233 return count; 1234 } 1235 1236 int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { 1237 DEBUG_ONLY(int words_popped = 0;) 1238 unsigned char regs[32]; 1239 int count = bitset_to_regs(bitset, regs); 1240 int pop_slots = count + (count & 1); 1241 1242 for (int i = count - 1; i >= 0; i--) { 1243 fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); 1244 DEBUG_ONLY(words_popped++;) 1245 } 1246 1247 if (count) { 1248 addi(stack, stack, pop_slots * wordSize); 1249 } 1250 1251 assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); 1252 1253 return count; 1254 } 1255 1256 #ifdef COMPILER2 1257 // Push vector registers in the bitset supplied. 1258 // Return the number of words pushed 1259 int MacroAssembler::push_v(unsigned int bitset, Register stack) { 1260 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 1261 1262 // Scan bitset to accumulate register pairs 1263 unsigned char regs[32]; 1264 int count = bitset_to_regs(bitset, regs); 1265 1266 for (int i = 0; i < count; i++) { 1267 sub(stack, stack, vector_size_in_bytes); 1268 vs1r_v(as_VectorRegister(regs[i]), stack); 1269 } 1270 1271 return count * vector_size_in_bytes / wordSize; 1272 } 1273 1274 int MacroAssembler::pop_v(unsigned int bitset, Register stack) { 1275 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 1276 1277 // Scan bitset to accumulate register pairs 1278 unsigned char regs[32]; 1279 int count = bitset_to_regs(bitset, regs); 1280 1281 for (int i = count - 1; i >= 0; i--) { 1282 vl1r_v(as_VectorRegister(regs[i]), stack); 1283 add(stack, stack, vector_size_in_bytes); 1284 } 1285 1286 return count * vector_size_in_bytes / wordSize; 1287 } 1288 #endif // COMPILER2 1289 1290 void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { 1291 // Push integer registers x7, x10-x17, x28-x31. 1292 push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); 1293 1294 // Push float registers f0-f7, f10-f17, f28-f31. 
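  // f0-f7, f10-f17 and f28-f31 are 8 + 8 + 4 = 20 registers, matching the
  // 20-word stack adjustment below.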
1295 addi(sp, sp, - wordSize * 20); 1296 int offset = 0; 1297 for (int i = 0; i < 32; i++) { 1298 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { 1299 fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++))); 1300 } 1301 } 1302 } 1303 1304 void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { 1305 int offset = 0; 1306 for (int i = 0; i < 32; i++) { 1307 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { 1308 fld(as_FloatRegister(i), Address(sp, wordSize * (offset++))); 1309 } 1310 } 1311 addi(sp, sp, wordSize * 20); 1312 1313 pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); 1314 } 1315 1316 void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { 1317 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) 1318 push_reg(RegSet::range(x5, x31), sp); 1319 1320 // float registers 1321 addi(sp, sp, - 32 * wordSize); 1322 for (int i = 0; i < 32; i++) { 1323 fsd(as_FloatRegister(i), Address(sp, i * wordSize)); 1324 } 1325 1326 // vector registers 1327 if (save_vectors) { 1328 sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers); 1329 vsetvli(t0, x0, Assembler::e64, Assembler::m8); 1330 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) { 1331 add(t0, sp, vector_size_in_bytes * i); 1332 vse64_v(as_VectorRegister(i), t0); 1333 } 1334 } 1335 } 1336 1337 void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { 1338 // vector registers 1339 if (restore_vectors) { 1340 vsetvli(t0, x0, Assembler::e64, Assembler::m8); 1341 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) { 1342 vle64_v(as_VectorRegister(i), sp); 1343 add(sp, sp, vector_size_in_bytes * 8); 1344 } 1345 } 1346 1347 // float registers 1348 for (int i = 0; i < 32; i++) { 1349 fld(as_FloatRegister(i), Address(sp, i * wordSize)); 1350 } 1351 addi(sp, sp, 32 * wordSize); 1352 1353 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) 1354 pop_reg(RegSet::range(x5, x31), sp); 1355 } 1356 1357 static int patch_offset_in_jal(address branch, int64_t offset) { 1358 assert(Assembler::is_simm21(offset) && ((offset % 2) == 0), 1359 "offset is too large to be patched in one jal instruction!\n"); 1360 Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] 1361 Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] 1362 Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] 1363 Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] 1364 return NativeInstruction::instruction_size; // only one instruction 1365 } 1366 1367 static int patch_offset_in_conditional_branch(address branch, int64_t offset) { 1368 assert(Assembler::is_simm13(offset) && ((offset % 2) == 0), 1369 "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n"); 1370 Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] 1371 Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] 1372 Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] 1373 Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] 1374 return NativeInstruction::instruction_size; // only one instruction 1375 } 1376 1377 static int 
patch_offset_in_pc_relative(address branch, int64_t offset) {
  const int PC_RELATIVE_INSTRUCTION_NUM = 2;                              // auipc, addi/jalr/load
  Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff);   // Auipc.           offset[31:12] ==> branch[31:12]
  Assembler::patch(branch + 4, 31, 20, offset & 0xfff);                   // Addi/Jalr/Load.  offset[11:0]  ==> branch[31:20]
  return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
}

static int patch_addr_in_movptr(address branch, address target) {
  const int MOVPTR_INSTRUCTIONS_NUM = 6;                          // lui + addi + slli + addi + slli + addi/jalr/load
  int32_t lower = ((intptr_t)target << 35) >> 35;
  int64_t upper = ((intptr_t)target - lower) >> 29;
  Assembler::patch(branch + 0,  31, 12, upper & 0xfffff);         // Lui.             target[48:29] + target[28] ==> branch[31:12]
  Assembler::patch(branch + 4,  31, 20, (lower >> 17) & 0xfff);   // Addi.            target[28:17] ==> branch[31:20]
  Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff);    // Addi.            target[16: 6] ==> branch[31:20]
  Assembler::patch(branch + 20, 31, 20, lower & 0x3f);            // Addi/Jalr/Load.  target[ 5: 0] ==> branch[31:20]
  return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
}

static int patch_imm_in_li64(address branch, address target) {
  const int LI64_INSTRUCTIONS_NUM = 8;                            // lui + addi + slli + addi + slli + addi + slli + addi
  int64_t lower = (intptr_t)target & 0xffffffff;
  lower = lower - ((lower << 44) >> 44);
  int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower;
  int32_t upper = (tmp_imm - (int32_t)lower) >> 32;
  int64_t tmp_upper = upper, tmp_lower = upper;
  tmp_lower = (tmp_lower << 52) >> 52;
  tmp_upper -= tmp_lower;
  tmp_upper >>= 12;
  // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:20] == 0x7ff && target[19] == 1),
  // upper = target[63:32] + 1.
  Assembler::patch(branch + 0,  31, 12, tmp_upper & 0xfffff);                        // Lui.
  Assembler::patch(branch + 4,  31, 20, tmp_lower & 0xfff);                          // Addi.
  // Load the rest 32 bits.
  Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff);             // Addi.
  Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff);   // Addi.
  Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff);                    // Addi.
  return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
}

static int patch_imm_in_li16u(address branch, uint16_t target) {
  Assembler::patch(branch, 31, 12, target); // patch lui only
  return NativeInstruction::instruction_size;
}

int MacroAssembler::patch_imm_in_li32(address branch, int32_t target) {
  const int LI32_INSTRUCTIONS_NUM = 2;                            // lui + addiw
  int64_t upper = (intptr_t)target;
  int32_t lower = (((int32_t)target) << 20) >> 20;
  upper -= lower;
  upper = (int32_t)upper;
  Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff);  // Lui.
  Assembler::patch(branch + 4, 31, 20, lower & 0xfff);            // Addiw.
  return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
}

static long get_offset_of_jal(address insn_addr) {
  assert_cond(insn_addr != nullptr);
  long offset = 0;
  unsigned insn = Assembler::ld_instr(insn_addr);
  long val = (long)Assembler::sextract(insn, 31, 12);
  offset |= ((val >> 19) & 0x1) << 20;
  offset |= (val & 0xff) << 12;
  offset |= ((val >> 8) & 0x1) << 11;
  offset |= ((val >> 9) & 0x3ff) << 1;
  offset = (offset << 43) >> 43;
  return offset;
}

static long get_offset_of_conditional_branch(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != nullptr);
  unsigned insn = Assembler::ld_instr(insn_addr);
  offset = (long)Assembler::sextract(insn, 31, 31);
  offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);
  offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);
  offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);
  offset = (offset << 41) >> 41;
  return offset;
}

static long get_offset_of_pc_relative(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != nullptr);
  offset = ((long)(Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12))) << 12;   // Auipc.
  offset += ((long)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20));      // Addi/Jalr/Load.
  offset = (offset << 32) >> 32;
  return offset;
}

static address get_target_of_movptr(address insn_addr) {
  assert_cond(insn_addr != nullptr);
  intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 29;   // Lui.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 17;                   // Addi.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 6;                   // Addi.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20));                        // Addi/Jalr/Load.
  return (address) target_address;
}

static address get_target_of_li64(address insn_addr) {
  assert_cond(insn_addr != nullptr);
  intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 44;   // Lui.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 32;                   // Addi.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 20;                  // Addi.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)) << 8;                   // Addi.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 28), 31, 20));                        // Addi.
  return (address)target_address;
}

address MacroAssembler::get_target_of_li32(address insn_addr) {
  assert_cond(insn_addr != nullptr);
  intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 12;   // Lui.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20));                         // Addiw.
  return (address)target_address;
}

// Patch any kind of instruction; there may be several instructions.
// Return the total length (in bytes) of the instructions.
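// Each patch_* / get_* helper above assumes the exact, fixed-length instruction
// layout produced by the corresponding emitter (jal/branches, auipc-based la,
// movptr, li64, li32, li16u); pd_patch_instruction_size below dispatches on the
// instruction kind found at the patch site.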
1494 int MacroAssembler::pd_patch_instruction_size(address branch, address target) { 1495 assert_cond(branch != nullptr); 1496 int64_t offset = target - branch; 1497 if (NativeInstruction::is_jal_at(branch)) { // jal 1498 return patch_offset_in_jal(branch, offset); 1499 } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne 1500 return patch_offset_in_conditional_branch(branch, offset); 1501 } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load 1502 return patch_offset_in_pc_relative(branch, offset); 1503 } else if (NativeInstruction::is_movptr_at(branch)) { // movptr 1504 return patch_addr_in_movptr(branch, target); 1505 } else if (NativeInstruction::is_li64_at(branch)) { // li64 1506 return patch_imm_in_li64(branch, target); 1507 } else if (NativeInstruction::is_li32_at(branch)) { // li32 1508 int64_t imm = (intptr_t)target; 1509 return patch_imm_in_li32(branch, (int32_t)imm); 1510 } else if (NativeInstruction::is_li16u_at(branch)) { 1511 int64_t imm = (intptr_t)target; 1512 return patch_imm_in_li16u(branch, (uint16_t)imm); 1513 } else { 1514 #ifdef ASSERT 1515 tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", 1516 Assembler::ld_instr(branch), p2i(branch)); 1517 Disassembler::decode(branch - 16, branch + 16); 1518 #endif 1519 ShouldNotReachHere(); 1520 return -1; 1521 } 1522 } 1523 1524 address MacroAssembler::target_addr_for_insn(address insn_addr) { 1525 long offset = 0; 1526 assert_cond(insn_addr != nullptr); 1527 if (NativeInstruction::is_jal_at(insn_addr)) { // jal 1528 offset = get_offset_of_jal(insn_addr); 1529 } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne 1530 offset = get_offset_of_conditional_branch(insn_addr); 1531 } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load 1532 offset = get_offset_of_pc_relative(insn_addr); 1533 } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr 1534 return get_target_of_movptr(insn_addr); 1535 } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 1536 return get_target_of_li64(insn_addr); 1537 } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 1538 return get_target_of_li32(insn_addr); 1539 } else { 1540 ShouldNotReachHere(); 1541 } 1542 return address(((uintptr_t)insn_addr + offset)); 1543 } 1544 1545 int MacroAssembler::patch_oop(address insn_addr, address o) { 1546 // OOPs are either narrow (32 bits) or wide (48 bits). We encode 1547 // narrow OOPs by setting the upper 16 bits in the first 1548 // instruction. 
1549 if (NativeInstruction::is_li32_at(insn_addr)) { 1550 // Move narrow OOP 1551 uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); 1552 return patch_imm_in_li32(insn_addr, (int32_t)n); 1553 } else if (NativeInstruction::is_movptr_at(insn_addr)) { 1554 // Move wide OOP 1555 return patch_addr_in_movptr(insn_addr, o); 1556 } 1557 ShouldNotReachHere(); 1558 return -1; 1559 } 1560 1561 void MacroAssembler::reinit_heapbase() { 1562 if (UseCompressedOops) { 1563 if (Universe::is_fully_initialized()) { 1564 mv(xheapbase, CompressedOops::ptrs_base()); 1565 } else { 1566 ExternalAddress target(CompressedOops::ptrs_base_addr()); 1567 relocate(target.rspec(), [&] { 1568 int32_t offset; 1569 la_patchable(xheapbase, target, offset); 1570 ld(xheapbase, Address(xheapbase, offset)); 1571 }); 1572 } 1573 } 1574 } 1575 1576 void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset) { 1577 int64_t imm64 = (int64_t)addr; 1578 #ifndef PRODUCT 1579 { 1580 char buffer[64]; 1581 snprintf(buffer, sizeof(buffer), "0x%" PRIx64, imm64); 1582 block_comment(buffer); 1583 } 1584 #endif 1585 assert((uintptr_t)imm64 < (1ull << 48), "48-bit overflow in address constant"); 1586 // Load upper 31 bits 1587 int64_t imm = imm64 >> 17; 1588 int64_t upper = imm, lower = imm; 1589 lower = (lower << 52) >> 52; 1590 upper -= lower; 1591 upper = (int32_t)upper; 1592 lui(Rd, upper); 1593 addi(Rd, Rd, lower); 1594 1595 // Load the rest 17 bits. 1596 slli(Rd, Rd, 11); 1597 addi(Rd, Rd, (imm64 >> 6) & 0x7ff); 1598 slli(Rd, Rd, 6); 1599 1600 // This offset will be used by following jalr/ld. 1601 offset = imm64 & 0x3f; 1602 } 1603 1604 void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register temp) { 1605 if (is_simm12(increment)) { 1606 addi(Rd, Rn, increment); 1607 } else { 1608 assert_different_registers(Rn, temp); 1609 li(temp, increment); 1610 add(Rd, Rn, temp); 1611 } 1612 } 1613 1614 void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { 1615 if (is_simm12(increment)) { 1616 addiw(Rd, Rn, increment); 1617 } else { 1618 assert_different_registers(Rn, temp); 1619 li(temp, increment); 1620 addw(Rd, Rn, temp); 1621 } 1622 } 1623 1624 void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) { 1625 if (is_simm12(-decrement)) { 1626 addi(Rd, Rn, -decrement); 1627 } else { 1628 assert_different_registers(Rn, temp); 1629 li(temp, decrement); 1630 sub(Rd, Rn, temp); 1631 } 1632 } 1633 1634 void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { 1635 if (is_simm12(-decrement)) { 1636 addiw(Rd, Rn, -decrement); 1637 } else { 1638 assert_different_registers(Rn, temp); 1639 li(temp, decrement); 1640 subw(Rd, Rn, temp); 1641 } 1642 } 1643 1644 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { 1645 andr(Rd, Rs1, Rs2); 1646 sign_extend(Rd, Rd, 32); 1647 } 1648 1649 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { 1650 orr(Rd, Rs1, Rs2); 1651 sign_extend(Rd, Rd, 32); 1652 } 1653 1654 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { 1655 xorr(Rd, Rs1, Rs2); 1656 sign_extend(Rd, Rd, 32); 1657 } 1658 1659 // Rd = Rs1 & (~Rd2) 1660 void MacroAssembler::andn(Register Rd, Register Rs1, Register Rs2) { 1661 if (UseZbb) { 1662 Assembler::andn(Rd, Rs1, Rs2); 1663 return; 1664 } 1665 1666 notr(Rd, Rs2); 1667 andr(Rd, Rs1, Rd); 1668 } 1669 1670 // Rd = Rs1 | (~Rd2) 1671 void MacroAssembler::orn(Register Rd, Register Rs1, Register Rs2) { 1672 if (UseZbb) { 1673 
Assembler::orn(Rd, Rs1, Rs2); 1674 return; 1675 } 1676 1677 notr(Rd, Rs2); 1678 orr(Rd, Rs1, Rd); 1679 } 1680 1681 // Note: load_unsigned_short used to be called load_unsigned_word. 1682 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 1683 int off = offset(); 1684 lhu(dst, src); 1685 return off; 1686 } 1687 1688 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 1689 int off = offset(); 1690 lbu(dst, src); 1691 return off; 1692 } 1693 1694 int MacroAssembler::load_signed_short(Register dst, Address src) { 1695 int off = offset(); 1696 lh(dst, src); 1697 return off; 1698 } 1699 1700 int MacroAssembler::load_signed_byte(Register dst, Address src) { 1701 int off = offset(); 1702 lb(dst, src); 1703 return off; 1704 } 1705 1706 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 1707 switch (size_in_bytes) { 1708 case 8: ld(dst, src); break; 1709 case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; 1710 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 1711 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 1712 default: ShouldNotReachHere(); 1713 } 1714 } 1715 1716 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes) { 1717 switch (size_in_bytes) { 1718 case 8: sd(src, dst); break; 1719 case 4: sw(src, dst); break; 1720 case 2: sh(src, dst); break; 1721 case 1: sb(src, dst); break; 1722 default: ShouldNotReachHere(); 1723 } 1724 } 1725 1726 // granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register 1727 void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 1728 if (granularity != 1 && granularity != 2) { 1729 ShouldNotReachHere(); 1730 } 1731 if (AvoidUnalignedAccesses && (granularity != 2)) { 1732 assert_different_registers(dst, tmp); 1733 assert_different_registers(tmp, src.base()); 1734 is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1)); 1735 slli(tmp, tmp, 8); 1736 lbu(dst, src); 1737 add(dst, dst, tmp); 1738 } else { 1739 is_signed ? lh(dst, src) : lhu(dst, src); 1740 } 1741 } 1742 1743 // granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register 1744 void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 1745 if (AvoidUnalignedAccesses && (granularity != 4)) { 1746 switch(granularity) { 1747 case 1: 1748 assert_different_registers(dst, tmp, src.base()); 1749 lbu(dst, src); 1750 lbu(tmp, Address(src.base(), src.offset() + 1)); 1751 slli(tmp, tmp, 8); 1752 add(dst, dst, tmp); 1753 lbu(tmp, Address(src.base(), src.offset() + 2)); 1754 slli(tmp, tmp, 16); 1755 add(dst, dst, tmp); 1756 is_signed ? lb(tmp, Address(src.base(), src.offset() + 3)) : lbu(tmp, Address(src.base(), src.offset() + 3)); 1757 slli(tmp, tmp, 24); 1758 add(dst, dst, tmp); 1759 break; 1760 case 2: 1761 assert_different_registers(dst, tmp); 1762 assert_different_registers(tmp, src.base()); 1763 is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2)); 1764 slli(tmp, tmp, 16); 1765 lhu(dst, src); 1766 add(dst, dst, tmp); 1767 break; 1768 default: 1769 ShouldNotReachHere(); 1770 } 1771 } else { 1772 is_signed ? 
lw(dst, src) : lwu(dst, src); 1773 } 1774 } 1775 1776 // granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register 1777 void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) { 1778 if (AvoidUnalignedAccesses && (granularity != 8)) { 1779 switch(granularity){ 1780 case 1: 1781 assert_different_registers(dst, tmp, src.base()); 1782 lbu(dst, src); 1783 lbu(tmp, Address(src.base(), src.offset() + 1)); 1784 slli(tmp, tmp, 8); 1785 add(dst, dst, tmp); 1786 lbu(tmp, Address(src.base(), src.offset() + 2)); 1787 slli(tmp, tmp, 16); 1788 add(dst, dst, tmp); 1789 lbu(tmp, Address(src.base(), src.offset() + 3)); 1790 slli(tmp, tmp, 24); 1791 add(dst, dst, tmp); 1792 lbu(tmp, Address(src.base(), src.offset() + 4)); 1793 slli(tmp, tmp, 32); 1794 add(dst, dst, tmp); 1795 lbu(tmp, Address(src.base(), src.offset() + 5)); 1796 slli(tmp, tmp, 40); 1797 add(dst, dst, tmp); 1798 lbu(tmp, Address(src.base(), src.offset() + 6)); 1799 slli(tmp, tmp, 48); 1800 add(dst, dst, tmp); 1801 lbu(tmp, Address(src.base(), src.offset() + 7)); 1802 slli(tmp, tmp, 56); 1803 add(dst, dst, tmp); 1804 break; 1805 case 2: 1806 assert_different_registers(dst, tmp, src.base()); 1807 lhu(dst, src); 1808 lhu(tmp, Address(src.base(), src.offset() + 2)); 1809 slli(tmp, tmp, 16); 1810 add(dst, dst, tmp); 1811 lhu(tmp, Address(src.base(), src.offset() + 4)); 1812 slli(tmp, tmp, 32); 1813 add(dst, dst, tmp); 1814 lhu(tmp, Address(src.base(), src.offset() + 6)); 1815 slli(tmp, tmp, 48); 1816 add(dst, dst, tmp); 1817 break; 1818 case 4: 1819 assert_different_registers(dst, tmp); 1820 assert_different_registers(tmp, src.base()); 1821 lwu(tmp, Address(src.base(), src.offset() + 4)); 1822 slli(tmp, tmp, 32); 1823 lwu(dst, src); 1824 add(dst, dst, tmp); 1825 break; 1826 default: 1827 ShouldNotReachHere(); 1828 } 1829 } else { 1830 ld(dst, src); 1831 } 1832 } 1833 1834 1835 // reverse bytes in halfword in lower 16 bits and sign-extend 1836 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) 1837 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { 1838 if (UseZbb) { 1839 rev8(Rd, Rs); 1840 srai(Rd, Rd, 48); 1841 return; 1842 } 1843 assert_different_registers(Rs, tmp); 1844 assert_different_registers(Rd, tmp); 1845 srli(tmp, Rs, 8); 1846 andi(tmp, tmp, 0xFF); 1847 slli(Rd, Rs, 56); 1848 srai(Rd, Rd, 48); // sign-extend 1849 orr(Rd, Rd, tmp); 1850 } 1851 1852 // reverse bytes in lower word and sign-extend 1853 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) 1854 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1855 if (UseZbb) { 1856 rev8(Rd, Rs); 1857 srai(Rd, Rd, 32); 1858 return; 1859 } 1860 assert_different_registers(Rs, tmp1, tmp2); 1861 assert_different_registers(Rd, tmp1, tmp2); 1862 revb_h_w_u(Rd, Rs, tmp1, tmp2); 1863 slli(tmp2, Rd, 48); 1864 srai(tmp2, tmp2, 32); // sign-extend 1865 srli(Rd, Rd, 16); 1866 orr(Rd, Rd, tmp2); 1867 } 1868 1869 // reverse bytes in halfword in lower 16 bits and zero-extend 1870 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 1871 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { 1872 if (UseZbb) { 1873 rev8(Rd, Rs); 1874 srli(Rd, Rd, 48); 1875 return; 1876 } 1877 assert_different_registers(Rs, tmp); 1878 assert_different_registers(Rd, tmp); 1879 srli(tmp, Rs, 8); 1880 andi(tmp, tmp, 0xFF); 1881 andi(Rd, Rs, 0xFF); 1882 slli(Rd, Rd, 8); 1883 orr(Rd, Rd, tmp); 1884 } 1885 1886 // reverse 
bytes in halfwords in lower 32 bits and zero-extend 1887 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 1888 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1889 if (UseZbb) { 1890 rev8(Rd, Rs); 1891 rori(Rd, Rd, 32); 1892 roriw(Rd, Rd, 16); 1893 zero_extend(Rd, Rd, 32); 1894 return; 1895 } 1896 assert_different_registers(Rs, tmp1, tmp2); 1897 assert_different_registers(Rd, tmp1, tmp2); 1898 srli(tmp2, Rs, 16); 1899 revb_h_h_u(tmp2, tmp2, tmp1); 1900 revb_h_h_u(Rd, Rs, tmp1); 1901 slli(tmp2, tmp2, 16); 1902 orr(Rd, Rd, tmp2); 1903 } 1904 1905 // This method is only used for revb_h 1906 // Rd = Rs[47:0] Rs[55:48] Rs[63:56] 1907 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1908 assert_different_registers(Rs, tmp1, tmp2); 1909 assert_different_registers(Rd, tmp1); 1910 srli(tmp1, Rs, 48); 1911 andi(tmp2, tmp1, 0xFF); 1912 slli(tmp2, tmp2, 8); 1913 srli(tmp1, tmp1, 8); 1914 orr(tmp1, tmp1, tmp2); 1915 slli(Rd, Rs, 16); 1916 orr(Rd, Rd, tmp1); 1917 } 1918 1919 // reverse bytes in each halfword 1920 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] 1921 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1922 if (UseZbb) { 1923 assert_different_registers(Rs, tmp1); 1924 assert_different_registers(Rd, tmp1); 1925 rev8(Rd, Rs); 1926 zero_extend(tmp1, Rd, 32); 1927 roriw(tmp1, tmp1, 16); 1928 slli(tmp1, tmp1, 32); 1929 srli(Rd, Rd, 32); 1930 roriw(Rd, Rd, 16); 1931 zero_extend(Rd, Rd, 32); 1932 orr(Rd, Rd, tmp1); 1933 return; 1934 } 1935 assert_different_registers(Rs, tmp1, tmp2); 1936 assert_different_registers(Rd, tmp1, tmp2); 1937 revb_h_helper(Rd, Rs, tmp1, tmp2); 1938 for (int i = 0; i < 3; ++i) { 1939 revb_h_helper(Rd, Rd, tmp1, tmp2); 1940 } 1941 } 1942 1943 // reverse bytes in each word 1944 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] 1945 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1946 if (UseZbb) { 1947 rev8(Rd, Rs); 1948 rori(Rd, Rd, 32); 1949 return; 1950 } 1951 assert_different_registers(Rs, tmp1, tmp2); 1952 assert_different_registers(Rd, tmp1, tmp2); 1953 revb(Rd, Rs, tmp1, tmp2); 1954 ror_imm(Rd, Rd, 32); 1955 } 1956 1957 // reverse bytes in doubleword 1958 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] 1959 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1960 if (UseZbb) { 1961 rev8(Rd, Rs); 1962 return; 1963 } 1964 assert_different_registers(Rs, tmp1, tmp2); 1965 assert_different_registers(Rd, tmp1, tmp2); 1966 andi(tmp1, Rs, 0xFF); 1967 slli(tmp1, tmp1, 8); 1968 for (int step = 8; step < 56; step += 8) { 1969 srli(tmp2, Rs, step); 1970 andi(tmp2, tmp2, 0xFF); 1971 orr(tmp1, tmp1, tmp2); 1972 slli(tmp1, tmp1, 8); 1973 } 1974 srli(Rd, Rs, 56); 1975 andi(Rd, Rd, 0xFF); 1976 orr(Rd, tmp1, Rd); 1977 } 1978 1979 // rotate right with shift bits 1980 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) 1981 { 1982 if (UseZbb) { 1983 rori(dst, src, shift); 1984 return; 1985 } 1986 1987 assert_different_registers(dst, tmp); 1988 assert_different_registers(src, tmp); 1989 assert(shift < 64, "shift amount must be < 64"); 1990 slli(tmp, src, 64 - shift); 1991 srli(dst, src, shift); 1992 orr(dst, dst, tmp); 1993 } 1994 1995 // rotate left with shift bits, 32-bit version 1996 void 
MacroAssembler::rolw_imm(Register dst, Register src, uint32_t shift, Register tmp) { 1997 if (UseZbb) { 1998 // no roliw available 1999 roriw(dst, src, 32 - shift); 2000 return; 2001 } 2002 2003 assert_different_registers(dst, tmp); 2004 assert_different_registers(src, tmp); 2005 assert(shift < 32, "shift amount must be < 32"); 2006 srliw(tmp, src, 32 - shift); 2007 slliw(dst, src, shift); 2008 orr(dst, dst, tmp); 2009 } 2010 2011 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { 2012 if (is_simm12(imm)) { 2013 and_imm12(Rd, Rn, imm); 2014 } else { 2015 assert_different_registers(Rn, tmp); 2016 mv(tmp, imm); 2017 andr(Rd, Rn, tmp); 2018 } 2019 } 2020 2021 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { 2022 ld(tmp1, adr); 2023 if (src.is_register()) { 2024 orr(tmp1, tmp1, src.as_register()); 2025 } else { 2026 if (is_simm12(src.as_constant())) { 2027 ori(tmp1, tmp1, src.as_constant()); 2028 } else { 2029 assert_different_registers(tmp1, tmp2); 2030 mv(tmp2, src.as_constant()); 2031 orr(tmp1, tmp1, tmp2); 2032 } 2033 } 2034 sd(tmp1, adr); 2035 } 2036 2037 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L) { 2038 assert_different_registers(oop, trial_klass, tmp1, tmp2); 2039 if (UseCompressedClassPointers) { 2040 lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2041 if (CompressedKlassPointers::base() == nullptr) { 2042 slli(tmp1, tmp1, CompressedKlassPointers::shift()); 2043 beq(trial_klass, tmp1, L); 2044 return; 2045 } 2046 decode_klass_not_null(tmp1, tmp2); 2047 } else { 2048 ld(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2049 } 2050 beq(trial_klass, tmp1, L); 2051 } 2052 2053 // Move an oop into a register. 2054 void MacroAssembler::movoop(Register dst, jobject obj) { 2055 int oop_index; 2056 if (obj == nullptr) { 2057 oop_index = oop_recorder()->allocate_oop_index(obj); 2058 } else { 2059 #ifdef ASSERT 2060 { 2061 ThreadInVMfromUnknown tiv; 2062 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 2063 } 2064 #endif 2065 oop_index = oop_recorder()->find_index(obj); 2066 } 2067 RelocationHolder rspec = oop_Relocation::spec(oop_index); 2068 2069 if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) { 2070 mv(dst, Address((address)obj, rspec)); 2071 } else { 2072 address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address 2073 ld_constant(dst, Address(dummy, rspec)); 2074 } 2075 } 2076 2077 // Move a metadata address into a register. 2078 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 2079 int oop_index; 2080 if (obj == nullptr) { 2081 oop_index = oop_recorder()->allocate_metadata_index(obj); 2082 } else { 2083 oop_index = oop_recorder()->find_index(obj); 2084 } 2085 RelocationHolder rspec = metadata_Relocation::spec(oop_index); 2086 mv(dst, Address((address)obj, rspec)); 2087 } 2088 2089 // Writes to stack successive pages until offset reached to check for 2090 // stack overflow + shadow pages. This clobbers tmp. 2091 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 2092 assert_different_registers(tmp, size, t0); 2093 // Bang stack for total size given plus shadow page size. 2094 // Bang one page at a time because large size can bang beyond yellow and 2095 // red zones. 
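// A rough C model of what the banging below accomplishes (illustrative sketch,
// not emitted code): every page from just below sp down through size plus the
// shadow zone is written once, so the OS guard pages are tripped eagerly.
//
//   // page = os::vm_page_size(), shadow = StackOverflow::stack_shadow_zone_size()
//   for (char* p = sp - page; p >= sp - size - shadow; p -= page) {
//     *(intptr_t*)p = 0;   // any store will do; it only has to touch the page
//   }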
2096 mv(t0, (int)os::vm_page_size()); 2097 Label loop; 2098 bind(loop); 2099 sub(tmp, sp, t0); 2100 subw(size, size, t0); 2101 sd(size, Address(tmp)); 2102 bgtz(size, loop); 2103 2104 // Bang down shadow pages too. 2105 // At this point, (tmp-0) is the last address touched, so don't 2106 // touch it again. (It was touched as (tmp-pagesize) but then tmp 2107 // was post-decremented.) Skip this address by starting at i=1, and 2108 // touch a few more pages below. N.B. It is important to touch all 2109 // the way down to and including i=StackShadowPages. 2110 for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / (int)os::vm_page_size()) - 1; i++) { 2111 // this could be any sized move but this is can be a debugging crumb 2112 // so the bigger the better. 2113 sub(tmp, tmp, (int)os::vm_page_size()); 2114 sd(size, Address(tmp, 0)); 2115 } 2116 } 2117 2118 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { 2119 int32_t offset = 0; 2120 _masm = masm; 2121 ExternalAddress target((address)flag_addr); 2122 _masm->relocate(target.rspec(), [&] { 2123 int32_t offset; 2124 _masm->la_patchable(t0, target, offset); 2125 _masm->lbu(t0, Address(t0, offset)); 2126 }); 2127 if (value) { 2128 _masm->bnez(t0, _label); 2129 } else { 2130 _masm->beqz(t0, _label); 2131 } 2132 } 2133 2134 SkipIfEqual::~SkipIfEqual() { 2135 _masm->bind(_label); 2136 _masm = nullptr; 2137 } 2138 2139 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) { 2140 const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 2141 ld(dst, Address(xmethod, Method::const_offset())); 2142 ld(dst, Address(dst, ConstMethod::constants_offset())); 2143 ld(dst, Address(dst, ConstantPool::pool_holder_offset())); 2144 ld(dst, Address(dst, mirror_offset)); 2145 resolve_oop_handle(dst, tmp1, tmp2); 2146 } 2147 2148 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) { 2149 // OopHandle::resolve is an indirection. 2150 assert_different_registers(result, tmp1, tmp2); 2151 access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2); 2152 } 2153 2154 // ((WeakHandle)result).resolve() 2155 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) { 2156 assert_different_registers(result, tmp1, tmp2); 2157 Label resolved; 2158 2159 // A null weak handle resolves to null. 2160 beqz(result, resolved); 2161 2162 // Only 64 bit platforms support GCs that require a tmp register 2163 // Only IN_HEAP loads require a thread_tmp register 2164 // WeakHandle::resolve is an indirection like jweak. 
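// Conceptually (illustrative sketch, not emitted code):
//
//   oop resolve_weak(oop* weak_handle) {
//     return weak_handle == nullptr ? nullptr
//                                   : *weak_handle;   // phantom-ref load via the GC barrier
//   }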
2165 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, 2166 result, Address(result), tmp1, tmp2); 2167 bind(resolved); 2168 } 2169 2170 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 2171 Register dst, Address src, 2172 Register tmp1, Register tmp2) { 2173 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2174 decorators = AccessInternal::decorator_fixup(decorators, type); 2175 bool as_raw = (decorators & AS_RAW) != 0; 2176 if (as_raw) { 2177 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2); 2178 } else { 2179 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2); 2180 } 2181 } 2182 2183 void MacroAssembler::null_check(Register reg, int offset) { 2184 if (needs_explicit_null_check(offset)) { 2185 // provoke OS null exception if reg is null by 2186 // accessing M[reg] w/o changing any registers 2187 // NOTE: this is plenty to provoke a segv 2188 ld(zr, Address(reg, 0)); 2189 } else { 2190 // nothing to do, (later) access of M[reg + offset] 2191 // will provoke OS null exception if reg is null 2192 } 2193 } 2194 2195 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 2196 Address dst, Register val, 2197 Register tmp1, Register tmp2, Register tmp3) { 2198 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2199 decorators = AccessInternal::decorator_fixup(decorators, type); 2200 bool as_raw = (decorators & AS_RAW) != 0; 2201 if (as_raw) { 2202 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2203 } else { 2204 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2205 } 2206 } 2207 2208 // Algorithm must match CompressedOops::encode. 2209 void MacroAssembler::encode_heap_oop(Register d, Register s) { 2210 verify_oop_msg(s, "broken oop in encode_heap_oop"); 2211 if (CompressedOops::base() == nullptr) { 2212 if (CompressedOops::shift() != 0) { 2213 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2214 srli(d, s, LogMinObjAlignmentInBytes); 2215 } else { 2216 mv(d, s); 2217 } 2218 } else { 2219 Label notNull; 2220 sub(d, s, xheapbase); 2221 bgez(d, notNull); 2222 mv(d, zr); 2223 bind(notNull); 2224 if (CompressedOops::shift() != 0) { 2225 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2226 srli(d, d, CompressedOops::shift()); 2227 } 2228 } 2229 } 2230 2231 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { 2232 assert_different_registers(dst, tmp); 2233 assert_different_registers(src, tmp); 2234 if (UseCompressedClassPointers) { 2235 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2236 decode_klass_not_null(dst, tmp); 2237 } else { 2238 ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2239 } 2240 } 2241 2242 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { 2243 // FIXME: Should this be a store release? concurrent gcs assumes 2244 // klass length is valid if klass field is not null. 
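// Roughly (illustrative only, not emitted code): with compressed class pointers
// the klass word is written as a 32-bit narrowKlass, otherwise as a full pointer:
//
//   if (UseCompressedClassPointers) {
//     *(narrowKlass*)(obj + klass_offset) = CompressedKlassPointers::encode(k);  // sw
//   } else {
//     *(Klass**)(obj + klass_offset) = k;                                        // sd
//   }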
2245 if (UseCompressedClassPointers) { 2246 encode_klass_not_null(src, tmp); 2247 sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2248 } else { 2249 sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2250 } 2251 } 2252 2253 void MacroAssembler::store_klass_gap(Register dst, Register src) { 2254 if (UseCompressedClassPointers) { 2255 // Store to klass gap in destination 2256 sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2257 } 2258 } 2259 2260 void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { 2261 assert_different_registers(r, tmp); 2262 decode_klass_not_null(r, r, tmp); 2263 } 2264 2265 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { 2266 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2267 2268 if (CompressedKlassPointers::base() == nullptr) { 2269 if (CompressedKlassPointers::shift() != 0) { 2270 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2271 slli(dst, src, LogKlassAlignmentInBytes); 2272 } else { 2273 mv(dst, src); 2274 } 2275 return; 2276 } 2277 2278 Register xbase = dst; 2279 if (dst == src) { 2280 xbase = tmp; 2281 } 2282 2283 assert_different_registers(src, xbase); 2284 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2285 2286 if (CompressedKlassPointers::shift() != 0) { 2287 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2288 assert_different_registers(t0, xbase); 2289 shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); 2290 } else { 2291 add(dst, xbase, src); 2292 } 2293 } 2294 2295 void MacroAssembler::encode_klass_not_null(Register r, Register tmp) { 2296 assert_different_registers(r, tmp); 2297 encode_klass_not_null(r, r, tmp); 2298 } 2299 2300 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { 2301 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2302 2303 if (CompressedKlassPointers::base() == nullptr) { 2304 if (CompressedKlassPointers::shift() != 0) { 2305 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2306 srli(dst, src, LogKlassAlignmentInBytes); 2307 } else { 2308 mv(dst, src); 2309 } 2310 return; 2311 } 2312 2313 if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && 2314 CompressedKlassPointers::shift() == 0) { 2315 zero_extend(dst, src, 32); 2316 return; 2317 } 2318 2319 Register xbase = dst; 2320 if (dst == src) { 2321 xbase = tmp; 2322 } 2323 2324 assert_different_registers(src, xbase); 2325 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2326 sub(dst, src, xbase); 2327 if (CompressedKlassPointers::shift() != 0) { 2328 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2329 srli(dst, dst, LogKlassAlignmentInBytes); 2330 } 2331 } 2332 2333 void MacroAssembler::decode_heap_oop_not_null(Register r) { 2334 decode_heap_oop_not_null(r, r); 2335 } 2336 2337 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2338 assert(UseCompressedOops, "should only be used for compressed headers"); 2339 assert(Universe::heap() != nullptr, "java heap should be initialized"); 2340 // Cannot assert, unverified entry point counts instructions (see .ad file) 2341 // vtableStubs also counts instructions in pd_code_size_limit. 2342 // Also do not verify_oop as this is called by verify_oop. 
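// The decode below mirrors CompressedOops::decode_not_null (illustrative sketch,
// not emitted code):
//
//   // zero-based (base == nullptr): oop = narrow << shift
//   // heap-based:                   oop = base + (narrow << shift)
//   uintptr_t decode_not_null(uint32_t narrow) {
//     return (uintptr_t)CompressedOops::base() + ((uintptr_t)narrow << CompressedOops::shift());
//   }
//
// e.g. shift == 3, base == 0x800000000: narrow 0x10 decodes to 0x800000080.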
2343 if (CompressedOops::shift() != 0) { 2344 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2345 slli(dst, src, LogMinObjAlignmentInBytes); 2346 if (CompressedOops::base() != nullptr) { 2347 add(dst, xheapbase, dst); 2348 } 2349 } else { 2350 assert(CompressedOops::base() == nullptr, "sanity"); 2351 mv(dst, src); 2352 } 2353 } 2354 2355 void MacroAssembler::decode_heap_oop(Register d, Register s) { 2356 if (CompressedOops::base() == nullptr) { 2357 if (CompressedOops::shift() != 0 || d != s) { 2358 slli(d, s, CompressedOops::shift()); 2359 } 2360 } else { 2361 Label done; 2362 mv(d, s); 2363 beqz(s, done); 2364 shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); 2365 bind(done); 2366 } 2367 verify_oop_msg(d, "broken oop in decode_heap_oop"); 2368 } 2369 2370 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1, 2371 Register tmp2, Register tmp3, DecoratorSet decorators) { 2372 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3); 2373 } 2374 2375 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, 2376 Register tmp2, DecoratorSet decorators) { 2377 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); 2378 } 2379 2380 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, 2381 Register tmp2, DecoratorSet decorators) { 2382 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2); 2383 } 2384 2385 // Used for storing nulls. 2386 void MacroAssembler::store_heap_oop_null(Address dst) { 2387 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); 2388 } 2389 2390 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, 2391 bool want_remainder) 2392 { 2393 // Full implementation of Java idiv and irem. The function 2394 // returns the (pc) offset of the div instruction - may be needed 2395 // for implicit exceptions. 2396 // 2397 // input : rs1: dividend 2398 // rs2: divisor 2399 // 2400 // result: either 2401 // quotient (= rs1 idiv rs2) 2402 // remainder (= rs1 irem rs2) 2403 2404 2405 int idivl_offset = offset(); 2406 if (!want_remainder) { 2407 divw(result, rs1, rs2); 2408 } else { 2409 remw(result, rs1, rs2); // result = rs1 % rs2; 2410 } 2411 return idivl_offset; 2412 } 2413 2414 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, 2415 bool want_remainder) 2416 { 2417 // Full implementation of Java ldiv and lrem. The function 2418 // returns the (pc) offset of the div instruction - may be needed 2419 // for implicit exceptions. 2420 // 2421 // input : rs1: dividend 2422 // rs2: divisor 2423 // 2424 // result: either 2425 // quotient (= rs1 idiv rs2) 2426 // remainder (= rs1 irem rs2) 2427 2428 int idivq_offset = offset(); 2429 if (!want_remainder) { 2430 div(result, rs1, rs2); 2431 } else { 2432 rem(result, rs1, rs2); // result = rs1 % rs2; 2433 } 2434 return idivq_offset; 2435 } 2436 2437 // Look up the method for a megamorpic invkkeinterface call. 2438 // The target method is determined by <intf_klass, itable_index>. 2439 // The receiver klass is in recv_klass. 2440 // On success, the result will be in method_result, and execution falls through. 2441 // On failure, execution transfers to the given label. 
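// An illustrative C-style model of the itable scan generated by the function
// below (not emitted code); first_itable_entry is a hypothetical helper and the
// other names loosely follow itableOffsetEntry/itableMethodEntry:
//
//   Method* lookup(Klass* recv, Klass* intf, int itable_index) {
//     itableOffsetEntry* e = first_itable_entry(recv);        // placed right after the vtable
//     while (e->interface() != intf) {
//       if (e->interface() == nullptr) return nullptr;        // recv does not implement intf
//       e++;
//     }
//     itableMethodEntry* methods = (itableMethodEntry*)((address)recv + e->offset());
//     return methods[itable_index].method();
//   }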
2442 void MacroAssembler::lookup_interface_method(Register recv_klass, 2443 Register intf_klass, 2444 RegisterOrConstant itable_index, 2445 Register method_result, 2446 Register scan_tmp, 2447 Label& L_no_such_interface, 2448 bool return_method) { 2449 assert_different_registers(recv_klass, intf_klass, scan_tmp); 2450 assert_different_registers(method_result, intf_klass, scan_tmp); 2451 assert(recv_klass != method_result || !return_method, 2452 "recv_klass can be destroyed when method isn't needed"); 2453 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 2454 "caller must use same register for non-constant itable index as for method"); 2455 2456 // Compute start of first itableOffsetEntry (which is at the end of the vtable). 2457 int vtable_base = in_bytes(Klass::vtable_start_offset()); 2458 int itentry_off = in_bytes(itableMethodEntry::method_offset()); 2459 int scan_step = itableOffsetEntry::size() * wordSize; 2460 int vte_size = vtableEntry::size_in_bytes(); 2461 assert(vte_size == wordSize, "else adjust times_vte_scale"); 2462 2463 lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); 2464 2465 // %%% Could store the aligned, prescaled offset in the klassoop. 2466 shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); 2467 add(scan_tmp, scan_tmp, vtable_base); 2468 2469 if (return_method) { 2470 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 2471 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 2472 if (itable_index.is_register()) { 2473 slli(t0, itable_index.as_register(), 3); 2474 } else { 2475 mv(t0, itable_index.as_constant() << 3); 2476 } 2477 add(recv_klass, recv_klass, t0); 2478 if (itentry_off) { 2479 add(recv_klass, recv_klass, itentry_off); 2480 } 2481 } 2482 2483 Label search, found_method; 2484 2485 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset())); 2486 beq(intf_klass, method_result, found_method); 2487 bind(search); 2488 // Check that the previous entry is non-null. A null entry means that 2489 // the receiver class doesn't implement the interface, and wasn't the 2490 // same as when the caller was compiled. 2491 beqz(method_result, L_no_such_interface, /* is_far */ true); 2492 addi(scan_tmp, scan_tmp, scan_step); 2493 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset())); 2494 bne(intf_klass, method_result, search); 2495 2496 bind(found_method); 2497 2498 // Got a hit.
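// At this point scan_tmp points at the matching itableOffsetEntry. The fetch
// below computes (illustrative sketch):
//   method_result = *(Method**)(recv_klass_orig + entry->offset()
//                               + itable_index * wordSize
//                               + itableMethodEntry::method_offset())
// where the itable_index and method_offset terms were already folded into
// recv_klass by the return_method adjustment above.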
2499 if (return_method) { 2500 lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset())); 2501 add(method_result, recv_klass, scan_tmp); 2502 ld(method_result, Address(method_result)); 2503 } 2504 } 2505 2506 // virtual method calling 2507 void MacroAssembler::lookup_virtual_method(Register recv_klass, 2508 RegisterOrConstant vtable_index, 2509 Register method_result) { 2510 const ByteSize base = Klass::vtable_start_offset(); 2511 assert(vtableEntry::size() * wordSize == 8, 2512 "adjust the scaling in the code below"); 2513 int vtable_offset_in_bytes = in_bytes(base + vtableEntry::method_offset()); 2514 2515 if (vtable_index.is_register()) { 2516 shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); 2517 ld(method_result, Address(method_result, vtable_offset_in_bytes)); 2518 } else { 2519 vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; 2520 ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); 2521 } 2522 } 2523 2524 void MacroAssembler::membar(uint32_t order_constraint) { 2525 address prev = pc() - NativeMembar::instruction_size; 2526 address last = code()->last_insn(); 2527 2528 if (last != nullptr && nativeInstruction_at(last)->is_membar() && prev == last) { 2529 NativeMembar *bar = NativeMembar_at(prev); 2530 // We are merging two memory barrier instructions. On RISCV we 2531 // can do this simply by ORing them together. 2532 bar->set_kind(bar->get_kind() | order_constraint); 2533 BLOCK_COMMENT("merged membar"); 2534 } else { 2535 code()->set_last_insn(pc()); 2536 2537 uint32_t predecessor = 0; 2538 uint32_t successor = 0; 2539 2540 membar_mask_to_pred_succ(order_constraint, predecessor, successor); 2541 fence(predecessor, successor); 2542 } 2543 } 2544 2545 // Form an address from base + offset in Rd. Rd my or may not 2546 // actually be used: you must use the Address that is returned. It 2547 // is up to you to ensure that the shift provided matches the size 2548 // of your data. 2549 Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset) { 2550 if (is_simm12(byte_offset)) { // 12: imm in range 2^12 2551 return Address(base, byte_offset); 2552 } 2553 2554 assert_different_registers(Rd, base, noreg); 2555 2556 // Do it the hard way 2557 mv(Rd, byte_offset); 2558 add(Rd, base, Rd); 2559 return Address(Rd); 2560 } 2561 2562 void MacroAssembler::check_klass_subtype(Register sub_klass, 2563 Register super_klass, 2564 Register tmp_reg, 2565 Label& L_success) { 2566 Label L_failure; 2567 check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, nullptr); 2568 check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, nullptr); 2569 bind(L_failure); 2570 } 2571 2572 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { 2573 ld(t0, Address(xthread, JavaThread::polling_word_offset())); 2574 if (acquire) { 2575 membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); 2576 } 2577 if (at_return) { 2578 bgtu(in_nmethod ? 
sp : fp, t0, slow_path, /* is_far */ true); 2579 } else { 2580 test_bit(t0, t0, exact_log2(SafepointMechanism::poll_bit())); 2581 bnez(t0, slow_path, true /* is_far */); 2582 } 2583 } 2584 2585 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, 2586 Label &succeed, Label *fail) { 2587 assert_different_registers(addr, tmp); 2588 assert_different_registers(newv, tmp); 2589 assert_different_registers(oldv, tmp); 2590 2591 // oldv holds comparison value 2592 // newv holds value to write in exchange 2593 // addr identifies memory word to compare against/update 2594 Label retry_load, nope; 2595 bind(retry_load); 2596 // Load reserved from the memory location 2597 lr_d(tmp, addr, Assembler::aqrl); 2598 // Fail and exit if it is not what we expect 2599 bne(tmp, oldv, nope); 2600 // If the store conditional succeeds, tmp will be zero 2601 sc_d(tmp, newv, addr, Assembler::rl); 2602 beqz(tmp, succeed); 2603 // Retry only when the store conditional failed 2604 j(retry_load); 2605 2606 bind(nope); 2607 membar(AnyAny); 2608 mv(oldv, tmp); 2609 if (fail != nullptr) { 2610 j(*fail); 2611 } 2612 } 2613 2614 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, 2615 Label &succeed, Label *fail) { 2616 assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); 2617 cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); 2618 } 2619 2620 void MacroAssembler::load_reserved(Register addr, 2621 enum operand_size size, 2622 Assembler::Aqrl acquire) { 2623 switch (size) { 2624 case int64: 2625 lr_d(t0, addr, acquire); 2626 break; 2627 case int32: 2628 lr_w(t0, addr, acquire); 2629 break; 2630 case uint32: 2631 lr_w(t0, addr, acquire); 2632 zero_extend(t0, t0, 32); 2633 break; 2634 default: 2635 ShouldNotReachHere(); 2636 } 2637 } 2638 2639 void MacroAssembler::store_conditional(Register addr, 2640 Register new_val, 2641 enum operand_size size, 2642 Assembler::Aqrl release) { 2643 switch (size) { 2644 case int64: 2645 sc_d(t0, new_val, addr, release); 2646 break; 2647 case int32: 2648 case uint32: 2649 sc_w(t0, new_val, addr, release); 2650 break; 2651 default: 2652 ShouldNotReachHere(); 2653 } 2654 } 2655 2656 2657 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, 2658 Register new_val, 2659 enum operand_size size, 2660 Register tmp1, Register tmp2, Register tmp3) { 2661 assert(size == int8 || size == int16, "unsupported operand size"); 2662 2663 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; 2664 2665 andi(shift, addr, 3); 2666 slli(shift, shift, 3); 2667 2668 andi(aligned_addr, addr, ~3); 2669 2670 if (size == int8) { 2671 mv(mask, 0xff); 2672 } else { 2673 // size == int16 case 2674 mv(mask, -1); 2675 zero_extend(mask, mask, 16); 2676 } 2677 sll(mask, mask, shift); 2678 2679 xori(not_mask, mask, -1); 2680 2681 sll(expected, expected, shift); 2682 andr(expected, expected, mask); 2683 2684 sll(new_val, new_val, shift); 2685 andr(new_val, new_val, mask); 2686 } 2687 2688 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. 2689 // It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, 2690 // which are forced to work with 4-byte aligned address. 
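// Illustrative C model of the narrow CAS below (not emitted code): the byte or
// halfword is CAS'ed through LR/SC on the enclosing aligned 32-bit word, with
// load_reserved/store_conditional standing in for lr.w/sc.w:
//
//   bool narrow_cas(uint8_t* addr, uint32_t expected, uint32_t new_val) {
//     uint32_t* aligned = (uint32_t*)((uintptr_t)addr & ~(uintptr_t)3);
//     int shift = ((uintptr_t)addr & 3) * 8;
//     uint32_t mask = 0xffu << shift;                 // 0xffffu << shift for int16
//     for (;;) {
//       uint32_t old = load_reserved(aligned);        // lr.w
//       if ((old & mask) != (expected << shift)) return false;
//       uint32_t merged = (old & ~mask) | ((new_val << shift) & mask);
//       if (store_conditional(aligned, merged) == 0) return true;   // sc.w
//     }
//   }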
2691 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, 2692 Register new_val, 2693 enum operand_size size, 2694 Assembler::Aqrl acquire, Assembler::Aqrl release, 2695 Register result, bool result_as_bool, 2696 Register tmp1, Register tmp2, Register tmp3) { 2697 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 2698 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 2699 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 2700 2701 Label retry, fail, done; 2702 2703 bind(retry); 2704 lr_w(old, aligned_addr, acquire); 2705 andr(tmp, old, mask); 2706 bne(tmp, expected, fail); 2707 2708 andr(tmp, old, not_mask); 2709 orr(tmp, tmp, new_val); 2710 sc_w(tmp, tmp, aligned_addr, release); 2711 bnez(tmp, retry); 2712 2713 if (result_as_bool) { 2714 mv(result, 1); 2715 j(done); 2716 2717 bind(fail); 2718 mv(result, zr); 2719 2720 bind(done); 2721 } else { 2722 andr(tmp, old, mask); 2723 2724 bind(fail); 2725 srl(result, tmp, shift); 2726 2727 if (size == int8) { 2728 sign_extend(result, result, 8); 2729 } else { 2730 // size == int16 case 2731 sign_extend(result, result, 16); 2732 } 2733 } 2734 } 2735 2736 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement 2737 // the weak CAS stuff. The major difference is that it just failed when store conditional 2738 // failed. 2739 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, 2740 Register new_val, 2741 enum operand_size size, 2742 Assembler::Aqrl acquire, Assembler::Aqrl release, 2743 Register result, 2744 Register tmp1, Register tmp2, Register tmp3) { 2745 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 2746 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 2747 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 2748 2749 Label fail, done; 2750 2751 lr_w(old, aligned_addr, acquire); 2752 andr(tmp, old, mask); 2753 bne(tmp, expected, fail); 2754 2755 andr(tmp, old, not_mask); 2756 orr(tmp, tmp, new_val); 2757 sc_w(tmp, tmp, aligned_addr, release); 2758 bnez(tmp, fail); 2759 2760 // Success 2761 mv(result, 1); 2762 j(done); 2763 2764 // Fail 2765 bind(fail); 2766 mv(result, zr); 2767 2768 bind(done); 2769 } 2770 2771 void MacroAssembler::cmpxchg(Register addr, Register expected, 2772 Register new_val, 2773 enum operand_size size, 2774 Assembler::Aqrl acquire, Assembler::Aqrl release, 2775 Register result, bool result_as_bool) { 2776 assert(size != int8 && size != int16, "unsupported operand size"); 2777 assert_different_registers(addr, t0); 2778 assert_different_registers(expected, t0); 2779 assert_different_registers(new_val, t0); 2780 2781 Label retry_load, done, ne_done; 2782 bind(retry_load); 2783 load_reserved(addr, size, acquire); 2784 bne(t0, expected, ne_done); 2785 store_conditional(addr, new_val, size, release); 2786 bnez(t0, retry_load); 2787 2788 // equal, succeed 2789 if (result_as_bool) { 2790 mv(result, 1); 2791 } else { 2792 mv(result, expected); 2793 } 2794 j(done); 2795 2796 // not equal, failed 2797 bind(ne_done); 2798 if (result_as_bool) { 2799 mv(result, zr); 2800 } else { 2801 mv(result, t0); 2802 } 2803 2804 bind(done); 2805 } 2806 2807 void MacroAssembler::cmpxchg_weak(Register addr, Register expected, 2808 Register new_val, 2809 enum operand_size size, 2810 Assembler::Aqrl acquire, Assembler::Aqrl release, 2811 Register result) { 
2812 assert_different_registers(addr, t0); 2813 assert_different_registers(expected, t0); 2814 assert_different_registers(new_val, t0); 2815 2816 Label fail, done; 2817 load_reserved(addr, size, acquire); 2818 bne(t0, expected, fail); 2819 store_conditional(addr, new_val, size, release); 2820 bnez(t0, fail); 2821 2822 // Success 2823 mv(result, 1); 2824 j(done); 2825 2826 // Fail 2827 bind(fail); 2828 mv(result, zr); 2829 2830 bind(done); 2831 } 2832 2833 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ 2834 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ 2835 prev = prev->is_valid() ? prev : zr; \ 2836 if (incr.is_register()) { \ 2837 AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 2838 } else { \ 2839 mv(t0, incr.as_constant()); \ 2840 AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 2841 } \ 2842 return; \ 2843 } 2844 2845 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) 2846 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) 2847 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) 2848 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) 2849 2850 #undef ATOMIC_OP 2851 2852 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ 2853 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 2854 prev = prev->is_valid() ? prev : zr; \ 2855 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 2856 return; \ 2857 } 2858 2859 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) 2860 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) 2861 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) 2862 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) 2863 2864 #undef ATOMIC_XCHG 2865 2866 #define ATOMIC_XCHGU(OP1, OP2) \ 2867 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 2868 atomic_##OP2(prev, newv, addr); \ 2869 zero_extend(prev, prev, 32); \ 2870 return; \ 2871 } 2872 2873 ATOMIC_XCHGU(xchgwu, xchgw) 2874 ATOMIC_XCHGU(xchgalwu, xchgalw) 2875 2876 #undef ATOMIC_XCHGU 2877 2878 void MacroAssembler::far_jump(Address entry, Register tmp) { 2879 assert(ReservedCodeCacheSize < 4*G, "branch out of range"); 2880 assert(CodeCache::find_blob(entry.target()) != nullptr, 2881 "destination of far call not found in code cache"); 2882 assert(entry.rspec().type() == relocInfo::external_word_type 2883 || entry.rspec().type() == relocInfo::runtime_call_type 2884 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 2885 IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size() 2886 if (far_branches()) { 2887 // We can use auipc + jalr here because we know that the total size of 2888 // the code cache cannot exceed 2Gb. 
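// Sketch of the reach argument (illustrative, not emitted code): auipc adds a
// signed 20-bit page offset and jalr a signed 12-bit byte offset, so any
// displacement within the < 2GB code cache can be split as
//
//   int64_t distance = target - pc;
//   int32_t hi = (int32_t)((distance + 0x800) >> 12);        // auipc immediate
//   int32_t lo = (int32_t)(distance - ((int64_t)hi << 12));  // in [-2048, 2047]
//   // auipc(tmp, hi); jalr(x0, tmp, lo);  lands exactly at pc + distance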
2889 relocate(entry.rspec(), [&] { 2890 int32_t offset; 2891 la_patchable(tmp, entry, offset); 2892 jalr(x0, tmp, offset); 2893 }); 2894 } else { 2895 j(entry); 2896 } 2897 } 2898 2899 void MacroAssembler::far_call(Address entry, Register tmp) { 2900 assert(ReservedCodeCacheSize < 4*G, "branch out of range"); 2901 assert(CodeCache::find_blob(entry.target()) != nullptr, 2902 "destination of far call not found in code cache"); 2903 assert(entry.rspec().type() == relocInfo::external_word_type 2904 || entry.rspec().type() == relocInfo::runtime_call_type 2905 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 2906 IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size() 2907 if (far_branches()) { 2908 // We can use auipc + jalr here because we know that the total size of 2909 // the code cache cannot exceed 2Gb. 2910 relocate(entry.rspec(), [&] { 2911 int32_t offset; 2912 la_patchable(tmp, entry, offset); 2913 jalr(x1, tmp, offset); // link 2914 }); 2915 } else { 2916 jal(entry); // link 2917 } 2918 } 2919 2920 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 2921 Register super_klass, 2922 Register tmp_reg, 2923 Label* L_success, 2924 Label* L_failure, 2925 Label* L_slow_path, 2926 Register super_check_offset) { 2927 assert_different_registers(sub_klass, super_klass, tmp_reg); 2928 bool must_load_sco = (super_check_offset == noreg); 2929 if (must_load_sco) { 2930 assert(tmp_reg != noreg, "supply either a temp or a register offset"); 2931 } else { 2932 assert_different_registers(sub_klass, super_klass, super_check_offset); 2933 } 2934 2935 Label L_fallthrough; 2936 int label_nulls = 0; 2937 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 2938 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 2939 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; } 2940 assert(label_nulls <= 1, "at most one null in batch"); 2941 2942 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 2943 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 2944 Address super_check_offset_addr(super_klass, sco_offset); 2945 2946 // Hacked jmp, which may only be used just before L_fallthrough. 2947 #define final_jmp(label) \ 2948 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 2949 else j(label) /*omit semi*/ 2950 2951 // If the pointers are equal, we are done (e.g., String[] elements). 2952 // This self-check enables sharing of secondary supertype arrays among 2953 // non-primary types such as array-of-interface. Otherwise, each such 2954 // type would need its own customized SSA. 2955 // We move this check to the front of the fast path because many 2956 // type checks are in fact trivially successful in this manner, 2957 // so we get a nicely predicted branch right at the start of the check. 2958 beq(sub_klass, super_klass, *L_success); 2959 2960 // Check the supertype display: 2961 if (must_load_sco) { 2962 lwu(tmp_reg, super_check_offset_addr); 2963 super_check_offset = tmp_reg; 2964 } 2965 add(t0, sub_klass, super_check_offset); 2966 Address super_check_addr(t0); 2967 ld(t0, super_check_addr); // load displayed supertype 2968 2969 // This check has worked decisively for primary supers. 2970 // Secondary supers are sought in the super_cache ('super_cache_addr'). 2971 // (Secondary supers are interfaces and very deeply nested subtypes.) 
2972 // This works in the same check above because of a tricky aliasing 2973 // between the super_Cache and the primary super display elements. 2974 // (The 'super_check_addr' can address either, as the case requires.) 2975 // Note that the cache is updated below if it does not help us find 2976 // what we need immediately. 2977 // So if it was a primary super, we can just fail immediately. 2978 // Otherwise, it's the slow path for us (no success at this point). 2979 2980 beq(super_klass, t0, *L_success); 2981 mv(t1, sc_offset); 2982 if (L_failure == &L_fallthrough) { 2983 beq(super_check_offset, t1, *L_slow_path); 2984 } else { 2985 bne(super_check_offset, t1, *L_failure, /* is_far */ true); 2986 final_jmp(*L_slow_path); 2987 } 2988 2989 bind(L_fallthrough); 2990 2991 #undef final_jmp 2992 } 2993 2994 // Scans count pointer sized words at [addr] for occurrence of value, 2995 // generic 2996 void MacroAssembler::repne_scan(Register addr, Register value, Register count, 2997 Register tmp) { 2998 Label Lloop, Lexit; 2999 beqz(count, Lexit); 3000 bind(Lloop); 3001 ld(tmp, addr); 3002 beq(value, tmp, Lexit); 3003 add(addr, addr, wordSize); 3004 sub(count, count, 1); 3005 bnez(count, Lloop); 3006 bind(Lexit); 3007 } 3008 3009 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 3010 Register super_klass, 3011 Register tmp1_reg, 3012 Register tmp2_reg, 3013 Label* L_success, 3014 Label* L_failure) { 3015 assert_different_registers(sub_klass, super_klass, tmp1_reg); 3016 if (tmp2_reg != noreg) { 3017 assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); 3018 } 3019 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) 3020 3021 Label L_fallthrough; 3022 int label_nulls = 0; 3023 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3024 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3025 3026 assert(label_nulls <= 1, "at most one null in the batch"); 3027 3028 // A couple of useful fields in sub_klass: 3029 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3030 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3031 Address secondary_supers_addr(sub_klass, ss_offset); 3032 Address super_cache_addr( sub_klass, sc_offset); 3033 3034 BLOCK_COMMENT("check_klass_subtype_slow_path"); 3035 3036 // Do a linear scan of the secondary super-klass chain. 3037 // This code is rarely used, so simplicity is a virtue here. 3038 // The repne_scan instruction uses fixed registers, which we must spill. 3039 // Don't worry too much about pre-existing connections with the input regs. 3040 3041 assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) 3042 assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) 3043 3044 RegSet pushed_registers; 3045 if (!IS_A_TEMP(x12)) { 3046 pushed_registers += x12; 3047 } 3048 if (!IS_A_TEMP(x15)) { 3049 pushed_registers += x15; 3050 } 3051 3052 if (super_klass != x10) { 3053 if (!IS_A_TEMP(x10)) { 3054 pushed_registers += x10; 3055 } 3056 } 3057 3058 push_reg(pushed_registers, sp); 3059 3060 // Get super_klass value into x10 (even if it was in x15 or x12) 3061 mv(x10, super_klass); 3062 3063 #ifndef PRODUCT 3064 mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); 3065 Address pst_counter_addr(t1); 3066 ld(t0, pst_counter_addr); 3067 add(t0, t0, 1); 3068 sd(t0, pst_counter_addr); 3069 #endif // PRODUCT 3070 3071 // We will consult the secondary-super array. 3072 ld(x15, secondary_supers_addr); 3073 // Load the array length. 
3074 lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes())); 3075 // Skip to start of data. 3076 add(x15, x15, Array<Klass*>::base_offset_in_bytes()); 3077 3078 // Set t0 to an obvious invalid value, falling through by default 3079 mv(t0, -1); 3080 // Scan X12 words at [X15] for an occurrence of X10. 3081 repne_scan(x15, x10, x12, t0); 3082 3083 // pop will restore x10, so we should use a temp register to keep its value 3084 mv(t1, x10); 3085 3086 // Unspill the temp registers: 3087 pop_reg(pushed_registers, sp); 3088 3089 bne(t1, t0, *L_failure); 3090 3091 // Success. Cache the super we found an proceed in triumph. 3092 sd(super_klass, super_cache_addr); 3093 3094 if (L_success != &L_fallthrough) { 3095 j(*L_success); 3096 } 3097 3098 #undef IS_A_TEMP 3099 3100 bind(L_fallthrough); 3101 } 3102 3103 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 3104 void MacroAssembler::tlab_allocate(Register obj, 3105 Register var_size_in_bytes, 3106 int con_size_in_bytes, 3107 Register tmp1, 3108 Register tmp2, 3109 Label& slow_case, 3110 bool is_far) { 3111 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3112 bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); 3113 } 3114 3115 // get_thread() can be called anywhere inside generated code so we 3116 // need to save whatever non-callee save context might get clobbered 3117 // by the call to Thread::current() or, indeed, the call setup code. 3118 void MacroAssembler::get_thread(Register thread) { 3119 // save all call-clobbered regs except thread 3120 RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + 3121 RegSet::range(x28, x31) + ra - thread; 3122 push_reg(saved_regs, sp); 3123 3124 mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); 3125 jalr(ra); 3126 if (thread != c_rarg0) { 3127 mv(thread, c_rarg0); 3128 } 3129 3130 // restore pushed registers 3131 pop_reg(saved_regs, sp); 3132 } 3133 3134 void MacroAssembler::load_byte_map_base(Register reg) { 3135 CardTable::CardValue* byte_map_base = 3136 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); 3137 mv(reg, (uint64_t)byte_map_base); 3138 } 3139 3140 void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { 3141 unsigned long low_address = (uintptr_t)CodeCache::low_bound(); 3142 unsigned long high_address = (uintptr_t)CodeCache::high_bound(); 3143 unsigned long dest_address = (uintptr_t)dest.target(); 3144 long offset_low = dest_address - low_address; 3145 long offset_high = dest_address - high_address; 3146 3147 assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); 3148 assert((uintptr_t)dest.target() < (1ull << 48), "bad address"); 3149 3150 // RISC-V doesn't compute a page-aligned address, in order to partially 3151 // compensate for the use of *signed* offsets in its base+disp12 3152 // addressing mode (RISC-V's PC-relative reach remains asymmetric 3153 // [-(2G + 2K), 2G - 2K). 
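// Worked bound (illustrative): auipc reaches pc + simm20 * 4K and the trailing
// ld/jalr adds simm12, so
//   min = -2^19 * 4096 - 2048       = -(2G + 2K)
//   max = (2^19 - 1) * 4096 + 2047  =   2G - 2K - 1
// giving the asymmetric window [-(2G + 2K), 2G - 2K) tested below.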
3154 if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { 3155 int64_t distance = dest.target() - pc(); 3156 auipc(reg1, (int32_t)distance + 0x800); 3157 offset = ((int32_t)distance << 20) >> 20; 3158 } else { 3159 movptr(reg1, dest.target(), offset); 3160 } 3161 } 3162 3163 void MacroAssembler::build_frame(int framesize) { 3164 assert(framesize >= 2, "framesize must include space for FP/RA"); 3165 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 3166 sub(sp, sp, framesize); 3167 sd(fp, Address(sp, framesize - 2 * wordSize)); 3168 sd(ra, Address(sp, framesize - wordSize)); 3169 if (PreserveFramePointer) { add(fp, sp, framesize); } 3170 } 3171 3172 void MacroAssembler::remove_frame(int framesize) { 3173 assert(framesize >= 2, "framesize must include space for FP/RA"); 3174 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 3175 ld(fp, Address(sp, framesize - 2 * wordSize)); 3176 ld(ra, Address(sp, framesize - wordSize)); 3177 add(sp, sp, framesize); 3178 } 3179 3180 void MacroAssembler::reserved_stack_check() { 3181 // testing if reserved zone needs to be enabled 3182 Label no_reserved_zone_enabling; 3183 3184 ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); 3185 bltu(sp, t0, no_reserved_zone_enabling); 3186 3187 enter(); // RA and FP are live. 3188 mv(c_rarg0, xthread); 3189 RuntimeAddress target(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)); 3190 relocate(target.rspec(), [&] { 3191 int32_t offset; 3192 la_patchable(t0, target, offset); 3193 jalr(x1, t0, offset); 3194 }); 3195 leave(); 3196 3197 // We have already removed our own frame. 3198 // throw_delayed_StackOverflowError will think that it's been 3199 // called by our caller. 3200 target = RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()); 3201 relocate(target.rspec(), [&] { 3202 int32_t offset; 3203 la_patchable(t0, target, offset); 3204 jalr(x0, t0, offset); 3205 }); 3206 should_not_reach_here(); 3207 3208 bind(no_reserved_zone_enabling); 3209 } 3210 3211 // Move the address of the polling page into dest. 3212 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { 3213 ld(dest, Address(xthread, JavaThread::polling_page_offset())); 3214 } 3215 3216 // Read the polling page. The address of the polling page must 3217 // already be in r. 
3218 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { 3219 relocate(rtype, [&] { 3220 lwu(zr, Address(r, offset)); 3221 }); 3222 } 3223 3224 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 3225 #ifdef ASSERT 3226 { 3227 ThreadInVMfromUnknown tiv; 3228 assert (UseCompressedOops, "should only be used for compressed oops"); 3229 assert (Universe::heap() != nullptr, "java heap should be initialized"); 3230 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 3231 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 3232 } 3233 #endif 3234 int oop_index = oop_recorder()->find_index(obj); 3235 relocate(oop_Relocation::spec(oop_index), [&] { 3236 li32(dst, 0xDEADBEEF); 3237 }); 3238 zero_extend(dst, dst, 32); 3239 } 3240 3241 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 3242 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 3243 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 3244 int index = oop_recorder()->find_index(k); 3245 assert(!Universe::heap()->is_in(k), "should not be an oop"); 3246 3247 narrowKlass nk = CompressedKlassPointers::encode(k); 3248 relocate(metadata_Relocation::spec(index), [&] { 3249 li32(dst, nk); 3250 }); 3251 zero_extend(dst, dst, 32); 3252 } 3253 3254 // Maybe emit a call via a trampoline. If the code cache is small 3255 // trampolines won't be emitted. 3256 address MacroAssembler::trampoline_call(Address entry) { 3257 assert(entry.rspec().type() == relocInfo::runtime_call_type || 3258 entry.rspec().type() == relocInfo::opt_virtual_call_type || 3259 entry.rspec().type() == relocInfo::static_call_type || 3260 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); 3261 3262 address target = entry.target(); 3263 3264 // We need a trampoline if branches are far. 3265 if (far_branches()) { 3266 if (!in_scratch_emit_size()) { 3267 if (entry.rspec().type() == relocInfo::runtime_call_type) { 3268 assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs"); 3269 code()->share_trampoline_for(entry.target(), offset()); 3270 } else { 3271 address stub = emit_trampoline_stub(offset(), target); 3272 if (stub == nullptr) { 3273 postcond(pc() == badAddress); 3274 return nullptr; // CodeCache is full 3275 } 3276 } 3277 } 3278 target = pc(); 3279 } 3280 3281 address call_pc = pc(); 3282 #ifdef ASSERT 3283 if (entry.rspec().type() != relocInfo::runtime_call_type) { 3284 assert_alignment(call_pc); 3285 } 3286 #endif 3287 relocate(entry.rspec(), [&] { 3288 jal(target); 3289 }); 3290 3291 postcond(pc() != badAddress); 3292 return call_pc; 3293 } 3294 3295 address MacroAssembler::ic_call(address entry, jint method_index) { 3296 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); 3297 IncompressibleRegion ir(this); // relocations 3298 movptr(t1, (address)Universe::non_oop_word()); 3299 assert_cond(entry != nullptr); 3300 return trampoline_call(Address(entry, rh)); 3301 } 3302 3303 // Emit a trampoline stub for a call to a target which is too far away. 
3304 // 3305 // code sequences: 3306 // 3307 // call-site: 3308 // branch-and-link to <destination> or <trampoline stub> 3309 // 3310 // Related trampoline stub for this call site in the stub section: 3311 // load the call target from the constant pool 3312 // branch (RA still points to the call site above) 3313 3314 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, 3315 address dest) { 3316 // Max stub size: alignment nop, TrampolineStub. 3317 address stub = start_a_stub(max_trampoline_stub_size()); 3318 if (stub == nullptr) { 3319 return nullptr; // CodeBuffer::expand failed 3320 } 3321 3322 // We are always 4-byte aligned here. 3323 assert_alignment(pc()); 3324 3325 // Create a trampoline stub relocation which relates this trampoline stub 3326 // with the call instruction at insts_call_instruction_offset in the 3327 // instructions code-section. 3328 3329 // Make sure the address of destination 8-byte aligned after 3 instructions. 3330 align(wordSize, NativeCallTrampolineStub::data_offset); 3331 3332 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() + 3333 insts_call_instruction_offset); 3334 const int stub_start_offset = offset(); 3335 relocate(rh, [&] { 3336 // Now, create the trampoline stub's code: 3337 // - load the call 3338 // - call 3339 Label target; 3340 ld(t0, target); // auipc + ld 3341 jr(t0); // jalr 3342 bind(target); 3343 assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, 3344 "should be"); 3345 assert(offset() % wordSize == 0, "bad alignment"); 3346 emit_int64((int64_t)dest); 3347 }); 3348 3349 const address stub_start_addr = addr_at(stub_start_offset); 3350 3351 assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); 3352 3353 end_a_stub(); 3354 return stub_start_addr; 3355 } 3356 3357 int MacroAssembler::max_trampoline_stub_size() { 3358 // Max stub size: alignment nop, TrampolineStub. 3359 return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; 3360 } 3361 3362 int MacroAssembler::static_call_stub_size() { 3363 // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr 3364 return 12 * NativeInstruction::instruction_size; 3365 } 3366 3367 Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) { 3368 switch (dst.getMode()) { 3369 case Address::base_plus_offset: 3370 // This is the expected mode, although we allow all the other 3371 // forms below. 
3372 return form_address(tmp, dst.base(), dst.offset()); 3373 default: 3374 la(tmp, dst); 3375 return Address(tmp); 3376 } 3377 } 3378 3379 void MacroAssembler::increment(const Address dst, int64_t value, Register tmp1, Register tmp2) { 3380 assert(((dst.getMode() == Address::base_plus_offset && 3381 is_simm12(dst.offset())) || is_simm12(value)), 3382 "invalid value and address mode combination"); 3383 Address adr = add_memory_helper(dst, tmp2); 3384 assert(!adr.uses(tmp1), "invalid dst for address increment"); 3385 ld(tmp1, adr); 3386 add(tmp1, tmp1, value, tmp2); 3387 sd(tmp1, adr); 3388 } 3389 3390 void MacroAssembler::incrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 3391 assert(((dst.getMode() == Address::base_plus_offset && 3392 is_simm12(dst.offset())) || is_simm12(value)), 3393 "invalid value and address mode combination"); 3394 Address adr = add_memory_helper(dst, tmp2); 3395 assert(!adr.uses(tmp1), "invalid dst for address increment"); 3396 lwu(tmp1, adr); 3397 addw(tmp1, tmp1, value, tmp2); 3398 sw(tmp1, adr); 3399 } 3400 3401 void MacroAssembler::decrement(const Address dst, int64_t value, Register tmp1, Register tmp2) { 3402 assert(((dst.getMode() == Address::base_plus_offset && 3403 is_simm12(dst.offset())) || is_simm12(value)), 3404 "invalid value and address mode combination"); 3405 Address adr = add_memory_helper(dst, tmp2); 3406 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 3407 ld(tmp1, adr); 3408 sub(tmp1, tmp1, value, tmp2); 3409 sd(tmp1, adr); 3410 } 3411 3412 void MacroAssembler::decrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 3413 assert(((dst.getMode() == Address::base_plus_offset && 3414 is_simm12(dst.offset())) || is_simm12(value)), 3415 "invalid value and address mode combination"); 3416 Address adr = add_memory_helper(dst, tmp2); 3417 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 3418 lwu(tmp1, adr); 3419 subw(tmp1, tmp1, value, tmp2); 3420 sw(tmp1, adr); 3421 } 3422 3423 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { 3424 assert_different_registers(src1, t0); 3425 relocate(src2.rspec(), [&] { 3426 int32_t offset; 3427 la_patchable(t0, src2, offset); 3428 ld(t0, Address(t0, offset)); 3429 }); 3430 beq(src1, t0, equal); 3431 } 3432 3433 void MacroAssembler::load_method_holder_cld(Register result, Register method) { 3434 load_method_holder(result, method); 3435 ld(result, Address(result, InstanceKlass::class_loader_data_offset())); 3436 } 3437 3438 void MacroAssembler::load_method_holder(Register holder, Register method) { 3439 ld(holder, Address(method, Method::const_offset())); // ConstMethod* 3440 ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* 3441 ld(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass* 3442 } 3443 3444 // string indexof 3445 // compute index by trailing zeros 3446 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, 3447 Register match_mask, Register result, 3448 Register ch2, Register tmp, 3449 bool haystack_isL) { 3450 int haystack_chr_shift = haystack_isL ? 
0 : 1; 3451 srl(match_mask, match_mask, trailing_zeros); 3452 srli(match_mask, match_mask, 1); 3453 srli(tmp, trailing_zeros, LogBitsPerByte); 3454 if (!haystack_isL) andi(tmp, tmp, 0xE); 3455 add(haystack, haystack, tmp); 3456 ld(ch2, Address(haystack)); 3457 if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); 3458 add(result, result, tmp); 3459 } 3460 3461 // string indexof 3462 // Find pattern element in src, compute match mask, 3463 // only the first occurrence of 0x80/0x8000 at low bits is the valid match index 3464 // match mask patterns and corresponding indices would be like: 3465 // - 0x8080808080808080 (Latin1) 3466 // - 7 6 5 4 3 2 1 0 (match index) 3467 // - 0x8000800080008000 (UTF16) 3468 // - 3 2 1 0 (match index) 3469 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, 3470 Register mask1, Register mask2) { 3471 xorr(src, pattern, src); 3472 sub(match_mask, src, mask1); 3473 orr(src, src, mask2); 3474 notr(src, src); 3475 andr(match_mask, match_mask, src); 3476 } 3477 3478 #ifdef COMPILER2 3479 // Code for BigInteger::mulAdd intrinsic 3480 // out = x10 3481 // in = x11 3482 // offset = x12 (already out.length-offset) 3483 // len = x13 3484 // k = x14 3485 // tmp = x28 3486 // 3487 // pseudo code from java implementation: 3488 // long kLong = k & LONG_MASK; 3489 // carry = 0; 3490 // offset = out.length-offset - 1; 3491 // for (int j = len - 1; j >= 0; j--) { 3492 // product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; 3493 // out[offset--] = (int)product; 3494 // carry = product >>> 32; 3495 // } 3496 // return (int)carry; 3497 void MacroAssembler::mul_add(Register out, Register in, Register offset, 3498 Register len, Register k, Register tmp) { 3499 Label L_tail_loop, L_unroll, L_end; 3500 mv(tmp, out); 3501 mv(out, zr); 3502 blez(len, L_end); 3503 zero_extend(k, k, 32); 3504 slliw(t0, offset, LogBytesPerInt); 3505 add(offset, tmp, t0); 3506 slliw(t0, len, LogBytesPerInt); 3507 add(in, in, t0); 3508 3509 const int unroll = 8; 3510 mv(tmp, unroll); 3511 blt(len, tmp, L_tail_loop); 3512 bind(L_unroll); 3513 for (int i = 0; i < unroll; i++) { 3514 sub(in, in, BytesPerInt); 3515 lwu(t0, Address(in, 0)); 3516 mul(t1, t0, k); 3517 add(t0, t1, out); 3518 sub(offset, offset, BytesPerInt); 3519 lwu(t1, Address(offset, 0)); 3520 add(t0, t0, t1); 3521 sw(t0, Address(offset, 0)); 3522 srli(out, t0, 32); 3523 } 3524 subw(len, len, tmp); 3525 bge(len, tmp, L_unroll); 3526 3527 bind(L_tail_loop); 3528 blez(len, L_end); 3529 sub(in, in, BytesPerInt); 3530 lwu(t0, Address(in, 0)); 3531 mul(t1, t0, k); 3532 add(t0, t1, out); 3533 sub(offset, offset, BytesPerInt); 3534 lwu(t1, Address(offset, 0)); 3535 add(t0, t0, t1); 3536 sw(t0, Address(offset, 0)); 3537 srli(out, t0, 32); 3538 subw(len, len, 1); 3539 j(L_tail_loop); 3540 3541 bind(L_end); 3542 } 3543 3544 // add two unsigned input and output carry 3545 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) 3546 { 3547 assert_different_registers(dst, carry); 3548 assert_different_registers(dst, src2); 3549 add(dst, src1, src2); 3550 sltu(carry, dst, src2); 3551 } 3552 3553 // add two input with carry 3554 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) { 3555 assert_different_registers(dst, carry); 3556 add(dst, src1, src2); 3557 add(dst, dst, carry); 3558 } 3559 3560 // add two unsigned input with carry and output carry 3561 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) 
{ 3562 assert_different_registers(dst, src2); 3563 adc(dst, src1, src2, carry); 3564 sltu(carry, dst, src2); 3565 } 3566 3567 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, 3568 Register src1, Register src2, Register carry) { 3569 cad(dest_lo, dest_lo, src1, carry); 3570 add(dest_hi, dest_hi, carry); 3571 cad(dest_lo, dest_lo, src2, carry); 3572 add(final_dest_hi, dest_hi, carry); 3573 } 3574 3575 /** 3576 * Multiply 32 bit by 32 bit first loop. 3577 */ 3578 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, 3579 Register y, Register y_idx, Register z, 3580 Register carry, Register product, 3581 Register idx, Register kdx) { 3582 // jlong carry, x[], y[], z[]; 3583 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 3584 // long product = y[idx] * x[xstart] + carry; 3585 // z[kdx] = (int)product; 3586 // carry = product >>> 32; 3587 // } 3588 // z[xstart] = (int)carry; 3589 3590 Label L_first_loop, L_first_loop_exit; 3591 blez(idx, L_first_loop_exit); 3592 3593 shadd(t0, xstart, x, t0, LogBytesPerInt); 3594 lwu(x_xstart, Address(t0, 0)); 3595 3596 bind(L_first_loop); 3597 subw(idx, idx, 1); 3598 shadd(t0, idx, y, t0, LogBytesPerInt); 3599 lwu(y_idx, Address(t0, 0)); 3600 mul(product, x_xstart, y_idx); 3601 add(product, product, carry); 3602 srli(carry, product, 32); 3603 subw(kdx, kdx, 1); 3604 shadd(t0, kdx, z, t0, LogBytesPerInt); 3605 sw(product, Address(t0, 0)); 3606 bgtz(idx, L_first_loop); 3607 3608 bind(L_first_loop_exit); 3609 } 3610 3611 /** 3612 * Multiply 64 bit by 64 bit first loop. 3613 */ 3614 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, 3615 Register y, Register y_idx, Register z, 3616 Register carry, Register product, 3617 Register idx, Register kdx) { 3618 // 3619 // jlong carry, x[], y[], z[]; 3620 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 3621 // huge_128 product = y[idx] * x[xstart] + carry; 3622 // z[kdx] = (jlong)product; 3623 // carry = (jlong)(product >>> 64); 3624 // } 3625 // z[xstart] = carry; 3626 // 3627 3628 Label L_first_loop, L_first_loop_exit; 3629 Label L_one_x, L_one_y, L_multiply; 3630 3631 subw(xstart, xstart, 1); 3632 bltz(xstart, L_one_x); 3633 3634 shadd(t0, xstart, x, t0, LogBytesPerInt); 3635 ld(x_xstart, Address(t0, 0)); 3636 ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian 3637 3638 bind(L_first_loop); 3639 subw(idx, idx, 1); 3640 bltz(idx, L_first_loop_exit); 3641 subw(idx, idx, 1); 3642 bltz(idx, L_one_y); 3643 3644 shadd(t0, idx, y, t0, LogBytesPerInt); 3645 ld(y_idx, Address(t0, 0)); 3646 ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian 3647 bind(L_multiply); 3648 3649 mulhu(t0, x_xstart, y_idx); 3650 mul(product, x_xstart, y_idx); 3651 cad(product, product, carry, t1); 3652 adc(carry, t0, zr, t1); 3653 3654 subw(kdx, kdx, 2); 3655 ror_imm(product, product, 32); // back to big-endian 3656 shadd(t0, kdx, z, t0, LogBytesPerInt); 3657 sd(product, Address(t0, 0)); 3658 3659 j(L_first_loop); 3660 3661 bind(L_one_y); 3662 lwu(y_idx, Address(y, 0)); 3663 j(L_multiply); 3664 3665 bind(L_one_x); 3666 lwu(x_xstart, Address(x, 0)); 3667 j(L_first_loop); 3668 3669 bind(L_first_loop_exit); 3670 } 3671 3672 /** 3673 * Multiply 128 bit by 128 bit. Unrolled inner loop. 
3674 * 3675 */ 3676 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, 3677 Register carry, Register carry2, 3678 Register idx, Register jdx, 3679 Register yz_idx1, Register yz_idx2, 3680 Register tmp, Register tmp3, Register tmp4, 3681 Register tmp6, Register product_hi) { 3682 // jlong carry, x[], y[], z[]; 3683 // int kdx = xstart+1; 3684 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 3685 // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; 3686 // jlong carry2 = (jlong)(tmp3 >>> 64); 3687 // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; 3688 // carry = (jlong)(tmp4 >>> 64); 3689 // z[kdx+idx+1] = (jlong)tmp3; 3690 // z[kdx+idx] = (jlong)tmp4; 3691 // } 3692 // idx += 2; 3693 // if (idx > 0) { 3694 // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; 3695 // z[kdx+idx] = (jlong)yz_idx1; 3696 // carry = (jlong)(yz_idx1 >>> 64); 3697 // } 3698 // 3699 3700 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 3701 3702 srliw(jdx, idx, 2); 3703 3704 bind(L_third_loop); 3705 3706 subw(jdx, jdx, 1); 3707 bltz(jdx, L_third_loop_exit); 3708 subw(idx, idx, 4); 3709 3710 shadd(t0, idx, y, t0, LogBytesPerInt); 3711 ld(yz_idx2, Address(t0, 0)); 3712 ld(yz_idx1, Address(t0, wordSize)); 3713 3714 shadd(tmp6, idx, z, t0, LogBytesPerInt); 3715 3716 ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian 3717 ror_imm(yz_idx2, yz_idx2, 32); 3718 3719 ld(t1, Address(tmp6, 0)); 3720 ld(t0, Address(tmp6, wordSize)); 3721 3722 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 3723 mulhu(tmp4, product_hi, yz_idx1); 3724 3725 ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian 3726 ror_imm(t1, t1, 32, tmp); 3727 3728 mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp 3729 mulhu(carry2, product_hi, yz_idx2); 3730 3731 cad(tmp3, tmp3, carry, carry); 3732 adc(tmp4, tmp4, zr, carry); 3733 cad(tmp3, tmp3, t0, t0); 3734 cadc(tmp4, tmp4, tmp, t0); 3735 adc(carry, carry2, zr, t0); 3736 cad(tmp4, tmp4, t1, carry2); 3737 adc(carry, carry, zr, carry2); 3738 3739 ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian 3740 ror_imm(tmp4, tmp4, 32); 3741 sd(tmp4, Address(tmp6, 0)); 3742 sd(tmp3, Address(tmp6, wordSize)); 3743 3744 j(L_third_loop); 3745 3746 bind(L_third_loop_exit); 3747 3748 andi(idx, idx, 0x3); 3749 beqz(idx, L_post_third_loop_done); 3750 3751 Label L_check_1; 3752 subw(idx, idx, 2); 3753 bltz(idx, L_check_1); 3754 3755 shadd(t0, idx, y, t0, LogBytesPerInt); 3756 ld(yz_idx1, Address(t0, 0)); 3757 ror_imm(yz_idx1, yz_idx1, 32); 3758 3759 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 3760 mulhu(tmp4, product_hi, yz_idx1); 3761 3762 shadd(t0, idx, z, t0, LogBytesPerInt); 3763 ld(yz_idx2, Address(t0, 0)); 3764 ror_imm(yz_idx2, yz_idx2, 32, tmp); 3765 3766 add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); 3767 3768 ror_imm(tmp3, tmp3, 32, tmp); 3769 sd(tmp3, Address(t0, 0)); 3770 3771 bind(L_check_1); 3772 3773 andi(idx, idx, 0x1); 3774 subw(idx, idx, 1); 3775 bltz(idx, L_post_third_loop_done); 3776 shadd(t0, idx, y, t0, LogBytesPerInt); 3777 lwu(tmp4, Address(t0, 0)); 3778 mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 3779 mulhu(carry2, tmp4, product_hi); 3780 3781 shadd(t0, idx, z, t0, LogBytesPerInt); 3782 lwu(tmp4, Address(t0, 0)); 3783 3784 add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); 3785 3786 shadd(t0, idx, z, t0, LogBytesPerInt); 3787 sw(tmp3, Address(t0, 0)); 3788 3789 slli(t0, carry2, 32); 3790 
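// The bits of carry2:tmp3 that are not stored above become the new 64-bit carry:
// carry = (carry2 << 32) | (tmp3 >>> 32)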
srli(carry, tmp3, 32); 3791 orr(carry, carry, t0); 3792 3793 bind(L_post_third_loop_done); 3794 } 3795 3796 /** 3797 * Code for BigInteger::multiplyToLen() intrinsic. 3798 * 3799 * x10: x 3800 * x11: xlen 3801 * x12: y 3802 * x13: ylen 3803 * x14: z 3804 * x15: zlen 3805 * x16: tmp1 3806 * x17: tmp2 3807 * x7: tmp3 3808 * x28: tmp4 3809 * x29: tmp5 3810 * x30: tmp6 3811 * x31: tmp7 3812 */ 3813 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, 3814 Register z, Register zlen, 3815 Register tmp1, Register tmp2, Register tmp3, Register tmp4, 3816 Register tmp5, Register tmp6, Register product_hi) { 3817 assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); 3818 3819 const Register idx = tmp1; 3820 const Register kdx = tmp2; 3821 const Register xstart = tmp3; 3822 3823 const Register y_idx = tmp4; 3824 const Register carry = tmp5; 3825 const Register product = xlen; 3826 const Register x_xstart = zlen; // reuse register 3827 3828 mv(idx, ylen); // idx = ylen; 3829 mv(kdx, zlen); // kdx = xlen+ylen; 3830 mv(carry, zr); // carry = 0; 3831 3832 Label L_multiply_64_x_64_loop, L_done; 3833 3834 subw(xstart, xlen, 1); 3835 bltz(xstart, L_done); 3836 3837 const Register jdx = tmp1; 3838 3839 if (AvoidUnalignedAccesses) { 3840 // Check if x and y are both 8-byte aligned. 3841 orr(t0, xlen, ylen); 3842 test_bit(t0, t0, 0); 3843 beqz(t0, L_multiply_64_x_64_loop); 3844 3845 multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 3846 shadd(t0, xstart, z, t0, LogBytesPerInt); 3847 sw(carry, Address(t0, 0)); 3848 3849 Label L_second_loop_unaligned; 3850 bind(L_second_loop_unaligned); 3851 mv(carry, zr); 3852 mv(jdx, ylen); 3853 subw(xstart, xstart, 1); 3854 bltz(xstart, L_done); 3855 sub(sp, sp, 2 * wordSize); 3856 sd(z, Address(sp, 0)); 3857 sd(zr, Address(sp, wordSize)); 3858 shadd(t0, xstart, z, t0, LogBytesPerInt); 3859 addi(z, t0, 4); 3860 shadd(t0, xstart, x, t0, LogBytesPerInt); 3861 lwu(product, Address(t0, 0)); 3862 Label L_third_loop, L_third_loop_exit; 3863 3864 blez(jdx, L_third_loop_exit); 3865 3866 bind(L_third_loop); 3867 subw(jdx, jdx, 1); 3868 shadd(t0, jdx, y, t0, LogBytesPerInt); 3869 lwu(t0, Address(t0, 0)); 3870 mul(t1, t0, product); 3871 add(t0, t1, carry); 3872 shadd(tmp6, jdx, z, t1, LogBytesPerInt); 3873 lwu(t1, Address(tmp6, 0)); 3874 add(t0, t0, t1); 3875 sw(t0, Address(tmp6, 0)); 3876 srli(carry, t0, 32); 3877 bgtz(jdx, L_third_loop); 3878 3879 bind(L_third_loop_exit); 3880 ld(z, Address(sp, 0)); 3881 addi(sp, sp, 2 * wordSize); 3882 shadd(t0, xstart, z, t0, LogBytesPerInt); 3883 sw(carry, Address(t0, 0)); 3884 3885 j(L_second_loop_unaligned); 3886 } 3887 3888 bind(L_multiply_64_x_64_loop); 3889 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 3890 3891 Label L_second_loop_aligned; 3892 beqz(kdx, L_second_loop_aligned); 3893 3894 Label L_carry; 3895 subw(kdx, kdx, 1); 3896 beqz(kdx, L_carry); 3897 3898 shadd(t0, kdx, z, t0, LogBytesPerInt); 3899 sw(carry, Address(t0, 0)); 3900 srli(carry, carry, 32); 3901 subw(kdx, kdx, 1); 3902 3903 bind(L_carry); 3904 shadd(t0, kdx, z, t0, LogBytesPerInt); 3905 sw(carry, Address(t0, 0)); 3906 3907 // Second and third (nested) loops. 
3908 // 3909 // for (int i = xstart-1; i >= 0; i--) { // Second loop 3910 // carry = 0; 3911 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 3912 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 3913 // (z[k] & LONG_MASK) + carry; 3914 // z[k] = (int)product; 3915 // carry = product >>> 32; 3916 // } 3917 // z[i] = (int)carry; 3918 // } 3919 // 3920 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi 3921 3922 bind(L_second_loop_aligned); 3923 mv(carry, zr); // carry = 0; 3924 mv(jdx, ylen); // j = ystart+1 3925 3926 subw(xstart, xstart, 1); // i = xstart-1; 3927 bltz(xstart, L_done); 3928 3929 sub(sp, sp, 4 * wordSize); 3930 sd(z, Address(sp, 0)); 3931 3932 Label L_last_x; 3933 shadd(t0, xstart, z, t0, LogBytesPerInt); 3934 addi(z, t0, 4); 3935 subw(xstart, xstart, 1); // i = xstart-1; 3936 bltz(xstart, L_last_x); 3937 3938 shadd(t0, xstart, x, t0, LogBytesPerInt); 3939 ld(product_hi, Address(t0, 0)); 3940 ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian 3941 3942 Label L_third_loop_prologue; 3943 bind(L_third_loop_prologue); 3944 3945 sd(ylen, Address(sp, wordSize)); 3946 sd(x, Address(sp, 2 * wordSize)); 3947 sd(xstart, Address(sp, 3 * wordSize)); 3948 multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, 3949 tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); 3950 ld(z, Address(sp, 0)); 3951 ld(ylen, Address(sp, wordSize)); 3952 ld(x, Address(sp, 2 * wordSize)); 3953 ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen 3954 addi(sp, sp, 4 * wordSize); 3955 3956 addiw(tmp3, xlen, 1); 3957 shadd(t0, tmp3, z, t0, LogBytesPerInt); 3958 sw(carry, Address(t0, 0)); 3959 3960 subw(tmp3, tmp3, 1); 3961 bltz(tmp3, L_done); 3962 3963 srli(carry, carry, 32); 3964 shadd(t0, tmp3, z, t0, LogBytesPerInt); 3965 sw(carry, Address(t0, 0)); 3966 j(L_second_loop_aligned); 3967 3968 // Next infrequent code is moved outside loops. 3969 bind(L_last_x); 3970 lwu(product_hi, Address(x, 0)); 3971 j(L_third_loop_prologue); 3972 3973 bind(L_done); 3974 } 3975 #endif 3976 3977 // Count bits of trailing zero chars from lsb to msb until first non-zero element. 3978 // For LL case, one byte for one element, so shift 8 bits once, and for other case, 3979 // shift 16 bits once. 3980 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) { 3981 if (UseZbb) { 3982 assert_different_registers(Rd, Rs, tmp1); 3983 int step = isLL ? 8 : 16; 3984 ctz(Rd, Rs); 3985 andi(tmp1, Rd, step - 1); 3986 sub(Rd, Rd, tmp1); 3987 return; 3988 } 3989 3990 assert_different_registers(Rd, Rs, tmp1, tmp2); 3991 Label Loop; 3992 int step = isLL ? 
8 : 16; 3993 mv(Rd, -step); 3994 mv(tmp2, Rs); 3995 3996 bind(Loop); 3997 addi(Rd, Rd, step); 3998 andi(tmp1, tmp2, ((1 << step) - 1)); 3999 srli(tmp2, tmp2, step); 4000 beqz(tmp1, Loop); 4001 } 4002 4003 // This instruction reads adjacent 4 bytes from the lower half of source register, 4004 // inflate into a register, for example: 4005 // Rs: A7A6A5A4A3A2A1A0 4006 // Rd: 00A300A200A100A0 4007 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) { 4008 assert_different_registers(Rd, Rs, tmp1, tmp2); 4009 4010 mv(tmp1, 0xFF000000); // first byte mask at lower word 4011 andr(Rd, Rs, tmp1); 4012 for (int i = 0; i < 2; i++) { 4013 slli(Rd, Rd, wordSize); 4014 srli(tmp1, tmp1, wordSize); 4015 andr(tmp2, Rs, tmp1); 4016 orr(Rd, Rd, tmp2); 4017 } 4018 slli(Rd, Rd, wordSize); 4019 andi(tmp2, Rs, 0xFF); // last byte mask at lower word 4020 orr(Rd, Rd, tmp2); 4021 } 4022 4023 // This instruction reads adjacent 4 bytes from the upper half of source register, 4024 // inflate into a register, for example: 4025 // Rs: A7A6A5A4A3A2A1A0 4026 // Rd: 00A700A600A500A4 4027 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) { 4028 assert_different_registers(Rd, Rs, tmp1, tmp2); 4029 srli(Rs, Rs, 32); // only upper 32 bits are needed 4030 inflate_lo32(Rd, Rs, tmp1, tmp2); 4031 } 4032 4033 // The size of the blocks erased by the zero_blocks stub. We must 4034 // handle anything smaller than this ourselves in zero_words(). 4035 const int MacroAssembler::zero_words_block_size = 8; 4036 4037 // zero_words() is used by C2 ClearArray patterns. It is as small as 4038 // possible, handling small word counts locally and delegating 4039 // anything larger to the zero_blocks stub. It is expanded many times 4040 // in compiled code, so it is important to keep it short. 4041 4042 // ptr: Address of a buffer to be zeroed. 4043 // cnt: Count in HeapWords. 4044 // 4045 // ptr, cnt, and t0 are clobbered. 4046 address MacroAssembler::zero_words(Register ptr, Register cnt) { 4047 assert(is_power_of_2(zero_words_block_size), "adjust this"); 4048 assert(ptr == x28 && cnt == x29, "mismatch in register usage"); 4049 assert_different_registers(cnt, t0); 4050 4051 BLOCK_COMMENT("zero_words {"); 4052 4053 mv(t0, zero_words_block_size); 4054 Label around, done, done16; 4055 bltu(cnt, t0, around); 4056 { 4057 RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); 4058 assert(zero_blocks.target() != nullptr, "zero_blocks stub has not been generated"); 4059 if (StubRoutines::riscv::complete()) { 4060 address tpc = trampoline_call(zero_blocks); 4061 if (tpc == nullptr) { 4062 DEBUG_ONLY(reset_labels(around)); 4063 postcond(pc() == badAddress); 4064 return nullptr; 4065 } 4066 } else { 4067 jal(zero_blocks); 4068 } 4069 } 4070 bind(around); 4071 for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { 4072 Label l; 4073 test_bit(t0, cnt, exact_log2(i)); 4074 beqz(t0, l); 4075 for (int j = 0; j < i; j++) { 4076 sd(zr, Address(ptr, j * wordSize)); 4077 } 4078 addi(ptr, ptr, i * wordSize); 4079 bind(l); 4080 } 4081 { 4082 Label l; 4083 test_bit(t0, cnt, 0); 4084 beqz(t0, l); 4085 sd(zr, Address(ptr, 0)); 4086 bind(l); 4087 } 4088 4089 BLOCK_COMMENT("} zero_words"); 4090 postcond(pc() != badAddress); 4091 return pc(); 4092 } 4093 4094 #define SmallArraySize (18 * BytesPerLong) 4095 4096 // base: Address of a buffer to be zeroed, 8 bytes aligned. 4097 // cnt: Immediate count in HeapWords. 
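//
// Illustrative sketch (comment only, not emitted code) of the strategy used
// below, assuming the SmallArraySize threshold and the 8-way unrolled loop:
//
//   if (cnt <= SmallArraySize / BytesPerLong) {
//     for (i = 0; i < cnt; i++) base[i] = 0;              // fully unrolled stores
//   } else {
//     for (i = 0; i < cnt % 8; i++) base[i] = 0;          // peel the remainder
//     do { base[i .. i+7] = 0; i += 8; } while (i < cnt); // 8-way unrolled loop
//   }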
4098 void MacroAssembler::zero_words(Register base, uint64_t cnt) { 4099 assert_different_registers(base, t0, t1); 4100 4101 BLOCK_COMMENT("zero_words {"); 4102 4103 if (cnt <= SmallArraySize / BytesPerLong) { 4104 for (int i = 0; i < (int)cnt; i++) { 4105 sd(zr, Address(base, i * wordSize)); 4106 } 4107 } else { 4108 const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll 4109 int remainder = cnt % unroll; 4110 for (int i = 0; i < remainder; i++) { 4111 sd(zr, Address(base, i * wordSize)); 4112 } 4113 4114 Label loop; 4115 Register cnt_reg = t0; 4116 Register loop_base = t1; 4117 cnt = cnt - remainder; 4118 mv(cnt_reg, cnt); 4119 add(loop_base, base, remainder * wordSize); 4120 bind(loop); 4121 sub(cnt_reg, cnt_reg, unroll); 4122 for (int i = 0; i < unroll; i++) { 4123 sd(zr, Address(loop_base, i * wordSize)); 4124 } 4125 add(loop_base, loop_base, unroll * wordSize); 4126 bnez(cnt_reg, loop); 4127 } 4128 4129 BLOCK_COMMENT("} zero_words"); 4130 } 4131 4132 // base: Address of a buffer to be filled, 8 bytes aligned. 4133 // cnt: Count in 8-byte unit. 4134 // value: Value to be filled with. 4135 // base will point to the end of the buffer after filling. 4136 void MacroAssembler::fill_words(Register base, Register cnt, Register value) { 4137 // Algorithm: 4138 // 4139 // t0 = cnt & 7 4140 // cnt -= t0 4141 // p += t0 4142 // switch (t0): 4143 // switch start: 4144 // do while cnt 4145 // cnt -= 8 4146 // p[-8] = value 4147 // case 7: 4148 // p[-7] = value 4149 // case 6: 4150 // p[-6] = value 4151 // // ... 4152 // case 1: 4153 // p[-1] = value 4154 // case 0: 4155 // p += 8 4156 // do-while end 4157 // switch end 4158 4159 assert_different_registers(base, cnt, value, t0, t1); 4160 4161 Label fini, skip, entry, loop; 4162 const int unroll = 8; // Number of sd instructions we'll unroll 4163 4164 beqz(cnt, fini); 4165 4166 andi(t0, cnt, unroll - 1); 4167 sub(cnt, cnt, t0); 4168 // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 4169 shadd(base, t0, base, t1, 3); 4170 la(t1, entry); 4171 slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) 4172 sub(t1, t1, t0); 4173 jr(t1); 4174 4175 bind(loop); 4176 add(base, base, unroll * 8); 4177 for (int i = -unroll; i < 0; i++) { 4178 sd(value, Address(base, i * 8)); 4179 } 4180 bind(entry); 4181 sub(cnt, cnt, unroll); 4182 bgez(cnt, loop); 4183 4184 bind(fini); 4185 } 4186 4187 // Zero blocks of memory by using CBO.ZERO. 4188 // 4189 // Aligns the base address first sufficiently for CBO.ZERO, then uses 4190 // CBO.ZERO repeatedly for every full block. cnt is the size to be 4191 // zeroed in HeapWords. Returns the count of words left to be zeroed 4192 // in cnt. 4193 // 4194 // NOTE: This is intended to be used in the zero_blocks() stub. If 4195 // you want to use it elsewhere, note that cnt must be >= CacheLineSize. 4196 void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) { 4197 Label initial_table_end, loop; 4198 4199 // Align base with cache line size. 4200 neg(tmp1, base); 4201 andi(tmp1, tmp1, CacheLineSize - 1); 4202 4203 // tmp1: the number of bytes to be filled to align the base with cache line size. 
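  // The next few instructions advance base past the unaligned prefix, deduct the
  // corresponding number of words from cnt, and then jump into the middle of the
  // sd(zr) table below so that exactly (prefix bytes / wordSize) stores execute;
  // tmp1 / 2 is that store count multiplied by the 4-byte instruction size.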
4204 add(base, base, tmp1); 4205 srai(tmp2, tmp1, 3); 4206 sub(cnt, cnt, tmp2); 4207 srli(tmp2, tmp1, 1); 4208 la(tmp1, initial_table_end); 4209 sub(tmp2, tmp1, tmp2); 4210 jr(tmp2); 4211 for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) { 4212 sd(zr, Address(base, i)); 4213 } 4214 bind(initial_table_end); 4215 4216 mv(tmp1, CacheLineSize / wordSize); 4217 bind(loop); 4218 cbo_zero(base); 4219 sub(cnt, cnt, tmp1); 4220 add(base, base, CacheLineSize); 4221 bge(cnt, tmp1, loop); 4222 } 4223 4224 // java.lang.Math.round(float a) 4225 // Returns the closest int to the argument, with ties rounding to positive infinity. 4226 void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) { 4227 // this instruction sequence provides a performance improvement on all tested devices; 4228 // don't change it without re-verification 4229 Label done; 4230 mv(t0, jint_cast(0.5f)); 4231 fmv_w_x(ftmp, t0); 4232 4233 // dst = 0 if NaN 4234 feq_s(t0, src, src); // replacing fclass with feq as performance optimization 4235 mv(dst, zr); 4236 beqz(t0, done); 4237 4238 // dst = (src + 0.5f) rounded down towards negative infinity 4239 // Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place. 4240 // RDN is required for fadd_s, RNE gives incorrect results: 4241 // -------------------------------------------------------------------- 4242 // fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000 4243 // fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610 4244 // -------------------------------------------------------------------- 4245 // fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000 4246 // fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609 4247 // -------------------------------------------------------------------- 4248 fadd_s(ftmp, src, ftmp, RoundingMode::rdn); 4249 fcvt_w_s(dst, ftmp, RoundingMode::rdn); 4250 4251 bind(done); 4252 } 4253 4254 // java.lang.Math.round(double a) 4255 // Returns the closest long to the argument, with ties rounding to positive infinity.
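// For example, under these semantics round(2.5d) == 3, round(-2.5d) == -2, and
// round(NaN) == 0; the RDN add/convert pair below reproduces this, mirroring
// the float variant above.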
4256 void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) { 4257 // this instruction sequence provides a performance improvement on all tested devices; 4258 // don't change it without re-verification 4259 Label done; 4260 mv(t0, julong_cast(0.5)); 4261 fmv_d_x(ftmp, t0); 4262 4263 // dst = 0 if NaN 4264 feq_d(t0, src, src); // replacing fclass with feq as performance optimization 4265 mv(dst, zr); 4266 beqz(t0, done); 4267 4268 // dst = (src + 0.5) rounded down towards negative infinity 4269 fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here, otherwise some inputs produce incorrect results 4270 fcvt_l_d(dst, ftmp, RoundingMode::rdn); 4271 4272 bind(done); 4273 } 4274 4275 #define FCVT_SAFE(FLOATCVT, FLOATSIG) \ 4276 void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ 4277 Label done; \ 4278 assert_different_registers(dst, tmp); \ 4279 fclass_##FLOATSIG(tmp, src); \ 4280 mv(dst, zr); \ 4281 /* check if src is NaN */ \ 4282 andi(tmp, tmp, 0b1100000000); \ 4283 bnez(tmp, done); \ 4284 FLOATCVT(dst, src); \ 4285 bind(done); \ 4286 } 4287 4288 FCVT_SAFE(fcvt_w_s, s); 4289 FCVT_SAFE(fcvt_l_s, s); 4290 FCVT_SAFE(fcvt_w_d, d); 4291 FCVT_SAFE(fcvt_l_d, d); 4292 4293 #undef FCVT_SAFE 4294 4295 #define FCMP(FLOATTYPE, FLOATSIG) \ 4296 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ 4297 FloatRegister Rs2, int unordered_result) { \ 4298 Label Ldone; \ 4299 if (unordered_result < 0) { \ 4300 /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ 4301 /* installs 1 if gt else 0 */ \ 4302 flt_##FLOATSIG(result, Rs2, Rs1); \ 4303 /* Rs1 > Rs2, install 1 */ \ 4304 bgtz(result, Ldone); \ 4305 feq_##FLOATSIG(result, Rs1, Rs2); \ 4306 addi(result, result, -1); \ 4307 /* Rs1 = Rs2, install 0 */ \ 4308 /* NaN or Rs1 < Rs2, install -1 */ \ 4309 bind(Ldone); \ 4310 } else { \ 4311 /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ 4312 /* installs 1 if gt or unordered else 0 */ \ 4313 flt_##FLOATSIG(result, Rs1, Rs2); \ 4314 /* Rs1 < Rs2, install -1 */ \ 4315 bgtz(result, Ldone); \ 4316 feq_##FLOATSIG(result, Rs1, Rs2); \ 4317 addi(result, result, -1); \ 4318 /* Rs1 = Rs2, install 0 */ \ 4319 /* NaN or Rs1 > Rs2, install 1 */ \ 4320 bind(Ldone); \ 4321 neg(result, result); \ 4322 } \ 4323 } 4324 4325 FCMP(float, s); 4326 FCMP(double, d); 4327 4328 #undef FCMP 4329 4330 // Zero words; len is in bytes 4331 // Destroys all registers except addr 4332 // len must be a nonzero multiple of wordSize 4333 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { 4334 assert_different_registers(addr, len, tmp, t0, t1); 4335 4336 #ifdef ASSERT 4337 { 4338 Label L; 4339 andi(t0, len, BytesPerWord - 1); 4340 beqz(t0, L); 4341 stop("len is not a multiple of BytesPerWord"); 4342 bind(L); 4343 } 4344 #endif // ASSERT 4345 4346 #ifndef PRODUCT 4347 block_comment("zero memory"); 4348 #endif // PRODUCT 4349 4350 Label loop; 4351 Label entry; 4352 4353 // Algorithm: 4354 // 4355 // t0 = cnt & 7 4356 // cnt -= t0 4357 // p += t0 4358 // switch (t0) { 4359 // do { 4360 // cnt -= 8 4361 // p[-8] = 0 4362 // case 7: 4363 // p[-7] = 0 4364 // case 6: 4365 // p[-6] = 0 4366 // ...
4367 // case 1: 4368 // p[-1] = 0 4369 // case 0: 4370 // p += 8 4371 // } while (cnt) 4372 // } 4373 4374 const int unroll = 8; // Number of sd(zr) instructions we'll unroll 4375 4376 srli(len, len, LogBytesPerWord); 4377 andi(t0, len, unroll - 1); // t0 = cnt % unroll 4378 sub(len, len, t0); // cnt -= unroll 4379 // tmp always points to the end of the region we're about to zero 4380 shadd(tmp, t0, addr, t1, LogBytesPerWord); 4381 la(t1, entry); 4382 slli(t0, t0, 2); 4383 sub(t1, t1, t0); 4384 jr(t1); 4385 bind(loop); 4386 sub(len, len, unroll); 4387 for (int i = -unroll; i < 0; i++) { 4388 sd(zr, Address(tmp, i * wordSize)); 4389 } 4390 bind(entry); 4391 add(tmp, tmp, unroll * wordSize); 4392 bnez(len, loop); 4393 } 4394 4395 // shift left by shamt and add 4396 // Rd = (Rs1 << shamt) + Rs2 4397 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { 4398 if (UseZba) { 4399 if (shamt == 1) { 4400 sh1add(Rd, Rs1, Rs2); 4401 return; 4402 } else if (shamt == 2) { 4403 sh2add(Rd, Rs1, Rs2); 4404 return; 4405 } else if (shamt == 3) { 4406 sh3add(Rd, Rs1, Rs2); 4407 return; 4408 } 4409 } 4410 4411 if (shamt != 0) { 4412 slli(tmp, Rs1, shamt); 4413 add(Rd, Rs2, tmp); 4414 } else { 4415 add(Rd, Rs1, Rs2); 4416 } 4417 } 4418 4419 void MacroAssembler::zero_extend(Register dst, Register src, int bits) { 4420 if (UseZba && bits == 32) { 4421 zext_w(dst, src); 4422 return; 4423 } 4424 4425 if (UseZbb && bits == 16) { 4426 zext_h(dst, src); 4427 return; 4428 } 4429 4430 if (bits == 8) { 4431 zext_b(dst, src); 4432 } else { 4433 slli(dst, src, XLEN - bits); 4434 srli(dst, dst, XLEN - bits); 4435 } 4436 } 4437 4438 void MacroAssembler::sign_extend(Register dst, Register src, int bits) { 4439 if (UseZbb) { 4440 if (bits == 8) { 4441 sext_b(dst, src); 4442 return; 4443 } else if (bits == 16) { 4444 sext_h(dst, src); 4445 return; 4446 } 4447 } 4448 4449 if (bits == 32) { 4450 sext_w(dst, src); 4451 } else { 4452 slli(dst, src, XLEN - bits); 4453 srai(dst, dst, XLEN - bits); 4454 } 4455 } 4456 4457 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) 4458 { 4459 if (src1 == src2) { 4460 mv(dst, zr); 4461 return; 4462 } 4463 Label done; 4464 Register left = src1; 4465 Register right = src2; 4466 if (dst == src1) { 4467 assert_different_registers(dst, src2, tmp); 4468 mv(tmp, src1); 4469 left = tmp; 4470 } else if (dst == src2) { 4471 assert_different_registers(dst, src1, tmp); 4472 mv(tmp, src2); 4473 right = tmp; 4474 } 4475 4476 // installs 1 if gt else 0 4477 slt(dst, right, left); 4478 bnez(dst, done); 4479 slt(dst, left, right); 4480 // dst = -1 if lt; else if eq , dst = 0 4481 neg(dst, dst); 4482 bind(done); 4483 } 4484 4485 // The java_calling_convention describes stack locations as ideal slots on 4486 // a frame with no abi restrictions. Since we must observe abi restrictions 4487 // (like the placement of the register window) the slots must be biased by 4488 // the following value. 4489 static int reg2offset_in(VMReg r) { 4490 // Account for saved fp and ra 4491 // This should really be in_preserve_stack_slots 4492 return r->reg2stack() * VMRegImpl::stack_slot_size; 4493 } 4494 4495 static int reg2offset_out(VMReg r) { 4496 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; 4497 } 4498 4499 // On 64 bit we will store integer like items to the stack as 4500 // 64 bits items (riscv64 abi) even though java would only store 4501 // 32bits for a parameter. 
On 32bit it will simply be 32 bits 4502 // So this routine will do 32->32 on 32bit and 32->64 on 64bit 4503 void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { 4504 if (src.first()->is_stack()) { 4505 if (dst.first()->is_stack()) { 4506 // stack to stack 4507 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4508 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4509 } else { 4510 // stack to reg 4511 lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4512 } 4513 } else if (dst.first()->is_stack()) { 4514 // reg to stack 4515 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 4516 } else { 4517 if (dst.first() != src.first()) { 4518 sign_extend(dst.first()->as_Register(), src.first()->as_Register(), 32); 4519 } 4520 } 4521 } 4522 4523 // An oop arg. Must pass a handle not the oop itself 4524 void MacroAssembler::object_move(OopMap* map, 4525 int oop_handle_offset, 4526 int framesize_in_slots, 4527 VMRegPair src, 4528 VMRegPair dst, 4529 bool is_receiver, 4530 int* receiver_offset) { 4531 assert_cond(map != nullptr && receiver_offset != nullptr); 4532 4533 // must pass a handle. First figure out the location we use as a handle 4534 Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); 4535 4536 // See if oop is null if it is we need no handle 4537 4538 if (src.first()->is_stack()) { 4539 // Oop is already on the stack as an argument 4540 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); 4541 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); 4542 if (is_receiver) { 4543 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; 4544 } 4545 4546 ld(t0, Address(fp, reg2offset_in(src.first()))); 4547 la(rHandle, Address(fp, reg2offset_in(src.first()))); 4548 // conditionally move a null 4549 Label notZero1; 4550 bnez(t0, notZero1); 4551 mv(rHandle, zr); 4552 bind(notZero1); 4553 } else { 4554 4555 // Oop is in a register we must store it to the space we reserve 4556 // on the stack for oop_handles and pass a handle if oop is non-null 4557 4558 const Register rOop = src.first()->as_Register(); 4559 int oop_slot = -1; 4560 if (rOop == j_rarg0) { 4561 oop_slot = 0; 4562 } else if (rOop == j_rarg1) { 4563 oop_slot = 1; 4564 } else if (rOop == j_rarg2) { 4565 oop_slot = 2; 4566 } else if (rOop == j_rarg3) { 4567 oop_slot = 3; 4568 } else if (rOop == j_rarg4) { 4569 oop_slot = 4; 4570 } else if (rOop == j_rarg5) { 4571 oop_slot = 5; 4572 } else if (rOop == j_rarg6) { 4573 oop_slot = 6; 4574 } else { 4575 assert(rOop == j_rarg7, "wrong register"); 4576 oop_slot = 7; 4577 } 4578 4579 oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; 4580 int offset = oop_slot * VMRegImpl::stack_slot_size; 4581 4582 map->set_oop(VMRegImpl::stack2reg(oop_slot)); 4583 // Store oop in handle area, may be null 4584 sd(rOop, Address(sp, offset)); 4585 if (is_receiver) { 4586 *receiver_offset = offset; 4587 } 4588 4589 //rOop maybe the same as rHandle 4590 if (rOop == rHandle) { 4591 Label isZero; 4592 beqz(rOop, isZero); 4593 la(rHandle, Address(sp, offset)); 4594 bind(isZero); 4595 } else { 4596 Label notZero2; 4597 la(rHandle, Address(sp, offset)); 4598 bnez(rOop, notZero2); 4599 mv(rHandle, zr); 4600 bind(notZero2); 4601 } 4602 } 4603 4604 // If arg is on the stack then place it otherwise it is already in correct reg. 
4605 if (dst.first()->is_stack()) { 4606 sd(rHandle, Address(sp, reg2offset_out(dst.first()))); 4607 } 4608 } 4609 4610 // A float arg may have to do float reg int reg conversion 4611 void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { 4612 assert(src.first()->is_stack() && dst.first()->is_stack() || 4613 src.first()->is_reg() && dst.first()->is_reg() || 4614 src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); 4615 if (src.first()->is_stack()) { 4616 if (dst.first()->is_stack()) { 4617 lwu(tmp, Address(fp, reg2offset_in(src.first()))); 4618 sw(tmp, Address(sp, reg2offset_out(dst.first()))); 4619 } else if (dst.first()->is_Register()) { 4620 lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4621 } else { 4622 ShouldNotReachHere(); 4623 } 4624 } else if (src.first() != dst.first()) { 4625 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { 4626 fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 4627 } else { 4628 ShouldNotReachHere(); 4629 } 4630 } 4631 } 4632 4633 // A long move 4634 void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { 4635 if (src.first()->is_stack()) { 4636 if (dst.first()->is_stack()) { 4637 // stack to stack 4638 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4639 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4640 } else { 4641 // stack to reg 4642 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4643 } 4644 } else if (dst.first()->is_stack()) { 4645 // reg to stack 4646 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 4647 } else { 4648 if (dst.first() != src.first()) { 4649 mv(dst.first()->as_Register(), src.first()->as_Register()); 4650 } 4651 } 4652 } 4653 4654 // A double move 4655 void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { 4656 assert(src.first()->is_stack() && dst.first()->is_stack() || 4657 src.first()->is_reg() && dst.first()->is_reg() || 4658 src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); 4659 if (src.first()->is_stack()) { 4660 if (dst.first()->is_stack()) { 4661 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4662 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4663 } else if (dst.first()-> is_Register()) { 4664 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4665 } else { 4666 ShouldNotReachHere(); 4667 } 4668 } else if (src.first() != dst.first()) { 4669 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { 4670 fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 4671 } else { 4672 ShouldNotReachHere(); 4673 } 4674 } 4675 } 4676 4677 void MacroAssembler::rt_call(address dest, Register tmp) { 4678 CodeBlob *cb = CodeCache::find_blob(dest); 4679 RuntimeAddress target(dest); 4680 if (cb) { 4681 far_call(target); 4682 } else { 4683 relocate(target.rspec(), [&] { 4684 int32_t offset; 4685 la_patchable(tmp, target, offset); 4686 jalr(x1, tmp, offset); 4687 }); 4688 } 4689 } 4690 4691 void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) { 4692 assert(bit_pos < 64, "invalid bit range"); 4693 if (UseZbs) { 4694 bexti(Rd, Rs, bit_pos); 4695 return; 4696 } 4697 int64_t imm = (int64_t)(1UL << bit_pos); 4698 if (is_simm12(imm)) { 4699 and_imm12(Rd, Rs, imm); 4700 } else { 4701 srli(Rd, Rs, bit_pos); 4702 and_imm12(Rd, Rd, 1); 4703 } 4704 } 4705 4706 // Implements lightweight-locking. 
4707 // 4708 // - obj: the object to be locked 4709 // - tmp1, tmp2, tmp3: temporary registers, will be destroyed 4710 // - slow: branched to if locking fails 4711 void MacroAssembler::lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) { 4712 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); 4713 assert_different_registers(obj, tmp1, tmp2, tmp3, t0); 4714 4715 Label push; 4716 const Register top = tmp1; 4717 const Register mark = tmp2; 4718 const Register t = tmp3; 4719 4720 // Preload the markWord. It is important that this is the first 4721 // instruction emitted as it is part of C1's null check semantics. 4722 ld(mark, Address(obj, oopDesc::mark_offset_in_bytes())); 4723 4724 // Check if the lock-stack is full. 4725 lwu(top, Address(xthread, JavaThread::lock_stack_top_offset())); 4726 mv(t, (unsigned)LockStack::end_offset()); 4727 bge(top, t, slow, /* is_far */ true); 4728 4729 // Check for recursion. 4730 add(t, xthread, top); 4731 ld(t, Address(t, -oopSize)); 4732 beq(obj, t, push); 4733 4734 // Check header for monitor (0b10). 4735 test_bit(t, mark, exact_log2(markWord::monitor_value)); 4736 bnez(t, slow, /* is_far */ true); 4737 4738 // Try to lock. Transition lock-bits 0b01 => 0b00 4739 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a la"); 4740 ori(mark, mark, markWord::unlocked_value); 4741 xori(t, mark, markWord::unlocked_value); 4742 cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64, 4743 /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t); 4744 bne(mark, t, slow, /* is_far */ true); 4745 4746 bind(push); 4747 // After successful lock, push object on lock-stack. 4748 add(t, xthread, top); 4749 sd(obj, Address(t)); 4750 addw(top, top, oopSize); 4751 sw(top, Address(xthread, JavaThread::lock_stack_top_offset())); 4752 } 4753 4754 // Implements lightweight-unlocking. 4755 // 4756 // - obj: the object to be unlocked 4757 // - tmp1, tmp2, tmp3: temporary registers 4758 // - slow: branched to if unlocking fails 4759 void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) { 4760 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); 4761 assert_different_registers(obj, tmp1, tmp2, tmp3, t0); 4762 4763 #ifdef ASSERT 4764 { 4765 // Check for lock-stack underflow. 4766 Label stack_ok; 4767 lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); 4768 mv(tmp2, (unsigned)LockStack::start_offset()); 4769 bge(tmp1, tmp2, stack_ok); 4770 STOP("Lock-stack underflow"); 4771 bind(stack_ok); 4772 } 4773 #endif 4774 4775 Label unlocked, push_and_slow; 4776 const Register top = tmp1; 4777 const Register mark = tmp2; 4778 const Register t = tmp3; 4779 4780 // Check if obj is top of lock-stack. 4781 lwu(top, Address(xthread, JavaThread::lock_stack_top_offset())); 4782 subw(top, top, oopSize); 4783 add(t, xthread, top); 4784 ld(t, Address(t)); 4785 bne(obj, t, slow, /* is_far */ true); 4786 4787 // Pop lock-stack. 4788 DEBUG_ONLY(add(t, xthread, top);) 4789 DEBUG_ONLY(sd(zr, Address(t));) 4790 sw(top, Address(xthread, JavaThread::lock_stack_top_offset())); 4791 4792 // Check if recursive. 4793 add(t, xthread, top); 4794 ld(t, Address(t, -oopSize)); 4795 beq(obj, t, unlocked); 4796 4797 // Not recursive. Check header for monitor (0b10).
4798 ld(mark, Address(obj, oopDesc::mark_offset_in_bytes())); 4799 test_bit(t, mark, exact_log2(markWord::monitor_value)); 4800 bnez(t, push_and_slow); 4801 4802 #ifdef ASSERT 4803 // Check header not unlocked (0b01). 4804 Label not_unlocked; 4805 test_bit(t, mark, exact_log2(markWord::unlocked_value)); 4806 beqz(t, not_unlocked); 4807 stop("lightweight_unlock already unlocked"); 4808 bind(not_unlocked); 4809 #endif 4810 4811 // Try to unlock. Transition lock bits 0b00 => 0b01 4812 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea"); 4813 ori(t, mark, markWord::unlocked_value); 4814 cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64, 4815 /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ t); 4816 beq(mark, t, unlocked); 4817 4818 bind(push_and_slow); 4819 // Restore lock-stack and handle the unlock in runtime. 4820 DEBUG_ONLY(add(t, xthread, top);) 4821 DEBUG_ONLY(sd(obj, Address(t));) 4822 addw(top, top, oopSize); 4823 sw(top, Address(xthread, JavaThread::lock_stack_top_offset())); 4824 j(slow); 4825 4826 bind(unlocked); 4827 }
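//
// Illustrative sketch (comment only, not emitted code) of the lock-stack fast
// paths implemented by lightweight_lock()/lightweight_unlock() above, assuming
// the LM_LIGHTWEIGHT mark-word lock bits used here (0b01 unlocked, 0b00
// fast-locked, 0b10 monitor):
//
//   lock(obj):
//     if (lock-stack is full)                          goto slow;
//     if (lock_stack[top - 1] == obj)                  goto push;  // recursive
//     if (mark has the monitor bit set)                goto slow;
//     if (!CAS(obj->mark, mark | 0b01, mark & ~0b01))  goto slow;
//   push:
//     lock_stack[top++] = obj;
//
//   unlock(obj):
//     if (lock_stack[top - 1] != obj)                  goto slow;
//     top--;                                           // pop
//     if (lock_stack[top - 1] == obj)                  return;     // recursive
//     if (mark has the monitor bit set)                { top++; goto slow; }
//     if (!CAS(obj->mark, mark, mark | 0b01))          { top++; goto slow; }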