1 /* 2 * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 4 * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 * 25 */ 26 27 #include "precompiled.hpp" 28 #include "asm/assembler.hpp" 29 #include "asm/assembler.inline.hpp" 30 #include "compiler/disassembler.hpp" 31 #include "gc/shared/barrierSet.hpp" 32 #include "gc/shared/barrierSetAssembler.hpp" 33 #include "gc/shared/cardTable.hpp" 34 #include "gc/shared/cardTableBarrierSet.hpp" 35 #include "gc/shared/collectedHeap.hpp" 36 #include "interpreter/bytecodeHistogram.hpp" 37 #include "interpreter/interpreter.hpp" 38 #include "memory/resourceArea.hpp" 39 #include "memory/universe.hpp" 40 #include "nativeInst_riscv.hpp" 41 #include "oops/accessDecorators.hpp" 42 #include "oops/compressedKlass.inline.hpp" 43 #include "oops/compressedOops.inline.hpp" 44 #include "oops/klass.inline.hpp" 45 #include "oops/oop.hpp" 46 #include "runtime/interfaceSupport.inline.hpp" 47 #include "runtime/javaThread.hpp" 48 #include "runtime/jniHandles.inline.hpp" 49 #include "runtime/sharedRuntime.hpp" 50 #include "runtime/stubRoutines.hpp" 51 #include "utilities/powerOfTwo.hpp" 52 #ifdef COMPILER2 53 #include "opto/compile.hpp" 54 #include "opto/node.hpp" 55 #include "opto/output.hpp" 56 #endif 57 58 #ifdef PRODUCT 59 #define BLOCK_COMMENT(str) /* nothing */ 60 #else 61 #define BLOCK_COMMENT(str) block_comment(str) 62 #endif 63 #define STOP(str) stop(str); 64 #define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") 65 66 static void pass_arg0(MacroAssembler* masm, Register arg) { 67 if (c_rarg0 != arg) { 68 masm->mv(c_rarg0, arg); 69 } 70 } 71 72 static void pass_arg1(MacroAssembler* masm, Register arg) { 73 if (c_rarg1 != arg) { 74 masm->mv(c_rarg1, arg); 75 } 76 } 77 78 static void pass_arg2(MacroAssembler* masm, Register arg) { 79 if (c_rarg2 != arg) { 80 masm->mv(c_rarg2, arg); 81 } 82 } 83 84 static void pass_arg3(MacroAssembler* masm, Register arg) { 85 if (c_rarg3 != arg) { 86 masm->mv(c_rarg3, arg); 87 } 88 } 89 90 void MacroAssembler::push_cont_fastpath(Register java_thread) { 91 if (!Continuations::enabled()) return; 92 Label done; 93 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset())); 94 bleu(sp, t0, done); 95 sd(sp, Address(java_thread, JavaThread::cont_fastpath_offset())); 96 bind(done); 97 } 98 99 void MacroAssembler::pop_cont_fastpath(Register java_thread) { 100 if (!Continuations::enabled()) return; 101 Label done; 102 ld(t0, 
Address(java_thread, JavaThread::cont_fastpath_offset())); 103 bltu(sp, t0, done); 104 sd(zr, Address(java_thread, JavaThread::cont_fastpath_offset())); 105 bind(done); 106 } 107 108 int MacroAssembler::align(int modulus, int extra_offset) { 109 CompressibleRegion cr(this); 110 intptr_t before = offset(); 111 while ((offset() + extra_offset) % modulus != 0) { nop(); } 112 return (int)(offset() - before); 113 } 114 115 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 116 call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); 117 } 118 119 // Implementation of call_VM versions 120 121 void MacroAssembler::call_VM(Register oop_result, 122 address entry_point, 123 bool check_exceptions) { 124 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 125 } 126 127 void MacroAssembler::call_VM(Register oop_result, 128 address entry_point, 129 Register arg_1, 130 bool check_exceptions) { 131 pass_arg1(this, arg_1); 132 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 133 } 134 135 void MacroAssembler::call_VM(Register oop_result, 136 address entry_point, 137 Register arg_1, 138 Register arg_2, 139 bool check_exceptions) { 140 assert_different_registers(arg_1, c_rarg2); 141 pass_arg2(this, arg_2); 142 pass_arg1(this, arg_1); 143 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 144 } 145 146 void MacroAssembler::call_VM(Register oop_result, 147 address entry_point, 148 Register arg_1, 149 Register arg_2, 150 Register arg_3, 151 bool check_exceptions) { 152 assert_different_registers(arg_1, c_rarg2, c_rarg3); 153 assert_different_registers(arg_2, c_rarg3); 154 pass_arg3(this, arg_3); 155 156 pass_arg2(this, arg_2); 157 158 pass_arg1(this, arg_1); 159 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 160 } 161 162 void MacroAssembler::call_VM(Register oop_result, 163 Register last_java_sp, 164 address entry_point, 165 int number_of_arguments, 166 bool check_exceptions) { 167 call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 168 } 169 170 void MacroAssembler::call_VM(Register oop_result, 171 Register last_java_sp, 172 address entry_point, 173 Register arg_1, 174 bool check_exceptions) { 175 pass_arg1(this, arg_1); 176 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 177 } 178 179 void MacroAssembler::call_VM(Register oop_result, 180 Register last_java_sp, 181 address entry_point, 182 Register arg_1, 183 Register arg_2, 184 bool check_exceptions) { 185 186 assert_different_registers(arg_1, c_rarg2); 187 pass_arg2(this, arg_2); 188 pass_arg1(this, arg_1); 189 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 190 } 191 192 void MacroAssembler::call_VM(Register oop_result, 193 Register last_java_sp, 194 address entry_point, 195 Register arg_1, 196 Register arg_2, 197 Register arg_3, 198 bool check_exceptions) { 199 assert_different_registers(arg_1, c_rarg2, c_rarg3); 200 assert_different_registers(arg_2, c_rarg3); 201 pass_arg3(this, arg_3); 202 pass_arg2(this, arg_2); 203 pass_arg1(this, arg_1); 204 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 205 } 206 207 void MacroAssembler::post_call_nop() { 208 if (!Continuations::enabled()) { 209 return; 210 } 211 relocate(post_call_nop_Relocation::spec(), [&] { 212 InlineSkippedInstructionsCounter skipCounter(this); 213 nop(); 214 li32(zr, 0); 215 }); 216 } 217 218 // these are no-ops overridden 
by InterpreterMacroAssembler 219 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} 220 void MacroAssembler::check_and_handle_popframe(Register java_thread) {} 221 222 // Calls to C land 223 // 224 // When entering C land, the fp, & esp of the last Java frame have to be recorded 225 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 226 // has to be reset to 0. This is required to allow proper stack traversal. 227 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 228 Register last_java_fp, 229 Register last_java_pc, 230 Register tmp) { 231 232 if (last_java_pc->is_valid()) { 233 sd(last_java_pc, Address(xthread, 234 JavaThread::frame_anchor_offset() + 235 JavaFrameAnchor::last_Java_pc_offset())); 236 } 237 238 // determine last_java_sp register 239 if (last_java_sp == sp) { 240 mv(tmp, sp); 241 last_java_sp = tmp; 242 } else if (!last_java_sp->is_valid()) { 243 last_java_sp = esp; 244 } 245 246 sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); 247 248 // last_java_fp is optional 249 if (last_java_fp->is_valid()) { 250 sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); 251 } 252 } 253 254 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 255 Register last_java_fp, 256 address last_java_pc, 257 Register tmp) { 258 assert(last_java_pc != nullptr, "must provide a valid PC"); 259 260 la(tmp, last_java_pc); 261 sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); 262 263 set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); 264 } 265 266 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 267 Register last_java_fp, 268 Label &L, 269 Register tmp) { 270 if (L.is_bound()) { 271 set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); 272 } else { 273 L.add_patch_at(code(), locator()); 274 IncompressibleRegion ir(this); // the label address will be patched back. 
275 set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); 276 } 277 } 278 279 void MacroAssembler::reset_last_Java_frame(bool clear_fp) { 280 // we must set sp to zero to clear frame 281 sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); 282 283 // must clear fp, so that compiled frames are not confused; it is 284 // possible that we need it only for debugging 285 if (clear_fp) { 286 sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); 287 } 288 289 // Always clear the pc because it could have been set by make_walkable() 290 sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); 291 } 292 293 void MacroAssembler::call_VM_base(Register oop_result, 294 Register java_thread, 295 Register last_java_sp, 296 address entry_point, 297 int number_of_arguments, 298 bool check_exceptions) { 299 // determine java_thread register 300 if (!java_thread->is_valid()) { 301 java_thread = xthread; 302 } 303 // determine last_java_sp register 304 if (!last_java_sp->is_valid()) { 305 last_java_sp = esp; 306 } 307 308 // debugging support 309 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 310 assert(java_thread == xthread, "unexpected register"); 311 312 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 313 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 314 315 // push java thread (becomes first argument of C function) 316 mv(c_rarg0, java_thread); 317 318 // set last Java frame before call 319 assert(last_java_sp != fp, "can't use fp"); 320 321 Label l; 322 set_last_Java_frame(last_java_sp, fp, l, t0); 323 324 // do the call, remove parameters 325 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); 326 327 // reset last Java frame 328 // Only interpreter should have to clear fp 329 reset_last_Java_frame(true); 330 331 // C++ interp handles this in the interpreter 332 check_and_handle_popframe(java_thread); 333 check_and_handle_earlyret(java_thread); 334 335 if (check_exceptions) { 336 // check for pending exceptions (java_thread is set upon return) 337 ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); 338 Label ok; 339 beqz(t0, ok); 340 RuntimeAddress target(StubRoutines::forward_exception_entry()); 341 relocate(target.rspec(), [&] { 342 int32_t offset; 343 la_patchable(t0, target, offset); 344 jalr(x0, t0, offset); 345 }); 346 bind(ok); 347 } 348 349 // get oop result if there is one and reset the value in the thread 350 if (oop_result->is_valid()) { 351 get_vm_result(oop_result, java_thread); 352 } 353 } 354 355 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { 356 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 357 sd(zr, Address(java_thread, JavaThread::vm_result_offset())); 358 verify_oop_msg(oop_result, "broken oop in call_VM_base"); 359 } 360 361 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { 362 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); 363 sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); 364 } 365 366 void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { 367 assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); 368 assert_different_registers(klass, xthread, tmp); 369 370 Label L_fallthrough, L_tmp; 371 if (L_fast_path == nullptr) { 372 L_fast_path = 
&L_fallthrough; 373 } else if (L_slow_path == nullptr) { 374 L_slow_path = &L_fallthrough; 375 } 376 377 // Fast path check: class is fully initialized 378 lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); 379 sub(tmp, tmp, InstanceKlass::fully_initialized); 380 beqz(tmp, *L_fast_path); 381 382 // Fast path check: current thread is initializer thread 383 ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); 384 385 if (L_slow_path == &L_fallthrough) { 386 beq(xthread, tmp, *L_fast_path); 387 bind(*L_slow_path); 388 } else if (L_fast_path == &L_fallthrough) { 389 bne(xthread, tmp, *L_slow_path); 390 bind(*L_fast_path); 391 } else { 392 Unimplemented(); 393 } 394 } 395 396 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { 397 if (!VerifyOops) { return; } 398 399 // Pass register number to verify_oop_subroutine 400 const char* b = nullptr; 401 { 402 ResourceMark rm; 403 stringStream ss; 404 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line); 405 b = code_string(ss.as_string()); 406 } 407 BLOCK_COMMENT("verify_oop {"); 408 409 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 410 411 mv(c_rarg0, reg); // c_rarg0 : x10 412 { 413 // The length of the instruction sequence emitted should not depend 414 // on the address of the char buffer so that the size of mach nodes for 415 // scratch emit and normal emit matches. 416 IncompressibleRegion ir(this); // Fixed length 417 movptr(t0, (address) b); 418 } 419 420 // call indirectly to solve generation ordering problem 421 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 422 relocate(target.rspec(), [&] { 423 int32_t offset; 424 la_patchable(t1, target, offset); 425 ld(t1, Address(t1, offset)); 426 }); 427 jalr(t1); 428 429 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 430 431 BLOCK_COMMENT("} verify_oop"); 432 } 433 434 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { 435 if (!VerifyOops) { 436 return; 437 } 438 439 const char* b = nullptr; 440 { 441 ResourceMark rm; 442 stringStream ss; 443 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line); 444 b = code_string(ss.as_string()); 445 } 446 BLOCK_COMMENT("verify_oop_addr {"); 447 448 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 449 450 if (addr.uses(sp)) { 451 la(x10, addr); 452 ld(x10, Address(x10, 4 * wordSize)); 453 } else { 454 ld(x10, addr); 455 } 456 457 { 458 // The length of the instruction sequence emitted should not depend 459 // on the address of the char buffer so that the size of mach nodes for 460 // scratch emit and normal emit matches. 461 IncompressibleRegion ir(this); // Fixed length 462 movptr(t0, (address) b); 463 } 464 465 // call indirectly to solve generation ordering problem 466 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 467 relocate(target.rspec(), [&] { 468 int32_t offset; 469 la_patchable(t1, target, offset); 470 ld(t1, Address(t1, offset)); 471 }); 472 jalr(t1); 473 474 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 475 476 BLOCK_COMMENT("} verify_oop_addr"); 477 } 478 479 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 480 int extra_slot_offset) { 481 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  if (arg_slot.is_constant()) {
    return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
  } else {
    assert_different_registers(t0, arg_slot.as_register());
    shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
    return Address(t0, offset);
  }
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
{
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr(" pc = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr(" x0 = 0x%016lx", regs[0]);
      tty->print_cr(" x1 = 0x%016lx", regs[1]);
      tty->print_cr(" x2 = 0x%016lx", regs[2]);
      tty->print_cr(" x3 = 0x%016lx", regs[3]);
      tty->print_cr(" x4 = 0x%016lx", regs[4]);
      tty->print_cr(" x5 = 0x%016lx", regs[5]);
      tty->print_cr(" x6 = 0x%016lx", regs[6]);
      tty->print_cr(" x7 = 0x%016lx", regs[7]);
      tty->print_cr(" x8 = 0x%016lx", regs[8]);
      tty->print_cr(" x9 = 0x%016lx", regs[9]);
      tty->print_cr("x10 = 0x%016lx", regs[10]);
      tty->print_cr("x11 = 0x%016lx", regs[11]);
      tty->print_cr("x12 = 0x%016lx", regs[12]);
      tty->print_cr("x13 = 0x%016lx", regs[13]);
      tty->print_cr("x14 = 0x%016lx", regs[14]);
      tty->print_cr("x15 = 0x%016lx", regs[15]);
      tty->print_cr("x16 = 0x%016lx", regs[16]);
      tty->print_cr("x17 = 0x%016lx", regs[17]);
      tty->print_cr("x18 = 0x%016lx", regs[18]);
      tty->print_cr("x19 = 0x%016lx", regs[19]);
      tty->print_cr("x20 = 0x%016lx", regs[20]);
      tty->print_cr("x21 = 0x%016lx", regs[21]);
      tty->print_cr("x22 = 0x%016lx", regs[22]);
      tty->print_cr("x23 = 0x%016lx", regs[23]);
      tty->print_cr("x24 = 0x%016lx", regs[24]);
      tty->print_cr("x25 = 0x%016lx", regs[25]);
      tty->print_cr("x26 = 0x%016lx", regs[26]);
      tty->print_cr("x27 = 0x%016lx", regs[27]);
      tty->print_cr("x28 = 0x%016lx", regs[28]);
      tty->print_cr("x29 = 0x%016lx", regs[29]);
      tty->print_cr("x30 = 0x%016lx", regs[30]);
      tty->print_cr("x31 = 0x%016lx", regs[31]);
      BREAKPOINT;
    }
  }
  fatal("DEBUG MESSAGE: %s", msg);
}

void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done, tagged, weak_tagged;

  beqz(value, done);           // Use null as-is.
  // Test for tag.
  andi(tmp1, value, JNIHandles::tag_mask);
  bnez(tmp1, tagged);

  // Resolve local handle
  access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, value, Address(value, 0), tmp1, tmp2);
  verify_oop(value);
  j(done);

  bind(tagged);
  // Test for jweak tag.
575 STATIC_ASSERT(JNIHandles::TypeTag::weak_global == 0b1); 576 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::weak_global)); 577 bnez(tmp1, weak_tagged); 578 579 // Resolve global handle 580 access_load_at(T_OBJECT, IN_NATIVE, value, 581 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 582 verify_oop(value); 583 j(done); 584 585 bind(weak_tagged); 586 // Resolve jweak. 587 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, 588 Address(value, -JNIHandles::TypeTag::weak_global), tmp1, tmp2); 589 verify_oop(value); 590 591 bind(done); 592 } 593 594 void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) { 595 assert_different_registers(value, tmp1, tmp2); 596 Label done; 597 598 beqz(value, done); // Use null as-is. 599 600 #ifdef ASSERT 601 { 602 STATIC_ASSERT(JNIHandles::TypeTag::global == 0b10); 603 Label valid_global_tag; 604 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::global)); // Test for global tag. 605 bnez(tmp1, valid_global_tag); 606 stop("non global jobject using resolve_global_jobject"); 607 bind(valid_global_tag); 608 } 609 #endif 610 611 // Resolve global handle 612 access_load_at(T_OBJECT, IN_NATIVE, value, 613 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 614 verify_oop(value); 615 616 bind(done); 617 } 618 619 void MacroAssembler::stop(const char* msg) { 620 BLOCK_COMMENT(msg); 621 illegal_instruction(Assembler::csr::time); 622 emit_int64((uintptr_t)msg); 623 } 624 625 void MacroAssembler::unimplemented(const char* what) { 626 const char* buf = nullptr; 627 { 628 ResourceMark rm; 629 stringStream ss; 630 ss.print("unimplemented: %s", what); 631 buf = code_string(ss.as_string()); 632 } 633 stop(buf); 634 } 635 636 void MacroAssembler::emit_static_call_stub() { 637 IncompressibleRegion ir(this); // Fixed length: see CompiledStaticCall::to_interp_stub_size(). 638 // CompiledDirectStaticCall::set_to_interpreted knows the 639 // exact layout of this stub. 640 641 mov_metadata(xmethod, (Metadata*)nullptr); 642 643 // Jump to the entry point of the c2i stub. 
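  // Roughly, the code below expands to
  //   movptr t0, <c2i entry>       // lui + addi + slli + addi + slli, address 0 for now
  //   jalr   x0, t0, <low 6 bits>  // jump, no return address
  // and CompiledDirectStaticCall::set_to_interpreted later retargets the
  // placeholder address to the real c2i entry.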
644 int32_t offset = 0; 645 movptr(t0, 0, offset); 646 jalr(x0, t0, offset); 647 } 648 649 void MacroAssembler::call_VM_leaf_base(address entry_point, 650 int number_of_arguments, 651 Label *retaddr) { 652 push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp 653 call(entry_point); 654 if (retaddr != nullptr) { 655 bind(*retaddr); 656 } 657 pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp 658 } 659 660 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 661 call_VM_leaf_base(entry_point, number_of_arguments); 662 } 663 664 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 665 pass_arg0(this, arg_0); 666 call_VM_leaf_base(entry_point, 1); 667 } 668 669 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 670 assert_different_registers(arg_1, c_rarg0); 671 pass_arg0(this, arg_0); 672 pass_arg1(this, arg_1); 673 call_VM_leaf_base(entry_point, 2); 674 } 675 676 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, 677 Register arg_1, Register arg_2) { 678 assert_different_registers(arg_1, c_rarg0); 679 assert_different_registers(arg_2, c_rarg0, c_rarg1); 680 pass_arg0(this, arg_0); 681 pass_arg1(this, arg_1); 682 pass_arg2(this, arg_2); 683 call_VM_leaf_base(entry_point, 3); 684 } 685 686 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 687 pass_arg0(this, arg_0); 688 MacroAssembler::call_VM_leaf_base(entry_point, 1); 689 } 690 691 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 692 693 assert_different_registers(arg_0, c_rarg1); 694 pass_arg1(this, arg_1); 695 pass_arg0(this, arg_0); 696 MacroAssembler::call_VM_leaf_base(entry_point, 2); 697 } 698 699 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 700 assert_different_registers(arg_0, c_rarg1, c_rarg2); 701 assert_different_registers(arg_1, c_rarg2); 702 pass_arg2(this, arg_2); 703 pass_arg1(this, arg_1); 704 pass_arg0(this, arg_0); 705 MacroAssembler::call_VM_leaf_base(entry_point, 3); 706 } 707 708 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 709 assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3); 710 assert_different_registers(arg_1, c_rarg2, c_rarg3); 711 assert_different_registers(arg_2, c_rarg3); 712 713 pass_arg3(this, arg_3); 714 pass_arg2(this, arg_2); 715 pass_arg1(this, arg_1); 716 pass_arg0(this, arg_0); 717 MacroAssembler::call_VM_leaf_base(entry_point, 4); 718 } 719 720 void MacroAssembler::la(Register Rd, const address dest) { 721 int64_t offset = dest - pc(); 722 if (is_simm32(offset)) { 723 auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit 724 addi(Rd, Rd, ((int64_t)offset << 52) >> 52); 725 } else { 726 movptr(Rd, dest); 727 } 728 } 729 730 void MacroAssembler::la(Register Rd, const Address &adr) { 731 switch (adr.getMode()) { 732 case Address::literal: { 733 relocInfo::relocType rtype = adr.rspec().reloc()->type(); 734 if (rtype == relocInfo::none) { 735 mv(Rd, (intptr_t)(adr.target())); 736 } else { 737 relocate(adr.rspec(), [&] { 738 movptr(Rd, adr.target()); 739 }); 740 } 741 break; 742 } 743 case Address::base_plus_offset: { 744 Address new_adr = legitimize_address(Rd, adr); 745 if (!(new_adr.base() == Rd && new_adr.offset() == 0)) { 746 addi(Rd, new_adr.base(), new_adr.offset()); 747 } 748 break; 749 } 750 default: 751 
ShouldNotReachHere(); 752 } 753 } 754 755 void MacroAssembler::la(Register Rd, Label &label) { 756 IncompressibleRegion ir(this); // the label address may be patched back. 757 wrap_label(Rd, label, &MacroAssembler::la); 758 } 759 760 void MacroAssembler::li16u(Register Rd, uint16_t imm) { 761 lui(Rd, (uint32_t)imm << 12); 762 srli(Rd, Rd, 12); 763 } 764 765 void MacroAssembler::li32(Register Rd, int32_t imm) { 766 // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit 767 int64_t upper = imm, lower = imm; 768 lower = (imm << 20) >> 20; 769 upper -= lower; 770 upper = (int32_t)upper; 771 // lui Rd, imm[31:12] + imm[11] 772 lui(Rd, upper); 773 // use addiw to distinguish li32 to li64 774 addiw(Rd, Rd, lower); 775 } 776 777 void MacroAssembler::li64(Register Rd, int64_t imm) { 778 // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or 779 // (imm[31:20] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. 780 int64_t lower = imm & 0xffffffff; 781 lower -= ((lower << 44) >> 44); 782 int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; 783 int32_t upper = (tmp_imm - (int32_t)lower) >> 32; 784 785 // Load upper 32 bits 786 int64_t up = upper, lo = upper; 787 lo = (lo << 52) >> 52; 788 up -= lo; 789 up = (int32_t)up; 790 lui(Rd, up); 791 addi(Rd, Rd, lo); 792 793 // Load the rest 32 bits. 794 slli(Rd, Rd, 12); 795 addi(Rd, Rd, (int32_t)lower >> 20); 796 slli(Rd, Rd, 12); 797 lower = ((int32_t)imm << 12) >> 20; 798 addi(Rd, Rd, lower); 799 slli(Rd, Rd, 8); 800 lower = imm & 0xff; 801 addi(Rd, Rd, lower); 802 } 803 804 void MacroAssembler::li(Register Rd, int64_t imm) { 805 // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff 806 // li -> c.li 807 if (do_compress() && (is_simm6(imm) && Rd != x0)) { 808 c_li(Rd, imm); 809 return; 810 } 811 812 int shift = 12; 813 int64_t upper = imm, lower = imm; 814 // Split imm to a lower 12-bit sign-extended part and the remainder, 815 // because addi will sign-extend the lower imm. 816 lower = ((int32_t)imm << 20) >> 20; 817 upper -= lower; 818 819 // Test whether imm is a 32-bit integer. 
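  // (i.e. bits 63 down to 31 are either all zero or all one). If it does not fit,
  // strip the trailing zero bits off the upper part, materialize that part with a
  // recursive li, shift it back into place and add in the low 12 bits. If it does
  // fit, a single lui + addiw pair is enough.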
820 if (!(((imm) & ~(int64_t)0x7fffffff) == 0 || 821 (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) { 822 while (((upper >> shift) & 1) == 0) { shift++; } 823 upper >>= shift; 824 li(Rd, upper); 825 slli(Rd, Rd, shift); 826 if (lower != 0) { 827 addi(Rd, Rd, lower); 828 } 829 } else { 830 // 32-bit integer 831 Register hi_Rd = zr; 832 if (upper != 0) { 833 lui(Rd, (int32_t)upper); 834 hi_Rd = Rd; 835 } 836 if (lower != 0 || hi_Rd == zr) { 837 addiw(Rd, hi_Rd, lower); 838 } 839 } 840 } 841 842 #define INSN(NAME, REGISTER) \ 843 void MacroAssembler::NAME(const address dest, Register temp) { \ 844 assert_cond(dest != nullptr); \ 845 int64_t distance = dest - pc(); \ 846 if (is_simm21(distance) && ((distance % 2) == 0)) { \ 847 Assembler::jal(REGISTER, distance); \ 848 } else { \ 849 assert(temp != noreg, "expecting a register"); \ 850 int32_t offset = 0; \ 851 movptr(temp, dest, offset); \ 852 Assembler::jalr(REGISTER, temp, offset); \ 853 } \ 854 } \ 855 856 INSN(j, x0); 857 INSN(jal, x1); 858 859 #undef INSN 860 861 #define INSN(NAME, REGISTER) \ 862 void MacroAssembler::NAME(const Address &adr, Register temp) { \ 863 switch (adr.getMode()) { \ 864 case Address::literal: { \ 865 relocate(adr.rspec(), [&] { \ 866 NAME(adr.target(), temp); \ 867 }); \ 868 break; \ 869 } \ 870 case Address::base_plus_offset: { \ 871 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \ 872 la(temp, Address(adr.base(), adr.offset() - offset)); \ 873 Assembler::jalr(REGISTER, temp, offset); \ 874 break; \ 875 } \ 876 default: \ 877 ShouldNotReachHere(); \ 878 } \ 879 } 880 881 INSN(j, x0); 882 INSN(jal, x1); 883 884 #undef INSN 885 886 #define INSN(NAME) \ 887 void MacroAssembler::NAME(Register Rd, const address dest, Register temp) { \ 888 assert_cond(dest != nullptr); \ 889 int64_t distance = dest - pc(); \ 890 if (is_simm21(distance) && ((distance % 2) == 0)) { \ 891 Assembler::NAME(Rd, distance); \ 892 } else { \ 893 assert_different_registers(Rd, temp); \ 894 int32_t offset = 0; \ 895 movptr(temp, dest, offset); \ 896 jalr(Rd, temp, offset); \ 897 } \ 898 } \ 899 void MacroAssembler::NAME(Register Rd, Label &L, Register temp) { \ 900 assert_different_registers(Rd, temp); \ 901 wrap_label(Rd, L, temp, &MacroAssembler::NAME); \ 902 } 903 904 INSN(jal); 905 906 #undef INSN 907 908 #define INSN(NAME, REGISTER) \ 909 void MacroAssembler::NAME(Label &l, Register temp) { \ 910 jal(REGISTER, l, temp); \ 911 } \ 912 913 INSN(j, x0); 914 INSN(jal, x1); 915 916 #undef INSN 917 918 void MacroAssembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) { 919 if (L.is_bound()) { 920 (this->*insn)(Rt, target(L), tmp); 921 } else { 922 L.add_patch_at(code(), locator()); 923 (this->*insn)(Rt, pc(), tmp); 924 } 925 } 926 927 void MacroAssembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) { 928 if (L.is_bound()) { 929 (this->*insn)(Rt, target(L)); 930 } else { 931 L.add_patch_at(code(), locator()); 932 (this->*insn)(Rt, pc()); 933 } 934 } 935 936 void MacroAssembler::wrap_label(Register r1, Register r2, Label &L, 937 compare_and_branch_insn insn, 938 compare_and_branch_label_insn neg_insn, bool is_far) { 939 if (is_far) { 940 Label done; 941 (this->*neg_insn)(r1, r2, done, /* is_far */ false); 942 j(L); 943 bind(done); 944 } else { 945 if (L.is_bound()) { 946 (this->*insn)(r1, r2, target(L)); 947 } else { 948 L.add_patch_at(code(), locator()); 949 (this->*insn)(r1, r2, pc()); 950 } 951 } 952 } 953 954 #define INSN(NAME, NEG_INSN) \ 955 void MacroAssembler::NAME(Register Rs1, 
Register Rs2, Label &L, bool is_far) { \ 956 wrap_label(Rs1, Rs2, L, &MacroAssembler::NAME, &MacroAssembler::NEG_INSN, is_far); \ 957 } 958 959 INSN(beq, bne); 960 INSN(bne, beq); 961 INSN(blt, bge); 962 INSN(bge, blt); 963 INSN(bltu, bgeu); 964 INSN(bgeu, bltu); 965 966 #undef INSN 967 968 #define INSN(NAME) \ 969 void MacroAssembler::NAME##z(Register Rs, const address dest) { \ 970 NAME(Rs, zr, dest); \ 971 } \ 972 void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ 973 NAME(Rs, zr, l, is_far); \ 974 } \ 975 976 INSN(beq); 977 INSN(bne); 978 INSN(blt); 979 INSN(ble); 980 INSN(bge); 981 INSN(bgt); 982 983 #undef INSN 984 985 #define INSN(NAME, NEG_INSN) \ 986 void MacroAssembler::NAME(Register Rs, Register Rt, const address dest) { \ 987 NEG_INSN(Rt, Rs, dest); \ 988 } \ 989 void MacroAssembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \ 990 NEG_INSN(Rt, Rs, l, is_far); \ 991 } 992 993 INSN(bgt, blt); 994 INSN(ble, bge); 995 INSN(bgtu, bltu); 996 INSN(bleu, bgeu); 997 998 #undef INSN 999 1000 // Float compare branch instructions 1001 1002 #define INSN(NAME, FLOATCMP, BRANCH) \ 1003 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ 1004 FLOATCMP##_s(t0, Rs1, Rs2); \ 1005 BRANCH(t0, l, is_far); \ 1006 } \ 1007 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ 1008 FLOATCMP##_d(t0, Rs1, Rs2); \ 1009 BRANCH(t0, l, is_far); \ 1010 } 1011 1012 INSN(beq, feq, bnez); 1013 INSN(bne, feq, beqz); 1014 1015 #undef INSN 1016 1017 1018 #define INSN(NAME, FLOATCMP1, FLOATCMP2) \ 1019 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1020 bool is_far, bool is_unordered) { \ 1021 if (is_unordered) { \ 1022 /* jump if either source is NaN or condition is expected */ \ 1023 FLOATCMP2##_s(t0, Rs2, Rs1); \ 1024 beqz(t0, l, is_far); \ 1025 } else { \ 1026 /* jump if no NaN in source and condition is expected */ \ 1027 FLOATCMP1##_s(t0, Rs1, Rs2); \ 1028 bnez(t0, l, is_far); \ 1029 } \ 1030 } \ 1031 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1032 bool is_far, bool is_unordered) { \ 1033 if (is_unordered) { \ 1034 /* jump if either source is NaN or condition is expected */ \ 1035 FLOATCMP2##_d(t0, Rs2, Rs1); \ 1036 beqz(t0, l, is_far); \ 1037 } else { \ 1038 /* jump if no NaN in source and condition is expected */ \ 1039 FLOATCMP1##_d(t0, Rs1, Rs2); \ 1040 bnez(t0, l, is_far); \ 1041 } \ 1042 } 1043 1044 INSN(ble, fle, flt); 1045 INSN(blt, flt, fle); 1046 1047 #undef INSN 1048 1049 #define INSN(NAME, CMP) \ 1050 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1051 bool is_far, bool is_unordered) { \ 1052 float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ 1053 } \ 1054 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1055 bool is_far, bool is_unordered) { \ 1056 double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ 1057 } 1058 1059 INSN(bgt, blt); 1060 INSN(bge, ble); 1061 1062 #undef INSN 1063 1064 1065 #define INSN(NAME, CSR) \ 1066 void MacroAssembler::NAME(Register Rd) { \ 1067 csrr(Rd, CSR); \ 1068 } 1069 1070 INSN(rdinstret, CSR_INSTRET); 1071 INSN(rdcycle, CSR_CYCLE); 1072 INSN(rdtime, CSR_TIME); 1073 INSN(frcsr, CSR_FCSR); 1074 INSN(frrm, CSR_FRM); 1075 INSN(frflags, CSR_FFLAGS); 1076 1077 #undef INSN 1078 1079 void MacroAssembler::csrr(Register Rd, unsigned csr) { 1080 csrrs(Rd, csr, x0); 1081 } 
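// The csrw/csrs/csrc and csrwi/csrsi/csrci helpers defined below are the usual
// RISC-V pseudo-instructions: each expands to the corresponding csrrw/csrrs/csrrc
// (or immediate) form with x0 as the destination, so the previous CSR value is
// simply discarded. For example, csrw(CSR_FCSR, Rs) emits csrrw(x0, CSR_FCSR, Rs).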
1082 1083 #define INSN(NAME, OPFUN) \ 1084 void MacroAssembler::NAME(unsigned csr, Register Rs) { \ 1085 OPFUN(x0, csr, Rs); \ 1086 } 1087 1088 INSN(csrw, csrrw); 1089 INSN(csrs, csrrs); 1090 INSN(csrc, csrrc); 1091 1092 #undef INSN 1093 1094 #define INSN(NAME, OPFUN) \ 1095 void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ 1096 OPFUN(x0, csr, imm); \ 1097 } 1098 1099 INSN(csrwi, csrrwi); 1100 INSN(csrsi, csrrsi); 1101 INSN(csrci, csrrci); 1102 1103 #undef INSN 1104 1105 #define INSN(NAME, CSR) \ 1106 void MacroAssembler::NAME(Register Rd, Register Rs) { \ 1107 csrrw(Rd, CSR, Rs); \ 1108 } 1109 1110 INSN(fscsr, CSR_FCSR); 1111 INSN(fsrm, CSR_FRM); 1112 INSN(fsflags, CSR_FFLAGS); 1113 1114 #undef INSN 1115 1116 #define INSN(NAME) \ 1117 void MacroAssembler::NAME(Register Rs) { \ 1118 NAME(x0, Rs); \ 1119 } 1120 1121 INSN(fscsr); 1122 INSN(fsrm); 1123 INSN(fsflags); 1124 1125 #undef INSN 1126 1127 void MacroAssembler::fsrmi(Register Rd, unsigned imm) { 1128 guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); 1129 csrrwi(Rd, CSR_FRM, imm); 1130 } 1131 1132 void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { 1133 csrrwi(Rd, CSR_FFLAGS, imm); 1134 } 1135 1136 #define INSN(NAME) \ 1137 void MacroAssembler::NAME(unsigned imm) { \ 1138 NAME(x0, imm); \ 1139 } 1140 1141 INSN(fsrmi); 1142 INSN(fsflagsi); 1143 1144 #undef INSN 1145 1146 void MacroAssembler::push_reg(Register Rs) 1147 { 1148 addi(esp, esp, 0 - wordSize); 1149 sd(Rs, Address(esp, 0)); 1150 } 1151 1152 void MacroAssembler::pop_reg(Register Rd) 1153 { 1154 ld(Rd, Address(esp, 0)); 1155 addi(esp, esp, wordSize); 1156 } 1157 1158 int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { 1159 int count = 0; 1160 // Scan bitset to accumulate register pairs 1161 for (int reg = 31; reg >= 0; reg--) { 1162 if ((1U << 31) & bitset) { 1163 regs[count++] = reg; 1164 } 1165 bitset <<= 1; 1166 } 1167 return count; 1168 } 1169 1170 // Push integer registers in the bitset supplied. Don't push sp. 1171 // Return the number of words pushed 1172 int MacroAssembler::push_reg(unsigned int bitset, Register stack) { 1173 DEBUG_ONLY(int words_pushed = 0;) 1174 unsigned char regs[32]; 1175 int count = bitset_to_regs(bitset, regs); 1176 // reserve one slot to align for odd count 1177 int offset = is_even(count) ? 0 : wordSize; 1178 1179 if (count) { 1180 addi(stack, stack, -count * wordSize - offset); 1181 } 1182 for (int i = count - 1; i >= 0; i--) { 1183 sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); 1184 DEBUG_ONLY(words_pushed++;) 1185 } 1186 1187 assert(words_pushed == count, "oops, pushed != count"); 1188 1189 return count; 1190 } 1191 1192 int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { 1193 DEBUG_ONLY(int words_popped = 0;) 1194 unsigned char regs[32]; 1195 int count = bitset_to_regs(bitset, regs); 1196 // reserve one slot to align for odd count 1197 int offset = is_even(count) ? 0 : wordSize; 1198 1199 for (int i = count - 1; i >= 0; i--) { 1200 ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); 1201 DEBUG_ONLY(words_popped++;) 1202 } 1203 1204 if (count) { 1205 addi(stack, stack, count * wordSize + offset); 1206 } 1207 assert(words_popped == count, "oops, popped != count"); 1208 1209 return count; 1210 } 1211 1212 // Push floating-point registers in the bitset supplied. 
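// When the count is odd, one extra slot is reserved so that the 8-byte saves
// keep sp 16-byte aligned (push_slots below rounds the count up to even).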
1213 // Return the number of words pushed 1214 int MacroAssembler::push_fp(unsigned int bitset, Register stack) { 1215 DEBUG_ONLY(int words_pushed = 0;) 1216 unsigned char regs[32]; 1217 int count = bitset_to_regs(bitset, regs); 1218 int push_slots = count + (count & 1); 1219 1220 if (count) { 1221 addi(stack, stack, -push_slots * wordSize); 1222 } 1223 1224 for (int i = count - 1; i >= 0; i--) { 1225 fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); 1226 DEBUG_ONLY(words_pushed++;) 1227 } 1228 1229 assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); 1230 1231 return count; 1232 } 1233 1234 int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { 1235 DEBUG_ONLY(int words_popped = 0;) 1236 unsigned char regs[32]; 1237 int count = bitset_to_regs(bitset, regs); 1238 int pop_slots = count + (count & 1); 1239 1240 for (int i = count - 1; i >= 0; i--) { 1241 fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); 1242 DEBUG_ONLY(words_popped++;) 1243 } 1244 1245 if (count) { 1246 addi(stack, stack, pop_slots * wordSize); 1247 } 1248 1249 assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); 1250 1251 return count; 1252 } 1253 1254 #ifdef COMPILER2 1255 // Push vector registers in the bitset supplied. 1256 // Return the number of words pushed 1257 int MacroAssembler::push_v(unsigned int bitset, Register stack) { 1258 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 1259 1260 // Scan bitset to accumulate register pairs 1261 unsigned char regs[32]; 1262 int count = bitset_to_regs(bitset, regs); 1263 1264 for (int i = 0; i < count; i++) { 1265 sub(stack, stack, vector_size_in_bytes); 1266 vs1r_v(as_VectorRegister(regs[i]), stack); 1267 } 1268 1269 return count * vector_size_in_bytes / wordSize; 1270 } 1271 1272 int MacroAssembler::pop_v(unsigned int bitset, Register stack) { 1273 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 1274 1275 // Scan bitset to accumulate register pairs 1276 unsigned char regs[32]; 1277 int count = bitset_to_regs(bitset, regs); 1278 1279 for (int i = count - 1; i >= 0; i--) { 1280 vl1r_v(as_VectorRegister(regs[i]), stack); 1281 add(stack, stack, vector_size_in_bytes); 1282 } 1283 1284 return count * vector_size_in_bytes / wordSize; 1285 } 1286 #endif // COMPILER2 1287 1288 void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { 1289 // Push integer registers x7, x10-x17, x28-x31. 1290 push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); 1291 1292 // Push float registers f0-f7, f10-f17, f28-f31. 
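  // That is 8 + 8 + 4 = 20 registers, matching the 20 words reserved below.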
1293 addi(sp, sp, - wordSize * 20); 1294 int offset = 0; 1295 for (int i = 0; i < 32; i++) { 1296 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { 1297 fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++))); 1298 } 1299 } 1300 } 1301 1302 void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { 1303 int offset = 0; 1304 for (int i = 0; i < 32; i++) { 1305 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { 1306 fld(as_FloatRegister(i), Address(sp, wordSize * (offset++))); 1307 } 1308 } 1309 addi(sp, sp, wordSize * 20); 1310 1311 pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); 1312 } 1313 1314 void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { 1315 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) 1316 push_reg(RegSet::range(x5, x31), sp); 1317 1318 // float registers 1319 addi(sp, sp, - 32 * wordSize); 1320 for (int i = 0; i < 32; i++) { 1321 fsd(as_FloatRegister(i), Address(sp, i * wordSize)); 1322 } 1323 1324 // vector registers 1325 if (save_vectors) { 1326 sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers); 1327 vsetvli(t0, x0, Assembler::e64, Assembler::m8); 1328 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) { 1329 add(t0, sp, vector_size_in_bytes * i); 1330 vse64_v(as_VectorRegister(i), t0); 1331 } 1332 } 1333 } 1334 1335 void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { 1336 // vector registers 1337 if (restore_vectors) { 1338 vsetvli(t0, x0, Assembler::e64, Assembler::m8); 1339 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) { 1340 vle64_v(as_VectorRegister(i), sp); 1341 add(sp, sp, vector_size_in_bytes * 8); 1342 } 1343 } 1344 1345 // float registers 1346 for (int i = 0; i < 32; i++) { 1347 fld(as_FloatRegister(i), Address(sp, i * wordSize)); 1348 } 1349 addi(sp, sp, 32 * wordSize); 1350 1351 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) 1352 pop_reg(RegSet::range(x5, x31), sp); 1353 } 1354 1355 static int patch_offset_in_jal(address branch, int64_t offset) { 1356 assert(Assembler::is_simm21(offset) && ((offset % 2) == 0), 1357 "offset is too large to be patched in one jal instruction!\n"); 1358 Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] 1359 Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] 1360 Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] 1361 Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] 1362 return NativeInstruction::instruction_size; // only one instruction 1363 } 1364 1365 static int patch_offset_in_conditional_branch(address branch, int64_t offset) { 1366 assert(Assembler::is_simm13(offset) && ((offset % 2) == 0), 1367 "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n"); 1368 Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] 1369 Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] 1370 Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] 1371 Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] 1372 return NativeInstruction::instruction_size; // only one instruction 1373 } 1374 1375 static int 
patch_offset_in_pc_relative(address branch, int64_t offset) { 1376 const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load 1377 Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] 1378 Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] 1379 return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; 1380 } 1381 1382 static int patch_addr_in_movptr(address branch, address target) { 1383 const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load 1384 int32_t lower = ((intptr_t)target << 35) >> 35; 1385 int64_t upper = ((intptr_t)target - lower) >> 29; 1386 Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] 1387 Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] 1388 Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] 1389 Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20] 1390 return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; 1391 } 1392 1393 static int patch_imm_in_li64(address branch, address target) { 1394 const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi 1395 int64_t lower = (intptr_t)target & 0xffffffff; 1396 lower = lower - ((lower << 44) >> 44); 1397 int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; 1398 int32_t upper = (tmp_imm - (int32_t)lower) >> 32; 1399 int64_t tmp_upper = upper, tmp_lower = upper; 1400 tmp_lower = (tmp_lower << 52) >> 52; 1401 tmp_upper -= tmp_lower; 1402 tmp_upper >>= 12; 1403 // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:20] == 0x7ff && target[19] == 1), 1404 // upper = target[63:32] + 1. 1405 Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. 1406 Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. 1407 // Load the rest 32 bits. 1408 Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. 1409 Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. 1410 Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. 1411 return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; 1412 } 1413 1414 static int patch_imm_in_li16u(address branch, uint16_t target) { 1415 Assembler::patch(branch, 31, 12, target); // patch lui only 1416 return NativeInstruction::instruction_size; 1417 } 1418 1419 int MacroAssembler::patch_imm_in_li32(address branch, int32_t target) { 1420 const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw 1421 int64_t upper = (intptr_t)target; 1422 int32_t lower = (((int32_t)target) << 20) >> 20; 1423 upper -= lower; 1424 upper = (int32_t)upper; 1425 Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. 1426 Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. 
1427 return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; 1428 } 1429 1430 static long get_offset_of_jal(address insn_addr) { 1431 assert_cond(insn_addr != nullptr); 1432 long offset = 0; 1433 unsigned insn = Assembler::ld_instr(insn_addr); 1434 long val = (long)Assembler::sextract(insn, 31, 12); 1435 offset |= ((val >> 19) & 0x1) << 20; 1436 offset |= (val & 0xff) << 12; 1437 offset |= ((val >> 8) & 0x1) << 11; 1438 offset |= ((val >> 9) & 0x3ff) << 1; 1439 offset = (offset << 43) >> 43; 1440 return offset; 1441 } 1442 1443 static long get_offset_of_conditional_branch(address insn_addr) { 1444 long offset = 0; 1445 assert_cond(insn_addr != nullptr); 1446 unsigned insn = Assembler::ld_instr(insn_addr); 1447 offset = (long)Assembler::sextract(insn, 31, 31); 1448 offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); 1449 offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); 1450 offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); 1451 offset = (offset << 41) >> 41; 1452 return offset; 1453 } 1454 1455 static long get_offset_of_pc_relative(address insn_addr) { 1456 long offset = 0; 1457 assert_cond(insn_addr != nullptr); 1458 offset = ((long)(Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12))) << 12; // Auipc. 1459 offset += ((long)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addi/Jalr/Load. 1460 offset = (offset << 32) >> 32; 1461 return offset; 1462 } 1463 1464 static address get_target_of_movptr(address insn_addr) { 1465 assert_cond(insn_addr != nullptr); 1466 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 29; // Lui. 1467 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 17; // Addi. 1468 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 6; // Addi. 1469 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)); // Addi/Jalr/Load. 1470 return (address) target_address; 1471 } 1472 1473 static address get_target_of_li64(address insn_addr) { 1474 assert_cond(insn_addr != nullptr); 1475 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 44; // Lui. 1476 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 32; // Addi. 1477 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 20; // Addi. 1478 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)) << 8; // Addi. 1479 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 28), 31, 20)); // Addi. 1480 return (address)target_address; 1481 } 1482 1483 address MacroAssembler::get_target_of_li32(address insn_addr) { 1484 assert_cond(insn_addr != nullptr); 1485 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 12; // Lui. 1486 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addiw. 1487 return (address)target_address; 1488 } 1489 1490 // Patch any kind of instruction; there may be several instructions. 1491 // Return the total length (in bytes) of the instructions. 
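// The recognized forms are the ones emitted above: a single jal, a single
// conditional branch, an auipc-based pc-relative pair (auipc + addi/jalr/load),
// a 6-instruction movptr, an 8-instruction li64, a 2-instruction li32 and a
// 1-instruction li16u; anything else is a fatal error.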
1492 int MacroAssembler::pd_patch_instruction_size(address branch, address target) { 1493 assert_cond(branch != nullptr); 1494 int64_t offset = target - branch; 1495 if (NativeInstruction::is_jal_at(branch)) { // jal 1496 return patch_offset_in_jal(branch, offset); 1497 } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne 1498 return patch_offset_in_conditional_branch(branch, offset); 1499 } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load 1500 return patch_offset_in_pc_relative(branch, offset); 1501 } else if (NativeInstruction::is_movptr_at(branch)) { // movptr 1502 return patch_addr_in_movptr(branch, target); 1503 } else if (NativeInstruction::is_li64_at(branch)) { // li64 1504 return patch_imm_in_li64(branch, target); 1505 } else if (NativeInstruction::is_li32_at(branch)) { // li32 1506 int64_t imm = (intptr_t)target; 1507 return patch_imm_in_li32(branch, (int32_t)imm); 1508 } else if (NativeInstruction::is_li16u_at(branch)) { 1509 int64_t imm = (intptr_t)target; 1510 return patch_imm_in_li16u(branch, (uint16_t)imm); 1511 } else { 1512 #ifdef ASSERT 1513 tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", 1514 Assembler::ld_instr(branch), p2i(branch)); 1515 Disassembler::decode(branch - 16, branch + 16); 1516 #endif 1517 ShouldNotReachHere(); 1518 return -1; 1519 } 1520 } 1521 1522 address MacroAssembler::target_addr_for_insn(address insn_addr) { 1523 long offset = 0; 1524 assert_cond(insn_addr != nullptr); 1525 if (NativeInstruction::is_jal_at(insn_addr)) { // jal 1526 offset = get_offset_of_jal(insn_addr); 1527 } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne 1528 offset = get_offset_of_conditional_branch(insn_addr); 1529 } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load 1530 offset = get_offset_of_pc_relative(insn_addr); 1531 } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr 1532 return get_target_of_movptr(insn_addr); 1533 } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 1534 return get_target_of_li64(insn_addr); 1535 } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 1536 return get_target_of_li32(insn_addr); 1537 } else { 1538 ShouldNotReachHere(); 1539 } 1540 return address(((uintptr_t)insn_addr + offset)); 1541 } 1542 1543 int MacroAssembler::patch_oop(address insn_addr, address o) { 1544 // OOPs are either narrow (32 bits) or wide (48 bits). We encode 1545 // narrow OOPs by setting the upper 16 bits in the first 1546 // instruction. 
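  // Narrow oops are patched through the 2-instruction li32 (lui + addiw) pair,
  // wide oops through the 6-instruction movptr sequence.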
1547 if (NativeInstruction::is_li32_at(insn_addr)) { 1548 // Move narrow OOP 1549 uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); 1550 return patch_imm_in_li32(insn_addr, (int32_t)n); 1551 } else if (NativeInstruction::is_movptr_at(insn_addr)) { 1552 // Move wide OOP 1553 return patch_addr_in_movptr(insn_addr, o); 1554 } 1555 ShouldNotReachHere(); 1556 return -1; 1557 } 1558 1559 void MacroAssembler::reinit_heapbase() { 1560 if (UseCompressedOops) { 1561 if (Universe::is_fully_initialized()) { 1562 mv(xheapbase, CompressedOops::ptrs_base()); 1563 } else { 1564 ExternalAddress target(CompressedOops::ptrs_base_addr()); 1565 relocate(target.rspec(), [&] { 1566 int32_t offset; 1567 la_patchable(xheapbase, target, offset); 1568 ld(xheapbase, Address(xheapbase, offset)); 1569 }); 1570 } 1571 } 1572 } 1573 1574 void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset) { 1575 int64_t imm64 = (int64_t)addr; 1576 #ifndef PRODUCT 1577 { 1578 char buffer[64]; 1579 snprintf(buffer, sizeof(buffer), "0x%" PRIx64, imm64); 1580 block_comment(buffer); 1581 } 1582 #endif 1583 assert((uintptr_t)imm64 < (1ull << 48), "48-bit overflow in address constant"); 1584 // Load upper 31 bits 1585 int64_t imm = imm64 >> 17; 1586 int64_t upper = imm, lower = imm; 1587 lower = (lower << 52) >> 52; 1588 upper -= lower; 1589 upper = (int32_t)upper; 1590 lui(Rd, upper); 1591 addi(Rd, Rd, lower); 1592 1593 // Load the rest 17 bits. 1594 slli(Rd, Rd, 11); 1595 addi(Rd, Rd, (imm64 >> 6) & 0x7ff); 1596 slli(Rd, Rd, 6); 1597 1598 // This offset will be used by following jalr/ld. 1599 offset = imm64 & 0x3f; 1600 } 1601 1602 void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register temp) { 1603 if (is_simm12(increment)) { 1604 addi(Rd, Rn, increment); 1605 } else { 1606 assert_different_registers(Rn, temp); 1607 li(temp, increment); 1608 add(Rd, Rn, temp); 1609 } 1610 } 1611 1612 void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { 1613 if (is_simm12(increment)) { 1614 addiw(Rd, Rn, increment); 1615 } else { 1616 assert_different_registers(Rn, temp); 1617 li(temp, increment); 1618 addw(Rd, Rn, temp); 1619 } 1620 } 1621 1622 void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) { 1623 if (is_simm12(-decrement)) { 1624 addi(Rd, Rn, -decrement); 1625 } else { 1626 assert_different_registers(Rn, temp); 1627 li(temp, decrement); 1628 sub(Rd, Rn, temp); 1629 } 1630 } 1631 1632 void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { 1633 if (is_simm12(-decrement)) { 1634 addiw(Rd, Rn, -decrement); 1635 } else { 1636 assert_different_registers(Rn, temp); 1637 li(temp, decrement); 1638 subw(Rd, Rn, temp); 1639 } 1640 } 1641 1642 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { 1643 andr(Rd, Rs1, Rs2); 1644 sign_extend(Rd, Rd, 32); 1645 } 1646 1647 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { 1648 orr(Rd, Rs1, Rs2); 1649 sign_extend(Rd, Rd, 32); 1650 } 1651 1652 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { 1653 xorr(Rd, Rs1, Rs2); 1654 sign_extend(Rd, Rd, 32); 1655 } 1656 1657 // Rd = Rs1 & (~Rd2) 1658 void MacroAssembler::andn(Register Rd, Register Rs1, Register Rs2) { 1659 if (UseZbb) { 1660 Assembler::andn(Rd, Rs1, Rs2); 1661 return; 1662 } 1663 1664 notr(Rd, Rs2); 1665 andr(Rd, Rs1, Rd); 1666 } 1667 1668 // Rd = Rs1 | (~Rd2) 1669 void MacroAssembler::orn(Register Rd, Register Rs1, Register Rs2) { 1670 if (UseZbb) { 1671 
Assembler::orn(Rd, Rs1, Rs2); 1672 return; 1673 } 1674 1675 notr(Rd, Rs2); 1676 orr(Rd, Rs1, Rd); 1677 } 1678 1679 // Note: load_unsigned_short used to be called load_unsigned_word. 1680 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 1681 int off = offset(); 1682 lhu(dst, src); 1683 return off; 1684 } 1685 1686 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 1687 int off = offset(); 1688 lbu(dst, src); 1689 return off; 1690 } 1691 1692 int MacroAssembler::load_signed_short(Register dst, Address src) { 1693 int off = offset(); 1694 lh(dst, src); 1695 return off; 1696 } 1697 1698 int MacroAssembler::load_signed_byte(Register dst, Address src) { 1699 int off = offset(); 1700 lb(dst, src); 1701 return off; 1702 } 1703 1704 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 1705 switch (size_in_bytes) { 1706 case 8: ld(dst, src); break; 1707 case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; 1708 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 1709 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 1710 default: ShouldNotReachHere(); 1711 } 1712 } 1713 1714 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes) { 1715 switch (size_in_bytes) { 1716 case 8: sd(src, dst); break; 1717 case 4: sw(src, dst); break; 1718 case 2: sh(src, dst); break; 1719 case 1: sb(src, dst); break; 1720 default: ShouldNotReachHere(); 1721 } 1722 } 1723 1724 // granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register 1725 void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 1726 if (granularity != 1 && granularity != 2) { 1727 ShouldNotReachHere(); 1728 } 1729 if (AvoidUnalignedAccesses && (granularity != 2)) { 1730 assert_different_registers(dst, tmp); 1731 assert_different_registers(tmp, src.base()); 1732 is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1)); 1733 slli(tmp, tmp, 8); 1734 lbu(dst, src); 1735 add(dst, dst, tmp); 1736 } else { 1737 is_signed ? lh(dst, src) : lhu(dst, src); 1738 } 1739 } 1740 1741 // granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register 1742 void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 1743 if (AvoidUnalignedAccesses && (granularity != 4)) { 1744 switch(granularity) { 1745 case 1: 1746 assert_different_registers(dst, tmp, src.base()); 1747 lbu(dst, src); 1748 lbu(tmp, Address(src.base(), src.offset() + 1)); 1749 slli(tmp, tmp, 8); 1750 add(dst, dst, tmp); 1751 lbu(tmp, Address(src.base(), src.offset() + 2)); 1752 slli(tmp, tmp, 16); 1753 add(dst, dst, tmp); 1754 is_signed ? lb(tmp, Address(src.base(), src.offset() + 3)) : lbu(tmp, Address(src.base(), src.offset() + 3)); 1755 slli(tmp, tmp, 24); 1756 add(dst, dst, tmp); 1757 break; 1758 case 2: 1759 assert_different_registers(dst, tmp); 1760 assert_different_registers(tmp, src.base()); 1761 is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2)); 1762 slli(tmp, tmp, 16); 1763 lhu(dst, src); 1764 add(dst, dst, tmp); 1765 break; 1766 default: 1767 ShouldNotReachHere(); 1768 } 1769 } else { 1770 is_signed ? 
lw(dst, src) : lwu(dst, src); 1771 } 1772 } 1773 1774 // granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register 1775 void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) { 1776 if (AvoidUnalignedAccesses && (granularity != 8)) { 1777 switch(granularity){ 1778 case 1: 1779 assert_different_registers(dst, tmp, src.base()); 1780 lbu(dst, src); 1781 lbu(tmp, Address(src.base(), src.offset() + 1)); 1782 slli(tmp, tmp, 8); 1783 add(dst, dst, tmp); 1784 lbu(tmp, Address(src.base(), src.offset() + 2)); 1785 slli(tmp, tmp, 16); 1786 add(dst, dst, tmp); 1787 lbu(tmp, Address(src.base(), src.offset() + 3)); 1788 slli(tmp, tmp, 24); 1789 add(dst, dst, tmp); 1790 lbu(tmp, Address(src.base(), src.offset() + 4)); 1791 slli(tmp, tmp, 32); 1792 add(dst, dst, tmp); 1793 lbu(tmp, Address(src.base(), src.offset() + 5)); 1794 slli(tmp, tmp, 40); 1795 add(dst, dst, tmp); 1796 lbu(tmp, Address(src.base(), src.offset() + 6)); 1797 slli(tmp, tmp, 48); 1798 add(dst, dst, tmp); 1799 lbu(tmp, Address(src.base(), src.offset() + 7)); 1800 slli(tmp, tmp, 56); 1801 add(dst, dst, tmp); 1802 break; 1803 case 2: 1804 assert_different_registers(dst, tmp, src.base()); 1805 lhu(dst, src); 1806 lhu(tmp, Address(src.base(), src.offset() + 2)); 1807 slli(tmp, tmp, 16); 1808 add(dst, dst, tmp); 1809 lhu(tmp, Address(src.base(), src.offset() + 4)); 1810 slli(tmp, tmp, 32); 1811 add(dst, dst, tmp); 1812 lhu(tmp, Address(src.base(), src.offset() + 6)); 1813 slli(tmp, tmp, 48); 1814 add(dst, dst, tmp); 1815 break; 1816 case 4: 1817 assert_different_registers(dst, tmp); 1818 assert_different_registers(tmp, src.base()); 1819 lwu(tmp, Address(src.base(), src.offset() + 4)); 1820 slli(tmp, tmp, 32); 1821 lwu(dst, src); 1822 add(dst, dst, tmp); 1823 break; 1824 default: 1825 ShouldNotReachHere(); 1826 } 1827 } else { 1828 ld(dst, src); 1829 } 1830 } 1831 1832 1833 // reverse bytes in halfword in lower 16 bits and sign-extend 1834 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) 1835 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { 1836 if (UseZbb) { 1837 rev8(Rd, Rs); 1838 srai(Rd, Rd, 48); 1839 return; 1840 } 1841 assert_different_registers(Rs, tmp); 1842 assert_different_registers(Rd, tmp); 1843 srli(tmp, Rs, 8); 1844 andi(tmp, tmp, 0xFF); 1845 slli(Rd, Rs, 56); 1846 srai(Rd, Rd, 48); // sign-extend 1847 orr(Rd, Rd, tmp); 1848 } 1849 1850 // reverse bytes in lower word and sign-extend 1851 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) 1852 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1853 if (UseZbb) { 1854 rev8(Rd, Rs); 1855 srai(Rd, Rd, 32); 1856 return; 1857 } 1858 assert_different_registers(Rs, tmp1, tmp2); 1859 assert_different_registers(Rd, tmp1, tmp2); 1860 revb_h_w_u(Rd, Rs, tmp1, tmp2); 1861 slli(tmp2, Rd, 48); 1862 srai(tmp2, tmp2, 32); // sign-extend 1863 srli(Rd, Rd, 16); 1864 orr(Rd, Rd, tmp2); 1865 } 1866 1867 // reverse bytes in halfword in lower 16 bits and zero-extend 1868 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 1869 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { 1870 if (UseZbb) { 1871 rev8(Rd, Rs); 1872 srli(Rd, Rd, 48); 1873 return; 1874 } 1875 assert_different_registers(Rs, tmp); 1876 assert_different_registers(Rd, tmp); 1877 srli(tmp, Rs, 8); 1878 andi(tmp, tmp, 0xFF); 1879 andi(Rd, Rs, 0xFF); 1880 slli(Rd, Rd, 8); 1881 orr(Rd, Rd, tmp); 1882 } 1883 1884 // reverse 
bytes in halfwords in lower 32 bits and zero-extend 1885 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 1886 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1887 if (UseZbb) { 1888 rev8(Rd, Rs); 1889 rori(Rd, Rd, 32); 1890 roriw(Rd, Rd, 16); 1891 zero_extend(Rd, Rd, 32); 1892 return; 1893 } 1894 assert_different_registers(Rs, tmp1, tmp2); 1895 assert_different_registers(Rd, tmp1, tmp2); 1896 srli(tmp2, Rs, 16); 1897 revb_h_h_u(tmp2, tmp2, tmp1); 1898 revb_h_h_u(Rd, Rs, tmp1); 1899 slli(tmp2, tmp2, 16); 1900 orr(Rd, Rd, tmp2); 1901 } 1902 1903 // This method is only used for revb_h 1904 // Rd = Rs[47:0] Rs[55:48] Rs[63:56] 1905 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1906 assert_different_registers(Rs, tmp1, tmp2); 1907 assert_different_registers(Rd, tmp1); 1908 srli(tmp1, Rs, 48); 1909 andi(tmp2, tmp1, 0xFF); 1910 slli(tmp2, tmp2, 8); 1911 srli(tmp1, tmp1, 8); 1912 orr(tmp1, tmp1, tmp2); 1913 slli(Rd, Rs, 16); 1914 orr(Rd, Rd, tmp1); 1915 } 1916 1917 // reverse bytes in each halfword 1918 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] 1919 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1920 if (UseZbb) { 1921 assert_different_registers(Rs, tmp1); 1922 assert_different_registers(Rd, tmp1); 1923 rev8(Rd, Rs); 1924 zero_extend(tmp1, Rd, 32); 1925 roriw(tmp1, tmp1, 16); 1926 slli(tmp1, tmp1, 32); 1927 srli(Rd, Rd, 32); 1928 roriw(Rd, Rd, 16); 1929 zero_extend(Rd, Rd, 32); 1930 orr(Rd, Rd, tmp1); 1931 return; 1932 } 1933 assert_different_registers(Rs, tmp1, tmp2); 1934 assert_different_registers(Rd, tmp1, tmp2); 1935 revb_h_helper(Rd, Rs, tmp1, tmp2); 1936 for (int i = 0; i < 3; ++i) { 1937 revb_h_helper(Rd, Rd, tmp1, tmp2); 1938 } 1939 } 1940 1941 // reverse bytes in each word 1942 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] 1943 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1944 if (UseZbb) { 1945 rev8(Rd, Rs); 1946 rori(Rd, Rd, 32); 1947 return; 1948 } 1949 assert_different_registers(Rs, tmp1, tmp2); 1950 assert_different_registers(Rd, tmp1, tmp2); 1951 revb(Rd, Rs, tmp1, tmp2); 1952 ror_imm(Rd, Rd, 32); 1953 } 1954 1955 // reverse bytes in doubleword 1956 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] 1957 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1958 if (UseZbb) { 1959 rev8(Rd, Rs); 1960 return; 1961 } 1962 assert_different_registers(Rs, tmp1, tmp2); 1963 assert_different_registers(Rd, tmp1, tmp2); 1964 andi(tmp1, Rs, 0xFF); 1965 slli(tmp1, tmp1, 8); 1966 for (int step = 8; step < 56; step += 8) { 1967 srli(tmp2, Rs, step); 1968 andi(tmp2, tmp2, 0xFF); 1969 orr(tmp1, tmp1, tmp2); 1970 slli(tmp1, tmp1, 8); 1971 } 1972 srli(Rd, Rs, 56); 1973 andi(Rd, Rd, 0xFF); 1974 orr(Rd, tmp1, Rd); 1975 } 1976 1977 // rotate right with shift bits 1978 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) 1979 { 1980 if (UseZbb) { 1981 rori(dst, src, shift); 1982 return; 1983 } 1984 1985 assert_different_registers(dst, tmp); 1986 assert_different_registers(src, tmp); 1987 assert(shift < 64, "shift amount must be < 64"); 1988 slli(tmp, src, 64 - shift); 1989 srli(dst, src, shift); 1990 orr(dst, dst, tmp); 1991 } 1992 1993 // rotate left with shift bits, 32-bit version 1994 void 
MacroAssembler::rolw_imm(Register dst, Register src, uint32_t shift, Register tmp) { 1995 if (UseZbb) { 1996 // no roliw available 1997 roriw(dst, src, 32 - shift); 1998 return; 1999 } 2000 2001 assert_different_registers(dst, tmp); 2002 assert_different_registers(src, tmp); 2003 assert(shift < 32, "shift amount must be < 32"); 2004 srliw(tmp, src, 32 - shift); 2005 slliw(dst, src, shift); 2006 orr(dst, dst, tmp); 2007 } 2008 2009 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { 2010 if (is_simm12(imm)) { 2011 and_imm12(Rd, Rn, imm); 2012 } else { 2013 assert_different_registers(Rn, tmp); 2014 mv(tmp, imm); 2015 andr(Rd, Rn, tmp); 2016 } 2017 } 2018 2019 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { 2020 ld(tmp1, adr); 2021 if (src.is_register()) { 2022 orr(tmp1, tmp1, src.as_register()); 2023 } else { 2024 if (is_simm12(src.as_constant())) { 2025 ori(tmp1, tmp1, src.as_constant()); 2026 } else { 2027 assert_different_registers(tmp1, tmp2); 2028 mv(tmp2, src.as_constant()); 2029 orr(tmp1, tmp1, tmp2); 2030 } 2031 } 2032 sd(tmp1, adr); 2033 } 2034 2035 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L) { 2036 assert_different_registers(oop, trial_klass, tmp1, tmp2); 2037 if (UseCompressedClassPointers) { 2038 lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2039 if (CompressedKlassPointers::base() == nullptr) { 2040 slli(tmp1, tmp1, CompressedKlassPointers::shift()); 2041 beq(trial_klass, tmp1, L); 2042 return; 2043 } 2044 decode_klass_not_null(tmp1, tmp2); 2045 } else { 2046 ld(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2047 } 2048 beq(trial_klass, tmp1, L); 2049 } 2050 2051 // Move an oop into a register. 2052 void MacroAssembler::movoop(Register dst, jobject obj) { 2053 int oop_index; 2054 if (obj == nullptr) { 2055 oop_index = oop_recorder()->allocate_oop_index(obj); 2056 } else { 2057 #ifdef ASSERT 2058 { 2059 ThreadInVMfromUnknown tiv; 2060 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 2061 } 2062 #endif 2063 oop_index = oop_recorder()->find_index(obj); 2064 } 2065 RelocationHolder rspec = oop_Relocation::spec(oop_index); 2066 2067 if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) { 2068 mv(dst, Address((address)obj, rspec)); 2069 } else { 2070 address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address 2071 ld_constant(dst, Address(dummy, rspec)); 2072 } 2073 } 2074 2075 // Move a metadata address into a register. 2076 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 2077 int oop_index; 2078 if (obj == nullptr) { 2079 oop_index = oop_recorder()->allocate_metadata_index(obj); 2080 } else { 2081 oop_index = oop_recorder()->find_index(obj); 2082 } 2083 RelocationHolder rspec = metadata_Relocation::spec(oop_index); 2084 mv(dst, Address((address)obj, rspec)); 2085 } 2086 2087 // Writes to stack successive pages until offset reached to check for 2088 // stack overflow + shadow pages. This clobbers tmp. 2089 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 2090 assert_different_registers(tmp, size, t0); 2091 // Bang stack for total size given plus shadow page size. 2092 // Bang one page at a time because large size can bang beyond yellow and 2093 // red zones. 
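// The value stored at each probed page is arbitrary; only the memory touch matters, but
// storing the remaining size leaves a useful debugging crumb (see the comment in the
// shadow-page loop below).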
2094 mv(t0, (int)os::vm_page_size());
2095 Label loop;
2096 bind(loop);
2097 sub(tmp, sp, t0);
2098 subw(size, size, t0);
2099 sd(size, Address(tmp));
2100 bgtz(size, loop);
2101
2102 // Bang down shadow pages too.
2103 // At this point, (tmp-0) is the last address touched, so don't
2104 // touch it again. (It was touched as (tmp-pagesize) but then tmp
2105 // was post-decremented.) Skip this address by starting at i=1, and
2106 // touch a few more pages below. N.B. It is important to touch all
2107 // the way down to and including i=StackShadowPages.
2108 for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / (int)os::vm_page_size()) - 1; i++) {
2109 // this could be any sized move but this can be a debugging crumb
2110 // so the bigger the better.
2111 sub(tmp, tmp, (int)os::vm_page_size());
2112 sd(size, Address(tmp, 0));
2113 }
2114 }
2115
2116 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) {
2117 int32_t offset = 0;
2118 _masm = masm;
2119 ExternalAddress target((address)flag_addr);
2120 _masm->relocate(target.rspec(), [&] {
2121 int32_t offset;
2122 _masm->la_patchable(t0, target, offset);
2123 _masm->lbu(t0, Address(t0, offset));
2124 });
2125 if (value) {
2126 _masm->bnez(t0, _label);
2127 } else {
2128 _masm->beqz(t0, _label);
2129 }
2130 }
2131
2132 SkipIfEqual::~SkipIfEqual() {
2133 _masm->bind(_label);
2134 _masm = nullptr;
2135 }
2136
2137 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) {
2138 const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2139 ld(dst, Address(xmethod, Method::const_offset()));
2140 ld(dst, Address(dst, ConstMethod::constants_offset()));
2141 ld(dst, Address(dst, ConstantPool::pool_holder_offset()));
2142 ld(dst, Address(dst, mirror_offset));
2143 resolve_oop_handle(dst, tmp1, tmp2);
2144 }
2145
2146 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) {
2147 // OopHandle::resolve is an indirection.
2148 assert_different_registers(result, tmp1, tmp2);
2149 access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2);
2150 }
2151
2152 // ((WeakHandle)result).resolve()
2153 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) {
2154 assert_different_registers(result, tmp1, tmp2);
2155 Label resolved;
2156
2157 // A null weak handle resolves to null.
2158 beqz(result, resolved);
2159
2160 // Only 64 bit platforms support GCs that require a tmp register
2161 // Only IN_HEAP loads require a thread_tmp register
2162 // WeakHandle::resolve is an indirection like jweak.
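// IN_NATIVE | ON_PHANTOM_OOP_REF asks the barrier for jweak-style resolution, so a referent
// that is concurrently being cleared by the GC is handled correctly.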
2163 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, 2164 result, Address(result), tmp1, tmp2); 2165 bind(resolved); 2166 } 2167 2168 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 2169 Register dst, Address src, 2170 Register tmp1, Register tmp2) { 2171 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2172 decorators = AccessInternal::decorator_fixup(decorators, type); 2173 bool as_raw = (decorators & AS_RAW) != 0; 2174 if (as_raw) { 2175 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2); 2176 } else { 2177 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2); 2178 } 2179 } 2180 2181 void MacroAssembler::null_check(Register reg, int offset) { 2182 if (needs_explicit_null_check(offset)) { 2183 // provoke OS null exception if reg is null by 2184 // accessing M[reg] w/o changing any registers 2185 // NOTE: this is plenty to provoke a segv 2186 ld(zr, Address(reg, 0)); 2187 } else { 2188 // nothing to do, (later) access of M[reg + offset] 2189 // will provoke OS null exception if reg is null 2190 } 2191 } 2192 2193 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 2194 Address dst, Register val, 2195 Register tmp1, Register tmp2, Register tmp3) { 2196 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2197 decorators = AccessInternal::decorator_fixup(decorators, type); 2198 bool as_raw = (decorators & AS_RAW) != 0; 2199 if (as_raw) { 2200 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2201 } else { 2202 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2203 } 2204 } 2205 2206 // Algorithm must match CompressedOops::encode. 2207 void MacroAssembler::encode_heap_oop(Register d, Register s) { 2208 verify_oop_msg(s, "broken oop in encode_heap_oop"); 2209 if (CompressedOops::base() == nullptr) { 2210 if (CompressedOops::shift() != 0) { 2211 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2212 srli(d, s, LogMinObjAlignmentInBytes); 2213 } else { 2214 mv(d, s); 2215 } 2216 } else { 2217 Label notNull; 2218 sub(d, s, xheapbase); 2219 bgez(d, notNull); 2220 mv(d, zr); 2221 bind(notNull); 2222 if (CompressedOops::shift() != 0) { 2223 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2224 srli(d, d, CompressedOops::shift()); 2225 } 2226 } 2227 } 2228 2229 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { 2230 assert_different_registers(dst, tmp); 2231 assert_different_registers(src, tmp); 2232 if (UseCompressedClassPointers) { 2233 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2234 decode_klass_not_null(dst, tmp); 2235 } else { 2236 ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2237 } 2238 } 2239 2240 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { 2241 // FIXME: Should this be a store release? concurrent gcs assumes 2242 // klass length is valid if klass field is not null. 
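// With compressed class pointers the klass is stored as a 32-bit narrow value (sw below),
// otherwise as the full 64-bit Klass* (sd).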
2243 if (UseCompressedClassPointers) { 2244 encode_klass_not_null(src, tmp); 2245 sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2246 } else { 2247 sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2248 } 2249 } 2250 2251 void MacroAssembler::store_klass_gap(Register dst, Register src) { 2252 if (UseCompressedClassPointers) { 2253 // Store to klass gap in destination 2254 sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2255 } 2256 } 2257 2258 void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { 2259 assert_different_registers(r, tmp); 2260 decode_klass_not_null(r, r, tmp); 2261 } 2262 2263 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { 2264 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2265 2266 if (CompressedKlassPointers::base() == nullptr) { 2267 if (CompressedKlassPointers::shift() != 0) { 2268 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2269 slli(dst, src, LogKlassAlignmentInBytes); 2270 } else { 2271 mv(dst, src); 2272 } 2273 return; 2274 } 2275 2276 Register xbase = dst; 2277 if (dst == src) { 2278 xbase = tmp; 2279 } 2280 2281 assert_different_registers(src, xbase); 2282 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2283 2284 if (CompressedKlassPointers::shift() != 0) { 2285 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2286 assert_different_registers(t0, xbase); 2287 shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); 2288 } else { 2289 add(dst, xbase, src); 2290 } 2291 } 2292 2293 void MacroAssembler::encode_klass_not_null(Register r, Register tmp) { 2294 assert_different_registers(r, tmp); 2295 encode_klass_not_null(r, r, tmp); 2296 } 2297 2298 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { 2299 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2300 2301 if (CompressedKlassPointers::base() == nullptr) { 2302 if (CompressedKlassPointers::shift() != 0) { 2303 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2304 srli(dst, src, LogKlassAlignmentInBytes); 2305 } else { 2306 mv(dst, src); 2307 } 2308 return; 2309 } 2310 2311 if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && 2312 CompressedKlassPointers::shift() == 0) { 2313 zero_extend(dst, src, 32); 2314 return; 2315 } 2316 2317 Register xbase = dst; 2318 if (dst == src) { 2319 xbase = tmp; 2320 } 2321 2322 assert_different_registers(src, xbase); 2323 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2324 sub(dst, src, xbase); 2325 if (CompressedKlassPointers::shift() != 0) { 2326 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2327 srli(dst, dst, LogKlassAlignmentInBytes); 2328 } 2329 } 2330 2331 void MacroAssembler::decode_heap_oop_not_null(Register r) { 2332 decode_heap_oop_not_null(r, r); 2333 } 2334 2335 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2336 assert(UseCompressedOops, "should only be used for compressed headers"); 2337 assert(Universe::heap() != nullptr, "java heap should be initialized"); 2338 // Cannot assert, unverified entry point counts instructions (see .ad file) 2339 // vtableStubs also counts instructions in pd_code_size_limit. 2340 // Also do not verify_oop as this is called by verify_oop. 
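// Decode = shift the narrow oop left by the object-alignment shift, then add the heap base
// when one is in use; with a zero shift the narrow value already equals the full oop.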
2341 if (CompressedOops::shift() != 0) {
2342 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
2343 slli(dst, src, LogMinObjAlignmentInBytes);
2344 if (CompressedOops::base() != nullptr) {
2345 add(dst, xheapbase, dst);
2346 }
2347 } else {
2348 assert(CompressedOops::base() == nullptr, "sanity");
2349 mv(dst, src);
2350 }
2351 }
2352
2353 void MacroAssembler::decode_heap_oop(Register d, Register s) {
2354 if (CompressedOops::base() == nullptr) {
2355 if (CompressedOops::shift() != 0 || d != s) {
2356 slli(d, s, CompressedOops::shift());
2357 }
2358 } else {
2359 Label done;
2360 mv(d, s);
2361 beqz(s, done);
2362 shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes);
2363 bind(done);
2364 }
2365 verify_oop_msg(d, "broken oop in decode_heap_oop");
2366 }
2367
2368 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
2369 Register tmp2, Register tmp3, DecoratorSet decorators) {
2370 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
2371 }
2372
2373 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
2374 Register tmp2, DecoratorSet decorators) {
2375 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
2376 }
2377
2378 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
2379 Register tmp2, DecoratorSet decorators) {
2380 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2);
2381 }
2382
2383 // Used for storing nulls.
2384 void MacroAssembler::store_heap_oop_null(Address dst) {
2385 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
2386 }
2387
2388 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
2389 bool want_remainder)
2390 {
2391 // Full implementation of Java idiv and irem. The function
2392 // returns the (pc) offset of the div instruction - may be needed
2393 // for implicit exceptions.
2394 //
2395 // input : rs1: dividend
2396 // rs2: divisor
2397 //
2398 // result: either
2399 // quotient (= rs1 idiv rs2)
2400 // remainder (= rs1 irem rs2)
2401
2402
2403 int idivl_offset = offset();
2404 if (!want_remainder) {
2405 divw(result, rs1, rs2);
2406 } else {
2407 remw(result, rs1, rs2); // result = rs1 % rs2;
2408 }
2409 return idivl_offset;
2410 }
2411
2412 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
2413 bool want_remainder)
2414 {
2415 // Full implementation of Java ldiv and lrem. The function
2416 // returns the (pc) offset of the div instruction - may be needed
2417 // for implicit exceptions.
2418 //
2419 // input : rs1: dividend
2420 // rs2: divisor
2421 //
2422 // result: either
2423 // quotient (= rs1 idiv rs2)
2424 // remainder (= rs1 irem rs2)
2425
2426 int idivq_offset = offset();
2427 if (!want_remainder) {
2428 div(result, rs1, rs2);
2429 } else {
2430 rem(result, rs1, rs2); // result = rs1 % rs2;
2431 }
2432 return idivq_offset;
2433 }
2434
2435 // Look up the method for a megamorphic invokeinterface call.
2436 // The target method is determined by <intf_klass, itable_index>.
2437 // The receiver klass is in recv_klass.
2438 // On success, the result will be in method_result, and execution falls through.
2439 // On failure, execution transfers to the given label.
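// In outline: scan_tmp is positioned at the first itableOffsetEntry (just past the vtable) and
// advanced entry by entry until the entry's interface klass matches intf_klass; a null entry
// means the receiver does not implement the interface. When return_method is set, recv_klass
// is pre-biased by the scaled itable_index so the matching entry's offset field locates the Method*.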
2440 void MacroAssembler::lookup_interface_method(Register recv_klass,
2441 Register intf_klass,
2442 RegisterOrConstant itable_index,
2443 Register method_result,
2444 Register scan_tmp,
2445 Label& L_no_such_interface,
2446 bool return_method) {
2447 assert_different_registers(recv_klass, intf_klass, scan_tmp);
2448 assert_different_registers(method_result, intf_klass, scan_tmp);
2449 assert(recv_klass != method_result || !return_method,
2450 "recv_klass can be destroyed when method isn't needed");
2451 assert(itable_index.is_constant() || itable_index.as_register() == method_result,
2452 "caller must be same register for non-constant itable index as for method");
2453
2454 // Compute start of first itableOffsetEntry (which is at the end of the vtable).
2455 int vtable_base = in_bytes(Klass::vtable_start_offset());
2456 int itentry_off = in_bytes(itableMethodEntry::method_offset());
2457 int scan_step = itableOffsetEntry::size() * wordSize;
2458 int vte_size = vtableEntry::size_in_bytes();
2459 assert(vte_size == wordSize, "else adjust times_vte_scale");
2460
2461 lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
2462
2463 // %%% Could store the aligned, prescaled offset in the klassoop.
2464 shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
2465 add(scan_tmp, scan_tmp, vtable_base);
2466
2467 if (return_method) {
2468 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
2469 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
2470 if (itable_index.is_register()) {
2471 slli(t0, itable_index.as_register(), 3);
2472 } else {
2473 mv(t0, itable_index.as_constant() << 3);
2474 }
2475 add(recv_klass, recv_klass, t0);
2476 if (itentry_off) {
2477 add(recv_klass, recv_klass, itentry_off);
2478 }
2479 }
2480
2481 Label search, found_method;
2482
2483 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset()));
2484 beq(intf_klass, method_result, found_method);
2485 bind(search);
2486 // Check that the previous entry is non-null. A null entry means that
2487 // the receiver class doesn't implement the interface, and wasn't the
2488 // same as when the caller was compiled.
2489 beqz(method_result, L_no_such_interface, /* is_far */ true);
2490 addi(scan_tmp, scan_tmp, scan_step);
2491 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset()));
2492 bne(intf_klass, method_result, search);
2493
2494 bind(found_method);
2495
2496 // Got a hit.
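// scan_tmp now points at the matching itableOffsetEntry; its offset field, added to the
// pre-biased recv_klass, addresses the Method* slot for itable_index.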
2497 if (return_method) {
2498 lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset()));
2499 add(method_result, recv_klass, scan_tmp);
2500 ld(method_result, Address(method_result));
2501 }
2502 }
2503
2504 // virtual method calling
2505 void MacroAssembler::lookup_virtual_method(Register recv_klass,
2506 RegisterOrConstant vtable_index,
2507 Register method_result) {
2508 const ByteSize base = Klass::vtable_start_offset();
2509 assert(vtableEntry::size() * wordSize == 8,
2510 "adjust the scaling in the code below");
2511 int vtable_offset_in_bytes = in_bytes(base + vtableEntry::method_offset());
2512
2513 if (vtable_index.is_register()) {
2514 shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
2515 ld(method_result, Address(method_result, vtable_offset_in_bytes));
2516 } else {
2517 vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
2518 ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
2519 }
2520 }
2521
2522 void MacroAssembler::membar(uint32_t order_constraint) {
2523 address prev = pc() - NativeMembar::instruction_size;
2524 address last = code()->last_insn();
2525
2526 if (last != nullptr && nativeInstruction_at(last)->is_membar() && prev == last) {
2527 NativeMembar *bar = NativeMembar_at(prev);
2528 // We are merging two memory barrier instructions. On RISCV we
2529 // can do this simply by ORing them together.
2530 bar->set_kind(bar->get_kind() | order_constraint);
2531 BLOCK_COMMENT("merged membar");
2532 } else {
2533 code()->set_last_insn(pc());
2534
2535 uint32_t predecessor = 0;
2536 uint32_t successor = 0;
2537
2538 membar_mask_to_pred_succ(order_constraint, predecessor, successor);
2539 fence(predecessor, successor);
2540 }
2541 }
2542
2543 // Form an address from base + offset in Rd. Rd may or may not
2544 // actually be used: you must use the Address that is returned. It
2545 // is up to you to ensure that the shift provided matches the size
2546 // of your data.
2547 Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset) {
2548 if (is_simm12(byte_offset)) { // 12: imm in range 2^12
2549 return Address(base, byte_offset);
2550 }
2551
2552 assert_different_registers(Rd, base, noreg);
2553
2554 // Do it the hard way
2555 mv(Rd, byte_offset);
2556 add(Rd, base, Rd);
2557 return Address(Rd);
2558 }
2559
2560 void MacroAssembler::check_klass_subtype(Register sub_klass,
2561 Register super_klass,
2562 Register tmp_reg,
2563 Label& L_success) {
2564 Label L_failure;
2565 check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, nullptr);
2566 check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, nullptr);
2567 bind(L_failure);
2568 }
2569
2570 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
2571 ld(t0, Address(xthread, JavaThread::polling_word_offset()));
2572 if (acquire) {
2573 membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
2574 }
2575 if (at_return) {
2576 bgtu(in_nmethod ?
sp : fp, t0, slow_path, /* is_far */ true); 2577 } else { 2578 test_bit(t0, t0, exact_log2(SafepointMechanism::poll_bit())); 2579 bnez(t0, slow_path, true /* is_far */); 2580 } 2581 } 2582 2583 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, 2584 Label &succeed, Label *fail) { 2585 assert_different_registers(addr, tmp); 2586 assert_different_registers(newv, tmp); 2587 assert_different_registers(oldv, tmp); 2588 2589 // oldv holds comparison value 2590 // newv holds value to write in exchange 2591 // addr identifies memory word to compare against/update 2592 Label retry_load, nope; 2593 bind(retry_load); 2594 // Load reserved from the memory location 2595 lr_d(tmp, addr, Assembler::aqrl); 2596 // Fail and exit if it is not what we expect 2597 bne(tmp, oldv, nope); 2598 // If the store conditional succeeds, tmp will be zero 2599 sc_d(tmp, newv, addr, Assembler::rl); 2600 beqz(tmp, succeed); 2601 // Retry only when the store conditional failed 2602 j(retry_load); 2603 2604 bind(nope); 2605 membar(AnyAny); 2606 mv(oldv, tmp); 2607 if (fail != nullptr) { 2608 j(*fail); 2609 } 2610 } 2611 2612 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, 2613 Label &succeed, Label *fail) { 2614 assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); 2615 cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); 2616 } 2617 2618 void MacroAssembler::load_reserved(Register addr, 2619 enum operand_size size, 2620 Assembler::Aqrl acquire) { 2621 switch (size) { 2622 case int64: 2623 lr_d(t0, addr, acquire); 2624 break; 2625 case int32: 2626 lr_w(t0, addr, acquire); 2627 break; 2628 case uint32: 2629 lr_w(t0, addr, acquire); 2630 zero_extend(t0, t0, 32); 2631 break; 2632 default: 2633 ShouldNotReachHere(); 2634 } 2635 } 2636 2637 void MacroAssembler::store_conditional(Register addr, 2638 Register new_val, 2639 enum operand_size size, 2640 Assembler::Aqrl release) { 2641 switch (size) { 2642 case int64: 2643 sc_d(t0, new_val, addr, release); 2644 break; 2645 case int32: 2646 case uint32: 2647 sc_w(t0, new_val, addr, release); 2648 break; 2649 default: 2650 ShouldNotReachHere(); 2651 } 2652 } 2653 2654 2655 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, 2656 Register new_val, 2657 enum operand_size size, 2658 Register tmp1, Register tmp2, Register tmp3) { 2659 assert(size == int8 || size == int16, "unsupported operand size"); 2660 2661 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; 2662 2663 andi(shift, addr, 3); 2664 slli(shift, shift, 3); 2665 2666 andi(aligned_addr, addr, ~3); 2667 2668 if (size == int8) { 2669 mv(mask, 0xff); 2670 } else { 2671 // size == int16 case 2672 mv(mask, -1); 2673 zero_extend(mask, mask, 16); 2674 } 2675 sll(mask, mask, shift); 2676 2677 xori(not_mask, mask, -1); 2678 2679 sll(expected, expected, shift); 2680 andr(expected, expected, mask); 2681 2682 sll(new_val, new_val, shift); 2683 andr(new_val, new_val, mask); 2684 } 2685 2686 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. 2687 // It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, 2688 // which are forced to work with 4-byte aligned address. 
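// Illustrative example (not from the original source): a byte CAS at an address with
// addr % 4 == 2 uses aligned_addr = addr & ~3, shift = 16 and mask = 0xff << 16; expected and
// new_val are shifted into that lane, the containing word is read with lr.w, the untouched
// lanes are preserved via not_mask, and sc.w writes back the merged word, retrying on failure.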
2689 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, 2690 Register new_val, 2691 enum operand_size size, 2692 Assembler::Aqrl acquire, Assembler::Aqrl release, 2693 Register result, bool result_as_bool, 2694 Register tmp1, Register tmp2, Register tmp3) { 2695 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 2696 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 2697 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 2698 2699 Label retry, fail, done; 2700 2701 bind(retry); 2702 lr_w(old, aligned_addr, acquire); 2703 andr(tmp, old, mask); 2704 bne(tmp, expected, fail); 2705 2706 andr(tmp, old, not_mask); 2707 orr(tmp, tmp, new_val); 2708 sc_w(tmp, tmp, aligned_addr, release); 2709 bnez(tmp, retry); 2710 2711 if (result_as_bool) { 2712 mv(result, 1); 2713 j(done); 2714 2715 bind(fail); 2716 mv(result, zr); 2717 2718 bind(done); 2719 } else { 2720 andr(tmp, old, mask); 2721 2722 bind(fail); 2723 srl(result, tmp, shift); 2724 2725 if (size == int8) { 2726 sign_extend(result, result, 8); 2727 } else { 2728 // size == int16 case 2729 sign_extend(result, result, 16); 2730 } 2731 } 2732 } 2733 2734 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement 2735 // the weak CAS stuff. The major difference is that it just failed when store conditional 2736 // failed. 2737 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, 2738 Register new_val, 2739 enum operand_size size, 2740 Assembler::Aqrl acquire, Assembler::Aqrl release, 2741 Register result, 2742 Register tmp1, Register tmp2, Register tmp3) { 2743 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 2744 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 2745 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 2746 2747 Label fail, done; 2748 2749 lr_w(old, aligned_addr, acquire); 2750 andr(tmp, old, mask); 2751 bne(tmp, expected, fail); 2752 2753 andr(tmp, old, not_mask); 2754 orr(tmp, tmp, new_val); 2755 sc_w(tmp, tmp, aligned_addr, release); 2756 bnez(tmp, fail); 2757 2758 // Success 2759 mv(result, 1); 2760 j(done); 2761 2762 // Fail 2763 bind(fail); 2764 mv(result, zr); 2765 2766 bind(done); 2767 } 2768 2769 void MacroAssembler::cmpxchg(Register addr, Register expected, 2770 Register new_val, 2771 enum operand_size size, 2772 Assembler::Aqrl acquire, Assembler::Aqrl release, 2773 Register result, bool result_as_bool) { 2774 assert(size != int8 && size != int16, "unsupported operand size"); 2775 assert_different_registers(addr, t0); 2776 assert_different_registers(expected, t0); 2777 assert_different_registers(new_val, t0); 2778 2779 Label retry_load, done, ne_done; 2780 bind(retry_load); 2781 load_reserved(addr, size, acquire); 2782 bne(t0, expected, ne_done); 2783 store_conditional(addr, new_val, size, release); 2784 bnez(t0, retry_load); 2785 2786 // equal, succeed 2787 if (result_as_bool) { 2788 mv(result, 1); 2789 } else { 2790 mv(result, expected); 2791 } 2792 j(done); 2793 2794 // not equal, failed 2795 bind(ne_done); 2796 if (result_as_bool) { 2797 mv(result, zr); 2798 } else { 2799 mv(result, t0); 2800 } 2801 2802 bind(done); 2803 } 2804 2805 void MacroAssembler::cmpxchg_weak(Register addr, Register expected, 2806 Register new_val, 2807 enum operand_size size, 2808 Assembler::Aqrl acquire, Assembler::Aqrl release, 2809 Register result) { 
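// Weak CAS: a single lr/sc attempt. Any sc failure, including a spurious one, reports 0 in
// result instead of retrying.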
2810 assert_different_registers(addr, t0); 2811 assert_different_registers(expected, t0); 2812 assert_different_registers(new_val, t0); 2813 2814 Label fail, done; 2815 load_reserved(addr, size, acquire); 2816 bne(t0, expected, fail); 2817 store_conditional(addr, new_val, size, release); 2818 bnez(t0, fail); 2819 2820 // Success 2821 mv(result, 1); 2822 j(done); 2823 2824 // Fail 2825 bind(fail); 2826 mv(result, zr); 2827 2828 bind(done); 2829 } 2830 2831 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ 2832 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ 2833 prev = prev->is_valid() ? prev : zr; \ 2834 if (incr.is_register()) { \ 2835 AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 2836 } else { \ 2837 mv(t0, incr.as_constant()); \ 2838 AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 2839 } \ 2840 return; \ 2841 } 2842 2843 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) 2844 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) 2845 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) 2846 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) 2847 2848 #undef ATOMIC_OP 2849 2850 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ 2851 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 2852 prev = prev->is_valid() ? prev : zr; \ 2853 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 2854 return; \ 2855 } 2856 2857 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) 2858 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) 2859 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) 2860 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) 2861 2862 #undef ATOMIC_XCHG 2863 2864 #define ATOMIC_XCHGU(OP1, OP2) \ 2865 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 2866 atomic_##OP2(prev, newv, addr); \ 2867 zero_extend(prev, prev, 32); \ 2868 return; \ 2869 } 2870 2871 ATOMIC_XCHGU(xchgwu, xchgw) 2872 ATOMIC_XCHGU(xchgalwu, xchgalw) 2873 2874 #undef ATOMIC_XCHGU 2875 2876 void MacroAssembler::far_jump(Address entry, Register tmp) { 2877 assert(ReservedCodeCacheSize < 4*G, "branch out of range"); 2878 assert(CodeCache::find_blob(entry.target()) != nullptr, 2879 "destination of far call not found in code cache"); 2880 assert(entry.rspec().type() == relocInfo::external_word_type 2881 || entry.rspec().type() == relocInfo::runtime_call_type 2882 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 2883 IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size() 2884 if (far_branches()) { 2885 // We can use auipc + jalr here because we know that the total size of 2886 // the code cache cannot exceed 2Gb. 
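// la_patchable materializes a PC-relative base with auipc (or a full movptr constant when the
// target is out of auipc range) and returns the remaining low 12 bits in 'offset' for the jalr.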
2887 relocate(entry.rspec(), [&] { 2888 int32_t offset; 2889 la_patchable(tmp, entry, offset); 2890 jalr(x0, tmp, offset); 2891 }); 2892 } else { 2893 j(entry); 2894 } 2895 } 2896 2897 void MacroAssembler::far_call(Address entry, Register tmp) { 2898 assert(ReservedCodeCacheSize < 4*G, "branch out of range"); 2899 assert(CodeCache::find_blob(entry.target()) != nullptr, 2900 "destination of far call not found in code cache"); 2901 assert(entry.rspec().type() == relocInfo::external_word_type 2902 || entry.rspec().type() == relocInfo::runtime_call_type 2903 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 2904 IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size() 2905 if (far_branches()) { 2906 // We can use auipc + jalr here because we know that the total size of 2907 // the code cache cannot exceed 2Gb. 2908 relocate(entry.rspec(), [&] { 2909 int32_t offset; 2910 la_patchable(tmp, entry, offset); 2911 jalr(x1, tmp, offset); // link 2912 }); 2913 } else { 2914 jal(entry); // link 2915 } 2916 } 2917 2918 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 2919 Register super_klass, 2920 Register tmp_reg, 2921 Label* L_success, 2922 Label* L_failure, 2923 Label* L_slow_path, 2924 Register super_check_offset) { 2925 assert_different_registers(sub_klass, super_klass, tmp_reg); 2926 bool must_load_sco = (super_check_offset == noreg); 2927 if (must_load_sco) { 2928 assert(tmp_reg != noreg, "supply either a temp or a register offset"); 2929 } else { 2930 assert_different_registers(sub_klass, super_klass, super_check_offset); 2931 } 2932 2933 Label L_fallthrough; 2934 int label_nulls = 0; 2935 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 2936 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 2937 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; } 2938 assert(label_nulls <= 1, "at most one null in batch"); 2939 2940 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 2941 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 2942 Address super_check_offset_addr(super_klass, sco_offset); 2943 2944 // Hacked jmp, which may only be used just before L_fallthrough. 2945 #define final_jmp(label) \ 2946 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 2947 else j(label) /*omit semi*/ 2948 2949 // If the pointers are equal, we are done (e.g., String[] elements). 2950 // This self-check enables sharing of secondary supertype arrays among 2951 // non-primary types such as array-of-interface. Otherwise, each such 2952 // type would need its own customized SSA. 2953 // We move this check to the front of the fast path because many 2954 // type checks are in fact trivially successful in this manner, 2955 // so we get a nicely predicted branch right at the start of the check. 2956 beq(sub_klass, super_klass, *L_success); 2957 2958 // Check the supertype display: 2959 if (must_load_sco) { 2960 lwu(tmp_reg, super_check_offset_addr); 2961 super_check_offset = tmp_reg; 2962 } 2963 add(t0, sub_klass, super_check_offset); 2964 Address super_check_addr(t0); 2965 ld(t0, super_check_addr); // load displayed supertype 2966 2967 // This check has worked decisively for primary supers. 2968 // Secondary supers are sought in the super_cache ('super_cache_addr'). 2969 // (Secondary supers are interfaces and very deeply nested subtypes.) 
2970 // This works in the same check above because of a tricky aliasing 2971 // between the super_Cache and the primary super display elements. 2972 // (The 'super_check_addr' can address either, as the case requires.) 2973 // Note that the cache is updated below if it does not help us find 2974 // what we need immediately. 2975 // So if it was a primary super, we can just fail immediately. 2976 // Otherwise, it's the slow path for us (no success at this point). 2977 2978 beq(super_klass, t0, *L_success); 2979 mv(t1, sc_offset); 2980 if (L_failure == &L_fallthrough) { 2981 beq(super_check_offset, t1, *L_slow_path); 2982 } else { 2983 bne(super_check_offset, t1, *L_failure, /* is_far */ true); 2984 final_jmp(*L_slow_path); 2985 } 2986 2987 bind(L_fallthrough); 2988 2989 #undef final_jmp 2990 } 2991 2992 // Scans count pointer sized words at [addr] for occurrence of value, 2993 // generic 2994 void MacroAssembler::repne_scan(Register addr, Register value, Register count, 2995 Register tmp) { 2996 Label Lloop, Lexit; 2997 beqz(count, Lexit); 2998 bind(Lloop); 2999 ld(tmp, addr); 3000 beq(value, tmp, Lexit); 3001 add(addr, addr, wordSize); 3002 sub(count, count, 1); 3003 bnez(count, Lloop); 3004 bind(Lexit); 3005 } 3006 3007 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 3008 Register super_klass, 3009 Register tmp1_reg, 3010 Register tmp2_reg, 3011 Label* L_success, 3012 Label* L_failure) { 3013 assert_different_registers(sub_klass, super_klass, tmp1_reg); 3014 if (tmp2_reg != noreg) { 3015 assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); 3016 } 3017 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) 3018 3019 Label L_fallthrough; 3020 int label_nulls = 0; 3021 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3022 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3023 3024 assert(label_nulls <= 1, "at most one null in the batch"); 3025 3026 // A couple of useful fields in sub_klass: 3027 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3028 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3029 Address secondary_supers_addr(sub_klass, ss_offset); 3030 Address super_cache_addr( sub_klass, sc_offset); 3031 3032 BLOCK_COMMENT("check_klass_subtype_slow_path"); 3033 3034 // Do a linear scan of the secondary super-klass chain. 3035 // This code is rarely used, so simplicity is a virtue here. 3036 // The repne_scan instruction uses fixed registers, which we must spill. 3037 // Don't worry too much about pre-existing connections with the input regs. 3038 3039 assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) 3040 assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) 3041 3042 RegSet pushed_registers; 3043 if (!IS_A_TEMP(x12)) { 3044 pushed_registers += x12; 3045 } 3046 if (!IS_A_TEMP(x15)) { 3047 pushed_registers += x15; 3048 } 3049 3050 if (super_klass != x10) { 3051 if (!IS_A_TEMP(x10)) { 3052 pushed_registers += x10; 3053 } 3054 } 3055 3056 push_reg(pushed_registers, sp); 3057 3058 // Get super_klass value into x10 (even if it was in x15 or x12) 3059 mv(x10, super_klass); 3060 3061 #ifndef PRODUCT 3062 mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); 3063 Address pst_counter_addr(t1); 3064 ld(t0, pst_counter_addr); 3065 add(t0, t0, 1); 3066 sd(t0, pst_counter_addr); 3067 #endif // PRODUCT 3068 3069 // We will consult the secondary-super array. 3070 ld(x15, secondary_supers_addr); 3071 // Load the array length. 
3072 lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes())); 3073 // Skip to start of data. 3074 add(x15, x15, Array<Klass*>::base_offset_in_bytes()); 3075 3076 // Set t0 to an obvious invalid value, falling through by default 3077 mv(t0, -1); 3078 // Scan X12 words at [X15] for an occurrence of X10. 3079 repne_scan(x15, x10, x12, t0); 3080 3081 // pop will restore x10, so we should use a temp register to keep its value 3082 mv(t1, x10); 3083 3084 // Unspill the temp registers: 3085 pop_reg(pushed_registers, sp); 3086 3087 bne(t1, t0, *L_failure); 3088 3089 // Success. Cache the super we found an proceed in triumph. 3090 sd(super_klass, super_cache_addr); 3091 3092 if (L_success != &L_fallthrough) { 3093 j(*L_success); 3094 } 3095 3096 #undef IS_A_TEMP 3097 3098 bind(L_fallthrough); 3099 } 3100 3101 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 3102 void MacroAssembler::tlab_allocate(Register obj, 3103 Register var_size_in_bytes, 3104 int con_size_in_bytes, 3105 Register tmp1, 3106 Register tmp2, 3107 Label& slow_case, 3108 bool is_far) { 3109 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3110 bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); 3111 } 3112 3113 // get_thread() can be called anywhere inside generated code so we 3114 // need to save whatever non-callee save context might get clobbered 3115 // by the call to Thread::current() or, indeed, the call setup code. 3116 void MacroAssembler::get_thread(Register thread) { 3117 // save all call-clobbered regs except thread 3118 RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + 3119 RegSet::range(x28, x31) + ra - thread; 3120 push_reg(saved_regs, sp); 3121 3122 mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); 3123 jalr(ra); 3124 if (thread != c_rarg0) { 3125 mv(thread, c_rarg0); 3126 } 3127 3128 // restore pushed registers 3129 pop_reg(saved_regs, sp); 3130 } 3131 3132 void MacroAssembler::load_byte_map_base(Register reg) { 3133 CardTable::CardValue* byte_map_base = 3134 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); 3135 mv(reg, (uint64_t)byte_map_base); 3136 } 3137 3138 void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { 3139 unsigned long low_address = (uintptr_t)CodeCache::low_bound(); 3140 unsigned long high_address = (uintptr_t)CodeCache::high_bound(); 3141 unsigned long dest_address = (uintptr_t)dest.target(); 3142 long offset_low = dest_address - low_address; 3143 long offset_high = dest_address - high_address; 3144 3145 assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); 3146 assert((uintptr_t)dest.target() < (1ull << 48), "bad address"); 3147 3148 // RISC-V doesn't compute a page-aligned address, in order to partially 3149 // compensate for the use of *signed* offsets in its base+disp12 3150 // addressing mode (RISC-V's PC-relative reach remains asymmetric 3151 // [-(2G + 2K), 2G - 2K). 
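// Biasing the distance by 0x800 rounds the auipc immediate so that the sign-extended low
// 12 bits returned in 'offset' reproduce the exact target when added back by the consumer.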
3152 if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { 3153 int64_t distance = dest.target() - pc(); 3154 auipc(reg1, (int32_t)distance + 0x800); 3155 offset = ((int32_t)distance << 20) >> 20; 3156 } else { 3157 movptr(reg1, dest.target(), offset); 3158 } 3159 } 3160 3161 void MacroAssembler::build_frame(int framesize) { 3162 assert(framesize >= 2, "framesize must include space for FP/RA"); 3163 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 3164 sub(sp, sp, framesize); 3165 sd(fp, Address(sp, framesize - 2 * wordSize)); 3166 sd(ra, Address(sp, framesize - wordSize)); 3167 if (PreserveFramePointer) { add(fp, sp, framesize); } 3168 } 3169 3170 void MacroAssembler::remove_frame(int framesize) { 3171 assert(framesize >= 2, "framesize must include space for FP/RA"); 3172 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 3173 ld(fp, Address(sp, framesize - 2 * wordSize)); 3174 ld(ra, Address(sp, framesize - wordSize)); 3175 add(sp, sp, framesize); 3176 } 3177 3178 void MacroAssembler::reserved_stack_check() { 3179 // testing if reserved zone needs to be enabled 3180 Label no_reserved_zone_enabling; 3181 3182 ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); 3183 bltu(sp, t0, no_reserved_zone_enabling); 3184 3185 enter(); // RA and FP are live. 3186 mv(c_rarg0, xthread); 3187 RuntimeAddress target(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)); 3188 relocate(target.rspec(), [&] { 3189 int32_t offset; 3190 la_patchable(t0, target, offset); 3191 jalr(x1, t0, offset); 3192 }); 3193 leave(); 3194 3195 // We have already removed our own frame. 3196 // throw_delayed_StackOverflowError will think that it's been 3197 // called by our caller. 3198 target = RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()); 3199 relocate(target.rspec(), [&] { 3200 int32_t offset; 3201 la_patchable(t0, target, offset); 3202 jalr(x0, t0, offset); 3203 }); 3204 should_not_reach_here(); 3205 3206 bind(no_reserved_zone_enabling); 3207 } 3208 3209 // Move the address of the polling page into dest. 3210 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { 3211 ld(dest, Address(xthread, JavaThread::polling_page_offset())); 3212 } 3213 3214 // Read the polling page. The address of the polling page must 3215 // already be in r. 
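// The loaded value is discarded (destination is zr); the access itself, tagged with the given
// relocation type, is what the safepoint machinery recognizes as a poll.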
3216 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { 3217 relocate(rtype, [&] { 3218 lwu(zr, Address(r, offset)); 3219 }); 3220 } 3221 3222 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 3223 #ifdef ASSERT 3224 { 3225 ThreadInVMfromUnknown tiv; 3226 assert (UseCompressedOops, "should only be used for compressed oops"); 3227 assert (Universe::heap() != nullptr, "java heap should be initialized"); 3228 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 3229 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 3230 } 3231 #endif 3232 int oop_index = oop_recorder()->find_index(obj); 3233 relocate(oop_Relocation::spec(oop_index), [&] { 3234 li32(dst, 0xDEADBEEF); 3235 }); 3236 zero_extend(dst, dst, 32); 3237 } 3238 3239 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 3240 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 3241 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 3242 int index = oop_recorder()->find_index(k); 3243 assert(!Universe::heap()->is_in(k), "should not be an oop"); 3244 3245 narrowKlass nk = CompressedKlassPointers::encode(k); 3246 relocate(metadata_Relocation::spec(index), [&] { 3247 li32(dst, nk); 3248 }); 3249 zero_extend(dst, dst, 32); 3250 } 3251 3252 // Maybe emit a call via a trampoline. If the code cache is small 3253 // trampolines won't be emitted. 3254 address MacroAssembler::trampoline_call(Address entry) { 3255 assert(entry.rspec().type() == relocInfo::runtime_call_type || 3256 entry.rspec().type() == relocInfo::opt_virtual_call_type || 3257 entry.rspec().type() == relocInfo::static_call_type || 3258 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); 3259 3260 address target = entry.target(); 3261 3262 // We need a trampoline if branches are far. 3263 if (far_branches()) { 3264 if (!in_scratch_emit_size()) { 3265 if (entry.rspec().type() == relocInfo::runtime_call_type) { 3266 assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs"); 3267 code()->share_trampoline_for(entry.target(), offset()); 3268 } else { 3269 address stub = emit_trampoline_stub(offset(), target); 3270 if (stub == nullptr) { 3271 postcond(pc() == badAddress); 3272 return nullptr; // CodeCache is full 3273 } 3274 } 3275 } 3276 target = pc(); 3277 } 3278 3279 address call_pc = pc(); 3280 #ifdef ASSERT 3281 if (entry.rspec().type() != relocInfo::runtime_call_type) { 3282 assert_alignment(call_pc); 3283 } 3284 #endif 3285 relocate(entry.rspec(), [&] { 3286 jal(target); 3287 }); 3288 3289 postcond(pc() != badAddress); 3290 return call_pc; 3291 } 3292 3293 address MacroAssembler::ic_call(address entry, jint method_index) { 3294 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); 3295 IncompressibleRegion ir(this); // relocations 3296 movptr(t1, (address)Universe::non_oop_word()); 3297 assert_cond(entry != nullptr); 3298 return trampoline_call(Address(entry, rh)); 3299 } 3300 3301 // Emit a trampoline stub for a call to a target which is too far away. 
3302 // 3303 // code sequences: 3304 // 3305 // call-site: 3306 // branch-and-link to <destination> or <trampoline stub> 3307 // 3308 // Related trampoline stub for this call site in the stub section: 3309 // load the call target from the constant pool 3310 // branch (RA still points to the call site above) 3311 3312 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, 3313 address dest) { 3314 // Max stub size: alignment nop, TrampolineStub. 3315 address stub = start_a_stub(max_trampoline_stub_size()); 3316 if (stub == nullptr) { 3317 return nullptr; // CodeBuffer::expand failed 3318 } 3319 3320 // We are always 4-byte aligned here. 3321 assert_alignment(pc()); 3322 3323 // Create a trampoline stub relocation which relates this trampoline stub 3324 // with the call instruction at insts_call_instruction_offset in the 3325 // instructions code-section. 3326 3327 // Make sure the address of destination 8-byte aligned after 3 instructions. 3328 align(wordSize, NativeCallTrampolineStub::data_offset); 3329 3330 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() + 3331 insts_call_instruction_offset); 3332 const int stub_start_offset = offset(); 3333 relocate(rh, [&] { 3334 // Now, create the trampoline stub's code: 3335 // - load the call 3336 // - call 3337 Label target; 3338 ld(t0, target); // auipc + ld 3339 jr(t0); // jalr 3340 bind(target); 3341 assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, 3342 "should be"); 3343 assert(offset() % wordSize == 0, "bad alignment"); 3344 emit_int64((int64_t)dest); 3345 }); 3346 3347 const address stub_start_addr = addr_at(stub_start_offset); 3348 3349 assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); 3350 3351 end_a_stub(); 3352 return stub_start_addr; 3353 } 3354 3355 int MacroAssembler::max_trampoline_stub_size() { 3356 // Max stub size: alignment nop, TrampolineStub. 3357 return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; 3358 } 3359 3360 int MacroAssembler::static_call_stub_size() { 3361 // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr 3362 return 12 * NativeInstruction::instruction_size; 3363 } 3364 3365 Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) { 3366 switch (dst.getMode()) { 3367 case Address::base_plus_offset: 3368 // This is the expected mode, although we allow all the other 3369 // forms below. 
3370 return form_address(tmp, dst.base(), dst.offset()); 3371 default: 3372 la(tmp, dst); 3373 return Address(tmp); 3374 } 3375 } 3376 3377 void MacroAssembler::increment(const Address dst, int64_t value, Register tmp1, Register tmp2) { 3378 assert(((dst.getMode() == Address::base_plus_offset && 3379 is_simm12(dst.offset())) || is_simm12(value)), 3380 "invalid value and address mode combination"); 3381 Address adr = add_memory_helper(dst, tmp2); 3382 assert(!adr.uses(tmp1), "invalid dst for address increment"); 3383 ld(tmp1, adr); 3384 add(tmp1, tmp1, value, tmp2); 3385 sd(tmp1, adr); 3386 } 3387 3388 void MacroAssembler::incrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 3389 assert(((dst.getMode() == Address::base_plus_offset && 3390 is_simm12(dst.offset())) || is_simm12(value)), 3391 "invalid value and address mode combination"); 3392 Address adr = add_memory_helper(dst, tmp2); 3393 assert(!adr.uses(tmp1), "invalid dst for address increment"); 3394 lwu(tmp1, adr); 3395 addw(tmp1, tmp1, value, tmp2); 3396 sw(tmp1, adr); 3397 } 3398 3399 void MacroAssembler::decrement(const Address dst, int64_t value, Register tmp1, Register tmp2) { 3400 assert(((dst.getMode() == Address::base_plus_offset && 3401 is_simm12(dst.offset())) || is_simm12(value)), 3402 "invalid value and address mode combination"); 3403 Address adr = add_memory_helper(dst, tmp2); 3404 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 3405 ld(tmp1, adr); 3406 sub(tmp1, tmp1, value, tmp2); 3407 sd(tmp1, adr); 3408 } 3409 3410 void MacroAssembler::decrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 3411 assert(((dst.getMode() == Address::base_plus_offset && 3412 is_simm12(dst.offset())) || is_simm12(value)), 3413 "invalid value and address mode combination"); 3414 Address adr = add_memory_helper(dst, tmp2); 3415 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 3416 lwu(tmp1, adr); 3417 subw(tmp1, tmp1, value, tmp2); 3418 sw(tmp1, adr); 3419 } 3420 3421 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { 3422 assert_different_registers(src1, t0); 3423 relocate(src2.rspec(), [&] { 3424 int32_t offset; 3425 la_patchable(t0, src2, offset); 3426 ld(t0, Address(t0, offset)); 3427 }); 3428 beq(src1, t0, equal); 3429 } 3430 3431 void MacroAssembler::load_method_holder_cld(Register result, Register method) { 3432 load_method_holder(result, method); 3433 ld(result, Address(result, InstanceKlass::class_loader_data_offset())); 3434 } 3435 3436 void MacroAssembler::load_method_holder(Register holder, Register method) { 3437 ld(holder, Address(method, Method::const_offset())); // ConstMethod* 3438 ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* 3439 ld(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass* 3440 } 3441 3442 // string indexof 3443 // compute index by trailing zeros 3444 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, 3445 Register match_mask, Register result, 3446 Register ch2, Register tmp, 3447 bool haystack_isL) { 3448 int haystack_chr_shift = haystack_isL ? 
0 : 1; 3449 srl(match_mask, match_mask, trailing_zeros); 3450 srli(match_mask, match_mask, 1); 3451 srli(tmp, trailing_zeros, LogBitsPerByte); 3452 if (!haystack_isL) andi(tmp, tmp, 0xE); 3453 add(haystack, haystack, tmp); 3454 ld(ch2, Address(haystack)); 3455 if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); 3456 add(result, result, tmp); 3457 } 3458 3459 // string indexof 3460 // Find pattern element in src, compute match mask, 3461 // only the first occurrence of 0x80/0x8000 at low bits is the valid match index 3462 // match mask patterns and corresponding indices would be like: 3463 // - 0x8080808080808080 (Latin1) 3464 // - 7 6 5 4 3 2 1 0 (match index) 3465 // - 0x8000800080008000 (UTF16) 3466 // - 3 2 1 0 (match index) 3467 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, 3468 Register mask1, Register mask2) { 3469 xorr(src, pattern, src); 3470 sub(match_mask, src, mask1); 3471 orr(src, src, mask2); 3472 notr(src, src); 3473 andr(match_mask, match_mask, src); 3474 } 3475 3476 #ifdef COMPILER2 3477 // Code for BigInteger::mulAdd intrinsic 3478 // out = x10 3479 // in = x11 3480 // offset = x12 (already out.length-offset) 3481 // len = x13 3482 // k = x14 3483 // tmp = x28 3484 // 3485 // pseudo code from java implementation: 3486 // long kLong = k & LONG_MASK; 3487 // carry = 0; 3488 // offset = out.length-offset - 1; 3489 // for (int j = len - 1; j >= 0; j--) { 3490 // product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; 3491 // out[offset--] = (int)product; 3492 // carry = product >>> 32; 3493 // } 3494 // return (int)carry; 3495 void MacroAssembler::mul_add(Register out, Register in, Register offset, 3496 Register len, Register k, Register tmp) { 3497 Label L_tail_loop, L_unroll, L_end; 3498 mv(tmp, out); 3499 mv(out, zr); 3500 blez(len, L_end); 3501 zero_extend(k, k, 32); 3502 slliw(t0, offset, LogBytesPerInt); 3503 add(offset, tmp, t0); 3504 slliw(t0, len, LogBytesPerInt); 3505 add(in, in, t0); 3506 3507 const int unroll = 8; 3508 mv(tmp, unroll); 3509 blt(len, tmp, L_tail_loop); 3510 bind(L_unroll); 3511 for (int i = 0; i < unroll; i++) { 3512 sub(in, in, BytesPerInt); 3513 lwu(t0, Address(in, 0)); 3514 mul(t1, t0, k); 3515 add(t0, t1, out); 3516 sub(offset, offset, BytesPerInt); 3517 lwu(t1, Address(offset, 0)); 3518 add(t0, t0, t1); 3519 sw(t0, Address(offset, 0)); 3520 srli(out, t0, 32); 3521 } 3522 subw(len, len, tmp); 3523 bge(len, tmp, L_unroll); 3524 3525 bind(L_tail_loop); 3526 blez(len, L_end); 3527 sub(in, in, BytesPerInt); 3528 lwu(t0, Address(in, 0)); 3529 mul(t1, t0, k); 3530 add(t0, t1, out); 3531 sub(offset, offset, BytesPerInt); 3532 lwu(t1, Address(offset, 0)); 3533 add(t0, t0, t1); 3534 sw(t0, Address(offset, 0)); 3535 srli(out, t0, 32); 3536 subw(len, len, 1); 3537 j(L_tail_loop); 3538 3539 bind(L_end); 3540 } 3541 3542 // add two unsigned input and output carry 3543 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) 3544 { 3545 assert_different_registers(dst, carry); 3546 assert_different_registers(dst, src2); 3547 add(dst, src1, src2); 3548 sltu(carry, dst, src2); 3549 } 3550 3551 // add two input with carry 3552 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) { 3553 assert_different_registers(dst, carry); 3554 add(dst, src1, src2); 3555 add(dst, dst, carry); 3556 } 3557 3558 // add two unsigned input with carry and output carry 3559 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) 
{ 3560 assert_different_registers(dst, src2); 3561 adc(dst, src1, src2, carry); 3562 sltu(carry, dst, src2); 3563 } 3564 3565 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, 3566 Register src1, Register src2, Register carry) { 3567 cad(dest_lo, dest_lo, src1, carry); 3568 add(dest_hi, dest_hi, carry); 3569 cad(dest_lo, dest_lo, src2, carry); 3570 add(final_dest_hi, dest_hi, carry); 3571 } 3572 3573 /** 3574 * Multiply 32 bit by 32 bit first loop. 3575 */ 3576 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, 3577 Register y, Register y_idx, Register z, 3578 Register carry, Register product, 3579 Register idx, Register kdx) { 3580 // jlong carry, x[], y[], z[]; 3581 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 3582 // long product = y[idx] * x[xstart] + carry; 3583 // z[kdx] = (int)product; 3584 // carry = product >>> 32; 3585 // } 3586 // z[xstart] = (int)carry; 3587 3588 Label L_first_loop, L_first_loop_exit; 3589 blez(idx, L_first_loop_exit); 3590 3591 shadd(t0, xstart, x, t0, LogBytesPerInt); 3592 lwu(x_xstart, Address(t0, 0)); 3593 3594 bind(L_first_loop); 3595 subw(idx, idx, 1); 3596 shadd(t0, idx, y, t0, LogBytesPerInt); 3597 lwu(y_idx, Address(t0, 0)); 3598 mul(product, x_xstart, y_idx); 3599 add(product, product, carry); 3600 srli(carry, product, 32); 3601 subw(kdx, kdx, 1); 3602 shadd(t0, kdx, z, t0, LogBytesPerInt); 3603 sw(product, Address(t0, 0)); 3604 bgtz(idx, L_first_loop); 3605 3606 bind(L_first_loop_exit); 3607 } 3608 3609 /** 3610 * Multiply 64 bit by 64 bit first loop. 3611 */ 3612 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, 3613 Register y, Register y_idx, Register z, 3614 Register carry, Register product, 3615 Register idx, Register kdx) { 3616 // 3617 // jlong carry, x[], y[], z[]; 3618 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 3619 // huge_128 product = y[idx] * x[xstart] + carry; 3620 // z[kdx] = (jlong)product; 3621 // carry = (jlong)(product >>> 64); 3622 // } 3623 // z[xstart] = carry; 3624 // 3625 3626 Label L_first_loop, L_first_loop_exit; 3627 Label L_one_x, L_one_y, L_multiply; 3628 3629 subw(xstart, xstart, 1); 3630 bltz(xstart, L_one_x); 3631 3632 shadd(t0, xstart, x, t0, LogBytesPerInt); 3633 ld(x_xstart, Address(t0, 0)); 3634 ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian 3635 3636 bind(L_first_loop); 3637 subw(idx, idx, 1); 3638 bltz(idx, L_first_loop_exit); 3639 subw(idx, idx, 1); 3640 bltz(idx, L_one_y); 3641 3642 shadd(t0, idx, y, t0, LogBytesPerInt); 3643 ld(y_idx, Address(t0, 0)); 3644 ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian 3645 bind(L_multiply); 3646 3647 mulhu(t0, x_xstart, y_idx); 3648 mul(product, x_xstart, y_idx); 3649 cad(product, product, carry, t1); 3650 adc(carry, t0, zr, t1); 3651 3652 subw(kdx, kdx, 2); 3653 ror_imm(product, product, 32); // back to big-endian 3654 shadd(t0, kdx, z, t0, LogBytesPerInt); 3655 sd(product, Address(t0, 0)); 3656 3657 j(L_first_loop); 3658 3659 bind(L_one_y); 3660 lwu(y_idx, Address(y, 0)); 3661 j(L_multiply); 3662 3663 bind(L_one_x); 3664 lwu(x_xstart, Address(x, 0)); 3665 j(L_first_loop); 3666 3667 bind(L_first_loop_exit); 3668 } 3669 3670 /** 3671 * Multiply 128 bit by 128 bit. Unrolled inner loop. 
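 * Each iteration consumes four 32-bit digits of y (two 64-bit words) and propagates
 * the carries with the cad/cadc/adc helpers defined above.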
3672 * 3673 */ 3674 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, 3675 Register carry, Register carry2, 3676 Register idx, Register jdx, 3677 Register yz_idx1, Register yz_idx2, 3678 Register tmp, Register tmp3, Register tmp4, 3679 Register tmp6, Register product_hi) { 3680 // jlong carry, x[], y[], z[]; 3681 // int kdx = xstart+1; 3682 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 3683 // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; 3684 // jlong carry2 = (jlong)(tmp3 >>> 64); 3685 // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; 3686 // carry = (jlong)(tmp4 >>> 64); 3687 // z[kdx+idx+1] = (jlong)tmp3; 3688 // z[kdx+idx] = (jlong)tmp4; 3689 // } 3690 // idx += 2; 3691 // if (idx > 0) { 3692 // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; 3693 // z[kdx+idx] = (jlong)yz_idx1; 3694 // carry = (jlong)(yz_idx1 >>> 64); 3695 // } 3696 // 3697 3698 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 3699 3700 srliw(jdx, idx, 2); 3701 3702 bind(L_third_loop); 3703 3704 subw(jdx, jdx, 1); 3705 bltz(jdx, L_third_loop_exit); 3706 subw(idx, idx, 4); 3707 3708 shadd(t0, idx, y, t0, LogBytesPerInt); 3709 ld(yz_idx2, Address(t0, 0)); 3710 ld(yz_idx1, Address(t0, wordSize)); 3711 3712 shadd(tmp6, idx, z, t0, LogBytesPerInt); 3713 3714 ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian 3715 ror_imm(yz_idx2, yz_idx2, 32); 3716 3717 ld(t1, Address(tmp6, 0)); 3718 ld(t0, Address(tmp6, wordSize)); 3719 3720 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 3721 mulhu(tmp4, product_hi, yz_idx1); 3722 3723 ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian 3724 ror_imm(t1, t1, 32, tmp); 3725 3726 mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp 3727 mulhu(carry2, product_hi, yz_idx2); 3728 3729 cad(tmp3, tmp3, carry, carry); 3730 adc(tmp4, tmp4, zr, carry); 3731 cad(tmp3, tmp3, t0, t0); 3732 cadc(tmp4, tmp4, tmp, t0); 3733 adc(carry, carry2, zr, t0); 3734 cad(tmp4, tmp4, t1, carry2); 3735 adc(carry, carry, zr, carry2); 3736 3737 ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian 3738 ror_imm(tmp4, tmp4, 32); 3739 sd(tmp4, Address(tmp6, 0)); 3740 sd(tmp3, Address(tmp6, wordSize)); 3741 3742 j(L_third_loop); 3743 3744 bind(L_third_loop_exit); 3745 3746 andi(idx, idx, 0x3); 3747 beqz(idx, L_post_third_loop_done); 3748 3749 Label L_check_1; 3750 subw(idx, idx, 2); 3751 bltz(idx, L_check_1); 3752 3753 shadd(t0, idx, y, t0, LogBytesPerInt); 3754 ld(yz_idx1, Address(t0, 0)); 3755 ror_imm(yz_idx1, yz_idx1, 32); 3756 3757 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 3758 mulhu(tmp4, product_hi, yz_idx1); 3759 3760 shadd(t0, idx, z, t0, LogBytesPerInt); 3761 ld(yz_idx2, Address(t0, 0)); 3762 ror_imm(yz_idx2, yz_idx2, 32, tmp); 3763 3764 add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); 3765 3766 ror_imm(tmp3, tmp3, 32, tmp); 3767 sd(tmp3, Address(t0, 0)); 3768 3769 bind(L_check_1); 3770 3771 andi(idx, idx, 0x1); 3772 subw(idx, idx, 1); 3773 bltz(idx, L_post_third_loop_done); 3774 shadd(t0, idx, y, t0, LogBytesPerInt); 3775 lwu(tmp4, Address(t0, 0)); 3776 mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 3777 mulhu(carry2, tmp4, product_hi); 3778 3779 shadd(t0, idx, z, t0, LogBytesPerInt); 3780 lwu(tmp4, Address(t0, 0)); 3781 3782 add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); 3783 3784 shadd(t0, idx, z, t0, LogBytesPerInt); 3785 sw(tmp3, Address(t0, 0)); 3786 3787 slli(t0, carry2, 32); 3788 
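// Together with the slli above, this forms carry = (carry2 << 32) | (tmp3 >>> 32),
// the carry produced by the final single-digit step.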
srli(carry, tmp3, 32); 3789 orr(carry, carry, t0); 3790 3791 bind(L_post_third_loop_done); 3792 } 3793 3794 /** 3795 * Code for BigInteger::multiplyToLen() intrinsic. 3796 * 3797 * x10: x 3798 * x11: xlen 3799 * x12: y 3800 * x13: ylen 3801 * x14: z 3802 * x15: zlen 3803 * x16: tmp1 3804 * x17: tmp2 3805 * x7: tmp3 3806 * x28: tmp4 3807 * x29: tmp5 3808 * x30: tmp6 3809 * x31: tmp7 3810 */ 3811 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, 3812 Register z, Register zlen, 3813 Register tmp1, Register tmp2, Register tmp3, Register tmp4, 3814 Register tmp5, Register tmp6, Register product_hi) { 3815 assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); 3816 3817 const Register idx = tmp1; 3818 const Register kdx = tmp2; 3819 const Register xstart = tmp3; 3820 3821 const Register y_idx = tmp4; 3822 const Register carry = tmp5; 3823 const Register product = xlen; 3824 const Register x_xstart = zlen; // reuse register 3825 3826 mv(idx, ylen); // idx = ylen; 3827 mv(kdx, zlen); // kdx = xlen+ylen; 3828 mv(carry, zr); // carry = 0; 3829 3830 Label L_multiply_64_x_64_loop, L_done; 3831 3832 subw(xstart, xlen, 1); 3833 bltz(xstart, L_done); 3834 3835 const Register jdx = tmp1; 3836 3837 if (AvoidUnalignedAccesses) { 3838 // Check if x and y are both 8-byte aligned. 3839 orr(t0, xlen, ylen); 3840 test_bit(t0, t0, 0); 3841 beqz(t0, L_multiply_64_x_64_loop); 3842 3843 multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 3844 shadd(t0, xstart, z, t0, LogBytesPerInt); 3845 sw(carry, Address(t0, 0)); 3846 3847 Label L_second_loop_unaligned; 3848 bind(L_second_loop_unaligned); 3849 mv(carry, zr); 3850 mv(jdx, ylen); 3851 subw(xstart, xstart, 1); 3852 bltz(xstart, L_done); 3853 sub(sp, sp, 2 * wordSize); 3854 sd(z, Address(sp, 0)); 3855 sd(zr, Address(sp, wordSize)); 3856 shadd(t0, xstart, z, t0, LogBytesPerInt); 3857 addi(z, t0, 4); 3858 shadd(t0, xstart, x, t0, LogBytesPerInt); 3859 lwu(product, Address(t0, 0)); 3860 Label L_third_loop, L_third_loop_exit; 3861 3862 blez(jdx, L_third_loop_exit); 3863 3864 bind(L_third_loop); 3865 subw(jdx, jdx, 1); 3866 shadd(t0, jdx, y, t0, LogBytesPerInt); 3867 lwu(t0, Address(t0, 0)); 3868 mul(t1, t0, product); 3869 add(t0, t1, carry); 3870 shadd(tmp6, jdx, z, t1, LogBytesPerInt); 3871 lwu(t1, Address(tmp6, 0)); 3872 add(t0, t0, t1); 3873 sw(t0, Address(tmp6, 0)); 3874 srli(carry, t0, 32); 3875 bgtz(jdx, L_third_loop); 3876 3877 bind(L_third_loop_exit); 3878 ld(z, Address(sp, 0)); 3879 addi(sp, sp, 2 * wordSize); 3880 shadd(t0, xstart, z, t0, LogBytesPerInt); 3881 sw(carry, Address(t0, 0)); 3882 3883 j(L_second_loop_unaligned); 3884 } 3885 3886 bind(L_multiply_64_x_64_loop); 3887 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 3888 3889 Label L_second_loop_aligned; 3890 beqz(kdx, L_second_loop_aligned); 3891 3892 Label L_carry; 3893 subw(kdx, kdx, 1); 3894 beqz(kdx, L_carry); 3895 3896 shadd(t0, kdx, z, t0, LogBytesPerInt); 3897 sw(carry, Address(t0, 0)); 3898 srli(carry, carry, 32); 3899 subw(kdx, kdx, 1); 3900 3901 bind(L_carry); 3902 shadd(t0, kdx, z, t0, LogBytesPerInt); 3903 sw(carry, Address(t0, 0)); 3904 3905 // Second and third (nested) loops. 
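// The pseudo code below follows the java implementation of BigInteger::multiplyToLen():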
3906 // 3907 // for (int i = xstart-1; i >= 0; i--) { // Second loop 3908 // carry = 0; 3909 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 3910 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 3911 // (z[k] & LONG_MASK) + carry; 3912 // z[k] = (int)product; 3913 // carry = product >>> 32; 3914 // } 3915 // z[i] = (int)carry; 3916 // } 3917 // 3918 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi 3919 3920 bind(L_second_loop_aligned); 3921 mv(carry, zr); // carry = 0; 3922 mv(jdx, ylen); // j = ystart+1 3923 3924 subw(xstart, xstart, 1); // i = xstart-1; 3925 bltz(xstart, L_done); 3926 3927 sub(sp, sp, 4 * wordSize); 3928 sd(z, Address(sp, 0)); 3929 3930 Label L_last_x; 3931 shadd(t0, xstart, z, t0, LogBytesPerInt); 3932 addi(z, t0, 4); 3933 subw(xstart, xstart, 1); // i = xstart-1; 3934 bltz(xstart, L_last_x); 3935 3936 shadd(t0, xstart, x, t0, LogBytesPerInt); 3937 ld(product_hi, Address(t0, 0)); 3938 ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian 3939 3940 Label L_third_loop_prologue; 3941 bind(L_third_loop_prologue); 3942 3943 sd(ylen, Address(sp, wordSize)); 3944 sd(x, Address(sp, 2 * wordSize)); 3945 sd(xstart, Address(sp, 3 * wordSize)); 3946 multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, 3947 tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); 3948 ld(z, Address(sp, 0)); 3949 ld(ylen, Address(sp, wordSize)); 3950 ld(x, Address(sp, 2 * wordSize)); 3951 ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen 3952 addi(sp, sp, 4 * wordSize); 3953 3954 addiw(tmp3, xlen, 1); 3955 shadd(t0, tmp3, z, t0, LogBytesPerInt); 3956 sw(carry, Address(t0, 0)); 3957 3958 subw(tmp3, tmp3, 1); 3959 bltz(tmp3, L_done); 3960 3961 srli(carry, carry, 32); 3962 shadd(t0, tmp3, z, t0, LogBytesPerInt); 3963 sw(carry, Address(t0, 0)); 3964 j(L_second_loop_aligned); 3965 3966 // Next infrequent code is moved outside loops. 3967 bind(L_last_x); 3968 lwu(product_hi, Address(x, 0)); 3969 j(L_third_loop_prologue); 3970 3971 bind(L_done); 3972 } 3973 #endif 3974 3975 // Count bits of trailing zero chars from lsb to msb until first non-zero element. 3976 // For LL case, one byte for one element, so shift 8 bits once, and for other case, 3977 // shift 16 bits once. 3978 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) { 3979 if (UseZbb) { 3980 assert_different_registers(Rd, Rs, tmp1); 3981 int step = isLL ? 8 : 16; 3982 ctz(Rd, Rs); 3983 andi(tmp1, Rd, step - 1); 3984 sub(Rd, Rd, tmp1); 3985 return; 3986 } 3987 3988 assert_different_registers(Rd, Rs, tmp1, tmp2); 3989 Label Loop; 3990 int step = isLL ? 
8 : 16; 3991 mv(Rd, -step); 3992 mv(tmp2, Rs); 3993 3994 bind(Loop); 3995 addi(Rd, Rd, step); 3996 andi(tmp1, tmp2, ((1 << step) - 1)); 3997 srli(tmp2, tmp2, step); 3998 beqz(tmp1, Loop); 3999 } 4000 4001 // This instruction reads adjacent 4 bytes from the lower half of source register, 4002 // inflate into a register, for example: 4003 // Rs: A7A6A5A4A3A2A1A0 4004 // Rd: 00A300A200A100A0 4005 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) { 4006 assert_different_registers(Rd, Rs, tmp1, tmp2); 4007 4008 mv(tmp1, 0xFF000000); // first byte mask at lower word 4009 andr(Rd, Rs, tmp1); 4010 for (int i = 0; i < 2; i++) { 4011 slli(Rd, Rd, wordSize); 4012 srli(tmp1, tmp1, wordSize); 4013 andr(tmp2, Rs, tmp1); 4014 orr(Rd, Rd, tmp2); 4015 } 4016 slli(Rd, Rd, wordSize); 4017 andi(tmp2, Rs, 0xFF); // last byte mask at lower word 4018 orr(Rd, Rd, tmp2); 4019 } 4020 4021 // This instruction reads adjacent 4 bytes from the upper half of source register, 4022 // inflate into a register, for example: 4023 // Rs: A7A6A5A4A3A2A1A0 4024 // Rd: 00A700A600A500A4 4025 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) { 4026 assert_different_registers(Rd, Rs, tmp1, tmp2); 4027 srli(Rs, Rs, 32); // only upper 32 bits are needed 4028 inflate_lo32(Rd, Rs, tmp1, tmp2); 4029 } 4030 4031 // The size of the blocks erased by the zero_blocks stub. We must 4032 // handle anything smaller than this ourselves in zero_words(). 4033 const int MacroAssembler::zero_words_block_size = 8; 4034 4035 // zero_words() is used by C2 ClearArray patterns. It is as small as 4036 // possible, handling small word counts locally and delegating 4037 // anything larger to the zero_blocks stub. It is expanded many times 4038 // in compiled code, so it is important to keep it short. 4039 4040 // ptr: Address of a buffer to be zeroed. 4041 // cnt: Count in HeapWords. 4042 // 4043 // ptr, cnt, and t0 are clobbered. 4044 address MacroAssembler::zero_words(Register ptr, Register cnt) { 4045 assert(is_power_of_2(zero_words_block_size), "adjust this"); 4046 assert(ptr == x28 && cnt == x29, "mismatch in register usage"); 4047 assert_different_registers(cnt, t0); 4048 4049 BLOCK_COMMENT("zero_words {"); 4050 4051 mv(t0, zero_words_block_size); 4052 Label around, done, done16; 4053 bltu(cnt, t0, around); 4054 { 4055 RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); 4056 assert(zero_blocks.target() != nullptr, "zero_blocks stub has not been generated"); 4057 if (StubRoutines::riscv::complete()) { 4058 address tpc = trampoline_call(zero_blocks); 4059 if (tpc == nullptr) { 4060 DEBUG_ONLY(reset_labels(around)); 4061 postcond(pc() == badAddress); 4062 return nullptr; 4063 } 4064 } else { 4065 jal(zero_blocks); 4066 } 4067 } 4068 bind(around); 4069 for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { 4070 Label l; 4071 test_bit(t0, cnt, exact_log2(i)); 4072 beqz(t0, l); 4073 for (int j = 0; j < i; j++) { 4074 sd(zr, Address(ptr, j * wordSize)); 4075 } 4076 addi(ptr, ptr, i * wordSize); 4077 bind(l); 4078 } 4079 { 4080 Label l; 4081 test_bit(t0, cnt, 0); 4082 beqz(t0, l); 4083 sd(zr, Address(ptr, 0)); 4084 bind(l); 4085 } 4086 4087 BLOCK_COMMENT("} zero_words"); 4088 postcond(pc() != badAddress); 4089 return pc(); 4090 } 4091 4092 #define SmallArraySize (18 * BytesPerLong) 4093 4094 // base: Address of a buffer to be zeroed, 8 bytes aligned. 4095 // cnt: Immediate count in HeapWords. 
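// Roughly:
//   cnt <= SmallArraySize / BytesPerLong : fully unrolled sd(zr, ...) stores
//   otherwise                            : store the leading cnt % 8 words, then loop storing 8 words at a time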
4096 void MacroAssembler::zero_words(Register base, uint64_t cnt) { 4097 assert_different_registers(base, t0, t1); 4098 4099 BLOCK_COMMENT("zero_words {"); 4100 4101 if (cnt <= SmallArraySize / BytesPerLong) { 4102 for (int i = 0; i < (int)cnt; i++) { 4103 sd(zr, Address(base, i * wordSize)); 4104 } 4105 } else { 4106 const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll 4107 int remainder = cnt % unroll; 4108 for (int i = 0; i < remainder; i++) { 4109 sd(zr, Address(base, i * wordSize)); 4110 } 4111 4112 Label loop; 4113 Register cnt_reg = t0; 4114 Register loop_base = t1; 4115 cnt = cnt - remainder; 4116 mv(cnt_reg, cnt); 4117 add(loop_base, base, remainder * wordSize); 4118 bind(loop); 4119 sub(cnt_reg, cnt_reg, unroll); 4120 for (int i = 0; i < unroll; i++) { 4121 sd(zr, Address(loop_base, i * wordSize)); 4122 } 4123 add(loop_base, loop_base, unroll * wordSize); 4124 bnez(cnt_reg, loop); 4125 } 4126 4127 BLOCK_COMMENT("} zero_words"); 4128 } 4129 4130 // base: Address of a buffer to be filled, 8 bytes aligned. 4131 // cnt: Count in 8-byte unit. 4132 // value: Value to be filled with. 4133 // base will point to the end of the buffer after filling. 4134 void MacroAssembler::fill_words(Register base, Register cnt, Register value) { 4135 // Algorithm: 4136 // 4137 // t0 = cnt & 7 4138 // cnt -= t0 4139 // p += t0 4140 // switch (t0): 4141 // switch start: 4142 // do while cnt 4143 // cnt -= 8 4144 // p[-8] = value 4145 // case 7: 4146 // p[-7] = value 4147 // case 6: 4148 // p[-6] = value 4149 // // ... 4150 // case 1: 4151 // p[-1] = value 4152 // case 0: 4153 // p += 8 4154 // do-while end 4155 // switch end 4156 4157 assert_different_registers(base, cnt, value, t0, t1); 4158 4159 Label fini, skip, entry, loop; 4160 const int unroll = 8; // Number of sd instructions we'll unroll 4161 4162 beqz(cnt, fini); 4163 4164 andi(t0, cnt, unroll - 1); 4165 sub(cnt, cnt, t0); 4166 // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 4167 shadd(base, t0, base, t1, 3); 4168 la(t1, entry); 4169 slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) 4170 sub(t1, t1, t0); 4171 jr(t1); 4172 4173 bind(loop); 4174 add(base, base, unroll * 8); 4175 for (int i = -unroll; i < 0; i++) { 4176 sd(value, Address(base, i * 8)); 4177 } 4178 bind(entry); 4179 sub(cnt, cnt, unroll); 4180 bgez(cnt, loop); 4181 4182 bind(fini); 4183 } 4184 4185 // Zero blocks of memory by using CBO.ZERO. 4186 // 4187 // Aligns the base address first sufficiently for CBO.ZERO, then uses 4188 // CBO.ZERO repeatedly for every full block. cnt is the size to be 4189 // zeroed in HeapWords. Returns the count of words left to be zeroed 4190 // in cnt. 4191 // 4192 // NOTE: This is intended to be used in the zero_blocks() stub. If 4193 // you want to use it elsewhere, note that cnt must be >= CacheLineSize. 4194 void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) { 4195 Label initial_table_end, loop; 4196 4197 // Align base with cache line size. 4198 neg(tmp1, base); 4199 andi(tmp1, tmp1, CacheLineSize - 1); 4200 4201 // tmp1: the number of bytes to be filled to align the base with cache line size. 
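// tmp1 = (-base) & (CacheLineSize - 1); e.g. assuming a 64-byte cache line and a base whose
// low bits are 0x28, tmp1 = 0x18 (24 bytes, i.e. three words).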
4202 add(base, base, tmp1); 4203 srai(tmp2, tmp1, 3); 4204 sub(cnt, cnt, tmp2); 4205 srli(tmp2, tmp1, 1); 4206 la(tmp1, initial_table_end); 4207 sub(tmp2, tmp1, tmp2); 4208 jr(tmp2); 4209 for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) { 4210 sd(zr, Address(base, i)); 4211 } 4212 bind(initial_table_end); 4213 4214 mv(tmp1, CacheLineSize / wordSize); 4215 bind(loop); 4216 cbo_zero(base); 4217 sub(cnt, cnt, tmp1); 4218 add(base, base, CacheLineSize); 4219 bge(cnt, tmp1, loop); 4220 } 4221 4222 #define FCVT_SAFE(FLOATCVT, FLOATSIG) \ 4223 void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ 4224 Label done; \ 4225 assert_different_registers(dst, tmp); \ 4226 fclass_##FLOATSIG(tmp, src); \ 4227 mv(dst, zr); \ 4228 /* check if src is NaN */ \ 4229 andi(tmp, tmp, 0b1100000000); \ 4230 bnez(tmp, done); \ 4231 FLOATCVT(dst, src); \ 4232 bind(done); \ 4233 } 4234 4235 FCVT_SAFE(fcvt_w_s, s); 4236 FCVT_SAFE(fcvt_l_s, s); 4237 FCVT_SAFE(fcvt_w_d, d); 4238 FCVT_SAFE(fcvt_l_d, d); 4239 4240 #undef FCVT_SAFE 4241 4242 #define FCMP(FLOATTYPE, FLOATSIG) \ 4243 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ 4244 FloatRegister Rs2, int unordered_result) { \ 4245 Label Ldone; \ 4246 if (unordered_result < 0) { \ 4247 /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ 4248 /* installs 1 if gt else 0 */ \ 4249 flt_##FLOATSIG(result, Rs2, Rs1); \ 4250 /* Rs1 > Rs2, install 1 */ \ 4251 bgtz(result, Ldone); \ 4252 feq_##FLOATSIG(result, Rs1, Rs2); \ 4253 addi(result, result, -1); \ 4254 /* Rs1 = Rs2, install 0 */ \ 4255 /* NaN or Rs1 < Rs2, install -1 */ \ 4256 bind(Ldone); \ 4257 } else { \ 4258 /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ 4259 /* installs 1 if gt or unordered else 0 */ \ 4260 flt_##FLOATSIG(result, Rs1, Rs2); \ 4261 /* Rs1 < Rs2, install -1 */ \ 4262 bgtz(result, Ldone); \ 4263 feq_##FLOATSIG(result, Rs1, Rs2); \ 4264 addi(result, result, -1); \ 4265 /* Rs1 = Rs2, install 0 */ \ 4266 /* NaN or Rs1 > Rs2, install 1 */ \ 4267 bind(Ldone); \ 4268 neg(result, result); \ 4269 } \ 4270 } 4271 4272 FCMP(float, s); 4273 FCMP(double, d); 4274 4275 #undef FCMP 4276 4277 // Zero words; len is in bytes 4278 // Destroys all registers except addr 4279 // len must be a nonzero multiple of wordSize 4280 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { 4281 assert_different_registers(addr, len, tmp, t0, t1); 4282 4283 #ifdef ASSERT 4284 { 4285 Label L; 4286 andi(t0, len, BytesPerWord - 1); 4287 beqz(t0, L); 4288 stop("len is not a multiple of BytesPerWord"); 4289 bind(L); 4290 } 4291 #endif // ASSERT 4292 4293 #ifndef PRODUCT 4294 block_comment("zero memory"); 4295 #endif // PRODUCT 4296 4297 Label loop; 4298 Label entry; 4299 4300 // Algorithm: 4301 // 4302 // t0 = cnt & 7 4303 // cnt -= t0 4304 // p += t0 4305 // switch (t0) { 4306 // do { 4307 // cnt -= 8 4308 // p[-8] = 0 4309 // case 7: 4310 // p[-7] = 0 4311 // case 6: 4312 // p[-6] = 0 4313 // ... 
4314 // case 1: 4315 // p[-1] = 0 4316 // case 0: 4317 // p += 8 4318 // } while (cnt) 4319 // } 4320 4321 const int unroll = 8; // Number of sd(zr) instructions we'll unroll 4322 4323 srli(len, len, LogBytesPerWord); 4324 andi(t0, len, unroll - 1); // t0 = cnt % unroll 4325 sub(len, len, t0); // cnt -= unroll 4326 // tmp always points to the end of the region we're about to zero 4327 shadd(tmp, t0, addr, t1, LogBytesPerWord); 4328 la(t1, entry); 4329 slli(t0, t0, 2); 4330 sub(t1, t1, t0); 4331 jr(t1); 4332 bind(loop); 4333 sub(len, len, unroll); 4334 for (int i = -unroll; i < 0; i++) { 4335 sd(zr, Address(tmp, i * wordSize)); 4336 } 4337 bind(entry); 4338 add(tmp, tmp, unroll * wordSize); 4339 bnez(len, loop); 4340 } 4341 4342 // shift left by shamt and add 4343 // Rd = (Rs1 << shamt) + Rs2 4344 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { 4345 if (UseZba) { 4346 if (shamt == 1) { 4347 sh1add(Rd, Rs1, Rs2); 4348 return; 4349 } else if (shamt == 2) { 4350 sh2add(Rd, Rs1, Rs2); 4351 return; 4352 } else if (shamt == 3) { 4353 sh3add(Rd, Rs1, Rs2); 4354 return; 4355 } 4356 } 4357 4358 if (shamt != 0) { 4359 assert_different_registers(Rs2, tmp); 4360 slli(tmp, Rs1, shamt); 4361 add(Rd, Rs2, tmp); 4362 } else { 4363 add(Rd, Rs1, Rs2); 4364 } 4365 } 4366 4367 void MacroAssembler::zero_extend(Register dst, Register src, int bits) { 4368 if (UseZba && bits == 32) { 4369 zext_w(dst, src); 4370 return; 4371 } 4372 4373 if (UseZbb && bits == 16) { 4374 zext_h(dst, src); 4375 return; 4376 } 4377 4378 if (bits == 8) { 4379 zext_b(dst, src); 4380 } else { 4381 slli(dst, src, XLEN - bits); 4382 srli(dst, dst, XLEN - bits); 4383 } 4384 } 4385 4386 void MacroAssembler::sign_extend(Register dst, Register src, int bits) { 4387 if (UseZbb) { 4388 if (bits == 8) { 4389 sext_b(dst, src); 4390 return; 4391 } else if (bits == 16) { 4392 sext_h(dst, src); 4393 return; 4394 } 4395 } 4396 4397 if (bits == 32) { 4398 sext_w(dst, src); 4399 } else { 4400 slli(dst, src, XLEN - bits); 4401 srai(dst, dst, XLEN - bits); 4402 } 4403 } 4404 4405 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) 4406 { 4407 if (src1 == src2) { 4408 mv(dst, zr); 4409 return; 4410 } 4411 Label done; 4412 Register left = src1; 4413 Register right = src2; 4414 if (dst == src1) { 4415 assert_different_registers(dst, src2, tmp); 4416 mv(tmp, src1); 4417 left = tmp; 4418 } else if (dst == src2) { 4419 assert_different_registers(dst, src1, tmp); 4420 mv(tmp, src2); 4421 right = tmp; 4422 } 4423 4424 // installs 1 if gt else 0 4425 slt(dst, right, left); 4426 bnez(dst, done); 4427 slt(dst, left, right); 4428 // dst = -1 if lt; else if eq , dst = 0 4429 neg(dst, dst); 4430 bind(done); 4431 } 4432 4433 // The java_calling_convention describes stack locations as ideal slots on 4434 // a frame with no abi restrictions. Since we must observe abi restrictions 4435 // (like the placement of the register window) the slots must be biased by 4436 // the following value. 
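// reg2offset_in() yields an fp-relative byte offset for an incoming stack slot, while
// reg2offset_out() yields an sp-relative byte offset for an outgoing one (see the move helpers below).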
4437 static int reg2offset_in(VMReg r) { 4438 // Account for saved fp and ra 4439 // This should really be in_preserve_stack_slots 4440 return r->reg2stack() * VMRegImpl::stack_slot_size; 4441 } 4442 4443 static int reg2offset_out(VMReg r) { 4444 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; 4445 } 4446 4447 // On 64 bit we will store integer like items to the stack as 4448 // 64 bits items (riscv64 abi) even though java would only store 4449 // 32bits for a parameter. On 32bit it will simply be 32 bits 4450 // So this routine will do 32->32 on 32bit and 32->64 on 64bit 4451 void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { 4452 if (src.first()->is_stack()) { 4453 if (dst.first()->is_stack()) { 4454 // stack to stack 4455 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4456 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4457 } else { 4458 // stack to reg 4459 lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4460 } 4461 } else if (dst.first()->is_stack()) { 4462 // reg to stack 4463 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 4464 } else { 4465 if (dst.first() != src.first()) { 4466 sign_extend(dst.first()->as_Register(), src.first()->as_Register(), 32); 4467 } 4468 } 4469 } 4470 4471 // An oop arg. Must pass a handle not the oop itself 4472 void MacroAssembler::object_move(OopMap* map, 4473 int oop_handle_offset, 4474 int framesize_in_slots, 4475 VMRegPair src, 4476 VMRegPair dst, 4477 bool is_receiver, 4478 int* receiver_offset) { 4479 assert_cond(map != nullptr && receiver_offset != nullptr); 4480 4481 // must pass a handle. First figure out the location we use as a handle 4482 Register rHandle = dst.first()->is_stack() ? 
t1 : dst.first()->as_Register(); 4483 4484 // See if oop is null if it is we need no handle 4485 4486 if (src.first()->is_stack()) { 4487 // Oop is already on the stack as an argument 4488 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); 4489 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); 4490 if (is_receiver) { 4491 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; 4492 } 4493 4494 ld(t0, Address(fp, reg2offset_in(src.first()))); 4495 la(rHandle, Address(fp, reg2offset_in(src.first()))); 4496 // conditionally move a null 4497 Label notZero1; 4498 bnez(t0, notZero1); 4499 mv(rHandle, zr); 4500 bind(notZero1); 4501 } else { 4502 4503 // Oop is in a register we must store it to the space we reserve 4504 // on the stack for oop_handles and pass a handle if oop is non-null 4505 4506 const Register rOop = src.first()->as_Register(); 4507 int oop_slot = -1; 4508 if (rOop == j_rarg0) { 4509 oop_slot = 0; 4510 } else if (rOop == j_rarg1) { 4511 oop_slot = 1; 4512 } else if (rOop == j_rarg2) { 4513 oop_slot = 2; 4514 } else if (rOop == j_rarg3) { 4515 oop_slot = 3; 4516 } else if (rOop == j_rarg4) { 4517 oop_slot = 4; 4518 } else if (rOop == j_rarg5) { 4519 oop_slot = 5; 4520 } else if (rOop == j_rarg6) { 4521 oop_slot = 6; 4522 } else { 4523 assert(rOop == j_rarg7, "wrong register"); 4524 oop_slot = 7; 4525 } 4526 4527 oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; 4528 int offset = oop_slot * VMRegImpl::stack_slot_size; 4529 4530 map->set_oop(VMRegImpl::stack2reg(oop_slot)); 4531 // Store oop in handle area, may be null 4532 sd(rOop, Address(sp, offset)); 4533 if (is_receiver) { 4534 *receiver_offset = offset; 4535 } 4536 4537 //rOop maybe the same as rHandle 4538 if (rOop == rHandle) { 4539 Label isZero; 4540 beqz(rOop, isZero); 4541 la(rHandle, Address(sp, offset)); 4542 bind(isZero); 4543 } else { 4544 Label notZero2; 4545 la(rHandle, Address(sp, offset)); 4546 bnez(rOop, notZero2); 4547 mv(rHandle, zr); 4548 bind(notZero2); 4549 } 4550 } 4551 4552 // If arg is on the stack then place it otherwise it is already in correct reg. 
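// (rHandle was chosen above: t1 when the destination is a stack slot, otherwise the destination register itself.)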
4553 if (dst.first()->is_stack()) { 4554 sd(rHandle, Address(sp, reg2offset_out(dst.first()))); 4555 } 4556 } 4557 4558 // A float arg may have to do float reg int reg conversion 4559 void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { 4560 assert(src.first()->is_stack() && dst.first()->is_stack() || 4561 src.first()->is_reg() && dst.first()->is_reg() || 4562 src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); 4563 if (src.first()->is_stack()) { 4564 if (dst.first()->is_stack()) { 4565 lwu(tmp, Address(fp, reg2offset_in(src.first()))); 4566 sw(tmp, Address(sp, reg2offset_out(dst.first()))); 4567 } else if (dst.first()->is_Register()) { 4568 lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4569 } else { 4570 ShouldNotReachHere(); 4571 } 4572 } else if (src.first() != dst.first()) { 4573 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { 4574 fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 4575 } else { 4576 ShouldNotReachHere(); 4577 } 4578 } 4579 } 4580 4581 // A long move 4582 void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { 4583 if (src.first()->is_stack()) { 4584 if (dst.first()->is_stack()) { 4585 // stack to stack 4586 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4587 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4588 } else { 4589 // stack to reg 4590 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4591 } 4592 } else if (dst.first()->is_stack()) { 4593 // reg to stack 4594 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 4595 } else { 4596 if (dst.first() != src.first()) { 4597 mv(dst.first()->as_Register(), src.first()->as_Register()); 4598 } 4599 } 4600 } 4601 4602 // A double move 4603 void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { 4604 assert(src.first()->is_stack() && dst.first()->is_stack() || 4605 src.first()->is_reg() && dst.first()->is_reg() || 4606 src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); 4607 if (src.first()->is_stack()) { 4608 if (dst.first()->is_stack()) { 4609 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4610 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4611 } else if (dst.first()-> is_Register()) { 4612 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4613 } else { 4614 ShouldNotReachHere(); 4615 } 4616 } else if (src.first() != dst.first()) { 4617 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { 4618 fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 4619 } else { 4620 ShouldNotReachHere(); 4621 } 4622 } 4623 } 4624 4625 void MacroAssembler::rt_call(address dest, Register tmp) { 4626 CodeBlob *cb = CodeCache::find_blob(dest); 4627 RuntimeAddress target(dest); 4628 if (cb) { 4629 far_call(target); 4630 } else { 4631 relocate(target.rspec(), [&] { 4632 int32_t offset; 4633 la_patchable(tmp, target, offset); 4634 jalr(x1, tmp, offset); 4635 }); 4636 } 4637 } 4638 4639 // Implements fast-locking. 4640 // Branches to slow upon failure to lock the object. 4641 // Falls through upon success. 
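// Only used with LM_LIGHTWEIGHT locking, as asserted below.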
4642 // 4643 // - obj: the object to be locked 4644 // - hdr: the header, already loaded from obj, will be destroyed 4645 // - tmp1, tmp2: temporary registers, will be destroyed 4646 void MacroAssembler::fast_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) { 4647 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); 4648 assert_different_registers(obj, hdr, tmp1, tmp2); 4649 4650 // Check if we would have space on lock-stack for the object. 4651 lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); 4652 mv(tmp2, (unsigned)LockStack::end_offset()); 4653 bge(tmp1, tmp2, slow, /* is_far */ true); 4654 4655 // Load (object->mark() | 1) into hdr 4656 ori(hdr, hdr, markWord::unlocked_value); 4657 // Clear lock-bits, into tmp2 4658 xori(tmp2, hdr, markWord::unlocked_value); 4659 4660 // Try to swing header from unlocked to locked 4661 Label success; 4662 cmpxchgptr(hdr, tmp2, obj, tmp1, success, &slow); 4663 bind(success); 4664 4665 // After successful lock, push object on lock-stack 4666 lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); 4667 add(tmp2, xthread, tmp1); 4668 sd(obj, Address(tmp2, 0)); 4669 addw(tmp1, tmp1, oopSize); 4670 sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); 4671 } 4672 4673 // Implements fast-unlocking. 4674 // Branches to slow upon failure. 4675 // Falls through upon success. 4676 // 4677 // - obj: the object to be unlocked 4678 // - hdr: the (pre-loaded) header of the object 4679 // - tmp1, tmp2: temporary registers 4680 void MacroAssembler::fast_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) { 4681 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); 4682 assert_different_registers(obj, hdr, tmp1, tmp2); 4683 4684 #ifdef ASSERT 4685 { 4686 // The following checks rely on the fact that LockStack is only ever modified by 4687 // its owning thread, even if the lock got inflated concurrently; removal of LockStack 4688 // entries after inflation will happen delayed in that case. 4689 4690 // Check for lock-stack underflow. 4691 Label stack_ok; 4692 lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); 4693 mv(tmp2, (unsigned)LockStack::start_offset()); 4694 bgt(tmp1, tmp2, stack_ok); 4695 STOP("Lock-stack underflow"); 4696 bind(stack_ok); 4697 } 4698 { 4699 // Check if the top of the lock-stack matches the unlocked object. 4700 Label tos_ok; 4701 subw(tmp1, tmp1, oopSize); 4702 add(tmp1, xthread, tmp1); 4703 ld(tmp1, Address(tmp1, 0)); 4704 beq(tmp1, obj, tos_ok); 4705 STOP("Top of lock-stack does not match the unlocked object"); 4706 bind(tos_ok); 4707 } 4708 { 4709 // Check that hdr is fast-locked. 4710 Label hdr_ok; 4711 andi(tmp1, hdr, markWord::lock_mask_in_place); 4712 beqz(tmp1, hdr_ok); 4713 STOP("Header is not fast-locked"); 4714 bind(hdr_ok); 4715 } 4716 #endif 4717 4718 // Load the new header (unlocked) into tmp1 4719 ori(tmp1, hdr, markWord::unlocked_value); 4720 4721 // Try to swing header from locked to unlocked 4722 Label success; 4723 cmpxchgptr(hdr, tmp1, obj, tmp2, success, &slow); 4724 bind(success); 4725 4726 // After successful unlock, pop object from lock-stack 4727 lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); 4728 subw(tmp1, tmp1, oopSize); 4729 #ifdef ASSERT 4730 add(tmp2, xthread, tmp1); 4731 sd(zr, Address(tmp2, 0)); 4732 #endif 4733 sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); 4734 }