1 /* 2 * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 4 * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 * 25 */ 26 27 #include "precompiled.hpp" 28 #include "asm/assembler.hpp" 29 #include "asm/assembler.inline.hpp" 30 #include "compiler/disassembler.hpp" 31 #include "gc/shared/barrierSet.hpp" 32 #include "gc/shared/barrierSetAssembler.hpp" 33 #include "gc/shared/cardTable.hpp" 34 #include "gc/shared/cardTableBarrierSet.hpp" 35 #include "gc/shared/collectedHeap.hpp" 36 #include "interpreter/bytecodeHistogram.hpp" 37 #include "interpreter/interpreter.hpp" 38 #include "memory/resourceArea.hpp" 39 #include "memory/universe.hpp" 40 #include "nativeInst_riscv.hpp" 41 #include "oops/accessDecorators.hpp" 42 #include "oops/compressedOops.inline.hpp" 43 #include "oops/klass.inline.hpp" 44 #include "oops/oop.hpp" 45 #include "runtime/interfaceSupport.inline.hpp" 46 #include "runtime/javaThread.hpp" 47 #include "runtime/jniHandles.inline.hpp" 48 #include "runtime/sharedRuntime.hpp" 49 #include "runtime/stubRoutines.hpp" 50 #include "utilities/powerOfTwo.hpp" 51 #ifdef COMPILER2 52 #include "opto/compile.hpp" 53 #include "opto/node.hpp" 54 #include "opto/output.hpp" 55 #endif 56 57 #ifdef PRODUCT 58 #define BLOCK_COMMENT(str) /* nothing */ 59 #else 60 #define BLOCK_COMMENT(str) block_comment(str) 61 #endif 62 #define STOP(str) stop(str); 63 #define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") 64 65 static void pass_arg0(MacroAssembler* masm, Register arg) { 66 if (c_rarg0 != arg) { 67 masm->mv(c_rarg0, arg); 68 } 69 } 70 71 static void pass_arg1(MacroAssembler* masm, Register arg) { 72 if (c_rarg1 != arg) { 73 masm->mv(c_rarg1, arg); 74 } 75 } 76 77 static void pass_arg2(MacroAssembler* masm, Register arg) { 78 if (c_rarg2 != arg) { 79 masm->mv(c_rarg2, arg); 80 } 81 } 82 83 static void pass_arg3(MacroAssembler* masm, Register arg) { 84 if (c_rarg3 != arg) { 85 masm->mv(c_rarg3, arg); 86 } 87 } 88 89 void MacroAssembler::push_cont_fastpath(Register java_thread) { 90 if (!Continuations::enabled()) return; 91 Label done; 92 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset())); 93 bleu(sp, t0, done); 94 sd(sp, Address(java_thread, JavaThread::cont_fastpath_offset())); 95 bind(done); 96 } 97 98 void MacroAssembler::pop_cont_fastpath(Register java_thread) { 99 if (!Continuations::enabled()) return; 100 Label done; 101 ld(t0, Address(java_thread, 
JavaThread::cont_fastpath_offset())); 102 bltu(sp, t0, done); 103 sd(zr, Address(java_thread, JavaThread::cont_fastpath_offset())); 104 bind(done); 105 } 106 107 int MacroAssembler::align(int modulus, int extra_offset) { 108 CompressibleRegion cr(this); 109 intptr_t before = offset(); 110 while ((offset() + extra_offset) % modulus != 0) { nop(); } 111 return (int)(offset() - before); 112 } 113 114 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 115 call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); 116 } 117 118 // Implementation of call_VM versions 119 120 void MacroAssembler::call_VM(Register oop_result, 121 address entry_point, 122 bool check_exceptions) { 123 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 124 } 125 126 void MacroAssembler::call_VM(Register oop_result, 127 address entry_point, 128 Register arg_1, 129 bool check_exceptions) { 130 pass_arg1(this, arg_1); 131 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 132 } 133 134 void MacroAssembler::call_VM(Register oop_result, 135 address entry_point, 136 Register arg_1, 137 Register arg_2, 138 bool check_exceptions) { 139 assert(arg_1 != c_rarg2, "smashed arg"); 140 pass_arg2(this, arg_2); 141 pass_arg1(this, arg_1); 142 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 143 } 144 145 void MacroAssembler::call_VM(Register oop_result, 146 address entry_point, 147 Register arg_1, 148 Register arg_2, 149 Register arg_3, 150 bool check_exceptions) { 151 assert(arg_1 != c_rarg3, "smashed arg"); 152 assert(arg_2 != c_rarg3, "smashed arg"); 153 pass_arg3(this, arg_3); 154 155 assert(arg_1 != c_rarg2, "smashed arg"); 156 pass_arg2(this, arg_2); 157 158 pass_arg1(this, arg_1); 159 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 160 } 161 162 void MacroAssembler::call_VM(Register oop_result, 163 Register last_java_sp, 164 address entry_point, 165 int number_of_arguments, 166 bool check_exceptions) { 167 call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 168 } 169 170 void MacroAssembler::call_VM(Register oop_result, 171 Register last_java_sp, 172 address entry_point, 173 Register arg_1, 174 bool check_exceptions) { 175 pass_arg1(this, arg_1); 176 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 177 } 178 179 void MacroAssembler::call_VM(Register oop_result, 180 Register last_java_sp, 181 address entry_point, 182 Register arg_1, 183 Register arg_2, 184 bool check_exceptions) { 185 186 assert(arg_1 != c_rarg2, "smashed arg"); 187 pass_arg2(this, arg_2); 188 pass_arg1(this, arg_1); 189 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 190 } 191 192 void MacroAssembler::call_VM(Register oop_result, 193 Register last_java_sp, 194 address entry_point, 195 Register arg_1, 196 Register arg_2, 197 Register arg_3, 198 bool check_exceptions) { 199 assert(arg_1 != c_rarg3, "smashed arg"); 200 assert(arg_2 != c_rarg3, "smashed arg"); 201 pass_arg3(this, arg_3); 202 assert(arg_1 != c_rarg2, "smashed arg"); 203 pass_arg2(this, arg_2); 204 pass_arg1(this, arg_1); 205 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 206 } 207 208 void MacroAssembler::post_call_nop() { 209 if (!Continuations::enabled()) { 210 return; 211 } 212 relocate(post_call_nop_Relocation::spec(), [&] { 213 InlineSkippedInstructionsCounter skipCounter(this); 214 nop(); 215 li32(zr, 0); 216 }); 217 } 218 219 
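// Illustrative usage of the call_VM wrappers above. This is a sketch only, not code
// from this file, and the runtime entry named here is hypothetical. Arguments are
// shuffled into c_rarg1..c_rarg3 by pass_argN(); a register that already sits in its
// destination c_rarg is left alone, and the "smashed arg" asserts reject cases where
// moving one argument into place would clobber another argument not yet moved:
//
//   __ call_VM(x10 /* oop_result */,
//              CAST_FROM_FN_PTR(address, InterpreterRuntime::example_entry), // hypothetical entry
//              c_rarg1, c_rarg2,
//              /* check_exceptions */ true);
//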
// these are no-ops overridden by InterpreterMacroAssembler 220 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} 221 void MacroAssembler::check_and_handle_popframe(Register java_thread) {} 222 223 // Calls to C land 224 // 225 // When entering C land, the fp, & esp of the last Java frame have to be recorded 226 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 227 // has to be reset to 0. This is required to allow proper stack traversal. 228 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 229 Register last_java_fp, 230 Register last_java_pc, 231 Register tmp) { 232 233 if (last_java_pc->is_valid()) { 234 sd(last_java_pc, Address(xthread, 235 JavaThread::frame_anchor_offset() + 236 JavaFrameAnchor::last_Java_pc_offset())); 237 } 238 239 // determine last_java_sp register 240 if (last_java_sp == sp) { 241 mv(tmp, sp); 242 last_java_sp = tmp; 243 } else if (!last_java_sp->is_valid()) { 244 last_java_sp = esp; 245 } 246 247 sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); 248 249 // last_java_fp is optional 250 if (last_java_fp->is_valid()) { 251 sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); 252 } 253 } 254 255 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 256 Register last_java_fp, 257 address last_java_pc, 258 Register tmp) { 259 assert(last_java_pc != nullptr, "must provide a valid PC"); 260 261 la(tmp, last_java_pc); 262 sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); 263 264 set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); 265 } 266 267 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 268 Register last_java_fp, 269 Label &L, 270 Register tmp) { 271 if (L.is_bound()) { 272 set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); 273 } else { 274 L.add_patch_at(code(), locator()); 275 IncompressibleRegion ir(this); // the label address will be patched back. 
276 set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); 277 } 278 } 279 280 void MacroAssembler::reset_last_Java_frame(bool clear_fp) { 281 // we must set sp to zero to clear frame 282 sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); 283 284 // must clear fp, so that compiled frames are not confused; it is 285 // possible that we need it only for debugging 286 if (clear_fp) { 287 sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); 288 } 289 290 // Always clear the pc because it could have been set by make_walkable() 291 sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); 292 } 293 294 void MacroAssembler::call_VM_base(Register oop_result, 295 Register java_thread, 296 Register last_java_sp, 297 address entry_point, 298 int number_of_arguments, 299 bool check_exceptions) { 300 // determine java_thread register 301 if (!java_thread->is_valid()) { 302 java_thread = xthread; 303 } 304 // determine last_java_sp register 305 if (!last_java_sp->is_valid()) { 306 last_java_sp = esp; 307 } 308 309 // debugging support 310 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 311 assert(java_thread == xthread, "unexpected register"); 312 313 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 314 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 315 316 // push java thread (becomes first argument of C function) 317 mv(c_rarg0, java_thread); 318 319 // set last Java frame before call 320 assert(last_java_sp != fp, "can't use fp"); 321 322 Label l; 323 set_last_Java_frame(last_java_sp, fp, l, t0); 324 325 // do the call, remove parameters 326 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); 327 328 // reset last Java frame 329 // Only interpreter should have to clear fp 330 reset_last_Java_frame(true); 331 332 // C++ interp handles this in the interpreter 333 check_and_handle_popframe(java_thread); 334 check_and_handle_earlyret(java_thread); 335 336 if (check_exceptions) { 337 // check for pending exceptions (java_thread is set upon return) 338 ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); 339 Label ok; 340 beqz(t0, ok); 341 RuntimeAddress target(StubRoutines::forward_exception_entry()); 342 relocate(target.rspec(), [&] { 343 int32_t offset; 344 la_patchable(t0, target, offset); 345 jalr(x0, t0, offset); 346 }); 347 bind(ok); 348 } 349 350 // get oop result if there is one and reset the value in the thread 351 if (oop_result->is_valid()) { 352 get_vm_result(oop_result, java_thread); 353 } 354 } 355 356 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { 357 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 358 sd(zr, Address(java_thread, JavaThread::vm_result_offset())); 359 verify_oop_msg(oop_result, "broken oop in call_VM_base"); 360 } 361 362 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { 363 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); 364 sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); 365 } 366 367 void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { 368 assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); 369 assert_different_registers(klass, xthread, tmp); 370 371 Label L_fallthrough, L_tmp; 372 if (L_fast_path == nullptr) { 373 L_fast_path = 
&L_fallthrough; 374 } else if (L_slow_path == nullptr) { 375 L_slow_path = &L_fallthrough; 376 } 377 378 // Fast path check: class is fully initialized 379 lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); 380 sub(tmp, tmp, InstanceKlass::fully_initialized); 381 beqz(tmp, *L_fast_path); 382 383 // Fast path check: current thread is initializer thread 384 ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); 385 386 if (L_slow_path == &L_fallthrough) { 387 beq(xthread, tmp, *L_fast_path); 388 bind(*L_slow_path); 389 } else if (L_fast_path == &L_fallthrough) { 390 bne(xthread, tmp, *L_slow_path); 391 bind(*L_fast_path); 392 } else { 393 Unimplemented(); 394 } 395 } 396 397 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { 398 if (!VerifyOops) { return; } 399 400 // Pass register number to verify_oop_subroutine 401 const char* b = nullptr; 402 { 403 ResourceMark rm; 404 stringStream ss; 405 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line); 406 b = code_string(ss.as_string()); 407 } 408 BLOCK_COMMENT("verify_oop {"); 409 410 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 411 412 mv(c_rarg0, reg); // c_rarg0 : x10 413 { 414 // The length of the instruction sequence emitted should not depend 415 // on the address of the char buffer so that the size of mach nodes for 416 // scratch emit and normal emit matches. 417 IncompressibleRegion ir(this); // Fixed length 418 movptr(t0, (address) b); 419 } 420 421 // call indirectly to solve generation ordering problem 422 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 423 relocate(target.rspec(), [&] { 424 int32_t offset; 425 la_patchable(t1, target, offset); 426 ld(t1, Address(t1, offset)); 427 }); 428 jalr(t1); 429 430 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 431 432 BLOCK_COMMENT("} verify_oop"); 433 } 434 435 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { 436 if (!VerifyOops) { 437 return; 438 } 439 440 const char* b = nullptr; 441 { 442 ResourceMark rm; 443 stringStream ss; 444 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line); 445 b = code_string(ss.as_string()); 446 } 447 BLOCK_COMMENT("verify_oop_addr {"); 448 449 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 450 451 if (addr.uses(sp)) { 452 la(x10, addr); 453 ld(x10, Address(x10, 4 * wordSize)); 454 } else { 455 ld(x10, addr); 456 } 457 458 { 459 // The length of the instruction sequence emitted should not depend 460 // on the address of the char buffer so that the size of mach nodes for 461 // scratch emit and normal emit matches. 462 IncompressibleRegion ir(this); // Fixed length 463 movptr(t0, (address) b); 464 } 465 466 // call indirectly to solve generation ordering problem 467 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 468 relocate(target.rspec(), [&] { 469 int32_t offset; 470 la_patchable(t1, target, offset); 471 ld(t1, Address(t1, offset)); 472 }); 473 jalr(t1); 474 475 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 476 477 BLOCK_COMMENT("} verify_oop_addr"); 478 } 479 480 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 481 int extra_slot_offset) { 482 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
483 int stackElementSize = Interpreter::stackElementSize; 484 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 485 #ifdef ASSERT 486 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 487 assert(offset1 - offset == stackElementSize, "correct arithmetic"); 488 #endif 489 if (arg_slot.is_constant()) { 490 return Address(esp, arg_slot.as_constant() * stackElementSize + offset); 491 } else { 492 assert_different_registers(t0, arg_slot.as_register()); 493 shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); 494 return Address(t0, offset); 495 } 496 } 497 498 #ifndef PRODUCT 499 extern "C" void findpc(intptr_t x); 500 #endif 501 502 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) 503 { 504 // In order to get locks to work, we need to fake a in_VM state 505 if (ShowMessageBoxOnError) { 506 JavaThread* thread = JavaThread::current(); 507 JavaThreadState saved_state = thread->thread_state(); 508 thread->set_thread_state(_thread_in_vm); 509 #ifndef PRODUCT 510 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 511 ttyLocker ttyl; 512 BytecodeCounter::print(); 513 } 514 #endif 515 if (os::message_box(msg, "Execution stopped, print registers?")) { 516 ttyLocker ttyl; 517 tty->print_cr(" pc = 0x%016lx", pc); 518 #ifndef PRODUCT 519 tty->cr(); 520 findpc(pc); 521 tty->cr(); 522 #endif 523 tty->print_cr(" x0 = 0x%016lx", regs[0]); 524 tty->print_cr(" x1 = 0x%016lx", regs[1]); 525 tty->print_cr(" x2 = 0x%016lx", regs[2]); 526 tty->print_cr(" x3 = 0x%016lx", regs[3]); 527 tty->print_cr(" x4 = 0x%016lx", regs[4]); 528 tty->print_cr(" x5 = 0x%016lx", regs[5]); 529 tty->print_cr(" x6 = 0x%016lx", regs[6]); 530 tty->print_cr(" x7 = 0x%016lx", regs[7]); 531 tty->print_cr(" x8 = 0x%016lx", regs[8]); 532 tty->print_cr(" x9 = 0x%016lx", regs[9]); 533 tty->print_cr("x10 = 0x%016lx", regs[10]); 534 tty->print_cr("x11 = 0x%016lx", regs[11]); 535 tty->print_cr("x12 = 0x%016lx", regs[12]); 536 tty->print_cr("x13 = 0x%016lx", regs[13]); 537 tty->print_cr("x14 = 0x%016lx", regs[14]); 538 tty->print_cr("x15 = 0x%016lx", regs[15]); 539 tty->print_cr("x16 = 0x%016lx", regs[16]); 540 tty->print_cr("x17 = 0x%016lx", regs[17]); 541 tty->print_cr("x18 = 0x%016lx", regs[18]); 542 tty->print_cr("x19 = 0x%016lx", regs[19]); 543 tty->print_cr("x20 = 0x%016lx", regs[20]); 544 tty->print_cr("x21 = 0x%016lx", regs[21]); 545 tty->print_cr("x22 = 0x%016lx", regs[22]); 546 tty->print_cr("x23 = 0x%016lx", regs[23]); 547 tty->print_cr("x24 = 0x%016lx", regs[24]); 548 tty->print_cr("x25 = 0x%016lx", regs[25]); 549 tty->print_cr("x26 = 0x%016lx", regs[26]); 550 tty->print_cr("x27 = 0x%016lx", regs[27]); 551 tty->print_cr("x28 = 0x%016lx", regs[28]); 552 tty->print_cr("x30 = 0x%016lx", regs[30]); 553 tty->print_cr("x31 = 0x%016lx", regs[31]); 554 BREAKPOINT; 555 } 556 } 557 fatal("DEBUG MESSAGE: %s", msg); 558 } 559 560 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) { 561 assert_different_registers(value, tmp1, tmp2); 562 Label done, tagged, weak_tagged; 563 564 beqz(value, done); // Use null as-is. 565 // Test for tag. 566 andi(tmp1, value, JNIHandles::tag_mask); 567 bnez(tmp1, tagged); 568 569 // Resolve local handle 570 access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, value, Address(value, 0), tmp1, tmp2); 571 verify_oop(value); 572 j(done); 573 574 bind(tagged); 575 // Test for jweak tag. 
576 STATIC_ASSERT(JNIHandles::TypeTag::weak_global == 0b1); 577 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::weak_global)); 578 bnez(tmp1, weak_tagged); 579 580 // Resolve global handle 581 access_load_at(T_OBJECT, IN_NATIVE, value, 582 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 583 verify_oop(value); 584 j(done); 585 586 bind(weak_tagged); 587 // Resolve jweak. 588 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, 589 Address(value, -JNIHandles::TypeTag::weak_global), tmp1, tmp2); 590 verify_oop(value); 591 592 bind(done); 593 } 594 595 void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) { 596 assert_different_registers(value, tmp1, tmp2); 597 Label done; 598 599 beqz(value, done); // Use null as-is. 600 601 #ifdef ASSERT 602 { 603 STATIC_ASSERT(JNIHandles::TypeTag::global == 0b10); 604 Label valid_global_tag; 605 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::global)); // Test for global tag. 606 bnez(tmp1, valid_global_tag); 607 stop("non global jobject using resolve_global_jobject"); 608 bind(valid_global_tag); 609 } 610 #endif 611 612 // Resolve global handle 613 access_load_at(T_OBJECT, IN_NATIVE, value, 614 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 615 verify_oop(value); 616 617 bind(done); 618 } 619 620 void MacroAssembler::stop(const char* msg) { 621 BLOCK_COMMENT(msg); 622 illegal_instruction(Assembler::csr::time); 623 emit_int64((uintptr_t)msg); 624 } 625 626 void MacroAssembler::unimplemented(const char* what) { 627 const char* buf = nullptr; 628 { 629 ResourceMark rm; 630 stringStream ss; 631 ss.print("unimplemented: %s", what); 632 buf = code_string(ss.as_string()); 633 } 634 stop(buf); 635 } 636 637 void MacroAssembler::emit_static_call_stub() { 638 IncompressibleRegion ir(this); // Fixed length: see CompiledStaticCall::to_interp_stub_size(). 639 // CompiledDirectStaticCall::set_to_interpreted knows the 640 // exact layout of this stub. 641 642 mov_metadata(xmethod, (Metadata*)nullptr); 643 644 // Jump to the entry point of the c2i stub. 
645 int32_t offset = 0; 646 movptr(t0, 0, offset); 647 jalr(x0, t0, offset); 648 } 649 650 void MacroAssembler::call_VM_leaf_base(address entry_point, 651 int number_of_arguments, 652 Label *retaddr) { 653 push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp 654 call(entry_point); 655 if (retaddr != nullptr) { 656 bind(*retaddr); 657 } 658 pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp 659 } 660 661 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 662 call_VM_leaf_base(entry_point, number_of_arguments); 663 } 664 665 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 666 pass_arg0(this, arg_0); 667 call_VM_leaf_base(entry_point, 1); 668 } 669 670 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 671 pass_arg0(this, arg_0); 672 pass_arg1(this, arg_1); 673 call_VM_leaf_base(entry_point, 2); 674 } 675 676 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, 677 Register arg_1, Register arg_2) { 678 pass_arg0(this, arg_0); 679 pass_arg1(this, arg_1); 680 pass_arg2(this, arg_2); 681 call_VM_leaf_base(entry_point, 3); 682 } 683 684 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 685 pass_arg0(this, arg_0); 686 MacroAssembler::call_VM_leaf_base(entry_point, 1); 687 } 688 689 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 690 691 assert(arg_0 != c_rarg1, "smashed arg"); 692 pass_arg1(this, arg_1); 693 pass_arg0(this, arg_0); 694 MacroAssembler::call_VM_leaf_base(entry_point, 2); 695 } 696 697 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 698 assert(arg_0 != c_rarg2, "smashed arg"); 699 assert(arg_1 != c_rarg2, "smashed arg"); 700 pass_arg2(this, arg_2); 701 assert(arg_0 != c_rarg1, "smashed arg"); 702 pass_arg1(this, arg_1); 703 pass_arg0(this, arg_0); 704 MacroAssembler::call_VM_leaf_base(entry_point, 3); 705 } 706 707 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 708 assert(arg_0 != c_rarg3, "smashed arg"); 709 assert(arg_1 != c_rarg3, "smashed arg"); 710 assert(arg_2 != c_rarg3, "smashed arg"); 711 pass_arg3(this, arg_3); 712 assert(arg_0 != c_rarg2, "smashed arg"); 713 assert(arg_1 != c_rarg2, "smashed arg"); 714 pass_arg2(this, arg_2); 715 assert(arg_0 != c_rarg1, "smashed arg"); 716 pass_arg1(this, arg_1); 717 pass_arg0(this, arg_0); 718 MacroAssembler::call_VM_leaf_base(entry_point, 4); 719 } 720 721 void MacroAssembler::la(Register Rd, const address dest) { 722 int64_t offset = dest - pc(); 723 if (is_valid_32bit_offset(offset)) { 724 auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit 725 addi(Rd, Rd, ((int64_t)offset << 52) >> 52); 726 } else { 727 movptr(Rd, dest); 728 } 729 } 730 731 void MacroAssembler::la(Register Rd, const Address &adr) { 732 switch (adr.getMode()) { 733 case Address::literal: { 734 relocInfo::relocType rtype = adr.rspec().reloc()->type(); 735 if (rtype == relocInfo::none) { 736 mv(Rd, (intptr_t)(adr.target())); 737 } else { 738 relocate(adr.rspec(), [&] { 739 movptr(Rd, adr.target()); 740 }); 741 } 742 break; 743 } 744 case Address::base_plus_offset: { 745 Address new_adr = legitimize_address(Rd, adr); 746 if (!(new_adr.base() == Rd && new_adr.offset() == 0)) { 747 addi(Rd, new_adr.base(), new_adr.offset()); 748 } 749 break; 750 } 751 default: 752 ShouldNotReachHere(); 753 } 
754 } 755 756 void MacroAssembler::la(Register Rd, Label &label) { 757 IncompressibleRegion ir(this); // the label address may be patched back. 758 wrap_label(Rd, label, &MacroAssembler::la); 759 } 760 761 void MacroAssembler::li16u(Register Rd, uint16_t imm) { 762 lui(Rd, (uint32_t)imm << 12); 763 srli(Rd, Rd, 12); 764 } 765 766 void MacroAssembler::li32(Register Rd, int32_t imm) { 767 // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit 768 int64_t upper = imm, lower = imm; 769 lower = (imm << 20) >> 20; 770 upper -= lower; 771 upper = (int32_t)upper; 772 // lui Rd, imm[31:12] + imm[11] 773 lui(Rd, upper); 774 // use addiw to distinguish li32 to li64 775 addiw(Rd, Rd, lower); 776 } 777 778 void MacroAssembler::li64(Register Rd, int64_t imm) { 779 // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or 780 // (imm[31:20] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. 781 int64_t lower = imm & 0xffffffff; 782 lower -= ((lower << 44) >> 44); 783 int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; 784 int32_t upper = (tmp_imm - (int32_t)lower) >> 32; 785 786 // Load upper 32 bits 787 int64_t up = upper, lo = upper; 788 lo = (lo << 52) >> 52; 789 up -= lo; 790 up = (int32_t)up; 791 lui(Rd, up); 792 addi(Rd, Rd, lo); 793 794 // Load the rest 32 bits. 795 slli(Rd, Rd, 12); 796 addi(Rd, Rd, (int32_t)lower >> 20); 797 slli(Rd, Rd, 12); 798 lower = ((int32_t)imm << 12) >> 20; 799 addi(Rd, Rd, lower); 800 slli(Rd, Rd, 8); 801 lower = imm & 0xff; 802 addi(Rd, Rd, lower); 803 } 804 805 void MacroAssembler::li(Register Rd, int64_t imm) { 806 // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff 807 // li -> c.li 808 if (do_compress() && (is_simm6(imm) && Rd != x0)) { 809 c_li(Rd, imm); 810 return; 811 } 812 813 int shift = 12; 814 int64_t upper = imm, lower = imm; 815 // Split imm to a lower 12-bit sign-extended part and the remainder, 816 // because addi will sign-extend the lower imm. 817 lower = ((int32_t)imm << 20) >> 20; 818 upper -= lower; 819 820 // Test whether imm is a 32-bit integer. 
821 if (!(((imm) & ~(int64_t)0x7fffffff) == 0 || 822 (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) { 823 while (((upper >> shift) & 1) == 0) { shift++; } 824 upper >>= shift; 825 li(Rd, upper); 826 slli(Rd, Rd, shift); 827 if (lower != 0) { 828 addi(Rd, Rd, lower); 829 } 830 } else { 831 // 32-bit integer 832 Register hi_Rd = zr; 833 if (upper != 0) { 834 lui(Rd, (int32_t)upper); 835 hi_Rd = Rd; 836 } 837 if (lower != 0 || hi_Rd == zr) { 838 addiw(Rd, hi_Rd, lower); 839 } 840 } 841 } 842 843 #define INSN(NAME, REGISTER) \ 844 void MacroAssembler::NAME(const address dest, Register temp) { \ 845 assert_cond(dest != nullptr); \ 846 int64_t distance = dest - pc(); \ 847 if (is_simm21(distance) && ((distance % 2) == 0)) { \ 848 Assembler::jal(REGISTER, distance); \ 849 } else { \ 850 assert(temp != noreg, "expecting a register"); \ 851 int32_t offset = 0; \ 852 movptr(temp, dest, offset); \ 853 Assembler::jalr(REGISTER, temp, offset); \ 854 } \ 855 } \ 856 857 INSN(j, x0); 858 INSN(jal, x1); 859 860 #undef INSN 861 862 #define INSN(NAME, REGISTER) \ 863 void MacroAssembler::NAME(const Address &adr, Register temp) { \ 864 switch (adr.getMode()) { \ 865 case Address::literal: { \ 866 relocate(adr.rspec(), [&] { \ 867 NAME(adr.target(), temp); \ 868 }); \ 869 break; \ 870 } \ 871 case Address::base_plus_offset: { \ 872 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \ 873 la(temp, Address(adr.base(), adr.offset() - offset)); \ 874 Assembler::jalr(REGISTER, temp, offset); \ 875 break; \ 876 } \ 877 default: \ 878 ShouldNotReachHere(); \ 879 } \ 880 } 881 882 INSN(j, x0); 883 INSN(jal, x1); 884 885 #undef INSN 886 887 #define INSN(NAME) \ 888 void MacroAssembler::NAME(Register Rd, const address dest, Register temp) { \ 889 assert_cond(dest != nullptr); \ 890 int64_t distance = dest - pc(); \ 891 if (is_simm21(distance) && ((distance % 2) == 0)) { \ 892 Assembler::NAME(Rd, distance); \ 893 } else { \ 894 assert_different_registers(Rd, temp); \ 895 int32_t offset = 0; \ 896 movptr(temp, dest, offset); \ 897 jalr(Rd, temp, offset); \ 898 } \ 899 } \ 900 void MacroAssembler::NAME(Register Rd, Label &L, Register temp) { \ 901 assert_different_registers(Rd, temp); \ 902 wrap_label(Rd, L, temp, &MacroAssembler::NAME); \ 903 } 904 905 INSN(jal); 906 907 #undef INSN 908 909 #define INSN(NAME, REGISTER) \ 910 void MacroAssembler::NAME(Label &l, Register temp) { \ 911 jal(REGISTER, l, temp); \ 912 } \ 913 914 INSN(j, x0); 915 INSN(jal, x1); 916 917 #undef INSN 918 919 void MacroAssembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) { 920 if (L.is_bound()) { 921 (this->*insn)(Rt, target(L), tmp); 922 } else { 923 L.add_patch_at(code(), locator()); 924 (this->*insn)(Rt, pc(), tmp); 925 } 926 } 927 928 void MacroAssembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) { 929 if (L.is_bound()) { 930 (this->*insn)(Rt, target(L)); 931 } else { 932 L.add_patch_at(code(), locator()); 933 (this->*insn)(Rt, pc()); 934 } 935 } 936 937 void MacroAssembler::wrap_label(Register r1, Register r2, Label &L, 938 compare_and_branch_insn insn, 939 compare_and_branch_label_insn neg_insn, bool is_far) { 940 if (is_far) { 941 Label done; 942 (this->*neg_insn)(r1, r2, done, /* is_far */ false); 943 j(L); 944 bind(done); 945 } else { 946 if (L.is_bound()) { 947 (this->*insn)(r1, r2, target(L)); 948 } else { 949 L.add_patch_at(code(), locator()); 950 (this->*insn)(r1, r2, pc()); 951 } 952 } 953 } 954 955 #define INSN(NAME, NEG_INSN) \ 956 void MacroAssembler::NAME(Register Rs1, 
Register Rs2, Label &L, bool is_far) { \ 957 wrap_label(Rs1, Rs2, L, &MacroAssembler::NAME, &MacroAssembler::NEG_INSN, is_far); \ 958 } 959 960 INSN(beq, bne); 961 INSN(bne, beq); 962 INSN(blt, bge); 963 INSN(bge, blt); 964 INSN(bltu, bgeu); 965 INSN(bgeu, bltu); 966 967 #undef INSN 968 969 #define INSN(NAME) \ 970 void MacroAssembler::NAME##z(Register Rs, const address dest) { \ 971 NAME(Rs, zr, dest); \ 972 } \ 973 void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ 974 NAME(Rs, zr, l, is_far); \ 975 } \ 976 977 INSN(beq); 978 INSN(bne); 979 INSN(blt); 980 INSN(ble); 981 INSN(bge); 982 INSN(bgt); 983 984 #undef INSN 985 986 #define INSN(NAME, NEG_INSN) \ 987 void MacroAssembler::NAME(Register Rs, Register Rt, const address dest) { \ 988 NEG_INSN(Rt, Rs, dest); \ 989 } \ 990 void MacroAssembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \ 991 NEG_INSN(Rt, Rs, l, is_far); \ 992 } 993 994 INSN(bgt, blt); 995 INSN(ble, bge); 996 INSN(bgtu, bltu); 997 INSN(bleu, bgeu); 998 999 #undef INSN 1000 1001 // Float compare branch instructions 1002 1003 #define INSN(NAME, FLOATCMP, BRANCH) \ 1004 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ 1005 FLOATCMP##_s(t0, Rs1, Rs2); \ 1006 BRANCH(t0, l, is_far); \ 1007 } \ 1008 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ 1009 FLOATCMP##_d(t0, Rs1, Rs2); \ 1010 BRANCH(t0, l, is_far); \ 1011 } 1012 1013 INSN(beq, feq, bnez); 1014 INSN(bne, feq, beqz); 1015 1016 #undef INSN 1017 1018 1019 #define INSN(NAME, FLOATCMP1, FLOATCMP2) \ 1020 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1021 bool is_far, bool is_unordered) { \ 1022 if (is_unordered) { \ 1023 /* jump if either source is NaN or condition is expected */ \ 1024 FLOATCMP2##_s(t0, Rs2, Rs1); \ 1025 beqz(t0, l, is_far); \ 1026 } else { \ 1027 /* jump if no NaN in source and condition is expected */ \ 1028 FLOATCMP1##_s(t0, Rs1, Rs2); \ 1029 bnez(t0, l, is_far); \ 1030 } \ 1031 } \ 1032 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1033 bool is_far, bool is_unordered) { \ 1034 if (is_unordered) { \ 1035 /* jump if either source is NaN or condition is expected */ \ 1036 FLOATCMP2##_d(t0, Rs2, Rs1); \ 1037 beqz(t0, l, is_far); \ 1038 } else { \ 1039 /* jump if no NaN in source and condition is expected */ \ 1040 FLOATCMP1##_d(t0, Rs1, Rs2); \ 1041 bnez(t0, l, is_far); \ 1042 } \ 1043 } 1044 1045 INSN(ble, fle, flt); 1046 INSN(blt, flt, fle); 1047 1048 #undef INSN 1049 1050 #define INSN(NAME, CMP) \ 1051 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1052 bool is_far, bool is_unordered) { \ 1053 float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ 1054 } \ 1055 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1056 bool is_far, bool is_unordered) { \ 1057 double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ 1058 } 1059 1060 INSN(bgt, blt); 1061 INSN(bge, ble); 1062 1063 #undef INSN 1064 1065 1066 #define INSN(NAME, CSR) \ 1067 void MacroAssembler::NAME(Register Rd) { \ 1068 csrr(Rd, CSR); \ 1069 } 1070 1071 INSN(rdinstret, CSR_INSTRET); 1072 INSN(rdcycle, CSR_CYCLE); 1073 INSN(rdtime, CSR_TIME); 1074 INSN(frcsr, CSR_FCSR); 1075 INSN(frrm, CSR_FRM); 1076 INSN(frflags, CSR_FFLAGS); 1077 1078 #undef INSN 1079 1080 void MacroAssembler::csrr(Register Rd, unsigned csr) { 1081 csrrs(Rd, csr, x0); 1082 } 
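// Explanatory note on the CSR helpers above and the write-only forms that follow
// (an illustrative aside, not additional functionality): csrr(Rd, csr) expands to
// csrrs(Rd, csr, x0), and per the RISC-V spec CSRRS with rs1 = x0 reads the CSR
// without writing it, which is why rdcycle/rdtime/rdinstret can safely target the
// read-only counter CSRs. Conversely, csrw/csrs/csrc below use x0 as the destination
// register and discard the old CSR value. For example:
//
//   __ rdcycle(t0);          // t0 = CSR_CYCLE; the CSR itself is untouched
//   __ csrw(CSR_FCSR, x11);  // FCSR = x11; the previous value is dropped
//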
1083 1084 #define INSN(NAME, OPFUN) \ 1085 void MacroAssembler::NAME(unsigned csr, Register Rs) { \ 1086 OPFUN(x0, csr, Rs); \ 1087 } 1088 1089 INSN(csrw, csrrw); 1090 INSN(csrs, csrrs); 1091 INSN(csrc, csrrc); 1092 1093 #undef INSN 1094 1095 #define INSN(NAME, OPFUN) \ 1096 void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ 1097 OPFUN(x0, csr, imm); \ 1098 } 1099 1100 INSN(csrwi, csrrwi); 1101 INSN(csrsi, csrrsi); 1102 INSN(csrci, csrrci); 1103 1104 #undef INSN 1105 1106 #define INSN(NAME, CSR) \ 1107 void MacroAssembler::NAME(Register Rd, Register Rs) { \ 1108 csrrw(Rd, CSR, Rs); \ 1109 } 1110 1111 INSN(fscsr, CSR_FCSR); 1112 INSN(fsrm, CSR_FRM); 1113 INSN(fsflags, CSR_FFLAGS); 1114 1115 #undef INSN 1116 1117 #define INSN(NAME) \ 1118 void MacroAssembler::NAME(Register Rs) { \ 1119 NAME(x0, Rs); \ 1120 } 1121 1122 INSN(fscsr); 1123 INSN(fsrm); 1124 INSN(fsflags); 1125 1126 #undef INSN 1127 1128 void MacroAssembler::fsrmi(Register Rd, unsigned imm) { 1129 guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); 1130 csrrwi(Rd, CSR_FRM, imm); 1131 } 1132 1133 void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { 1134 csrrwi(Rd, CSR_FFLAGS, imm); 1135 } 1136 1137 #define INSN(NAME) \ 1138 void MacroAssembler::NAME(unsigned imm) { \ 1139 NAME(x0, imm); \ 1140 } 1141 1142 INSN(fsrmi); 1143 INSN(fsflagsi); 1144 1145 #undef INSN 1146 1147 void MacroAssembler::push_reg(Register Rs) 1148 { 1149 addi(esp, esp, 0 - wordSize); 1150 sd(Rs, Address(esp, 0)); 1151 } 1152 1153 void MacroAssembler::pop_reg(Register Rd) 1154 { 1155 ld(Rd, Address(esp, 0)); 1156 addi(esp, esp, wordSize); 1157 } 1158 1159 int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { 1160 int count = 0; 1161 // Scan bitset to accumulate register pairs 1162 for (int reg = 31; reg >= 0; reg--) { 1163 if ((1U << 31) & bitset) { 1164 regs[count++] = reg; 1165 } 1166 bitset <<= 1; 1167 } 1168 return count; 1169 } 1170 1171 // Push integer registers in the bitset supplied. Don't push sp. 1172 // Return the number of words pushed 1173 int MacroAssembler::push_reg(unsigned int bitset, Register stack) { 1174 DEBUG_ONLY(int words_pushed = 0;) 1175 unsigned char regs[32]; 1176 int count = bitset_to_regs(bitset, regs); 1177 // reserve one slot to align for odd count 1178 int offset = is_even(count) ? 0 : wordSize; 1179 1180 if (count) { 1181 addi(stack, stack, -count * wordSize - offset); 1182 } 1183 for (int i = count - 1; i >= 0; i--) { 1184 sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); 1185 DEBUG_ONLY(words_pushed++;) 1186 } 1187 1188 assert(words_pushed == count, "oops, pushed != count"); 1189 1190 return count; 1191 } 1192 1193 int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { 1194 DEBUG_ONLY(int words_popped = 0;) 1195 unsigned char regs[32]; 1196 int count = bitset_to_regs(bitset, regs); 1197 // reserve one slot to align for odd count 1198 int offset = is_even(count) ? 0 : wordSize; 1199 1200 for (int i = count - 1; i >= 0; i--) { 1201 ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); 1202 DEBUG_ONLY(words_popped++;) 1203 } 1204 1205 if (count) { 1206 addi(stack, stack, count * wordSize + offset); 1207 } 1208 assert(words_popped == count, "oops, popped != count"); 1209 1210 return count; 1211 } 1212 1213 // Push floating-point registers in the bitset supplied. 
1214 // Return the number of words pushed 1215 int MacroAssembler::push_fp(unsigned int bitset, Register stack) { 1216 DEBUG_ONLY(int words_pushed = 0;) 1217 unsigned char regs[32]; 1218 int count = bitset_to_regs(bitset, regs); 1219 int push_slots = count + (count & 1); 1220 1221 if (count) { 1222 addi(stack, stack, -push_slots * wordSize); 1223 } 1224 1225 for (int i = count - 1; i >= 0; i--) { 1226 fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); 1227 DEBUG_ONLY(words_pushed++;) 1228 } 1229 1230 assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); 1231 1232 return count; 1233 } 1234 1235 int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { 1236 DEBUG_ONLY(int words_popped = 0;) 1237 unsigned char regs[32]; 1238 int count = bitset_to_regs(bitset, regs); 1239 int pop_slots = count + (count & 1); 1240 1241 for (int i = count - 1; i >= 0; i--) { 1242 fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); 1243 DEBUG_ONLY(words_popped++;) 1244 } 1245 1246 if (count) { 1247 addi(stack, stack, pop_slots * wordSize); 1248 } 1249 1250 assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); 1251 1252 return count; 1253 } 1254 1255 #ifdef COMPILER2 1256 // Push vector registers in the bitset supplied. 1257 // Return the number of words pushed 1258 int MacroAssembler::push_v(unsigned int bitset, Register stack) { 1259 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 1260 1261 // Scan bitset to accumulate register pairs 1262 unsigned char regs[32]; 1263 int count = bitset_to_regs(bitset, regs); 1264 1265 for (int i = 0; i < count; i++) { 1266 sub(stack, stack, vector_size_in_bytes); 1267 vs1r_v(as_VectorRegister(regs[i]), stack); 1268 } 1269 1270 return count * vector_size_in_bytes / wordSize; 1271 } 1272 1273 int MacroAssembler::pop_v(unsigned int bitset, Register stack) { 1274 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 1275 1276 // Scan bitset to accumulate register pairs 1277 unsigned char regs[32]; 1278 int count = bitset_to_regs(bitset, regs); 1279 1280 for (int i = count - 1; i >= 0; i--) { 1281 vl1r_v(as_VectorRegister(regs[i]), stack); 1282 add(stack, stack, vector_size_in_bytes); 1283 } 1284 1285 return count * vector_size_in_bytes / wordSize; 1286 } 1287 #endif // COMPILER2 1288 1289 void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { 1290 // Push integer registers x7, x10-x17, x28-x31. 1291 push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); 1292 1293 // Push float registers f0-f7, f10-f17, f28-f31. 
1294 addi(sp, sp, - wordSize * 20); 1295 int offset = 0; 1296 for (int i = 0; i < 32; i++) { 1297 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { 1298 fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++))); 1299 } 1300 } 1301 } 1302 1303 void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { 1304 int offset = 0; 1305 for (int i = 0; i < 32; i++) { 1306 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { 1307 fld(as_FloatRegister(i), Address(sp, wordSize * (offset++))); 1308 } 1309 } 1310 addi(sp, sp, wordSize * 20); 1311 1312 pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); 1313 } 1314 1315 void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { 1316 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) 1317 push_reg(RegSet::range(x5, x31), sp); 1318 1319 // float registers 1320 addi(sp, sp, - 32 * wordSize); 1321 for (int i = 0; i < 32; i++) { 1322 fsd(as_FloatRegister(i), Address(sp, i * wordSize)); 1323 } 1324 1325 // vector registers 1326 if (save_vectors) { 1327 sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers); 1328 vsetvli(t0, x0, Assembler::e64, Assembler::m8); 1329 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) { 1330 add(t0, sp, vector_size_in_bytes * i); 1331 vse64_v(as_VectorRegister(i), t0); 1332 } 1333 } 1334 } 1335 1336 void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { 1337 // vector registers 1338 if (restore_vectors) { 1339 vsetvli(t0, x0, Assembler::e64, Assembler::m8); 1340 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) { 1341 vle64_v(as_VectorRegister(i), sp); 1342 add(sp, sp, vector_size_in_bytes * 8); 1343 } 1344 } 1345 1346 // float registers 1347 for (int i = 0; i < 32; i++) { 1348 fld(as_FloatRegister(i), Address(sp, i * wordSize)); 1349 } 1350 addi(sp, sp, 32 * wordSize); 1351 1352 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) 1353 pop_reg(RegSet::range(x5, x31), sp); 1354 } 1355 1356 static int patch_offset_in_jal(address branch, int64_t offset) { 1357 assert(Assembler::is_simm21(offset) && ((offset % 2) == 0), 1358 "offset is too large to be patched in one jal instruction!\n"); 1359 Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] 1360 Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] 1361 Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] 1362 Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] 1363 return NativeInstruction::instruction_size; // only one instruction 1364 } 1365 1366 static int patch_offset_in_conditional_branch(address branch, int64_t offset) { 1367 assert(Assembler::is_simm13(offset) && ((offset % 2) == 0), 1368 "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n"); 1369 Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] 1370 Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] 1371 Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] 1372 Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] 1373 return NativeInstruction::instruction_size; // only one instruction 1374 } 1375 1376 static int 
patch_offset_in_pc_relative(address branch, int64_t offset) { 1377 const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load 1378 Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] 1379 Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] 1380 return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; 1381 } 1382 1383 static int patch_addr_in_movptr(address branch, address target) { 1384 const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load 1385 int32_t lower = ((intptr_t)target << 35) >> 35; 1386 int64_t upper = ((intptr_t)target - lower) >> 29; 1387 Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] 1388 Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] 1389 Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] 1390 Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20] 1391 return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; 1392 } 1393 1394 static int patch_imm_in_li64(address branch, address target) { 1395 const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi 1396 int64_t lower = (intptr_t)target & 0xffffffff; 1397 lower = lower - ((lower << 44) >> 44); 1398 int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; 1399 int32_t upper = (tmp_imm - (int32_t)lower) >> 32; 1400 int64_t tmp_upper = upper, tmp_lower = upper; 1401 tmp_lower = (tmp_lower << 52) >> 52; 1402 tmp_upper -= tmp_lower; 1403 tmp_upper >>= 12; 1404 // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:20] == 0x7ff && target[19] == 1), 1405 // upper = target[63:32] + 1. 1406 Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. 1407 Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. 1408 // Load the rest 32 bits. 1409 Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. 1410 Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. 1411 Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. 1412 return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; 1413 } 1414 1415 static int patch_imm_in_li16u(address branch, uint16_t target) { 1416 Assembler::patch(branch, 31, 12, target); // patch lui only 1417 return NativeInstruction::instruction_size; 1418 } 1419 1420 int MacroAssembler::patch_imm_in_li32(address branch, int32_t target) { 1421 const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw 1422 int64_t upper = (intptr_t)target; 1423 int32_t lower = (((int32_t)target) << 20) >> 20; 1424 upper -= lower; 1425 upper = (int32_t)upper; 1426 Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. 1427 Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. 
1428 return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; 1429 } 1430 1431 static long get_offset_of_jal(address insn_addr) { 1432 assert_cond(insn_addr != nullptr); 1433 long offset = 0; 1434 unsigned insn = Assembler::ld_instr(insn_addr); 1435 long val = (long)Assembler::sextract(insn, 31, 12); 1436 offset |= ((val >> 19) & 0x1) << 20; 1437 offset |= (val & 0xff) << 12; 1438 offset |= ((val >> 8) & 0x1) << 11; 1439 offset |= ((val >> 9) & 0x3ff) << 1; 1440 offset = (offset << 43) >> 43; 1441 return offset; 1442 } 1443 1444 static long get_offset_of_conditional_branch(address insn_addr) { 1445 long offset = 0; 1446 assert_cond(insn_addr != nullptr); 1447 unsigned insn = Assembler::ld_instr(insn_addr); 1448 offset = (long)Assembler::sextract(insn, 31, 31); 1449 offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); 1450 offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); 1451 offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); 1452 offset = (offset << 41) >> 41; 1453 return offset; 1454 } 1455 1456 static long get_offset_of_pc_relative(address insn_addr) { 1457 long offset = 0; 1458 assert_cond(insn_addr != nullptr); 1459 offset = ((long)(Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12))) << 12; // Auipc. 1460 offset += ((long)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addi/Jalr/Load. 1461 offset = (offset << 32) >> 32; 1462 return offset; 1463 } 1464 1465 static address get_target_of_movptr(address insn_addr) { 1466 assert_cond(insn_addr != nullptr); 1467 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 29; // Lui. 1468 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 17; // Addi. 1469 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 6; // Addi. 1470 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)); // Addi/Jalr/Load. 1471 return (address) target_address; 1472 } 1473 1474 static address get_target_of_li64(address insn_addr) { 1475 assert_cond(insn_addr != nullptr); 1476 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 44; // Lui. 1477 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 32; // Addi. 1478 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 20; // Addi. 1479 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)) << 8; // Addi. 1480 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 28), 31, 20)); // Addi. 1481 return (address)target_address; 1482 } 1483 1484 address MacroAssembler::get_target_of_li32(address insn_addr) { 1485 assert_cond(insn_addr != nullptr); 1486 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 12; // Lui. 1487 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addiw. 1488 return (address)target_address; 1489 } 1490 1491 // Patch any kind of instruction; there may be several instructions. 1492 // Return the total length (in bytes) of the instructions. 
1493 int MacroAssembler::pd_patch_instruction_size(address branch, address target) { 1494 assert_cond(branch != nullptr); 1495 int64_t offset = target - branch; 1496 if (NativeInstruction::is_jal_at(branch)) { // jal 1497 return patch_offset_in_jal(branch, offset); 1498 } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne 1499 return patch_offset_in_conditional_branch(branch, offset); 1500 } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load 1501 return patch_offset_in_pc_relative(branch, offset); 1502 } else if (NativeInstruction::is_movptr_at(branch)) { // movptr 1503 return patch_addr_in_movptr(branch, target); 1504 } else if (NativeInstruction::is_li64_at(branch)) { // li64 1505 return patch_imm_in_li64(branch, target); 1506 } else if (NativeInstruction::is_li32_at(branch)) { // li32 1507 int64_t imm = (intptr_t)target; 1508 return patch_imm_in_li32(branch, (int32_t)imm); 1509 } else if (NativeInstruction::is_li16u_at(branch)) { 1510 int64_t imm = (intptr_t)target; 1511 return patch_imm_in_li16u(branch, (uint16_t)imm); 1512 } else { 1513 #ifdef ASSERT 1514 tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", 1515 Assembler::ld_instr(branch), p2i(branch)); 1516 Disassembler::decode(branch - 16, branch + 16); 1517 #endif 1518 ShouldNotReachHere(); 1519 return -1; 1520 } 1521 } 1522 1523 address MacroAssembler::target_addr_for_insn(address insn_addr) { 1524 long offset = 0; 1525 assert_cond(insn_addr != nullptr); 1526 if (NativeInstruction::is_jal_at(insn_addr)) { // jal 1527 offset = get_offset_of_jal(insn_addr); 1528 } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne 1529 offset = get_offset_of_conditional_branch(insn_addr); 1530 } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load 1531 offset = get_offset_of_pc_relative(insn_addr); 1532 } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr 1533 return get_target_of_movptr(insn_addr); 1534 } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 1535 return get_target_of_li64(insn_addr); 1536 } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 1537 return get_target_of_li32(insn_addr); 1538 } else { 1539 ShouldNotReachHere(); 1540 } 1541 return address(((uintptr_t)insn_addr + offset)); 1542 } 1543 1544 int MacroAssembler::patch_oop(address insn_addr, address o) { 1545 // OOPs are either narrow (32 bits) or wide (48 bits). We encode 1546 // narrow OOPs by setting the upper 16 bits in the first 1547 // instruction. 
1548 if (NativeInstruction::is_li32_at(insn_addr)) { 1549 // Move narrow OOP 1550 uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); 1551 return patch_imm_in_li32(insn_addr, (int32_t)n); 1552 } else if (NativeInstruction::is_movptr_at(insn_addr)) { 1553 // Move wide OOP 1554 return patch_addr_in_movptr(insn_addr, o); 1555 } 1556 ShouldNotReachHere(); 1557 return -1; 1558 } 1559 1560 void MacroAssembler::reinit_heapbase() { 1561 if (UseCompressedOops) { 1562 if (Universe::is_fully_initialized()) { 1563 mv(xheapbase, CompressedOops::ptrs_base()); 1564 } else { 1565 ExternalAddress target(CompressedOops::ptrs_base_addr()); 1566 relocate(target.rspec(), [&] { 1567 int32_t offset; 1568 la_patchable(xheapbase, target, offset); 1569 ld(xheapbase, Address(xheapbase, offset)); 1570 }); 1571 } 1572 } 1573 } 1574 1575 void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset) { 1576 int64_t imm64 = (int64_t)addr; 1577 #ifndef PRODUCT 1578 { 1579 char buffer[64]; 1580 snprintf(buffer, sizeof(buffer), "0x%" PRIx64, imm64); 1581 block_comment(buffer); 1582 } 1583 #endif 1584 assert((uintptr_t)imm64 < (1ull << 48), "48-bit overflow in address constant"); 1585 // Load upper 31 bits 1586 int64_t imm = imm64 >> 17; 1587 int64_t upper = imm, lower = imm; 1588 lower = (lower << 52) >> 52; 1589 upper -= lower; 1590 upper = (int32_t)upper; 1591 lui(Rd, upper); 1592 addi(Rd, Rd, lower); 1593 1594 // Load the rest 17 bits. 1595 slli(Rd, Rd, 11); 1596 addi(Rd, Rd, (imm64 >> 6) & 0x7ff); 1597 slli(Rd, Rd, 6); 1598 1599 // This offset will be used by following jalr/ld. 1600 offset = imm64 & 0x3f; 1601 } 1602 1603 void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register temp) { 1604 if (is_simm12(increment)) { 1605 addi(Rd, Rn, increment); 1606 } else { 1607 assert_different_registers(Rn, temp); 1608 li(temp, increment); 1609 add(Rd, Rn, temp); 1610 } 1611 } 1612 1613 void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { 1614 if (is_simm12(increment)) { 1615 addiw(Rd, Rn, increment); 1616 } else { 1617 assert_different_registers(Rn, temp); 1618 li(temp, increment); 1619 addw(Rd, Rn, temp); 1620 } 1621 } 1622 1623 void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) { 1624 if (is_simm12(-decrement)) { 1625 addi(Rd, Rn, -decrement); 1626 } else { 1627 assert_different_registers(Rn, temp); 1628 li(temp, decrement); 1629 sub(Rd, Rn, temp); 1630 } 1631 } 1632 1633 void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { 1634 if (is_simm12(-decrement)) { 1635 addiw(Rd, Rn, -decrement); 1636 } else { 1637 assert_different_registers(Rn, temp); 1638 li(temp, decrement); 1639 subw(Rd, Rn, temp); 1640 } 1641 } 1642 1643 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { 1644 andr(Rd, Rs1, Rs2); 1645 sign_extend(Rd, Rd, 32); 1646 } 1647 1648 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { 1649 orr(Rd, Rs1, Rs2); 1650 sign_extend(Rd, Rd, 32); 1651 } 1652 1653 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { 1654 xorr(Rd, Rs1, Rs2); 1655 sign_extend(Rd, Rd, 32); 1656 } 1657 1658 // Rd = Rs1 & (~Rd2) 1659 void MacroAssembler::andn(Register Rd, Register Rs1, Register Rs2) { 1660 if (UseZbb) { 1661 Assembler::andn(Rd, Rs1, Rs2); 1662 return; 1663 } 1664 1665 notr(Rd, Rs2); 1666 andr(Rd, Rs1, Rd); 1667 } 1668 1669 // Rd = Rs1 | (~Rd2) 1670 void MacroAssembler::orn(Register Rd, Register Rs1, Register Rs2) { 1671 if (UseZbb) { 1672 
Assembler::orn(Rd, Rs1, Rs2); 1673 return; 1674 } 1675 1676 notr(Rd, Rs2); 1677 orr(Rd, Rs1, Rd); 1678 } 1679 1680 // Note: load_unsigned_short used to be called load_unsigned_word. 1681 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 1682 int off = offset(); 1683 lhu(dst, src); 1684 return off; 1685 } 1686 1687 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 1688 int off = offset(); 1689 lbu(dst, src); 1690 return off; 1691 } 1692 1693 int MacroAssembler::load_signed_short(Register dst, Address src) { 1694 int off = offset(); 1695 lh(dst, src); 1696 return off; 1697 } 1698 1699 int MacroAssembler::load_signed_byte(Register dst, Address src) { 1700 int off = offset(); 1701 lb(dst, src); 1702 return off; 1703 } 1704 1705 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 1706 switch (size_in_bytes) { 1707 case 8: ld(dst, src); break; 1708 case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; 1709 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 1710 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 1711 default: ShouldNotReachHere(); 1712 } 1713 } 1714 1715 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes) { 1716 switch (size_in_bytes) { 1717 case 8: sd(src, dst); break; 1718 case 4: sw(src, dst); break; 1719 case 2: sh(src, dst); break; 1720 case 1: sb(src, dst); break; 1721 default: ShouldNotReachHere(); 1722 } 1723 } 1724 1725 // granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register 1726 void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 1727 if (granularity != 1 && granularity != 2) { 1728 ShouldNotReachHere(); 1729 } 1730 if (AvoidUnalignedAccesses && (granularity != 2)) { 1731 assert_different_registers(dst, tmp); 1732 assert_different_registers(tmp, src.base()); 1733 is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1)); 1734 slli(tmp, tmp, 8); 1735 lbu(dst, src); 1736 add(dst, dst, tmp); 1737 } else { 1738 is_signed ? lh(dst, src) : lhu(dst, src); 1739 } 1740 } 1741 1742 // granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register 1743 void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 1744 if (AvoidUnalignedAccesses && (granularity != 4)) { 1745 switch(granularity) { 1746 case 1: 1747 assert_different_registers(dst, tmp, src.base()); 1748 lbu(dst, src); 1749 lbu(tmp, Address(src.base(), src.offset() + 1)); 1750 slli(tmp, tmp, 8); 1751 add(dst, dst, tmp); 1752 lbu(tmp, Address(src.base(), src.offset() + 2)); 1753 slli(tmp, tmp, 16); 1754 add(dst, dst, tmp); 1755 is_signed ? lb(tmp, Address(src.base(), src.offset() + 3)) : lbu(tmp, Address(src.base(), src.offset() + 3)); 1756 slli(tmp, tmp, 24); 1757 add(dst, dst, tmp); 1758 break; 1759 case 2: 1760 assert_different_registers(dst, tmp); 1761 assert_different_registers(tmp, src.base()); 1762 is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2)); 1763 slli(tmp, tmp, 16); 1764 lhu(dst, src); 1765 add(dst, dst, tmp); 1766 break; 1767 default: 1768 ShouldNotReachHere(); 1769 } 1770 } else { 1771 is_signed ? 
lw(dst, src) : lwu(dst, src); 1772 } 1773 } 1774 1775 // granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register 1776 void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) { 1777 if (AvoidUnalignedAccesses && (granularity != 8)) { 1778 switch(granularity){ 1779 case 1: 1780 assert_different_registers(dst, tmp, src.base()); 1781 lbu(dst, src); 1782 lbu(tmp, Address(src.base(), src.offset() + 1)); 1783 slli(tmp, tmp, 8); 1784 add(dst, dst, tmp); 1785 lbu(tmp, Address(src.base(), src.offset() + 2)); 1786 slli(tmp, tmp, 16); 1787 add(dst, dst, tmp); 1788 lbu(tmp, Address(src.base(), src.offset() + 3)); 1789 slli(tmp, tmp, 24); 1790 add(dst, dst, tmp); 1791 lbu(tmp, Address(src.base(), src.offset() + 4)); 1792 slli(tmp, tmp, 32); 1793 add(dst, dst, tmp); 1794 lbu(tmp, Address(src.base(), src.offset() + 5)); 1795 slli(tmp, tmp, 40); 1796 add(dst, dst, tmp); 1797 lbu(tmp, Address(src.base(), src.offset() + 6)); 1798 slli(tmp, tmp, 48); 1799 add(dst, dst, tmp); 1800 lbu(tmp, Address(src.base(), src.offset() + 7)); 1801 slli(tmp, tmp, 56); 1802 add(dst, dst, tmp); 1803 break; 1804 case 2: 1805 assert_different_registers(dst, tmp, src.base()); 1806 lhu(dst, src); 1807 lhu(tmp, Address(src.base(), src.offset() + 2)); 1808 slli(tmp, tmp, 16); 1809 add(dst, dst, tmp); 1810 lhu(tmp, Address(src.base(), src.offset() + 4)); 1811 slli(tmp, tmp, 32); 1812 add(dst, dst, tmp); 1813 lhu(tmp, Address(src.base(), src.offset() + 6)); 1814 slli(tmp, tmp, 48); 1815 add(dst, dst, tmp); 1816 break; 1817 case 4: 1818 assert_different_registers(dst, tmp); 1819 assert_different_registers(tmp, src.base()); 1820 lwu(tmp, Address(src.base(), src.offset() + 4)); 1821 slli(tmp, tmp, 32); 1822 lwu(dst, src); 1823 add(dst, dst, tmp); 1824 break; 1825 default: 1826 ShouldNotReachHere(); 1827 } 1828 } else { 1829 ld(dst, src); 1830 } 1831 } 1832 1833 1834 // reverse bytes in halfword in lower 16 bits and sign-extend 1835 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) 1836 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { 1837 if (UseZbb) { 1838 rev8(Rd, Rs); 1839 srai(Rd, Rd, 48); 1840 return; 1841 } 1842 assert_different_registers(Rs, tmp); 1843 assert_different_registers(Rd, tmp); 1844 srli(tmp, Rs, 8); 1845 andi(tmp, tmp, 0xFF); 1846 slli(Rd, Rs, 56); 1847 srai(Rd, Rd, 48); // sign-extend 1848 orr(Rd, Rd, tmp); 1849 } 1850 1851 // reverse bytes in lower word and sign-extend 1852 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) 1853 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1854 if (UseZbb) { 1855 rev8(Rd, Rs); 1856 srai(Rd, Rd, 32); 1857 return; 1858 } 1859 assert_different_registers(Rs, tmp1, tmp2); 1860 assert_different_registers(Rd, tmp1, tmp2); 1861 revb_h_w_u(Rd, Rs, tmp1, tmp2); 1862 slli(tmp2, Rd, 48); 1863 srai(tmp2, tmp2, 32); // sign-extend 1864 srli(Rd, Rd, 16); 1865 orr(Rd, Rd, tmp2); 1866 } 1867 1868 // reverse bytes in halfword in lower 16 bits and zero-extend 1869 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 1870 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { 1871 if (UseZbb) { 1872 rev8(Rd, Rs); 1873 srli(Rd, Rd, 48); 1874 return; 1875 } 1876 assert_different_registers(Rs, tmp); 1877 assert_different_registers(Rd, tmp); 1878 srli(tmp, Rs, 8); 1879 andi(tmp, tmp, 0xFF); 1880 andi(Rd, Rs, 0xFF); 1881 slli(Rd, Rd, 8); 1882 orr(Rd, Rd, tmp); 1883 } 1884 1885 // reverse 
bytes in halfwords in lower 32 bits and zero-extend 1886 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 1887 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1888 if (UseZbb) { 1889 rev8(Rd, Rs); 1890 rori(Rd, Rd, 32); 1891 roriw(Rd, Rd, 16); 1892 zero_extend(Rd, Rd, 32); 1893 return; 1894 } 1895 assert_different_registers(Rs, tmp1, tmp2); 1896 assert_different_registers(Rd, tmp1, tmp2); 1897 srli(tmp2, Rs, 16); 1898 revb_h_h_u(tmp2, tmp2, tmp1); 1899 revb_h_h_u(Rd, Rs, tmp1); 1900 slli(tmp2, tmp2, 16); 1901 orr(Rd, Rd, tmp2); 1902 } 1903 1904 // This method is only used for revb_h 1905 // Rd = Rs[47:0] Rs[55:48] Rs[63:56] 1906 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1907 assert_different_registers(Rs, tmp1, tmp2); 1908 assert_different_registers(Rd, tmp1); 1909 srli(tmp1, Rs, 48); 1910 andi(tmp2, tmp1, 0xFF); 1911 slli(tmp2, tmp2, 8); 1912 srli(tmp1, tmp1, 8); 1913 orr(tmp1, tmp1, tmp2); 1914 slli(Rd, Rs, 16); 1915 orr(Rd, Rd, tmp1); 1916 } 1917 1918 // reverse bytes in each halfword 1919 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] 1920 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1921 if (UseZbb) { 1922 assert_different_registers(Rs, tmp1); 1923 assert_different_registers(Rd, tmp1); 1924 rev8(Rd, Rs); 1925 zero_extend(tmp1, Rd, 32); 1926 roriw(tmp1, tmp1, 16); 1927 slli(tmp1, tmp1, 32); 1928 srli(Rd, Rd, 32); 1929 roriw(Rd, Rd, 16); 1930 zero_extend(Rd, Rd, 32); 1931 orr(Rd, Rd, tmp1); 1932 return; 1933 } 1934 assert_different_registers(Rs, tmp1, tmp2); 1935 assert_different_registers(Rd, tmp1, tmp2); 1936 revb_h_helper(Rd, Rs, tmp1, tmp2); 1937 for (int i = 0; i < 3; ++i) { 1938 revb_h_helper(Rd, Rd, tmp1, tmp2); 1939 } 1940 } 1941 1942 // reverse bytes in each word 1943 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] 1944 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1945 if (UseZbb) { 1946 rev8(Rd, Rs); 1947 rori(Rd, Rd, 32); 1948 return; 1949 } 1950 assert_different_registers(Rs, tmp1, tmp2); 1951 assert_different_registers(Rd, tmp1, tmp2); 1952 revb(Rd, Rs, tmp1, tmp2); 1953 ror_imm(Rd, Rd, 32); 1954 } 1955 1956 // reverse bytes in doubleword 1957 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] 1958 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1959 if (UseZbb) { 1960 rev8(Rd, Rs); 1961 return; 1962 } 1963 assert_different_registers(Rs, tmp1, tmp2); 1964 assert_different_registers(Rd, tmp1, tmp2); 1965 andi(tmp1, Rs, 0xFF); 1966 slli(tmp1, tmp1, 8); 1967 for (int step = 8; step < 56; step += 8) { 1968 srli(tmp2, Rs, step); 1969 andi(tmp2, tmp2, 0xFF); 1970 orr(tmp1, tmp1, tmp2); 1971 slli(tmp1, tmp1, 8); 1972 } 1973 srli(Rd, Rs, 56); 1974 andi(Rd, Rd, 0xFF); 1975 orr(Rd, tmp1, Rd); 1976 } 1977 1978 // rotate right with shift bits 1979 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) 1980 { 1981 if (UseZbb) { 1982 rori(dst, src, shift); 1983 return; 1984 } 1985 1986 assert_different_registers(dst, tmp); 1987 assert_different_registers(src, tmp); 1988 assert(shift < 64, "shift amount must be < 64"); 1989 slli(tmp, src, 64 - shift); 1990 srli(dst, src, shift); 1991 orr(dst, dst, tmp); 1992 } 1993 1994 // rotate left with shift bits, 32-bit version 1995 void 
MacroAssembler::rolw_imm(Register dst, Register src, uint32_t shift, Register tmp) { 1996 if (UseZbb) { 1997 // no roliw available 1998 roriw(dst, src, 32 - shift); 1999 return; 2000 } 2001 2002 assert_different_registers(dst, tmp); 2003 assert_different_registers(src, tmp); 2004 assert(shift < 32, "shift amount must be < 32"); 2005 srliw(tmp, src, 32 - shift); 2006 slliw(dst, src, shift); 2007 orr(dst, dst, tmp); 2008 } 2009 2010 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { 2011 if (is_simm12(imm)) { 2012 and_imm12(Rd, Rn, imm); 2013 } else { 2014 assert_different_registers(Rn, tmp); 2015 mv(tmp, imm); 2016 andr(Rd, Rn, tmp); 2017 } 2018 } 2019 2020 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { 2021 ld(tmp1, adr); 2022 if (src.is_register()) { 2023 orr(tmp1, tmp1, src.as_register()); 2024 } else { 2025 if (is_simm12(src.as_constant())) { 2026 ori(tmp1, tmp1, src.as_constant()); 2027 } else { 2028 assert_different_registers(tmp1, tmp2); 2029 mv(tmp2, src.as_constant()); 2030 orr(tmp1, tmp1, tmp2); 2031 } 2032 } 2033 sd(tmp1, adr); 2034 } 2035 2036 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L) { 2037 assert_different_registers(oop, trial_klass, tmp1, tmp2); 2038 if (UseCompressedClassPointers) { 2039 lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2040 if (CompressedKlassPointers::base() == nullptr) { 2041 slli(tmp1, tmp1, CompressedKlassPointers::shift()); 2042 beq(trial_klass, tmp1, L); 2043 return; 2044 } 2045 decode_klass_not_null(tmp1, tmp2); 2046 } else { 2047 ld(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2048 } 2049 beq(trial_klass, tmp1, L); 2050 } 2051 2052 // Move an oop into a register. 2053 void MacroAssembler::movoop(Register dst, jobject obj) { 2054 int oop_index; 2055 if (obj == nullptr) { 2056 oop_index = oop_recorder()->allocate_oop_index(obj); 2057 } else { 2058 #ifdef ASSERT 2059 { 2060 ThreadInVMfromUnknown tiv; 2061 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 2062 } 2063 #endif 2064 oop_index = oop_recorder()->find_index(obj); 2065 } 2066 RelocationHolder rspec = oop_Relocation::spec(oop_index); 2067 2068 if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) { 2069 mv(dst, Address((address)obj, rspec)); 2070 } else { 2071 address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address 2072 ld_constant(dst, Address(dummy, rspec)); 2073 } 2074 } 2075 2076 // Move a metadata address into a register. 2077 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 2078 int oop_index; 2079 if (obj == nullptr) { 2080 oop_index = oop_recorder()->allocate_metadata_index(obj); 2081 } else { 2082 oop_index = oop_recorder()->find_index(obj); 2083 } 2084 RelocationHolder rspec = metadata_Relocation::spec(oop_index); 2085 mv(dst, Address((address)obj, rspec)); 2086 } 2087 2088 // Writes to stack successive pages until offset reached to check for 2089 // stack overflow + shadow pages. This clobbers tmp. 2090 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 2091 assert_different_registers(tmp, size, t0); 2092 // Bang stack for total size given plus shadow page size. 2093 // Bang one page at a time because large size can bang beyond yellow and 2094 // red zones. 
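  // (The stores in the loops below are never read back; they exist only to
  //  touch each page so the OS maps the stack or raises the expected fault
  //  before we run past the guard zones. The value written, the remaining
  //  bang size, is just a convenient debugging breadcrumb.)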
2095   mv(t0, (int)os::vm_page_size());
2096   Label loop;
2097   bind(loop);
2098   sub(tmp, sp, t0);
2099   subw(size, size, t0);
2100   sd(size, Address(tmp));
2101   bgtz(size, loop);
2102
2103   // Bang down shadow pages too.
2104   // At this point, (tmp-0) is the last address touched, so don't
2105   // touch it again. (It was touched as (tmp-pagesize) but then tmp
2106   // was post-decremented.) Skip this address by starting at i=1, and
2107   // touch a few more pages below. N.B. It is important to touch all
2108   // the way down to and including i=StackShadowPages.
2109   for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / (int)os::vm_page_size()) - 1; i++) {
2110     // this could be any sized move but this can be a debugging crumb
2111     // so the bigger the better.
2112     sub(tmp, tmp, (int)os::vm_page_size());
2113     sd(size, Address(tmp, 0));
2114   }
2115 }
2116
2117 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) {
2118   int32_t offset = 0;
2119   _masm = masm;
2120   ExternalAddress target((address)flag_addr);
2121   _masm->relocate(target.rspec(), [&] {
2122     int32_t offset;
2123     _masm->la_patchable(t0, target, offset);
2124     _masm->lbu(t0, Address(t0, offset));
2125   });
2126   if (value) {
2127     _masm->bnez(t0, _label);
2128   } else {
2129     _masm->beqz(t0, _label);
2130   }
2131 }
2132
2133 SkipIfEqual::~SkipIfEqual() {
2134   _masm->bind(_label);
2135   _masm = nullptr;
2136 }
2137
2138 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) {
2139   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2140   ld(dst, Address(xmethod, Method::const_offset()));
2141   ld(dst, Address(dst, ConstMethod::constants_offset()));
2142   ld(dst, Address(dst, ConstantPool::pool_holder_offset()));
2143   ld(dst, Address(dst, mirror_offset));
2144   resolve_oop_handle(dst, tmp1, tmp2);
2145 }
2146
2147 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) {
2148   // OopHandle::resolve is an indirection.
2149   assert_different_registers(result, tmp1, tmp2);
2150   access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2);
2151 }
2152
2153 // ((WeakHandle)result).resolve()
2154 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) {
2155   assert_different_registers(result, tmp1, tmp2);
2156   Label resolved;
2157
2158   // A null weak handle resolves to null.
2159   beqz(result, resolved);
2160
2161   // Only 64 bit platforms support GCs that require a tmp register
2162   // Only IN_HEAP loads require a thread_tmp register
2163   // WeakHandle::resolve is an indirection like jweak.
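  // IN_NATIVE: the handle lives outside the Java heap. ON_PHANTOM_OOP_REF:
  // the load has jweak (phantom) strength, so a GC load barrier may return
  // null once the referent is no longer strongly reachable.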
2164 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, 2165 result, Address(result), tmp1, tmp2); 2166 bind(resolved); 2167 } 2168 2169 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 2170 Register dst, Address src, 2171 Register tmp1, Register tmp2) { 2172 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2173 decorators = AccessInternal::decorator_fixup(decorators, type); 2174 bool as_raw = (decorators & AS_RAW) != 0; 2175 if (as_raw) { 2176 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2); 2177 } else { 2178 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2); 2179 } 2180 } 2181 2182 void MacroAssembler::null_check(Register reg, int offset) { 2183 if (needs_explicit_null_check(offset)) { 2184 // provoke OS null exception if reg is null by 2185 // accessing M[reg] w/o changing any registers 2186 // NOTE: this is plenty to provoke a segv 2187 ld(zr, Address(reg, 0)); 2188 } else { 2189 // nothing to do, (later) access of M[reg + offset] 2190 // will provoke OS null exception if reg is null 2191 } 2192 } 2193 2194 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 2195 Address dst, Register val, 2196 Register tmp1, Register tmp2, Register tmp3) { 2197 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2198 decorators = AccessInternal::decorator_fixup(decorators, type); 2199 bool as_raw = (decorators & AS_RAW) != 0; 2200 if (as_raw) { 2201 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2202 } else { 2203 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2204 } 2205 } 2206 2207 // Algorithm must match CompressedOops::encode. 2208 void MacroAssembler::encode_heap_oop(Register d, Register s) { 2209 verify_oop_msg(s, "broken oop in encode_heap_oop"); 2210 if (CompressedOops::base() == nullptr) { 2211 if (CompressedOops::shift() != 0) { 2212 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2213 srli(d, s, LogMinObjAlignmentInBytes); 2214 } else { 2215 mv(d, s); 2216 } 2217 } else { 2218 Label notNull; 2219 sub(d, s, xheapbase); 2220 bgez(d, notNull); 2221 mv(d, zr); 2222 bind(notNull); 2223 if (CompressedOops::shift() != 0) { 2224 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2225 srli(d, d, CompressedOops::shift()); 2226 } 2227 } 2228 } 2229 2230 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { 2231 assert_different_registers(dst, tmp); 2232 assert_different_registers(src, tmp); 2233 if (UseCompressedClassPointers) { 2234 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2235 decode_klass_not_null(dst, tmp); 2236 } else { 2237 ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2238 } 2239 } 2240 2241 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { 2242 // FIXME: Should this be a store release? concurrent gcs assumes 2243 // klass length is valid if klass field is not null. 
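  // With compressed class pointers only the 32-bit narrow Klass* is stored
  // (the sw below); otherwise the full 64-bit Klass* is stored with sd.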
2244 if (UseCompressedClassPointers) { 2245 encode_klass_not_null(src, tmp); 2246 sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2247 } else { 2248 sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2249 } 2250 } 2251 2252 void MacroAssembler::store_klass_gap(Register dst, Register src) { 2253 if (UseCompressedClassPointers) { 2254 // Store to klass gap in destination 2255 sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2256 } 2257 } 2258 2259 void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { 2260 assert_different_registers(r, tmp); 2261 decode_klass_not_null(r, r, tmp); 2262 } 2263 2264 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { 2265 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2266 2267 if (CompressedKlassPointers::base() == nullptr) { 2268 if (CompressedKlassPointers::shift() != 0) { 2269 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2270 slli(dst, src, LogKlassAlignmentInBytes); 2271 } else { 2272 mv(dst, src); 2273 } 2274 return; 2275 } 2276 2277 Register xbase = dst; 2278 if (dst == src) { 2279 xbase = tmp; 2280 } 2281 2282 assert_different_registers(src, xbase); 2283 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2284 2285 if (CompressedKlassPointers::shift() != 0) { 2286 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2287 assert_different_registers(t0, xbase); 2288 shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); 2289 } else { 2290 add(dst, xbase, src); 2291 } 2292 } 2293 2294 void MacroAssembler::encode_klass_not_null(Register r, Register tmp) { 2295 assert_different_registers(r, tmp); 2296 encode_klass_not_null(r, r, tmp); 2297 } 2298 2299 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { 2300 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2301 2302 if (CompressedKlassPointers::base() == nullptr) { 2303 if (CompressedKlassPointers::shift() != 0) { 2304 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2305 srli(dst, src, LogKlassAlignmentInBytes); 2306 } else { 2307 mv(dst, src); 2308 } 2309 return; 2310 } 2311 2312 if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && 2313 CompressedKlassPointers::shift() == 0) { 2314 zero_extend(dst, src, 32); 2315 return; 2316 } 2317 2318 Register xbase = dst; 2319 if (dst == src) { 2320 xbase = tmp; 2321 } 2322 2323 assert_different_registers(src, xbase); 2324 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2325 sub(dst, src, xbase); 2326 if (CompressedKlassPointers::shift() != 0) { 2327 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2328 srli(dst, dst, LogKlassAlignmentInBytes); 2329 } 2330 } 2331 2332 void MacroAssembler::decode_heap_oop_not_null(Register r) { 2333 decode_heap_oop_not_null(r, r); 2334 } 2335 2336 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2337 assert(UseCompressedOops, "should only be used for compressed headers"); 2338 assert(Universe::heap() != nullptr, "java heap should be initialized"); 2339 // Cannot assert, unverified entry point counts instructions (see .ad file) 2340 // vtableStubs also counts instructions in pd_code_size_limit. 2341 // Also do not verify_oop as this is called by verify_oop. 
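  // The decode below mirrors CompressedOops::decode_not_null:
  //   oop = (narrow << shift) + (base == nullptr ? 0 : base)
  // e.g. with 8-byte object alignment (shift == 3) a narrow value of 0x10
  // decodes to xheapbase + 0x80 (illustrative values only).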
2342 if (CompressedOops::shift() != 0) { 2343 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2344 slli(dst, src, LogMinObjAlignmentInBytes); 2345 if (CompressedOops::base() != nullptr) { 2346 add(dst, xheapbase, dst); 2347 } 2348 } else { 2349 assert(CompressedOops::base() == nullptr, "sanity"); 2350 mv(dst, src); 2351 } 2352 } 2353 2354 void MacroAssembler::decode_heap_oop(Register d, Register s) { 2355 if (CompressedOops::base() == nullptr) { 2356 if (CompressedOops::shift() != 0 || d != s) { 2357 slli(d, s, CompressedOops::shift()); 2358 } 2359 } else { 2360 Label done; 2361 mv(d, s); 2362 beqz(s, done); 2363 shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); 2364 bind(done); 2365 } 2366 verify_oop_msg(d, "broken oop in decode_heap_oop"); 2367 } 2368 2369 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1, 2370 Register tmp2, Register tmp3, DecoratorSet decorators) { 2371 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3); 2372 } 2373 2374 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, 2375 Register tmp2, DecoratorSet decorators) { 2376 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); 2377 } 2378 2379 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, 2380 Register tmp2, DecoratorSet decorators) { 2381 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2); 2382 } 2383 2384 // Used for storing nulls. 2385 void MacroAssembler::store_heap_oop_null(Address dst) { 2386 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); 2387 } 2388 2389 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, 2390 bool want_remainder) 2391 { 2392 // Full implementation of Java idiv and irem. The function 2393 // returns the (pc) offset of the div instruction - may be needed 2394 // for implicit exceptions. 2395 // 2396 // input : rs1: dividend 2397 // rs2: divisor 2398 // 2399 // result: either 2400 // quotient (= rs1 idiv rs2) 2401 // remainder (= rs1 irem rs2) 2402 2403 2404 int idivl_offset = offset(); 2405 if (!want_remainder) { 2406 divw(result, rs1, rs2); 2407 } else { 2408 remw(result, rs1, rs2); // result = rs1 % rs2; 2409 } 2410 return idivl_offset; 2411 } 2412 2413 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, 2414 bool want_remainder) 2415 { 2416 // Full implementation of Java ldiv and lrem. The function 2417 // returns the (pc) offset of the div instruction - may be needed 2418 // for implicit exceptions. 2419 // 2420 // input : rs1: dividend 2421 // rs2: divisor 2422 // 2423 // result: either 2424 // quotient (= rs1 idiv rs2) 2425 // remainder (= rs1 irem rs2) 2426 2427 int idivq_offset = offset(); 2428 if (!want_remainder) { 2429 div(result, rs1, rs2); 2430 } else { 2431 rem(result, rs1, rs2); // result = rs1 % rs2; 2432 } 2433 return idivq_offset; 2434 } 2435 2436 // Look up the method for a megamorpic invkkeinterface call. 2437 // The target method is determined by <intf_klass, itable_index>. 2438 // The receiver klass is in recv_klass. 2439 // On success, the result will be in method_result, and execution falls through. 2440 // On failure, execution transfers to the given label. 
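//
// Rough pseudo code for the itable scan emitted below (illustrative only;
// the real layout is defined by itableOffsetEntry/itableMethodEntry):
//
//   itableOffsetEntry* e = first entry after recv_klass' vtable;
//   while (e->interface() != intf_klass) {
//     if (e->interface() == nullptr) goto L_no_such_interface;
//     e++;
//   }
//   if (return_method) {
//     method_result = *(Method**)((address)recv_klass + e->offset()
//                                 + itable_index * wordSize
//                                 + in_bytes(itableMethodEntry::method_offset()));
//   }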
2441 void MacroAssembler::lookup_interface_method(Register recv_klass,
2442                                              Register intf_klass,
2443                                              RegisterOrConstant itable_index,
2444                                              Register method_result,
2445                                              Register scan_tmp,
2446                                              Label& L_no_such_interface,
2447                                              bool return_method) {
2448   assert_different_registers(recv_klass, intf_klass, scan_tmp);
2449   assert_different_registers(method_result, intf_klass, scan_tmp);
2450   assert(recv_klass != method_result || !return_method,
2451          "recv_klass can be destroyed when method isn't needed");
2452   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
2453          "caller must use same register for non-constant itable index as for method");
2454
2455   // Compute start of first itableOffsetEntry (which is at the end of the vtable).
2456   int vtable_base = in_bytes(Klass::vtable_start_offset());
2457   int itentry_off = in_bytes(itableMethodEntry::method_offset());
2458   int scan_step = itableOffsetEntry::size() * wordSize;
2459   int vte_size = vtableEntry::size_in_bytes();
2460   assert(vte_size == wordSize, "else adjust times_vte_scale");
2461
2462   lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
2463
2464   // %%% Could store the aligned, prescaled offset in the klassoop.
2465   shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
2466   add(scan_tmp, scan_tmp, vtable_base);
2467
2468   if (return_method) {
2469     // Adjust recv_klass by scaled itable_index, so we can free itable_index.
2470     assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
2471     if (itable_index.is_register()) {
2472       slli(t0, itable_index.as_register(), 3);
2473     } else {
2474       mv(t0, itable_index.as_constant() << 3);
2475     }
2476     add(recv_klass, recv_klass, t0);
2477     if (itentry_off) {
2478       add(recv_klass, recv_klass, itentry_off);
2479     }
2480   }
2481
2482   Label search, found_method;
2483
2484   ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset()));
2485   beq(intf_klass, method_result, found_method);
2486   bind(search);
2487   // Check that the previous entry is non-null. A null entry means that
2488   // the receiver class doesn't implement the interface, and wasn't the
2489   // same as when the caller was compiled.
2490   beqz(method_result, L_no_such_interface, /* is_far */ true);
2491   addi(scan_tmp, scan_tmp, scan_step);
2492   ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset()));
2493   bne(intf_klass, method_result, search);
2494
2495   bind(found_method);
2496
2497   // Got a hit.
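  // scan_tmp points at the matching itableOffsetEntry; its offset field is
  // the distance from the original recv_klass to this interface's method
  // table, and recv_klass was already advanced by the scaled itable_index
  // above, so one more add plus a load yields the Method*.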
2498 if (return_method) { 2499 lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset())); 2500 add(method_result, recv_klass, scan_tmp); 2501 ld(method_result, Address(method_result)); 2502 } 2503 } 2504 2505 // virtual method calling 2506 void MacroAssembler::lookup_virtual_method(Register recv_klass, 2507 RegisterOrConstant vtable_index, 2508 Register method_result) { 2509 const ByteSize base = Klass::vtable_start_offset(); 2510 assert(vtableEntry::size() * wordSize == 8, 2511 "adjust the scaling in the code below"); 2512 int vtable_offset_in_bytes = in_bytes(base + vtableEntry::method_offset()); 2513 2514 if (vtable_index.is_register()) { 2515 shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); 2516 ld(method_result, Address(method_result, vtable_offset_in_bytes)); 2517 } else { 2518 vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; 2519 ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); 2520 } 2521 } 2522 2523 void MacroAssembler::membar(uint32_t order_constraint) { 2524 address prev = pc() - NativeMembar::instruction_size; 2525 address last = code()->last_insn(); 2526 2527 if (last != nullptr && nativeInstruction_at(last)->is_membar() && prev == last) { 2528 NativeMembar *bar = NativeMembar_at(prev); 2529 // We are merging two memory barrier instructions. On RISCV we 2530 // can do this simply by ORing them together. 2531 bar->set_kind(bar->get_kind() | order_constraint); 2532 BLOCK_COMMENT("merged membar"); 2533 } else { 2534 code()->set_last_insn(pc()); 2535 2536 uint32_t predecessor = 0; 2537 uint32_t successor = 0; 2538 2539 membar_mask_to_pred_succ(order_constraint, predecessor, successor); 2540 fence(predecessor, successor); 2541 } 2542 } 2543 2544 // Form an address from base + offset in Rd. Rd my or may not 2545 // actually be used: you must use the Address that is returned. It 2546 // is up to you to ensure that the shift provided matches the size 2547 // of your data. 2548 Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset) { 2549 if (is_simm12(byte_offset)) { // 12: imm in range 2^12 2550 return Address(base, byte_offset); 2551 } 2552 2553 assert_different_registers(Rd, base, noreg); 2554 2555 // Do it the hard way 2556 mv(Rd, byte_offset); 2557 add(Rd, base, Rd); 2558 return Address(Rd); 2559 } 2560 2561 void MacroAssembler::check_klass_subtype(Register sub_klass, 2562 Register super_klass, 2563 Register tmp_reg, 2564 Label& L_success) { 2565 Label L_failure; 2566 check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, nullptr); 2567 check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, nullptr); 2568 bind(L_failure); 2569 } 2570 2571 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { 2572 ld(t0, Address(xthread, JavaThread::polling_word_offset())); 2573 if (acquire) { 2574 membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); 2575 } 2576 if (at_return) { 2577 bgtu(in_nmethod ? 
sp : fp, t0, slow_path, /* is_far */ true); 2578 } else { 2579 test_bit(t0, t0, exact_log2(SafepointMechanism::poll_bit())); 2580 bnez(t0, slow_path, true /* is_far */); 2581 } 2582 } 2583 2584 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, 2585 Label &succeed, Label *fail) { 2586 assert_different_registers(addr, tmp); 2587 assert_different_registers(newv, tmp); 2588 assert_different_registers(oldv, tmp); 2589 2590 // oldv holds comparison value 2591 // newv holds value to write in exchange 2592 // addr identifies memory word to compare against/update 2593 Label retry_load, nope; 2594 bind(retry_load); 2595 // Load reserved from the memory location 2596 lr_d(tmp, addr, Assembler::aqrl); 2597 // Fail and exit if it is not what we expect 2598 bne(tmp, oldv, nope); 2599 // If the store conditional succeeds, tmp will be zero 2600 sc_d(tmp, newv, addr, Assembler::rl); 2601 beqz(tmp, succeed); 2602 // Retry only when the store conditional failed 2603 j(retry_load); 2604 2605 bind(nope); 2606 membar(AnyAny); 2607 mv(oldv, tmp); 2608 if (fail != nullptr) { 2609 j(*fail); 2610 } 2611 } 2612 2613 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, 2614 Label &succeed, Label *fail) { 2615 assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); 2616 cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); 2617 } 2618 2619 void MacroAssembler::load_reserved(Register addr, 2620 enum operand_size size, 2621 Assembler::Aqrl acquire) { 2622 switch (size) { 2623 case int64: 2624 lr_d(t0, addr, acquire); 2625 break; 2626 case int32: 2627 lr_w(t0, addr, acquire); 2628 break; 2629 case uint32: 2630 lr_w(t0, addr, acquire); 2631 zero_extend(t0, t0, 32); 2632 break; 2633 default: 2634 ShouldNotReachHere(); 2635 } 2636 } 2637 2638 void MacroAssembler::store_conditional(Register addr, 2639 Register new_val, 2640 enum operand_size size, 2641 Assembler::Aqrl release) { 2642 switch (size) { 2643 case int64: 2644 sc_d(t0, new_val, addr, release); 2645 break; 2646 case int32: 2647 case uint32: 2648 sc_w(t0, new_val, addr, release); 2649 break; 2650 default: 2651 ShouldNotReachHere(); 2652 } 2653 } 2654 2655 2656 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, 2657 Register new_val, 2658 enum operand_size size, 2659 Register tmp1, Register tmp2, Register tmp3) { 2660 assert(size == int8 || size == int16, "unsupported operand size"); 2661 2662 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; 2663 2664 andi(shift, addr, 3); 2665 slli(shift, shift, 3); 2666 2667 andi(aligned_addr, addr, ~3); 2668 2669 if (size == int8) { 2670 mv(mask, 0xff); 2671 } else { 2672 // size == int16 case 2673 mv(mask, -1); 2674 zero_extend(mask, mask, 16); 2675 } 2676 sll(mask, mask, shift); 2677 2678 xori(not_mask, mask, -1); 2679 2680 sll(expected, expected, shift); 2681 andr(expected, expected, mask); 2682 2683 sll(new_val, new_val, shift); 2684 andr(new_val, new_val, mask); 2685 } 2686 2687 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. 2688 // It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, 2689 // which are forced to work with 4-byte aligned address. 
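//
// Sketch of the address narrowing done by cmpxchg_narrow_value_helper,
// assuming a 1-byte CAS at an address whose low two bits are 2
// (illustrative values only):
//   aligned_addr = addr & ~3
//   shift        = (addr & 3) * 8        == 16
//   mask         = 0xff << shift         == 0x00ff0000
//   not_mask     = ~mask
// The lr.w/sc.w loop then compares only (old & mask) with the shifted
// expected value and splices (old & not_mask) | new_val back into memory.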
2690 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, 2691 Register new_val, 2692 enum operand_size size, 2693 Assembler::Aqrl acquire, Assembler::Aqrl release, 2694 Register result, bool result_as_bool, 2695 Register tmp1, Register tmp2, Register tmp3) { 2696 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 2697 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 2698 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 2699 2700 Label retry, fail, done; 2701 2702 bind(retry); 2703 lr_w(old, aligned_addr, acquire); 2704 andr(tmp, old, mask); 2705 bne(tmp, expected, fail); 2706 2707 andr(tmp, old, not_mask); 2708 orr(tmp, tmp, new_val); 2709 sc_w(tmp, tmp, aligned_addr, release); 2710 bnez(tmp, retry); 2711 2712 if (result_as_bool) { 2713 mv(result, 1); 2714 j(done); 2715 2716 bind(fail); 2717 mv(result, zr); 2718 2719 bind(done); 2720 } else { 2721 andr(tmp, old, mask); 2722 2723 bind(fail); 2724 srl(result, tmp, shift); 2725 2726 if (size == int8) { 2727 sign_extend(result, result, 8); 2728 } else { 2729 // size == int16 case 2730 sign_extend(result, result, 16); 2731 } 2732 } 2733 } 2734 2735 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement 2736 // the weak CAS stuff. The major difference is that it just failed when store conditional 2737 // failed. 2738 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, 2739 Register new_val, 2740 enum operand_size size, 2741 Assembler::Aqrl acquire, Assembler::Aqrl release, 2742 Register result, 2743 Register tmp1, Register tmp2, Register tmp3) { 2744 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 2745 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 2746 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 2747 2748 Label fail, done; 2749 2750 lr_w(old, aligned_addr, acquire); 2751 andr(tmp, old, mask); 2752 bne(tmp, expected, fail); 2753 2754 andr(tmp, old, not_mask); 2755 orr(tmp, tmp, new_val); 2756 sc_w(tmp, tmp, aligned_addr, release); 2757 bnez(tmp, fail); 2758 2759 // Success 2760 mv(result, 1); 2761 j(done); 2762 2763 // Fail 2764 bind(fail); 2765 mv(result, zr); 2766 2767 bind(done); 2768 } 2769 2770 void MacroAssembler::cmpxchg(Register addr, Register expected, 2771 Register new_val, 2772 enum operand_size size, 2773 Assembler::Aqrl acquire, Assembler::Aqrl release, 2774 Register result, bool result_as_bool) { 2775 assert(size != int8 && size != int16, "unsupported operand size"); 2776 assert_different_registers(addr, t0); 2777 assert_different_registers(expected, t0); 2778 assert_different_registers(new_val, t0); 2779 2780 Label retry_load, done, ne_done; 2781 bind(retry_load); 2782 load_reserved(addr, size, acquire); 2783 bne(t0, expected, ne_done); 2784 store_conditional(addr, new_val, size, release); 2785 bnez(t0, retry_load); 2786 2787 // equal, succeed 2788 if (result_as_bool) { 2789 mv(result, 1); 2790 } else { 2791 mv(result, expected); 2792 } 2793 j(done); 2794 2795 // not equal, failed 2796 bind(ne_done); 2797 if (result_as_bool) { 2798 mv(result, zr); 2799 } else { 2800 mv(result, t0); 2801 } 2802 2803 bind(done); 2804 } 2805 2806 void MacroAssembler::cmpxchg_weak(Register addr, Register expected, 2807 Register new_val, 2808 enum operand_size size, 2809 Assembler::Aqrl acquire, Assembler::Aqrl release, 2810 Register result) { 
2811 assert_different_registers(addr, t0); 2812 assert_different_registers(expected, t0); 2813 assert_different_registers(new_val, t0); 2814 2815 Label fail, done; 2816 load_reserved(addr, size, acquire); 2817 bne(t0, expected, fail); 2818 store_conditional(addr, new_val, size, release); 2819 bnez(t0, fail); 2820 2821 // Success 2822 mv(result, 1); 2823 j(done); 2824 2825 // Fail 2826 bind(fail); 2827 mv(result, zr); 2828 2829 bind(done); 2830 } 2831 2832 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ 2833 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ 2834 prev = prev->is_valid() ? prev : zr; \ 2835 if (incr.is_register()) { \ 2836 AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 2837 } else { \ 2838 mv(t0, incr.as_constant()); \ 2839 AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 2840 } \ 2841 return; \ 2842 } 2843 2844 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) 2845 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) 2846 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) 2847 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) 2848 2849 #undef ATOMIC_OP 2850 2851 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ 2852 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 2853 prev = prev->is_valid() ? prev : zr; \ 2854 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 2855 return; \ 2856 } 2857 2858 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) 2859 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) 2860 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) 2861 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) 2862 2863 #undef ATOMIC_XCHG 2864 2865 #define ATOMIC_XCHGU(OP1, OP2) \ 2866 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 2867 atomic_##OP2(prev, newv, addr); \ 2868 zero_extend(prev, prev, 32); \ 2869 return; \ 2870 } 2871 2872 ATOMIC_XCHGU(xchgwu, xchgw) 2873 ATOMIC_XCHGU(xchgalwu, xchgalw) 2874 2875 #undef ATOMIC_XCHGU 2876 2877 void MacroAssembler::far_jump(Address entry, Register tmp) { 2878 assert(ReservedCodeCacheSize < 4*G, "branch out of range"); 2879 assert(CodeCache::find_blob(entry.target()) != nullptr, 2880 "destination of far call not found in code cache"); 2881 assert(entry.rspec().type() == relocInfo::external_word_type 2882 || entry.rspec().type() == relocInfo::runtime_call_type 2883 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 2884 IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size() 2885 if (far_branches()) { 2886 // We can use auipc + jalr here because we know that the total size of 2887 // the code cache cannot exceed 2Gb. 
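    // (A plain jal reaches only +-1 MiB from the call site; the auipc + jalr
    //  pair emitted below covers a +-2 GiB range, which is enough given the
    //  code-cache size asserted above.)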
2888 relocate(entry.rspec(), [&] { 2889 int32_t offset; 2890 la_patchable(tmp, entry, offset); 2891 jalr(x0, tmp, offset); 2892 }); 2893 } else { 2894 j(entry); 2895 } 2896 } 2897 2898 void MacroAssembler::far_call(Address entry, Register tmp) { 2899 assert(ReservedCodeCacheSize < 4*G, "branch out of range"); 2900 assert(CodeCache::find_blob(entry.target()) != nullptr, 2901 "destination of far call not found in code cache"); 2902 assert(entry.rspec().type() == relocInfo::external_word_type 2903 || entry.rspec().type() == relocInfo::runtime_call_type 2904 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 2905 IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size() 2906 if (far_branches()) { 2907 // We can use auipc + jalr here because we know that the total size of 2908 // the code cache cannot exceed 2Gb. 2909 relocate(entry.rspec(), [&] { 2910 int32_t offset; 2911 la_patchable(tmp, entry, offset); 2912 jalr(x1, tmp, offset); // link 2913 }); 2914 } else { 2915 jal(entry); // link 2916 } 2917 } 2918 2919 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 2920 Register super_klass, 2921 Register tmp_reg, 2922 Label* L_success, 2923 Label* L_failure, 2924 Label* L_slow_path, 2925 Register super_check_offset) { 2926 assert_different_registers(sub_klass, super_klass, tmp_reg); 2927 bool must_load_sco = (super_check_offset == noreg); 2928 if (must_load_sco) { 2929 assert(tmp_reg != noreg, "supply either a temp or a register offset"); 2930 } else { 2931 assert_different_registers(sub_klass, super_klass, super_check_offset); 2932 } 2933 2934 Label L_fallthrough; 2935 int label_nulls = 0; 2936 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 2937 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 2938 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; } 2939 assert(label_nulls <= 1, "at most one null in batch"); 2940 2941 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 2942 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 2943 Address super_check_offset_addr(super_klass, sco_offset); 2944 2945 // Hacked jmp, which may only be used just before L_fallthrough. 2946 #define final_jmp(label) \ 2947 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 2948 else j(label) /*omit semi*/ 2949 2950 // If the pointers are equal, we are done (e.g., String[] elements). 2951 // This self-check enables sharing of secondary supertype arrays among 2952 // non-primary types such as array-of-interface. Otherwise, each such 2953 // type would need its own customized SSA. 2954 // We move this check to the front of the fast path because many 2955 // type checks are in fact trivially successful in this manner, 2956 // so we get a nicely predicted branch right at the start of the check. 2957 beq(sub_klass, super_klass, *L_success); 2958 2959 // Check the supertype display: 2960 if (must_load_sco) { 2961 lwu(tmp_reg, super_check_offset_addr); 2962 super_check_offset = tmp_reg; 2963 } 2964 add(t0, sub_klass, super_check_offset); 2965 Address super_check_addr(t0); 2966 ld(t0, super_check_addr); // load displayed supertype 2967 2968 // This check has worked decisively for primary supers. 2969 // Secondary supers are sought in the super_cache ('super_cache_addr'). 2970 // (Secondary supers are interfaces and very deeply nested subtypes.) 
2971 // This works in the same check above because of a tricky aliasing 2972 // between the super_Cache and the primary super display elements. 2973 // (The 'super_check_addr' can address either, as the case requires.) 2974 // Note that the cache is updated below if it does not help us find 2975 // what we need immediately. 2976 // So if it was a primary super, we can just fail immediately. 2977 // Otherwise, it's the slow path for us (no success at this point). 2978 2979 beq(super_klass, t0, *L_success); 2980 mv(t1, sc_offset); 2981 if (L_failure == &L_fallthrough) { 2982 beq(super_check_offset, t1, *L_slow_path); 2983 } else { 2984 bne(super_check_offset, t1, *L_failure, /* is_far */ true); 2985 final_jmp(*L_slow_path); 2986 } 2987 2988 bind(L_fallthrough); 2989 2990 #undef final_jmp 2991 } 2992 2993 // Scans count pointer sized words at [addr] for occurrence of value, 2994 // generic 2995 void MacroAssembler::repne_scan(Register addr, Register value, Register count, 2996 Register tmp) { 2997 Label Lloop, Lexit; 2998 beqz(count, Lexit); 2999 bind(Lloop); 3000 ld(tmp, addr); 3001 beq(value, tmp, Lexit); 3002 add(addr, addr, wordSize); 3003 sub(count, count, 1); 3004 bnez(count, Lloop); 3005 bind(Lexit); 3006 } 3007 3008 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 3009 Register super_klass, 3010 Register tmp1_reg, 3011 Register tmp2_reg, 3012 Label* L_success, 3013 Label* L_failure) { 3014 assert_different_registers(sub_klass, super_klass, tmp1_reg); 3015 if (tmp2_reg != noreg) { 3016 assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); 3017 } 3018 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) 3019 3020 Label L_fallthrough; 3021 int label_nulls = 0; 3022 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3023 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3024 3025 assert(label_nulls <= 1, "at most one null in the batch"); 3026 3027 // A couple of useful fields in sub_klass: 3028 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3029 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3030 Address secondary_supers_addr(sub_klass, ss_offset); 3031 Address super_cache_addr( sub_klass, sc_offset); 3032 3033 BLOCK_COMMENT("check_klass_subtype_slow_path"); 3034 3035 // Do a linear scan of the secondary super-klass chain. 3036 // This code is rarely used, so simplicity is a virtue here. 3037 // The repne_scan instruction uses fixed registers, which we must spill. 3038 // Don't worry too much about pre-existing connections with the input regs. 3039 3040 assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) 3041 assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) 3042 3043 RegSet pushed_registers; 3044 if (!IS_A_TEMP(x12)) { 3045 pushed_registers += x12; 3046 } 3047 if (!IS_A_TEMP(x15)) { 3048 pushed_registers += x15; 3049 } 3050 3051 if (super_klass != x10) { 3052 if (!IS_A_TEMP(x10)) { 3053 pushed_registers += x10; 3054 } 3055 } 3056 3057 push_reg(pushed_registers, sp); 3058 3059 // Get super_klass value into x10 (even if it was in x15 or x12) 3060 mv(x10, super_klass); 3061 3062 #ifndef PRODUCT 3063 mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); 3064 Address pst_counter_addr(t1); 3065 ld(t0, pst_counter_addr); 3066 add(t0, t0, 1); 3067 sd(t0, pst_counter_addr); 3068 #endif // PRODUCT 3069 3070 // We will consult the secondary-super array. 3071 ld(x15, secondary_supers_addr); 3072 // Load the array length. 
3073 lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes())); 3074 // Skip to start of data. 3075 add(x15, x15, Array<Klass*>::base_offset_in_bytes()); 3076 3077 // Set t0 to an obvious invalid value, falling through by default 3078 mv(t0, -1); 3079 // Scan X12 words at [X15] for an occurrence of X10. 3080 repne_scan(x15, x10, x12, t0); 3081 3082 // pop will restore x10, so we should use a temp register to keep its value 3083 mv(t1, x10); 3084 3085 // Unspill the temp registers: 3086 pop_reg(pushed_registers, sp); 3087 3088 bne(t1, t0, *L_failure); 3089 3090 // Success. Cache the super we found an proceed in triumph. 3091 sd(super_klass, super_cache_addr); 3092 3093 if (L_success != &L_fallthrough) { 3094 j(*L_success); 3095 } 3096 3097 #undef IS_A_TEMP 3098 3099 bind(L_fallthrough); 3100 } 3101 3102 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 3103 void MacroAssembler::tlab_allocate(Register obj, 3104 Register var_size_in_bytes, 3105 int con_size_in_bytes, 3106 Register tmp1, 3107 Register tmp2, 3108 Label& slow_case, 3109 bool is_far) { 3110 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3111 bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); 3112 } 3113 3114 // get_thread() can be called anywhere inside generated code so we 3115 // need to save whatever non-callee save context might get clobbered 3116 // by the call to Thread::current() or, indeed, the call setup code. 3117 void MacroAssembler::get_thread(Register thread) { 3118 // save all call-clobbered regs except thread 3119 RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + 3120 RegSet::range(x28, x31) + ra - thread; 3121 push_reg(saved_regs, sp); 3122 3123 mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); 3124 jalr(ra); 3125 if (thread != c_rarg0) { 3126 mv(thread, c_rarg0); 3127 } 3128 3129 // restore pushed registers 3130 pop_reg(saved_regs, sp); 3131 } 3132 3133 void MacroAssembler::load_byte_map_base(Register reg) { 3134 CardTable::CardValue* byte_map_base = 3135 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); 3136 mv(reg, (uint64_t)byte_map_base); 3137 } 3138 3139 void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { 3140 unsigned long low_address = (uintptr_t)CodeCache::low_bound(); 3141 unsigned long high_address = (uintptr_t)CodeCache::high_bound(); 3142 unsigned long dest_address = (uintptr_t)dest.target(); 3143 long offset_low = dest_address - low_address; 3144 long offset_high = dest_address - high_address; 3145 3146 assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); 3147 assert((uintptr_t)dest.target() < (1ull << 48), "bad address"); 3148 3149 // RISC-V doesn't compute a page-aligned address, in order to partially 3150 // compensate for the use of *signed* offsets in its base+disp12 3151 // addressing mode (RISC-V's PC-relative reach remains asymmetric 3152 // [-(2G + 2K), 2G - 2K). 
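  // In the near branch below the 32-bit pc-relative distance is split into
  //   auipc operand: distance + 0x800 (the assembler keeps its upper 20 bits)
  //   offset:        sign-extended low 12 bits of distance
  // so auipc(reg1, ...) plus a following ld/jalr that applies 'offset' lands
  // exactly on the target; e.g. a distance of 0x12345fff splits into an auipc
  // contribution of 0x12346000 and an offset of -1 (illustrative values only).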
3153 if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { 3154 int64_t distance = dest.target() - pc(); 3155 auipc(reg1, (int32_t)distance + 0x800); 3156 offset = ((int32_t)distance << 20) >> 20; 3157 } else { 3158 movptr(reg1, dest.target(), offset); 3159 } 3160 } 3161 3162 void MacroAssembler::build_frame(int framesize) { 3163 assert(framesize >= 2, "framesize must include space for FP/RA"); 3164 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 3165 sub(sp, sp, framesize); 3166 sd(fp, Address(sp, framesize - 2 * wordSize)); 3167 sd(ra, Address(sp, framesize - wordSize)); 3168 if (PreserveFramePointer) { add(fp, sp, framesize); } 3169 } 3170 3171 void MacroAssembler::remove_frame(int framesize) { 3172 assert(framesize >= 2, "framesize must include space for FP/RA"); 3173 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 3174 ld(fp, Address(sp, framesize - 2 * wordSize)); 3175 ld(ra, Address(sp, framesize - wordSize)); 3176 add(sp, sp, framesize); 3177 } 3178 3179 void MacroAssembler::reserved_stack_check() { 3180 // testing if reserved zone needs to be enabled 3181 Label no_reserved_zone_enabling; 3182 3183 ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); 3184 bltu(sp, t0, no_reserved_zone_enabling); 3185 3186 enter(); // RA and FP are live. 3187 mv(c_rarg0, xthread); 3188 RuntimeAddress target(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)); 3189 relocate(target.rspec(), [&] { 3190 int32_t offset; 3191 la_patchable(t0, target, offset); 3192 jalr(x1, t0, offset); 3193 }); 3194 leave(); 3195 3196 // We have already removed our own frame. 3197 // throw_delayed_StackOverflowError will think that it's been 3198 // called by our caller. 3199 target = RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()); 3200 relocate(target.rspec(), [&] { 3201 int32_t offset; 3202 la_patchable(t0, target, offset); 3203 jalr(x0, t0, offset); 3204 }); 3205 should_not_reach_here(); 3206 3207 bind(no_reserved_zone_enabling); 3208 } 3209 3210 // Move the address of the polling page into dest. 3211 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { 3212 ld(dest, Address(xthread, JavaThread::polling_page_offset())); 3213 } 3214 3215 // Read the polling page. The address of the polling page must 3216 // already be in r. 
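// The destination register of the lwu is zr, so the access (and hence the
// safepoint trap when the polling page is armed) happens without modifying
// any general-purpose register.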
3217 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { 3218 relocate(rtype, [&] { 3219 lwu(zr, Address(r, offset)); 3220 }); 3221 } 3222 3223 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 3224 #ifdef ASSERT 3225 { 3226 ThreadInVMfromUnknown tiv; 3227 assert (UseCompressedOops, "should only be used for compressed oops"); 3228 assert (Universe::heap() != nullptr, "java heap should be initialized"); 3229 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 3230 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 3231 } 3232 #endif 3233 int oop_index = oop_recorder()->find_index(obj); 3234 relocate(oop_Relocation::spec(oop_index), [&] { 3235 li32(dst, 0xDEADBEEF); 3236 }); 3237 zero_extend(dst, dst, 32); 3238 } 3239 3240 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 3241 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 3242 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 3243 int index = oop_recorder()->find_index(k); 3244 assert(!Universe::heap()->is_in(k), "should not be an oop"); 3245 3246 narrowKlass nk = CompressedKlassPointers::encode(k); 3247 relocate(metadata_Relocation::spec(index), [&] { 3248 li32(dst, nk); 3249 }); 3250 zero_extend(dst, dst, 32); 3251 } 3252 3253 // Maybe emit a call via a trampoline. If the code cache is small 3254 // trampolines won't be emitted. 3255 address MacroAssembler::trampoline_call(Address entry) { 3256 assert(entry.rspec().type() == relocInfo::runtime_call_type || 3257 entry.rspec().type() == relocInfo::opt_virtual_call_type || 3258 entry.rspec().type() == relocInfo::static_call_type || 3259 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); 3260 3261 address target = entry.target(); 3262 3263 // We need a trampoline if branches are far. 3264 if (far_branches()) { 3265 if (!in_scratch_emit_size()) { 3266 if (entry.rspec().type() == relocInfo::runtime_call_type) { 3267 assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs"); 3268 code()->share_trampoline_for(entry.target(), offset()); 3269 } else { 3270 address stub = emit_trampoline_stub(offset(), target); 3271 if (stub == nullptr) { 3272 postcond(pc() == badAddress); 3273 return nullptr; // CodeCache is full 3274 } 3275 } 3276 } 3277 target = pc(); 3278 } 3279 3280 address call_pc = pc(); 3281 #ifdef ASSERT 3282 if (entry.rspec().type() != relocInfo::runtime_call_type) { 3283 assert_alignment(call_pc); 3284 } 3285 #endif 3286 relocate(entry.rspec(), [&] { 3287 jal(target); 3288 }); 3289 3290 postcond(pc() != badAddress); 3291 return call_pc; 3292 } 3293 3294 address MacroAssembler::ic_call(address entry, jint method_index) { 3295 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); 3296 IncompressibleRegion ir(this); // relocations 3297 movptr(t1, (address)Universe::non_oop_word()); 3298 assert_cond(entry != nullptr); 3299 return trampoline_call(Address(entry, rh)); 3300 } 3301 3302 // Emit a trampoline stub for a call to a target which is too far away. 
3303 // 3304 // code sequences: 3305 // 3306 // call-site: 3307 // branch-and-link to <destination> or <trampoline stub> 3308 // 3309 // Related trampoline stub for this call site in the stub section: 3310 // load the call target from the constant pool 3311 // branch (RA still points to the call site above) 3312 3313 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, 3314 address dest) { 3315 // Max stub size: alignment nop, TrampolineStub. 3316 address stub = start_a_stub(max_trampoline_stub_size()); 3317 if (stub == nullptr) { 3318 return nullptr; // CodeBuffer::expand failed 3319 } 3320 3321 // We are always 4-byte aligned here. 3322 assert_alignment(pc()); 3323 3324 // Create a trampoline stub relocation which relates this trampoline stub 3325 // with the call instruction at insts_call_instruction_offset in the 3326 // instructions code-section. 3327 3328 // Make sure the address of destination 8-byte aligned after 3 instructions. 3329 align(wordSize, NativeCallTrampolineStub::data_offset); 3330 3331 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() + 3332 insts_call_instruction_offset); 3333 const int stub_start_offset = offset(); 3334 relocate(rh, [&] { 3335 // Now, create the trampoline stub's code: 3336 // - load the call 3337 // - call 3338 Label target; 3339 ld(t0, target); // auipc + ld 3340 jr(t0); // jalr 3341 bind(target); 3342 assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, 3343 "should be"); 3344 assert(offset() % wordSize == 0, "bad alignment"); 3345 emit_int64((int64_t)dest); 3346 }); 3347 3348 const address stub_start_addr = addr_at(stub_start_offset); 3349 3350 assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); 3351 3352 end_a_stub(); 3353 return stub_start_addr; 3354 } 3355 3356 int MacroAssembler::max_trampoline_stub_size() { 3357 // Max stub size: alignment nop, TrampolineStub. 3358 return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; 3359 } 3360 3361 int MacroAssembler::static_call_stub_size() { 3362 // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr 3363 return 12 * NativeInstruction::instruction_size; 3364 } 3365 3366 Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) { 3367 switch (dst.getMode()) { 3368 case Address::base_plus_offset: 3369 // This is the expected mode, although we allow all the other 3370 // forms below. 
3371 return form_address(tmp, dst.base(), dst.offset()); 3372 default: 3373 la(tmp, dst); 3374 return Address(tmp); 3375 } 3376 } 3377 3378 void MacroAssembler::increment(const Address dst, int64_t value, Register tmp1, Register tmp2) { 3379 assert(((dst.getMode() == Address::base_plus_offset && 3380 is_simm12(dst.offset())) || is_simm12(value)), 3381 "invalid value and address mode combination"); 3382 Address adr = add_memory_helper(dst, tmp2); 3383 assert(!adr.uses(tmp1), "invalid dst for address increment"); 3384 ld(tmp1, adr); 3385 add(tmp1, tmp1, value, tmp2); 3386 sd(tmp1, adr); 3387 } 3388 3389 void MacroAssembler::incrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 3390 assert(((dst.getMode() == Address::base_plus_offset && 3391 is_simm12(dst.offset())) || is_simm12(value)), 3392 "invalid value and address mode combination"); 3393 Address adr = add_memory_helper(dst, tmp2); 3394 assert(!adr.uses(tmp1), "invalid dst for address increment"); 3395 lwu(tmp1, adr); 3396 addw(tmp1, tmp1, value, tmp2); 3397 sw(tmp1, adr); 3398 } 3399 3400 void MacroAssembler::decrement(const Address dst, int64_t value, Register tmp1, Register tmp2) { 3401 assert(((dst.getMode() == Address::base_plus_offset && 3402 is_simm12(dst.offset())) || is_simm12(value)), 3403 "invalid value and address mode combination"); 3404 Address adr = add_memory_helper(dst, tmp2); 3405 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 3406 ld(tmp1, adr); 3407 sub(tmp1, tmp1, value, tmp2); 3408 sd(tmp1, adr); 3409 } 3410 3411 void MacroAssembler::decrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 3412 assert(((dst.getMode() == Address::base_plus_offset && 3413 is_simm12(dst.offset())) || is_simm12(value)), 3414 "invalid value and address mode combination"); 3415 Address adr = add_memory_helper(dst, tmp2); 3416 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 3417 lwu(tmp1, adr); 3418 subw(tmp1, tmp1, value, tmp2); 3419 sw(tmp1, adr); 3420 } 3421 3422 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { 3423 assert_different_registers(src1, t0); 3424 relocate(src2.rspec(), [&] { 3425 int32_t offset; 3426 la_patchable(t0, src2, offset); 3427 ld(t0, Address(t0, offset)); 3428 }); 3429 beq(src1, t0, equal); 3430 } 3431 3432 void MacroAssembler::load_method_holder_cld(Register result, Register method) { 3433 load_method_holder(result, method); 3434 ld(result, Address(result, InstanceKlass::class_loader_data_offset())); 3435 } 3436 3437 void MacroAssembler::load_method_holder(Register holder, Register method) { 3438 ld(holder, Address(method, Method::const_offset())); // ConstMethod* 3439 ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* 3440 ld(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass* 3441 } 3442 3443 // string indexof 3444 // compute index by trailing zeros 3445 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, 3446 Register match_mask, Register result, 3447 Register ch2, Register tmp, 3448 bool haystack_isL) { 3449 int haystack_chr_shift = haystack_isL ? 
0 : 1; 3450 srl(match_mask, match_mask, trailing_zeros); 3451 srli(match_mask, match_mask, 1); 3452 srli(tmp, trailing_zeros, LogBitsPerByte); 3453 if (!haystack_isL) andi(tmp, tmp, 0xE); 3454 add(haystack, haystack, tmp); 3455 ld(ch2, Address(haystack)); 3456 if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); 3457 add(result, result, tmp); 3458 } 3459 3460 // string indexof 3461 // Find pattern element in src, compute match mask, 3462 // only the first occurrence of 0x80/0x8000 at low bits is the valid match index 3463 // match mask patterns and corresponding indices would be like: 3464 // - 0x8080808080808080 (Latin1) 3465 // - 7 6 5 4 3 2 1 0 (match index) 3466 // - 0x8000800080008000 (UTF16) 3467 // - 3 2 1 0 (match index) 3468 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, 3469 Register mask1, Register mask2) { 3470 xorr(src, pattern, src); 3471 sub(match_mask, src, mask1); 3472 orr(src, src, mask2); 3473 notr(src, src); 3474 andr(match_mask, match_mask, src); 3475 } 3476 3477 #ifdef COMPILER2 3478 // Code for BigInteger::mulAdd intrinsic 3479 // out = x10 3480 // in = x11 3481 // offset = x12 (already out.length-offset) 3482 // len = x13 3483 // k = x14 3484 // tmp = x28 3485 // 3486 // pseudo code from java implementation: 3487 // long kLong = k & LONG_MASK; 3488 // carry = 0; 3489 // offset = out.length-offset - 1; 3490 // for (int j = len - 1; j >= 0; j--) { 3491 // product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; 3492 // out[offset--] = (int)product; 3493 // carry = product >>> 32; 3494 // } 3495 // return (int)carry; 3496 void MacroAssembler::mul_add(Register out, Register in, Register offset, 3497 Register len, Register k, Register tmp) { 3498 Label L_tail_loop, L_unroll, L_end; 3499 mv(tmp, out); 3500 mv(out, zr); 3501 blez(len, L_end); 3502 zero_extend(k, k, 32); 3503 slliw(t0, offset, LogBytesPerInt); 3504 add(offset, tmp, t0); 3505 slliw(t0, len, LogBytesPerInt); 3506 add(in, in, t0); 3507 3508 const int unroll = 8; 3509 mv(tmp, unroll); 3510 blt(len, tmp, L_tail_loop); 3511 bind(L_unroll); 3512 for (int i = 0; i < unroll; i++) { 3513 sub(in, in, BytesPerInt); 3514 lwu(t0, Address(in, 0)); 3515 mul(t1, t0, k); 3516 add(t0, t1, out); 3517 sub(offset, offset, BytesPerInt); 3518 lwu(t1, Address(offset, 0)); 3519 add(t0, t0, t1); 3520 sw(t0, Address(offset, 0)); 3521 srli(out, t0, 32); 3522 } 3523 subw(len, len, tmp); 3524 bge(len, tmp, L_unroll); 3525 3526 bind(L_tail_loop); 3527 blez(len, L_end); 3528 sub(in, in, BytesPerInt); 3529 lwu(t0, Address(in, 0)); 3530 mul(t1, t0, k); 3531 add(t0, t1, out); 3532 sub(offset, offset, BytesPerInt); 3533 lwu(t1, Address(offset, 0)); 3534 add(t0, t0, t1); 3535 sw(t0, Address(offset, 0)); 3536 srli(out, t0, 32); 3537 subw(len, len, 1); 3538 j(L_tail_loop); 3539 3540 bind(L_end); 3541 } 3542 3543 // add two unsigned input and output carry 3544 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) 3545 { 3546 assert_different_registers(dst, carry); 3547 assert_different_registers(dst, src2); 3548 add(dst, src1, src2); 3549 sltu(carry, dst, src2); 3550 } 3551 3552 // add two input with carry 3553 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) { 3554 assert_different_registers(dst, carry); 3555 add(dst, src1, src2); 3556 add(dst, dst, carry); 3557 } 3558 3559 // add two unsigned input with carry and output carry 3560 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) 
{ 3561 assert_different_registers(dst, src2); 3562 adc(dst, src1, src2, carry); 3563 sltu(carry, dst, src2); 3564 } 3565 3566 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, 3567 Register src1, Register src2, Register carry) { 3568 cad(dest_lo, dest_lo, src1, carry); 3569 add(dest_hi, dest_hi, carry); 3570 cad(dest_lo, dest_lo, src2, carry); 3571 add(final_dest_hi, dest_hi, carry); 3572 } 3573 3574 /** 3575 * Multiply 32 bit by 32 bit first loop. 3576 */ 3577 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, 3578 Register y, Register y_idx, Register z, 3579 Register carry, Register product, 3580 Register idx, Register kdx) { 3581 // jlong carry, x[], y[], z[]; 3582 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 3583 // long product = y[idx] * x[xstart] + carry; 3584 // z[kdx] = (int)product; 3585 // carry = product >>> 32; 3586 // } 3587 // z[xstart] = (int)carry; 3588 3589 Label L_first_loop, L_first_loop_exit; 3590 blez(idx, L_first_loop_exit); 3591 3592 shadd(t0, xstart, x, t0, LogBytesPerInt); 3593 lwu(x_xstart, Address(t0, 0)); 3594 3595 bind(L_first_loop); 3596 subw(idx, idx, 1); 3597 shadd(t0, idx, y, t0, LogBytesPerInt); 3598 lwu(y_idx, Address(t0, 0)); 3599 mul(product, x_xstart, y_idx); 3600 add(product, product, carry); 3601 srli(carry, product, 32); 3602 subw(kdx, kdx, 1); 3603 shadd(t0, kdx, z, t0, LogBytesPerInt); 3604 sw(product, Address(t0, 0)); 3605 bgtz(idx, L_first_loop); 3606 3607 bind(L_first_loop_exit); 3608 } 3609 3610 /** 3611 * Multiply 64 bit by 64 bit first loop. 3612 */ 3613 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, 3614 Register y, Register y_idx, Register z, 3615 Register carry, Register product, 3616 Register idx, Register kdx) { 3617 // 3618 // jlong carry, x[], y[], z[]; 3619 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 3620 // huge_128 product = y[idx] * x[xstart] + carry; 3621 // z[kdx] = (jlong)product; 3622 // carry = (jlong)(product >>> 64); 3623 // } 3624 // z[xstart] = carry; 3625 // 3626 3627 Label L_first_loop, L_first_loop_exit; 3628 Label L_one_x, L_one_y, L_multiply; 3629 3630 subw(xstart, xstart, 1); 3631 bltz(xstart, L_one_x); 3632 3633 shadd(t0, xstart, x, t0, LogBytesPerInt); 3634 ld(x_xstart, Address(t0, 0)); 3635 ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian 3636 3637 bind(L_first_loop); 3638 subw(idx, idx, 1); 3639 bltz(idx, L_first_loop_exit); 3640 subw(idx, idx, 1); 3641 bltz(idx, L_one_y); 3642 3643 shadd(t0, idx, y, t0, LogBytesPerInt); 3644 ld(y_idx, Address(t0, 0)); 3645 ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian 3646 bind(L_multiply); 3647 3648 mulhu(t0, x_xstart, y_idx); 3649 mul(product, x_xstart, y_idx); 3650 cad(product, product, carry, t1); 3651 adc(carry, t0, zr, t1); 3652 3653 subw(kdx, kdx, 2); 3654 ror_imm(product, product, 32); // back to big-endian 3655 shadd(t0, kdx, z, t0, LogBytesPerInt); 3656 sd(product, Address(t0, 0)); 3657 3658 j(L_first_loop); 3659 3660 bind(L_one_y); 3661 lwu(y_idx, Address(y, 0)); 3662 j(L_multiply); 3663 3664 bind(L_one_x); 3665 lwu(x_xstart, Address(x, 0)); 3666 j(L_first_loop); 3667 3668 bind(L_first_loop_exit); 3669 } 3670 3671 /** 3672 * Multiply 128 bit by 128 bit. Unrolled inner loop. 
3673 * 3674 */ 3675 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, 3676 Register carry, Register carry2, 3677 Register idx, Register jdx, 3678 Register yz_idx1, Register yz_idx2, 3679 Register tmp, Register tmp3, Register tmp4, 3680 Register tmp6, Register product_hi) { 3681 // jlong carry, x[], y[], z[]; 3682 // int kdx = xstart+1; 3683 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 3684 // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; 3685 // jlong carry2 = (jlong)(tmp3 >>> 64); 3686 // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; 3687 // carry = (jlong)(tmp4 >>> 64); 3688 // z[kdx+idx+1] = (jlong)tmp3; 3689 // z[kdx+idx] = (jlong)tmp4; 3690 // } 3691 // idx += 2; 3692 // if (idx > 0) { 3693 // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; 3694 // z[kdx+idx] = (jlong)yz_idx1; 3695 // carry = (jlong)(yz_idx1 >>> 64); 3696 // } 3697 // 3698 3699 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 3700 3701 srliw(jdx, idx, 2); 3702 3703 bind(L_third_loop); 3704 3705 subw(jdx, jdx, 1); 3706 bltz(jdx, L_third_loop_exit); 3707 subw(idx, idx, 4); 3708 3709 shadd(t0, idx, y, t0, LogBytesPerInt); 3710 ld(yz_idx2, Address(t0, 0)); 3711 ld(yz_idx1, Address(t0, wordSize)); 3712 3713 shadd(tmp6, idx, z, t0, LogBytesPerInt); 3714 3715 ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian 3716 ror_imm(yz_idx2, yz_idx2, 32); 3717 3718 ld(t1, Address(tmp6, 0)); 3719 ld(t0, Address(tmp6, wordSize)); 3720 3721 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 3722 mulhu(tmp4, product_hi, yz_idx1); 3723 3724 ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian 3725 ror_imm(t1, t1, 32, tmp); 3726 3727 mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp 3728 mulhu(carry2, product_hi, yz_idx2); 3729 3730 cad(tmp3, tmp3, carry, carry); 3731 adc(tmp4, tmp4, zr, carry); 3732 cad(tmp3, tmp3, t0, t0); 3733 cadc(tmp4, tmp4, tmp, t0); 3734 adc(carry, carry2, zr, t0); 3735 cad(tmp4, tmp4, t1, carry2); 3736 adc(carry, carry, zr, carry2); 3737 3738 ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian 3739 ror_imm(tmp4, tmp4, 32); 3740 sd(tmp4, Address(tmp6, 0)); 3741 sd(tmp3, Address(tmp6, wordSize)); 3742 3743 j(L_third_loop); 3744 3745 bind(L_third_loop_exit); 3746 3747 andi(idx, idx, 0x3); 3748 beqz(idx, L_post_third_loop_done); 3749 3750 Label L_check_1; 3751 subw(idx, idx, 2); 3752 bltz(idx, L_check_1); 3753 3754 shadd(t0, idx, y, t0, LogBytesPerInt); 3755 ld(yz_idx1, Address(t0, 0)); 3756 ror_imm(yz_idx1, yz_idx1, 32); 3757 3758 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 3759 mulhu(tmp4, product_hi, yz_idx1); 3760 3761 shadd(t0, idx, z, t0, LogBytesPerInt); 3762 ld(yz_idx2, Address(t0, 0)); 3763 ror_imm(yz_idx2, yz_idx2, 32, tmp); 3764 3765 add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); 3766 3767 ror_imm(tmp3, tmp3, 32, tmp); 3768 sd(tmp3, Address(t0, 0)); 3769 3770 bind(L_check_1); 3771 3772 andi(idx, idx, 0x1); 3773 subw(idx, idx, 1); 3774 bltz(idx, L_post_third_loop_done); 3775 shadd(t0, idx, y, t0, LogBytesPerInt); 3776 lwu(tmp4, Address(t0, 0)); 3777 mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 3778 mulhu(carry2, tmp4, product_hi); 3779 3780 shadd(t0, idx, z, t0, LogBytesPerInt); 3781 lwu(tmp4, Address(t0, 0)); 3782 3783 add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); 3784 3785 shadd(t0, idx, z, t0, LogBytesPerInt); 3786 sw(tmp3, Address(t0, 0)); 3787 3788 slli(t0, carry2, 32); 3789 
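  // t0 = carry2 << 32; the next two instructions fold in the upper half of tmp3
  // to form the 64-bit carry propagated out of this loop:
  //   carry = (carry2 << 32) | (tmp3 >>> 32)
  // i.e. everything above the 32-bit word just stored with the sw above.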
srli(carry, tmp3, 32); 3790 orr(carry, carry, t0); 3791 3792 bind(L_post_third_loop_done); 3793 } 3794 3795 /** 3796 * Code for BigInteger::multiplyToLen() intrinsic. 3797 * 3798 * x10: x 3799 * x11: xlen 3800 * x12: y 3801 * x13: ylen 3802 * x14: z 3803 * x15: zlen 3804 * x16: tmp1 3805 * x17: tmp2 3806 * x7: tmp3 3807 * x28: tmp4 3808 * x29: tmp5 3809 * x30: tmp6 3810 * x31: tmp7 3811 */ 3812 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, 3813 Register z, Register zlen, 3814 Register tmp1, Register tmp2, Register tmp3, Register tmp4, 3815 Register tmp5, Register tmp6, Register product_hi) { 3816 assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); 3817 3818 const Register idx = tmp1; 3819 const Register kdx = tmp2; 3820 const Register xstart = tmp3; 3821 3822 const Register y_idx = tmp4; 3823 const Register carry = tmp5; 3824 const Register product = xlen; 3825 const Register x_xstart = zlen; // reuse register 3826 3827 mv(idx, ylen); // idx = ylen; 3828 mv(kdx, zlen); // kdx = xlen+ylen; 3829 mv(carry, zr); // carry = 0; 3830 3831 Label L_multiply_64_x_64_loop, L_done; 3832 3833 subw(xstart, xlen, 1); 3834 bltz(xstart, L_done); 3835 3836 const Register jdx = tmp1; 3837 3838 if (AvoidUnalignedAccesses) { 3839 // Check if x and y are both 8-byte aligned. 3840 orr(t0, xlen, ylen); 3841 test_bit(t0, t0, 0); 3842 beqz(t0, L_multiply_64_x_64_loop); 3843 3844 multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 3845 shadd(t0, xstart, z, t0, LogBytesPerInt); 3846 sw(carry, Address(t0, 0)); 3847 3848 Label L_second_loop_unaligned; 3849 bind(L_second_loop_unaligned); 3850 mv(carry, zr); 3851 mv(jdx, ylen); 3852 subw(xstart, xstart, 1); 3853 bltz(xstart, L_done); 3854 sub(sp, sp, 2 * wordSize); 3855 sd(z, Address(sp, 0)); 3856 sd(zr, Address(sp, wordSize)); 3857 shadd(t0, xstart, z, t0, LogBytesPerInt); 3858 addi(z, t0, 4); 3859 shadd(t0, xstart, x, t0, LogBytesPerInt); 3860 lwu(product, Address(t0, 0)); 3861 Label L_third_loop, L_third_loop_exit; 3862 3863 blez(jdx, L_third_loop_exit); 3864 3865 bind(L_third_loop); 3866 subw(jdx, jdx, 1); 3867 shadd(t0, jdx, y, t0, LogBytesPerInt); 3868 lwu(t0, Address(t0, 0)); 3869 mul(t1, t0, product); 3870 add(t0, t1, carry); 3871 shadd(tmp6, jdx, z, t1, LogBytesPerInt); 3872 lwu(t1, Address(tmp6, 0)); 3873 add(t0, t0, t1); 3874 sw(t0, Address(tmp6, 0)); 3875 srli(carry, t0, 32); 3876 bgtz(jdx, L_third_loop); 3877 3878 bind(L_third_loop_exit); 3879 ld(z, Address(sp, 0)); 3880 addi(sp, sp, 2 * wordSize); 3881 shadd(t0, xstart, z, t0, LogBytesPerInt); 3882 sw(carry, Address(t0, 0)); 3883 3884 j(L_second_loop_unaligned); 3885 } 3886 3887 bind(L_multiply_64_x_64_loop); 3888 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 3889 3890 Label L_second_loop_aligned; 3891 beqz(kdx, L_second_loop_aligned); 3892 3893 Label L_carry; 3894 subw(kdx, kdx, 1); 3895 beqz(kdx, L_carry); 3896 3897 shadd(t0, kdx, z, t0, LogBytesPerInt); 3898 sw(carry, Address(t0, 0)); 3899 srli(carry, carry, 32); 3900 subw(kdx, kdx, 1); 3901 3902 bind(L_carry); 3903 shadd(t0, kdx, z, t0, LogBytesPerInt); 3904 sw(carry, Address(t0, 0)); 3905 3906 // Second and third (nested) loops. 
3907 // 3908 // for (int i = xstart-1; i >= 0; i--) { // Second loop 3909 // carry = 0; 3910 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 3911 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 3912 // (z[k] & LONG_MASK) + carry; 3913 // z[k] = (int)product; 3914 // carry = product >>> 32; 3915 // } 3916 // z[i] = (int)carry; 3917 // } 3918 // 3919 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi 3920 3921 bind(L_second_loop_aligned); 3922 mv(carry, zr); // carry = 0; 3923 mv(jdx, ylen); // j = ystart+1 3924 3925 subw(xstart, xstart, 1); // i = xstart-1; 3926 bltz(xstart, L_done); 3927 3928 sub(sp, sp, 4 * wordSize); 3929 sd(z, Address(sp, 0)); 3930 3931 Label L_last_x; 3932 shadd(t0, xstart, z, t0, LogBytesPerInt); 3933 addi(z, t0, 4); 3934 subw(xstart, xstart, 1); // i = xstart-1; 3935 bltz(xstart, L_last_x); 3936 3937 shadd(t0, xstart, x, t0, LogBytesPerInt); 3938 ld(product_hi, Address(t0, 0)); 3939 ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian 3940 3941 Label L_third_loop_prologue; 3942 bind(L_third_loop_prologue); 3943 3944 sd(ylen, Address(sp, wordSize)); 3945 sd(x, Address(sp, 2 * wordSize)); 3946 sd(xstart, Address(sp, 3 * wordSize)); 3947 multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, 3948 tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); 3949 ld(z, Address(sp, 0)); 3950 ld(ylen, Address(sp, wordSize)); 3951 ld(x, Address(sp, 2 * wordSize)); 3952 ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen 3953 addi(sp, sp, 4 * wordSize); 3954 3955 addiw(tmp3, xlen, 1); 3956 shadd(t0, tmp3, z, t0, LogBytesPerInt); 3957 sw(carry, Address(t0, 0)); 3958 3959 subw(tmp3, tmp3, 1); 3960 bltz(tmp3, L_done); 3961 3962 srli(carry, carry, 32); 3963 shadd(t0, tmp3, z, t0, LogBytesPerInt); 3964 sw(carry, Address(t0, 0)); 3965 j(L_second_loop_aligned); 3966 3967 // Next infrequent code is moved outside loops. 3968 bind(L_last_x); 3969 lwu(product_hi, Address(x, 0)); 3970 j(L_third_loop_prologue); 3971 3972 bind(L_done); 3973 } 3974 #endif 3975 3976 // Count bits of trailing zero chars from lsb to msb until first non-zero element. 3977 // For LL case, one byte for one element, so shift 8 bits once, and for other case, 3978 // shift 16 bits once. 3979 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) { 3980 if (UseZbb) { 3981 assert_different_registers(Rd, Rs, tmp1); 3982 int step = isLL ? 8 : 16; 3983 ctz(Rd, Rs); 3984 andi(tmp1, Rd, step - 1); 3985 sub(Rd, Rd, tmp1); 3986 return; 3987 } 3988 3989 assert_different_registers(Rd, Rs, tmp1, tmp2); 3990 Label Loop; 3991 int step = isLL ? 
8 : 16; 3992 mv(Rd, -step); 3993 mv(tmp2, Rs); 3994 3995 bind(Loop); 3996 addi(Rd, Rd, step); 3997 andi(tmp1, tmp2, ((1 << step) - 1)); 3998 srli(tmp2, tmp2, step); 3999 beqz(tmp1, Loop); 4000 } 4001 4002 // This instruction reads adjacent 4 bytes from the lower half of source register, 4003 // inflate into a register, for example: 4004 // Rs: A7A6A5A4A3A2A1A0 4005 // Rd: 00A300A200A100A0 4006 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) { 4007 assert_different_registers(Rd, Rs, tmp1, tmp2); 4008 4009 mv(tmp1, 0xFF000000); // first byte mask at lower word 4010 andr(Rd, Rs, tmp1); 4011 for (int i = 0; i < 2; i++) { 4012 slli(Rd, Rd, wordSize); 4013 srli(tmp1, tmp1, wordSize); 4014 andr(tmp2, Rs, tmp1); 4015 orr(Rd, Rd, tmp2); 4016 } 4017 slli(Rd, Rd, wordSize); 4018 andi(tmp2, Rs, 0xFF); // last byte mask at lower word 4019 orr(Rd, Rd, tmp2); 4020 } 4021 4022 // This instruction reads adjacent 4 bytes from the upper half of source register, 4023 // inflate into a register, for example: 4024 // Rs: A7A6A5A4A3A2A1A0 4025 // Rd: 00A700A600A500A4 4026 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) { 4027 assert_different_registers(Rd, Rs, tmp1, tmp2); 4028 srli(Rs, Rs, 32); // only upper 32 bits are needed 4029 inflate_lo32(Rd, Rs, tmp1, tmp2); 4030 } 4031 4032 // The size of the blocks erased by the zero_blocks stub. We must 4033 // handle anything smaller than this ourselves in zero_words(). 4034 const int MacroAssembler::zero_words_block_size = 8; 4035 4036 // zero_words() is used by C2 ClearArray patterns. It is as small as 4037 // possible, handling small word counts locally and delegating 4038 // anything larger to the zero_blocks stub. It is expanded many times 4039 // in compiled code, so it is important to keep it short. 4040 4041 // ptr: Address of a buffer to be zeroed. 4042 // cnt: Count in HeapWords. 4043 // 4044 // ptr, cnt, and t0 are clobbered. 4045 address MacroAssembler::zero_words(Register ptr, Register cnt) { 4046 assert(is_power_of_2(zero_words_block_size), "adjust this"); 4047 assert(ptr == x28 && cnt == x29, "mismatch in register usage"); 4048 assert_different_registers(cnt, t0); 4049 4050 BLOCK_COMMENT("zero_words {"); 4051 4052 mv(t0, zero_words_block_size); 4053 Label around, done, done16; 4054 bltu(cnt, t0, around); 4055 { 4056 RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); 4057 assert(zero_blocks.target() != nullptr, "zero_blocks stub has not been generated"); 4058 if (StubRoutines::riscv::complete()) { 4059 address tpc = trampoline_call(zero_blocks); 4060 if (tpc == nullptr) { 4061 DEBUG_ONLY(reset_labels(around)); 4062 postcond(pc() == badAddress); 4063 return nullptr; 4064 } 4065 } else { 4066 jal(zero_blocks); 4067 } 4068 } 4069 bind(around); 4070 for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { 4071 Label l; 4072 test_bit(t0, cnt, exact_log2(i)); 4073 beqz(t0, l); 4074 for (int j = 0; j < i; j++) { 4075 sd(zr, Address(ptr, j * wordSize)); 4076 } 4077 addi(ptr, ptr, i * wordSize); 4078 bind(l); 4079 } 4080 { 4081 Label l; 4082 test_bit(t0, cnt, 0); 4083 beqz(t0, l); 4084 sd(zr, Address(ptr, 0)); 4085 bind(l); 4086 } 4087 4088 BLOCK_COMMENT("} zero_words"); 4089 postcond(pc() != badAddress); 4090 return pc(); 4091 } 4092 4093 #define SmallArraySize (18 * BytesPerLong) 4094 4095 // base: Address of a buffer to be zeroed, 8 bytes aligned. 4096 // cnt: Immediate count in HeapWords. 
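// For small counts (cnt <= SmallArraySize / BytesPerLong, i.e. at most 18 words)
// the stores below are emitted fully unrolled; for example, a hypothetical call
// zero_words(x28, 3) would expand to roughly:
//   sd zr, 0(x28); sd zr, 8(x28); sd zr, 16(x28)
// Larger counts peel off cnt % 8 stores and then run an 8-way unrolled loop.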
4097 void MacroAssembler::zero_words(Register base, uint64_t cnt) {
4098   assert_different_registers(base, t0, t1);
4099
4100   BLOCK_COMMENT("zero_words {");
4101
4102   if (cnt <= SmallArraySize / BytesPerLong) {
4103     for (int i = 0; i < (int)cnt; i++) {
4104       sd(zr, Address(base, i * wordSize));
4105     }
4106   } else {
4107     const int unroll = 8; // Number of sd(zr, adr) instructions we'll unroll
4108     int remainder = cnt % unroll;
4109     for (int i = 0; i < remainder; i++) {
4110       sd(zr, Address(base, i * wordSize));
4111     }
4112
4113     Label loop;
4114     Register cnt_reg = t0;
4115     Register loop_base = t1;
4116     cnt = cnt - remainder;
4117     mv(cnt_reg, cnt);
4118     add(loop_base, base, remainder * wordSize);
4119     bind(loop);
4120     sub(cnt_reg, cnt_reg, unroll);
4121     for (int i = 0; i < unroll; i++) {
4122       sd(zr, Address(loop_base, i * wordSize));
4123     }
4124     add(loop_base, loop_base, unroll * wordSize);
4125     bnez(cnt_reg, loop);
4126   }
4127
4128   BLOCK_COMMENT("} zero_words");
4129 }
4130
4131 // base: Address of a buffer to be filled, 8 bytes aligned.
4132 // cnt: Count in 8-byte units.
4133 // value: Value to be filled with.
4134 // base will point to the end of the buffer after filling.
4135 void MacroAssembler::fill_words(Register base, Register cnt, Register value) {
4136   // Algorithm:
4137   //
4138   //    t0 = cnt & 7
4139   //    cnt -= t0
4140   //    p += t0
4141   //    switch (t0):
4142   //      switch start:
4143   //      do while cnt
4144   //        cnt -= 8
4145   //          p[-8] = value
4146   //        case 7:
4147   //          p[-7] = value
4148   //        case 6:
4149   //          p[-6] = value
4150   //          // ...
4151   //        case 1:
4152   //          p[-1] = value
4153   //        case 0:
4154   //          p += 8
4155   //      do-while end
4156   //    switch end
4157
4158   assert_different_registers(base, cnt, value, t0, t1);
4159
4160   Label fini, skip, entry, loop;
4161   const int unroll = 8; // Number of sd instructions we'll unroll
4162
4163   beqz(cnt, fini);
4164
4165   andi(t0, cnt, unroll - 1);
4166   sub(cnt, cnt, t0);
4167   // Store the leading (cnt % 8) words by jumping into the tail of the unrolled loop; every later pass stores a full group of 8.
4168   shadd(base, t0, base, t1, 3);
4169   la(t1, entry);
4170   slli(t0, t0, 2); // each sd in the loop is 4 bytes, so back up the entry point by (cnt % 8) * 4 bytes
4171   sub(t1, t1, t0);
4172   jr(t1);
4173
4174   bind(loop);
4175   add(base, base, unroll * 8);
4176   for (int i = -unroll; i < 0; i++) {
4177     sd(value, Address(base, i * 8));
4178   }
4179   bind(entry);
4180   sub(cnt, cnt, unroll);
4181   bgez(cnt, loop);
4182
4183   bind(fini);
4184 }
4185
4186 // Zero blocks of memory by using CBO.ZERO.
4187 //
4188 // Aligns the base address first sufficiently for CBO.ZERO, then uses
4189 // CBO.ZERO repeatedly for every full block. cnt is the size to be
4190 // zeroed in HeapWords. Returns the count of words left to be zeroed
4191 // in cnt.
4192 //
4193 // NOTE: This is intended to be used in the zero_blocks() stub. If
4194 // you want to use it elsewhere, note that cnt must be >= CacheLineSize.
4195 void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) {
4196   Label initial_table_end, loop;
4197
4198   // Align base with cache line size.
4199   neg(tmp1, base);
4200   andi(tmp1, tmp1, CacheLineSize - 1);
4201
4202   // tmp1: the number of bytes to be filled to align the base with cache line size.
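  // For example, assuming CacheLineSize == 64 and a base whose low six bits are 0x28,
  // tmp1 = (-base) & 63 = 24, i.e. three words must be stored individually before the
  // first cache-line-aligned block; the computed jump below skips the table entries
  // that are not needed.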
4203   add(base, base, tmp1);
4204   srai(tmp2, tmp1, 3);
4205   sub(cnt, cnt, tmp2);
4206   srli(tmp2, tmp1, 1);
4207   la(tmp1, initial_table_end);
4208   sub(tmp2, tmp1, tmp2);
4209   jr(tmp2);
4210   for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) {
4211     sd(zr, Address(base, i));
4212   }
4213   bind(initial_table_end);
4214
4215   mv(tmp1, CacheLineSize / wordSize);
4216   bind(loop);
4217   cbo_zero(base);
4218   sub(cnt, cnt, tmp1);
4219   add(base, base, CacheLineSize);
4220   bge(cnt, tmp1, loop);
4221 }
4222
4223 // java.lang.Math.round(float a)
4224 // Returns the closest int to the argument, with ties rounding to positive infinity.
4225 void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
4226   // This instruction sequence provides a performance improvement on all tested devices;
4227   // don't change it without re-verification.
4228   Label done;
4229   mv(t0, jint_cast(0.5f));
4230   fmv_w_x(ftmp, t0);
4231
4232   // dst = 0 if NaN
4233   feq_s(t0, src, src); // replacing fclass with feq as a performance optimization
4234   mv(dst, zr);
4235   beqz(t0, done);
4236
4237   // dst = (src + 0.5f) rounded down towards negative infinity
4238   // Adding 0.5f to some floats exceeds the precision of a float, so the addition itself rounds.
4239   // RDN is required for fadd_s; RNE gives incorrect results:
4240   // --------------------------------------------------------------------
4241   // fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000
4242   // fcvt.w.s rdn:            ftmp = 8388610.000000 dst = 8388610
4243   // --------------------------------------------------------------------
4244   // fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000
4245   // fcvt.w.s rdn:            ftmp = 8388609.000000 dst = 8388609
4246   // --------------------------------------------------------------------
4247   fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
4248   fcvt_w_s(dst, ftmp, RoundingMode::rdn);
4249
4250   bind(done);
4251 }
4252
4253 // java.lang.Math.round(double a)
4254 // Returns the closest long to the argument, with ties rounding to positive infinity.
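// For instance (matching java.lang.Math.round(double) semantics):
//   round(2.5)  ==  3
//   round(-2.5) == -2   (ties round toward positive infinity)
//   round(NaN)  ==  0   (handled below by the feq_d check)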
4255 void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) { 4256 // this instructions calling sequence provides performance improvement on all tested devices; 4257 // don't change it without re-verification 4258 Label done; 4259 mv(t0, julong_cast(0.5)); 4260 fmv_d_x(ftmp, t0); 4261 4262 // dst = 0 if NaN 4263 feq_d(t0, src, src); // replacing fclass with feq as performance optimization 4264 mv(dst, zr); 4265 beqz(t0, done); 4266 4267 // dst = (src + 0.5) rounded down towards negative infinity 4268 fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results 4269 fcvt_l_d(dst, ftmp, RoundingMode::rdn); 4270 4271 bind(done); 4272 } 4273 4274 #define FCVT_SAFE(FLOATCVT, FLOATSIG) \ 4275 void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ 4276 Label done; \ 4277 assert_different_registers(dst, tmp); \ 4278 fclass_##FLOATSIG(tmp, src); \ 4279 mv(dst, zr); \ 4280 /* check if src is NaN */ \ 4281 andi(tmp, tmp, 0b1100000000); \ 4282 bnez(tmp, done); \ 4283 FLOATCVT(dst, src); \ 4284 bind(done); \ 4285 } 4286 4287 FCVT_SAFE(fcvt_w_s, s); 4288 FCVT_SAFE(fcvt_l_s, s); 4289 FCVT_SAFE(fcvt_w_d, d); 4290 FCVT_SAFE(fcvt_l_d, d); 4291 4292 #undef FCVT_SAFE 4293 4294 #define FCMP(FLOATTYPE, FLOATSIG) \ 4295 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ 4296 FloatRegister Rs2, int unordered_result) { \ 4297 Label Ldone; \ 4298 if (unordered_result < 0) { \ 4299 /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ 4300 /* installs 1 if gt else 0 */ \ 4301 flt_##FLOATSIG(result, Rs2, Rs1); \ 4302 /* Rs1 > Rs2, install 1 */ \ 4303 bgtz(result, Ldone); \ 4304 feq_##FLOATSIG(result, Rs1, Rs2); \ 4305 addi(result, result, -1); \ 4306 /* Rs1 = Rs2, install 0 */ \ 4307 /* NaN or Rs1 < Rs2, install -1 */ \ 4308 bind(Ldone); \ 4309 } else { \ 4310 /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ 4311 /* installs 1 if gt or unordered else 0 */ \ 4312 flt_##FLOATSIG(result, Rs1, Rs2); \ 4313 /* Rs1 < Rs2, install -1 */ \ 4314 bgtz(result, Ldone); \ 4315 feq_##FLOATSIG(result, Rs1, Rs2); \ 4316 addi(result, result, -1); \ 4317 /* Rs1 = Rs2, install 0 */ \ 4318 /* NaN or Rs1 > Rs2, install 1 */ \ 4319 bind(Ldone); \ 4320 neg(result, result); \ 4321 } \ 4322 } 4323 4324 FCMP(float, s); 4325 FCMP(double, d); 4326 4327 #undef FCMP 4328 4329 // Zero words; len is in bytes 4330 // Destroys all registers except addr 4331 // len must be a nonzero multiple of wordSize 4332 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { 4333 assert_different_registers(addr, len, tmp, t0, t1); 4334 4335 #ifdef ASSERT 4336 { 4337 Label L; 4338 andi(t0, len, BytesPerWord - 1); 4339 beqz(t0, L); 4340 stop("len is not a multiple of BytesPerWord"); 4341 bind(L); 4342 } 4343 #endif // ASSERT 4344 4345 #ifndef PRODUCT 4346 block_comment("zero memory"); 4347 #endif // PRODUCT 4348 4349 Label loop; 4350 Label entry; 4351 4352 // Algorithm: 4353 // 4354 // t0 = cnt & 7 4355 // cnt -= t0 4356 // p += t0 4357 // switch (t0) { 4358 // do { 4359 // cnt -= 8 4360 // p[-8] = 0 4361 // case 7: 4362 // p[-7] = 0 4363 // case 6: 4364 // p[-6] = 0 4365 // ... 
4366 // case 1: 4367 // p[-1] = 0 4368 // case 0: 4369 // p += 8 4370 // } while (cnt) 4371 // } 4372 4373 const int unroll = 8; // Number of sd(zr) instructions we'll unroll 4374 4375 srli(len, len, LogBytesPerWord); 4376 andi(t0, len, unroll - 1); // t0 = cnt % unroll 4377 sub(len, len, t0); // cnt -= unroll 4378 // tmp always points to the end of the region we're about to zero 4379 shadd(tmp, t0, addr, t1, LogBytesPerWord); 4380 la(t1, entry); 4381 slli(t0, t0, 2); 4382 sub(t1, t1, t0); 4383 jr(t1); 4384 bind(loop); 4385 sub(len, len, unroll); 4386 for (int i = -unroll; i < 0; i++) { 4387 sd(zr, Address(tmp, i * wordSize)); 4388 } 4389 bind(entry); 4390 add(tmp, tmp, unroll * wordSize); 4391 bnez(len, loop); 4392 } 4393 4394 // shift left by shamt and add 4395 // Rd = (Rs1 << shamt) + Rs2 4396 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { 4397 if (UseZba) { 4398 if (shamt == 1) { 4399 sh1add(Rd, Rs1, Rs2); 4400 return; 4401 } else if (shamt == 2) { 4402 sh2add(Rd, Rs1, Rs2); 4403 return; 4404 } else if (shamt == 3) { 4405 sh3add(Rd, Rs1, Rs2); 4406 return; 4407 } 4408 } 4409 4410 if (shamt != 0) { 4411 slli(tmp, Rs1, shamt); 4412 add(Rd, Rs2, tmp); 4413 } else { 4414 add(Rd, Rs1, Rs2); 4415 } 4416 } 4417 4418 void MacroAssembler::zero_extend(Register dst, Register src, int bits) { 4419 if (UseZba && bits == 32) { 4420 zext_w(dst, src); 4421 return; 4422 } 4423 4424 if (UseZbb && bits == 16) { 4425 zext_h(dst, src); 4426 return; 4427 } 4428 4429 if (bits == 8) { 4430 zext_b(dst, src); 4431 } else { 4432 slli(dst, src, XLEN - bits); 4433 srli(dst, dst, XLEN - bits); 4434 } 4435 } 4436 4437 void MacroAssembler::sign_extend(Register dst, Register src, int bits) { 4438 if (UseZbb) { 4439 if (bits == 8) { 4440 sext_b(dst, src); 4441 return; 4442 } else if (bits == 16) { 4443 sext_h(dst, src); 4444 return; 4445 } 4446 } 4447 4448 if (bits == 32) { 4449 sext_w(dst, src); 4450 } else { 4451 slli(dst, src, XLEN - bits); 4452 srai(dst, dst, XLEN - bits); 4453 } 4454 } 4455 4456 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) 4457 { 4458 if (src1 == src2) { 4459 mv(dst, zr); 4460 return; 4461 } 4462 Label done; 4463 Register left = src1; 4464 Register right = src2; 4465 if (dst == src1) { 4466 assert_different_registers(dst, src2, tmp); 4467 mv(tmp, src1); 4468 left = tmp; 4469 } else if (dst == src2) { 4470 assert_different_registers(dst, src1, tmp); 4471 mv(tmp, src2); 4472 right = tmp; 4473 } 4474 4475 // installs 1 if gt else 0 4476 slt(dst, right, left); 4477 bnez(dst, done); 4478 slt(dst, left, right); 4479 // dst = -1 if lt; else if eq , dst = 0 4480 neg(dst, dst); 4481 bind(done); 4482 } 4483 4484 // The java_calling_convention describes stack locations as ideal slots on 4485 // a frame with no abi restrictions. Since we must observe abi restrictions 4486 // (like the placement of the register window) the slots must be biased by 4487 // the following value. 4488 static int reg2offset_in(VMReg r) { 4489 // Account for saved fp and ra 4490 // This should really be in_preserve_stack_slots 4491 return r->reg2stack() * VMRegImpl::stack_slot_size; 4492 } 4493 4494 static int reg2offset_out(VMReg r) { 4495 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; 4496 } 4497 4498 // On 64 bit we will store integer like items to the stack as 4499 // 64 bits items (riscv64 abi) even though java would only store 4500 // 32bits for a parameter. 
On 32bit it will simply be 32 bits 4501 // So this routine will do 32->32 on 32bit and 32->64 on 64bit 4502 void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { 4503 if (src.first()->is_stack()) { 4504 if (dst.first()->is_stack()) { 4505 // stack to stack 4506 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4507 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4508 } else { 4509 // stack to reg 4510 lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4511 } 4512 } else if (dst.first()->is_stack()) { 4513 // reg to stack 4514 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 4515 } else { 4516 if (dst.first() != src.first()) { 4517 sign_extend(dst.first()->as_Register(), src.first()->as_Register(), 32); 4518 } 4519 } 4520 } 4521 4522 // An oop arg. Must pass a handle not the oop itself 4523 void MacroAssembler::object_move(OopMap* map, 4524 int oop_handle_offset, 4525 int framesize_in_slots, 4526 VMRegPair src, 4527 VMRegPair dst, 4528 bool is_receiver, 4529 int* receiver_offset) { 4530 assert_cond(map != nullptr && receiver_offset != nullptr); 4531 4532 // must pass a handle. First figure out the location we use as a handle 4533 Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); 4534 4535 // See if oop is null if it is we need no handle 4536 4537 if (src.first()->is_stack()) { 4538 // Oop is already on the stack as an argument 4539 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); 4540 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); 4541 if (is_receiver) { 4542 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; 4543 } 4544 4545 ld(t0, Address(fp, reg2offset_in(src.first()))); 4546 la(rHandle, Address(fp, reg2offset_in(src.first()))); 4547 // conditionally move a null 4548 Label notZero1; 4549 bnez(t0, notZero1); 4550 mv(rHandle, zr); 4551 bind(notZero1); 4552 } else { 4553 4554 // Oop is in a register we must store it to the space we reserve 4555 // on the stack for oop_handles and pass a handle if oop is non-null 4556 4557 const Register rOop = src.first()->as_Register(); 4558 int oop_slot = -1; 4559 if (rOop == j_rarg0) { 4560 oop_slot = 0; 4561 } else if (rOop == j_rarg1) { 4562 oop_slot = 1; 4563 } else if (rOop == j_rarg2) { 4564 oop_slot = 2; 4565 } else if (rOop == j_rarg3) { 4566 oop_slot = 3; 4567 } else if (rOop == j_rarg4) { 4568 oop_slot = 4; 4569 } else if (rOop == j_rarg5) { 4570 oop_slot = 5; 4571 } else if (rOop == j_rarg6) { 4572 oop_slot = 6; 4573 } else { 4574 assert(rOop == j_rarg7, "wrong register"); 4575 oop_slot = 7; 4576 } 4577 4578 oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; 4579 int offset = oop_slot * VMRegImpl::stack_slot_size; 4580 4581 map->set_oop(VMRegImpl::stack2reg(oop_slot)); 4582 // Store oop in handle area, may be null 4583 sd(rOop, Address(sp, offset)); 4584 if (is_receiver) { 4585 *receiver_offset = offset; 4586 } 4587 4588 //rOop maybe the same as rHandle 4589 if (rOop == rHandle) { 4590 Label isZero; 4591 beqz(rOop, isZero); 4592 la(rHandle, Address(sp, offset)); 4593 bind(isZero); 4594 } else { 4595 Label notZero2; 4596 la(rHandle, Address(sp, offset)); 4597 bnez(rOop, notZero2); 4598 mv(rHandle, zr); 4599 bind(notZero2); 4600 } 4601 } 4602 4603 // If arg is on the stack then place it otherwise it is already in correct reg. 
4604 if (dst.first()->is_stack()) { 4605 sd(rHandle, Address(sp, reg2offset_out(dst.first()))); 4606 } 4607 } 4608 4609 // A float arg may have to do float reg int reg conversion 4610 void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { 4611 assert(src.first()->is_stack() && dst.first()->is_stack() || 4612 src.first()->is_reg() && dst.first()->is_reg() || 4613 src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); 4614 if (src.first()->is_stack()) { 4615 if (dst.first()->is_stack()) { 4616 lwu(tmp, Address(fp, reg2offset_in(src.first()))); 4617 sw(tmp, Address(sp, reg2offset_out(dst.first()))); 4618 } else if (dst.first()->is_Register()) { 4619 lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4620 } else { 4621 ShouldNotReachHere(); 4622 } 4623 } else if (src.first() != dst.first()) { 4624 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { 4625 fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 4626 } else { 4627 ShouldNotReachHere(); 4628 } 4629 } 4630 } 4631 4632 // A long move 4633 void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { 4634 if (src.first()->is_stack()) { 4635 if (dst.first()->is_stack()) { 4636 // stack to stack 4637 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4638 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4639 } else { 4640 // stack to reg 4641 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4642 } 4643 } else if (dst.first()->is_stack()) { 4644 // reg to stack 4645 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 4646 } else { 4647 if (dst.first() != src.first()) { 4648 mv(dst.first()->as_Register(), src.first()->as_Register()); 4649 } 4650 } 4651 } 4652 4653 // A double move 4654 void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { 4655 assert(src.first()->is_stack() && dst.first()->is_stack() || 4656 src.first()->is_reg() && dst.first()->is_reg() || 4657 src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); 4658 if (src.first()->is_stack()) { 4659 if (dst.first()->is_stack()) { 4660 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4661 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4662 } else if (dst.first()-> is_Register()) { 4663 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4664 } else { 4665 ShouldNotReachHere(); 4666 } 4667 } else if (src.first() != dst.first()) { 4668 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { 4669 fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 4670 } else { 4671 ShouldNotReachHere(); 4672 } 4673 } 4674 } 4675 4676 void MacroAssembler::rt_call(address dest, Register tmp) { 4677 CodeBlob *cb = CodeCache::find_blob(dest); 4678 RuntimeAddress target(dest); 4679 if (cb) { 4680 far_call(target); 4681 } else { 4682 relocate(target.rspec(), [&] { 4683 int32_t offset; 4684 la_patchable(tmp, target, offset); 4685 jalr(x1, tmp, offset); 4686 }); 4687 } 4688 } 4689 4690 void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) { 4691 assert(bit_pos < 64, "invalid bit range"); 4692 if (UseZbs) { 4693 bexti(Rd, Rs, bit_pos); 4694 return; 4695 } 4696 int64_t imm = (int64_t)(1UL << bit_pos); 4697 if (is_simm12(imm)) { 4698 and_imm12(Rd, Rs, imm); 4699 } else { 4700 srli(Rd, Rs, bit_pos); 4701 and_imm12(Rd, Rd, 1); 4702 } 4703 } 4704 4705 // Implements lightweight-locking. 4706 // Branches to slow upon failure to lock the object. 
4707 // Falls through upon success.
4708 //
4709 // - obj: the object to be locked
4710 // - hdr: the header, already loaded from obj, will be destroyed
4711 // - tmp1, tmp2: temporary registers, will be destroyed
4712 void MacroAssembler::lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) {
4713   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4714   assert_different_registers(obj, hdr, tmp1, tmp2, t0);
4715
4716   // Check if we would have space on lock-stack for the object.
4717   lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
4718   mv(tmp2, (unsigned)LockStack::end_offset());
4719   bge(tmp1, tmp2, slow, /* is_far */ true);
4720
4721   // Load (object->mark() | 1) into hdr
4722   ori(hdr, hdr, markWord::unlocked_value);
4723   // Clear lock-bits, into tmp2
4724   xori(tmp2, hdr, markWord::unlocked_value);
4725
4726   // Try to swing header from unlocked to locked
4727   Label success;
4728   cmpxchgptr(hdr, tmp2, obj, tmp1, success, &slow);
4729   bind(success);
4730
4731   // After successful lock, push object on lock-stack
4732   lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
4733   add(tmp2, xthread, tmp1);
4734   sd(obj, Address(tmp2, 0));
4735   addw(tmp1, tmp1, oopSize);
4736   sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
4737 }
4738
4739 // Implements lightweight-unlocking.
4740 // Branches to slow upon failure.
4741 // Falls through upon success.
4742 //
4743 // - obj: the object to be unlocked
4744 // - hdr: the (pre-loaded) header of the object
4745 // - tmp1, tmp2: temporary registers
4746 void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) {
4747   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
4748   assert_different_registers(obj, hdr, tmp1, tmp2, t0);
4749
4750 #ifdef ASSERT
4751   {
4752     // The following checks rely on the fact that LockStack is only ever modified by
4753     // its owning thread, even if the lock got inflated concurrently; removal of LockStack
4754     // entries after inflation will be delayed in that case.
4755
4756     // Check for lock-stack underflow.
4757     Label stack_ok;
4758     lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
4759     mv(tmp2, (unsigned)LockStack::start_offset());
4760     bgt(tmp1, tmp2, stack_ok);
4761     STOP("Lock-stack underflow");
4762     bind(stack_ok);
4763   }
4764   {
4765     // Check if the top of the lock-stack matches the unlocked object.
4766     Label tos_ok;
4767     subw(tmp1, tmp1, oopSize);
4768     add(tmp1, xthread, tmp1);
4769     ld(tmp1, Address(tmp1, 0));
4770     beq(tmp1, obj, tos_ok);
4771     STOP("Top of lock-stack does not match the unlocked object");
4772     bind(tos_ok);
4773   }
4774   {
4775     // Check that hdr is fast-locked.
4776 Label hdr_ok; 4777 andi(tmp1, hdr, markWord::lock_mask_in_place); 4778 beqz(tmp1, hdr_ok); 4779 STOP("Header is not fast-locked"); 4780 bind(hdr_ok); 4781 } 4782 #endif 4783 4784 // Load the new header (unlocked) into tmp1 4785 ori(tmp1, hdr, markWord::unlocked_value); 4786 4787 // Try to swing header from locked to unlocked 4788 Label success; 4789 cmpxchgptr(hdr, tmp1, obj, tmp2, success, &slow); 4790 bind(success); 4791 4792 // After successful unlock, pop object from lock-stack 4793 lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); 4794 subw(tmp1, tmp1, oopSize); 4795 #ifdef ASSERT 4796 add(tmp2, xthread, tmp1); 4797 sd(zr, Address(tmp2, 0)); 4798 #endif 4799 sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); 4800 }
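// A rough, C-like sketch of the lock-stack protocol implemented by
// lightweight_lock() and lightweight_unlock() above (illustrative only;
// helpers such as cas_mark(), push_lock_stack() and pop_lock_stack() are
// placeholders, not real HotSpot API):
//
//   bool lightweight_lock(JavaThread* thread, oop obj, markWord hdr) {
//     if (thread->lock_stack_top() >= LockStack::end_offset()) return false; // slow path
//     markWord unlocked = hdr | markWord::unlocked_value;      // expected value
//     markWord locked   = unlocked ^ markWord::unlocked_value; // lock bits cleared
//     if (!cas_mark(obj, unlocked, locked)) return false;      // slow path
//     push_lock_stack(thread, obj);                            // sd obj; bump top
//     return true;
//   }
//
//   bool lightweight_unlock(JavaThread* thread, oop obj, markWord hdr) {
//     markWord unlocked = hdr | markWord::unlocked_value;      // hdr is fast-locked
//     if (!cas_mark(obj, hdr, unlocked)) return false;         // slow path
//     pop_lock_stack(thread);                                  // drop the top entry
//     return true;
//   }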