/*
 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
 * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "code/compiledIC.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "nativeInst_riscv.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/compressedKlass.inline.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/klass.inline.hpp"
#include "oops/oop.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/powerOfTwo.hpp"
#ifdef COMPILER2
#include "opto/compile.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
#define STOP(str) stop(str);
#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":")

static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg) {
    masm->mv(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg) {
    masm->mv(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg) {
    masm->mv(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg) {
    masm->mv(c_rarg3, arg);
  }
}

void MacroAssembler::push_cont_fastpath(Register java_thread) {
  if (!Continuations::enabled()) return;
  Label done;
  ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset()));
  bleu(sp, t0, done);
  sd(sp, Address(java_thread, JavaThread::cont_fastpath_offset()));
  bind(done);
}
void MacroAssembler::pop_cont_fastpath(Register java_thread) {
  if (!Continuations::enabled()) return;
  Label done;
  ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset()));
  bltu(sp, t0, done);
  sd(zr, Address(java_thread, JavaThread::cont_fastpath_offset()));
  bind(done);
}

int MacroAssembler::align(int modulus, int extra_offset) {
  CompressibleRegion cr(this);
  intptr_t before = offset();
  while ((offset() + extra_offset) % modulus != 0) { nop(); }
  return (int)(offset() - before);
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
}

// Implementation of call_VM versions

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  assert_different_registers(arg_1, c_rarg2);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  assert_different_registers(arg_1, c_rarg2, c_rarg3);
  assert_different_registers(arg_2, c_rarg3);
  pass_arg3(this, arg_3);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  assert_different_registers(arg_1, c_rarg2);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  assert_different_registers(arg_1, c_rarg2, c_rarg3);
  assert_different_registers(arg_2, c_rarg3);
  pass_arg3(this, arg_3);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::post_call_nop() {
  if (!Continuations::enabled()) {
    return;
  }
  relocate(post_call_nop_Relocation::spec(), [&] {
    InlineSkippedInstructionsCounter skipCounter(this);
    nop();
    li32(zr, 0);
  });
}
// These are no-ops, overridden by InterpreterMacroAssembler.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
void MacroAssembler::check_and_handle_popframe(Register java_thread) {}

// Calls to C land
//
// When entering C land, the fp and sp of the last Java frame have to be
// recorded in the (thread-local) JavaThread object. When leaving C land,
// the last Java fp has to be reset to 0. This is required to allow proper
// stack traversal.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Register last_java_pc,
                                         Register tmp) {

  if (last_java_pc->is_valid()) {
    sd(last_java_pc, Address(xthread,
                             JavaThread::frame_anchor_offset() +
                             JavaFrameAnchor::last_Java_pc_offset()));
  }

  // determine last_java_sp register
  if (last_java_sp == sp) {
    mv(tmp, sp);
    last_java_sp = tmp;
  } else if (!last_java_sp->is_valid()) {
    last_java_sp = esp;
  }

  sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset()));

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset()));
  }
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc,
                                         Register tmp) {
  assert(last_java_pc != nullptr, "must provide a valid PC");

  la(tmp, last_java_pc);
  sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));

  set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp);
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Label &L,
                                         Register tmp) {
  if (L.is_bound()) {
    set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp);
  } else {
    L.add_patch_at(code(), locator());
    IncompressibleRegion ir(this);  // the label address will be patched back.
    set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp);
  }
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
  // we must set sp to zero to clear frame
  sd(zr, Address(xthread, JavaThread::last_Java_sp_offset()));

  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(zr, Address(xthread, JavaThread::last_Java_fp_offset()));
  }

  // Always clear the pc because it could have been set by make_walkable()
  sd(zr, Address(xthread, JavaThread::last_Java_pc_offset()));
}
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = xthread;
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = esp;
  }

  // debugging support
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(java_thread == xthread, "unexpected register");

  assert(java_thread != oop_result,   "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)
  mv(c_rarg0, java_thread);

  // set last Java frame before call
  assert(last_java_sp != fp, "can't use fp");

  Label l;
  set_last_Java_frame(last_java_sp, fp, l, t0);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);

  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(true);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
    Label ok;
    beqz(t0, ok);
    RuntimeAddress target(StubRoutines::forward_exception_entry());
    relocate(target.rspec(), [&] {
      int32_t offset;
      la(t0, target.target(), offset);
      jalr(x0, t0, offset);
    });
    bind(ok);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, java_thread);
  }
}

void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  sd(zr, Address(java_thread, JavaThread::vm_result_offset()));
  verify_oop_msg(oop_result, "broken oop in call_VM_base");
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  sd(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
}
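// Illustrative (hypothetical) call site for the call_VM overloads above:
// pass the Java-level argument in a c_rarg register and receive the result
// oop in x10. `some_entry` is a placeholder, not a real VM entry point:
//
//   __ call_VM(x10 /* oop_result */,
//              CAST_FROM_FN_PTR(address, InterpreterRuntime::some_entry),
//              c_rarg1 /* arg_1 */);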
void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) {
  assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required");
  assert_different_registers(klass, xthread, tmp);

  Label L_fallthrough, L_tmp;
  if (L_fast_path == nullptr) {
    L_fast_path = &L_fallthrough;
  } else if (L_slow_path == nullptr) {
    L_slow_path = &L_fallthrough;
  }

  // Fast path check: class is fully initialized
  lbu(tmp, Address(klass, InstanceKlass::init_state_offset()));
  sub(tmp, tmp, InstanceKlass::fully_initialized);
  beqz(tmp, *L_fast_path);

  // Fast path check: current thread is initializer thread
  ld(tmp, Address(klass, InstanceKlass::init_thread_offset()));

  if (L_slow_path == &L_fallthrough) {
    beq(xthread, tmp, *L_fast_path);
    bind(*L_slow_path);
  } else if (L_fast_path == &L_fallthrough) {
    bne(xthread, tmp, *L_slow_path);
    bind(*L_fast_path);
  } else {
    Unimplemented();
  }
}

void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  if (!VerifyOops) { return; }

  // Pass register number to verify_oop_subroutine
  const char* b = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop {");

  push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);

  mv(c_rarg0, reg); // c_rarg0 : x10
  {
    // The length of the instruction sequence emitted should not depend
    // on the address of the char buffer so that the size of mach nodes for
    // scratch emit and normal emit matches.
    IncompressibleRegion ir(this);  // Fixed length
    movptr(t0, (address) b);
  }

  // call indirectly to solve generation ordering problem
  ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address());
  relocate(target.rspec(), [&] {
    int32_t offset;
    la(t1, target.target(), offset);
    ld(t1, Address(t1, offset));
  });
  jalr(t1);

  pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);

  BLOCK_COMMENT("} verify_oop");
}

void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
  if (!VerifyOops) {
    return;
  }

  const char* b = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop_addr {");

  push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);

  if (addr.uses(sp)) {
    la(x10, addr);
    ld(x10, Address(x10, 4 * wordSize));
  } else {
    ld(x10, addr);
  }

  {
    // The length of the instruction sequence emitted should not depend
    // on the address of the char buffer so that the size of mach nodes for
    // scratch emit and normal emit matches.
    IncompressibleRegion ir(this);  // Fixed length
    movptr(t0, (address) b);
  }

  // call indirectly to solve generation ordering problem
  ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address());
  relocate(target.rspec(), [&] {
    int32_t offset;
    la(t1, target.target(), offset);
    ld(t1, Address(t1, offset));
  });
  jalr(t1);

  pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);

  BLOCK_COMMENT("} verify_oop_addr");
}
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset + 0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset + 1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  if (arg_slot.is_constant()) {
    return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
  } else {
    assert_different_registers(t0, arg_slot.as_register());
    shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
    return Address(t0, offset);
  }
}
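// Worked example (illustrative): with Interpreter::stackElementSize == 8 and
// extra_slot_offset == 0, a constant arg_slot of 2 yields
// Address(esp, 2 * 8 + offset); for a register slot the same address is
// formed as esp + (arg_slot << 3) + offset, with the shift-and-add done
// by shadd into t0.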
#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
{
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr(" pc = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr(" x0 = 0x%016lx", regs[0]);
      tty->print_cr(" x1 = 0x%016lx", regs[1]);
      tty->print_cr(" x2 = 0x%016lx", regs[2]);
      tty->print_cr(" x3 = 0x%016lx", regs[3]);
      tty->print_cr(" x4 = 0x%016lx", regs[4]);
      tty->print_cr(" x5 = 0x%016lx", regs[5]);
      tty->print_cr(" x6 = 0x%016lx", regs[6]);
      tty->print_cr(" x7 = 0x%016lx", regs[7]);
      tty->print_cr(" x8 = 0x%016lx", regs[8]);
      tty->print_cr(" x9 = 0x%016lx", regs[9]);
      tty->print_cr("x10 = 0x%016lx", regs[10]);
      tty->print_cr("x11 = 0x%016lx", regs[11]);
      tty->print_cr("x12 = 0x%016lx", regs[12]);
      tty->print_cr("x13 = 0x%016lx", regs[13]);
      tty->print_cr("x14 = 0x%016lx", regs[14]);
      tty->print_cr("x15 = 0x%016lx", regs[15]);
      tty->print_cr("x16 = 0x%016lx", regs[16]);
      tty->print_cr("x17 = 0x%016lx", regs[17]);
      tty->print_cr("x18 = 0x%016lx", regs[18]);
      tty->print_cr("x19 = 0x%016lx", regs[19]);
      tty->print_cr("x20 = 0x%016lx", regs[20]);
      tty->print_cr("x21 = 0x%016lx", regs[21]);
      tty->print_cr("x22 = 0x%016lx", regs[22]);
      tty->print_cr("x23 = 0x%016lx", regs[23]);
      tty->print_cr("x24 = 0x%016lx", regs[24]);
      tty->print_cr("x25 = 0x%016lx", regs[25]);
      tty->print_cr("x26 = 0x%016lx", regs[26]);
      tty->print_cr("x27 = 0x%016lx", regs[27]);
      tty->print_cr("x28 = 0x%016lx", regs[28]);
      tty->print_cr("x29 = 0x%016lx", regs[29]);
      tty->print_cr("x30 = 0x%016lx", regs[30]);
      tty->print_cr("x31 = 0x%016lx", regs[31]);
      BREAKPOINT;
    }
  }
  fatal("DEBUG MESSAGE: %s", msg);
}

void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done, tagged, weak_tagged;

  beqz(value, done);           // Use null as-is.
  // Test for tag.
  andi(tmp1, value, JNIHandles::tag_mask);
  bnez(tmp1, tagged);

  // Resolve local handle
  access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, value, Address(value, 0), tmp1, tmp2);
  verify_oop(value);
  j(done);

  bind(tagged);
  // Test for jweak tag.
  STATIC_ASSERT(JNIHandles::TypeTag::weak_global == 0b1);
  test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::weak_global));
  bnez(tmp1, weak_tagged);

  // Resolve global handle
  access_load_at(T_OBJECT, IN_NATIVE, value,
                 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2);
  verify_oop(value);
  j(done);

  bind(weak_tagged);
  // Resolve jweak.
  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
                 Address(value, -JNIHandles::TypeTag::weak_global), tmp1, tmp2);
  verify_oop(value);

  bind(done);
}

void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done;

  beqz(value, done);           // Use null as-is.

#ifdef ASSERT
  {
    STATIC_ASSERT(JNIHandles::TypeTag::global == 0b10);
    Label valid_global_tag;
    test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::global)); // Test for global tag.
    bnez(tmp1, valid_global_tag);
    stop("non global jobject using resolve_global_jobject");
    bind(valid_global_tag);
  }
#endif

  // Resolve global handle
  access_load_at(T_OBJECT, IN_NATIVE, value,
                 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2);
  verify_oop(value);

  bind(done);
}

void MacroAssembler::stop(const char* msg) {
  BLOCK_COMMENT(msg);
  illegal_instruction(Assembler::csr::time);
  emit_int64((uintptr_t)msg);
}

void MacroAssembler::unimplemented(const char* what) {
  const char* buf = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("unimplemented: %s", what);
    buf = code_string(ss.as_string());
  }
  stop(buf);
}

void MacroAssembler::emit_static_call_stub() {
  IncompressibleRegion ir(this);  // Fixed length: see CompiledDirectCall::to_interp_stub_size().
  // CompiledDirectCall::set_to_interpreted knows the
  // exact layout of this stub.

  mov_metadata(xmethod, (Metadata*)nullptr);

  // Jump to the entry point of the c2i stub.
  int32_t offset = 0;
  movptr(t0, 0, offset);
  jalr(x0, t0, offset);
}
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments,
                                       Label *retaddr) {
  push_reg(RegSet::of(t0, xmethod), sp);   // push << t0 & xmethod >> to sp
  call(entry_point);
  if (retaddr != nullptr) {
    bind(*retaddr);
  }
  pop_reg(RegSet::of(t0, xmethod), sp);    // pop << t0 & xmethod >> from sp
}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  assert_different_registers(arg_1, c_rarg0);
  pass_arg0(this, arg_0);
  pass_arg1(this, arg_1);
  call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
                                  Register arg_1, Register arg_2) {
  assert_different_registers(arg_1, c_rarg0);
  assert_different_registers(arg_2, c_rarg0, c_rarg1);
  pass_arg0(this, arg_0);
  pass_arg1(this, arg_1);
  pass_arg2(this, arg_2);
  call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  assert_different_registers(arg_0, c_rarg1);
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  assert_different_registers(arg_0, c_rarg1, c_rarg2);
  assert_different_registers(arg_1, c_rarg2);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3);
  assert_different_registers(arg_1, c_rarg2, c_rarg3);
  assert_different_registers(arg_2, c_rarg3);

  pass_arg3(this, arg_3);
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

void MacroAssembler::la(Register Rd, const address addr) {
  int64_t offset = addr - pc();
  if (is_simm32(offset)) {
    // 0x800 rounds the auipc field up when offset[11] is set, compensating
    // for the sign extension of the 12-bit addi immediate below.
    auipc(Rd, (int32_t)offset + 0x800);
    addi(Rd, Rd, ((int64_t)offset << 52) >> 52);
  } else {
    movptr(Rd, addr);
  }
}
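// Worked example (illustrative): for offset = 0x12345FFF the low 12 bits are
// 0xFFF, which addi sign-extends to -1. Adding 0x800 first makes the upper
// 20-bit auipc field 0x12346, so the pair computes
//   pc + 0x12346000 - 1 = pc + 0x12345FFF.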
void MacroAssembler::la(Register Rd, const address addr, int32_t &offset) {
  assert((uintptr_t)addr < (1ull << 48), "bad address");

  unsigned long target_address = (uintptr_t)addr;
  unsigned long low_address  = (uintptr_t)CodeCache::low_bound();
  unsigned long high_address = (uintptr_t)CodeCache::high_bound();
  long offset_low  = target_address - low_address;
  long offset_high = target_address - high_address;

  // RISC-V doesn't compute a page-aligned address, in order to partially
  // compensate for the use of *signed* offsets in its base+disp12
  // addressing mode (RISC-V's PC-relative reach remains asymmetric:
  // [-(2G + 2K), 2G - 2K)).
  if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
    int64_t distance = addr - pc();
    auipc(Rd, (int32_t)distance + 0x800);
    offset = ((int32_t)distance << 20) >> 20;
  } else {
    movptr(Rd, addr, offset);
  }
}

void MacroAssembler::la(Register Rd, const Address &adr) {
  switch (adr.getMode()) {
    case Address::literal: {
      relocInfo::relocType rtype = adr.rspec().reloc()->type();
      if (rtype == relocInfo::none) {
        mv(Rd, (intptr_t)(adr.target()));
      } else {
        relocate(adr.rspec(), [&] {
          movptr(Rd, adr.target());
        });
      }
      break;
    }
    case Address::base_plus_offset: {
      Address new_adr = legitimize_address(Rd, adr);
      if (!(new_adr.base() == Rd && new_adr.offset() == 0)) {
        addi(Rd, new_adr.base(), new_adr.offset());
      }
      break;
    }
    default:
      ShouldNotReachHere();
  }
}

void MacroAssembler::la(Register Rd, Label &label) {
  IncompressibleRegion ir(this);  // the label address may be patched back.
  wrap_label(Rd, label, &MacroAssembler::la);
}

void MacroAssembler::li16u(Register Rd, uint16_t imm) {
  lui(Rd, (uint32_t)imm << 12);
  srli(Rd, Rd, 12);
}

void MacroAssembler::li32(Register Rd, int32_t imm) {
  // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit
  int64_t upper = imm, lower = imm;
  lower = (imm << 20) >> 20;
  upper -= lower;
  upper = (int32_t)upper;
  // lui Rd, imm[31:12] + imm[11]
  lui(Rd, upper);
  // use addiw to distinguish li32 from li64
  addiw(Rd, Rd, lower);
}
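// Worked example (illustrative): li32(Rd, 0x12345FFF) splits the value into
// lower = signext(0xFFF) = -1 and upper = 0x12345FFF - (-1) = 0x12346000,
// then emits lui(Rd, 0x12346000); addiw(Rd, Rd, -1), which computes
// 0x12346000 - 1 = 0x12345FFF (sign-extended to 64 bits by addiw).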
void MacroAssembler::li64(Register Rd, int64_t imm) {
  // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or
  // (imm[31:20] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1.
  int64_t lower = imm & 0xffffffff;
  lower -= ((lower << 44) >> 44);
  int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower;
  int32_t upper = (tmp_imm - (int32_t)lower) >> 32;

  // Load upper 32 bits
  int64_t up = upper, lo = upper;
  lo = (lo << 52) >> 52;
  up -= lo;
  up = (int32_t)up;
  lui(Rd, up);
  addi(Rd, Rd, lo);

  // Load the remaining 32 bits.
  slli(Rd, Rd, 12);
  addi(Rd, Rd, (int32_t)lower >> 20);
  slli(Rd, Rd, 12);
  lower = ((int32_t)imm << 12) >> 20;
  addi(Rd, Rd, lower);
  slli(Rd, Rd, 8);
  lower = imm & 0xff;
  addi(Rd, Rd, lower);
}

void MacroAssembler::li(Register Rd, int64_t imm) {
  // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff
  // li -> c.li
  if (do_compress() && (is_simm6(imm) && Rd != x0)) {
    c_li(Rd, imm);
    return;
  }

  int shift = 12;
  int64_t upper = imm, lower = imm;
  // Split imm into a lower 12-bit sign-extended part and the remainder,
  // because addi will sign-extend the lower imm.
  lower = ((int32_t)imm << 20) >> 20;
  upper -= lower;

  // Test whether imm is a 32-bit integer.
  if (!(((imm) & ~(int64_t)0x7fffffff) == 0 ||
        (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) {
    while (((upper >> shift) & 1) == 0) { shift++; }
    upper >>= shift;
    li(Rd, upper);
    slli(Rd, Rd, shift);
    if (lower != 0) {
      addi(Rd, Rd, lower);
    }
  } else {
    // 32-bit integer
    Register hi_Rd = zr;
    if (upper != 0) {
      lui(Rd, (int32_t)upper);
      hi_Rd = Rd;
    }
    if (lower != 0 || hi_Rd == zr) {
      addiw(Rd, hi_Rd, lower);
    }
  }
}

#define INSN(NAME, REGISTER)                                       \
  void MacroAssembler::NAME(const address dest, Register temp) {   \
    assert_cond(dest != nullptr);                                  \
    int64_t distance = dest - pc();                                \
    if (is_simm21(distance) && ((distance % 2) == 0)) {            \
      Assembler::jal(REGISTER, distance);                          \
    } else {                                                       \
      assert(temp != noreg, "expecting a register");               \
      int32_t offset = 0;                                          \
      movptr(temp, dest, offset);                                  \
      Assembler::jalr(REGISTER, temp, offset);                     \
    }                                                              \
  }                                                                \

  INSN(j,   x0);
  INSN(jal, x1);

#undef INSN

#define INSN(NAME, REGISTER)                                       \
  void MacroAssembler::NAME(const Address &adr, Register temp) {   \
    switch (adr.getMode()) {                                       \
      case Address::literal: {                                     \
        relocate(adr.rspec(), [&] {                                \
          NAME(adr.target(), temp);                                \
        });                                                        \
        break;                                                     \
      }                                                            \
      case Address::base_plus_offset: {                            \
        int32_t offset = ((int32_t)adr.offset() << 20) >> 20;      \
        la(temp, Address(adr.base(), adr.offset() - offset));      \
        Assembler::jalr(REGISTER, temp, offset);                   \
        break;                                                     \
      }                                                            \
      default:                                                     \
        ShouldNotReachHere();                                      \
    }                                                              \
  }

  INSN(j,   x0);
  INSN(jal, x1);

#undef INSN

#define INSN(NAME)                                                            \
  void MacroAssembler::NAME(Register Rd, const address dest, Register temp) { \
    assert_cond(dest != nullptr);                                             \
    int64_t distance = dest - pc();                                           \
    if (is_simm21(distance) && ((distance % 2) == 0)) {                       \
      Assembler::NAME(Rd, distance);                                          \
    } else {                                                                  \
      assert_different_registers(Rd, temp);                                   \
      int32_t offset = 0;                                                     \
      movptr(temp, dest, offset);                                             \
      jalr(Rd, temp, offset);                                                 \
    }                                                                         \
  }                                                                           \
  void MacroAssembler::NAME(Register Rd, Label &L, Register temp) {           \
    assert_different_registers(Rd, temp);                                     \
    wrap_label(Rd, L, temp, &MacroAssembler::NAME);                           \
  }

  INSN(jal);

#undef INSN

#define INSN(NAME, REGISTER)                        \
  void MacroAssembler::NAME(Label &l, Register temp) { \
    jal(REGISTER, l, temp);                         \
  }                                                 \

  INSN(j,   x0);
  INSN(jal, x1);

#undef INSN

void MacroAssembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) {
  if (L.is_bound()) {
    (this->*insn)(Rt, target(L), tmp);
  } else {
    L.add_patch_at(code(), locator());
    (this->*insn)(Rt, pc(), tmp);
  }
}

void MacroAssembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) {
  if (L.is_bound()) {
    (this->*insn)(Rt, target(L));
  } else {
    L.add_patch_at(code(), locator());
    (this->*insn)(Rt, pc());
  }
}

void MacroAssembler::wrap_label(Register r1, Register r2, Label &L,
                                compare_and_branch_insn insn,
                                compare_and_branch_label_insn neg_insn, bool is_far) {
  if (is_far) {
    Label done;
    (this->*neg_insn)(r1, r2, done, /* is_far */ false);
    j(L);
    bind(done);
  } else {
    if (L.is_bound()) {
      (this->*insn)(r1, r2, target(L));
    } else {
      L.add_patch_at(code(), locator());
      (this->*insn)(r1, r2, pc());
    }
  }
}
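// Illustrative expansion: a far conditional branch is synthesized from the
// negated near branch plus an unconditional jump, e.g.
// beq(x10, x11, L, /* is_far */ true) emits
//   bne x10, x11, done
//   j   L
//  done:
// so only the unconditional j needs the full +/-1M jal range.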
#define INSN(NAME, NEG_INSN)                                                             \
  void MacroAssembler::NAME(Register Rs1, Register Rs2, Label &L, bool is_far) {         \
    wrap_label(Rs1, Rs2, L, &MacroAssembler::NAME, &MacroAssembler::NEG_INSN, is_far);   \
  }

  INSN(beq,  bne);
  INSN(bne,  beq);
  INSN(blt,  bge);
  INSN(bge,  blt);
  INSN(bltu, bgeu);
  INSN(bgeu, bltu);

#undef INSN

#define INSN(NAME)                                                  \
  void MacroAssembler::NAME##z(Register Rs, const address dest) {   \
    NAME(Rs, zr, dest);                                             \
  }                                                                 \
  void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \
    NAME(Rs, zr, l, is_far);                                        \
  }                                                                 \

  INSN(beq);
  INSN(bne);
  INSN(blt);
  INSN(ble);
  INSN(bge);
  INSN(bgt);

#undef INSN

#define INSN(NAME, NEG_INSN)                                                  \
  void MacroAssembler::NAME(Register Rs, Register Rt, const address dest) {   \
    NEG_INSN(Rt, Rs, dest);                                                   \
  }                                                                           \
  void MacroAssembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \
    NEG_INSN(Rt, Rs, l, is_far);                                              \
  }

  INSN(bgt,  blt);
  INSN(ble,  bge);
  INSN(bgtu, bltu);
  INSN(bleu, bgeu);

#undef INSN

// Float compare branch instructions

#define INSN(NAME, FLOATCMP, BRANCH)                                                                                    \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) {   \
    FLOATCMP##_s(t0, Rs1, Rs2);                                                                                         \
    BRANCH(t0, l, is_far);                                                                                              \
  }                                                                                                                     \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) {  \
    FLOATCMP##_d(t0, Rs1, Rs2);                                                                                         \
    BRANCH(t0, l, is_far);                                                                                              \
  }

  INSN(beq, feq, bnez);
  INSN(bne, feq, beqz);

#undef INSN


#define INSN(NAME, FLOATCMP1, FLOATCMP2)                                              \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,   \
                                    bool is_far, bool is_unordered) {                 \
    if (is_unordered) {                                                               \
      /* jump if either source is NaN or condition is expected */                     \
      FLOATCMP2##_s(t0, Rs2, Rs1);                                                    \
      beqz(t0, l, is_far);                                                            \
    } else {                                                                          \
      /* jump if no NaN in source and condition is expected */                        \
      FLOATCMP1##_s(t0, Rs1, Rs2);                                                    \
      bnez(t0, l, is_far);                                                            \
    }                                                                                 \
  }                                                                                   \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
                                     bool is_far, bool is_unordered) {                \
    if (is_unordered) {                                                               \
      /* jump if either source is NaN or condition is expected */                     \
      FLOATCMP2##_d(t0, Rs2, Rs1);                                                    \
      beqz(t0, l, is_far);                                                            \
    } else {                                                                          \
      /* jump if no NaN in source and condition is expected */                        \
      FLOATCMP1##_d(t0, Rs1, Rs2);                                                    \
      bnez(t0, l, is_far);                                                            \
    }                                                                                 \
  }

  INSN(ble, fle, flt);
  INSN(blt, flt, fle);

#undef INSN

#define INSN(NAME, CMP)                                                               \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,   \
                                    bool is_far, bool is_unordered) {                 \
    float_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                   \
  }                                                                                   \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
                                     bool is_far, bool is_unordered) {                \
    double_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                  \
  }

  INSN(bgt, blt);
  INSN(bge, ble);

#undef INSN
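// Illustrative NaN behavior: float_blt(f0, f1, l, false, /* is_unordered */ true)
// emits "fle.s t0, f1, f0; beqz t0, l": the branch is taken when !(f1 <= f0),
// i.e. when f0 < f1 or when either input is NaN, which is exactly the
// "unordered or less than" condition.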
#define INSN(NAME, CSR)                      \
  void MacroAssembler::NAME(Register Rd) {   \
    csrr(Rd, CSR);                           \
  }

  INSN(rdinstret, CSR_INSTRET);
  INSN(rdcycle,   CSR_CYCLE);
  INSN(rdtime,    CSR_TIME);
  INSN(frcsr,     CSR_FCSR);
  INSN(frrm,      CSR_FRM);
  INSN(frflags,   CSR_FFLAGS);

#undef INSN

void MacroAssembler::csrr(Register Rd, unsigned csr) {
  csrrs(Rd, csr, x0);
}

#define INSN(NAME, OPFUN)                                 \
  void MacroAssembler::NAME(unsigned csr, Register Rs) {  \
    OPFUN(x0, csr, Rs);                                   \
  }

  INSN(csrw, csrrw);
  INSN(csrs, csrrs);
  INSN(csrc, csrrc);

#undef INSN

#define INSN(NAME, OPFUN)                                 \
  void MacroAssembler::NAME(unsigned csr, unsigned imm) { \
    OPFUN(x0, csr, imm);                                  \
  }

  INSN(csrwi, csrrwi);
  INSN(csrsi, csrrsi);
  INSN(csrci, csrrci);

#undef INSN

#define INSN(NAME, CSR)                                   \
  void MacroAssembler::NAME(Register Rd, Register Rs) {   \
    csrrw(Rd, CSR, Rs);                                   \
  }

  INSN(fscsr,   CSR_FCSR);
  INSN(fsrm,    CSR_FRM);
  INSN(fsflags, CSR_FFLAGS);

#undef INSN

#define INSN(NAME)                         \
  void MacroAssembler::NAME(Register Rs) { \
    NAME(x0, Rs);                          \
  }

  INSN(fscsr);
  INSN(fsrm);
  INSN(fsflags);

#undef INSN

void MacroAssembler::fsrmi(Register Rd, unsigned imm) {
  guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register");
  csrrwi(Rd, CSR_FRM, imm);
}

void MacroAssembler::fsflagsi(Register Rd, unsigned imm) {
  csrrwi(Rd, CSR_FFLAGS, imm);
}

#define INSN(NAME)                          \
  void MacroAssembler::NAME(unsigned imm) { \
    NAME(x0, imm);                          \
  }

  INSN(fsrmi);
  INSN(fsflagsi);

#undef INSN

void MacroAssembler::push_reg(Register Rs)
{
  addi(esp, esp, 0 - wordSize);
  sd(Rs, Address(esp, 0));
}

void MacroAssembler::pop_reg(Register Rd)
{
  ld(Rd, Address(esp, 0));
  addi(esp, esp, wordSize);
}

int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
  int count = 0;
  // Scan bitset to accumulate register pairs
  for (int reg = 31; reg >= 0; reg--) {
    if ((1U << 31) & bitset) {
      regs[count++] = reg;
    }
    bitset <<= 1;
  }
  return count;
}

// Push integer registers in the bitset supplied. Don't push sp.
// Return the number of words pushed
int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_pushed = 0;)
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  // reserve one slot to align for odd count
  int offset = is_even(count) ? 0 : wordSize;

  if (count) {
    addi(stack, stack, -count * wordSize - offset);
  }
  for (int i = count - 1; i >= 0; i--) {
    sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
    DEBUG_ONLY(words_pushed++;)
  }

  assert(words_pushed == count, "oops, pushed != count");

  return count;
}

int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_popped = 0;)
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  // reserve one slot to align for odd count
  int offset = is_even(count) ? 0 : wordSize;

  for (int i = count - 1; i >= 0; i--) {
    ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
    DEBUG_ONLY(words_popped++;)
  }

  if (count) {
    addi(stack, stack, count * wordSize + offset);
  }
  assert(words_popped == count, "oops, popped != count");

  return count;
}
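// Worked example (illustrative): push_reg with a bitset for { x10, x11, x12 }
// and stack == sp gives count == 3 (odd), so one padding slot keeps sp
// 16-byte aligned:
//   addi sp, sp, -32
//   sd   x10,  8(sp)
//   sd   x11, 16(sp)
//   sd   x12, 24(sp)     // 0(sp) is the alignment padding slot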
// Push floating-point registers in the bitset supplied.
// Return the number of words pushed
int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_pushed = 0;)
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  int push_slots = count + (count & 1);

  if (count) {
    addi(stack, stack, -push_slots * wordSize);
  }

  for (int i = count - 1; i >= 0; i--) {
    fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize));
    DEBUG_ONLY(words_pushed++;)
  }

  assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);

  return count;
}

int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_popped = 0;)
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  int pop_slots = count + (count & 1);

  for (int i = count - 1; i >= 0; i--) {
    fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize));
    DEBUG_ONLY(words_popped++;)
  }

  if (count) {
    addi(stack, stack, pop_slots * wordSize);
  }

  assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count);

  return count;
}

#ifdef COMPILER2
// Push vector registers in the bitset supplied.
// Return the number of words pushed
int MacroAssembler::push_v(unsigned int bitset, Register stack) {
  int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);

  // Scan bitset to accumulate register pairs
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);

  for (int i = 0; i < count; i++) {
    sub(stack, stack, vector_size_in_bytes);
    vs1r_v(as_VectorRegister(regs[i]), stack);
  }

  return count * vector_size_in_bytes / wordSize;
}

int MacroAssembler::pop_v(unsigned int bitset, Register stack) {
  int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);

  // Scan bitset to accumulate register pairs
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);

  for (int i = count - 1; i >= 0; i--) {
    vl1r_v(as_VectorRegister(regs[i]), stack);
    add(stack, stack, vector_size_in_bytes);
  }

  return count * vector_size_in_bytes / wordSize;
}
#endif // COMPILER2
void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
  // Push integer registers x7, x10-x17, x28-x31.
  push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);

  // Push float registers f0-f7, f10-f17, f28-f31.
  addi(sp, sp, -wordSize * 20);
  int offset = 0;
  for (int i = 0; i < 32; i++) {
    if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
      fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
    }
  }
}

void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
  int offset = 0;
  for (int i = 0; i < 32; i++) {
    if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
      fld(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
    }
  }
  addi(sp, sp, wordSize * 20);

  pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
}

void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) {
  // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
  push_reg(RegSet::range(x5, x31), sp);

  // float registers
  addi(sp, sp, -32 * wordSize);
  for (int i = 0; i < 32; i++) {
    fsd(as_FloatRegister(i), Address(sp, i * wordSize));
  }

  // vector registers
  if (save_vectors) {
    sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers);
    vsetvli(t0, x0, Assembler::e64, Assembler::m8);
    for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
      add(t0, sp, vector_size_in_bytes * i);
      vse64_v(as_VectorRegister(i), t0);
    }
  }
}

void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) {
  // vector registers
  if (restore_vectors) {
    vsetvli(t0, x0, Assembler::e64, Assembler::m8);
    for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
      vle64_v(as_VectorRegister(i), sp);
      add(sp, sp, vector_size_in_bytes * 8);
    }
  }

  // float registers
  for (int i = 0; i < 32; i++) {
    fld(as_FloatRegister(i), Address(sp, i * wordSize));
  }
  addi(sp, sp, 32 * wordSize);

  // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
  pop_reg(RegSet::range(x5, x31), sp);
}

static int patch_offset_in_jal(address branch, int64_t offset) {
  assert(Assembler::is_simm21(offset) && ((offset % 2) == 0),
         "offset is too large to be patched in one jal instruction!\n");
  Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);   // offset[20]    ==> branch[31]
  Assembler::patch(branch, 30, 21, (offset >> 1)  & 0x3ff); // offset[10:1]  ==> branch[30:21]
  Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);   // offset[11]    ==> branch[20]
  Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff);  // offset[19:12] ==> branch[19:12]
  return NativeInstruction::instruction_size;               // only one instruction
}

static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
  assert(Assembler::is_simm13(offset) && ((offset % 2) == 0),
         "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
  Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);   // offset[12]    ==> branch[31]
  Assembler::patch(branch, 30, 25, (offset >> 5)  & 0x3f);  // offset[10:5]  ==> branch[30:25]
  Assembler::patch(branch, 7,  7,  (offset >> 11) & 0x1);   // offset[11]    ==> branch[7]
  Assembler::patch(branch, 11, 8,  (offset >> 1)  & 0xf);   // offset[4:1]   ==> branch[11:8]
  return NativeInstruction::instruction_size;               // only one instruction
}
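// Worked example (illustrative): for offset = 0x800 (only bit 11 set),
// patch_offset_in_jal writes just branch[20] (offset[11]) and leaves
// branch[31], branch[30:21] and branch[19:12] zero -- the J-type immediate
// is scattered across the instruction word, not stored contiguously.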
static int patch_offset_in_pc_relative(address branch, int64_t offset) {
  const int PC_RELATIVE_INSTRUCTION_NUM = 2;                              // auipc, addi/jalr/load
  Assembler::patch(branch,     31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc.          offset[31:12] ==> branch[31:12]
  Assembler::patch(branch + 4, 31, 20, offset & 0xfff);                     // Addi/Jalr/Load. offset[11:0]  ==> branch[31:20]
  return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
}

static int patch_addr_in_movptr(address branch, address target) {
  const int MOVPTR_INSTRUCTIONS_NUM = 6;                     // lui + addi + slli + addi + slli + addi/jalr/load
  int32_t lower = ((intptr_t)target << 35) >> 35;
  int64_t upper = ((intptr_t)target - lower) >> 29;
  Assembler::patch(branch + 0,  31, 12, upper & 0xfffff);       // Lui.            target[48:29] + target[28] ==> branch[31:12]
  Assembler::patch(branch + 4,  31, 20, (lower >> 17) & 0xfff); // Addi.           target[28:17] ==> branch[31:20]
  Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff);  // Addi.           target[16: 6] ==> branch[31:20]
  Assembler::patch(branch + 20, 31, 20, lower & 0x3f);          // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20]
  return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
}
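// Note: the patched byte offsets +0, +4, +12 and +20 deliberately skip +8
// and +16, which hold the two slli instructions of the movptr sequence
// (lui + addi + slli + addi + slli + addi/jalr/load); the shifts carry no
// address bits and never need patching.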
static int patch_imm_in_li64(address branch, address target) {
  const int LI64_INSTRUCTIONS_NUM = 8;                       // lui + addi + slli + addi + slli + addi + slli + addi
  int64_t lower = (intptr_t)target & 0xffffffff;
  lower = lower - ((lower << 44) >> 44);
  int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower;
  int32_t upper = (tmp_imm - (int32_t)lower) >> 32;
  int64_t tmp_upper = upper, tmp_lower = upper;
  tmp_lower = (tmp_lower << 52) >> 52;
  tmp_upper -= tmp_lower;
  tmp_upper >>= 12;
  // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:20] == 0x7ff && target[19] == 1),
  // upper = target[63:32] + 1.
  Assembler::patch(branch + 0,  31, 12, tmp_upper & 0xfffff);                       // Lui.
  Assembler::patch(branch + 4,  31, 20, tmp_lower & 0xfff);                         // Addi.
  // Load the remaining 32 bits.
  Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff);            // Addi.
  Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff);  // Addi.
  Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff);                   // Addi.
  return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
}

static int patch_imm_in_li16u(address branch, uint16_t target) {
  Assembler::patch(branch, 31, 12, target); // patch lui only
  return NativeInstruction::instruction_size;
}

int MacroAssembler::patch_imm_in_li32(address branch, int32_t target) {
  const int LI32_INSTRUCTIONS_NUM = 2;                       // lui + addiw
  int64_t upper = (intptr_t)target;
  int32_t lower = (((int32_t)target) << 20) >> 20;
  upper -= lower;
  upper = (int32_t)upper;
  Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui.
  Assembler::patch(branch + 4, 31, 20, lower & 0xfff);           // Addiw.
  return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
}

static long get_offset_of_jal(address insn_addr) {
  assert_cond(insn_addr != nullptr);
  long offset = 0;
  unsigned insn = Assembler::ld_instr(insn_addr);
  long val = (long)Assembler::sextract(insn, 31, 12);
  offset |= ((val >> 19) & 0x1) << 20;
  offset |= (val & 0xff) << 12;
  offset |= ((val >> 8) & 0x1) << 11;
  offset |= ((val >> 9) & 0x3ff) << 1;
  offset = (offset << 43) >> 43;
  return offset;
}

static long get_offset_of_conditional_branch(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != nullptr);
  unsigned insn = Assembler::ld_instr(insn_addr);
  offset = (long)Assembler::sextract(insn, 31, 31);
  offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);
  offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);
  offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);
  offset = (offset << 41) >> 41;
  return offset;
}

static long get_offset_of_pc_relative(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != nullptr);
  offset = ((long)(Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12))) << 12;  // Auipc.
  offset += ((long)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20));     // Addi/Jalr/Load.
  offset = (offset << 32) >> 32;
  return offset;
}

static address get_target_of_movptr(address insn_addr) {
  assert_cond(insn_addr != nullptr);
  intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 29; // Lui.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 17;                 // Addi.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 6;                 // Addi.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20));                      // Addi/Jalr/Load.
  return (address) target_address;
}

static address get_target_of_li64(address insn_addr) {
  assert_cond(insn_addr != nullptr);
  intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 44; // Lui.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 32;                 // Addi.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 20;                // Addi.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)) << 8;                 // Addi.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 28), 31, 20));                      // Addi.
  return (address)target_address;
}

address MacroAssembler::get_target_of_li32(address insn_addr) {
  assert_cond(insn_addr != nullptr);
  intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 12; // Lui.
  target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20));                       // Addiw.
  return (address)target_address;
}
// Patch any kind of instruction; there may be several instructions.
// Return the total length (in bytes) of the instructions.
int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
  assert_cond(branch != nullptr);
  int64_t offset = target - branch;
  if (NativeInstruction::is_jal_at(branch)) {                 // jal
    return patch_offset_in_jal(branch, offset);
  } else if (NativeInstruction::is_branch_at(branch)) {       // beq/bge/bgeu/blt/bltu/bne
    return patch_offset_in_conditional_branch(branch, offset);
  } else if (NativeInstruction::is_pc_relative_at(branch)) {  // auipc, addi/jalr/load
    return patch_offset_in_pc_relative(branch, offset);
  } else if (NativeInstruction::is_movptr_at(branch)) {       // movptr
    return patch_addr_in_movptr(branch, target);
  } else if (NativeInstruction::is_li64_at(branch)) {         // li64
    return patch_imm_in_li64(branch, target);
  } else if (NativeInstruction::is_li32_at(branch)) {         // li32
    int64_t imm = (intptr_t)target;
    return patch_imm_in_li32(branch, (int32_t)imm);
  } else if (NativeInstruction::is_li16u_at(branch)) {
    int64_t imm = (intptr_t)target;
    return patch_imm_in_li16u(branch, (uint16_t)imm);
  } else {
#ifdef ASSERT
    tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n",
                  Assembler::ld_instr(branch), p2i(branch));
    Disassembler::decode(branch - 16, branch + 16);
#endif
    ShouldNotReachHere();
    return -1;
  }
}

address MacroAssembler::target_addr_for_insn(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != nullptr);
  if (NativeInstruction::is_jal_at(insn_addr)) {                 // jal
    offset = get_offset_of_jal(insn_addr);
  } else if (NativeInstruction::is_branch_at(insn_addr)) {       // beq/bge/bgeu/blt/bltu/bne
    offset = get_offset_of_conditional_branch(insn_addr);
  } else if (NativeInstruction::is_pc_relative_at(insn_addr)) {  // auipc, addi/jalr/load
    offset = get_offset_of_pc_relative(insn_addr);
  } else if (NativeInstruction::is_movptr_at(insn_addr)) {       // movptr
    return get_target_of_movptr(insn_addr);
  } else if (NativeInstruction::is_li64_at(insn_addr)) {         // li64
    return get_target_of_li64(insn_addr);
  } else if (NativeInstruction::is_li32_at(insn_addr)) {         // li32
    return get_target_of_li32(insn_addr);
  } else {
    ShouldNotReachHere();
  }
  return address(((uintptr_t)insn_addr + offset));
}
int MacroAssembler::patch_oop(address insn_addr, address o) {
  // OOPs are either narrow (32 bits) or wide (48 bits). We encode
  // narrow OOPs by setting the upper 16 bits in the first
  // instruction.
  if (NativeInstruction::is_li32_at(insn_addr)) {
    // Move narrow OOP
    uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
    return patch_imm_in_li32(insn_addr, (int32_t)n);
  } else if (NativeInstruction::is_movptr_at(insn_addr)) {
    // Move wide OOP
    return patch_addr_in_movptr(insn_addr, o);
  }
  ShouldNotReachHere();
  return -1;
}

void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    if (Universe::is_fully_initialized()) {
      mv(xheapbase, CompressedOops::ptrs_base());
    } else {
      ExternalAddress target(CompressedOops::ptrs_base_addr());
      relocate(target.rspec(), [&] {
        int32_t offset;
        la(xheapbase, target.target(), offset);
        ld(xheapbase, Address(xheapbase, offset));
      });
    }
  }
}

void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset) {
  int64_t imm64 = (int64_t)addr;
#ifndef PRODUCT
  {
    char buffer[64];
    snprintf(buffer, sizeof(buffer), "0x%" PRIx64, imm64);
    block_comment(buffer);
  }
#endif
  assert((uintptr_t)imm64 < (1ull << 48), "48-bit overflow in address constant");
  // Load upper 31 bits
  int64_t imm = imm64 >> 17;
  int64_t upper = imm, lower = imm;
  lower = (lower << 52) >> 52;
  upper -= lower;
  upper = (int32_t)upper;
  lui(Rd, upper);
  addi(Rd, Rd, lower);

  // Load the remaining 17 bits.
  slli(Rd, Rd, 11);
  addi(Rd, Rd, (imm64 >> 6) & 0x7ff);
  slli(Rd, Rd, 6);

  // This offset will be used by following jalr/ld.
  offset = imm64 & 0x3f;
}
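// Worked example (illustrative): movptr(Rd, (address)0x123456789ABC, offset)
//   lui+addi : Rd = 0x123456789ABC >> 17          = 0x091A2B3C
//   slli 11  : Rd = 0x091A2B3C << 11              = 0x48D159E000
//   addi     : Rd += (imm64 >> 6) & 0x7ff (0x26A) = 0x48D159E26A
//   slli 6   : Rd = 0x48D159E26A << 6             = 0x123456789A80
// and offset receives imm64 & 0x3f = 0x3C, so the consuming jalr/ld forms
// the full 0x123456789ABC.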
Assembler::orn(Rd, Rs1, Rs2); 1696 return; 1697 } 1698 1699 notr(Rd, Rs2); 1700 orr(Rd, Rs1, Rd); 1701 } 1702 1703 // Note: load_unsigned_short used to be called load_unsigned_word. 1704 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 1705 int off = offset(); 1706 lhu(dst, src); 1707 return off; 1708 } 1709 1710 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 1711 int off = offset(); 1712 lbu(dst, src); 1713 return off; 1714 } 1715 1716 int MacroAssembler::load_signed_short(Register dst, Address src) { 1717 int off = offset(); 1718 lh(dst, src); 1719 return off; 1720 } 1721 1722 int MacroAssembler::load_signed_byte(Register dst, Address src) { 1723 int off = offset(); 1724 lb(dst, src); 1725 return off; 1726 } 1727 1728 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 1729 switch (size_in_bytes) { 1730 case 8: ld(dst, src); break; 1731 case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; 1732 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 1733 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 1734 default: ShouldNotReachHere(); 1735 } 1736 } 1737 1738 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes) { 1739 switch (size_in_bytes) { 1740 case 8: sd(src, dst); break; 1741 case 4: sw(src, dst); break; 1742 case 2: sh(src, dst); break; 1743 case 1: sb(src, dst); break; 1744 default: ShouldNotReachHere(); 1745 } 1746 } 1747 1748 // granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register 1749 void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 1750 if (granularity != 1 && granularity != 2) { 1751 ShouldNotReachHere(); 1752 } 1753 if (AvoidUnalignedAccesses && (granularity != 2)) { 1754 assert_different_registers(dst, tmp); 1755 assert_different_registers(tmp, src.base()); 1756 is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1)); 1757 slli(tmp, tmp, 8); 1758 lbu(dst, src); 1759 add(dst, dst, tmp); 1760 } else { 1761 is_signed ? lh(dst, src) : lhu(dst, src); 1762 } 1763 } 1764 1765 // granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register 1766 void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 1767 if (AvoidUnalignedAccesses && (granularity != 4)) { 1768 switch(granularity) { 1769 case 1: 1770 assert_different_registers(dst, tmp, src.base()); 1771 lbu(dst, src); 1772 lbu(tmp, Address(src.base(), src.offset() + 1)); 1773 slli(tmp, tmp, 8); 1774 add(dst, dst, tmp); 1775 lbu(tmp, Address(src.base(), src.offset() + 2)); 1776 slli(tmp, tmp, 16); 1777 add(dst, dst, tmp); 1778 is_signed ? lb(tmp, Address(src.base(), src.offset() + 3)) : lbu(tmp, Address(src.base(), src.offset() + 3)); 1779 slli(tmp, tmp, 24); 1780 add(dst, dst, tmp); 1781 break; 1782 case 2: 1783 assert_different_registers(dst, tmp); 1784 assert_different_registers(tmp, src.base()); 1785 is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2)); 1786 slli(tmp, tmp, 16); 1787 lhu(dst, src); 1788 add(dst, dst, tmp); 1789 break; 1790 default: 1791 ShouldNotReachHere(); 1792 } 1793 } else { 1794 is_signed ? 
lw(dst, src) : lwu(dst, src); 1795 } 1796 } 1797 1798 // granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register 1799 void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) { 1800 if (AvoidUnalignedAccesses && (granularity != 8)) { 1801 switch(granularity){ 1802 case 1: 1803 assert_different_registers(dst, tmp, src.base()); 1804 lbu(dst, src); 1805 lbu(tmp, Address(src.base(), src.offset() + 1)); 1806 slli(tmp, tmp, 8); 1807 add(dst, dst, tmp); 1808 lbu(tmp, Address(src.base(), src.offset() + 2)); 1809 slli(tmp, tmp, 16); 1810 add(dst, dst, tmp); 1811 lbu(tmp, Address(src.base(), src.offset() + 3)); 1812 slli(tmp, tmp, 24); 1813 add(dst, dst, tmp); 1814 lbu(tmp, Address(src.base(), src.offset() + 4)); 1815 slli(tmp, tmp, 32); 1816 add(dst, dst, tmp); 1817 lbu(tmp, Address(src.base(), src.offset() + 5)); 1818 slli(tmp, tmp, 40); 1819 add(dst, dst, tmp); 1820 lbu(tmp, Address(src.base(), src.offset() + 6)); 1821 slli(tmp, tmp, 48); 1822 add(dst, dst, tmp); 1823 lbu(tmp, Address(src.base(), src.offset() + 7)); 1824 slli(tmp, tmp, 56); 1825 add(dst, dst, tmp); 1826 break; 1827 case 2: 1828 assert_different_registers(dst, tmp, src.base()); 1829 lhu(dst, src); 1830 lhu(tmp, Address(src.base(), src.offset() + 2)); 1831 slli(tmp, tmp, 16); 1832 add(dst, dst, tmp); 1833 lhu(tmp, Address(src.base(), src.offset() + 4)); 1834 slli(tmp, tmp, 32); 1835 add(dst, dst, tmp); 1836 lhu(tmp, Address(src.base(), src.offset() + 6)); 1837 slli(tmp, tmp, 48); 1838 add(dst, dst, tmp); 1839 break; 1840 case 4: 1841 assert_different_registers(dst, tmp); 1842 assert_different_registers(tmp, src.base()); 1843 lwu(tmp, Address(src.base(), src.offset() + 4)); 1844 slli(tmp, tmp, 32); 1845 lwu(dst, src); 1846 add(dst, dst, tmp); 1847 break; 1848 default: 1849 ShouldNotReachHere(); 1850 } 1851 } else { 1852 ld(dst, src); 1853 } 1854 } 1855 1856 1857 // reverse bytes in halfword in lower 16 bits and sign-extend 1858 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) 1859 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { 1860 if (UseZbb) { 1861 rev8(Rd, Rs); 1862 srai(Rd, Rd, 48); 1863 return; 1864 } 1865 assert_different_registers(Rs, tmp); 1866 assert_different_registers(Rd, tmp); 1867 srli(tmp, Rs, 8); 1868 andi(tmp, tmp, 0xFF); 1869 slli(Rd, Rs, 56); 1870 srai(Rd, Rd, 48); // sign-extend 1871 orr(Rd, Rd, tmp); 1872 } 1873 1874 // reverse bytes in lower word and sign-extend 1875 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) 1876 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 1877 if (UseZbb) { 1878 rev8(Rd, Rs); 1879 srai(Rd, Rd, 32); 1880 return; 1881 } 1882 assert_different_registers(Rs, tmp1, tmp2); 1883 assert_different_registers(Rd, tmp1, tmp2); 1884 revb_h_w_u(Rd, Rs, tmp1, tmp2); 1885 slli(tmp2, Rd, 48); 1886 srai(tmp2, tmp2, 32); // sign-extend 1887 srli(Rd, Rd, 16); 1888 orr(Rd, Rd, tmp2); 1889 } 1890 1891 // reverse bytes in halfword in lower 16 bits and zero-extend 1892 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 1893 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { 1894 if (UseZbb) { 1895 rev8(Rd, Rs); 1896 srli(Rd, Rd, 48); 1897 return; 1898 } 1899 assert_different_registers(Rs, tmp); 1900 assert_different_registers(Rd, tmp); 1901 srli(tmp, Rs, 8); 1902 andi(tmp, tmp, 0xFF); 1903 andi(Rd, Rs, 0xFF); 1904 slli(Rd, Rd, 8); 1905 orr(Rd, Rd, tmp); 1906 } 1907 1908 // reverse 
bytes in halfwords in lower 32 bits and zero-extend
1909 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1910 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1911   if (UseZbb) {
1912     rev8(Rd, Rs);
1913     rori(Rd, Rd, 32);
1914     roriw(Rd, Rd, 16);
1915     zero_extend(Rd, Rd, 32);
1916     return;
1917   }
1918   assert_different_registers(Rs, tmp1, tmp2);
1919   assert_different_registers(Rd, tmp1, tmp2);
1920   srli(tmp2, Rs, 16);
1921   revb_h_h_u(tmp2, tmp2, tmp1);
1922   revb_h_h_u(Rd, Rs, tmp1);
1923   slli(tmp2, tmp2, 16);
1924   orr(Rd, Rd, tmp2);
1925 }
1926
1927 // This method is only used for revb_h
1928 // Rd = Rs[47:0] Rs[55:48] Rs[63:56]
1929 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1930   assert_different_registers(Rs, tmp1, tmp2);
1931   assert_different_registers(Rd, tmp1);
1932   srli(tmp1, Rs, 48);
1933   andi(tmp2, tmp1, 0xFF);
1934   slli(tmp2, tmp2, 8);
1935   srli(tmp1, tmp1, 8);
1936   orr(tmp1, tmp1, tmp2);
1937   slli(Rd, Rs, 16);
1938   orr(Rd, Rd, tmp1);
1939 }
1940
1941 // reverse bytes in each halfword
1942 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
1943 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1944   if (UseZbb) {
1945     assert_different_registers(Rs, tmp1);
1946     assert_different_registers(Rd, tmp1);
1947     rev8(Rd, Rs);
1948     zero_extend(tmp1, Rd, 32);
1949     roriw(tmp1, tmp1, 16);
1950     slli(tmp1, tmp1, 32);
1951     srli(Rd, Rd, 32);
1952     roriw(Rd, Rd, 16);
1953     zero_extend(Rd, Rd, 32);
1954     orr(Rd, Rd, tmp1);
1955     return;
1956   }
1957   assert_different_registers(Rs, tmp1, tmp2);
1958   assert_different_registers(Rd, tmp1, tmp2);
1959   revb_h_helper(Rd, Rs, tmp1, tmp2);
1960   for (int i = 0; i < 3; ++i) {
1961     revb_h_helper(Rd, Rd, tmp1, tmp2);
1962   }
1963 }
1964
1965 // reverse bytes in each word
1966 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24]
1967 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1968   if (UseZbb) {
1969     rev8(Rd, Rs);
1970     rori(Rd, Rd, 32);
1971     return;
1972   }
1973   assert_different_registers(Rs, tmp1, tmp2);
1974   assert_different_registers(Rd, tmp1, tmp2);
1975   revb(Rd, Rs, tmp1, tmp2);
1976   ror_imm(Rd, Rd, 32);
1977 }
1978
1979 // reverse bytes in doubleword
1980 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56]
1981 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1982   if (UseZbb) {
1983     rev8(Rd, Rs);
1984     return;
1985   }
1986   assert_different_registers(Rs, tmp1, tmp2);
1987   assert_different_registers(Rd, tmp1, tmp2);
1988   andi(tmp1, Rs, 0xFF);
1989   slli(tmp1, tmp1, 8);
1990   for (int step = 8; step < 56; step += 8) {
1991     srli(tmp2, Rs, step);
1992     andi(tmp2, tmp2, 0xFF);
1993     orr(tmp1, tmp1, tmp2);
1994     slli(tmp1, tmp1, 8);
1995   }
1996   srli(Rd, Rs, 56);
1997   andi(Rd, Rd, 0xFF);
1998   orr(Rd, tmp1, Rd);
1999 }
2000
2001 // rotate right by shift bits
2002 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp)
2003 {
2004   if (UseZbb) {
2005     rori(dst, src, shift);
2006     return;
2007   }
2008
2009   assert_different_registers(dst, tmp);
2010   assert_different_registers(src, tmp);
2011   assert(shift < 64, "shift amount must be < 64");
2012   slli(tmp, src, 64 - shift);
2013   srli(dst, src, shift);
2014   orr(dst, dst, tmp);
2015 }
2016
2017 // rotate left by shift bits, 32-bit version
2018 void
MacroAssembler::rolw_imm(Register dst, Register src, uint32_t shift, Register tmp) { 2019 if (UseZbb) { 2020 // no roliw available 2021 roriw(dst, src, 32 - shift); 2022 return; 2023 } 2024 2025 assert_different_registers(dst, tmp); 2026 assert_different_registers(src, tmp); 2027 assert(shift < 32, "shift amount must be < 32"); 2028 srliw(tmp, src, 32 - shift); 2029 slliw(dst, src, shift); 2030 orr(dst, dst, tmp); 2031 } 2032 2033 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { 2034 if (is_simm12(imm)) { 2035 and_imm12(Rd, Rn, imm); 2036 } else { 2037 assert_different_registers(Rn, tmp); 2038 mv(tmp, imm); 2039 andr(Rd, Rn, tmp); 2040 } 2041 } 2042 2043 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { 2044 ld(tmp1, adr); 2045 if (src.is_register()) { 2046 orr(tmp1, tmp1, src.as_register()); 2047 } else { 2048 if (is_simm12(src.as_constant())) { 2049 ori(tmp1, tmp1, src.as_constant()); 2050 } else { 2051 assert_different_registers(tmp1, tmp2); 2052 mv(tmp2, src.as_constant()); 2053 orr(tmp1, tmp1, tmp2); 2054 } 2055 } 2056 sd(tmp1, adr); 2057 } 2058 2059 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L) { 2060 assert_different_registers(oop, trial_klass, tmp1, tmp2); 2061 if (UseCompressedClassPointers) { 2062 lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2063 if (CompressedKlassPointers::base() == nullptr) { 2064 slli(tmp1, tmp1, CompressedKlassPointers::shift()); 2065 beq(trial_klass, tmp1, L); 2066 return; 2067 } 2068 decode_klass_not_null(tmp1, tmp2); 2069 } else { 2070 ld(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2071 } 2072 beq(trial_klass, tmp1, L); 2073 } 2074 2075 // Move an oop into a register. 2076 void MacroAssembler::movoop(Register dst, jobject obj) { 2077 int oop_index; 2078 if (obj == nullptr) { 2079 oop_index = oop_recorder()->allocate_oop_index(obj); 2080 } else { 2081 #ifdef ASSERT 2082 { 2083 ThreadInVMfromUnknown tiv; 2084 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 2085 } 2086 #endif 2087 oop_index = oop_recorder()->find_index(obj); 2088 } 2089 RelocationHolder rspec = oop_Relocation::spec(oop_index); 2090 2091 if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) { 2092 mv(dst, Address((address)obj, rspec)); 2093 } else { 2094 address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address 2095 ld_constant(dst, Address(dummy, rspec)); 2096 } 2097 } 2098 2099 // Move a metadata address into a register. 2100 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 2101 int oop_index; 2102 if (obj == nullptr) { 2103 oop_index = oop_recorder()->allocate_metadata_index(obj); 2104 } else { 2105 oop_index = oop_recorder()->find_index(obj); 2106 } 2107 RelocationHolder rspec = metadata_Relocation::spec(oop_index); 2108 mv(dst, Address((address)obj, rspec)); 2109 } 2110 2111 // Writes to stack successive pages until offset reached to check for 2112 // stack overflow + shadow pages. This clobbers tmp. 2113 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 2114 assert_different_registers(tmp, size, t0); 2115 // Bang stack for total size given plus shadow page size. 2116 // Bang one page at a time because large size can bang beyond yellow and 2117 // red zones. 
2118   mv(t0, (int)os::vm_page_size());
2119   Label loop;
2120   bind(loop);
2121   sub(tmp, sp, t0);
2122   subw(size, size, t0);
2123   sd(size, Address(tmp));
2124   bgtz(size, loop);
2125
2126   // Bang down shadow pages too.
2127   // At this point, (tmp-0) is the last address touched, so don't
2128   // touch it again. (It was touched as (tmp-pagesize) but then tmp
2129   // was post-decremented.) Skip this address by starting at i=1, and
2130   // touch a few more pages below. N.B. It is important to touch all
2131   // the way down to and including i=StackShadowPages.
2132   for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / (int)os::vm_page_size()) - 1; i++) {
2133     // this could be any sized move but this can be a debugging crumb,
2134     // so the bigger the better.
2135     sub(tmp, tmp, (int)os::vm_page_size());
2136     sd(size, Address(tmp, 0));
2137   }
2138 }
2139
2140 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) {
2141   int32_t offset = 0;
2142   _masm = masm;
2143   ExternalAddress target((address)flag_addr);
2144   _masm->relocate(target.rspec(), [&] {
2145     int32_t offset;
2146     _masm->la(t0, target.target(), offset);
2147     _masm->lbu(t0, Address(t0, offset));
2148   });
2149   if (value) {
2150     _masm->bnez(t0, _label);
2151   } else {
2152     _masm->beqz(t0, _label);
2153   }
2154 }
2155
2156 SkipIfEqual::~SkipIfEqual() {
2157   _masm->bind(_label);
2158   _masm = nullptr;
2159 }
2160
2161 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) {
2162   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2163   ld(dst, Address(method, Method::const_offset()));
2164   ld(dst, Address(dst, ConstMethod::constants_offset()));
2165   ld(dst, Address(dst, ConstantPool::pool_holder_offset()));
2166   ld(dst, Address(dst, mirror_offset));
2167   resolve_oop_handle(dst, tmp1, tmp2);
2168 }
2169
2170 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) {
2171   // OopHandle::resolve is an indirection.
2172   assert_different_registers(result, tmp1, tmp2);
2173   access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2);
2174 }
2175
2176 // ((WeakHandle)result).resolve()
2177 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) {
2178   assert_different_registers(result, tmp1, tmp2);
2179   Label resolved;
2180
2181   // A null weak handle resolves to null.
2182   beqz(result, resolved);
2183
2184   // Only 64 bit platforms support GCs that require a tmp register
2185   // Only IN_HEAP loads require a thread_tmp register
2186   // WeakHandle::resolve is an indirection like jweak.
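  // Conceptually (illustrative C-level sketch of the load below):
  //   oop resolved = *(oop*)weak_handle;   // phantom-strength IN_NATIVE load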
2187   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
2188                  result, Address(result), tmp1, tmp2);
2189   bind(resolved);
2190 }
2191
2192 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
2193                                     Register dst, Address src,
2194                                     Register tmp1, Register tmp2) {
2195   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2196   decorators = AccessInternal::decorator_fixup(decorators, type);
2197   bool as_raw = (decorators & AS_RAW) != 0;
2198   if (as_raw) {
2199     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2);
2200   } else {
2201     bs->load_at(this, decorators, type, dst, src, tmp1, tmp2);
2202   }
2203 }
2204
2205 void MacroAssembler::null_check(Register reg, int offset) {
2206   if (needs_explicit_null_check(offset)) {
2207     // provoke OS null exception if reg is null by
2208     // accessing M[reg] w/o changing any registers
2209     // NOTE: this is plenty to provoke a segv
2210     ld(zr, Address(reg, 0));
2211   } else {
2212     // nothing to do, (later) access of M[reg + offset]
2213     // will provoke OS null exception if reg is null
2214   }
2215 }
2216
2217 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
2218                                      Address dst, Register val,
2219                                      Register tmp1, Register tmp2, Register tmp3) {
2220   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2221   decorators = AccessInternal::decorator_fixup(decorators, type);
2222   bool as_raw = (decorators & AS_RAW) != 0;
2223   if (as_raw) {
2224     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
2225   } else {
2226     bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
2227   }
2228 }
2229
2230 // Algorithm must match CompressedOops::encode.
2231 void MacroAssembler::encode_heap_oop(Register d, Register s) {
2232   verify_oop_msg(s, "broken oop in encode_heap_oop");
2233   if (CompressedOops::base() == nullptr) {
2234     if (CompressedOops::shift() != 0) {
2235       assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
2236       srli(d, s, LogMinObjAlignmentInBytes);
2237     } else {
2238       mv(d, s);
2239     }
2240   } else {
2241     Label notNull;
2242     sub(d, s, xheapbase);
2243     bgez(d, notNull);
2244     mv(d, zr);
2245     bind(notNull);
2246     if (CompressedOops::shift() != 0) {
2247       assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
2248       srli(d, d, CompressedOops::shift());
2249     }
2250   }
2251 }
2252
2253 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
2254   assert_different_registers(dst, tmp);
2255   assert_different_registers(src, tmp);
2256   if (UseCompressedClassPointers) {
2257     lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
2258     decode_klass_not_null(dst, tmp);
2259   } else {
2260     ld(dst, Address(src, oopDesc::klass_offset_in_bytes()));
2261   }
2262 }
2263
2264 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
2265   // FIXME: Should this be a store release? Concurrent GCs assume the
2266   // klass length is valid if the klass field is not null.
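  // Illustrative example (hypothetical values): with a compressed-klass base
  // of 0x0000000800000000 and shift 3, a Klass* at 0x0000000800001000 is
  // encoded as (0x1000 >> 3) = 0x200 before the 32-bit store below.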
2267 if (UseCompressedClassPointers) { 2268 encode_klass_not_null(src, tmp); 2269 sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2270 } else { 2271 sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2272 } 2273 } 2274 2275 void MacroAssembler::store_klass_gap(Register dst, Register src) { 2276 if (UseCompressedClassPointers) { 2277 // Store to klass gap in destination 2278 sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2279 } 2280 } 2281 2282 void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { 2283 assert_different_registers(r, tmp); 2284 decode_klass_not_null(r, r, tmp); 2285 } 2286 2287 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { 2288 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2289 2290 if (CompressedKlassPointers::base() == nullptr) { 2291 if (CompressedKlassPointers::shift() != 0) { 2292 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2293 slli(dst, src, LogKlassAlignmentInBytes); 2294 } else { 2295 mv(dst, src); 2296 } 2297 return; 2298 } 2299 2300 Register xbase = dst; 2301 if (dst == src) { 2302 xbase = tmp; 2303 } 2304 2305 assert_different_registers(src, xbase); 2306 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2307 2308 if (CompressedKlassPointers::shift() != 0) { 2309 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2310 assert_different_registers(t0, xbase); 2311 shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); 2312 } else { 2313 add(dst, xbase, src); 2314 } 2315 } 2316 2317 void MacroAssembler::encode_klass_not_null(Register r, Register tmp) { 2318 assert_different_registers(r, tmp); 2319 encode_klass_not_null(r, r, tmp); 2320 } 2321 2322 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { 2323 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2324 2325 if (CompressedKlassPointers::base() == nullptr) { 2326 if (CompressedKlassPointers::shift() != 0) { 2327 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2328 srli(dst, src, LogKlassAlignmentInBytes); 2329 } else { 2330 mv(dst, src); 2331 } 2332 return; 2333 } 2334 2335 if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && 2336 CompressedKlassPointers::shift() == 0) { 2337 zero_extend(dst, src, 32); 2338 return; 2339 } 2340 2341 Register xbase = dst; 2342 if (dst == src) { 2343 xbase = tmp; 2344 } 2345 2346 assert_different_registers(src, xbase); 2347 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2348 sub(dst, src, xbase); 2349 if (CompressedKlassPointers::shift() != 0) { 2350 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2351 srli(dst, dst, LogKlassAlignmentInBytes); 2352 } 2353 } 2354 2355 void MacroAssembler::decode_heap_oop_not_null(Register r) { 2356 decode_heap_oop_not_null(r, r); 2357 } 2358 2359 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2360 assert(UseCompressedOops, "should only be used for compressed headers"); 2361 assert(Universe::heap() != nullptr, "java heap should be initialized"); 2362 // Cannot assert, unverified entry point counts instructions (see .ad file) 2363 // vtableStubs also counts instructions in pd_code_size_limit. 2364 // Also do not verify_oop as this is called by verify_oop. 
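  // Worked example (hypothetical values): with shift 3 and xheapbase =
  // 0x0000000700000000, the narrow oop 0x00200000 decodes to
  // (0x00200000 << 3) + xheapbase = 0x0000000701000000.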
2365   if (CompressedOops::shift() != 0) {
2366     assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
2367     slli(dst, src, LogMinObjAlignmentInBytes);
2368     if (CompressedOops::base() != nullptr) {
2369       add(dst, xheapbase, dst);
2370     }
2371   } else {
2372     assert(CompressedOops::base() == nullptr, "sanity");
2373     mv(dst, src);
2374   }
2375 }
2376
2377 void MacroAssembler::decode_heap_oop(Register d, Register s) {
2378   if (CompressedOops::base() == nullptr) {
2379     if (CompressedOops::shift() != 0 || d != s) {
2380       slli(d, s, CompressedOops::shift());
2381     }
2382   } else {
2383     Label done;
2384     mv(d, s);
2385     beqz(s, done);
2386     shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes);
2387     bind(done);
2388   }
2389   verify_oop_msg(d, "broken oop in decode_heap_oop");
2390 }
2391
2392 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1,
2393                                     Register tmp2, Register tmp3, DecoratorSet decorators) {
2394   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3);
2395 }
2396
2397 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
2398                                    Register tmp2, DecoratorSet decorators) {
2399   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
2400 }
2401
2402 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
2403                                             Register tmp2, DecoratorSet decorators) {
2404   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2);
2405 }
2406
2407 // Used for storing nulls.
2408 void MacroAssembler::store_heap_oop_null(Address dst) {
2409   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
2410 }
2411
2412 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
2413                                     bool want_remainder, bool is_signed)
2414 {
2415   // Full implementation of Java idiv and irem. The function
2416   // returns the (pc) offset of the div instruction - may be needed
2417   // for implicit exceptions.
2418   //
2419   // input : rs1: dividend
2420   //         rs2: divisor
2421   //
2422   // result: either
2423   //         quotient  (= rs1 idiv rs2)
2424   //         remainder (= rs1 irem rs2)
2425
2426
2427   int idivl_offset = offset();
2428   if (!want_remainder) {
2429     if (is_signed) {
2430       divw(result, rs1, rs2);
2431     } else {
2432       divuw(result, rs1, rs2);
2433     }
2434   } else {
2435     // result = rs1 % rs2;
2436     if (is_signed) {
2437       remw(result, rs1, rs2);
2438     } else {
2439       remuw(result, rs1, rs2);
2440     }
2441   }
2442   return idivl_offset;
2443 }
2444
2445 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
2446                                     bool want_remainder, bool is_signed)
2447 {
2448   // Full implementation of Java ldiv and lrem. The function
2449   // returns the (pc) offset of the div instruction - may be needed
2450   // for implicit exceptions.
2451   //
2452   // input : rs1: dividend
2453   //         rs2: divisor
2454   //
2455   // result: either
2456   //         quotient  (= rs1 idiv rs2)
2457   //         remainder (= rs1 irem rs2)
2458
2459   int idivq_offset = offset();
2460   if (!want_remainder) {
2461     if (is_signed) {
2462       div(result, rs1, rs2);
2463     } else {
2464       divu(result, rs1, rs2);
2465     }
2466   } else {
2467     // result = rs1 % rs2;
2468     if (is_signed) {
2469       rem(result, rs1, rs2);
2470     } else {
2471       remu(result, rs1, rs2);
2472     }
2473   }
2474   return idivq_offset;
2475 }
2476
2477 // Look up the method for a megamorphic invokeinterface call.
2478 // The target method is determined by <intf_klass, itable_index>.
2479 // The receiver klass is in recv_klass.
2480 // On success, the result will be in method_result, and execution falls through.
2481 // On failure, execution transfers to the given label.
2482 void MacroAssembler::lookup_interface_method(Register recv_klass,
2483                                              Register intf_klass,
2484                                              RegisterOrConstant itable_index,
2485                                              Register method_result,
2486                                              Register scan_tmp,
2487                                              Label& L_no_such_interface,
2488                                              bool return_method) {
2489   assert_different_registers(recv_klass, intf_klass, scan_tmp);
2490   assert_different_registers(method_result, intf_klass, scan_tmp);
2491   assert(recv_klass != method_result || !return_method,
2492          "recv_klass can be destroyed when method isn't needed");
2493   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
2494          "caller must use the same register for non-constant itable index as for method");
2495
2496   // Compute start of first itableOffsetEntry (which is at the end of the vtable).
2497   int vtable_base = in_bytes(Klass::vtable_start_offset());
2498   int itentry_off = in_bytes(itableMethodEntry::method_offset());
2499   int scan_step = itableOffsetEntry::size() * wordSize;
2500   int vte_size = vtableEntry::size_in_bytes();
2501   assert(vte_size == wordSize, "else adjust times_vte_scale");
2502
2503   lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
2504
2505   // Could store the aligned, prescaled offset in the klass.
2506   shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
2507   add(scan_tmp, scan_tmp, vtable_base);
2508
2509   if (return_method) {
2510     // Adjust recv_klass by scaled itable_index, so we can free itable_index.
2511     assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
2512     if (itable_index.is_register()) {
2513       slli(t0, itable_index.as_register(), 3);
2514     } else {
2515       mv(t0, itable_index.as_constant() << 3);
2516     }
2517     add(recv_klass, recv_klass, t0);
2518     if (itentry_off) {
2519       add(recv_klass, recv_klass, itentry_off);
2520     }
2521   }
2522
2523   Label search, found_method;
2524
2525   ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset()));
2526   beq(intf_klass, method_result, found_method);
2527   bind(search);
2528   // Check that the previous entry is non-null. A null entry means that
2529   // the receiver class doesn't implement the interface, and wasn't the
2530   // same as when the caller was compiled.
2531   beqz(method_result, L_no_such_interface, /* is_far */ true);
2532   addi(scan_tmp, scan_tmp, scan_step);
2533   ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset()));
2534   bne(intf_klass, method_result, search);
2535
2536   bind(found_method);
2537
2538   // Got a hit.
2539   if (return_method) {
2540     lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset()));
2541     add(method_result, recv_klass, scan_tmp);
2542     ld(method_result, Address(method_result));
2543   }
2544 }
2545
2546 // Look up the method for a megamorphic invokeinterface call in a single pass over itable:
2547 // - check recv_klass (actual object class) is a subtype of resolved_klass from CompiledICData
2548 // - find a holder_klass (class that implements the method) vtable offset and get the method from vtable by index
2549 // The target method is determined by <holder_klass, itable_index>.
2550 // The receiver klass is in recv_klass.
2551 // On success, the result will be in method_result, and execution falls through.
2552 // On failure, execution transfers to the given label.
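// Conceptual shape of the data scanned below (illustrative, names abbreviated):
//   struct itableOffsetEntry { Klass* interface; int offset; };
// where 'offset' locates that interface's itableMethodEntry[] inside the klass.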
2553 void MacroAssembler::lookup_interface_method_stub(Register recv_klass, 2554 Register holder_klass, 2555 Register resolved_klass, 2556 Register method_result, 2557 Register temp_itbl_klass, 2558 Register scan_temp, 2559 int itable_index, 2560 Label& L_no_such_interface) { 2561 // 'method_result' is only used as output register at the very end of this method. 2562 // Until then we can reuse it as 'holder_offset'. 2563 Register holder_offset = method_result; 2564 assert_different_registers(resolved_klass, recv_klass, holder_klass, temp_itbl_klass, scan_temp, holder_offset); 2565 2566 int vtable_start_offset_bytes = in_bytes(Klass::vtable_start_offset()); 2567 int scan_step = itableOffsetEntry::size() * wordSize; 2568 int ioffset_bytes = in_bytes(itableOffsetEntry::interface_offset()); 2569 int ooffset_bytes = in_bytes(itableOffsetEntry::offset_offset()); 2570 int itmentry_off_bytes = in_bytes(itableMethodEntry::method_offset()); 2571 const int vte_scale = exact_log2(vtableEntry::size_in_bytes()); 2572 2573 Label L_loop_search_resolved_entry, L_resolved_found, L_holder_found; 2574 2575 lwu(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); 2576 add(recv_klass, recv_klass, vtable_start_offset_bytes + ioffset_bytes); 2577 // itableOffsetEntry[] itable = recv_klass + Klass::vtable_start_offset() 2578 // + sizeof(vtableEntry) * (recv_klass->_vtable_len); 2579 // scan_temp = &(itable[0]._interface) 2580 // temp_itbl_klass = itable[0]._interface; 2581 shadd(scan_temp, scan_temp, recv_klass, scan_temp, vte_scale); 2582 ld(temp_itbl_klass, Address(scan_temp)); 2583 mv(holder_offset, zr); 2584 2585 // Initial checks: 2586 // - if (holder_klass != resolved_klass), go to "scan for resolved" 2587 // - if (itable[0] == holder_klass), shortcut to "holder found" 2588 // - if (itable[0] == 0), no such interface 2589 bne(resolved_klass, holder_klass, L_loop_search_resolved_entry); 2590 beq(holder_klass, temp_itbl_klass, L_holder_found); 2591 beqz(temp_itbl_klass, L_no_such_interface); 2592 2593 // Loop: Look for holder_klass record in itable 2594 // do { 2595 // temp_itbl_klass = *(scan_temp += scan_step); 2596 // if (temp_itbl_klass == holder_klass) { 2597 // goto L_holder_found; // Found! 2598 // } 2599 // } while (temp_itbl_klass != 0); 2600 // goto L_no_such_interface // Not found. 2601 Label L_search_holder; 2602 bind(L_search_holder); 2603 add(scan_temp, scan_temp, scan_step); 2604 ld(temp_itbl_klass, Address(scan_temp)); 2605 beq(holder_klass, temp_itbl_klass, L_holder_found); 2606 bnez(temp_itbl_klass, L_search_holder); 2607 2608 j(L_no_such_interface); 2609 2610 // Loop: Look for resolved_class record in itable 2611 // while (true) { 2612 // temp_itbl_klass = *(scan_temp += scan_step); 2613 // if (temp_itbl_klass == 0) { 2614 // goto L_no_such_interface; 2615 // } 2616 // if (temp_itbl_klass == resolved_klass) { 2617 // goto L_resolved_found; // Found! 2618 // } 2619 // if (temp_itbl_klass == holder_klass) { 2620 // holder_offset = scan_temp; 2621 // } 2622 // } 2623 // 2624 Label L_loop_search_resolved; 2625 bind(L_loop_search_resolved); 2626 add(scan_temp, scan_temp, scan_step); 2627 ld(temp_itbl_klass, Address(scan_temp)); 2628 bind(L_loop_search_resolved_entry); 2629 beqz(temp_itbl_klass, L_no_such_interface); 2630 beq(resolved_klass, temp_itbl_klass, L_resolved_found); 2631 bne(holder_klass, temp_itbl_klass, L_loop_search_resolved); 2632 mv(holder_offset, scan_temp); 2633 j(L_loop_search_resolved); 2634 2635 // See if we already have a holder klass. If not, go and scan for it. 
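  // (That is: a non-zero holder_offset means the holder entry was already seen
  //  while scanning for resolved_klass; zero falls back to L_search_holder.)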
2636   bind(L_resolved_found);
2637   beqz(holder_offset, L_search_holder);
2638   mv(scan_temp, holder_offset);
2639
2640   // Finally, scan_temp contains holder_klass vtable offset
2641   bind(L_holder_found);
2642   lwu(method_result, Address(scan_temp, ooffset_bytes - ioffset_bytes));
2643   add(recv_klass, recv_klass, itable_index * wordSize + itmentry_off_bytes
2644                               - vtable_start_offset_bytes - ioffset_bytes); // subtract offsets to restore the original value of recv_klass
2645   add(method_result, recv_klass, method_result);
2646   ld(method_result, Address(method_result));
2647 }
2648
2649 // virtual method calling
2650 void MacroAssembler::lookup_virtual_method(Register recv_klass,
2651                                            RegisterOrConstant vtable_index,
2652                                            Register method_result) {
2653   const ByteSize base = Klass::vtable_start_offset();
2654   assert(vtableEntry::size() * wordSize == 8,
2655          "adjust the scaling in the code below");
2656   int vtable_offset_in_bytes = in_bytes(base + vtableEntry::method_offset());
2657
2658   if (vtable_index.is_register()) {
2659     shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
2660     ld(method_result, Address(method_result, vtable_offset_in_bytes));
2661   } else {
2662     vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
2663     ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
2664   }
2665 }
2666
2667 void MacroAssembler::membar(uint32_t order_constraint) {
2668   address prev = pc() - NativeMembar::instruction_size;
2669   address last = code()->last_insn();
2670
2671   if (last != nullptr && nativeInstruction_at(last)->is_membar() && prev == last) {
2672     NativeMembar *bar = NativeMembar_at(prev);
2673     // We are merging two memory barrier instructions. On RISCV we
2674     // can do this simply by ORing them together.
2675     bar->set_kind(bar->get_kind() | order_constraint);
2676     BLOCK_COMMENT("merged membar");
2677   } else {
2678     code()->set_last_insn(pc());
2679
2680     uint32_t predecessor = 0;
2681     uint32_t successor = 0;
2682
2683     membar_mask_to_pred_succ(order_constraint, predecessor, successor);
2684     fence(predecessor, successor);
2685   }
2686 }
2687
2688 // Form an address from base + offset in Rd. Rd may or may not
2689 // actually be used: you must use the Address that is returned. It
2690 // is up to you to ensure that the shift provided matches the size
2691 // of your data.
2692 Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset) {
2693   if (is_simm12(byte_offset)) { // signed 12-bit immediate fits directly
2694     return Address(base, byte_offset);
2695   }
2696
2697   assert_different_registers(Rd, base, noreg);
2698
2699   // Do it the hard way
2700   mv(Rd, byte_offset);
2701   add(Rd, base, Rd);
2702   return Address(Rd);
2703 }
2704
2705 void MacroAssembler::check_klass_subtype(Register sub_klass,
2706                                          Register super_klass,
2707                                          Register tmp_reg,
2708                                          Label& L_success) {
2709   Label L_failure;
2710   check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, nullptr);
2711   check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, nullptr);
2712   bind(L_failure);
2713 }
2714
2715 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
2716   ld(t0, Address(xthread, JavaThread::polling_word_offset()));
2717   if (acquire) {
2718     membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
2719   }
2720   if (at_return) {
2721     bgtu(in_nmethod ?
sp : fp, t0, slow_path, /* is_far */ true); 2722 } else { 2723 test_bit(t0, t0, exact_log2(SafepointMechanism::poll_bit())); 2724 bnez(t0, slow_path, true /* is_far */); 2725 } 2726 } 2727 2728 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, 2729 Label &succeed, Label *fail) { 2730 assert_different_registers(addr, tmp, t0); 2731 assert_different_registers(newv, tmp, t0); 2732 assert_different_registers(oldv, tmp, t0); 2733 2734 // oldv holds comparison value 2735 // newv holds value to write in exchange 2736 // addr identifies memory word to compare against/update 2737 if (UseZacas) { 2738 mv(tmp, oldv); 2739 atomic_cas(tmp, newv, addr, Assembler::int64, Assembler::aq, Assembler::rl); 2740 beq(tmp, oldv, succeed); 2741 } else { 2742 Label retry_load, nope; 2743 bind(retry_load); 2744 // Load reserved from the memory location 2745 load_reserved(tmp, addr, int64, Assembler::aqrl); 2746 // Fail and exit if it is not what we expect 2747 bne(tmp, oldv, nope); 2748 // If the store conditional succeeds, tmp will be zero 2749 store_conditional(tmp, newv, addr, int64, Assembler::rl); 2750 beqz(tmp, succeed); 2751 // Retry only when the store conditional failed 2752 j(retry_load); 2753 2754 bind(nope); 2755 } 2756 2757 // neither amocas nor lr/sc have an implied barrier in the failing case 2758 membar(AnyAny); 2759 2760 mv(oldv, tmp); 2761 if (fail != nullptr) { 2762 j(*fail); 2763 } 2764 } 2765 2766 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, 2767 Label &succeed, Label *fail) { 2768 assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); 2769 cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); 2770 } 2771 2772 void MacroAssembler::load_reserved(Register dst, 2773 Register addr, 2774 enum operand_size size, 2775 Assembler::Aqrl acquire) { 2776 switch (size) { 2777 case int64: 2778 lr_d(dst, addr, acquire); 2779 break; 2780 case int32: 2781 lr_w(dst, addr, acquire); 2782 break; 2783 case uint32: 2784 lr_w(dst, addr, acquire); 2785 zero_extend(dst, dst, 32); 2786 break; 2787 default: 2788 ShouldNotReachHere(); 2789 } 2790 } 2791 2792 void MacroAssembler::store_conditional(Register dst, 2793 Register new_val, 2794 Register addr, 2795 enum operand_size size, 2796 Assembler::Aqrl release) { 2797 switch (size) { 2798 case int64: 2799 sc_d(dst, new_val, addr, release); 2800 break; 2801 case int32: 2802 case uint32: 2803 sc_w(dst, new_val, addr, release); 2804 break; 2805 default: 2806 ShouldNotReachHere(); 2807 } 2808 } 2809 2810 2811 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, 2812 Register new_val, 2813 enum operand_size size, 2814 Register tmp1, Register tmp2, Register tmp3) { 2815 assert(size == int8 || size == int16, "unsupported operand size"); 2816 2817 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; 2818 2819 andi(shift, addr, 3); 2820 slli(shift, shift, 3); 2821 2822 andi(aligned_addr, addr, ~3); 2823 2824 if (size == int8) { 2825 mv(mask, 0xff); 2826 } else { 2827 // size == int16 case 2828 mv(mask, -1); 2829 zero_extend(mask, mask, 16); 2830 } 2831 sll(mask, mask, shift); 2832 2833 notr(not_mask, mask); 2834 2835 sll(expected, expected, shift); 2836 andr(expected, expected, mask); 2837 2838 sll(new_val, new_val, shift); 2839 andr(new_val, new_val, mask); 2840 } 2841 2842 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. 
2843 // It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w or amocas.w,
2844 // which are forced to work with a 4-byte-aligned address.
2845 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
2846                                           Register new_val,
2847                                           enum operand_size size,
2848                                           Assembler::Aqrl acquire, Assembler::Aqrl release,
2849                                           Register result, bool result_as_bool,
2850                                           Register tmp1, Register tmp2, Register tmp3) {
2851   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2852   assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2853   cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2854
2855   Label retry, fail, done;
2856
2857   bind(retry);
2858
2859   if (UseZacas) {
2860     lw(old, aligned_addr);
2861
2862     // if old & mask != expected
2863     andr(tmp, old, mask);
2864     bne(tmp, expected, fail);
2865
2866     andr(tmp, old, not_mask);
2867     orr(tmp, tmp, new_val);
2868
2869     atomic_cas(old, tmp, aligned_addr, operand_size::int32, acquire, release);
2870     bne(tmp, old, retry);
2871   } else {
2872     lr_w(old, aligned_addr, acquire);
2873     andr(tmp, old, mask);
2874     bne(tmp, expected, fail);
2875
2876     andr(tmp, old, not_mask);
2877     orr(tmp, tmp, new_val);
2878     sc_w(tmp, tmp, aligned_addr, release);
2879     bnez(tmp, retry);
2880   }
2881
2882   if (result_as_bool) {
2883     mv(result, 1);
2884     j(done);
2885
2886     bind(fail);
2887     mv(result, zr);
2888
2889     bind(done);
2890   } else {
2891     andr(tmp, old, mask);
2892
2893     bind(fail);
2894     srl(result, tmp, shift);
2895
2896     if (size == int8) {
2897       sign_extend(result, result, 8);
2898     } else {
2899       // size == int16 case
2900       sign_extend(result, result, 16);
2901     }
2902   }
2903 }
2904
2905 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement
2906 // the weak CAS stuff. The major difference is that it simply fails when the
2907 // store conditional fails.
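// Worked example of cmpxchg_narrow_value_helper's math above (hypothetical
// address): for an int16 at an address ending in ...06, shift = (addr & 3) * 8
// = 16, aligned_addr = addr & ~3, mask = 0xffff << 16, not_mask = ~mask, and
// expected/new_val are shifted into bits [31:16] of the enclosing 32-bit word.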
2908 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
2909                                                Register new_val,
2910                                                enum operand_size size,
2911                                                Assembler::Aqrl acquire, Assembler::Aqrl release,
2912                                                Register result,
2913                                                Register tmp1, Register tmp2, Register tmp3) {
2914   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2915   assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2916   cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2917
2918   Label fail, done;
2919
2920   if (UseZacas) {
2921     lw(old, aligned_addr);
2922
2923     // if old & mask != expected
2924     andr(tmp, old, mask);
2925     bne(tmp, expected, fail);
2926
2927     andr(tmp, old, not_mask);
2928     orr(tmp, tmp, new_val);
2929
2930     atomic_cas(old, tmp, aligned_addr, operand_size::int32, acquire, release);
2931     bne(tmp, old, fail);
2932   } else {
2933     lr_w(old, aligned_addr, acquire);
2934     andr(tmp, old, mask);
2935     bne(tmp, expected, fail);
2936
2937     andr(tmp, old, not_mask);
2938     orr(tmp, tmp, new_val);
2939     sc_w(tmp, tmp, aligned_addr, release);
2940     bnez(tmp, fail);
2941   }
2942
2943   // Success
2944   mv(result, 1);
2945   j(done);
2946
2947   // Fail
2948   bind(fail);
2949   mv(result, zr);
2950
2951   bind(done);
2952 }
2953
2954 void MacroAssembler::cmpxchg(Register addr, Register expected,
2955                              Register new_val,
2956                              enum operand_size size,
2957                              Assembler::Aqrl acquire, Assembler::Aqrl release,
2958                              Register result, bool result_as_bool) {
2959   assert(size != int8 && size != int16, "unsupported operand size");
2960   assert_different_registers(addr, t0);
2961   assert_different_registers(expected, t0);
2962   assert_different_registers(new_val, t0);
2963
2964   if (UseZacas) {
2965     if (result_as_bool) {
2966       mv(t0, expected);
2967       atomic_cas(t0, new_val, addr, size, acquire, release);
2968       xorr(t0, t0, expected);
2969       seqz(result, t0);
2970     } else {
2971       mv(result, expected);
2972       atomic_cas(result, new_val, addr, size, acquire, release);
2973     }
2974     return;
2975   }
2976
2977   Label retry_load, done, ne_done;
2978   bind(retry_load);
2979   load_reserved(t0, addr, size, acquire);
2980   bne(t0, expected, ne_done);
2981   store_conditional(t0, new_val, addr, size, release);
2982   bnez(t0, retry_load);
2983
2984   // equal, succeed
2985   if (result_as_bool) {
2986     mv(result, 1);
2987   } else {
2988     mv(result, expected);
2989   }
2990   j(done);
2991
2992   // not equal, failed
2993   bind(ne_done);
2994   if (result_as_bool) {
2995     mv(result, zr);
2996   } else {
2997     mv(result, t0);
2998   }
2999
3000   bind(done);
3001 }
3002
3003 void MacroAssembler::cmpxchg_weak(Register addr, Register expected,
3004                                   Register new_val,
3005                                   enum operand_size size,
3006                                   Assembler::Aqrl acquire, Assembler::Aqrl release,
3007                                   Register result) {
3008   if (UseZacas) {
3009     cmpxchg(addr, expected, new_val, size, acquire, release, result, true);
3010     return;
3011   }
3012
3013   assert_different_registers(addr, t0);
3014   assert_different_registers(expected, t0);
3015   assert_different_registers(new_val, t0);
3016
3017   Label fail, done;
3018   load_reserved(t0, addr, size, acquire);
3019   bne(t0, expected, fail);
3020   store_conditional(t0, new_val, addr, size, release);
3021   bnez(t0, fail);
3022
3023   // Success
3024   mv(result, 1);
3025   j(done);
3026
3027   // Fail
3028   bind(fail);
3029   mv(result, zr);
3030
3031   bind(done);
3032 }
3033
3034 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE)                                              \
3035 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
3036   prev = prev->is_valid() ?
prev : zr; \ 3037 if (incr.is_register()) { \ 3038 AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3039 } else { \ 3040 mv(t0, incr.as_constant()); \ 3041 AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3042 } \ 3043 return; \ 3044 } 3045 3046 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) 3047 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) 3048 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) 3049 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) 3050 3051 #undef ATOMIC_OP 3052 3053 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ 3054 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 3055 prev = prev->is_valid() ? prev : zr; \ 3056 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3057 return; \ 3058 } 3059 3060 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) 3061 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) 3062 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) 3063 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) 3064 3065 #undef ATOMIC_XCHG 3066 3067 #define ATOMIC_XCHGU(OP1, OP2) \ 3068 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 3069 atomic_##OP2(prev, newv, addr); \ 3070 zero_extend(prev, prev, 32); \ 3071 return; \ 3072 } 3073 3074 ATOMIC_XCHGU(xchgwu, xchgw) 3075 ATOMIC_XCHGU(xchgalwu, xchgalw) 3076 3077 #undef ATOMIC_XCHGU 3078 3079 #define ATOMIC_CAS(OP, AOP, ACQUIRE, RELEASE) \ 3080 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 3081 assert(UseZacas, "invariant"); \ 3082 prev = prev->is_valid() ? prev : zr; \ 3083 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3084 return; \ 3085 } 3086 3087 ATOMIC_CAS(cas, amocas_d, Assembler::relaxed, Assembler::relaxed) 3088 ATOMIC_CAS(casw, amocas_w, Assembler::relaxed, Assembler::relaxed) 3089 ATOMIC_CAS(casl, amocas_d, Assembler::relaxed, Assembler::rl) 3090 ATOMIC_CAS(caslw, amocas_w, Assembler::relaxed, Assembler::rl) 3091 ATOMIC_CAS(casal, amocas_d, Assembler::aq, Assembler::rl) 3092 ATOMIC_CAS(casalw, amocas_w, Assembler::aq, Assembler::rl) 3093 3094 #undef ATOMIC_CAS 3095 3096 #define ATOMIC_CASU(OP1, OP2) \ 3097 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 3098 atomic_##OP2(prev, newv, addr); \ 3099 zero_extend(prev, prev, 32); \ 3100 return; \ 3101 } 3102 3103 ATOMIC_CASU(caswu, casw) 3104 ATOMIC_CASU(caslwu, caslw) 3105 ATOMIC_CASU(casalwu, casalw) 3106 3107 #undef ATOMIC_CASU 3108 3109 void MacroAssembler::atomic_cas( 3110 Register prev, Register newv, Register addr, enum operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release) { 3111 switch (size) { 3112 case int64: 3113 switch ((Assembler::Aqrl)(acquire | release)) { 3114 case Assembler::relaxed: 3115 atomic_cas(prev, newv, addr); 3116 break; 3117 case Assembler::rl: 3118 atomic_casl(prev, newv, addr); 3119 break; 3120 case Assembler::aqrl: 3121 atomic_casal(prev, newv, addr); 3122 break; 3123 default: 3124 ShouldNotReachHere(); 3125 } 3126 break; 3127 case int32: 3128 switch ((Assembler::Aqrl)(acquire | release)) { 3129 case Assembler::relaxed: 3130 atomic_casw(prev, newv, addr); 3131 break; 3132 case Assembler::rl: 3133 atomic_caslw(prev, newv, addr); 3134 break; 3135 case Assembler::aqrl: 3136 atomic_casalw(prev, newv, addr); 3137 break; 3138 default: 3139 ShouldNotReachHere(); 3140 } 3141 break; 3142 case uint32: 3143 switch 
((Assembler::Aqrl)(acquire | release)) {
3144     case Assembler::relaxed:
3145       atomic_caswu(prev, newv, addr);
3146       break;
3147     case Assembler::rl:
3148       atomic_caslwu(prev, newv, addr);
3149       break;
3150     case Assembler::aqrl:
3151       atomic_casalwu(prev, newv, addr);
3152       break;
3153     default:
3154       ShouldNotReachHere();
3155     }
3156     break;
3157   default:
3158     ShouldNotReachHere();
3159   }
3160 }
3161
3162 void MacroAssembler::far_jump(const Address &entry, Register tmp) {
3163   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
3164   assert(CodeCache::find_blob(entry.target()) != nullptr,
3165          "destination of far jump not found in code cache");
3166   assert(entry.rspec().type() == relocInfo::external_word_type
3167          || entry.rspec().type() == relocInfo::runtime_call_type
3168          || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
3169   // Fixed length: see MacroAssembler::far_branch_size()
3170   relocate(entry.rspec(), [&] {
3171     int32_t offset;
3172     la(tmp, entry.target(), offset);
3173     jalr(x0, tmp, offset);
3174   });
3175 }
3176
3177 void MacroAssembler::far_call(const Address &entry, Register tmp) {
3178   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
3179   assert(CodeCache::find_blob(entry.target()) != nullptr,
3180          "destination of far call not found in code cache");
3181   assert(entry.rspec().type() == relocInfo::external_word_type
3182          || entry.rspec().type() == relocInfo::runtime_call_type
3183          || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
3184   // Fixed length: see MacroAssembler::far_branch_size()
3185   // We can use auipc + jalr here because we know that the total size of
3186   // the code cache cannot exceed 2Gb.
3187   relocate(entry.rspec(), [&] {
3188     int32_t offset;
3189     la(tmp, entry.target(), offset);
3190     jalr(x1, tmp, offset); // link
3191   });
3192 }
3193
3194 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3195                                                    Register super_klass,
3196                                                    Register tmp_reg,
3197                                                    Label* L_success,
3198                                                    Label* L_failure,
3199                                                    Label* L_slow_path,
3200                                                    Register super_check_offset) {
3201   assert_different_registers(sub_klass, super_klass, tmp_reg);
3202   bool must_load_sco = (super_check_offset == noreg);
3203   if (must_load_sco) {
3204     assert(tmp_reg != noreg, "supply either a temp or a register offset");
3205   } else {
3206     assert_different_registers(sub_klass, super_klass, super_check_offset);
3207   }
3208
3209   Label L_fallthrough;
3210   int label_nulls = 0;
3211   if (L_success == nullptr)   { L_success   = &L_fallthrough; label_nulls++; }
3212   if (L_failure == nullptr)   { L_failure   = &L_fallthrough; label_nulls++; }
3213   if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; }
3214   assert(label_nulls <= 1, "at most one null in batch");
3215
3216   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3217   int sco_offset = in_bytes(Klass::super_check_offset_offset());
3218   Address super_check_offset_addr(super_klass, sco_offset);
3219
3220   // Hacked jmp, which may only be used just before L_fallthrough.
3221 #define final_jmp(label)                                      \
3222   if (&(label) == &L_fallthrough) { /*do nothing*/ }          \
3223   else j(label)             /*omit semi*/
3224
3225   // If the pointers are equal, we are done (e.g., String[] elements).
3226   // This self-check enables sharing of secondary supertype arrays among
3227   // non-primary types such as array-of-interface. Otherwise, each such
3228   // type would need its own customized SSA.
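  // (SSA here = secondary supers array, i.e. Klass::secondary_supers().)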
3229   // We move this check to the front of the fast path because many
3230   // type checks are in fact trivially successful in this manner,
3231   // so we get a nicely predicted branch right at the start of the check.
3232   beq(sub_klass, super_klass, *L_success);
3233
3234   // Check the supertype display:
3235   if (must_load_sco) {
3236     lwu(tmp_reg, super_check_offset_addr);
3237     super_check_offset = tmp_reg;
3238   }
3239   add(t0, sub_klass, super_check_offset);
3240   Address super_check_addr(t0);
3241   ld(t0, super_check_addr); // load displayed supertype
3242
3243   // This check has worked decisively for primary supers.
3244   // Secondary supers are sought in the super_cache ('super_cache_addr').
3245   // (Secondary supers are interfaces and very deeply nested subtypes.)
3246   // This works in the same check above because of a tricky aliasing
3247   // between the super_cache and the primary super display elements.
3248   // (The 'super_check_addr' can address either, as the case requires.)
3249   // Note that the cache is updated below if it does not help us find
3250   // what we need immediately.
3251   // So if it was a primary super, we can just fail immediately.
3252   // Otherwise, it's the slow path for us (no success at this point).
3253
3254   beq(super_klass, t0, *L_success);
3255   mv(t1, sc_offset);
3256   if (L_failure == &L_fallthrough) {
3257     beq(super_check_offset, t1, *L_slow_path);
3258   } else {
3259     bne(super_check_offset, t1, *L_failure, /* is_far */ true);
3260     final_jmp(*L_slow_path);
3261   }
3262
3263   bind(L_fallthrough);
3264
3265 #undef final_jmp
3266 }
3267
3268 // Scans count pointer-sized words at [addr] for occurrence of value,
3269 // generic
3270 void MacroAssembler::repne_scan(Register addr, Register value, Register count,
3271                                 Register tmp) {
3272   Label Lloop, Lexit;
3273   beqz(count, Lexit);
3274   bind(Lloop);
3275   ld(tmp, addr);
3276   beq(value, tmp, Lexit);
3277   add(addr, addr, wordSize);
3278   sub(count, count, 1);
3279   bnez(count, Lloop);
3280   bind(Lexit);
3281 }
3282
3283 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3284                                                    Register super_klass,
3285                                                    Register tmp1_reg,
3286                                                    Register tmp2_reg,
3287                                                    Label* L_success,
3288                                                    Label* L_failure) {
3289   assert_different_registers(sub_klass, super_klass, tmp1_reg);
3290   if (tmp2_reg != noreg) {
3291     assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0);
3292   }
3293 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg)
3294
3295   Label L_fallthrough;
3296   int label_nulls = 0;
3297   if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
3298   if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
3299
3300   assert(label_nulls <= 1, "at most one null in the batch");
3301
3302   // A couple of useful fields in sub_klass:
3303   int ss_offset = in_bytes(Klass::secondary_supers_offset());
3304   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3305   Address secondary_supers_addr(sub_klass, ss_offset);
3306   Address super_cache_addr(     sub_klass, sc_offset);
3307
3308   BLOCK_COMMENT("check_klass_subtype_slow_path");
3309
3310   // Do a linear scan of the secondary super-klass chain.
3311   // This code is rarely used, so simplicity is a virtue here.
3312   // The repne_scan instruction uses fixed registers, which we must spill.
3313   // Don't worry too much about pre-existing connections with the input regs.
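  // Conceptual C-level sketch of the scan below (illustrative only):
  //   Array<Klass*>* ss = sub_klass->secondary_supers();
  //   for (int i = 0; i < ss->length(); i++) {
  //     if (ss->at(i) == super_klass) { /* cache it and succeed */ }
  //   }
  //   /* otherwise fail */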
3314
3315   assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
3316   assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter)
3317
3318   RegSet pushed_registers;
3319   if (!IS_A_TEMP(x12)) {
3320     pushed_registers += x12;
3321   }
3322   if (!IS_A_TEMP(x15)) {
3323     pushed_registers += x15;
3324   }
3325
3326   if (super_klass != x10) {
3327     if (!IS_A_TEMP(x10)) {
3328       pushed_registers += x10;
3329     }
3330   }
3331
3332   push_reg(pushed_registers, sp);
3333
3334   // Get super_klass value into x10 (even if it was in x15 or x12)
3335   mv(x10, super_klass);
3336
3337 #ifndef PRODUCT
3338   mv(t1, (address)&SharedRuntime::_partial_subtype_ctr);
3339   Address pst_counter_addr(t1);
3340   ld(t0, pst_counter_addr);
3341   add(t0, t0, 1);
3342   sd(t0, pst_counter_addr);
3343 #endif // PRODUCT
3344
3345   // We will consult the secondary-super array.
3346   ld(x15, secondary_supers_addr);
3347   // Load the array length.
3348   lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes()));
3349   // Skip to start of data.
3350   add(x15, x15, Array<Klass*>::base_offset_in_bytes());
3351
3352   // Set t0 to an obvious invalid value, falling through by default
3353   mv(t0, -1);
3354   // Scan x12 words at [x15] for an occurrence of x10.
3355   repne_scan(x15, x10, x12, t0);
3356
3357   // pop will restore x10, so we should use a temp register to keep its value
3358   mv(t1, x10);
3359
3360   // Unspill the temp registers:
3361   pop_reg(pushed_registers, sp);
3362
3363   bne(t1, t0, *L_failure);
3364
3365   // Success. Cache the super we found and proceed in triumph.
3366   sd(super_klass, super_cache_addr);
3367
3368   if (L_success != &L_fallthrough) {
3369     j(*L_success);
3370   }
3371
3372 #undef IS_A_TEMP
3373
3374   bind(L_fallthrough);
3375 }
3376
3377 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
3378 void MacroAssembler::tlab_allocate(Register obj,
3379                                    Register var_size_in_bytes,
3380                                    int con_size_in_bytes,
3381                                    Register tmp1,
3382                                    Register tmp2,
3383                                    Label& slow_case,
3384                                    bool is_far) {
3385   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
3386   bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far);
3387 }
3388
3389 // get_thread() can be called anywhere inside generated code so we
3390 // need to save whatever non-callee save context might get clobbered
3391 // by the call to Thread::current() or, indeed, the call setup code.
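// Illustrative contract: on return, 'thread' holds Thread::current(), and all
// other call-clobbered registers have been saved and restored around the
// runtime call, so callers observe no other register changes.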
3392 void MacroAssembler::get_thread(Register thread) { 3393 // save all call-clobbered regs except thread 3394 RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + 3395 RegSet::range(x28, x31) + ra - thread; 3396 push_reg(saved_regs, sp); 3397 3398 mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); 3399 jalr(ra); 3400 if (thread != c_rarg0) { 3401 mv(thread, c_rarg0); 3402 } 3403 3404 // restore pushed registers 3405 pop_reg(saved_regs, sp); 3406 } 3407 3408 void MacroAssembler::load_byte_map_base(Register reg) { 3409 CardTable::CardValue* byte_map_base = 3410 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); 3411 mv(reg, (uint64_t)byte_map_base); 3412 } 3413 3414 void MacroAssembler::build_frame(int framesize) { 3415 assert(framesize >= 2, "framesize must include space for FP/RA"); 3416 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 3417 sub(sp, sp, framesize); 3418 sd(fp, Address(sp, framesize - 2 * wordSize)); 3419 sd(ra, Address(sp, framesize - wordSize)); 3420 if (PreserveFramePointer) { add(fp, sp, framesize); } 3421 } 3422 3423 void MacroAssembler::remove_frame(int framesize) { 3424 assert(framesize >= 2, "framesize must include space for FP/RA"); 3425 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 3426 ld(fp, Address(sp, framesize - 2 * wordSize)); 3427 ld(ra, Address(sp, framesize - wordSize)); 3428 add(sp, sp, framesize); 3429 } 3430 3431 void MacroAssembler::reserved_stack_check() { 3432 // testing if reserved zone needs to be enabled 3433 Label no_reserved_zone_enabling; 3434 3435 ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); 3436 bltu(sp, t0, no_reserved_zone_enabling); 3437 3438 enter(); // RA and FP are live. 3439 mv(c_rarg0, xthread); 3440 rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)); 3441 leave(); 3442 3443 // We have already removed our own frame. 3444 // throw_delayed_StackOverflowError will think that it's been 3445 // called by our caller. 3446 RuntimeAddress target(StubRoutines::throw_delayed_StackOverflowError_entry()); 3447 relocate(target.rspec(), [&] { 3448 int32_t offset; 3449 movptr(t0, target.target(), offset); 3450 jalr(x0, t0, offset); 3451 }); 3452 should_not_reach_here(); 3453 3454 bind(no_reserved_zone_enabling); 3455 } 3456 3457 // Move the address of the polling page into dest. 3458 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { 3459 ld(dest, Address(xthread, JavaThread::polling_page_offset())); 3460 } 3461 3462 // Read the polling page. The address of the polling page must 3463 // already be in r. 
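// The loaded value is discarded (rd is zr); the access exists only so a
// safepoint poll can fault when the polling page is protected.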
3464 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { 3465 relocate(rtype, [&] { 3466 lwu(zr, Address(r, offset)); 3467 }); 3468 } 3469 3470 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 3471 #ifdef ASSERT 3472 { 3473 ThreadInVMfromUnknown tiv; 3474 assert (UseCompressedOops, "should only be used for compressed oops"); 3475 assert (Universe::heap() != nullptr, "java heap should be initialized"); 3476 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 3477 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 3478 } 3479 #endif 3480 int oop_index = oop_recorder()->find_index(obj); 3481 relocate(oop_Relocation::spec(oop_index), [&] { 3482 li32(dst, 0xDEADBEEF); 3483 }); 3484 zero_extend(dst, dst, 32); 3485 } 3486 3487 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 3488 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 3489 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 3490 int index = oop_recorder()->find_index(k); 3491 assert(!Universe::heap()->is_in(k), "should not be an oop"); 3492 3493 narrowKlass nk = CompressedKlassPointers::encode(k); 3494 relocate(metadata_Relocation::spec(index), [&] { 3495 li32(dst, nk); 3496 }); 3497 zero_extend(dst, dst, 32); 3498 } 3499 3500 // Maybe emit a call via a trampoline. If the code cache is small 3501 // trampolines won't be emitted. 3502 address MacroAssembler::trampoline_call(Address entry) { 3503 assert(entry.rspec().type() == relocInfo::runtime_call_type || 3504 entry.rspec().type() == relocInfo::opt_virtual_call_type || 3505 entry.rspec().type() == relocInfo::static_call_type || 3506 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); 3507 3508 address target = entry.target(); 3509 3510 // We need a trampoline if branches are far. 3511 if (!in_scratch_emit_size()) { 3512 if (entry.rspec().type() == relocInfo::runtime_call_type) { 3513 assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs"); 3514 code()->share_trampoline_for(entry.target(), offset()); 3515 } else { 3516 address stub = emit_trampoline_stub(offset(), target); 3517 if (stub == nullptr) { 3518 postcond(pc() == badAddress); 3519 return nullptr; // CodeCache is full 3520 } 3521 } 3522 } 3523 target = pc(); 3524 3525 address call_pc = pc(); 3526 #ifdef ASSERT 3527 if (entry.rspec().type() != relocInfo::runtime_call_type) { 3528 assert_alignment(call_pc); 3529 } 3530 #endif 3531 relocate(entry.rspec(), [&] { 3532 jal(target); 3533 }); 3534 3535 postcond(pc() != badAddress); 3536 return call_pc; 3537 } 3538 3539 address MacroAssembler::ic_call(address entry, jint method_index) { 3540 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); 3541 IncompressibleRegion ir(this); // relocations 3542 movptr(t1, (address)Universe::non_oop_word()); 3543 assert_cond(entry != nullptr); 3544 return trampoline_call(Address(entry, rh)); 3545 } 3546 3547 int MacroAssembler::ic_check_size() { 3548 // No compressed 3549 return (NativeInstruction::instruction_size * (2 /* 2 loads */ + 1 /* branch */)) + 3550 far_branch_size(); 3551 } 3552 3553 int MacroAssembler::ic_check(int end_alignment) { 3554 IncompressibleRegion ir(this); 3555 Register receiver = j_rarg0; 3556 Register data = t1; 3557 3558 Register tmp1 = t0; // t0 always scratch 3559 // t2 is saved on call, thus should have been saved before this check. 3560 // Hence we can clobber it. 
3561   Register tmp2 = t2;
3562 
3563   // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed
3564   // before the inline cache check, so we don't have to execute any nop instructions when dispatching
3565   // through the UEP, yet we can ensure that the VEP is aligned appropriately. That's why we align
3566   // before the inline cache check here, and not after.
3567   align(end_alignment, ic_check_size());
3568   int uep_offset = offset();
3569 
3570   if (UseCompressedClassPointers) {
3571     lwu(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
3572     lwu(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
3573   } else {
3574     ld(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
3575     ld(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
3576   }
3577 
3578   Label ic_hit;
3579   beq(tmp1, tmp2, ic_hit);
3580   // Note, far_jump is not fixed size.
3581   // If this ever generates a movptr, the alignment/size will be off.
3582   far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
3583   bind(ic_hit);
3584 
3585   assert((offset() % end_alignment) == 0, "Misaligned verified entry point.");
3586   return uep_offset;
3587 }
3588 
3589 // Emit a trampoline stub for a call to a target which is too far away.
3590 //
3591 // code sequences:
3592 //
3593 // call-site:
3594 //   branch-and-link to <destination> or <trampoline stub>
3595 //
3596 // Related trampoline stub for this call site in the stub section:
3597 //   load the call target from the constant pool
3598 //   branch (RA still points to the call site above)
3599 
3600 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
3601                                              address dest) {
3602   // Max stub size: alignment nop, TrampolineStub.
3603   address stub = start_a_stub(max_trampoline_stub_size());
3604   if (stub == nullptr) {
3605     return nullptr;  // CodeBuffer::expand failed
3606   }
3607 
3608   // We are always 4-byte aligned here.
3609   assert_alignment(pc());
3610 
3611   // Create a trampoline stub relocation which relates this trampoline stub
3612   // with the call instruction at insts_call_instruction_offset in the
3613   // instructions code-section.
3614 
3615   // Make sure the address of the destination is 8-byte aligned after 3 instructions.
3616   align(wordSize, NativeCallTrampolineStub::data_offset);
3617 
3618   RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() +
3619                                                          insts_call_instruction_offset);
3620   const int stub_start_offset = offset();
3621   relocate(rh, [&] {
3622     // Now, create the trampoline stub's code:
3623     // - load the call target
3624     // - call
3625     Label target;
3626     ld(t0, target);  // auipc + ld
3627     jr(t0);          // jalr
3628     bind(target);
3629     assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
3630            "should be");
3631     assert(offset() % wordSize == 0, "bad alignment");
3632     emit_int64((int64_t)dest);
3633   });
3634 
3635   const address stub_start_addr = addr_at(stub_start_offset);
3636 
3637   assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
3638 
3639   end_a_stub();
3640   return stub_start_addr;
3641 }
3642 
3643 int MacroAssembler::max_trampoline_stub_size() {
3644   // Max stub size: alignment nop, TrampolineStub.
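  // That is: at most one 4-byte alignment nop, plus the stub body emitted
  // by emit_trampoline_stub() above (auipc + ld + jalr, followed by the
  // 8-byte destination word).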
3645 return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; 3646 } 3647 3648 int MacroAssembler::static_call_stub_size() { 3649 // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr 3650 return 12 * NativeInstruction::instruction_size; 3651 } 3652 3653 Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) { 3654 switch (dst.getMode()) { 3655 case Address::base_plus_offset: 3656 // This is the expected mode, although we allow all the other 3657 // forms below. 3658 return form_address(tmp, dst.base(), dst.offset()); 3659 default: 3660 la(tmp, dst); 3661 return Address(tmp); 3662 } 3663 } 3664 3665 void MacroAssembler::increment(const Address dst, int64_t value, Register tmp1, Register tmp2) { 3666 assert(((dst.getMode() == Address::base_plus_offset && 3667 is_simm12(dst.offset())) || is_simm12(value)), 3668 "invalid value and address mode combination"); 3669 Address adr = add_memory_helper(dst, tmp2); 3670 assert(!adr.uses(tmp1), "invalid dst for address increment"); 3671 ld(tmp1, adr); 3672 add(tmp1, tmp1, value, tmp2); 3673 sd(tmp1, adr); 3674 } 3675 3676 void MacroAssembler::incrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 3677 assert(((dst.getMode() == Address::base_plus_offset && 3678 is_simm12(dst.offset())) || is_simm12(value)), 3679 "invalid value and address mode combination"); 3680 Address adr = add_memory_helper(dst, tmp2); 3681 assert(!adr.uses(tmp1), "invalid dst for address increment"); 3682 lwu(tmp1, adr); 3683 addw(tmp1, tmp1, value, tmp2); 3684 sw(tmp1, adr); 3685 } 3686 3687 void MacroAssembler::decrement(const Address dst, int64_t value, Register tmp1, Register tmp2) { 3688 assert(((dst.getMode() == Address::base_plus_offset && 3689 is_simm12(dst.offset())) || is_simm12(value)), 3690 "invalid value and address mode combination"); 3691 Address adr = add_memory_helper(dst, tmp2); 3692 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 3693 ld(tmp1, adr); 3694 sub(tmp1, tmp1, value, tmp2); 3695 sd(tmp1, adr); 3696 } 3697 3698 void MacroAssembler::decrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 3699 assert(((dst.getMode() == Address::base_plus_offset && 3700 is_simm12(dst.offset())) || is_simm12(value)), 3701 "invalid value and address mode combination"); 3702 Address adr = add_memory_helper(dst, tmp2); 3703 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 3704 lwu(tmp1, adr); 3705 subw(tmp1, tmp1, value, tmp2); 3706 sw(tmp1, adr); 3707 } 3708 3709 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { 3710 assert_different_registers(src1, t0); 3711 relocate(src2.rspec(), [&] { 3712 int32_t offset; 3713 la(t0, src2.target(), offset); 3714 ld(t0, Address(t0, offset)); 3715 }); 3716 beq(src1, t0, equal); 3717 } 3718 3719 void MacroAssembler::load_method_holder_cld(Register result, Register method) { 3720 load_method_holder(result, method); 3721 ld(result, Address(result, InstanceKlass::class_loader_data_offset())); 3722 } 3723 3724 void MacroAssembler::load_method_holder(Register holder, Register method) { 3725 ld(holder, Address(method, Method::const_offset())); // ConstMethod* 3726 ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* 3727 ld(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass* 3728 } 3729 3730 // string indexof 3731 // compute index by trailing zeros 3732 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, 
3733 Register match_mask, Register result, 3734 Register ch2, Register tmp, 3735 bool haystack_isL) { 3736 int haystack_chr_shift = haystack_isL ? 0 : 1; 3737 srl(match_mask, match_mask, trailing_zeros); 3738 srli(match_mask, match_mask, 1); 3739 srli(tmp, trailing_zeros, LogBitsPerByte); 3740 if (!haystack_isL) andi(tmp, tmp, 0xE); 3741 add(haystack, haystack, tmp); 3742 ld(ch2, Address(haystack)); 3743 if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); 3744 add(result, result, tmp); 3745 } 3746 3747 // string indexof 3748 // Find pattern element in src, compute match mask, 3749 // only the first occurrence of 0x80/0x8000 at low bits is the valid match index 3750 // match mask patterns and corresponding indices would be like: 3751 // - 0x8080808080808080 (Latin1) 3752 // - 7 6 5 4 3 2 1 0 (match index) 3753 // - 0x8000800080008000 (UTF16) 3754 // - 3 2 1 0 (match index) 3755 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, 3756 Register mask1, Register mask2) { 3757 xorr(src, pattern, src); 3758 sub(match_mask, src, mask1); 3759 orr(src, src, mask2); 3760 notr(src, src); 3761 andr(match_mask, match_mask, src); 3762 } 3763 3764 #ifdef COMPILER2 3765 // Code for BigInteger::mulAdd intrinsic 3766 // out = x10 3767 // in = x11 3768 // offset = x12 (already out.length-offset) 3769 // len = x13 3770 // k = x14 3771 // tmp = x28 3772 // 3773 // pseudo code from java implementation: 3774 // long kLong = k & LONG_MASK; 3775 // carry = 0; 3776 // offset = out.length-offset - 1; 3777 // for (int j = len - 1; j >= 0; j--) { 3778 // product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; 3779 // out[offset--] = (int)product; 3780 // carry = product >>> 32; 3781 // } 3782 // return (int)carry; 3783 void MacroAssembler::mul_add(Register out, Register in, Register offset, 3784 Register len, Register k, Register tmp) { 3785 Label L_tail_loop, L_unroll, L_end; 3786 mv(tmp, out); 3787 mv(out, zr); 3788 blez(len, L_end); 3789 zero_extend(k, k, 32); 3790 slliw(t0, offset, LogBytesPerInt); 3791 add(offset, tmp, t0); 3792 slliw(t0, len, LogBytesPerInt); 3793 add(in, in, t0); 3794 3795 const int unroll = 8; 3796 mv(tmp, unroll); 3797 blt(len, tmp, L_tail_loop); 3798 bind(L_unroll); 3799 for (int i = 0; i < unroll; i++) { 3800 sub(in, in, BytesPerInt); 3801 lwu(t0, Address(in, 0)); 3802 mul(t1, t0, k); 3803 add(t0, t1, out); 3804 sub(offset, offset, BytesPerInt); 3805 lwu(t1, Address(offset, 0)); 3806 add(t0, t0, t1); 3807 sw(t0, Address(offset, 0)); 3808 srli(out, t0, 32); 3809 } 3810 subw(len, len, tmp); 3811 bge(len, tmp, L_unroll); 3812 3813 bind(L_tail_loop); 3814 blez(len, L_end); 3815 sub(in, in, BytesPerInt); 3816 lwu(t0, Address(in, 0)); 3817 mul(t1, t0, k); 3818 add(t0, t1, out); 3819 sub(offset, offset, BytesPerInt); 3820 lwu(t1, Address(offset, 0)); 3821 add(t0, t0, t1); 3822 sw(t0, Address(offset, 0)); 3823 srli(out, t0, 32); 3824 subw(len, len, 1); 3825 j(L_tail_loop); 3826 3827 bind(L_end); 3828 } 3829 3830 // Multiply and multiply-accumulate unsigned 64-bit registers. 
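// A rough C sketch (illustrative only): with p = (unsigned __int128)n * m,
// wide_mul stores (uint64_t)p in prod_lo and (uint64_t)(p >> 64) in prod_hi;
// wide_madd adds p into the 128-bit value sum_hi:sum_lo.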
3831 void MacroAssembler::wide_mul(Register prod_lo, Register prod_hi, Register n, Register m) { 3832 assert_different_registers(prod_lo, prod_hi); 3833 3834 mul(prod_lo, n, m); 3835 mulhu(prod_hi, n, m); 3836 } 3837 3838 void MacroAssembler::wide_madd(Register sum_lo, Register sum_hi, Register n, 3839 Register m, Register tmp1, Register tmp2) { 3840 assert_different_registers(sum_lo, sum_hi); 3841 assert_different_registers(sum_hi, tmp2); 3842 3843 wide_mul(tmp1, tmp2, n, m); 3844 cad(sum_lo, sum_lo, tmp1, tmp1); // Add tmp1 to sum_lo with carry output to tmp1 3845 adc(sum_hi, sum_hi, tmp2, tmp1); // Add tmp2 with carry to sum_hi 3846 } 3847 3848 // add two unsigned input and output carry 3849 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) 3850 { 3851 assert_different_registers(dst, carry); 3852 assert_different_registers(dst, src2); 3853 add(dst, src1, src2); 3854 sltu(carry, dst, src2); 3855 } 3856 3857 // add two input with carry 3858 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) { 3859 assert_different_registers(dst, carry); 3860 add(dst, src1, src2); 3861 add(dst, dst, carry); 3862 } 3863 3864 // add two unsigned input with carry and output carry 3865 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) { 3866 assert_different_registers(dst, src2); 3867 adc(dst, src1, src2, carry); 3868 sltu(carry, dst, src2); 3869 } 3870 3871 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, 3872 Register src1, Register src2, Register carry) { 3873 cad(dest_lo, dest_lo, src1, carry); 3874 add(dest_hi, dest_hi, carry); 3875 cad(dest_lo, dest_lo, src2, carry); 3876 add(final_dest_hi, dest_hi, carry); 3877 } 3878 3879 /** 3880 * Multiply 32 bit by 32 bit first loop. 3881 */ 3882 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, 3883 Register y, Register y_idx, Register z, 3884 Register carry, Register product, 3885 Register idx, Register kdx) { 3886 // jlong carry, x[], y[], z[]; 3887 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 3888 // long product = y[idx] * x[xstart] + carry; 3889 // z[kdx] = (int)product; 3890 // carry = product >>> 32; 3891 // } 3892 // z[xstart] = (int)carry; 3893 3894 Label L_first_loop, L_first_loop_exit; 3895 blez(idx, L_first_loop_exit); 3896 3897 shadd(t0, xstart, x, t0, LogBytesPerInt); 3898 lwu(x_xstart, Address(t0, 0)); 3899 3900 bind(L_first_loop); 3901 subw(idx, idx, 1); 3902 shadd(t0, idx, y, t0, LogBytesPerInt); 3903 lwu(y_idx, Address(t0, 0)); 3904 mul(product, x_xstart, y_idx); 3905 add(product, product, carry); 3906 srli(carry, product, 32); 3907 subw(kdx, kdx, 1); 3908 shadd(t0, kdx, z, t0, LogBytesPerInt); 3909 sw(product, Address(t0, 0)); 3910 bgtz(idx, L_first_loop); 3911 3912 bind(L_first_loop_exit); 3913 } 3914 3915 /** 3916 * Multiply 64 bit by 64 bit first loop. 
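 *
 * Note: x[] and y[] hold 32-bit BigInteger words most-significant-first, so
 * each 64-bit load below picks up two adjacent words in reversed order;
 * ror_imm(..., 32) swaps the halves into arithmetic order.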
3917 */ 3918 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, 3919 Register y, Register y_idx, Register z, 3920 Register carry, Register product, 3921 Register idx, Register kdx) { 3922 // 3923 // jlong carry, x[], y[], z[]; 3924 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 3925 // huge_128 product = y[idx] * x[xstart] + carry; 3926 // z[kdx] = (jlong)product; 3927 // carry = (jlong)(product >>> 64); 3928 // } 3929 // z[xstart] = carry; 3930 // 3931 3932 Label L_first_loop, L_first_loop_exit; 3933 Label L_one_x, L_one_y, L_multiply; 3934 3935 subw(xstart, xstart, 1); 3936 bltz(xstart, L_one_x); 3937 3938 shadd(t0, xstart, x, t0, LogBytesPerInt); 3939 ld(x_xstart, Address(t0, 0)); 3940 ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian 3941 3942 bind(L_first_loop); 3943 subw(idx, idx, 1); 3944 bltz(idx, L_first_loop_exit); 3945 subw(idx, idx, 1); 3946 bltz(idx, L_one_y); 3947 3948 shadd(t0, idx, y, t0, LogBytesPerInt); 3949 ld(y_idx, Address(t0, 0)); 3950 ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian 3951 bind(L_multiply); 3952 3953 mulhu(t0, x_xstart, y_idx); 3954 mul(product, x_xstart, y_idx); 3955 cad(product, product, carry, t1); 3956 adc(carry, t0, zr, t1); 3957 3958 subw(kdx, kdx, 2); 3959 ror_imm(product, product, 32); // back to big-endian 3960 shadd(t0, kdx, z, t0, LogBytesPerInt); 3961 sd(product, Address(t0, 0)); 3962 3963 j(L_first_loop); 3964 3965 bind(L_one_y); 3966 lwu(y_idx, Address(y, 0)); 3967 j(L_multiply); 3968 3969 bind(L_one_x); 3970 lwu(x_xstart, Address(x, 0)); 3971 j(L_first_loop); 3972 3973 bind(L_first_loop_exit); 3974 } 3975 3976 /** 3977 * Multiply 128 bit by 128 bit. Unrolled inner loop. 3978 * 3979 */ 3980 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, 3981 Register carry, Register carry2, 3982 Register idx, Register jdx, 3983 Register yz_idx1, Register yz_idx2, 3984 Register tmp, Register tmp3, Register tmp4, 3985 Register tmp6, Register product_hi) { 3986 // jlong carry, x[], y[], z[]; 3987 // int kdx = xstart+1; 3988 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 3989 // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; 3990 // jlong carry2 = (jlong)(tmp3 >>> 64); 3991 // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; 3992 // carry = (jlong)(tmp4 >>> 64); 3993 // z[kdx+idx+1] = (jlong)tmp3; 3994 // z[kdx+idx] = (jlong)tmp4; 3995 // } 3996 // idx += 2; 3997 // if (idx > 0) { 3998 // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; 3999 // z[kdx+idx] = (jlong)yz_idx1; 4000 // carry = (jlong)(yz_idx1 >>> 64); 4001 // } 4002 // 4003 4004 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 4005 4006 srliw(jdx, idx, 2); 4007 4008 bind(L_third_loop); 4009 4010 subw(jdx, jdx, 1); 4011 bltz(jdx, L_third_loop_exit); 4012 subw(idx, idx, 4); 4013 4014 shadd(t0, idx, y, t0, LogBytesPerInt); 4015 ld(yz_idx2, Address(t0, 0)); 4016 ld(yz_idx1, Address(t0, wordSize)); 4017 4018 shadd(tmp6, idx, z, t0, LogBytesPerInt); 4019 4020 ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian 4021 ror_imm(yz_idx2, yz_idx2, 32); 4022 4023 ld(t1, Address(tmp6, 0)); 4024 ld(t0, Address(tmp6, wordSize)); 4025 4026 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 4027 mulhu(tmp4, product_hi, yz_idx1); 4028 4029 ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian 4030 ror_imm(t1, t1, 32, tmp); 4031 4032 mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi 
-> carry2:tmp 4033 mulhu(carry2, product_hi, yz_idx2); 4034 4035 cad(tmp3, tmp3, carry, carry); 4036 adc(tmp4, tmp4, zr, carry); 4037 cad(tmp3, tmp3, t0, t0); 4038 cadc(tmp4, tmp4, tmp, t0); 4039 adc(carry, carry2, zr, t0); 4040 cad(tmp4, tmp4, t1, carry2); 4041 adc(carry, carry, zr, carry2); 4042 4043 ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian 4044 ror_imm(tmp4, tmp4, 32); 4045 sd(tmp4, Address(tmp6, 0)); 4046 sd(tmp3, Address(tmp6, wordSize)); 4047 4048 j(L_third_loop); 4049 4050 bind(L_third_loop_exit); 4051 4052 andi(idx, idx, 0x3); 4053 beqz(idx, L_post_third_loop_done); 4054 4055 Label L_check_1; 4056 subw(idx, idx, 2); 4057 bltz(idx, L_check_1); 4058 4059 shadd(t0, idx, y, t0, LogBytesPerInt); 4060 ld(yz_idx1, Address(t0, 0)); 4061 ror_imm(yz_idx1, yz_idx1, 32); 4062 4063 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 4064 mulhu(tmp4, product_hi, yz_idx1); 4065 4066 shadd(t0, idx, z, t0, LogBytesPerInt); 4067 ld(yz_idx2, Address(t0, 0)); 4068 ror_imm(yz_idx2, yz_idx2, 32, tmp); 4069 4070 add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); 4071 4072 ror_imm(tmp3, tmp3, 32, tmp); 4073 sd(tmp3, Address(t0, 0)); 4074 4075 bind(L_check_1); 4076 4077 andi(idx, idx, 0x1); 4078 subw(idx, idx, 1); 4079 bltz(idx, L_post_third_loop_done); 4080 shadd(t0, idx, y, t0, LogBytesPerInt); 4081 lwu(tmp4, Address(t0, 0)); 4082 mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 4083 mulhu(carry2, tmp4, product_hi); 4084 4085 shadd(t0, idx, z, t0, LogBytesPerInt); 4086 lwu(tmp4, Address(t0, 0)); 4087 4088 add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); 4089 4090 shadd(t0, idx, z, t0, LogBytesPerInt); 4091 sw(tmp3, Address(t0, 0)); 4092 4093 slli(t0, carry2, 32); 4094 srli(carry, tmp3, 32); 4095 orr(carry, carry, t0); 4096 4097 bind(L_post_third_loop_done); 4098 } 4099 4100 /** 4101 * Code for BigInteger::multiplyToLen() intrinsic. 4102 * 4103 * x10: x 4104 * x11: xlen 4105 * x12: y 4106 * x13: ylen 4107 * x14: z 4108 * x15: zlen 4109 * x16: tmp1 4110 * x17: tmp2 4111 * x7: tmp3 4112 * x28: tmp4 4113 * x29: tmp5 4114 * x30: tmp6 4115 * x31: tmp7 4116 */ 4117 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, 4118 Register z, Register zlen, 4119 Register tmp1, Register tmp2, Register tmp3, Register tmp4, 4120 Register tmp5, Register tmp6, Register product_hi) { 4121 assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); 4122 4123 const Register idx = tmp1; 4124 const Register kdx = tmp2; 4125 const Register xstart = tmp3; 4126 4127 const Register y_idx = tmp4; 4128 const Register carry = tmp5; 4129 const Register product = xlen; 4130 const Register x_xstart = zlen; // reuse register 4131 4132 mv(idx, ylen); // idx = ylen; 4133 mv(kdx, zlen); // kdx = xlen+ylen; 4134 mv(carry, zr); // carry = 0; 4135 4136 Label L_multiply_64_x_64_loop, L_done; 4137 4138 subw(xstart, xlen, 1); 4139 bltz(xstart, L_done); 4140 4141 const Register jdx = tmp1; 4142 4143 if (AvoidUnalignedAccesses) { 4144 // Check if x and y are both 8-byte aligned. 
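    // (xlen | ylen) has bit 0 clear only when both lengths are even; in that
    // case (given 8-byte-aligned data pointers in x and y) the 64-bit loads
    // in the 64 x 64 loop stay aligned.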
4145 orr(t0, xlen, ylen); 4146 test_bit(t0, t0, 0); 4147 beqz(t0, L_multiply_64_x_64_loop); 4148 4149 multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 4150 shadd(t0, xstart, z, t0, LogBytesPerInt); 4151 sw(carry, Address(t0, 0)); 4152 4153 Label L_second_loop_unaligned; 4154 bind(L_second_loop_unaligned); 4155 mv(carry, zr); 4156 mv(jdx, ylen); 4157 subw(xstart, xstart, 1); 4158 bltz(xstart, L_done); 4159 sub(sp, sp, 2 * wordSize); 4160 sd(z, Address(sp, 0)); 4161 sd(zr, Address(sp, wordSize)); 4162 shadd(t0, xstart, z, t0, LogBytesPerInt); 4163 addi(z, t0, 4); 4164 shadd(t0, xstart, x, t0, LogBytesPerInt); 4165 lwu(product, Address(t0, 0)); 4166 Label L_third_loop, L_third_loop_exit; 4167 4168 blez(jdx, L_third_loop_exit); 4169 4170 bind(L_third_loop); 4171 subw(jdx, jdx, 1); 4172 shadd(t0, jdx, y, t0, LogBytesPerInt); 4173 lwu(t0, Address(t0, 0)); 4174 mul(t1, t0, product); 4175 add(t0, t1, carry); 4176 shadd(tmp6, jdx, z, t1, LogBytesPerInt); 4177 lwu(t1, Address(tmp6, 0)); 4178 add(t0, t0, t1); 4179 sw(t0, Address(tmp6, 0)); 4180 srli(carry, t0, 32); 4181 bgtz(jdx, L_third_loop); 4182 4183 bind(L_third_loop_exit); 4184 ld(z, Address(sp, 0)); 4185 addi(sp, sp, 2 * wordSize); 4186 shadd(t0, xstart, z, t0, LogBytesPerInt); 4187 sw(carry, Address(t0, 0)); 4188 4189 j(L_second_loop_unaligned); 4190 } 4191 4192 bind(L_multiply_64_x_64_loop); 4193 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 4194 4195 Label L_second_loop_aligned; 4196 beqz(kdx, L_second_loop_aligned); 4197 4198 Label L_carry; 4199 subw(kdx, kdx, 1); 4200 beqz(kdx, L_carry); 4201 4202 shadd(t0, kdx, z, t0, LogBytesPerInt); 4203 sw(carry, Address(t0, 0)); 4204 srli(carry, carry, 32); 4205 subw(kdx, kdx, 1); 4206 4207 bind(L_carry); 4208 shadd(t0, kdx, z, t0, LogBytesPerInt); 4209 sw(carry, Address(t0, 0)); 4210 4211 // Second and third (nested) loops. 
4212 //
4213 // for (int i = xstart-1; i >= 0; i--) { // Second loop
4214 //   carry = 0;
4215 //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
4216 //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
4217 //                    (z[k] & LONG_MASK) + carry;
4218 //     z[k] = (int)product;
4219 //     carry = product >>> 32;
4220 //   }
4221 //   z[i] = (int)carry;
4222 // }
4223 //
4224 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
4225 
4226   bind(L_second_loop_aligned);
4227   mv(carry, zr); // carry = 0;
4228   mv(jdx, ylen); // j = ystart+1
4229 
4230   subw(xstart, xstart, 1); // i = xstart-1;
4231   bltz(xstart, L_done);
4232 
4233   sub(sp, sp, 4 * wordSize);
4234   sd(z, Address(sp, 0));
4235 
4236   Label L_last_x;
4237   shadd(t0, xstart, z, t0, LogBytesPerInt);
4238   addi(z, t0, 4);
4239   subw(xstart, xstart, 1); // i = xstart-1;
4240   bltz(xstart, L_last_x);
4241 
4242   shadd(t0, xstart, x, t0, LogBytesPerInt);
4243   ld(product_hi, Address(t0, 0));
4244   ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian
4245 
4246   Label L_third_loop_prologue;
4247   bind(L_third_loop_prologue);
4248 
4249   sd(ylen, Address(sp, wordSize));
4250   sd(x, Address(sp, 2 * wordSize));
4251   sd(xstart, Address(sp, 3 * wordSize));
4252   multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product,
4253                           tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi);
4254   ld(z, Address(sp, 0));
4255   ld(ylen, Address(sp, wordSize));
4256   ld(x, Address(sp, 2 * wordSize));
4257   ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen
4258   addi(sp, sp, 4 * wordSize);
4259 
4260   addiw(tmp3, xlen, 1);
4261   shadd(t0, tmp3, z, t0, LogBytesPerInt);
4262   sw(carry, Address(t0, 0));
4263 
4264   subw(tmp3, tmp3, 1);
4265   bltz(tmp3, L_done);
4266 
4267   srli(carry, carry, 32);
4268   shadd(t0, tmp3, z, t0, LogBytesPerInt);
4269   sw(carry, Address(t0, 0));
4270   j(L_second_loop_aligned);
4271 
4272   // Next infrequent code is moved outside loops.
4273   bind(L_last_x);
4274   lwu(product_hi, Address(x, 0));
4275   j(L_third_loop_prologue);
4276 
4277   bind(L_done);
4278 }
4279 #endif
4280 
4281 // Count the number of bits of trailing zero chars, from lsb to msb, up to
4282 // the first non-zero element. In the LL case an element is one byte (step
4283 // 8 bits at a time); otherwise it is two bytes (step 16 bits at a time).
4284 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) {
4285   if (UseZbb) {
4286     assert_different_registers(Rd, Rs, tmp1);
4287     int step = isLL ? 8 : 16;
4288     ctz(Rd, Rs);
4289     andi(tmp1, Rd, step - 1);
4290     sub(Rd, Rd, tmp1);
4291     return;
4292   }
4293 
4294   assert_different_registers(Rd, Rs, tmp1, tmp2);
4295   Label Loop;
4296   int step = isLL ? 8 : 16;
4297   mv(Rd, -step);
4298   mv(tmp2, Rs);
4299 
4300   bind(Loop);
4301   addi(Rd, Rd, step);
4302   andi(tmp1, tmp2, ((1 << step) - 1));
4303   srli(tmp2, tmp2, step);
4304   beqz(tmp1, Loop);
4305 }
4306 
4307 // This method reads the adjacent 4 bytes in the lower half of the source
4308 // register and inflates them into the destination register, for example:
4309 //   Rs: A7A6A5A4A3A2A1A0
4310 //   Rd: 00A300A200A100A0
4311 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) {
4312   assert_different_registers(Rd, Rs, tmp1, tmp2);
4313 
4314   mv(tmp1, 0xFF000000); // first byte mask at lower word
4315   andr(Rd, Rs, tmp1);
4316   for (int i = 0; i < 2; i++) {
4317     slli(Rd, Rd, wordSize);
4318     srli(tmp1, tmp1, wordSize);
4319     andr(tmp2, Rs, tmp1);
4320     orr(Rd, Rd, tmp2);
4321   }
4322   slli(Rd, Rd, wordSize);
4323   andi(tmp2, Rs, 0xFF); // last byte mask at lower word
4324   orr(Rd, Rd, tmp2);
4325 }
4326 
4327 // This method reads the adjacent 4 bytes in the upper half of the source
4328 // register and inflates them into the destination register, for example:
4329 //   Rs: A7A6A5A4A3A2A1A0
4330 //   Rd: 00A700A600A500A4
4331 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) {
4332   assert_different_registers(Rd, Rs, tmp1, tmp2);
4333   srli(Rs, Rs, 32); // only upper 32 bits are needed
4334   inflate_lo32(Rd, Rs, tmp1, tmp2);
4335 }
4336 
4337 // The size of the blocks erased by the zero_blocks stub. We must
4338 // handle anything smaller than this ourselves in zero_words().
4339 const int MacroAssembler::zero_words_block_size = 8;
4340 
4341 // zero_words() is used by C2 ClearArray patterns. It is as small as
4342 // possible, handling small word counts locally and delegating
4343 // anything larger to the zero_blocks stub. It is expanded many times
4344 // in compiled code, so it is important to keep it short.
4345 
4346 // ptr:  Address of a buffer to be zeroed.
4347 // cnt:  Count in HeapWords.
4348 //
4349 // ptr, cnt, and t0 are clobbered.
4350 address MacroAssembler::zero_words(Register ptr, Register cnt) {
4351   assert(is_power_of_2(zero_words_block_size), "adjust this");
4352   assert(ptr == x28 && cnt == x29, "mismatch in register usage");
4353   assert_different_registers(cnt, t0);
4354 
4355   BLOCK_COMMENT("zero_words {");
4356 
4357   mv(t0, zero_words_block_size);
4358   Label around, done, done16;
4359   bltu(cnt, t0, around);
4360   {
4361     RuntimeAddress zero_blocks(StubRoutines::riscv::zero_blocks());
4362     assert(zero_blocks.target() != nullptr, "zero_blocks stub has not been generated");
4363     if (StubRoutines::riscv::complete()) {
4364       address tpc = trampoline_call(zero_blocks);
4365       if (tpc == nullptr) {
4366         DEBUG_ONLY(reset_labels(around));
4367         postcond(pc() == badAddress);
4368         return nullptr;
4369       }
4370     } else {
4371       jal(zero_blocks);
4372     }
4373   }
4374   bind(around);
4375   for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
4376     Label l;
4377     test_bit(t0, cnt, exact_log2(i));
4378     beqz(t0, l);
4379     for (int j = 0; j < i; j++) {
4380       sd(zr, Address(ptr, j * wordSize));
4381     }
4382     addi(ptr, ptr, i * wordSize);
4383     bind(l);
4384   }
4385   {
4386     Label l;
4387     test_bit(t0, cnt, 0);
4388     beqz(t0, l);
4389     sd(zr, Address(ptr, 0));
4390     bind(l);
4391   }
4392 
4393   BLOCK_COMMENT("} zero_words");
4394   postcond(pc() != badAddress);
4395   return pc();
4396 }
4397 
4398 #define SmallArraySize (18 * BytesPerLong)
4399 
4400 // base:  Address of a buffer to be zeroed, 8 bytes aligned.
4401 // cnt:   Immediate count in HeapWords.
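//
// A rough C sketch of the strategy below (illustrative only): small counts
// are fully unrolled; larger counts peel off the cnt % 8 leading words and
// then loop storing 8 words per iteration:
//
//   if (cnt <= 18) { /* store cnt zeros, fully unrolled */ }
//   else {
//     /* store cnt % 8 zeros */
//     for (cnt -= cnt % 8; cnt != 0; cnt -= 8) { /* store 8 zeros */ }
//   }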
4402 void MacroAssembler::zero_words(Register base, uint64_t cnt) {
4403   assert_different_registers(base, t0, t1);
4404 
4405   BLOCK_COMMENT("zero_words {");
4406 
4407   if (cnt <= SmallArraySize / BytesPerLong) {
4408     for (int i = 0; i < (int)cnt; i++) {
4409       sd(zr, Address(base, i * wordSize));
4410     }
4411   } else {
4412     const int unroll = 8; // Number of sd(zr, adr) instructions we'll unroll
4413     int remainder = cnt % unroll;
4414     for (int i = 0; i < remainder; i++) {
4415       sd(zr, Address(base, i * wordSize));
4416     }
4417 
4418     Label loop;
4419     Register cnt_reg = t0;
4420     Register loop_base = t1;
4421     cnt = cnt - remainder;
4422     mv(cnt_reg, cnt);
4423     add(loop_base, base, remainder * wordSize);
4424     bind(loop);
4425     sub(cnt_reg, cnt_reg, unroll);
4426     for (int i = 0; i < unroll; i++) {
4427       sd(zr, Address(loop_base, i * wordSize));
4428     }
4429     add(loop_base, loop_base, unroll * wordSize);
4430     bnez(cnt_reg, loop);
4431   }
4432 
4433   BLOCK_COMMENT("} zero_words");
4434 }
4435 
4436 // base:  Address of a buffer to be filled, 8 bytes aligned.
4437 // cnt:   Count in 8-byte unit.
4438 // value: Value to be filled with.
4439 // base will point to the end of the buffer after filling.
4440 void MacroAssembler::fill_words(Register base, Register cnt, Register value) {
4441 //  Algorithm:
4442 //
4443 //    t0 = cnt & 7
4444 //    cnt -= t0
4445 //    p += t0
4446 //    switch (t0):
4447 //      switch start:
4448 //      do while cnt
4449 //        cnt -= 8
4450 //          p[-8] = value
4451 //        case 7:
4452 //          p[-7] = value
4453 //        case 6:
4454 //          p[-6] = value
4455 //          // ...
4456 //        case 1:
4457 //          p[-1] = value
4458 //        case 0:
4459 //          p += 8
4460 //      do-while end
4461 //    switch end
4462 
4463   assert_different_registers(base, cnt, value, t0, t1);
4464 
4465   Label fini, skip, entry, loop;
4466   const int unroll = 8; // Number of sd instructions we'll unroll
4467 
4468   beqz(cnt, fini);
4469 
4470   andi(t0, cnt, unroll - 1);
4471   sub(cnt, cnt, t0);
4472   // Advance base past the cnt % 8 leading words; the computed jump below then executes exactly that many of the unrolled sd's first.
4473   shadd(base, t0, base, t1, 3);
4474   la(t1, entry);
4475   slli(t0, t0, 2); // each sd is 4 bytes, so back up (cnt % 8) * 4 bytes from 'entry'
4476   sub(t1, t1, t0);
4477   jr(t1);
4478 
4479   bind(loop);
4480   add(base, base, unroll * 8);
4481   for (int i = -unroll; i < 0; i++) {
4482     sd(value, Address(base, i * 8));
4483   }
4484   bind(entry);
4485   sub(cnt, cnt, unroll);
4486   bgez(cnt, loop);
4487 
4488   bind(fini);
4489 }
4490 
4491 // Zero blocks of memory by using CBO.ZERO.
4492 //
4493 // Aligns the base address first sufficiently for CBO.ZERO, then uses
4494 // CBO.ZERO repeatedly for every full block. cnt is the size to be
4495 // zeroed in HeapWords. Returns the count of words left to be zeroed
4496 // in cnt.
4497 //
4498 // NOTE: This is intended to be used in the zero_blocks() stub. If
4499 // you want to use it elsewhere, note that cnt must be >= CacheLineSize.
4500 void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) {
4501   Label initial_table_end, loop;
4502 
4503   // Align base with cache line size.
4504   neg(tmp1, base);
4505   andi(tmp1, tmp1, CacheLineSize - 1);
4506 
4507   // tmp1: the number of bytes to be filled to align the base with cache line size.
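  // The computed jump below lands (tmp1 / 8) sd instructions before
  // initial_table_end: each word to pre-zero costs one 4-byte sd, so the
  // byte offset back from the label is (tmp1 / 8) * 4 = tmp1 / 2.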
4508   add(base, base, tmp1);
4509   srai(tmp2, tmp1, 3);
4510   sub(cnt, cnt, tmp2);
4511   srli(tmp2, tmp1, 1);
4512   la(tmp1, initial_table_end);
4513   sub(tmp2, tmp1, tmp2);
4514   jr(tmp2);
4515   for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) {
4516     sd(zr, Address(base, i));
4517   }
4518   bind(initial_table_end);
4519 
4520   mv(tmp1, CacheLineSize / wordSize);
4521   bind(loop);
4522   cbo_zero(base);
4523   sub(cnt, cnt, tmp1);
4524   add(base, base, CacheLineSize);
4525   bge(cnt, tmp1, loop);
4526 }
4527 
4528 // java.lang.Math.round(float a)
4529 // Returns the closest int to the argument, with ties rounding to positive infinity.
4530 void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) {
4531   // This instruction sequence provides a performance improvement on all
4532   // tested devices; don't change it without re-verification.
4533   Label done;
4534   mv(t0, jint_cast(0.5f));
4535   fmv_w_x(ftmp, t0);
4536 
4537   // dst = 0 if NaN
4538   feq_s(t0, src, src); // replacing fclass with feq as performance optimization
4539   mv(dst, zr);
4540   beqz(t0, done);
4541 
4542   // dst = (src + 0.5f) rounded down towards negative infinity
4543   // Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place.
4544   // RDN is required for fadd_s, RNE gives incorrect results:
4545   //  --------------------------------------------------------------------
4546   //  fadd.s rne (src + 0.5f): src = 8388609.000000  ftmp = 8388610.000000
4547   //  fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610
4548   //  --------------------------------------------------------------------
4549   //  fadd.s rdn (src + 0.5f): src = 8388609.000000  ftmp = 8388609.000000
4550   //  fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609
4551   //  --------------------------------------------------------------------
4552   fadd_s(ftmp, src, ftmp, RoundingMode::rdn);
4553   fcvt_w_s(dst, ftmp, RoundingMode::rdn);
4554 
4555   bind(done);
4556 }
4557 
4558 // java.lang.Math.round(double a)
4559 // Returns the closest long to the argument, with ties rounding to positive infinity.
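// In effect (a sketch, ignoring the NaN case handled below, which yields 0):
//   dst = (long) floor(a + 0.5)
// computed with round-down (RDN) floating-point operations throughout.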
4560 void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) {
4561   // This instruction sequence provides a performance improvement on all
4562   // tested devices; don't change it without re-verification.
4563   Label done;
4564   mv(t0, julong_cast(0.5));
4565   fmv_d_x(ftmp, t0);
4566 
4567   // dst = 0 if NaN
4568   feq_d(t0, src, src); // replacing fclass with feq as performance optimization
4569   mv(dst, zr);
4570   beqz(t0, done);
4571 
4572   // dst = (src + 0.5) rounded down towards negative infinity
4573   fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here, otherwise some inputs produce incorrect results
4574   fcvt_l_d(dst, ftmp, RoundingMode::rdn);
4575 
4576   bind(done);
4577 }
4578 
4579 #define FCVT_SAFE(FLOATCVT, FLOATSIG) \
4580 void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
4581   Label done; \
4582   assert_different_registers(dst, tmp); \
4583   fclass_##FLOATSIG(tmp, src); \
4584   mv(dst, zr); \
4585   /* check if src is NaN */ \
4586   andi(tmp, tmp, fclass_mask::nan); \
4587   bnez(tmp, done); \
4588   FLOATCVT(dst, src); \
4589   bind(done); \
4590 }
4591 
4592 FCVT_SAFE(fcvt_w_s, s);
4593 FCVT_SAFE(fcvt_l_s, s);
4594 FCVT_SAFE(fcvt_w_d, d);
4595 FCVT_SAFE(fcvt_l_d, d);
4596 
4597 #undef FCVT_SAFE
4598 
4599 #define FCMP(FLOATTYPE, FLOATSIG) \
4600 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \
4601                                          FloatRegister Rs2, int unordered_result) { \
4602   Label Ldone; \
4603   if (unordered_result < 0) { \
4604     /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \
4605     /* installs 1 if gt else 0 */ \
4606     flt_##FLOATSIG(result, Rs2, Rs1); \
4607     /* Rs1 > Rs2, install 1 */ \
4608     bgtz(result, Ldone); \
4609     feq_##FLOATSIG(result, Rs1, Rs2); \
4610     addi(result, result, -1); \
4611     /* Rs1 = Rs2, install 0 */ \
4612     /* NaN or Rs1 < Rs2, install -1 */ \
4613     bind(Ldone); \
4614   } else { \
4615     /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \
4616     /* installs 1 if gt or unordered else 0 */ \
4617     flt_##FLOATSIG(result, Rs1, Rs2); \
4618     /* Rs1 < Rs2, install -1 */ \
4619     bgtz(result, Ldone); \
4620     feq_##FLOATSIG(result, Rs1, Rs2); \
4621     addi(result, result, -1); \
4622     /* Rs1 = Rs2, install 0 */ \
4623     /* NaN or Rs1 > Rs2, install 1 */ \
4624     bind(Ldone); \
4625     neg(result, result); \
4626   } \
4627 }
4628 
4629 FCMP(float, s);
4630 FCMP(double, d);
4631 
4632 #undef FCMP
4633 
4634 // Zero words; len is in bytes
4635 // Destroys all registers except addr
4636 // len must be a nonzero multiple of wordSize
4637 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) {
4638   assert_different_registers(addr, len, tmp, t0, t1);
4639 
4640 #ifdef ASSERT
4641   {
4642     Label L;
4643     andi(t0, len, BytesPerWord - 1);
4644     beqz(t0, L);
4645     stop("len is not a multiple of BytesPerWord");
4646     bind(L);
4647   }
4648 #endif // ASSERT
4649 
4650 #ifndef PRODUCT
4651   block_comment("zero memory");
4652 #endif // PRODUCT
4653 
4654   Label loop;
4655   Label entry;
4656 
4657   // Algorithm:
4658   //
4659   //  t0 = cnt & 7
4660   //  cnt -= t0
4661   //  p += t0
4662   //  switch (t0) {
4663   //    do {
4664   //      cnt -= 8
4665   //        p[-8] = 0
4666   //      case 7:
4667   //        p[-7] = 0
4668   //      case 6:
4669   //        p[-6] = 0
4670   //        ...
4671 // case 1: 4672 // p[-1] = 0 4673 // case 0: 4674 // p += 8 4675 // } while (cnt) 4676 // } 4677 4678 const int unroll = 8; // Number of sd(zr) instructions we'll unroll 4679 4680 srli(len, len, LogBytesPerWord); 4681 andi(t0, len, unroll - 1); // t0 = cnt % unroll 4682 sub(len, len, t0); // cnt -= unroll 4683 // tmp always points to the end of the region we're about to zero 4684 shadd(tmp, t0, addr, t1, LogBytesPerWord); 4685 la(t1, entry); 4686 slli(t0, t0, 2); 4687 sub(t1, t1, t0); 4688 jr(t1); 4689 bind(loop); 4690 sub(len, len, unroll); 4691 for (int i = -unroll; i < 0; i++) { 4692 sd(zr, Address(tmp, i * wordSize)); 4693 } 4694 bind(entry); 4695 add(tmp, tmp, unroll * wordSize); 4696 bnez(len, loop); 4697 } 4698 4699 // shift left by shamt and add 4700 // Rd = (Rs1 << shamt) + Rs2 4701 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { 4702 if (UseZba) { 4703 if (shamt == 1) { 4704 sh1add(Rd, Rs1, Rs2); 4705 return; 4706 } else if (shamt == 2) { 4707 sh2add(Rd, Rs1, Rs2); 4708 return; 4709 } else if (shamt == 3) { 4710 sh3add(Rd, Rs1, Rs2); 4711 return; 4712 } 4713 } 4714 4715 if (shamt != 0) { 4716 assert_different_registers(Rs2, tmp); 4717 slli(tmp, Rs1, shamt); 4718 add(Rd, Rs2, tmp); 4719 } else { 4720 add(Rd, Rs1, Rs2); 4721 } 4722 } 4723 4724 void MacroAssembler::zero_extend(Register dst, Register src, int bits) { 4725 switch (bits) { 4726 case 32: 4727 if (UseZba) { 4728 zext_w(dst, src); 4729 return; 4730 } 4731 break; 4732 case 16: 4733 if (UseZbb) { 4734 zext_h(dst, src); 4735 return; 4736 } 4737 break; 4738 case 8: 4739 if (UseZbb) { 4740 zext_b(dst, src); 4741 return; 4742 } 4743 break; 4744 default: 4745 break; 4746 } 4747 slli(dst, src, XLEN - bits); 4748 srli(dst, dst, XLEN - bits); 4749 } 4750 4751 void MacroAssembler::sign_extend(Register dst, Register src, int bits) { 4752 switch (bits) { 4753 case 32: 4754 sext_w(dst, src); 4755 return; 4756 case 16: 4757 if (UseZbb) { 4758 sext_h(dst, src); 4759 return; 4760 } 4761 break; 4762 case 8: 4763 if (UseZbb) { 4764 sext_b(dst, src); 4765 return; 4766 } 4767 break; 4768 default: 4769 break; 4770 } 4771 slli(dst, src, XLEN - bits); 4772 srai(dst, dst, XLEN - bits); 4773 } 4774 4775 void MacroAssembler::cmp_x2i(Register dst, Register src1, Register src2, 4776 Register tmp, bool is_signed) { 4777 if (src1 == src2) { 4778 mv(dst, zr); 4779 return; 4780 } 4781 Label done; 4782 Register left = src1; 4783 Register right = src2; 4784 if (dst == src1) { 4785 assert_different_registers(dst, src2, tmp); 4786 mv(tmp, src1); 4787 left = tmp; 4788 } else if (dst == src2) { 4789 assert_different_registers(dst, src1, tmp); 4790 mv(tmp, src2); 4791 right = tmp; 4792 } 4793 4794 // installs 1 if gt else 0 4795 if (is_signed) { 4796 slt(dst, right, left); 4797 } else { 4798 sltu(dst, right, left); 4799 } 4800 bnez(dst, done); 4801 if (is_signed) { 4802 slt(dst, left, right); 4803 } else { 4804 sltu(dst, left, right); 4805 } 4806 // dst = -1 if lt; else if eq , dst = 0 4807 neg(dst, dst); 4808 bind(done); 4809 } 4810 4811 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) 4812 { 4813 cmp_x2i(dst, src1, src2, tmp); 4814 } 4815 4816 void MacroAssembler::cmp_ul2i(Register dst, Register src1, Register src2, Register tmp) { 4817 cmp_x2i(dst, src1, src2, tmp, false); 4818 } 4819 4820 void MacroAssembler::cmp_uw2i(Register dst, Register src1, Register src2, Register tmp) { 4821 cmp_x2i(dst, src1, src2, tmp, false); 4822 } 4823 4824 // The java_calling_convention 
describes stack locations as ideal slots on
4825 // a frame with no abi restrictions. Since we must observe abi restrictions
4826 // (like the placement of the saved fp/ra) the slots must be biased by
4827 // the following value.
4828 static int reg2offset_in(VMReg r) {
4829   // Account for saved fp and ra
4830   // This should really be in_preserve_stack_slots
4831   return r->reg2stack() * VMRegImpl::stack_slot_size;
4832 }
4833 
4834 static int reg2offset_out(VMReg r) {
4835   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
4836 }
4837 
4838 // On 64 bit we will store integer-like items to the stack as
4839 // 64-bit items (riscv64 abi), even though Java would only store
4840 // 32 bits for a parameter. On 32 bit it would simply be 32 bits,
4841 // so this routine does 32->32 on 32 bit and 32->64 on 64 bit.
4842 void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) {
4843   if (src.first()->is_stack()) {
4844     if (dst.first()->is_stack()) {
4845       // stack to stack
4846       ld(tmp, Address(fp, reg2offset_in(src.first())));
4847       sd(tmp, Address(sp, reg2offset_out(dst.first())));
4848     } else {
4849       // stack to reg
4850       lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4851     }
4852   } else if (dst.first()->is_stack()) {
4853     // reg to stack
4854     sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
4855   } else {
4856     if (dst.first() != src.first()) {
4857       sign_extend(dst.first()->as_Register(), src.first()->as_Register(), 32);
4858     }
4859   }
4860 }
4861 
4862 // An oop arg. Must pass a handle, not the oop itself
4863 void MacroAssembler::object_move(OopMap* map,
4864                                  int oop_handle_offset,
4865                                  int framesize_in_slots,
4866                                  VMRegPair src,
4867                                  VMRegPair dst,
4868                                  bool is_receiver,
4869                                  int* receiver_offset) {
4870   assert_cond(map != nullptr && receiver_offset != nullptr);
4871 
4872   // must pass a handle. First figure out the location we use as a handle
4873   Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();
4874 
4875   // See if the oop is null; if it is, we need no handle
4876 
4877   if (src.first()->is_stack()) {
4878     // Oop is already on the stack as an argument
4879     int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
4880     map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
4881     if (is_receiver) {
4882       *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
4883     }
4884 
4885     ld(t0, Address(fp, reg2offset_in(src.first())));
4886     la(rHandle, Address(fp, reg2offset_in(src.first())));
4887     // conditionally move a null
4888     Label notZero1;
4889     bnez(t0, notZero1);
4890     mv(rHandle, zr);
4891     bind(notZero1);
4892   } else {
4893 
4894     // Oop is in a register; we must store it to the space we reserve
4895     // on the stack for oop_handles, and pass a handle if the oop is non-null
4896 
4897     const Register rOop = src.first()->as_Register();
4898     int oop_slot = -1;
4899     if (rOop == j_rarg0) {
4900       oop_slot = 0;
4901     } else if (rOop == j_rarg1) {
4902       oop_slot = 1;
4903     } else if (rOop == j_rarg2) {
4904       oop_slot = 2;
4905     } else if (rOop == j_rarg3) {
4906       oop_slot = 3;
4907     } else if (rOop == j_rarg4) {
4908       oop_slot = 4;
4909     } else if (rOop == j_rarg5) {
4910       oop_slot = 5;
4911     } else if (rOop == j_rarg6) {
4912       oop_slot = 6;
4913     } else {
4914       assert(rOop == j_rarg7, "wrong register");
4915       oop_slot = 7;
4916     }
4917 
4918     oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
4919     int offset = oop_slot * VMRegImpl::stack_slot_size;
4920 
4921     map->set_oop(VMRegImpl::stack2reg(oop_slot));
4922     // Store oop in handle area, may be null
4923     sd(rOop, Address(sp, offset));
4924     if (is_receiver) {
4925       *receiver_offset = offset;
4926     }
4927 
4928     // rOop may be the same as rHandle
4929     if (rOop == rHandle) {
4930       Label isZero;
4931       beqz(rOop, isZero);
4932       la(rHandle, Address(sp, offset));
4933       bind(isZero);
4934     } else {
4935       Label notZero2;
4936       la(rHandle, Address(sp, offset));
4937       bnez(rOop, notZero2);
4938       mv(rHandle, zr);
4939       bind(notZero2);
4940     }
4941   }
4942 
4943   // If the arg is on the stack then place it, otherwise it is already in the correct reg.
4944 if (dst.first()->is_stack()) { 4945 sd(rHandle, Address(sp, reg2offset_out(dst.first()))); 4946 } 4947 } 4948 4949 // A float arg may have to do float reg int reg conversion 4950 void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { 4951 assert((src.first()->is_stack() && dst.first()->is_stack()) || 4952 (src.first()->is_reg() && dst.first()->is_reg()) || 4953 (src.first()->is_stack() && dst.first()->is_reg()), "Unexpected error"); 4954 if (src.first()->is_stack()) { 4955 if (dst.first()->is_stack()) { 4956 lwu(tmp, Address(fp, reg2offset_in(src.first()))); 4957 sw(tmp, Address(sp, reg2offset_out(dst.first()))); 4958 } else if (dst.first()->is_Register()) { 4959 lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4960 } else { 4961 ShouldNotReachHere(); 4962 } 4963 } else if (src.first() != dst.first()) { 4964 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { 4965 fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 4966 } else { 4967 ShouldNotReachHere(); 4968 } 4969 } 4970 } 4971 4972 // A long move 4973 void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { 4974 if (src.first()->is_stack()) { 4975 if (dst.first()->is_stack()) { 4976 // stack to stack 4977 ld(tmp, Address(fp, reg2offset_in(src.first()))); 4978 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 4979 } else { 4980 // stack to reg 4981 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 4982 } 4983 } else if (dst.first()->is_stack()) { 4984 // reg to stack 4985 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 4986 } else { 4987 if (dst.first() != src.first()) { 4988 mv(dst.first()->as_Register(), src.first()->as_Register()); 4989 } 4990 } 4991 } 4992 4993 // A double move 4994 void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { 4995 assert((src.first()->is_stack() && dst.first()->is_stack()) || 4996 (src.first()->is_reg() && dst.first()->is_reg()) || 4997 (src.first()->is_stack() && dst.first()->is_reg()), "Unexpected error"); 4998 if (src.first()->is_stack()) { 4999 if (dst.first()->is_stack()) { 5000 ld(tmp, Address(fp, reg2offset_in(src.first()))); 5001 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 5002 } else if (dst.first()-> is_Register()) { 5003 ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 5004 } else { 5005 ShouldNotReachHere(); 5006 } 5007 } else if (src.first() != dst.first()) { 5008 if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { 5009 fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 5010 } else { 5011 ShouldNotReachHere(); 5012 } 5013 } 5014 } 5015 5016 void MacroAssembler::rt_call(address dest, Register tmp) { 5017 CodeBlob *cb = CodeCache::find_blob(dest); 5018 RuntimeAddress target(dest); 5019 if (cb) { 5020 far_call(target, tmp); 5021 } else { 5022 relocate(target.rspec(), [&] { 5023 int32_t offset; 5024 movptr(tmp, target.target(), offset); 5025 jalr(x1, tmp, offset); 5026 }); 5027 } 5028 } 5029 5030 void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) { 5031 assert(bit_pos < 64, "invalid bit range"); 5032 if (UseZbs) { 5033 bexti(Rd, Rs, bit_pos); 5034 return; 5035 } 5036 int64_t imm = (int64_t)(1UL << bit_pos); 5037 if (is_simm12(imm)) { 5038 and_imm12(Rd, Rs, imm); 5039 } else { 5040 srli(Rd, Rs, bit_pos); 5041 and_imm12(Rd, Rd, 1); 5042 } 5043 } 5044 5045 // Implements lightweight-locking. 
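// (A sketch of the fast path below: CAS the mark-word lock bits from
// 0b01 (unlocked) to 0b00 (locked), then push obj onto the per-thread
// lock-stack; a recursive lock is just obj appearing again on top.)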
5046 //
5047 // - obj: the object to be locked
5048 // - tmp1, tmp2, tmp3: temporary registers, will be destroyed
5049 // - slow: branched to if locking fails
5050 void MacroAssembler::lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
5051   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
5052   assert_different_registers(obj, tmp1, tmp2, tmp3, t0);
5053 
5054   Label push;
5055   const Register top = tmp1;
5056   const Register mark = tmp2;
5057   const Register t = tmp3;
5058 
5059   // Preload the markWord. It is important that this is the first
5060   // instruction emitted as it is part of C1's null check semantics.
5061   ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
5062 
5063   // Check if the lock-stack is full.
5064   lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
5065   mv(t, (unsigned)LockStack::end_offset());
5066   bge(top, t, slow, /* is_far */ true);
5067 
5068   // Check for recursion.
5069   add(t, xthread, top);
5070   ld(t, Address(t, -oopSize));
5071   beq(obj, t, push);
5072 
5073   // Check header for monitor (0b10).
5074   test_bit(t, mark, exact_log2(markWord::monitor_value));
5075   bnez(t, slow, /* is_far */ true);
5076 
5077   // Try to lock. Transition lock-bits 0b01 => 0b00
5078   assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a la");
5079   ori(mark, mark, markWord::unlocked_value);
5080   xori(t, mark, markWord::unlocked_value);
5081   cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
5082           /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t);
5083   bne(mark, t, slow, /* is_far */ true);
5084 
5085   bind(push);
5086   // After successful lock, push object on lock-stack.
5087   add(t, xthread, top);
5088   sd(obj, Address(t));
5089   addw(top, top, oopSize);
5090   sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
5091 }
5092 
5093 // Implements lightweight-unlocking.
5094 //
5095 // - obj: the object to be unlocked
5096 // - tmp1, tmp2, tmp3: temporary registers
5097 // - slow: branched to if unlocking fails
5098 void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
5099   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
5100   assert_different_registers(obj, tmp1, tmp2, tmp3, t0);
5101 
5102 #ifdef ASSERT
5103   {
5104     // Check for lock-stack underflow.
5105     Label stack_ok;
5106     lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
5107     mv(tmp2, (unsigned)LockStack::start_offset());
5108     bge(tmp1, tmp2, stack_ok);
5109     STOP("Lock-stack underflow");
5110     bind(stack_ok);
5111   }
5112 #endif
5113 
5114   Label unlocked, push_and_slow;
5115   const Register top = tmp1;
5116   const Register mark = tmp2;
5117   const Register t = tmp3;
5118 
5119   // Check if obj is top of lock-stack.
5120   lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
5121   subw(top, top, oopSize);
5122   add(t, xthread, top);
5123   ld(t, Address(t));
5124   bne(obj, t, slow, /* is_far */ true);
5125 
5126   // Pop lock-stack.
5127   DEBUG_ONLY(add(t, xthread, top);)
5128   DEBUG_ONLY(sd(zr, Address(t));)
5129   sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
5130 
5131   // Check if recursive.
5132   add(t, xthread, top);
5133   ld(t, Address(t, -oopSize));
5134   beq(obj, t, unlocked);
5135 
5136   // Not recursive. Check header for monitor (0b10).
5137 ld(mark, Address(obj, oopDesc::mark_offset_in_bytes())); 5138 test_bit(t, mark, exact_log2(markWord::monitor_value)); 5139 bnez(t, push_and_slow); 5140 5141 #ifdef ASSERT 5142 // Check header not unlocked (0b01). 5143 Label not_unlocked; 5144 test_bit(t, mark, exact_log2(markWord::unlocked_value)); 5145 beqz(t, not_unlocked); 5146 stop("lightweight_unlock already unlocked"); 5147 bind(not_unlocked); 5148 #endif 5149 5150 // Try to unlock. Transition lock bits 0b00 => 0b01 5151 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea"); 5152 ori(t, mark, markWord::unlocked_value); 5153 cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64, 5154 /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ t); 5155 beq(mark, t, unlocked); 5156 5157 bind(push_and_slow); 5158 // Restore lock-stack and handle the unlock in runtime. 5159 DEBUG_ONLY(add(t, xthread, top);) 5160 DEBUG_ONLY(sd(obj, Address(t));) 5161 addw(top, top, oopSize); 5162 sw(top, Address(xthread, JavaThread::lock_stack_top_offset())); 5163 j(slow); 5164 5165 bind(unlocked); 5166 }