1 /* 2 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 4 * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 * 25 */ 26 27 #include "precompiled.hpp" 28 #include "asm/assembler.hpp" 29 #include "asm/assembler.inline.hpp" 30 #include "code/compiledIC.hpp" 31 #include "compiler/disassembler.hpp" 32 #include "gc/shared/barrierSet.hpp" 33 #include "gc/shared/barrierSetAssembler.hpp" 34 #include "gc/shared/cardTable.hpp" 35 #include "gc/shared/cardTableBarrierSet.hpp" 36 #include "gc/shared/collectedHeap.hpp" 37 #include "interpreter/bytecodeHistogram.hpp" 38 #include "interpreter/interpreter.hpp" 39 #include "memory/resourceArea.hpp" 40 #include "memory/universe.hpp" 41 #include "oops/accessDecorators.hpp" 42 #include "oops/compressedKlass.inline.hpp" 43 #include "oops/compressedOops.inline.hpp" 44 #include "oops/klass.inline.hpp" 45 #include "oops/oop.hpp" 46 #include "runtime/interfaceSupport.inline.hpp" 47 #include "runtime/javaThread.hpp" 48 #include "runtime/jniHandles.inline.hpp" 49 #include "runtime/sharedRuntime.hpp" 50 #include "runtime/stubRoutines.hpp" 51 #include "utilities/globalDefinitions.hpp" 52 #include "utilities/powerOfTwo.hpp" 53 #ifdef COMPILER2 54 #include "opto/compile.hpp" 55 #include "opto/node.hpp" 56 #include "opto/output.hpp" 57 #endif 58 59 #ifdef PRODUCT 60 #define BLOCK_COMMENT(str) /* nothing */ 61 #else 62 #define BLOCK_COMMENT(str) block_comment(str) 63 #endif 64 #define STOP(str) stop(str); 65 #define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") 66 67 68 69 Register MacroAssembler::extract_rs1(address instr) { 70 assert_cond(instr != nullptr); 71 return as_Register(Assembler::extract(Assembler::ld_instr(instr), 19, 15)); 72 } 73 74 Register MacroAssembler::extract_rs2(address instr) { 75 assert_cond(instr != nullptr); 76 return as_Register(Assembler::extract(Assembler::ld_instr(instr), 24, 20)); 77 } 78 79 Register MacroAssembler::extract_rd(address instr) { 80 assert_cond(instr != nullptr); 81 return as_Register(Assembler::extract(Assembler::ld_instr(instr), 11, 7)); 82 } 83 84 uint32_t MacroAssembler::extract_opcode(address instr) { 85 assert_cond(instr != nullptr); 86 return Assembler::extract(Assembler::ld_instr(instr), 6, 0); 87 } 88 89 uint32_t MacroAssembler::extract_funct3(address instr) { 90 assert_cond(instr != nullptr); 91 return Assembler::extract(Assembler::ld_instr(instr), 14, 12); 92 } 93 94 bool 
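// Note: an auipc-based pair reaches roughly +/- 2 GiB from the auipc itself:
//   auipc Rd, imm20              ; Rd = pc + (imm20 << 12)
//   addi/jalr/ld/fld ..., imm12(Rd)
// The recognizers below only report such a pair as pc-relative when the second
// instruction actually consumes the auipc result (see the data-dependency checks).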
MacroAssembler::is_pc_relative_at(address instr) {
  // auipc + jalr
  // auipc + addi
  // auipc + load
  // auipc + float_load
  return (is_auipc_at(instr)) &&
         (is_addi_at(instr + instruction_size) ||
          is_jalr_at(instr + instruction_size) ||
          is_load_at(instr + instruction_size) ||
          is_float_load_at(instr + instruction_size)) &&
         check_pc_relative_data_dependency(instr);
}

// i.e. ld(Rd, Label)
bool MacroAssembler::is_load_pc_relative_at(address instr) {
  return is_auipc_at(instr) &&                          // auipc
         is_ld_at(instr + instruction_size) &&          // ld
         check_load_pc_relative_data_dependency(instr);
}

bool MacroAssembler::is_movptr1_at(address instr) {
  return is_lui_at(instr) &&                                    // Lui
         is_addi_at(instr + instruction_size) &&                // Addi
         is_slli_shift_at(instr + instruction_size * 2, 11) &&  // Slli Rd, Rs, 11
         is_addi_at(instr + instruction_size * 3) &&            // Addi
         is_slli_shift_at(instr + instruction_size * 4, 6) &&   // Slli Rd, Rs, 6
         (is_addi_at(instr + instruction_size * 5) ||
          is_jalr_at(instr + instruction_size * 5) ||
          is_load_at(instr + instruction_size * 5)) &&          // Addi/Jalr/Load
         check_movptr1_data_dependency(instr);
}

bool MacroAssembler::is_movptr2_at(address instr) {
  return is_lui_at(instr) &&                                    // lui
         is_lui_at(instr + instruction_size) &&                 // lui
         is_slli_shift_at(instr + instruction_size * 2, 18) &&  // slli Rd, Rs, 18
         is_add_at(instr + instruction_size * 3) &&
         (is_addi_at(instr + instruction_size * 4) ||
          is_jalr_at(instr + instruction_size * 4) ||
          is_load_at(instr + instruction_size * 4)) &&          // Addi/Jalr/Load
         check_movptr2_data_dependency(instr);
}

bool MacroAssembler::is_li16u_at(address instr) {
  return is_lui_at(instr) &&                      // lui
         is_srli_at(instr + instruction_size) &&  // srli
         check_li16u_data_dependency(instr);
}

bool MacroAssembler::is_li32_at(address instr) {
  return is_lui_at(instr) &&                       // lui
         is_addiw_at(instr + instruction_size) &&  // addiw
         check_li32_data_dependency(instr);
}

bool MacroAssembler::is_li64_at(address instr) {
  return is_lui_at(instr) &&                                    // lui
         is_addi_at(instr + instruction_size) &&                // addi
         is_slli_shift_at(instr + instruction_size * 2, 12) &&  // Slli Rd, Rs, 12
         is_addi_at(instr + instruction_size * 3) &&            // addi
         is_slli_shift_at(instr + instruction_size * 4, 12) &&  // Slli Rd, Rs, 12
         is_addi_at(instr + instruction_size * 5) &&            // addi
         is_slli_shift_at(instr + instruction_size * 6, 8) &&   // Slli Rd, Rs, 8
         is_addi_at(instr + instruction_size * 7) &&            // addi
         check_li64_data_dependency(instr);
}

bool MacroAssembler::is_lwu_to_zr(address instr) {
  assert_cond(instr != nullptr);
  return (extract_opcode(instr) == 0b0000011 &&
          extract_funct3(instr) == 0b110 &&
          extract_rd(instr) == zr);         // zr
}

uint32_t MacroAssembler::get_membar_kind(address addr) {
  assert_cond(addr != nullptr);
  assert(is_membar(addr), "no membar found");

  uint32_t insn = Bytes::get_native_u4(addr);

  uint32_t predecessor = Assembler::extract(insn, 27, 24);
  uint32_t successor = Assembler::extract(insn, 23, 20);

  return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor);
}

void MacroAssembler::set_membar_kind(address addr, uint32_t order_kind) {
  assert_cond(addr != nullptr);
  assert(is_membar(addr), "no membar found");

  uint32_t predecessor = 0;
  uint32_t successor = 0;

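  // A fence instruction keeps its ordering constraints in two 4-bit fields:
  // the predecessor set in bits [27:24] and the successor set in bits [23:20]
  // (the same fields get_membar_kind() extracts above). Re-patching just those
  // two nibbles is enough to change the kind of an already emitted membar.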
MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); 188 189 uint32_t insn = Bytes::get_native_u4(addr); 190 address pInsn = (address) &insn; 191 Assembler::patch(pInsn, 27, 24, predecessor); 192 Assembler::patch(pInsn, 23, 20, successor); 193 194 address membar = addr; 195 Assembler::sd_instr(membar, insn); 196 } 197 198 199 static void pass_arg0(MacroAssembler* masm, Register arg) { 200 if (c_rarg0 != arg) { 201 masm->mv(c_rarg0, arg); 202 } 203 } 204 205 static void pass_arg1(MacroAssembler* masm, Register arg) { 206 if (c_rarg1 != arg) { 207 masm->mv(c_rarg1, arg); 208 } 209 } 210 211 static void pass_arg2(MacroAssembler* masm, Register arg) { 212 if (c_rarg2 != arg) { 213 masm->mv(c_rarg2, arg); 214 } 215 } 216 217 static void pass_arg3(MacroAssembler* masm, Register arg) { 218 if (c_rarg3 != arg) { 219 masm->mv(c_rarg3, arg); 220 } 221 } 222 223 void MacroAssembler::push_cont_fastpath(Register java_thread) { 224 if (!Continuations::enabled()) return; 225 Label done; 226 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset())); 227 bleu(sp, t0, done); 228 sd(sp, Address(java_thread, JavaThread::cont_fastpath_offset())); 229 bind(done); 230 } 231 232 void MacroAssembler::pop_cont_fastpath(Register java_thread) { 233 if (!Continuations::enabled()) return; 234 Label done; 235 ld(t0, Address(java_thread, JavaThread::cont_fastpath_offset())); 236 bltu(sp, t0, done); 237 sd(zr, Address(java_thread, JavaThread::cont_fastpath_offset())); 238 bind(done); 239 } 240 241 int MacroAssembler::align(int modulus, int extra_offset) { 242 CompressibleRegion cr(this); 243 intptr_t before = offset(); 244 while ((offset() + extra_offset) % modulus != 0) { nop(); } 245 return (int)(offset() - before); 246 } 247 248 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 249 call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); 250 } 251 252 // Implementation of call_VM versions 253 254 void MacroAssembler::call_VM(Register oop_result, 255 address entry_point, 256 bool check_exceptions) { 257 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 258 } 259 260 void MacroAssembler::call_VM(Register oop_result, 261 address entry_point, 262 Register arg_1, 263 bool check_exceptions) { 264 pass_arg1(this, arg_1); 265 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 266 } 267 268 void MacroAssembler::call_VM(Register oop_result, 269 address entry_point, 270 Register arg_1, 271 Register arg_2, 272 bool check_exceptions) { 273 assert_different_registers(arg_1, c_rarg2); 274 pass_arg2(this, arg_2); 275 pass_arg1(this, arg_1); 276 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 277 } 278 279 void MacroAssembler::call_VM(Register oop_result, 280 address entry_point, 281 Register arg_1, 282 Register arg_2, 283 Register arg_3, 284 bool check_exceptions) { 285 assert_different_registers(arg_1, c_rarg2, c_rarg3); 286 assert_different_registers(arg_2, c_rarg3); 287 pass_arg3(this, arg_3); 288 289 pass_arg2(this, arg_2); 290 291 pass_arg1(this, arg_1); 292 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 293 } 294 295 void MacroAssembler::call_VM(Register oop_result, 296 Register last_java_sp, 297 address entry_point, 298 int number_of_arguments, 299 bool check_exceptions) { 300 call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 301 } 302 303 void MacroAssembler::call_VM(Register 
oop_result, 304 Register last_java_sp, 305 address entry_point, 306 Register arg_1, 307 bool check_exceptions) { 308 pass_arg1(this, arg_1); 309 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 310 } 311 312 void MacroAssembler::call_VM(Register oop_result, 313 Register last_java_sp, 314 address entry_point, 315 Register arg_1, 316 Register arg_2, 317 bool check_exceptions) { 318 319 assert_different_registers(arg_1, c_rarg2); 320 pass_arg2(this, arg_2); 321 pass_arg1(this, arg_1); 322 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 323 } 324 325 void MacroAssembler::call_VM(Register oop_result, 326 Register last_java_sp, 327 address entry_point, 328 Register arg_1, 329 Register arg_2, 330 Register arg_3, 331 bool check_exceptions) { 332 assert_different_registers(arg_1, c_rarg2, c_rarg3); 333 assert_different_registers(arg_2, c_rarg3); 334 pass_arg3(this, arg_3); 335 pass_arg2(this, arg_2); 336 pass_arg1(this, arg_1); 337 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 338 } 339 340 void MacroAssembler::post_call_nop() { 341 if (!Continuations::enabled()) { 342 return; 343 } 344 relocate(post_call_nop_Relocation::spec(), [&] { 345 InlineSkippedInstructionsCounter skipCounter(this); 346 nop(); 347 li32(zr, 0); 348 }); 349 } 350 351 // these are no-ops overridden by InterpreterMacroAssembler 352 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} 353 void MacroAssembler::check_and_handle_popframe(Register java_thread) {} 354 355 // Calls to C land 356 // 357 // When entering C land, the fp, & esp of the last Java frame have to be recorded 358 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 359 // has to be reset to 0. This is required to allow proper stack traversal. 360 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 361 Register last_java_fp, 362 Register last_java_pc) { 363 364 if (last_java_pc->is_valid()) { 365 sd(last_java_pc, Address(xthread, 366 JavaThread::frame_anchor_offset() + 367 JavaFrameAnchor::last_Java_pc_offset())); 368 } 369 370 // determine last_java_sp register 371 if (!last_java_sp->is_valid()) { 372 last_java_sp = esp; 373 } 374 375 sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); 376 377 // last_java_fp is optional 378 if (last_java_fp->is_valid()) { 379 sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); 380 } 381 } 382 383 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 384 Register last_java_fp, 385 address last_java_pc, 386 Register tmp) { 387 assert(last_java_pc != nullptr, "must provide a valid PC"); 388 389 la(tmp, last_java_pc); 390 sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); 391 392 set_last_Java_frame(last_java_sp, last_java_fp, noreg); 393 } 394 395 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 396 Register last_java_fp, 397 Label &L, 398 Register tmp) { 399 if (L.is_bound()) { 400 set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); 401 } else { 402 L.add_patch_at(code(), locator()); 403 IncompressibleRegion ir(this); // the label address will be patched back. 
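  // The pc() passed below is only a placeholder: the locator recorded above is
  // used to patch the real label address back in once the label is bound, which
  // is also why the region is kept incompressible (patching assumes a
  // fixed-length instruction sequence).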
404 set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); 405 } 406 } 407 408 void MacroAssembler::reset_last_Java_frame(bool clear_fp) { 409 // we must set sp to zero to clear frame 410 sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); 411 412 // must clear fp, so that compiled frames are not confused; it is 413 // possible that we need it only for debugging 414 if (clear_fp) { 415 sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); 416 } 417 418 // Always clear the pc because it could have been set by make_walkable() 419 sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); 420 } 421 422 void MacroAssembler::call_VM_base(Register oop_result, 423 Register java_thread, 424 Register last_java_sp, 425 address entry_point, 426 int number_of_arguments, 427 bool check_exceptions) { 428 // determine java_thread register 429 if (!java_thread->is_valid()) { 430 java_thread = xthread; 431 } 432 // determine last_java_sp register 433 if (!last_java_sp->is_valid()) { 434 last_java_sp = esp; 435 } 436 437 // debugging support 438 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 439 assert(java_thread == xthread, "unexpected register"); 440 441 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 442 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 443 444 // push java thread (becomes first argument of C function) 445 mv(c_rarg0, java_thread); 446 447 // set last Java frame before call 448 assert(last_java_sp != fp, "can't use fp"); 449 450 Label l; 451 set_last_Java_frame(last_java_sp, fp, l, t0); 452 453 // do the call, remove parameters 454 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); 455 456 // reset last Java frame 457 // Only interpreter should have to clear fp 458 reset_last_Java_frame(true); 459 460 // C++ interp handles this in the interpreter 461 check_and_handle_popframe(java_thread); 462 check_and_handle_earlyret(java_thread); 463 464 if (check_exceptions) { 465 // check for pending exceptions (java_thread is set upon return) 466 ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); 467 Label ok; 468 beqz(t0, ok); 469 RuntimeAddress target(StubRoutines::forward_exception_entry()); 470 relocate(target.rspec(), [&] { 471 int32_t offset; 472 la(t0, target.target(), offset); 473 jr(t0, offset); 474 }); 475 bind(ok); 476 } 477 478 // get oop result if there is one and reset the value in the thread 479 if (oop_result->is_valid()) { 480 get_vm_result(oop_result, java_thread); 481 } 482 } 483 484 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { 485 ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 486 sd(zr, Address(java_thread, JavaThread::vm_result_offset())); 487 verify_oop_msg(oop_result, "broken oop in call_VM_base"); 488 } 489 490 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { 491 ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); 492 sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); 493 } 494 495 void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { 496 assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); 497 assert_different_registers(klass, xthread, tmp); 498 499 Label L_fallthrough, L_tmp; 500 if (L_fast_path == nullptr) { 501 L_fast_path = 
&L_fallthrough; 502 } else if (L_slow_path == nullptr) { 503 L_slow_path = &L_fallthrough; 504 } 505 506 // Fast path check: class is fully initialized 507 lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); 508 sub(tmp, tmp, InstanceKlass::fully_initialized); 509 beqz(tmp, *L_fast_path); 510 511 // Fast path check: current thread is initializer thread 512 ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); 513 514 if (L_slow_path == &L_fallthrough) { 515 beq(xthread, tmp, *L_fast_path); 516 bind(*L_slow_path); 517 } else if (L_fast_path == &L_fallthrough) { 518 bne(xthread, tmp, *L_slow_path); 519 bind(*L_fast_path); 520 } else { 521 Unimplemented(); 522 } 523 } 524 525 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { 526 if (!VerifyOops) { return; } 527 528 // Pass register number to verify_oop_subroutine 529 const char* b = nullptr; 530 { 531 ResourceMark rm; 532 stringStream ss; 533 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line); 534 b = code_string(ss.as_string()); 535 } 536 BLOCK_COMMENT("verify_oop {"); 537 538 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 539 540 mv(c_rarg0, reg); // c_rarg0 : x10 541 { 542 // The length of the instruction sequence emitted should not depend 543 // on the address of the char buffer so that the size of mach nodes for 544 // scratch emit and normal emit matches. 545 IncompressibleRegion ir(this); // Fixed length 546 movptr(t0, (address) b); 547 } 548 549 // call indirectly to solve generation ordering problem 550 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 551 relocate(target.rspec(), [&] { 552 int32_t offset; 553 la(t1, target.target(), offset); 554 ld(t1, Address(t1, offset)); 555 }); 556 jalr(t1); 557 558 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 559 560 BLOCK_COMMENT("} verify_oop"); 561 } 562 563 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { 564 if (!VerifyOops) { 565 return; 566 } 567 568 const char* b = nullptr; 569 { 570 ResourceMark rm; 571 stringStream ss; 572 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line); 573 b = code_string(ss.as_string()); 574 } 575 BLOCK_COMMENT("verify_oop_addr {"); 576 577 push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 578 579 if (addr.uses(sp)) { 580 la(x10, addr); 581 ld(x10, Address(x10, 4 * wordSize)); 582 } else { 583 ld(x10, addr); 584 } 585 586 { 587 // The length of the instruction sequence emitted should not depend 588 // on the address of the char buffer so that the size of mach nodes for 589 // scratch emit and normal emit matches. 590 IncompressibleRegion ir(this); // Fixed length 591 movptr(t0, (address) b); 592 } 593 594 // call indirectly to solve generation ordering problem 595 ExternalAddress target(StubRoutines::verify_oop_subroutine_entry_address()); 596 relocate(target.rspec(), [&] { 597 int32_t offset; 598 la(t1, target.target(), offset); 599 ld(t1, Address(t1, offset)); 600 }); 601 jalr(t1); 602 603 pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); 604 605 BLOCK_COMMENT("} verify_oop_addr"); 606 } 607 608 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 609 int extra_slot_offset) { 610 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
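  // Expression-stack slots are addressed off esp and are stackElementSize bytes
  // wide, so (roughly) slot i lives at esp + i * stackElementSize + offset,
  // where offset accounts for the extra_slot_offset requested by the caller.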
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  if (arg_slot.is_constant()) {
    return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
  } else {
    assert_different_registers(t0, arg_slot.as_register());
    shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
    return Address(t0, offset);
  }
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
{
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr(" pc = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr(" x0 = 0x%016lx", regs[0]);
      tty->print_cr(" x1 = 0x%016lx", regs[1]);
      tty->print_cr(" x2 = 0x%016lx", regs[2]);
      tty->print_cr(" x3 = 0x%016lx", regs[3]);
      tty->print_cr(" x4 = 0x%016lx", regs[4]);
      tty->print_cr(" x5 = 0x%016lx", regs[5]);
      tty->print_cr(" x6 = 0x%016lx", regs[6]);
      tty->print_cr(" x7 = 0x%016lx", regs[7]);
      tty->print_cr(" x8 = 0x%016lx", regs[8]);
      tty->print_cr(" x9 = 0x%016lx", regs[9]);
      tty->print_cr("x10 = 0x%016lx", regs[10]);
      tty->print_cr("x11 = 0x%016lx", regs[11]);
      tty->print_cr("x12 = 0x%016lx", regs[12]);
      tty->print_cr("x13 = 0x%016lx", regs[13]);
      tty->print_cr("x14 = 0x%016lx", regs[14]);
      tty->print_cr("x15 = 0x%016lx", regs[15]);
      tty->print_cr("x16 = 0x%016lx", regs[16]);
      tty->print_cr("x17 = 0x%016lx", regs[17]);
      tty->print_cr("x18 = 0x%016lx", regs[18]);
      tty->print_cr("x19 = 0x%016lx", regs[19]);
      tty->print_cr("x20 = 0x%016lx", regs[20]);
      tty->print_cr("x21 = 0x%016lx", regs[21]);
      tty->print_cr("x22 = 0x%016lx", regs[22]);
      tty->print_cr("x23 = 0x%016lx", regs[23]);
      tty->print_cr("x24 = 0x%016lx", regs[24]);
      tty->print_cr("x25 = 0x%016lx", regs[25]);
      tty->print_cr("x26 = 0x%016lx", regs[26]);
      tty->print_cr("x27 = 0x%016lx", regs[27]);
      tty->print_cr("x28 = 0x%016lx", regs[28]);
      tty->print_cr("x29 = 0x%016lx", regs[29]);
      tty->print_cr("x30 = 0x%016lx", regs[30]);
      tty->print_cr("x31 = 0x%016lx", regs[31]);
      BREAKPOINT;
    }
  }
  fatal("DEBUG MESSAGE: %s", msg);
}

void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done, tagged, weak_tagged;

  beqz(value, done);           // Use null as-is.
  // Test for tag.
  andi(tmp1, value, JNIHandles::tag_mask);
  bnez(tmp1, tagged);

  // Resolve local handle
  access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, value, Address(value, 0), tmp1, tmp2);
  verify_oop(value);
  j(done);

  bind(tagged);
  // Test for jweak tag.
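  // A jobject encodes its kind in the low bits of the handle: local handles are
  // untagged, global handles carry TypeTag::global and weak globals carry
  // TypeTag::weak_global (asserted below), which is why the tagged paths
  // subtract the tag from the handle before dereferencing it.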
704 STATIC_ASSERT(JNIHandles::TypeTag::weak_global == 0b1); 705 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::weak_global)); 706 bnez(tmp1, weak_tagged); 707 708 // Resolve global handle 709 access_load_at(T_OBJECT, IN_NATIVE, value, 710 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 711 verify_oop(value); 712 j(done); 713 714 bind(weak_tagged); 715 // Resolve jweak. 716 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, 717 Address(value, -JNIHandles::TypeTag::weak_global), tmp1, tmp2); 718 verify_oop(value); 719 720 bind(done); 721 } 722 723 void MacroAssembler::resolve_global_jobject(Register value, Register tmp1, Register tmp2) { 724 assert_different_registers(value, tmp1, tmp2); 725 Label done; 726 727 beqz(value, done); // Use null as-is. 728 729 #ifdef ASSERT 730 { 731 STATIC_ASSERT(JNIHandles::TypeTag::global == 0b10); 732 Label valid_global_tag; 733 test_bit(tmp1, value, exact_log2(JNIHandles::TypeTag::global)); // Test for global tag. 734 bnez(tmp1, valid_global_tag); 735 stop("non global jobject using resolve_global_jobject"); 736 bind(valid_global_tag); 737 } 738 #endif 739 740 // Resolve global handle 741 access_load_at(T_OBJECT, IN_NATIVE, value, 742 Address(value, -JNIHandles::TypeTag::global), tmp1, tmp2); 743 verify_oop(value); 744 745 bind(done); 746 } 747 748 void MacroAssembler::stop(const char* msg) { 749 BLOCK_COMMENT(msg); 750 illegal_instruction(Assembler::csr::time); 751 emit_int64((uintptr_t)msg); 752 } 753 754 void MacroAssembler::unimplemented(const char* what) { 755 const char* buf = nullptr; 756 { 757 ResourceMark rm; 758 stringStream ss; 759 ss.print("unimplemented: %s", what); 760 buf = code_string(ss.as_string()); 761 } 762 stop(buf); 763 } 764 765 void MacroAssembler::emit_static_call_stub() { 766 IncompressibleRegion ir(this); // Fixed length: see CompiledDirectCall::to_interp_stub_size(). 767 // CompiledDirectCall::set_to_interpreted knows the 768 // exact layout of this stub. 769 770 mov_metadata(xmethod, (Metadata*)nullptr); 771 772 // Jump to the entry point of the c2i stub. 
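  // The 0 loaded below is a placeholder destination that gets patched later;
  // what matters is that movptr() always emits the same fixed-length sequence
  // (lui + lui + slli + add, as noted on the call), so the patching code can
  // locate and rewrite the address without re-measuring the stub.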
773 int32_t offset = 0; 774 movptr(t0, 0, offset, t1); // lui + lui + slli + add 775 jr(t0, offset); 776 } 777 778 void MacroAssembler::call_VM_leaf_base(address entry_point, 779 int number_of_arguments, 780 Label *retaddr) { 781 int32_t offset = 0; 782 push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp 783 mv(t0, entry_point, offset); 784 jalr(t0, offset); 785 if (retaddr != nullptr) { 786 bind(*retaddr); 787 } 788 pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp 789 } 790 791 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 792 call_VM_leaf_base(entry_point, number_of_arguments); 793 } 794 795 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 796 pass_arg0(this, arg_0); 797 call_VM_leaf_base(entry_point, 1); 798 } 799 800 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 801 assert_different_registers(arg_1, c_rarg0); 802 pass_arg0(this, arg_0); 803 pass_arg1(this, arg_1); 804 call_VM_leaf_base(entry_point, 2); 805 } 806 807 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, 808 Register arg_1, Register arg_2) { 809 assert_different_registers(arg_1, c_rarg0); 810 assert_different_registers(arg_2, c_rarg0, c_rarg1); 811 pass_arg0(this, arg_0); 812 pass_arg1(this, arg_1); 813 pass_arg2(this, arg_2); 814 call_VM_leaf_base(entry_point, 3); 815 } 816 817 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 818 pass_arg0(this, arg_0); 819 MacroAssembler::call_VM_leaf_base(entry_point, 1); 820 } 821 822 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 823 824 assert_different_registers(arg_0, c_rarg1); 825 pass_arg1(this, arg_1); 826 pass_arg0(this, arg_0); 827 MacroAssembler::call_VM_leaf_base(entry_point, 2); 828 } 829 830 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 831 assert_different_registers(arg_0, c_rarg1, c_rarg2); 832 assert_different_registers(arg_1, c_rarg2); 833 pass_arg2(this, arg_2); 834 pass_arg1(this, arg_1); 835 pass_arg0(this, arg_0); 836 MacroAssembler::call_VM_leaf_base(entry_point, 3); 837 } 838 839 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 840 assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3); 841 assert_different_registers(arg_1, c_rarg2, c_rarg3); 842 assert_different_registers(arg_2, c_rarg3); 843 844 pass_arg3(this, arg_3); 845 pass_arg2(this, arg_2); 846 pass_arg1(this, arg_1); 847 pass_arg0(this, arg_0); 848 MacroAssembler::call_VM_leaf_base(entry_point, 4); 849 } 850 851 void MacroAssembler::la(Register Rd, const address addr) { 852 int32_t offset; 853 la(Rd, addr, offset); 854 addi(Rd, Rd, offset); 855 } 856 857 void MacroAssembler::la(Register Rd, const address addr, int32_t &offset) { 858 if (is_32bit_offset_from_codecache((int64_t)addr)) { 859 int64_t distance = addr - pc(); 860 assert(is_valid_32bit_offset(distance), "Must be"); 861 auipc(Rd, (int32_t)distance + 0x800); 862 offset = ((int32_t)distance << 20) >> 20; 863 } else { 864 assert(!CodeCache::contains(addr), "Must be"); 865 movptr(Rd, addr, offset); 866 } 867 } 868 869 void MacroAssembler::la(Register Rd, const Address &adr) { 870 switch (adr.getMode()) { 871 case Address::literal: { 872 relocInfo::relocType rtype = adr.rspec().reloc()->type(); 873 if (rtype == relocInfo::none) { 874 mv(Rd, 
(intptr_t)(adr.target())); 875 } else { 876 relocate(adr.rspec(), [&] { 877 movptr(Rd, adr.target()); 878 }); 879 } 880 break; 881 } 882 case Address::base_plus_offset: { 883 Address new_adr = legitimize_address(Rd, adr); 884 if (!(new_adr.base() == Rd && new_adr.offset() == 0)) { 885 addi(Rd, new_adr.base(), new_adr.offset()); 886 } 887 break; 888 } 889 default: 890 ShouldNotReachHere(); 891 } 892 } 893 894 void MacroAssembler::la(Register Rd, Label &label) { 895 IncompressibleRegion ir(this); // the label address may be patched back. 896 wrap_label(Rd, label, &MacroAssembler::la); 897 } 898 899 void MacroAssembler::li16u(Register Rd, uint16_t imm) { 900 lui(Rd, (uint32_t)imm << 12); 901 srli(Rd, Rd, 12); 902 } 903 904 void MacroAssembler::li32(Register Rd, int32_t imm) { 905 // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit 906 int64_t upper = imm, lower = imm; 907 lower = (imm << 20) >> 20; 908 upper -= lower; 909 upper = (int32_t)upper; 910 // lui Rd, imm[31:12] + imm[11] 911 lui(Rd, upper); 912 // use addiw to distinguish li32 to li64 913 addiw(Rd, Rd, lower); 914 } 915 916 void MacroAssembler::li64(Register Rd, int64_t imm) { 917 // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or 918 // (imm[31:20] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. 919 int64_t lower = imm & 0xffffffff; 920 lower -= ((lower << 44) >> 44); 921 int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; 922 int32_t upper = (tmp_imm - (int32_t)lower) >> 32; 923 924 // Load upper 32 bits 925 int64_t up = upper, lo = upper; 926 lo = (lo << 52) >> 52; 927 up -= lo; 928 up = (int32_t)up; 929 lui(Rd, up); 930 addi(Rd, Rd, lo); 931 932 // Load the rest 32 bits. 933 slli(Rd, Rd, 12); 934 addi(Rd, Rd, (int32_t)lower >> 20); 935 slli(Rd, Rd, 12); 936 lower = ((int32_t)imm << 12) >> 20; 937 addi(Rd, Rd, lower); 938 slli(Rd, Rd, 8); 939 lower = imm & 0xff; 940 addi(Rd, Rd, lower); 941 } 942 943 void MacroAssembler::li(Register Rd, int64_t imm) { 944 // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff 945 // li -> c.li 946 if (do_compress() && (is_simm6(imm) && Rd != x0)) { 947 c_li(Rd, imm); 948 return; 949 } 950 951 int shift = 12; 952 int64_t upper = imm, lower = imm; 953 // Split imm to a lower 12-bit sign-extended part and the remainder, 954 // because addi will sign-extend the lower imm. 955 lower = ((int32_t)imm << 20) >> 20; 956 upper -= lower; 957 958 // Test whether imm is a 32-bit integer. 
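  // That is: the constant is representable as a sign-extended 32-bit value iff
  // bits [63:31] are either all zero or all one. If not, peel off the trailing
  // zero bits of the upper part, load it recursively, shift back and add the
  // low 12 bits.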
959 if (!(((imm) & ~(int64_t)0x7fffffff) == 0 || 960 (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) { 961 while (((upper >> shift) & 1) == 0) { shift++; } 962 upper >>= shift; 963 li(Rd, upper); 964 slli(Rd, Rd, shift); 965 if (lower != 0) { 966 addi(Rd, Rd, lower); 967 } 968 } else { 969 // 32-bit integer 970 Register hi_Rd = zr; 971 if (upper != 0) { 972 lui(Rd, (int32_t)upper); 973 hi_Rd = Rd; 974 } 975 if (lower != 0 || hi_Rd == zr) { 976 addiw(Rd, hi_Rd, lower); 977 } 978 } 979 } 980 981 void MacroAssembler::jump_link(const address dest, Register temp) { 982 assert_cond(dest != nullptr); 983 int64_t distance = dest - pc(); 984 if (is_simm21(distance) && ((distance % 2) == 0)) { 985 Assembler::jal(x1, distance); 986 } else { 987 assert(temp != noreg && temp != x0, "expecting a register"); 988 int32_t offset = 0; 989 la(temp, dest, offset); 990 jalr(temp, offset); 991 } 992 } 993 994 void MacroAssembler::jump_link(const Address &adr, Register temp) { 995 switch (adr.getMode()) { 996 case Address::literal: { 997 relocate(adr.rspec(), [&] { 998 jump_link(adr.target(), temp); 999 }); 1000 break; 1001 } 1002 case Address::base_plus_offset: { 1003 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; 1004 la(temp, Address(adr.base(), adr.offset() - offset)); 1005 jalr(temp, offset); 1006 break; 1007 } 1008 default: 1009 ShouldNotReachHere(); 1010 } 1011 } 1012 1013 void MacroAssembler::j(const address dest, Register temp) { 1014 assert(CodeCache::contains(dest), "Must be"); 1015 assert_cond(dest != nullptr); 1016 int64_t distance = dest - pc(); 1017 1018 // We can't patch C, i.e. if Label wasn't bound we need to patch this jump. 1019 IncompressibleRegion ir(this); 1020 if (is_simm21(distance) && ((distance % 2) == 0)) { 1021 Assembler::jal(x0, distance); 1022 } else { 1023 assert(temp != noreg && temp != x0, "expecting a register"); 1024 int32_t offset = 0; 1025 la(temp, dest, offset); 1026 jr(temp, offset); 1027 } 1028 } 1029 1030 void MacroAssembler::j(const Address &adr, Register temp) { 1031 switch (adr.getMode()) { 1032 case Address::literal: { 1033 relocate(adr.rspec(), [&] { 1034 j(adr.target(), temp); 1035 }); 1036 break; 1037 } 1038 case Address::base_plus_offset: { 1039 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; 1040 la(temp, Address(adr.base(), adr.offset() - offset)); 1041 jr(temp, offset); 1042 break; 1043 } 1044 default: 1045 ShouldNotReachHere(); 1046 } 1047 } 1048 1049 void MacroAssembler::j(Label &lab, Register temp) { 1050 assert_different_registers(x0, temp); 1051 if (lab.is_bound()) { 1052 MacroAssembler::j(target(lab), temp); 1053 } else { 1054 lab.add_patch_at(code(), locator()); 1055 MacroAssembler::j(pc(), temp); 1056 } 1057 } 1058 1059 void MacroAssembler::jr(Register Rd, int32_t offset) { 1060 assert(Rd != noreg, "expecting a register"); 1061 Assembler::jalr(x0, Rd, offset); 1062 } 1063 1064 void MacroAssembler::call(const address dest, Register temp) { 1065 assert_cond(dest != nullptr); 1066 assert(temp != noreg, "expecting a register"); 1067 int32_t offset = 0; 1068 la(temp, dest, offset); 1069 jalr(temp, offset); 1070 } 1071 1072 void MacroAssembler::jalr(Register Rs, int32_t offset) { 1073 assert(Rs != noreg, "expecting a register"); 1074 Assembler::jalr(x1, Rs, offset); 1075 } 1076 1077 void MacroAssembler::rt_call(address dest, Register tmp) { 1078 CodeBlob *cb = CodeCache::find_blob(dest); 1079 RuntimeAddress target(dest); 1080 if (cb) { 1081 far_call(target, tmp); 1082 } else { 1083 relocate(target.rspec(), [&] { 1084 int32_t offset; 
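    // The destination is not in a code blob, so it may live anywhere in the
    // address space: la() picks between a pc-relative auipc pair and a full
    // movptr() depending on the distance, and the call then goes through tmp.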
1085 la(tmp, target.target(), offset); 1086 jalr(tmp, offset); 1087 }); 1088 } 1089 } 1090 1091 void MacroAssembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) { 1092 if (L.is_bound()) { 1093 (this->*insn)(Rt, target(L)); 1094 } else { 1095 L.add_patch_at(code(), locator()); 1096 (this->*insn)(Rt, pc()); 1097 } 1098 } 1099 1100 void MacroAssembler::wrap_label(Register r1, Register r2, Label &L, 1101 compare_and_branch_insn insn, 1102 compare_and_branch_label_insn neg_insn, bool is_far) { 1103 if (is_far) { 1104 Label done; 1105 (this->*neg_insn)(r1, r2, done, /* is_far */ false); 1106 j(L); 1107 bind(done); 1108 } else { 1109 if (L.is_bound()) { 1110 (this->*insn)(r1, r2, target(L)); 1111 } else { 1112 L.add_patch_at(code(), locator()); 1113 (this->*insn)(r1, r2, pc()); 1114 } 1115 } 1116 } 1117 1118 #define INSN(NAME, NEG_INSN) \ 1119 void MacroAssembler::NAME(Register Rs1, Register Rs2, Label &L, bool is_far) { \ 1120 wrap_label(Rs1, Rs2, L, &MacroAssembler::NAME, &MacroAssembler::NEG_INSN, is_far); \ 1121 } 1122 1123 INSN(beq, bne); 1124 INSN(bne, beq); 1125 INSN(blt, bge); 1126 INSN(bge, blt); 1127 INSN(bltu, bgeu); 1128 INSN(bgeu, bltu); 1129 1130 #undef INSN 1131 1132 #define INSN(NAME) \ 1133 void MacroAssembler::NAME##z(Register Rs, const address dest) { \ 1134 NAME(Rs, zr, dest); \ 1135 } \ 1136 void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ 1137 NAME(Rs, zr, l, is_far); \ 1138 } \ 1139 1140 INSN(beq); 1141 INSN(bne); 1142 INSN(blt); 1143 INSN(ble); 1144 INSN(bge); 1145 INSN(bgt); 1146 1147 #undef INSN 1148 1149 #define INSN(NAME, NEG_INSN) \ 1150 void MacroAssembler::NAME(Register Rs, Register Rt, const address dest) { \ 1151 NEG_INSN(Rt, Rs, dest); \ 1152 } \ 1153 void MacroAssembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \ 1154 NEG_INSN(Rt, Rs, l, is_far); \ 1155 } 1156 1157 INSN(bgt, blt); 1158 INSN(ble, bge); 1159 INSN(bgtu, bltu); 1160 INSN(bleu, bgeu); 1161 1162 #undef INSN 1163 1164 // Float compare branch instructions 1165 1166 #define INSN(NAME, FLOATCMP, BRANCH) \ 1167 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ 1168 FLOATCMP##_s(t0, Rs1, Rs2); \ 1169 BRANCH(t0, l, is_far); \ 1170 } \ 1171 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ 1172 FLOATCMP##_d(t0, Rs1, Rs2); \ 1173 BRANCH(t0, l, is_far); \ 1174 } 1175 1176 INSN(beq, feq, bnez); 1177 INSN(bne, feq, beqz); 1178 1179 #undef INSN 1180 1181 1182 #define INSN(NAME, FLOATCMP1, FLOATCMP2) \ 1183 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1184 bool is_far, bool is_unordered) { \ 1185 if (is_unordered) { \ 1186 /* jump if either source is NaN or condition is expected */ \ 1187 FLOATCMP2##_s(t0, Rs2, Rs1); \ 1188 beqz(t0, l, is_far); \ 1189 } else { \ 1190 /* jump if no NaN in source and condition is expected */ \ 1191 FLOATCMP1##_s(t0, Rs1, Rs2); \ 1192 bnez(t0, l, is_far); \ 1193 } \ 1194 } \ 1195 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1196 bool is_far, bool is_unordered) { \ 1197 if (is_unordered) { \ 1198 /* jump if either source is NaN or condition is expected */ \ 1199 FLOATCMP2##_d(t0, Rs2, Rs1); \ 1200 beqz(t0, l, is_far); \ 1201 } else { \ 1202 /* jump if no NaN in source and condition is expected */ \ 1203 FLOATCMP1##_d(t0, Rs1, Rs2); \ 1204 bnez(t0, l, is_far); \ 1205 } \ 1206 } 1207 1208 INSN(ble, fle, flt); 1209 INSN(blt, flt, 
fle); 1210 1211 #undef INSN 1212 1213 #define INSN(NAME, CMP) \ 1214 void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1215 bool is_far, bool is_unordered) { \ 1216 float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ 1217 } \ 1218 void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ 1219 bool is_far, bool is_unordered) { \ 1220 double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ 1221 } 1222 1223 INSN(bgt, blt); 1224 INSN(bge, ble); 1225 1226 #undef INSN 1227 1228 1229 #define INSN(NAME, CSR) \ 1230 void MacroAssembler::NAME(Register Rd) { \ 1231 csrr(Rd, CSR); \ 1232 } 1233 1234 INSN(rdinstret, CSR_INSTRET); 1235 INSN(rdcycle, CSR_CYCLE); 1236 INSN(rdtime, CSR_TIME); 1237 INSN(frcsr, CSR_FCSR); 1238 INSN(frrm, CSR_FRM); 1239 INSN(frflags, CSR_FFLAGS); 1240 1241 #undef INSN 1242 1243 void MacroAssembler::csrr(Register Rd, unsigned csr) { 1244 csrrs(Rd, csr, x0); 1245 } 1246 1247 #define INSN(NAME, OPFUN) \ 1248 void MacroAssembler::NAME(unsigned csr, Register Rs) { \ 1249 OPFUN(x0, csr, Rs); \ 1250 } 1251 1252 INSN(csrw, csrrw); 1253 INSN(csrs, csrrs); 1254 INSN(csrc, csrrc); 1255 1256 #undef INSN 1257 1258 #define INSN(NAME, OPFUN) \ 1259 void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ 1260 OPFUN(x0, csr, imm); \ 1261 } 1262 1263 INSN(csrwi, csrrwi); 1264 INSN(csrsi, csrrsi); 1265 INSN(csrci, csrrci); 1266 1267 #undef INSN 1268 1269 #define INSN(NAME, CSR) \ 1270 void MacroAssembler::NAME(Register Rd, Register Rs) { \ 1271 csrrw(Rd, CSR, Rs); \ 1272 } 1273 1274 INSN(fscsr, CSR_FCSR); 1275 INSN(fsrm, CSR_FRM); 1276 INSN(fsflags, CSR_FFLAGS); 1277 1278 #undef INSN 1279 1280 #define INSN(NAME) \ 1281 void MacroAssembler::NAME(Register Rs) { \ 1282 NAME(x0, Rs); \ 1283 } 1284 1285 INSN(fscsr); 1286 INSN(fsrm); 1287 INSN(fsflags); 1288 1289 #undef INSN 1290 1291 void MacroAssembler::fsrmi(Register Rd, unsigned imm) { 1292 guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); 1293 csrrwi(Rd, CSR_FRM, imm); 1294 } 1295 1296 void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { 1297 csrrwi(Rd, CSR_FFLAGS, imm); 1298 } 1299 1300 #define INSN(NAME) \ 1301 void MacroAssembler::NAME(unsigned imm) { \ 1302 NAME(x0, imm); \ 1303 } 1304 1305 INSN(fsrmi); 1306 INSN(fsflagsi); 1307 1308 #undef INSN 1309 1310 void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp) { 1311 if (RestoreMXCSROnJNICalls) { 1312 Label skip_fsrmi; 1313 frrm(tmp); 1314 // Set FRM to the state we need. We do want Round to Nearest. 1315 // We don't want non-IEEE rounding modes. 1316 guarantee(RoundingMode::rne == 0, "must be"); 1317 beqz(tmp, skip_fsrmi); // Only reset FRM if it's wrong 1318 fsrmi(RoundingMode::rne); 1319 bind(skip_fsrmi); 1320 } 1321 } 1322 1323 void MacroAssembler::push_reg(Register Rs) 1324 { 1325 addi(esp, esp, 0 - wordSize); 1326 sd(Rs, Address(esp, 0)); 1327 } 1328 1329 void MacroAssembler::pop_reg(Register Rd) 1330 { 1331 ld(Rd, Address(esp, 0)); 1332 addi(esp, esp, wordSize); 1333 } 1334 1335 int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { 1336 int count = 0; 1337 // Scan bitset to accumulate register pairs 1338 for (int reg = 31; reg >= 0; reg--) { 1339 if ((1U << 31) & bitset) { 1340 regs[count++] = reg; 1341 } 1342 bitset <<= 1; 1343 } 1344 return count; 1345 } 1346 1347 // Push integer registers in the bitset supplied. Don't push sp. 
1348 // Return the number of words pushed 1349 int MacroAssembler::push_reg(unsigned int bitset, Register stack) { 1350 DEBUG_ONLY(int words_pushed = 0;) 1351 unsigned char regs[32]; 1352 int count = bitset_to_regs(bitset, regs); 1353 // reserve one slot to align for odd count 1354 int offset = is_even(count) ? 0 : wordSize; 1355 1356 if (count) { 1357 addi(stack, stack, -count * wordSize - offset); 1358 } 1359 for (int i = count - 1; i >= 0; i--) { 1360 sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); 1361 DEBUG_ONLY(words_pushed++;) 1362 } 1363 1364 assert(words_pushed == count, "oops, pushed != count"); 1365 1366 return count; 1367 } 1368 1369 int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { 1370 DEBUG_ONLY(int words_popped = 0;) 1371 unsigned char regs[32]; 1372 int count = bitset_to_regs(bitset, regs); 1373 // reserve one slot to align for odd count 1374 int offset = is_even(count) ? 0 : wordSize; 1375 1376 for (int i = count - 1; i >= 0; i--) { 1377 ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); 1378 DEBUG_ONLY(words_popped++;) 1379 } 1380 1381 if (count) { 1382 addi(stack, stack, count * wordSize + offset); 1383 } 1384 assert(words_popped == count, "oops, popped != count"); 1385 1386 return count; 1387 } 1388 1389 // Push floating-point registers in the bitset supplied. 1390 // Return the number of words pushed 1391 int MacroAssembler::push_fp(unsigned int bitset, Register stack) { 1392 DEBUG_ONLY(int words_pushed = 0;) 1393 unsigned char regs[32]; 1394 int count = bitset_to_regs(bitset, regs); 1395 int push_slots = count + (count & 1); 1396 1397 if (count) { 1398 addi(stack, stack, -push_slots * wordSize); 1399 } 1400 1401 for (int i = count - 1; i >= 0; i--) { 1402 fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); 1403 DEBUG_ONLY(words_pushed++;) 1404 } 1405 1406 assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); 1407 1408 return count; 1409 } 1410 1411 int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { 1412 DEBUG_ONLY(int words_popped = 0;) 1413 unsigned char regs[32]; 1414 int count = bitset_to_regs(bitset, regs); 1415 int pop_slots = count + (count & 1); 1416 1417 for (int i = count - 1; i >= 0; i--) { 1418 fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); 1419 DEBUG_ONLY(words_popped++;) 1420 } 1421 1422 if (count) { 1423 addi(stack, stack, pop_slots * wordSize); 1424 } 1425 1426 assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); 1427 1428 return count; 1429 } 1430 1431 #ifdef COMPILER2 1432 // Push vector registers in the bitset supplied. 
1433 // Return the number of words pushed 1434 int MacroAssembler::push_v(unsigned int bitset, Register stack) { 1435 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 1436 1437 // Scan bitset to accumulate register pairs 1438 unsigned char regs[32]; 1439 int count = bitset_to_regs(bitset, regs); 1440 1441 for (int i = 0; i < count; i++) { 1442 sub(stack, stack, vector_size_in_bytes); 1443 vs1r_v(as_VectorRegister(regs[i]), stack); 1444 } 1445 1446 return count * vector_size_in_bytes / wordSize; 1447 } 1448 1449 int MacroAssembler::pop_v(unsigned int bitset, Register stack) { 1450 int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); 1451 1452 // Scan bitset to accumulate register pairs 1453 unsigned char regs[32]; 1454 int count = bitset_to_regs(bitset, regs); 1455 1456 for (int i = count - 1; i >= 0; i--) { 1457 vl1r_v(as_VectorRegister(regs[i]), stack); 1458 add(stack, stack, vector_size_in_bytes); 1459 } 1460 1461 return count * vector_size_in_bytes / wordSize; 1462 } 1463 #endif // COMPILER2 1464 1465 void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { 1466 // Push integer registers x7, x10-x17, x28-x31. 1467 push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); 1468 1469 // Push float registers f0-f7, f10-f17, f28-f31. 1470 addi(sp, sp, - wordSize * 20); 1471 int offset = 0; 1472 for (int i = 0; i < 32; i++) { 1473 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { 1474 fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++))); 1475 } 1476 } 1477 } 1478 1479 void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { 1480 int offset = 0; 1481 for (int i = 0; i < 32; i++) { 1482 if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { 1483 fld(as_FloatRegister(i), Address(sp, wordSize * (offset++))); 1484 } 1485 } 1486 addi(sp, sp, wordSize * 20); 1487 1488 pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); 1489 } 1490 1491 void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { 1492 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) 1493 push_reg(RegSet::range(x5, x31), sp); 1494 1495 // float registers 1496 addi(sp, sp, - 32 * wordSize); 1497 for (int i = 0; i < 32; i++) { 1498 fsd(as_FloatRegister(i), Address(sp, i * wordSize)); 1499 } 1500 1501 // vector registers 1502 if (save_vectors) { 1503 sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers); 1504 vsetvli(t0, x0, Assembler::e64, Assembler::m8); 1505 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) { 1506 add(t0, sp, vector_size_in_bytes * i); 1507 vse64_v(as_VectorRegister(i), t0); 1508 } 1509 } 1510 } 1511 1512 void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { 1513 // vector registers 1514 if (restore_vectors) { 1515 vsetvli(t0, x0, Assembler::e64, Assembler::m8); 1516 for (int i = 0; i < VectorRegister::number_of_registers; i += 8) { 1517 vle64_v(as_VectorRegister(i), sp); 1518 add(sp, sp, vector_size_in_bytes * 8); 1519 } 1520 } 1521 1522 // float registers 1523 for (int i = 0; i < 32; i++) { 1524 fld(as_FloatRegister(i), Address(sp, i * wordSize)); 1525 } 1526 addi(sp, sp, 32 * wordSize); 1527 1528 // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) 1529 pop_reg(RegSet::range(x5, x31), sp); 1530 } 1531 1532 static int 
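// The helpers below rewrite the immediate fields of already emitted
// instructions in place. For reference, a JAL packs its +/- 1 MiB offset as
//   offset[20|10:1|11|19:12] ==> instruction bits [31|30:21|20|19:12]
// which is exactly the shuffling performed here and undone again in
// get_offset_of_jal() further down.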
patch_offset_in_jal(address branch, int64_t offset) { 1533 assert(Assembler::is_simm21(offset) && ((offset % 2) == 0), 1534 "offset is too large to be patched in one jal instruction!\n"); 1535 Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] 1536 Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] 1537 Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] 1538 Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] 1539 return MacroAssembler::instruction_size; // only one instruction 1540 } 1541 1542 static int patch_offset_in_conditional_branch(address branch, int64_t offset) { 1543 assert(Assembler::is_simm13(offset) && ((offset % 2) == 0), 1544 "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n"); 1545 Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] 1546 Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] 1547 Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] 1548 Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] 1549 return MacroAssembler::instruction_size; // only one instruction 1550 } 1551 1552 static int patch_offset_in_pc_relative(address branch, int64_t offset) { 1553 const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load 1554 Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] 1555 Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] 1556 return PC_RELATIVE_INSTRUCTION_NUM * MacroAssembler::instruction_size; 1557 } 1558 1559 static int patch_addr_in_movptr1(address branch, address target) { 1560 int32_t lower = ((intptr_t)target << 35) >> 35; 1561 int64_t upper = ((intptr_t)target - lower) >> 29; 1562 Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] 1563 Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] 1564 Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] 1565 Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. 
                                                              //                target[ 5: 0] ==> branch[31:20]
  return MacroAssembler::movptr1_instruction_size;
}

static int patch_addr_in_movptr2(address instruction_address, address target) {
  uintptr_t addr = (uintptr_t)target;

  assert(addr < (1ull << 48), "48-bit overflow in address constant");
  unsigned int upper18 = (addr >> 30ull);
  int lower30 = (addr & 0x3fffffffu);
  int low12 = (lower30 << 20) >> 20;
  int mid18 = ((lower30 - low12) >> 12);

  Assembler::patch(instruction_address + (MacroAssembler::instruction_size * 0), 31, 12, (upper18 & 0xfffff)); // Lui
  Assembler::patch(instruction_address + (MacroAssembler::instruction_size * 1), 31, 12, (mid18 & 0xfffff));   // Lui
                                                                                                                // Slli
                                                                                                                // Add
  Assembler::patch(instruction_address + (MacroAssembler::instruction_size * 4), 31, 20, low12 & 0xfff);       // Addi/Jalr/Load

  assert(MacroAssembler::target_addr_for_insn(instruction_address) == target, "Must be");

  return MacroAssembler::movptr2_instruction_size;
}

static int patch_imm_in_li64(address branch, address target) {
  const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi
  int64_t lower = (intptr_t)target & 0xffffffff;
  lower = lower - ((lower << 44) >> 44);
  int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower;
  int32_t upper = (tmp_imm - (int32_t)lower) >> 32;
  int64_t tmp_upper = upper, tmp_lower = upper;
  tmp_lower = (tmp_lower << 52) >> 52;
  tmp_upper -= tmp_lower;
  tmp_upper >>= 12;
  // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:20] == 0x7ff && target[19] == 1),
  // upper = target[63:32] + 1.
  Assembler::patch(branch + 0,  31, 12, tmp_upper & 0xfffff); // Lui.
  Assembler::patch(branch + 4,  31, 20, tmp_lower & 0xfff);   // Addi.
  // Load the rest 32 bits.
  Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff);           // Addi.
  Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi.
  Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff);                  // Addi.
  return LI64_INSTRUCTIONS_NUM * MacroAssembler::instruction_size;
}

static int patch_imm_in_li16u(address branch, uint16_t target) {
  Assembler::patch(branch, 31, 12, target); // patch lui only
  return MacroAssembler::instruction_size;
}

int MacroAssembler::patch_imm_in_li32(address branch, int32_t target) {
  const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw
  int64_t upper = (intptr_t)target;
  int32_t lower = (((int32_t)target) << 20) >> 20;
  upper -= lower;
  upper = (int32_t)upper;
  Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui.
  Assembler::patch(branch + 4, 31, 20, lower & 0xfff);           // Addiw.
1623 return LI32_INSTRUCTIONS_NUM * MacroAssembler::instruction_size; 1624 } 1625 1626 static long get_offset_of_jal(address insn_addr) { 1627 assert_cond(insn_addr != nullptr); 1628 long offset = 0; 1629 unsigned insn = Assembler::ld_instr(insn_addr); 1630 long val = (long)Assembler::sextract(insn, 31, 12); 1631 offset |= ((val >> 19) & 0x1) << 20; 1632 offset |= (val & 0xff) << 12; 1633 offset |= ((val >> 8) & 0x1) << 11; 1634 offset |= ((val >> 9) & 0x3ff) << 1; 1635 offset = (offset << 43) >> 43; 1636 return offset; 1637 } 1638 1639 static long get_offset_of_conditional_branch(address insn_addr) { 1640 long offset = 0; 1641 assert_cond(insn_addr != nullptr); 1642 unsigned insn = Assembler::ld_instr(insn_addr); 1643 offset = (long)Assembler::sextract(insn, 31, 31); 1644 offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); 1645 offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); 1646 offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); 1647 offset = (offset << 41) >> 41; 1648 return offset; 1649 } 1650 1651 static long get_offset_of_pc_relative(address insn_addr) { 1652 long offset = 0; 1653 assert_cond(insn_addr != nullptr); 1654 offset = ((long)(Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12))) << 12; // Auipc. 1655 offset += ((long)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addi/Jalr/Load. 1656 offset = (offset << 32) >> 32; 1657 return offset; 1658 } 1659 1660 static address get_target_of_movptr1(address insn_addr) { 1661 assert_cond(insn_addr != nullptr); 1662 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 29; // Lui. 1663 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 17; // Addi. 1664 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 6; // Addi. 1665 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)); // Addi/Jalr/Load. 1666 return (address) target_address; 1667 } 1668 1669 static address get_target_of_movptr2(address insn_addr) { 1670 assert_cond(insn_addr != nullptr); 1671 int32_t upper18 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + MacroAssembler::instruction_size * 0), 31, 12)) & 0xfffff); // Lui 1672 int32_t mid18 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + MacroAssembler::instruction_size * 1), 31, 12)) & 0xfffff); // Lui 1673 // 2 // Slli 1674 // 3 // Add 1675 int32_t low12 = ((Assembler::sextract(Assembler::ld_instr(insn_addr + MacroAssembler::instruction_size * 4), 31, 20))); // Addi/Jalr/Load. 1676 address ret = (address)(((intptr_t)upper18<<30ll) + ((intptr_t)mid18<<12ll) + low12); 1677 return ret; 1678 } 1679 1680 static address get_target_of_li64(address insn_addr) { 1681 assert_cond(insn_addr != nullptr); 1682 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 44; // Lui. 1683 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)) << 32; // Addi. 1684 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 12), 31, 20)) << 20; // Addi. 1685 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 20), 31, 20)) << 8; // Addi. 1686 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 28), 31, 20)); // Addi. 
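  // The five fields above mirror the sequence emitted by li64(): a 20-bit lui
  // part at shift 44, three 12-bit addi parts at shifts 32, 20 and 8, and the
  // final low byte, each extracted with sextract and summed back into the
  // 64-bit target address.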
1687 return (address)target_address; 1688 } 1689 1690 address MacroAssembler::get_target_of_li32(address insn_addr) { 1691 assert_cond(insn_addr != nullptr); 1692 intptr_t target_address = (((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr), 31, 12)) & 0xfffff) << 12; // Lui. 1693 target_address += ((int64_t)Assembler::sextract(Assembler::ld_instr(insn_addr + 4), 31, 20)); // Addiw. 1694 return (address)target_address; 1695 } 1696 1697 // Patch any kind of instruction; there may be several instructions. 1698 // Return the total length (in bytes) of the instructions. 1699 int MacroAssembler::pd_patch_instruction_size(address instruction_address, address target) { 1700 assert_cond(instruction_address != nullptr); 1701 int64_t offset = target - instruction_address; 1702 if (MacroAssembler::is_jal_at(instruction_address)) { // jal 1703 return patch_offset_in_jal(instruction_address, offset); 1704 } else if (MacroAssembler::is_branch_at(instruction_address)) { // beq/bge/bgeu/blt/bltu/bne 1705 return patch_offset_in_conditional_branch(instruction_address, offset); 1706 } else if (MacroAssembler::is_pc_relative_at(instruction_address)) { // auipc, addi/jalr/load 1707 return patch_offset_in_pc_relative(instruction_address, offset); 1708 } else if (MacroAssembler::is_movptr1_at(instruction_address)) { // movptr1 1709 return patch_addr_in_movptr1(instruction_address, target); 1710 } else if (MacroAssembler::is_movptr2_at(instruction_address)) { // movptr2 1711 return patch_addr_in_movptr2(instruction_address, target); 1712 } else if (MacroAssembler::is_li64_at(instruction_address)) { // li64 1713 return patch_imm_in_li64(instruction_address, target); 1714 } else if (MacroAssembler::is_li32_at(instruction_address)) { // li32 1715 int64_t imm = (intptr_t)target; 1716 return patch_imm_in_li32(instruction_address, (int32_t)imm); 1717 } else if (MacroAssembler::is_li16u_at(instruction_address)) { 1718 int64_t imm = (intptr_t)target; 1719 return patch_imm_in_li16u(instruction_address, (uint16_t)imm); 1720 } else { 1721 #ifdef ASSERT 1722 tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", 1723 Assembler::ld_instr(instruction_address), p2i(instruction_address)); 1724 Disassembler::decode(instruction_address - 16, instruction_address + 16); 1725 #endif 1726 ShouldNotReachHere(); 1727 return -1; 1728 } 1729 } 1730 1731 address MacroAssembler::target_addr_for_insn(address insn_addr) { 1732 long offset = 0; 1733 assert_cond(insn_addr != nullptr); 1734 if (MacroAssembler::is_jal_at(insn_addr)) { // jal 1735 offset = get_offset_of_jal(insn_addr); 1736 } else if (MacroAssembler::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne 1737 offset = get_offset_of_conditional_branch(insn_addr); 1738 } else if (MacroAssembler::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load 1739 offset = get_offset_of_pc_relative(insn_addr); 1740 } else if (MacroAssembler::is_movptr1_at(insn_addr)) { // movptr1 1741 return get_target_of_movptr1(insn_addr); 1742 } else if (MacroAssembler::is_movptr2_at(insn_addr)) { // movptr2 1743 return get_target_of_movptr2(insn_addr); 1744 } else if (MacroAssembler::is_li64_at(insn_addr)) { // li64 1745 return get_target_of_li64(insn_addr); 1746 } else if (MacroAssembler::is_li32_at(insn_addr)) { // li32 1747 return get_target_of_li32(insn_addr); 1748 } else { 1749 ShouldNotReachHere(); 1750 } 1751 return address(((uintptr_t)insn_addr + offset)); 1752 } 1753 1754 int MacroAssembler::patch_oop(address insn_addr, address o) { 
1755 // OOPs are either narrow (32 bits) or wide (48 bits). We encode 1756 // narrow OOPs by setting the upper 16 bits in the first 1757 // instruction. 1758 if (MacroAssembler::is_li32_at(insn_addr)) { 1759 // Move narrow OOP 1760 uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); 1761 return patch_imm_in_li32(insn_addr, (int32_t)n); 1762 } else if (MacroAssembler::is_movptr1_at(insn_addr)) { 1763 // Move wide OOP 1764 return patch_addr_in_movptr1(insn_addr, o); 1765 } else if (MacroAssembler::is_movptr2_at(insn_addr)) { 1766 // Move wide OOP 1767 return patch_addr_in_movptr2(insn_addr, o); 1768 } 1769 ShouldNotReachHere(); 1770 return -1; 1771 } 1772 1773 void MacroAssembler::reinit_heapbase() { 1774 if (UseCompressedOops) { 1775 if (Universe::is_fully_initialized()) { 1776 mv(xheapbase, CompressedOops::ptrs_base()); 1777 } else { 1778 ExternalAddress target(CompressedOops::ptrs_base_addr()); 1779 relocate(target.rspec(), [&] { 1780 int32_t offset; 1781 la(xheapbase, target.target(), offset); 1782 ld(xheapbase, Address(xheapbase, offset)); 1783 }); 1784 } 1785 } 1786 } 1787 1788 void MacroAssembler::movptr(Register Rd, address addr, Register temp) { 1789 int offset = 0; 1790 movptr(Rd, addr, offset, temp); 1791 addi(Rd, Rd, offset); 1792 } 1793 1794 void MacroAssembler::movptr(Register Rd, address addr, int32_t &offset, Register temp) { 1795 uint64_t uimm64 = (uint64_t)addr; 1796 #ifndef PRODUCT 1797 { 1798 char buffer[64]; 1799 snprintf(buffer, sizeof(buffer), "0x%" PRIx64, uimm64); 1800 block_comment(buffer); 1801 } 1802 #endif 1803 assert(uimm64 < (1ull << 48), "48-bit overflow in address constant"); 1804 1805 if (temp == noreg) { 1806 movptr1(Rd, uimm64, offset); 1807 } else { 1808 movptr2(Rd, uimm64, offset, temp); 1809 } 1810 } 1811 1812 void MacroAssembler::movptr1(Register Rd, uint64_t imm64, int32_t &offset) { 1813 // Load upper 31 bits 1814 // 1815 // In case of 11th bit of `lower` is 0, it's straightforward to understand. 1816 // In case of 11th bit of `lower` is 1, it's a bit tricky, to help understand, 1817 // imagine divide both `upper` and `lower` into 2 parts respectively, i.e. 1818 // [upper_20, upper_12], [lower_20, lower_12], they are the same just before 1819 // `lower = (lower << 52) >> 52;`. 1820 // After `upper -= lower;`, 1821 // upper_20' = upper_20 - (-1) == upper_20 + 1 1822 // upper_12 = 0x000 1823 // After `lui(Rd, upper);`, `Rd` = upper_20' << 12 1824 // Also divide `Rd` into 2 parts [Rd_20, Rd_12], 1825 // Rd_20 == upper_20' 1826 // Rd_12 == 0x000 1827 // After `addi(Rd, Rd, lower);`, 1828 // Rd_20 = upper_20' + (-1) == upper_20 + 1 - 1 = upper_20 1829 // Rd_12 = lower_12 1830 // So, finally Rd == [upper_20, lower_12] 1831 int64_t imm = imm64 >> 17; 1832 int64_t upper = imm, lower = imm; 1833 lower = (lower << 52) >> 52; 1834 upper -= lower; 1835 upper = (int32_t)upper; 1836 lui(Rd, upper); 1837 addi(Rd, Rd, lower); 1838 1839 // Load the rest 17 bits. 1840 slli(Rd, Rd, 11); 1841 addi(Rd, Rd, (imm64 >> 6) & 0x7ff); 1842 slli(Rd, Rd, 6); 1843 1844 // This offset will be used by following jalr/ld. 
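  // Resulting split (illustrative): imm64[47:17] comes from lui+addi above,
  // imm64[16:6] from the addi after the first slli, and imm64[5:0] is handed
  // back in `offset` so the consuming jalr/ld can fold it into its 12-bit
  // immediate.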
1845 offset = imm64 & 0x3f; 1846 } 1847
1848 void MacroAssembler::movptr2(Register Rd, uint64_t addr, int32_t &offset, Register tmp) { 1849 assert_different_registers(Rd, tmp, noreg); 1850 1851 // addr: [upper18, lower30[mid18, lower12]] 1852 1853 int64_t upper18 = addr >> 18; 1854 lui(tmp, upper18); 1855 1856 int64_t lower30 = addr & 0x3fffffff; 1857 int64_t mid18 = lower30, lower12 = lower30; 1858 lower12 = (lower12 << 52) >> 52; 1859 // For this tricky part (`mid18 -= lower12;` + `offset = lower12;`), 1860 // please refer to movptr1 above. 1861 mid18 -= (int32_t)lower12; 1862 lui(Rd, mid18); 1863 1864 slli(tmp, tmp, 18); 1865 add(Rd, Rd, tmp); 1866 1867 offset = lower12; 1868 } 1869
1870 void MacroAssembler::add(Register Rd, Register Rn, int64_t increment, Register temp) { 1871 if (is_simm12(increment)) { 1872 addi(Rd, Rn, increment); 1873 } else { 1874 assert_different_registers(Rn, temp); 1875 li(temp, increment); 1876 add(Rd, Rn, temp); 1877 } 1878 } 1879
1880 void MacroAssembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { 1881 if (is_simm12(increment)) { 1882 addiw(Rd, Rn, increment); 1883 } else { 1884 assert_different_registers(Rn, temp); 1885 li(temp, increment); 1886 addw(Rd, Rn, temp); 1887 } 1888 } 1889
1890 void MacroAssembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) { 1891 if (is_simm12(-decrement)) { 1892 addi(Rd, Rn, -decrement); 1893 } else { 1894 assert_different_registers(Rn, temp); 1895 li(temp, decrement); 1896 sub(Rd, Rn, temp); 1897 } 1898 } 1899
1900 void MacroAssembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { 1901 if (is_simm12(-decrement)) { 1902 addiw(Rd, Rn, -decrement); 1903 } else { 1904 assert_different_registers(Rn, temp); 1905 li(temp, decrement); 1906 subw(Rd, Rn, temp); 1907 } 1908 } 1909
1910 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { 1911 andr(Rd, Rs1, Rs2); 1912 sign_extend(Rd, Rd, 32); 1913 } 1914
1915 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { 1916 orr(Rd, Rs1, Rs2); 1917 sign_extend(Rd, Rd, 32); 1918 } 1919
1920 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { 1921 xorr(Rd, Rs1, Rs2); 1922 sign_extend(Rd, Rd, 32); 1923 } 1924
1925 // Rd = Rs1 & (~Rs2) 1926 void MacroAssembler::andn(Register Rd, Register Rs1, Register Rs2) { 1927 if (UseZbb) { 1928 Assembler::andn(Rd, Rs1, Rs2); 1929 return; 1930 } 1931 1932 notr(Rd, Rs2); 1933 andr(Rd, Rs1, Rd); 1934 } 1935
1936 // Rd = Rs1 | (~Rs2) 1937 void MacroAssembler::orn(Register Rd, Register Rs1, Register Rs2) { 1938 if (UseZbb) { 1939 Assembler::orn(Rd, Rs1, Rs2); 1940 return; 1941 } 1942 1943 notr(Rd, Rs2); 1944 orr(Rd, Rs1, Rd); 1945 } 1946
1947 // Note: load_unsigned_short used to be called load_unsigned_word.
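// The load_* helpers below return offset() sampled just before the load is
// emitted, so a caller can record where the memory access sits in the code
// stream (for instance, for implicit-exception bookkeeping -- an illustrative
// use, not something these helpers require).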
1948 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 1949 int off = offset(); 1950 lhu(dst, src); 1951 return off; 1952 } 1953 1954 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 1955 int off = offset(); 1956 lbu(dst, src); 1957 return off; 1958 } 1959 1960 int MacroAssembler::load_signed_short(Register dst, Address src) { 1961 int off = offset(); 1962 lh(dst, src); 1963 return off; 1964 } 1965 1966 int MacroAssembler::load_signed_byte(Register dst, Address src) { 1967 int off = offset(); 1968 lb(dst, src); 1969 return off; 1970 } 1971 1972 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 1973 switch (size_in_bytes) { 1974 case 8: ld(dst, src); break; 1975 case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; 1976 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 1977 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 1978 default: ShouldNotReachHere(); 1979 } 1980 } 1981 1982 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes) { 1983 switch (size_in_bytes) { 1984 case 8: sd(src, dst); break; 1985 case 4: sw(src, dst); break; 1986 case 2: sh(src, dst); break; 1987 case 1: sb(src, dst); break; 1988 default: ShouldNotReachHere(); 1989 } 1990 } 1991 1992 // granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register 1993 void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 1994 if (granularity != 1 && granularity != 2) { 1995 ShouldNotReachHere(); 1996 } 1997 if (AvoidUnalignedAccesses && (granularity != 2)) { 1998 assert_different_registers(dst, tmp); 1999 assert_different_registers(tmp, src.base()); 2000 is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1)); 2001 slli(tmp, tmp, 8); 2002 lbu(dst, src); 2003 add(dst, dst, tmp); 2004 } else { 2005 is_signed ? lh(dst, src) : lhu(dst, src); 2006 } 2007 } 2008 2009 // granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register 2010 void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) { 2011 if (AvoidUnalignedAccesses && (granularity != 4)) { 2012 switch(granularity) { 2013 case 1: 2014 assert_different_registers(dst, tmp, src.base()); 2015 lbu(dst, src); 2016 lbu(tmp, Address(src.base(), src.offset() + 1)); 2017 slli(tmp, tmp, 8); 2018 add(dst, dst, tmp); 2019 lbu(tmp, Address(src.base(), src.offset() + 2)); 2020 slli(tmp, tmp, 16); 2021 add(dst, dst, tmp); 2022 is_signed ? lb(tmp, Address(src.base(), src.offset() + 3)) : lbu(tmp, Address(src.base(), src.offset() + 3)); 2023 slli(tmp, tmp, 24); 2024 add(dst, dst, tmp); 2025 break; 2026 case 2: 2027 assert_different_registers(dst, tmp); 2028 assert_different_registers(tmp, src.base()); 2029 is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2)); 2030 slli(tmp, tmp, 16); 2031 lhu(dst, src); 2032 add(dst, dst, tmp); 2033 break; 2034 default: 2035 ShouldNotReachHere(); 2036 } 2037 } else { 2038 is_signed ? 
lw(dst, src) : lwu(dst, src); 2039 } 2040 } 2041 2042 // granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register 2043 void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) { 2044 if (AvoidUnalignedAccesses && (granularity != 8)) { 2045 switch(granularity){ 2046 case 1: 2047 assert_different_registers(dst, tmp, src.base()); 2048 lbu(dst, src); 2049 lbu(tmp, Address(src.base(), src.offset() + 1)); 2050 slli(tmp, tmp, 8); 2051 add(dst, dst, tmp); 2052 lbu(tmp, Address(src.base(), src.offset() + 2)); 2053 slli(tmp, tmp, 16); 2054 add(dst, dst, tmp); 2055 lbu(tmp, Address(src.base(), src.offset() + 3)); 2056 slli(tmp, tmp, 24); 2057 add(dst, dst, tmp); 2058 lbu(tmp, Address(src.base(), src.offset() + 4)); 2059 slli(tmp, tmp, 32); 2060 add(dst, dst, tmp); 2061 lbu(tmp, Address(src.base(), src.offset() + 5)); 2062 slli(tmp, tmp, 40); 2063 add(dst, dst, tmp); 2064 lbu(tmp, Address(src.base(), src.offset() + 6)); 2065 slli(tmp, tmp, 48); 2066 add(dst, dst, tmp); 2067 lbu(tmp, Address(src.base(), src.offset() + 7)); 2068 slli(tmp, tmp, 56); 2069 add(dst, dst, tmp); 2070 break; 2071 case 2: 2072 assert_different_registers(dst, tmp, src.base()); 2073 lhu(dst, src); 2074 lhu(tmp, Address(src.base(), src.offset() + 2)); 2075 slli(tmp, tmp, 16); 2076 add(dst, dst, tmp); 2077 lhu(tmp, Address(src.base(), src.offset() + 4)); 2078 slli(tmp, tmp, 32); 2079 add(dst, dst, tmp); 2080 lhu(tmp, Address(src.base(), src.offset() + 6)); 2081 slli(tmp, tmp, 48); 2082 add(dst, dst, tmp); 2083 break; 2084 case 4: 2085 assert_different_registers(dst, tmp); 2086 assert_different_registers(tmp, src.base()); 2087 lwu(tmp, Address(src.base(), src.offset() + 4)); 2088 slli(tmp, tmp, 32); 2089 lwu(dst, src); 2090 add(dst, dst, tmp); 2091 break; 2092 default: 2093 ShouldNotReachHere(); 2094 } 2095 } else { 2096 ld(dst, src); 2097 } 2098 } 2099 2100 2101 // reverse bytes in halfword in lower 16 bits and sign-extend 2102 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) 2103 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { 2104 if (UseZbb) { 2105 rev8(Rd, Rs); 2106 srai(Rd, Rd, 48); 2107 return; 2108 } 2109 assert_different_registers(Rs, tmp); 2110 assert_different_registers(Rd, tmp); 2111 srli(tmp, Rs, 8); 2112 andi(tmp, tmp, 0xFF); 2113 slli(Rd, Rs, 56); 2114 srai(Rd, Rd, 48); // sign-extend 2115 orr(Rd, Rd, tmp); 2116 } 2117 2118 // reverse bytes in lower word and sign-extend 2119 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) 2120 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2121 if (UseZbb) { 2122 rev8(Rd, Rs); 2123 srai(Rd, Rd, 32); 2124 return; 2125 } 2126 assert_different_registers(Rs, tmp1, tmp2); 2127 assert_different_registers(Rd, tmp1, tmp2); 2128 revb_h_w_u(Rd, Rs, tmp1, tmp2); 2129 slli(tmp2, Rd, 48); 2130 srai(tmp2, tmp2, 32); // sign-extend 2131 srli(Rd, Rd, 16); 2132 orr(Rd, Rd, tmp2); 2133 } 2134 2135 // reverse bytes in halfword in lower 16 bits and zero-extend 2136 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 2137 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { 2138 if (UseZbb) { 2139 rev8(Rd, Rs); 2140 srli(Rd, Rd, 48); 2141 return; 2142 } 2143 assert_different_registers(Rs, tmp); 2144 assert_different_registers(Rd, tmp); 2145 srli(tmp, Rs, 8); 2146 andi(tmp, tmp, 0xFF); 2147 andi(Rd, Rs, 0xFF); 2148 slli(Rd, Rd, 8); 2149 orr(Rd, Rd, tmp); 2150 } 2151 2152 // reverse 
bytes in halfwords in lower 32 bits and zero-extend 2153 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) 2154 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2155 if (UseZbb) { 2156 rev8(Rd, Rs); 2157 rori(Rd, Rd, 32); 2158 roriw(Rd, Rd, 16); 2159 zero_extend(Rd, Rd, 32); 2160 return; 2161 } 2162 assert_different_registers(Rs, tmp1, tmp2); 2163 assert_different_registers(Rd, tmp1, tmp2); 2164 srli(tmp2, Rs, 16); 2165 revb_h_h_u(tmp2, tmp2, tmp1); 2166 revb_h_h_u(Rd, Rs, tmp1); 2167 slli(tmp2, tmp2, 16); 2168 orr(Rd, Rd, tmp2); 2169 } 2170 2171 // This method is only used for revb_h 2172 // Rd = Rs[47:0] Rs[55:48] Rs[63:56] 2173 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2174 assert_different_registers(Rs, tmp1, tmp2); 2175 assert_different_registers(Rd, tmp1); 2176 srli(tmp1, Rs, 48); 2177 andi(tmp2, tmp1, 0xFF); 2178 slli(tmp2, tmp2, 8); 2179 srli(tmp1, tmp1, 8); 2180 orr(tmp1, tmp1, tmp2); 2181 slli(Rd, Rs, 16); 2182 orr(Rd, Rd, tmp1); 2183 } 2184 2185 // reverse bytes in each halfword 2186 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] 2187 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2188 if (UseZbb) { 2189 assert_different_registers(Rs, tmp1); 2190 assert_different_registers(Rd, tmp1); 2191 rev8(Rd, Rs); 2192 zero_extend(tmp1, Rd, 32); 2193 roriw(tmp1, tmp1, 16); 2194 slli(tmp1, tmp1, 32); 2195 srli(Rd, Rd, 32); 2196 roriw(Rd, Rd, 16); 2197 zero_extend(Rd, Rd, 32); 2198 orr(Rd, Rd, tmp1); 2199 return; 2200 } 2201 assert_different_registers(Rs, tmp1, tmp2); 2202 assert_different_registers(Rd, tmp1, tmp2); 2203 revb_h_helper(Rd, Rs, tmp1, tmp2); 2204 for (int i = 0; i < 3; ++i) { 2205 revb_h_helper(Rd, Rd, tmp1, tmp2); 2206 } 2207 } 2208 2209 // reverse bytes in each word 2210 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] 2211 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2212 if (UseZbb) { 2213 rev8(Rd, Rs); 2214 rori(Rd, Rd, 32); 2215 return; 2216 } 2217 assert_different_registers(Rs, tmp1, tmp2); 2218 assert_different_registers(Rd, tmp1, tmp2); 2219 revb(Rd, Rs, tmp1, tmp2); 2220 ror_imm(Rd, Rd, 32); 2221 } 2222 2223 // reverse bytes in doubleword 2224 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] 2225 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { 2226 if (UseZbb) { 2227 rev8(Rd, Rs); 2228 return; 2229 } 2230 assert_different_registers(Rs, tmp1, tmp2); 2231 assert_different_registers(Rd, tmp1, tmp2); 2232 andi(tmp1, Rs, 0xFF); 2233 slli(tmp1, tmp1, 8); 2234 for (int step = 8; step < 56; step += 8) { 2235 srli(tmp2, Rs, step); 2236 andi(tmp2, tmp2, 0xFF); 2237 orr(tmp1, tmp1, tmp2); 2238 slli(tmp1, tmp1, 8); 2239 } 2240 srli(Rd, Rs, 56); 2241 andi(Rd, Rd, 0xFF); 2242 orr(Rd, tmp1, Rd); 2243 } 2244 2245 // rotate right with shift bits 2246 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) 2247 { 2248 if (UseZbb) { 2249 rori(dst, src, shift); 2250 return; 2251 } 2252 2253 assert_different_registers(dst, tmp); 2254 assert_different_registers(src, tmp); 2255 assert(shift < 64, "shift amount must be < 64"); 2256 slli(tmp, src, 64 - shift); 2257 srli(dst, src, shift); 2258 orr(dst, dst, tmp); 2259 } 2260 2261 // rotate left with shift bits, 32-bit version 2262 void 
MacroAssembler::rolw_imm(Register dst, Register src, uint32_t shift, Register tmp) { 2263 if (UseZbb) { 2264 // no roliw available 2265 roriw(dst, src, 32 - shift); 2266 return; 2267 } 2268 2269 assert_different_registers(dst, tmp); 2270 assert_different_registers(src, tmp); 2271 assert(shift < 32, "shift amount must be < 32"); 2272 srliw(tmp, src, 32 - shift); 2273 slliw(dst, src, shift); 2274 orr(dst, dst, tmp); 2275 } 2276 2277 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { 2278 if (is_simm12(imm)) { 2279 and_imm12(Rd, Rn, imm); 2280 } else { 2281 assert_different_registers(Rn, tmp); 2282 mv(tmp, imm); 2283 andr(Rd, Rn, tmp); 2284 } 2285 } 2286 2287 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { 2288 ld(tmp1, adr); 2289 if (src.is_register()) { 2290 orr(tmp1, tmp1, src.as_register()); 2291 } else { 2292 if (is_simm12(src.as_constant())) { 2293 ori(tmp1, tmp1, src.as_constant()); 2294 } else { 2295 assert_different_registers(tmp1, tmp2); 2296 mv(tmp2, src.as_constant()); 2297 orr(tmp1, tmp1, tmp2); 2298 } 2299 } 2300 sd(tmp1, adr); 2301 } 2302 2303 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L) { 2304 assert_different_registers(oop, trial_klass, tmp1, tmp2); 2305 if (UseCompressedClassPointers) { 2306 lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2307 if (CompressedKlassPointers::base() == nullptr) { 2308 slli(tmp1, tmp1, CompressedKlassPointers::shift()); 2309 beq(trial_klass, tmp1, L); 2310 return; 2311 } 2312 decode_klass_not_null(tmp1, tmp2); 2313 } else { 2314 ld(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); 2315 } 2316 beq(trial_klass, tmp1, L); 2317 } 2318 2319 // Move an oop into a register. 2320 void MacroAssembler::movoop(Register dst, jobject obj) { 2321 int oop_index; 2322 if (obj == nullptr) { 2323 oop_index = oop_recorder()->allocate_oop_index(obj); 2324 } else { 2325 #ifdef ASSERT 2326 { 2327 ThreadInVMfromUnknown tiv; 2328 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 2329 } 2330 #endif 2331 oop_index = oop_recorder()->find_index(obj); 2332 } 2333 RelocationHolder rspec = oop_Relocation::spec(oop_index); 2334 2335 if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) { 2336 la(dst, Address((address)obj, rspec)); 2337 } else { 2338 address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address 2339 ld_constant(dst, Address(dummy, rspec)); 2340 } 2341 } 2342 2343 // Move a metadata address into a register. 2344 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 2345 assert((uintptr_t)obj < (1ull << 48), "48-bit overflow in metadata"); 2346 int oop_index; 2347 if (obj == nullptr) { 2348 oop_index = oop_recorder()->allocate_metadata_index(obj); 2349 } else { 2350 oop_index = oop_recorder()->find_index(obj); 2351 } 2352 RelocationHolder rspec = metadata_Relocation::spec(oop_index); 2353 la(dst, Address((address)obj, rspec)); 2354 } 2355 2356 // Writes to stack successive pages until offset reached to check for 2357 // stack overflow + shadow pages. This clobbers tmp. 2358 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 2359 assert_different_registers(tmp, size, t0); 2360 // Bang stack for total size given plus shadow page size. 2361 // Bang one page at a time because large size can bang beyond yellow and 2362 // red zones. 
2363 mv(t0, (int)os::vm_page_size()); 2364 Label loop; 2365 bind(loop); 2366 sub(tmp, sp, t0); 2367 subw(size, size, t0); 2368 sd(size, Address(tmp)); 2369 bgtz(size, loop); 2370 2371 // Bang down shadow pages too. 2372 // At this point, (tmp-0) is the last address touched, so don't 2373 // touch it again. (It was touched as (tmp-pagesize) but then tmp 2374 // was post-decremented.) Skip this address by starting at i=1, and 2375 // touch a few more pages below. N.B. It is important to touch all 2376 // the way down to and including i=StackShadowPages. 2377 for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / (int)os::vm_page_size()) - 1; i++) { 2378 // this could be any sized move but this is can be a debugging crumb 2379 // so the bigger the better. 2380 sub(tmp, tmp, (int)os::vm_page_size()); 2381 sd(size, Address(tmp, 0)); 2382 } 2383 } 2384 2385 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { 2386 int32_t offset = 0; 2387 _masm = masm; 2388 ExternalAddress target((address)flag_addr); 2389 _masm->relocate(target.rspec(), [&] { 2390 int32_t offset; 2391 _masm->la(t0, target.target(), offset); 2392 _masm->lbu(t0, Address(t0, offset)); 2393 }); 2394 if (value) { 2395 _masm->bnez(t0, _label); 2396 } else { 2397 _masm->beqz(t0, _label); 2398 } 2399 } 2400 2401 SkipIfEqual::~SkipIfEqual() { 2402 _masm->bind(_label); 2403 _masm = nullptr; 2404 } 2405 2406 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) { 2407 const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 2408 ld(dst, Address(xmethod, Method::const_offset())); 2409 ld(dst, Address(dst, ConstMethod::constants_offset())); 2410 ld(dst, Address(dst, ConstantPool::pool_holder_offset())); 2411 ld(dst, Address(dst, mirror_offset)); 2412 resolve_oop_handle(dst, tmp1, tmp2); 2413 } 2414 2415 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) { 2416 // OopHandle::resolve is an indirection. 2417 assert_different_registers(result, tmp1, tmp2); 2418 access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2); 2419 } 2420 2421 // ((WeakHandle)result).resolve() 2422 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) { 2423 assert_different_registers(result, tmp1, tmp2); 2424 Label resolved; 2425 2426 // A null weak handle resolves to null. 2427 beqz(result, resolved); 2428 2429 // Only 64 bit platforms support GCs that require a tmp register 2430 // Only IN_HEAP loads require a thread_tmp register 2431 // WeakHandle::resolve is an indirection like jweak. 
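  // Net effect (ignoring the GC barrier machinery, illustrative only):
  // result = *(oop*)result, with ON_PHANTOM_OOP_REF telling the barrier that
  // the referent is only phantom reachable.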
2432 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, 2433 result, Address(result), tmp1, tmp2); 2434 bind(resolved); 2435 } 2436 2437 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 2438 Register dst, Address src, 2439 Register tmp1, Register tmp2) { 2440 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2441 decorators = AccessInternal::decorator_fixup(decorators, type); 2442 bool as_raw = (decorators & AS_RAW) != 0; 2443 if (as_raw) { 2444 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2); 2445 } else { 2446 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2); 2447 } 2448 } 2449 2450 void MacroAssembler::null_check(Register reg, int offset) { 2451 if (needs_explicit_null_check(offset)) { 2452 // provoke OS null exception if reg is null by 2453 // accessing M[reg] w/o changing any registers 2454 // NOTE: this is plenty to provoke a segv 2455 ld(zr, Address(reg, 0)); 2456 } else { 2457 // nothing to do, (later) access of M[reg + offset] 2458 // will provoke OS null exception if reg is null 2459 } 2460 } 2461 2462 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 2463 Address dst, Register val, 2464 Register tmp1, Register tmp2, Register tmp3) { 2465 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2466 decorators = AccessInternal::decorator_fixup(decorators, type); 2467 bool as_raw = (decorators & AS_RAW) != 0; 2468 if (as_raw) { 2469 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2470 } else { 2471 bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3); 2472 } 2473 } 2474 2475 // Algorithm must match CompressedOops::encode. 2476 void MacroAssembler::encode_heap_oop(Register d, Register s) { 2477 verify_oop_msg(s, "broken oop in encode_heap_oop"); 2478 if (CompressedOops::base() == nullptr) { 2479 if (CompressedOops::shift() != 0) { 2480 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2481 srli(d, s, LogMinObjAlignmentInBytes); 2482 } else { 2483 mv(d, s); 2484 } 2485 } else { 2486 Label notNull; 2487 sub(d, s, xheapbase); 2488 bgez(d, notNull); 2489 mv(d, zr); 2490 bind(notNull); 2491 if (CompressedOops::shift() != 0) { 2492 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2493 srli(d, d, CompressedOops::shift()); 2494 } 2495 } 2496 } 2497 2498 void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { 2499 assert_different_registers(dst, tmp); 2500 assert_different_registers(src, tmp); 2501 if (UseCompressedClassPointers) { 2502 lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2503 decode_klass_not_null(dst, tmp); 2504 } else { 2505 ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); 2506 } 2507 } 2508 2509 void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { 2510 // FIXME: Should this be a store release? concurrent gcs assumes 2511 // klass length is valid if klass field is not null. 
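  // Sketch of the two paths below: with compressed class pointers the klass is
  // stored as a 32-bit narrow value (roughly (klass - base) >> shift, see
  // encode_klass_not_null); otherwise the full 64-bit pointer is stored.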
2512 if (UseCompressedClassPointers) { 2513 encode_klass_not_null(src, tmp); 2514 sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2515 } else { 2516 sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); 2517 } 2518 } 2519 2520 void MacroAssembler::store_klass_gap(Register dst, Register src) { 2521 if (UseCompressedClassPointers) { 2522 // Store to klass gap in destination 2523 sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2524 } 2525 } 2526 2527 void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { 2528 assert_different_registers(r, tmp); 2529 decode_klass_not_null(r, r, tmp); 2530 } 2531 2532 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { 2533 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2534 2535 if (CompressedKlassPointers::base() == nullptr) { 2536 if (CompressedKlassPointers::shift() != 0) { 2537 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2538 slli(dst, src, LogKlassAlignmentInBytes); 2539 } else { 2540 mv(dst, src); 2541 } 2542 return; 2543 } 2544 2545 Register xbase = dst; 2546 if (dst == src) { 2547 xbase = tmp; 2548 } 2549 2550 assert_different_registers(src, xbase); 2551 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2552 2553 if (CompressedKlassPointers::shift() != 0) { 2554 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2555 assert_different_registers(t0, xbase); 2556 shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); 2557 } else { 2558 add(dst, xbase, src); 2559 } 2560 } 2561 2562 void MacroAssembler::encode_klass_not_null(Register r, Register tmp) { 2563 assert_different_registers(r, tmp); 2564 encode_klass_not_null(r, r, tmp); 2565 } 2566 2567 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { 2568 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2569 2570 if (CompressedKlassPointers::base() == nullptr) { 2571 if (CompressedKlassPointers::shift() != 0) { 2572 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2573 srli(dst, src, LogKlassAlignmentInBytes); 2574 } else { 2575 mv(dst, src); 2576 } 2577 return; 2578 } 2579 2580 if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && 2581 CompressedKlassPointers::shift() == 0) { 2582 zero_extend(dst, src, 32); 2583 return; 2584 } 2585 2586 Register xbase = dst; 2587 if (dst == src) { 2588 xbase = tmp; 2589 } 2590 2591 assert_different_registers(src, xbase); 2592 mv(xbase, (uintptr_t)CompressedKlassPointers::base()); 2593 sub(dst, src, xbase); 2594 if (CompressedKlassPointers::shift() != 0) { 2595 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); 2596 srli(dst, dst, LogKlassAlignmentInBytes); 2597 } 2598 } 2599 2600 void MacroAssembler::decode_heap_oop_not_null(Register r) { 2601 decode_heap_oop_not_null(r, r); 2602 } 2603 2604 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2605 assert(UseCompressedOops, "should only be used for compressed headers"); 2606 assert(Universe::heap() != nullptr, "java heap should be initialized"); 2607 // Cannot assert, unverified entry point counts instructions (see .ad file) 2608 // vtableStubs also counts instructions in pd_code_size_limit. 2609 // Also do not verify_oop as this is called by verify_oop. 
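  // Decode sketch (illustrative): oop = ((uint64_t)narrow << shift) + base.
  // When shift == 0 the encoding requires a null base and this collapses to a
  // plain register move.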
2610 if (CompressedOops::shift() != 0) { 2611 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); 2612 slli(dst, src, LogMinObjAlignmentInBytes); 2613 if (CompressedOops::base() != nullptr) { 2614 add(dst, xheapbase, dst); 2615 } 2616 } else { 2617 assert(CompressedOops::base() == nullptr, "sanity"); 2618 mv(dst, src); 2619 } 2620 } 2621
2622 void MacroAssembler::decode_heap_oop(Register d, Register s) { 2623 if (CompressedOops::base() == nullptr) { 2624 if (CompressedOops::shift() != 0 || d != s) { 2625 slli(d, s, CompressedOops::shift()); 2626 } 2627 } else { 2628 Label done; 2629 mv(d, s); 2630 beqz(s, done); 2631 shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); 2632 bind(done); 2633 } 2634 verify_oop_msg(d, "broken oop in decode_heap_oop"); 2635 } 2636
2637 void MacroAssembler::store_heap_oop(Address dst, Register val, Register tmp1, 2638 Register tmp2, Register tmp3, DecoratorSet decorators) { 2639 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, val, tmp1, tmp2, tmp3); 2640 } 2641
2642 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, 2643 Register tmp2, DecoratorSet decorators) { 2644 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); 2645 } 2646
2647 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, 2648 Register tmp2, DecoratorSet decorators) { 2649 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2); 2650 } 2651
2652 // Used for storing nulls. 2653 void MacroAssembler::store_heap_oop_null(Address dst) { 2654 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); 2655 } 2656
2657 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, 2658 bool want_remainder, bool is_signed) 2659 { 2660 // Full implementation of Java idiv and irem. The function 2661 // returns the (pc) offset of the div instruction - may be needed 2662 // for implicit exceptions. 2663 // 2664 // input : rs1: dividend 2665 // rs2: divisor 2666 // 2667 // result: either 2668 // quotient (= rs1 idiv rs2) 2669 // remainder (= rs1 irem rs2) 2670 2671 2672 int idivl_offset = offset(); 2673 if (!want_remainder) { 2674 if (is_signed) { 2675 divw(result, rs1, rs2); 2676 } else { 2677 divuw(result, rs1, rs2); 2678 } 2679 } else { 2680 // result = rs1 % rs2; 2681 if (is_signed) { 2682 remw(result, rs1, rs2); 2683 } else { 2684 remuw(result, rs1, rs2); 2685 } 2686 } 2687 return idivl_offset; 2688 } 2689
2690 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, 2691 bool want_remainder, bool is_signed) 2692 { 2693 // Full implementation of Java ldiv and lrem. The function 2694 // returns the (pc) offset of the div instruction - may be needed 2695 // for implicit exceptions. 2696 // 2697 // input : rs1: dividend 2698 // rs2: divisor 2699 // 2700 // result: either 2701 // quotient (= rs1 ldiv rs2) 2702 // remainder (= rs1 lrem rs2) 2703 2704 int idivq_offset = offset(); 2705 if (!want_remainder) { 2706 if (is_signed) { 2707 div(result, rs1, rs2); 2708 } else { 2709 divu(result, rs1, rs2); 2710 } 2711 } else { 2712 // result = rs1 % rs2; 2713 if (is_signed) { 2714 rem(result, rs1, rs2); 2715 } else { 2716 remu(result, rs1, rs2); 2717 } 2718 } 2719 return idivq_offset; 2720 } 2721
2722 // Look up the method for a megamorphic invokeinterface call. 2723 // The target method is determined by <intf_klass, itable_index>. 2724 // The receiver klass is in recv_klass.
2725 // On success, the result will be in method_result, and execution falls through. 2726 // On failure, execution transfers to the given label.
2727 void MacroAssembler::lookup_interface_method(Register recv_klass, 2728 Register intf_klass, 2729 RegisterOrConstant itable_index, 2730 Register method_result, 2731 Register scan_tmp, 2732 Label& L_no_such_interface, 2733 bool return_method) { 2734 assert_different_registers(recv_klass, intf_klass, scan_tmp); 2735 assert_different_registers(method_result, intf_klass, scan_tmp); 2736 assert(recv_klass != method_result || !return_method, 2737 "recv_klass can be destroyed when method isn't needed"); 2738 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 2739 "caller must use same register for non-constant itable index as for method"); 2740
2741 // Compute start of first itableOffsetEntry (which is at the end of the vtable). 2742 int vtable_base = in_bytes(Klass::vtable_start_offset()); 2743 int itentry_off = in_bytes(itableMethodEntry::method_offset()); 2744 int scan_step = itableOffsetEntry::size() * wordSize; 2745 int vte_size = vtableEntry::size_in_bytes(); 2746 assert(vte_size == wordSize, "else adjust times_vte_scale"); 2747
2748 lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); 2749
2750 // Could store the aligned, prescaled offset in the klass. 2751 shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); 2752 add(scan_tmp, scan_tmp, vtable_base); 2753
2754 if (return_method) { 2755 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 2756 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 2757 if (itable_index.is_register()) { 2758 slli(t0, itable_index.as_register(), 3); 2759 } else { 2760 mv(t0, itable_index.as_constant() << 3); 2761 } 2762 add(recv_klass, recv_klass, t0); 2763 if (itentry_off) { 2764 add(recv_klass, recv_klass, itentry_off); 2765 } 2766 } 2767
2768 Label search, found_method; 2769
2770 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset())); 2771 beq(intf_klass, method_result, found_method); 2772 bind(search); 2773 // Check that the previous entry is non-null. A null entry means that 2774 // the receiver class doesn't implement the interface, and wasn't the 2775 // same as when the caller was compiled. 2776 beqz(method_result, L_no_such_interface, /* is_far */ true); 2777 addi(scan_tmp, scan_tmp, scan_step); 2778 ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset())); 2779 bne(intf_klass, method_result, search); 2780
2781 bind(found_method); 2782
2783 // Got a hit. 2784 if (return_method) { 2785 lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset())); 2786 add(method_result, recv_klass, scan_tmp); 2787 ld(method_result, Address(method_result)); 2788 } 2789 } 2790
2791 // Look up the method for a megamorphic invokeinterface call in a single pass over itable: 2792 // - check recv_klass (actual object class) is a subtype of resolved_klass from CompiledICData 2793 // - find a holder_klass (class that implements the method) vtable offset and get the method from vtable by index 2794 // The target method is determined by <holder_klass, itable_index>. 2795 // The receiver klass is in recv_klass. 2796 // On success, the result will be in method_result, and execution falls through. 2797 // On failure, execution transfers to the given label.
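// Roughly: the scan below walks the itable once, looking for resolved_klass
// (finding it proves recv_klass implements the resolved interface) while
// remembering where holder_klass's entry was seen; the target method is then
// loaded via the holder's recorded offset plus itable_index.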
2798 void MacroAssembler::lookup_interface_method_stub(Register recv_klass, 2799 Register holder_klass, 2800 Register resolved_klass, 2801 Register method_result, 2802 Register temp_itbl_klass, 2803 Register scan_temp, 2804 int itable_index, 2805 Label& L_no_such_interface) { 2806 // 'method_result' is only used as output register at the very end of this method. 2807 // Until then we can reuse it as 'holder_offset'. 2808 Register holder_offset = method_result; 2809 assert_different_registers(resolved_klass, recv_klass, holder_klass, temp_itbl_klass, scan_temp, holder_offset); 2810 2811 int vtable_start_offset_bytes = in_bytes(Klass::vtable_start_offset()); 2812 int scan_step = itableOffsetEntry::size() * wordSize; 2813 int ioffset_bytes = in_bytes(itableOffsetEntry::interface_offset()); 2814 int ooffset_bytes = in_bytes(itableOffsetEntry::offset_offset()); 2815 int itmentry_off_bytes = in_bytes(itableMethodEntry::method_offset()); 2816 const int vte_scale = exact_log2(vtableEntry::size_in_bytes()); 2817 2818 Label L_loop_search_resolved_entry, L_resolved_found, L_holder_found; 2819 2820 lwu(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); 2821 add(recv_klass, recv_klass, vtable_start_offset_bytes + ioffset_bytes); 2822 // itableOffsetEntry[] itable = recv_klass + Klass::vtable_start_offset() 2823 // + sizeof(vtableEntry) * (recv_klass->_vtable_len); 2824 // scan_temp = &(itable[0]._interface) 2825 // temp_itbl_klass = itable[0]._interface; 2826 shadd(scan_temp, scan_temp, recv_klass, scan_temp, vte_scale); 2827 ld(temp_itbl_klass, Address(scan_temp)); 2828 mv(holder_offset, zr); 2829 2830 // Initial checks: 2831 // - if (holder_klass != resolved_klass), go to "scan for resolved" 2832 // - if (itable[0] == holder_klass), shortcut to "holder found" 2833 // - if (itable[0] == 0), no such interface 2834 bne(resolved_klass, holder_klass, L_loop_search_resolved_entry); 2835 beq(holder_klass, temp_itbl_klass, L_holder_found); 2836 beqz(temp_itbl_klass, L_no_such_interface); 2837 2838 // Loop: Look for holder_klass record in itable 2839 // do { 2840 // temp_itbl_klass = *(scan_temp += scan_step); 2841 // if (temp_itbl_klass == holder_klass) { 2842 // goto L_holder_found; // Found! 2843 // } 2844 // } while (temp_itbl_klass != 0); 2845 // goto L_no_such_interface // Not found. 2846 Label L_search_holder; 2847 bind(L_search_holder); 2848 add(scan_temp, scan_temp, scan_step); 2849 ld(temp_itbl_klass, Address(scan_temp)); 2850 beq(holder_klass, temp_itbl_klass, L_holder_found); 2851 bnez(temp_itbl_klass, L_search_holder); 2852 2853 j(L_no_such_interface); 2854 2855 // Loop: Look for resolved_class record in itable 2856 // while (true) { 2857 // temp_itbl_klass = *(scan_temp += scan_step); 2858 // if (temp_itbl_klass == 0) { 2859 // goto L_no_such_interface; 2860 // } 2861 // if (temp_itbl_klass == resolved_klass) { 2862 // goto L_resolved_found; // Found! 2863 // } 2864 // if (temp_itbl_klass == holder_klass) { 2865 // holder_offset = scan_temp; 2866 // } 2867 // } 2868 // 2869 Label L_loop_search_resolved; 2870 bind(L_loop_search_resolved); 2871 add(scan_temp, scan_temp, scan_step); 2872 ld(temp_itbl_klass, Address(scan_temp)); 2873 bind(L_loop_search_resolved_entry); 2874 beqz(temp_itbl_klass, L_no_such_interface); 2875 beq(resolved_klass, temp_itbl_klass, L_resolved_found); 2876 bne(holder_klass, temp_itbl_klass, L_loop_search_resolved); 2877 mv(holder_offset, scan_temp); 2878 j(L_loop_search_resolved); 2879 2880 // See if we already have a holder klass. If not, go and scan for it. 
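  // (holder_offset is still zero at this point only if the holder_klass entry
  // has not been encountered during the scan above.)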
2881 bind(L_resolved_found); 2882 beqz(holder_offset, L_search_holder); 2883 mv(scan_temp, holder_offset); 2884 2885 // Finally, scan_temp contains holder_klass vtable offset 2886 bind(L_holder_found); 2887 lwu(method_result, Address(scan_temp, ooffset_bytes - ioffset_bytes)); 2888 add(recv_klass, recv_klass, itable_index * wordSize + itmentry_off_bytes 2889 - vtable_start_offset_bytes - ioffset_bytes); // substract offsets to restore the original value of recv_klass 2890 add(method_result, recv_klass, method_result); 2891 ld(method_result, Address(method_result)); 2892 } 2893 2894 // virtual method calling 2895 void MacroAssembler::lookup_virtual_method(Register recv_klass, 2896 RegisterOrConstant vtable_index, 2897 Register method_result) { 2898 const ByteSize base = Klass::vtable_start_offset(); 2899 assert(vtableEntry::size() * wordSize == 8, 2900 "adjust the scaling in the code below"); 2901 int vtable_offset_in_bytes = in_bytes(base + vtableEntry::method_offset()); 2902 2903 if (vtable_index.is_register()) { 2904 shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); 2905 ld(method_result, Address(method_result, vtable_offset_in_bytes)); 2906 } else { 2907 vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; 2908 ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); 2909 } 2910 } 2911 2912 void MacroAssembler::membar(uint32_t order_constraint) { 2913 address prev = pc() - MacroAssembler::instruction_size; 2914 address last = code()->last_insn(); 2915 2916 if (last != nullptr && is_membar(last) && prev == last) { 2917 // We are merging two memory barrier instructions. On RISCV we 2918 // can do this simply by ORing them together. 2919 set_membar_kind(prev, get_membar_kind(prev) | order_constraint); 2920 BLOCK_COMMENT("merged membar"); 2921 } else { 2922 code()->set_last_insn(pc()); 2923 2924 uint32_t predecessor = 0; 2925 uint32_t successor = 0; 2926 2927 membar_mask_to_pred_succ(order_constraint, predecessor, successor); 2928 fence(predecessor, successor); 2929 } 2930 } 2931 2932 // Form an address from base + offset in Rd. Rd my or may not 2933 // actually be used: you must use the Address that is returned. It 2934 // is up to you to ensure that the shift provided matches the size 2935 // of your data. 2936 Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset) { 2937 if (is_simm12(byte_offset)) { // 12: imm in range 2^12 2938 return Address(base, byte_offset); 2939 } 2940 2941 assert_different_registers(Rd, base, noreg); 2942 2943 // Do it the hard way 2944 mv(Rd, byte_offset); 2945 add(Rd, base, Rd); 2946 return Address(Rd); 2947 } 2948 2949 void MacroAssembler::check_klass_subtype(Register sub_klass, 2950 Register super_klass, 2951 Register tmp_reg, 2952 Label& L_success) { 2953 Label L_failure; 2954 check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, nullptr); 2955 check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, nullptr); 2956 bind(L_failure); 2957 } 2958 2959 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { 2960 ld(t0, Address(xthread, JavaThread::polling_word_offset())); 2961 if (acquire) { 2962 membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); 2963 } 2964 if (at_return) { 2965 bgtu(in_nmethod ? 
sp : fp, t0, slow_path, /* is_far */ true); 2966 } else { 2967 test_bit(t0, t0, exact_log2(SafepointMechanism::poll_bit())); 2968 bnez(t0, slow_path, true /* is_far */); 2969 } 2970 } 2971 2972 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, 2973 Label &succeed, Label *fail) { 2974 assert_different_registers(addr, tmp, t0); 2975 assert_different_registers(newv, tmp, t0); 2976 assert_different_registers(oldv, tmp, t0); 2977 2978 // oldv holds comparison value 2979 // newv holds value to write in exchange 2980 // addr identifies memory word to compare against/update 2981 if (UseZacas) { 2982 mv(tmp, oldv); 2983 atomic_cas(tmp, newv, addr, Assembler::int64, Assembler::aq, Assembler::rl); 2984 beq(tmp, oldv, succeed); 2985 } else { 2986 Label retry_load, nope; 2987 bind(retry_load); 2988 // Load reserved from the memory location 2989 load_reserved(tmp, addr, int64, Assembler::aqrl); 2990 // Fail and exit if it is not what we expect 2991 bne(tmp, oldv, nope); 2992 // If the store conditional succeeds, tmp will be zero 2993 store_conditional(tmp, newv, addr, int64, Assembler::rl); 2994 beqz(tmp, succeed); 2995 // Retry only when the store conditional failed 2996 j(retry_load); 2997 2998 bind(nope); 2999 } 3000 3001 // neither amocas nor lr/sc have an implied barrier in the failing case 3002 membar(AnyAny); 3003 3004 mv(oldv, tmp); 3005 if (fail != nullptr) { 3006 j(*fail); 3007 } 3008 } 3009 3010 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, 3011 Label &succeed, Label *fail) { 3012 assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); 3013 cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); 3014 } 3015 3016 void MacroAssembler::load_reserved(Register dst, 3017 Register addr, 3018 enum operand_size size, 3019 Assembler::Aqrl acquire) { 3020 switch (size) { 3021 case int64: 3022 lr_d(dst, addr, acquire); 3023 break; 3024 case int32: 3025 lr_w(dst, addr, acquire); 3026 break; 3027 case uint32: 3028 lr_w(dst, addr, acquire); 3029 zero_extend(dst, dst, 32); 3030 break; 3031 default: 3032 ShouldNotReachHere(); 3033 } 3034 } 3035 3036 void MacroAssembler::store_conditional(Register dst, 3037 Register new_val, 3038 Register addr, 3039 enum operand_size size, 3040 Assembler::Aqrl release) { 3041 switch (size) { 3042 case int64: 3043 sc_d(dst, new_val, addr, release); 3044 break; 3045 case int32: 3046 case uint32: 3047 sc_w(dst, new_val, addr, release); 3048 break; 3049 default: 3050 ShouldNotReachHere(); 3051 } 3052 } 3053 3054 3055 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, 3056 Register new_val, 3057 enum operand_size size, 3058 Register tmp1, Register tmp2, Register tmp3) { 3059 assert(size == int8 || size == int16, "unsupported operand size"); 3060 3061 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; 3062 3063 andi(shift, addr, 3); 3064 slli(shift, shift, 3); 3065 3066 andi(aligned_addr, addr, ~3); 3067 3068 if (size == int8) { 3069 mv(mask, 0xff); 3070 } else { 3071 // size == int16 case 3072 mv(mask, -1); 3073 zero_extend(mask, mask, 16); 3074 } 3075 sll(mask, mask, shift); 3076 3077 notr(not_mask, mask); 3078 3079 sll(expected, expected, shift); 3080 andr(expected, expected, mask); 3081 3082 sll(new_val, new_val, shift); 3083 andr(new_val, new_val, mask); 3084 } 3085 3086 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. 
3087 // It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w or amocas.w, 3088 // which are forced to work with a 4-byte aligned address.
3089 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, 3090 Register new_val, 3091 enum operand_size size, 3092 Assembler::Aqrl acquire, Assembler::Aqrl release, 3093 Register result, bool result_as_bool, 3094 Register tmp1, Register tmp2, Register tmp3) { 3095 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 3096 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 3097 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 3098
3099 Label retry, fail, done; 3100
3101 bind(retry); 3102
3103 if (UseZacas) { 3104 lw(old, aligned_addr); 3105 3106 // if old & mask != expected 3107 andr(tmp, old, mask); 3108 bne(tmp, expected, fail); 3109 3110 andr(tmp, old, not_mask); 3111 orr(tmp, tmp, new_val); 3112 3113 atomic_cas(old, tmp, aligned_addr, operand_size::int32, acquire, release); 3114 bne(tmp, old, retry); 3115 } else { 3116 lr_w(old, aligned_addr, acquire); 3117 andr(tmp, old, mask); 3118 bne(tmp, expected, fail); 3119 3120 andr(tmp, old, not_mask); 3121 orr(tmp, tmp, new_val); 3122 sc_w(tmp, tmp, aligned_addr, release); 3123 bnez(tmp, retry); 3124 } 3125
3126 if (result_as_bool) { 3127 mv(result, 1); 3128 j(done); 3129 3130 bind(fail); 3131 mv(result, zr); 3132 3133 bind(done); 3134 } else { 3135 andr(tmp, old, mask); 3136 3137 bind(fail); 3138 srl(result, tmp, shift); 3139 3140 if (size == int8) { 3141 sign_extend(result, result, 8); 3142 } else { 3143 // size == int16 case 3144 sign_extend(result, result, 16); 3145 } 3146 } 3147 } 3148
3149 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement 3150 // the weak CAS stuff. The major difference is that it simply fails when the store conditional 3151 // fails.
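// Worked example of the masking set up by cmpxchg_narrow_value_helper
// (illustrative): for a byte at an address with (addr & 3) == 2, aligned_addr
// is addr & ~3, shift = 16, mask = 0x00ff0000 and not_mask = ~mask; expected
// and new_val are pre-shifted into that byte lane before the word-sized CAS.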
3152 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, 3153 Register new_val, 3154 enum operand_size size, 3155 Assembler::Aqrl acquire, Assembler::Aqrl release, 3156 Register result, 3157 Register tmp1, Register tmp2, Register tmp3) { 3158 Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; 3159 assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); 3160 cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); 3161 3162 Label fail, done; 3163 3164 if (UseZacas) { 3165 lw(old, aligned_addr); 3166 3167 // if old & mask != expected 3168 andr(tmp, old, mask); 3169 bne(tmp, expected, fail); 3170 3171 andr(tmp, old, not_mask); 3172 orr(tmp, tmp, new_val); 3173 3174 atomic_cas(tmp, new_val, addr, operand_size::int32, acquire, release); 3175 bne(tmp, old, fail); 3176 } else { 3177 lr_w(old, aligned_addr, acquire); 3178 andr(tmp, old, mask); 3179 bne(tmp, expected, fail); 3180 3181 andr(tmp, old, not_mask); 3182 orr(tmp, tmp, new_val); 3183 sc_w(tmp, tmp, aligned_addr, release); 3184 bnez(tmp, fail); 3185 } 3186 3187 // Success 3188 mv(result, 1); 3189 j(done); 3190 3191 // Fail 3192 bind(fail); 3193 mv(result, zr); 3194 3195 bind(done); 3196 } 3197 3198 void MacroAssembler::cmpxchg(Register addr, Register expected, 3199 Register new_val, 3200 enum operand_size size, 3201 Assembler::Aqrl acquire, Assembler::Aqrl release, 3202 Register result, bool result_as_bool) { 3203 assert(size != int8 && size != int16, "unsupported operand size"); 3204 assert_different_registers(addr, t0); 3205 assert_different_registers(expected, t0); 3206 assert_different_registers(new_val, t0); 3207 3208 if (UseZacas) { 3209 if (result_as_bool) { 3210 mv(t0, expected); 3211 atomic_cas(t0, new_val, addr, size, acquire, release); 3212 xorr(t0, t0, expected); 3213 seqz(result, t0); 3214 } else { 3215 mv(result, expected); 3216 atomic_cas(result, new_val, addr, size, acquire, release); 3217 } 3218 return; 3219 } 3220 3221 Label retry_load, done, ne_done; 3222 bind(retry_load); 3223 load_reserved(t0, addr, size, acquire); 3224 bne(t0, expected, ne_done); 3225 store_conditional(t0, new_val, addr, size, release); 3226 bnez(t0, retry_load); 3227 3228 // equal, succeed 3229 if (result_as_bool) { 3230 mv(result, 1); 3231 } else { 3232 mv(result, expected); 3233 } 3234 j(done); 3235 3236 // not equal, failed 3237 bind(ne_done); 3238 if (result_as_bool) { 3239 mv(result, zr); 3240 } else { 3241 mv(result, t0); 3242 } 3243 3244 bind(done); 3245 } 3246 3247 void MacroAssembler::cmpxchg_weak(Register addr, Register expected, 3248 Register new_val, 3249 enum operand_size size, 3250 Assembler::Aqrl acquire, Assembler::Aqrl release, 3251 Register result) { 3252 if (UseZacas) { 3253 cmpxchg(addr, expected, new_val, size, acquire, release, result, true); 3254 return; 3255 } 3256 3257 assert_different_registers(addr, t0); 3258 assert_different_registers(expected, t0); 3259 assert_different_registers(new_val, t0); 3260 3261 Label fail, done; 3262 load_reserved(t0, addr, size, acquire); 3263 bne(t0, expected, fail); 3264 store_conditional(t0, new_val, addr, size, release); 3265 bnez(t0, fail); 3266 3267 // Success 3268 mv(result, 1); 3269 j(done); 3270 3271 // Fail 3272 bind(fail); 3273 mv(result, zr); 3274 3275 bind(done); 3276 } 3277 3278 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ 3279 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ 3280 prev = prev->is_valid() ? 
prev : zr; \ 3281 if (incr.is_register()) { \ 3282 AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3283 } else { \ 3284 mv(t0, incr.as_constant()); \ 3285 AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3286 } \ 3287 return; \ 3288 } 3289 3290 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) 3291 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) 3292 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) 3293 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) 3294 3295 #undef ATOMIC_OP 3296 3297 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ 3298 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 3299 prev = prev->is_valid() ? prev : zr; \ 3300 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3301 return; \ 3302 } 3303 3304 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) 3305 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) 3306 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) 3307 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) 3308 3309 #undef ATOMIC_XCHG 3310 3311 #define ATOMIC_XCHGU(OP1, OP2) \ 3312 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 3313 atomic_##OP2(prev, newv, addr); \ 3314 zero_extend(prev, prev, 32); \ 3315 return; \ 3316 } 3317 3318 ATOMIC_XCHGU(xchgwu, xchgw) 3319 ATOMIC_XCHGU(xchgalwu, xchgalw) 3320 3321 #undef ATOMIC_XCHGU 3322 3323 #define ATOMIC_CAS(OP, AOP, ACQUIRE, RELEASE) \ 3324 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ 3325 assert(UseZacas, "invariant"); \ 3326 prev = prev->is_valid() ? prev : zr; \ 3327 AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ 3328 return; \ 3329 } 3330 3331 ATOMIC_CAS(cas, amocas_d, Assembler::relaxed, Assembler::relaxed) 3332 ATOMIC_CAS(casw, amocas_w, Assembler::relaxed, Assembler::relaxed) 3333 ATOMIC_CAS(casl, amocas_d, Assembler::relaxed, Assembler::rl) 3334 ATOMIC_CAS(caslw, amocas_w, Assembler::relaxed, Assembler::rl) 3335 ATOMIC_CAS(casal, amocas_d, Assembler::aq, Assembler::rl) 3336 ATOMIC_CAS(casalw, amocas_w, Assembler::aq, Assembler::rl) 3337 3338 #undef ATOMIC_CAS 3339 3340 #define ATOMIC_CASU(OP1, OP2) \ 3341 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ 3342 atomic_##OP2(prev, newv, addr); \ 3343 zero_extend(prev, prev, 32); \ 3344 return; \ 3345 } 3346 3347 ATOMIC_CASU(caswu, casw) 3348 ATOMIC_CASU(caslwu, caslw) 3349 ATOMIC_CASU(casalwu, casalw) 3350 3351 #undef ATOMIC_CASU 3352 3353 void MacroAssembler::atomic_cas( 3354 Register prev, Register newv, Register addr, enum operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release) { 3355 switch (size) { 3356 case int64: 3357 switch ((Assembler::Aqrl)(acquire | release)) { 3358 case Assembler::relaxed: 3359 atomic_cas(prev, newv, addr); 3360 break; 3361 case Assembler::rl: 3362 atomic_casl(prev, newv, addr); 3363 break; 3364 case Assembler::aqrl: 3365 atomic_casal(prev, newv, addr); 3366 break; 3367 default: 3368 ShouldNotReachHere(); 3369 } 3370 break; 3371 case int32: 3372 switch ((Assembler::Aqrl)(acquire | release)) { 3373 case Assembler::relaxed: 3374 atomic_casw(prev, newv, addr); 3375 break; 3376 case Assembler::rl: 3377 atomic_caslw(prev, newv, addr); 3378 break; 3379 case Assembler::aqrl: 3380 atomic_casalw(prev, newv, addr); 3381 break; 3382 default: 3383 ShouldNotReachHere(); 3384 } 3385 break; 3386 case uint32: 3387 switch 
((Assembler::Aqrl)(acquire | release)) { 3388 case Assembler::relaxed: 3389 atomic_caswu(prev, newv, addr); 3390 break; 3391 case Assembler::rl: 3392 atomic_caslwu(prev, newv, addr); 3393 break; 3394 case Assembler::aqrl: 3395 atomic_casalwu(prev, newv, addr); 3396 break; 3397 default: 3398 ShouldNotReachHere(); 3399 } 3400 break; 3401 default: 3402 ShouldNotReachHere(); 3403 } 3404 } 3405 3406 void MacroAssembler::far_jump(const Address &entry, Register tmp) { 3407 assert(CodeCache::find_blob(entry.target()) != nullptr, 3408 "destination of far call not found in code cache"); 3409 assert(entry.rspec().type() == relocInfo::external_word_type 3410 || entry.rspec().type() == relocInfo::runtime_call_type 3411 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 3412 // Fixed length: see MacroAssembler::far_branch_size() 3413 relocate(entry.rspec(), [&] { 3414 int32_t offset; 3415 la(tmp, entry.target(), offset); 3416 jr(tmp, offset); 3417 }); 3418 } 3419 3420 void MacroAssembler::far_call(const Address &entry, Register tmp) { 3421 assert(CodeCache::find_blob(entry.target()) != nullptr, 3422 "destination of far call not found in code cache"); 3423 assert(entry.rspec().type() == relocInfo::external_word_type 3424 || entry.rspec().type() == relocInfo::runtime_call_type 3425 || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type"); 3426 // Fixed length: see MacroAssembler::far_branch_size() 3427 // We can use auipc + jalr here because we know that the total size of 3428 // the code cache cannot exceed 2Gb. 3429 relocate(entry.rspec(), [&] { 3430 assert(is_valid_32bit_offset(entry.target() - pc()), "Far call using wrong instructions."); 3431 call(entry.target(), tmp); 3432 }); 3433 } 3434 3435 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 3436 Register super_klass, 3437 Register tmp_reg, 3438 Label* L_success, 3439 Label* L_failure, 3440 Label* L_slow_path, 3441 Register super_check_offset) { 3442 assert_different_registers(sub_klass, super_klass, tmp_reg); 3443 bool must_load_sco = (super_check_offset == noreg); 3444 if (must_load_sco) { 3445 assert(tmp_reg != noreg, "supply either a temp or a register offset"); 3446 } else { 3447 assert_different_registers(sub_klass, super_klass, super_check_offset); 3448 } 3449 3450 Label L_fallthrough; 3451 int label_nulls = 0; 3452 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3453 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3454 if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; } 3455 assert(label_nulls <= 1, "at most one null in batch"); 3456 3457 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3458 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 3459 Address super_check_offset_addr(super_klass, sco_offset); 3460 3461 // Hacked jmp, which may only be used just before L_fallthrough. 3462 #define final_jmp(label) \ 3463 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 3464 else j(label) /*omit semi*/ 3465 3466 // If the pointers are equal, we are done (e.g., String[] elements). 3467 // This self-check enables sharing of secondary supertype arrays among 3468 // non-primary types such as array-of-interface. Otherwise, each such 3469 // type would need its own customized SSA. 
3470 // We move this check to the front of the fast path because many 3471 // type checks are in fact trivially successful in this manner, 3472 // so we get a nicely predicted branch right at the start of the check. 3473 beq(sub_klass, super_klass, *L_success); 3474 3475 // Check the supertype display: 3476 if (must_load_sco) { 3477 lwu(tmp_reg, super_check_offset_addr); 3478 super_check_offset = tmp_reg; 3479 } 3480 add(t0, sub_klass, super_check_offset); 3481 Address super_check_addr(t0); 3482 ld(t0, super_check_addr); // load displayed supertype 3483 3484 // This check has worked decisively for primary supers. 3485 // Secondary supers are sought in the super_cache ('super_cache_addr'). 3486 // (Secondary supers are interfaces and very deeply nested subtypes.) 3487 // This works in the same check above because of a tricky aliasing 3488 // between the super_Cache and the primary super display elements. 3489 // (The 'super_check_addr' can address either, as the case requires.) 3490 // Note that the cache is updated below if it does not help us find 3491 // what we need immediately. 3492 // So if it was a primary super, we can just fail immediately. 3493 // Otherwise, it's the slow path for us (no success at this point). 3494 3495 beq(super_klass, t0, *L_success); 3496 mv(t1, sc_offset); 3497 if (L_failure == &L_fallthrough) { 3498 beq(super_check_offset, t1, *L_slow_path); 3499 } else { 3500 bne(super_check_offset, t1, *L_failure, /* is_far */ true); 3501 final_jmp(*L_slow_path); 3502 } 3503 3504 bind(L_fallthrough); 3505 3506 #undef final_jmp 3507 } 3508 3509 // Scans count pointer sized words at [addr] for occurrence of value, 3510 // generic 3511 void MacroAssembler::repne_scan(Register addr, Register value, Register count, 3512 Register tmp) { 3513 Label Lloop, Lexit; 3514 beqz(count, Lexit); 3515 bind(Lloop); 3516 ld(tmp, addr); 3517 beq(value, tmp, Lexit); 3518 add(addr, addr, wordSize); 3519 sub(count, count, 1); 3520 bnez(count, Lloop); 3521 bind(Lexit); 3522 } 3523 3524 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 3525 Register super_klass, 3526 Register tmp1_reg, 3527 Register tmp2_reg, 3528 Label* L_success, 3529 Label* L_failure) { 3530 assert_different_registers(sub_klass, super_klass, tmp1_reg); 3531 if (tmp2_reg != noreg) { 3532 assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); 3533 } 3534 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) 3535 3536 Label L_fallthrough; 3537 int label_nulls = 0; 3538 if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } 3539 if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } 3540 3541 assert(label_nulls <= 1, "at most one null in the batch"); 3542 3543 // A couple of useful fields in sub_klass: 3544 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 3545 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 3546 Address secondary_supers_addr(sub_klass, ss_offset); 3547 Address super_cache_addr( sub_klass, sc_offset); 3548 3549 BLOCK_COMMENT("check_klass_subtype_slow_path"); 3550 3551 // Do a linear scan of the secondary super-klass chain. 3552 // This code is rarely used, so simplicity is a virtue here. 3553 // The repne_scan instruction uses fixed registers, which we must spill. 3554 // Don't worry too much about pre-existing connections with the input regs. 
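// In rough pseudo-code (a sketch of the logic emitted below, not the exact
// VM sources), the slow path amounts to:
//
//   Array<Klass*>* ss = sub_klass->secondary_supers();
//   for (int i = 0; i < ss->length(); i++) {
//     if (ss->at(i) == super_klass) {
//       sub_klass->set_secondary_super_cache(super_klass);  // remember the hit
//       goto L_success;
//     }
//   }
//   goto L_failure;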
3555 3556 assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) 3557 assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) 3558 3559 RegSet pushed_registers; 3560 if (!IS_A_TEMP(x12)) { 3561 pushed_registers += x12; 3562 } 3563 if (!IS_A_TEMP(x15)) { 3564 pushed_registers += x15; 3565 } 3566 3567 if (super_klass != x10) { 3568 if (!IS_A_TEMP(x10)) { 3569 pushed_registers += x10; 3570 } 3571 } 3572 3573 push_reg(pushed_registers, sp); 3574 3575 // Get super_klass value into x10 (even if it was in x15 or x12) 3576 mv(x10, super_klass); 3577 3578 #ifndef PRODUCT 3579 incrementw(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr)); 3580 #endif // PRODUCT 3581 3582 // We will consult the secondary-super array. 3583 ld(x15, secondary_supers_addr); 3584 // Load the array length. 3585 lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes())); 3586 // Skip to start of data. 3587 add(x15, x15, Array<Klass*>::base_offset_in_bytes()); 3588 3589 // Set t0 to an obvious invalid value, falling through by default 3590 mv(t0, -1); 3591 // Scan X12 words at [X15] for an occurrence of X10. 3592 repne_scan(x15, x10, x12, t0); 3593 3594 // pop will restore x10, so we should use a temp register to keep its value 3595 mv(t1, x10); 3596 3597 // Unspill the temp registers: 3598 pop_reg(pushed_registers, sp); 3599 3600 bne(t1, t0, *L_failure); 3601 3602 // Success. Cache the super we found an proceed in triumph. 3603 sd(super_klass, super_cache_addr); 3604 3605 if (L_success != &L_fallthrough) { 3606 j(*L_success); 3607 } 3608 3609 #undef IS_A_TEMP 3610 3611 bind(L_fallthrough); 3612 } 3613 3614 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 3615 void MacroAssembler::tlab_allocate(Register obj, 3616 Register var_size_in_bytes, 3617 int con_size_in_bytes, 3618 Register tmp1, 3619 Register tmp2, 3620 Label& slow_case, 3621 bool is_far) { 3622 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); 3623 bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); 3624 } 3625 3626 // get_thread() can be called anywhere inside generated code so we 3627 // need to save whatever non-callee save context might get clobbered 3628 // by the call to Thread::current() or, indeed, the call setup code. 
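// On RISC-V the call-clobbered integer registers are x5-x7 (t0-t2),
// x10-x17 (a0-a7) and x28-x31 (t3-t6), plus ra; the code below pushes all of
// them except the destination register around the call to Thread::current().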
3629 void MacroAssembler::get_thread(Register thread) { 3630 // save all call-clobbered regs except thread 3631 RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + 3632 RegSet::range(x28, x31) + ra - thread; 3633 push_reg(saved_regs, sp); 3634 3635 mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); 3636 jalr(ra); 3637 if (thread != c_rarg0) { 3638 mv(thread, c_rarg0); 3639 } 3640 3641 // restore pushed registers 3642 pop_reg(saved_regs, sp); 3643 } 3644 3645 void MacroAssembler::load_byte_map_base(Register reg) { 3646 CardTable::CardValue* byte_map_base = 3647 ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); 3648 mv(reg, (uint64_t)byte_map_base); 3649 } 3650 3651 void MacroAssembler::build_frame(int framesize) { 3652 assert(framesize >= 2, "framesize must include space for FP/RA"); 3653 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 3654 sub(sp, sp, framesize); 3655 sd(fp, Address(sp, framesize - 2 * wordSize)); 3656 sd(ra, Address(sp, framesize - wordSize)); 3657 if (PreserveFramePointer) { add(fp, sp, framesize); } 3658 } 3659 3660 void MacroAssembler::remove_frame(int framesize) { 3661 assert(framesize >= 2, "framesize must include space for FP/RA"); 3662 assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); 3663 ld(fp, Address(sp, framesize - 2 * wordSize)); 3664 ld(ra, Address(sp, framesize - wordSize)); 3665 add(sp, sp, framesize); 3666 } 3667 3668 void MacroAssembler::reserved_stack_check() { 3669 // testing if reserved zone needs to be enabled 3670 Label no_reserved_zone_enabling; 3671 3672 ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); 3673 bltu(sp, t0, no_reserved_zone_enabling); 3674 3675 enter(); // RA and FP are live. 3676 mv(c_rarg0, xthread); 3677 rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)); 3678 leave(); 3679 3680 // We have already removed our own frame. 3681 // throw_delayed_StackOverflowError will think that it's been 3682 // called by our caller. 3683 RuntimeAddress target(StubRoutines::throw_delayed_StackOverflowError_entry()); 3684 relocate(target.rspec(), [&] { 3685 int32_t offset; 3686 movptr(t0, target.target(), offset); 3687 jr(t0, offset); 3688 }); 3689 should_not_reach_here(); 3690 3691 bind(no_reserved_zone_enabling); 3692 } 3693 3694 // Move the address of the polling page into dest. 3695 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { 3696 ld(dest, Address(xthread, JavaThread::polling_page_offset())); 3697 } 3698 3699 // Read the polling page. The address of the polling page must 3700 // already be in r. 
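// A brief note on intent: the lwu below targets zr, so the loaded value is
// discarded; the load exists only to touch the page. When the VM arms the
// poll by protecting the polling page, this access faults and the signal
// handler turns the fault into a safepoint/handshake stop.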
3701 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { 3702 relocate(rtype, [&] { 3703 lwu(zr, Address(r, offset)); 3704 }); 3705 } 3706 3707 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 3708 #ifdef ASSERT 3709 { 3710 ThreadInVMfromUnknown tiv; 3711 assert (UseCompressedOops, "should only be used for compressed oops"); 3712 assert (Universe::heap() != nullptr, "java heap should be initialized"); 3713 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 3714 assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); 3715 } 3716 #endif 3717 int oop_index = oop_recorder()->find_index(obj); 3718 relocate(oop_Relocation::spec(oop_index), [&] { 3719 li32(dst, 0xDEADBEEF); 3720 }); 3721 zero_extend(dst, dst, 32); 3722 } 3723 3724 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 3725 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 3726 assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); 3727 int index = oop_recorder()->find_index(k); 3728 assert(!Universe::heap()->is_in(k), "should not be an oop"); 3729 3730 narrowKlass nk = CompressedKlassPointers::encode(k); 3731 relocate(metadata_Relocation::spec(index), [&] { 3732 li32(dst, nk); 3733 }); 3734 zero_extend(dst, dst, 32); 3735 } 3736 3737 // Maybe emit a call via a trampoline. If the code cache is small 3738 // trampolines won't be emitted. 3739 address MacroAssembler::trampoline_call(Address entry) { 3740 assert(entry.rspec().type() == relocInfo::runtime_call_type || 3741 entry.rspec().type() == relocInfo::opt_virtual_call_type || 3742 entry.rspec().type() == relocInfo::static_call_type || 3743 entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); 3744 3745 address target = entry.target(); 3746 3747 // We need a trampoline if branches are far. 3748 if (!in_scratch_emit_size()) { 3749 if (entry.rspec().type() == relocInfo::runtime_call_type) { 3750 assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs"); 3751 code()->share_trampoline_for(entry.target(), offset()); 3752 } else { 3753 address stub = emit_trampoline_stub(offset(), target); 3754 if (stub == nullptr) { 3755 postcond(pc() == badAddress); 3756 return nullptr; // CodeCache is full 3757 } 3758 } 3759 } 3760 target = pc(); 3761 3762 address call_pc = pc(); 3763 #ifdef ASSERT 3764 if (entry.rspec().type() != relocInfo::runtime_call_type) { 3765 assert_alignment(call_pc); 3766 } 3767 #endif 3768 relocate(entry.rspec(), [&] { 3769 jump_link(target, t0); 3770 }); 3771 3772 postcond(pc() != badAddress); 3773 return call_pc; 3774 } 3775 3776 address MacroAssembler::ic_call(address entry, jint method_index) { 3777 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); 3778 IncompressibleRegion ir(this); // relocations 3779 movptr(t1, (address)Universe::non_oop_word(), t0); 3780 assert_cond(entry != nullptr); 3781 return trampoline_call(Address(entry, rh)); 3782 } 3783 3784 int MacroAssembler::ic_check_size() { 3785 // No compressed 3786 return (MacroAssembler::instruction_size * (2 /* 2 loads */ + 1 /* branch */)) + 3787 far_branch_size(); 3788 } 3789 3790 int MacroAssembler::ic_check(int end_alignment) { 3791 IncompressibleRegion ir(this); 3792 Register receiver = j_rarg0; 3793 Register data = t1; 3794 3795 Register tmp1 = t0; // t0 always scratch 3796 // t2 is saved on call, thus should have been saved before this check. 3797 // Hence we can clobber it. 
3798 Register tmp2 = t2; 3799 3800 // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed 3801 // before the inline cache check, so we don't have to execute any nop instructions when dispatching 3802 // through the UEP, yet we can ensure that the VEP is aligned appropriately. That's why we align 3803 // before the inline cache check here, and not after 3804 align(end_alignment, ic_check_size()); 3805 int uep_offset = offset(); 3806 3807 if (UseCompressedClassPointers) { 3808 lwu(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes())); 3809 lwu(tmp2, Address(data, CompiledICData::speculated_klass_offset())); 3810 } else { 3811 ld(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes())); 3812 ld(tmp2, Address(data, CompiledICData::speculated_klass_offset())); 3813 } 3814 3815 Label ic_hit; 3816 beq(tmp1, tmp2, ic_hit); 3817 // Note, far_jump is not fixed size. 3818 // Is this ever generates a movptr alignment/size will be off. 3819 far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 3820 bind(ic_hit); 3821 3822 assert((offset() % end_alignment) == 0, "Misaligned verified entry point."); 3823 return uep_offset; 3824 } 3825 3826 // Emit a trampoline stub for a call to a target which is too far away. 3827 // 3828 // code sequences: 3829 // 3830 // call-site: 3831 // branch-and-link to <destination> or <trampoline stub> 3832 // 3833 // Related trampoline stub for this call site in the stub section: 3834 // load the call target from the constant pool 3835 // branch (RA still points to the call site above) 3836 3837 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, 3838 address dest) { 3839 // Max stub size: alignment nop, TrampolineStub. 3840 address stub = start_a_stub(max_trampoline_stub_size()); 3841 if (stub == nullptr) { 3842 return nullptr; // CodeBuffer::expand failed 3843 } 3844 3845 // We are always 4-byte aligned here. 3846 assert_alignment(pc()); 3847 3848 // Create a trampoline stub relocation which relates this trampoline stub 3849 // with the call instruction at insts_call_instruction_offset in the 3850 // instructions code-section. 3851 3852 // Make sure the address of destination 8-byte aligned after 3 instructions. 3853 align(wordSize, MacroAssembler::trampoline_stub_data_offset); 3854 3855 RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() + 3856 insts_call_instruction_offset); 3857 const int stub_start_offset = offset(); 3858 relocate(rh, [&] { 3859 // Now, create the trampoline stub's code: 3860 // - load the call 3861 // - call 3862 Label target; 3863 ld(t0, target); // auipc + ld 3864 jr(t0); // jalr 3865 bind(target); 3866 assert(offset() - stub_start_offset == MacroAssembler::trampoline_stub_data_offset, 3867 "should be"); 3868 assert(offset() % wordSize == 0, "bad alignment"); 3869 emit_int64((int64_t)dest); 3870 }); 3871 3872 const address stub_start_addr = addr_at(stub_start_offset); 3873 3874 assert(MacroAssembler::is_trampoline_stub_at(stub_start_addr), "doesn't look like a trampoline"); 3875 3876 end_a_stub(); 3877 return stub_start_addr; 3878 } 3879 3880 int MacroAssembler::max_trampoline_stub_size() { 3881 // Max stub size: alignment nop, TrampolineStub. 
3882 return MacroAssembler::instruction_size + MacroAssembler::trampoline_stub_instruction_size; 3883 } 3884 3885 int MacroAssembler::static_call_stub_size() { 3886 // (lui, addi, slli, addi, slli, addi) + (lui + lui + slli + add) + jalr 3887 return 11 * MacroAssembler::instruction_size; 3888 } 3889 3890 Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) { 3891 switch (dst.getMode()) { 3892 case Address::base_plus_offset: 3893 // This is the expected mode, although we allow all the other 3894 // forms below. 3895 return form_address(tmp, dst.base(), dst.offset()); 3896 default: 3897 la(tmp, dst); 3898 return Address(tmp); 3899 } 3900 } 3901 3902 void MacroAssembler::increment(const Address dst, int64_t value, Register tmp1, Register tmp2) { 3903 assert(((dst.getMode() == Address::base_plus_offset && 3904 is_simm12(dst.offset())) || is_simm12(value)), 3905 "invalid value and address mode combination"); 3906 Address adr = add_memory_helper(dst, tmp2); 3907 assert(!adr.uses(tmp1), "invalid dst for address increment"); 3908 ld(tmp1, adr); 3909 add(tmp1, tmp1, value, tmp2); 3910 sd(tmp1, adr); 3911 } 3912 3913 void MacroAssembler::incrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 3914 assert(((dst.getMode() == Address::base_plus_offset && 3915 is_simm12(dst.offset())) || is_simm12(value)), 3916 "invalid value and address mode combination"); 3917 Address adr = add_memory_helper(dst, tmp2); 3918 assert(!adr.uses(tmp1), "invalid dst for address increment"); 3919 lwu(tmp1, adr); 3920 addw(tmp1, tmp1, value, tmp2); 3921 sw(tmp1, adr); 3922 } 3923 3924 void MacroAssembler::decrement(const Address dst, int64_t value, Register tmp1, Register tmp2) { 3925 assert(((dst.getMode() == Address::base_plus_offset && 3926 is_simm12(dst.offset())) || is_simm12(value)), 3927 "invalid value and address mode combination"); 3928 Address adr = add_memory_helper(dst, tmp2); 3929 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 3930 ld(tmp1, adr); 3931 sub(tmp1, tmp1, value, tmp2); 3932 sd(tmp1, adr); 3933 } 3934 3935 void MacroAssembler::decrementw(const Address dst, int32_t value, Register tmp1, Register tmp2) { 3936 assert(((dst.getMode() == Address::base_plus_offset && 3937 is_simm12(dst.offset())) || is_simm12(value)), 3938 "invalid value and address mode combination"); 3939 Address adr = add_memory_helper(dst, tmp2); 3940 assert(!adr.uses(tmp1), "invalid dst for address decrement"); 3941 lwu(tmp1, adr); 3942 subw(tmp1, tmp1, value, tmp2); 3943 sw(tmp1, adr); 3944 } 3945 3946 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { 3947 assert_different_registers(src1, t0); 3948 relocate(src2.rspec(), [&] { 3949 int32_t offset; 3950 la(t0, src2.target(), offset); 3951 ld(t0, Address(t0, offset)); 3952 }); 3953 beq(src1, t0, equal); 3954 } 3955 3956 void MacroAssembler::load_method_holder_cld(Register result, Register method) { 3957 load_method_holder(result, method); 3958 ld(result, Address(result, InstanceKlass::class_loader_data_offset())); 3959 } 3960 3961 void MacroAssembler::load_method_holder(Register holder, Register method) { 3962 ld(holder, Address(method, Method::const_offset())); // ConstMethod* 3963 ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* 3964 ld(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass* 3965 } 3966 3967 // string indexof 3968 // compute index by trailing zeros 3969 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, 3970 
Register match_mask, Register result, 3971 Register ch2, Register tmp, 3972 bool haystack_isL) { 3973 int haystack_chr_shift = haystack_isL ? 0 : 1; 3974 srl(match_mask, match_mask, trailing_zeros); 3975 srli(match_mask, match_mask, 1); 3976 srli(tmp, trailing_zeros, LogBitsPerByte); 3977 if (!haystack_isL) andi(tmp, tmp, 0xE); 3978 add(haystack, haystack, tmp); 3979 ld(ch2, Address(haystack)); 3980 if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); 3981 add(result, result, tmp); 3982 } 3983 3984 // string indexof 3985 // Find pattern element in src, compute match mask, 3986 // only the first occurrence of 0x80/0x8000 at low bits is the valid match index 3987 // match mask patterns and corresponding indices would be like: 3988 // - 0x8080808080808080 (Latin1) 3989 // - 7 6 5 4 3 2 1 0 (match index) 3990 // - 0x8000800080008000 (UTF16) 3991 // - 3 2 1 0 (match index) 3992 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, 3993 Register mask1, Register mask2) { 3994 xorr(src, pattern, src); 3995 sub(match_mask, src, mask1); 3996 orr(src, src, mask2); 3997 notr(src, src); 3998 andr(match_mask, match_mask, src); 3999 } 4000 4001 #ifdef COMPILER2 4002 // Code for BigInteger::mulAdd intrinsic 4003 // out = x10 4004 // in = x11 4005 // offset = x12 (already out.length-offset) 4006 // len = x13 4007 // k = x14 4008 // tmp = x28 4009 // 4010 // pseudo code from java implementation: 4011 // long kLong = k & LONG_MASK; 4012 // carry = 0; 4013 // offset = out.length-offset - 1; 4014 // for (int j = len - 1; j >= 0; j--) { 4015 // product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; 4016 // out[offset--] = (int)product; 4017 // carry = product >>> 32; 4018 // } 4019 // return (int)carry; 4020 void MacroAssembler::mul_add(Register out, Register in, Register offset, 4021 Register len, Register k, Register tmp) { 4022 Label L_tail_loop, L_unroll, L_end; 4023 mv(tmp, out); 4024 mv(out, zr); 4025 blez(len, L_end); 4026 zero_extend(k, k, 32); 4027 slliw(t0, offset, LogBytesPerInt); 4028 add(offset, tmp, t0); 4029 slliw(t0, len, LogBytesPerInt); 4030 add(in, in, t0); 4031 4032 const int unroll = 8; 4033 mv(tmp, unroll); 4034 blt(len, tmp, L_tail_loop); 4035 bind(L_unroll); 4036 for (int i = 0; i < unroll; i++) { 4037 sub(in, in, BytesPerInt); 4038 lwu(t0, Address(in, 0)); 4039 mul(t1, t0, k); 4040 add(t0, t1, out); 4041 sub(offset, offset, BytesPerInt); 4042 lwu(t1, Address(offset, 0)); 4043 add(t0, t0, t1); 4044 sw(t0, Address(offset, 0)); 4045 srli(out, t0, 32); 4046 } 4047 subw(len, len, tmp); 4048 bge(len, tmp, L_unroll); 4049 4050 bind(L_tail_loop); 4051 blez(len, L_end); 4052 sub(in, in, BytesPerInt); 4053 lwu(t0, Address(in, 0)); 4054 mul(t1, t0, k); 4055 add(t0, t1, out); 4056 sub(offset, offset, BytesPerInt); 4057 lwu(t1, Address(offset, 0)); 4058 add(t0, t0, t1); 4059 sw(t0, Address(offset, 0)); 4060 srli(out, t0, 32); 4061 subw(len, len, 1); 4062 j(L_tail_loop); 4063 4064 bind(L_end); 4065 } 4066 4067 // Multiply and multiply-accumulate unsigned 64-bit registers. 
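// A sketch of the arithmetic implemented by wide_mul / wide_madd below
// (all operands are unsigned 64-bit, the results are 128-bit hi:lo pairs):
//
//   wide_mul:   prod_hi:prod_lo  =  n * m
//   wide_madd:  sum_hi:sum_lo   +=  n * m
//
// mul produces the low 64 bits of the product and mulhu the high 64 bits;
// the accumulate variant propagates the carry out of the low half via cad/adc.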
4068 void MacroAssembler::wide_mul(Register prod_lo, Register prod_hi, Register n, Register m) { 4069 assert_different_registers(prod_lo, prod_hi); 4070 4071 mul(prod_lo, n, m); 4072 mulhu(prod_hi, n, m); 4073 } 4074 4075 void MacroAssembler::wide_madd(Register sum_lo, Register sum_hi, Register n, 4076 Register m, Register tmp1, Register tmp2) { 4077 assert_different_registers(sum_lo, sum_hi); 4078 assert_different_registers(sum_hi, tmp2); 4079 4080 wide_mul(tmp1, tmp2, n, m); 4081 cad(sum_lo, sum_lo, tmp1, tmp1); // Add tmp1 to sum_lo with carry output to tmp1 4082 adc(sum_hi, sum_hi, tmp2, tmp1); // Add tmp2 with carry to sum_hi 4083 } 4084 4085 // add two unsigned input and output carry 4086 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) 4087 { 4088 assert_different_registers(dst, carry); 4089 assert_different_registers(dst, src2); 4090 add(dst, src1, src2); 4091 sltu(carry, dst, src2); 4092 } 4093 4094 // add two input with carry 4095 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) { 4096 assert_different_registers(dst, carry); 4097 add(dst, src1, src2); 4098 add(dst, dst, carry); 4099 } 4100 4101 // add two unsigned input with carry and output carry 4102 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) { 4103 assert_different_registers(dst, src2); 4104 adc(dst, src1, src2, carry); 4105 sltu(carry, dst, src2); 4106 } 4107 4108 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, 4109 Register src1, Register src2, Register carry) { 4110 cad(dest_lo, dest_lo, src1, carry); 4111 add(dest_hi, dest_hi, carry); 4112 cad(dest_lo, dest_lo, src2, carry); 4113 add(final_dest_hi, dest_hi, carry); 4114 } 4115 4116 /** 4117 * Multiply 32 bit by 32 bit first loop. 4118 */ 4119 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, 4120 Register y, Register y_idx, Register z, 4121 Register carry, Register product, 4122 Register idx, Register kdx) { 4123 // jlong carry, x[], y[], z[]; 4124 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 4125 // long product = y[idx] * x[xstart] + carry; 4126 // z[kdx] = (int)product; 4127 // carry = product >>> 32; 4128 // } 4129 // z[xstart] = (int)carry; 4130 4131 Label L_first_loop, L_first_loop_exit; 4132 blez(idx, L_first_loop_exit); 4133 4134 shadd(t0, xstart, x, t0, LogBytesPerInt); 4135 lwu(x_xstart, Address(t0, 0)); 4136 4137 bind(L_first_loop); 4138 subw(idx, idx, 1); 4139 shadd(t0, idx, y, t0, LogBytesPerInt); 4140 lwu(y_idx, Address(t0, 0)); 4141 mul(product, x_xstart, y_idx); 4142 add(product, product, carry); 4143 srli(carry, product, 32); 4144 subw(kdx, kdx, 1); 4145 shadd(t0, kdx, z, t0, LogBytesPerInt); 4146 sw(product, Address(t0, 0)); 4147 bgtz(idx, L_first_loop); 4148 4149 bind(L_first_loop_exit); 4150 } 4151 4152 /** 4153 * Multiply 64 bit by 64 bit first loop. 
4154 */ 4155 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, 4156 Register y, Register y_idx, Register z, 4157 Register carry, Register product, 4158 Register idx, Register kdx) { 4159 // 4160 // jlong carry, x[], y[], z[]; 4161 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { 4162 // huge_128 product = y[idx] * x[xstart] + carry; 4163 // z[kdx] = (jlong)product; 4164 // carry = (jlong)(product >>> 64); 4165 // } 4166 // z[xstart] = carry; 4167 // 4168 4169 Label L_first_loop, L_first_loop_exit; 4170 Label L_one_x, L_one_y, L_multiply; 4171 4172 subw(xstart, xstart, 1); 4173 bltz(xstart, L_one_x); 4174 4175 shadd(t0, xstart, x, t0, LogBytesPerInt); 4176 ld(x_xstart, Address(t0, 0)); 4177 ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian 4178 4179 bind(L_first_loop); 4180 subw(idx, idx, 1); 4181 bltz(idx, L_first_loop_exit); 4182 subw(idx, idx, 1); 4183 bltz(idx, L_one_y); 4184 4185 shadd(t0, idx, y, t0, LogBytesPerInt); 4186 ld(y_idx, Address(t0, 0)); 4187 ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian 4188 bind(L_multiply); 4189 4190 mulhu(t0, x_xstart, y_idx); 4191 mul(product, x_xstart, y_idx); 4192 cad(product, product, carry, t1); 4193 adc(carry, t0, zr, t1); 4194 4195 subw(kdx, kdx, 2); 4196 ror_imm(product, product, 32); // back to big-endian 4197 shadd(t0, kdx, z, t0, LogBytesPerInt); 4198 sd(product, Address(t0, 0)); 4199 4200 j(L_first_loop); 4201 4202 bind(L_one_y); 4203 lwu(y_idx, Address(y, 0)); 4204 j(L_multiply); 4205 4206 bind(L_one_x); 4207 lwu(x_xstart, Address(x, 0)); 4208 j(L_first_loop); 4209 4210 bind(L_first_loop_exit); 4211 } 4212 4213 /** 4214 * Multiply 128 bit by 128 bit. Unrolled inner loop. 4215 * 4216 */ 4217 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, 4218 Register carry, Register carry2, 4219 Register idx, Register jdx, 4220 Register yz_idx1, Register yz_idx2, 4221 Register tmp, Register tmp3, Register tmp4, 4222 Register tmp6, Register product_hi) { 4223 // jlong carry, x[], y[], z[]; 4224 // int kdx = xstart+1; 4225 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 4226 // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; 4227 // jlong carry2 = (jlong)(tmp3 >>> 64); 4228 // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; 4229 // carry = (jlong)(tmp4 >>> 64); 4230 // z[kdx+idx+1] = (jlong)tmp3; 4231 // z[kdx+idx] = (jlong)tmp4; 4232 // } 4233 // idx += 2; 4234 // if (idx > 0) { 4235 // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; 4236 // z[kdx+idx] = (jlong)yz_idx1; 4237 // carry = (jlong)(yz_idx1 >>> 64); 4238 // } 4239 // 4240 4241 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 4242 4243 srliw(jdx, idx, 2); 4244 4245 bind(L_third_loop); 4246 4247 subw(jdx, jdx, 1); 4248 bltz(jdx, L_third_loop_exit); 4249 subw(idx, idx, 4); 4250 4251 shadd(t0, idx, y, t0, LogBytesPerInt); 4252 ld(yz_idx2, Address(t0, 0)); 4253 ld(yz_idx1, Address(t0, wordSize)); 4254 4255 shadd(tmp6, idx, z, t0, LogBytesPerInt); 4256 4257 ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian 4258 ror_imm(yz_idx2, yz_idx2, 32); 4259 4260 ld(t1, Address(tmp6, 0)); 4261 ld(t0, Address(tmp6, wordSize)); 4262 4263 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 4264 mulhu(tmp4, product_hi, yz_idx1); 4265 4266 ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian 4267 ror_imm(t1, t1, 32, tmp); 4268 4269 mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi 
-> carry2:tmp 4270 mulhu(carry2, product_hi, yz_idx2); 4271 4272 cad(tmp3, tmp3, carry, carry); 4273 adc(tmp4, tmp4, zr, carry); 4274 cad(tmp3, tmp3, t0, t0); 4275 cadc(tmp4, tmp4, tmp, t0); 4276 adc(carry, carry2, zr, t0); 4277 cad(tmp4, tmp4, t1, carry2); 4278 adc(carry, carry, zr, carry2); 4279 4280 ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian 4281 ror_imm(tmp4, tmp4, 32); 4282 sd(tmp4, Address(tmp6, 0)); 4283 sd(tmp3, Address(tmp6, wordSize)); 4284 4285 j(L_third_loop); 4286 4287 bind(L_third_loop_exit); 4288 4289 andi(idx, idx, 0x3); 4290 beqz(idx, L_post_third_loop_done); 4291 4292 Label L_check_1; 4293 subw(idx, idx, 2); 4294 bltz(idx, L_check_1); 4295 4296 shadd(t0, idx, y, t0, LogBytesPerInt); 4297 ld(yz_idx1, Address(t0, 0)); 4298 ror_imm(yz_idx1, yz_idx1, 32); 4299 4300 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 4301 mulhu(tmp4, product_hi, yz_idx1); 4302 4303 shadd(t0, idx, z, t0, LogBytesPerInt); 4304 ld(yz_idx2, Address(t0, 0)); 4305 ror_imm(yz_idx2, yz_idx2, 32, tmp); 4306 4307 add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); 4308 4309 ror_imm(tmp3, tmp3, 32, tmp); 4310 sd(tmp3, Address(t0, 0)); 4311 4312 bind(L_check_1); 4313 4314 andi(idx, idx, 0x1); 4315 subw(idx, idx, 1); 4316 bltz(idx, L_post_third_loop_done); 4317 shadd(t0, idx, y, t0, LogBytesPerInt); 4318 lwu(tmp4, Address(t0, 0)); 4319 mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 4320 mulhu(carry2, tmp4, product_hi); 4321 4322 shadd(t0, idx, z, t0, LogBytesPerInt); 4323 lwu(tmp4, Address(t0, 0)); 4324 4325 add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); 4326 4327 shadd(t0, idx, z, t0, LogBytesPerInt); 4328 sw(tmp3, Address(t0, 0)); 4329 4330 slli(t0, carry2, 32); 4331 srli(carry, tmp3, 32); 4332 orr(carry, carry, t0); 4333 4334 bind(L_post_third_loop_done); 4335 } 4336 4337 /** 4338 * Code for BigInteger::multiplyToLen() intrinsic. 4339 * 4340 * x10: x 4341 * x11: xlen 4342 * x12: y 4343 * x13: ylen 4344 * x14: z 4345 * x15: tmp0 4346 * x16: tmp1 4347 * x17: tmp2 4348 * x7: tmp3 4349 * x28: tmp4 4350 * x29: tmp5 4351 * x30: tmp6 4352 * x31: tmp7 4353 */ 4354 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, 4355 Register z, Register tmp0, 4356 Register tmp1, Register tmp2, Register tmp3, Register tmp4, 4357 Register tmp5, Register tmp6, Register product_hi) { 4358 assert_different_registers(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); 4359 4360 const Register idx = tmp1; 4361 const Register kdx = tmp2; 4362 const Register xstart = tmp3; 4363 4364 const Register y_idx = tmp4; 4365 const Register carry = tmp5; 4366 const Register product = xlen; 4367 const Register x_xstart = tmp0; 4368 4369 mv(idx, ylen); // idx = ylen; 4370 addw(kdx, xlen, ylen); // kdx = xlen+ylen; 4371 mv(carry, zr); // carry = 0; 4372 4373 Label L_multiply_64_x_64_loop, L_done; 4374 4375 subw(xstart, xlen, 1); 4376 bltz(xstart, L_done); 4377 4378 const Register jdx = tmp1; 4379 4380 if (AvoidUnalignedAccesses) { 4381 // Check if x and y are both 8-byte aligned. 
4382 orr(t0, xlen, ylen); 4383 test_bit(t0, t0, 0); 4384 beqz(t0, L_multiply_64_x_64_loop); 4385 4386 multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 4387 shadd(t0, xstart, z, t0, LogBytesPerInt); 4388 sw(carry, Address(t0, 0)); 4389 4390 Label L_second_loop_unaligned; 4391 bind(L_second_loop_unaligned); 4392 mv(carry, zr); 4393 mv(jdx, ylen); 4394 subw(xstart, xstart, 1); 4395 bltz(xstart, L_done); 4396 sub(sp, sp, 2 * wordSize); 4397 sd(z, Address(sp, 0)); 4398 sd(zr, Address(sp, wordSize)); 4399 shadd(t0, xstart, z, t0, LogBytesPerInt); 4400 addi(z, t0, 4); 4401 shadd(t0, xstart, x, t0, LogBytesPerInt); 4402 lwu(product, Address(t0, 0)); 4403 Label L_third_loop, L_third_loop_exit; 4404 4405 blez(jdx, L_third_loop_exit); 4406 4407 bind(L_third_loop); 4408 subw(jdx, jdx, 1); 4409 shadd(t0, jdx, y, t0, LogBytesPerInt); 4410 lwu(t0, Address(t0, 0)); 4411 mul(t1, t0, product); 4412 add(t0, t1, carry); 4413 shadd(tmp6, jdx, z, t1, LogBytesPerInt); 4414 lwu(t1, Address(tmp6, 0)); 4415 add(t0, t0, t1); 4416 sw(t0, Address(tmp6, 0)); 4417 srli(carry, t0, 32); 4418 bgtz(jdx, L_third_loop); 4419 4420 bind(L_third_loop_exit); 4421 ld(z, Address(sp, 0)); 4422 addi(sp, sp, 2 * wordSize); 4423 shadd(t0, xstart, z, t0, LogBytesPerInt); 4424 sw(carry, Address(t0, 0)); 4425 4426 j(L_second_loop_unaligned); 4427 } 4428 4429 bind(L_multiply_64_x_64_loop); 4430 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); 4431 4432 Label L_second_loop_aligned; 4433 beqz(kdx, L_second_loop_aligned); 4434 4435 Label L_carry; 4436 subw(kdx, kdx, 1); 4437 beqz(kdx, L_carry); 4438 4439 shadd(t0, kdx, z, t0, LogBytesPerInt); 4440 sw(carry, Address(t0, 0)); 4441 srli(carry, carry, 32); 4442 subw(kdx, kdx, 1); 4443 4444 bind(L_carry); 4445 shadd(t0, kdx, z, t0, LogBytesPerInt); 4446 sw(carry, Address(t0, 0)); 4447 4448 // Second and third (nested) loops. 
4449 // 4450 // for (int i = xstart-1; i >= 0; i--) { // Second loop 4451 // carry = 0; 4452 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 4453 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 4454 // (z[k] & LONG_MASK) + carry; 4455 // z[k] = (int)product; 4456 // carry = product >>> 32; 4457 // } 4458 // z[i] = (int)carry; 4459 // } 4460 // 4461 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi 4462 4463 bind(L_second_loop_aligned); 4464 mv(carry, zr); // carry = 0; 4465 mv(jdx, ylen); // j = ystart+1 4466 4467 subw(xstart, xstart, 1); // i = xstart-1; 4468 bltz(xstart, L_done); 4469 4470 sub(sp, sp, 4 * wordSize); 4471 sd(z, Address(sp, 0)); 4472 4473 Label L_last_x; 4474 shadd(t0, xstart, z, t0, LogBytesPerInt); 4475 addi(z, t0, 4); 4476 subw(xstart, xstart, 1); // i = xstart-1; 4477 bltz(xstart, L_last_x); 4478 4479 shadd(t0, xstart, x, t0, LogBytesPerInt); 4480 ld(product_hi, Address(t0, 0)); 4481 ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian 4482 4483 Label L_third_loop_prologue; 4484 bind(L_third_loop_prologue); 4485 4486 sd(ylen, Address(sp, wordSize)); 4487 sd(x, Address(sp, 2 * wordSize)); 4488 sd(xstart, Address(sp, 3 * wordSize)); 4489 multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, 4490 tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); 4491 ld(z, Address(sp, 0)); 4492 ld(ylen, Address(sp, wordSize)); 4493 ld(x, Address(sp, 2 * wordSize)); 4494 ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen 4495 addi(sp, sp, 4 * wordSize); 4496 4497 addiw(tmp3, xlen, 1); 4498 shadd(t0, tmp3, z, t0, LogBytesPerInt); 4499 sw(carry, Address(t0, 0)); 4500 4501 subw(tmp3, tmp3, 1); 4502 bltz(tmp3, L_done); 4503 4504 srli(carry, carry, 32); 4505 shadd(t0, tmp3, z, t0, LogBytesPerInt); 4506 sw(carry, Address(t0, 0)); 4507 j(L_second_loop_aligned); 4508 4509 // Next infrequent code is moved outside loops. 4510 bind(L_last_x); 4511 lwu(product_hi, Address(x, 0)); 4512 j(L_third_loop_prologue); 4513 4514 bind(L_done); 4515 } 4516 #endif 4517 4518 // Count bits of trailing zero chars from lsb to msb until first non-zero element. 4519 // For LL case, one byte for one element, so shift 8 bits once, and for other case, 4520 // shift 16 bits once. 4521 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) { 4522 if (UseZbb) { 4523 assert_different_registers(Rd, Rs, tmp1); 4524 int step = isLL ? 8 : 16; 4525 ctz(Rd, Rs); 4526 andi(tmp1, Rd, step - 1); 4527 sub(Rd, Rd, tmp1); 4528 return; 4529 } 4530 4531 assert_different_registers(Rd, Rs, tmp1, tmp2); 4532 Label Loop; 4533 int step = isLL ? 
8 : 16; 4534 mv(Rd, -step); 4535 mv(tmp2, Rs); 4536 4537 bind(Loop); 4538 addi(Rd, Rd, step); 4539 andi(tmp1, tmp2, ((1 << step) - 1)); 4540 srli(tmp2, tmp2, step); 4541 beqz(tmp1, Loop); 4542 } 4543 4544 // This instruction reads adjacent 4 bytes from the lower half of source register, 4545 // inflate into a register, for example: 4546 // Rs: A7A6A5A4A3A2A1A0 4547 // Rd: 00A300A200A100A0 4548 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) { 4549 assert_different_registers(Rd, Rs, tmp1, tmp2); 4550 4551 mv(tmp1, 0xFF000000); // first byte mask at lower word 4552 andr(Rd, Rs, tmp1); 4553 for (int i = 0; i < 2; i++) { 4554 slli(Rd, Rd, wordSize); 4555 srli(tmp1, tmp1, wordSize); 4556 andr(tmp2, Rs, tmp1); 4557 orr(Rd, Rd, tmp2); 4558 } 4559 slli(Rd, Rd, wordSize); 4560 andi(tmp2, Rs, 0xFF); // last byte mask at lower word 4561 orr(Rd, Rd, tmp2); 4562 } 4563 4564 // This instruction reads adjacent 4 bytes from the upper half of source register, 4565 // inflate into a register, for example: 4566 // Rs: A7A6A5A4A3A2A1A0 4567 // Rd: 00A700A600A500A4 4568 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) { 4569 assert_different_registers(Rd, Rs, tmp1, tmp2); 4570 srli(Rs, Rs, 32); // only upper 32 bits are needed 4571 inflate_lo32(Rd, Rs, tmp1, tmp2); 4572 } 4573 4574 // The size of the blocks erased by the zero_blocks stub. We must 4575 // handle anything smaller than this ourselves in zero_words(). 4576 const int MacroAssembler::zero_words_block_size = 8; 4577 4578 // zero_words() is used by C2 ClearArray patterns. It is as small as 4579 // possible, handling small word counts locally and delegating 4580 // anything larger to the zero_blocks stub. It is expanded many times 4581 // in compiled code, so it is important to keep it short. 4582 4583 // ptr: Address of a buffer to be zeroed. 4584 // cnt: Count in HeapWords. 4585 // 4586 // ptr, cnt, and t0 are clobbered. 4587 address MacroAssembler::zero_words(Register ptr, Register cnt) { 4588 assert(is_power_of_2(zero_words_block_size), "adjust this"); 4589 assert(ptr == x28 && cnt == x29, "mismatch in register usage"); 4590 assert_different_registers(cnt, t0); 4591 4592 BLOCK_COMMENT("zero_words {"); 4593 4594 mv(t0, zero_words_block_size); 4595 Label around, done, done16; 4596 bltu(cnt, t0, around); 4597 { 4598 RuntimeAddress zero_blocks(StubRoutines::riscv::zero_blocks()); 4599 assert(zero_blocks.target() != nullptr, "zero_blocks stub has not been generated"); 4600 if (StubRoutines::riscv::complete()) { 4601 address tpc = trampoline_call(zero_blocks); 4602 if (tpc == nullptr) { 4603 DEBUG_ONLY(reset_labels(around)); 4604 postcond(pc() == badAddress); 4605 return nullptr; 4606 } 4607 } else { 4608 jump_link(zero_blocks, t0); 4609 } 4610 } 4611 bind(around); 4612 for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { 4613 Label l; 4614 test_bit(t0, cnt, exact_log2(i)); 4615 beqz(t0, l); 4616 for (int j = 0; j < i; j++) { 4617 sd(zr, Address(ptr, j * wordSize)); 4618 } 4619 addi(ptr, ptr, i * wordSize); 4620 bind(l); 4621 } 4622 { 4623 Label l; 4624 test_bit(t0, cnt, 0); 4625 beqz(t0, l); 4626 sd(zr, Address(ptr, 0)); 4627 bind(l); 4628 } 4629 4630 BLOCK_COMMENT("} zero_words"); 4631 postcond(pc() != badAddress); 4632 return pc(); 4633 } 4634 4635 #define SmallArraySize (18 * BytesPerLong) 4636 4637 // base: Address of a buffer to be zeroed, 8 bytes aligned. 4638 // cnt: Immediate count in HeapWords. 
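// For small constant counts this version simply emits cnt stores, e.g.
// (a sketch of the expansion for zero_words(base, 3), not literal output):
//
//   sd zr, 0(base)
//   sd zr, 8(base)
//   sd zr, 16(base)
//
// Counts above SmallArraySize / BytesPerLong fall into the unrolled loop below.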
4639 void MacroAssembler::zero_words(Register base, uint64_t cnt) { 4640 assert_different_registers(base, t0, t1); 4641 4642 BLOCK_COMMENT("zero_words {"); 4643 4644 if (cnt <= SmallArraySize / BytesPerLong) { 4645 for (int i = 0; i < (int)cnt; i++) { 4646 sd(zr, Address(base, i * wordSize)); 4647 } 4648 } else { 4649 const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll 4650 int remainder = cnt % unroll; 4651 for (int i = 0; i < remainder; i++) { 4652 sd(zr, Address(base, i * wordSize)); 4653 } 4654 4655 Label loop; 4656 Register cnt_reg = t0; 4657 Register loop_base = t1; 4658 cnt = cnt - remainder; 4659 mv(cnt_reg, cnt); 4660 add(loop_base, base, remainder * wordSize); 4661 bind(loop); 4662 sub(cnt_reg, cnt_reg, unroll); 4663 for (int i = 0; i < unroll; i++) { 4664 sd(zr, Address(loop_base, i * wordSize)); 4665 } 4666 add(loop_base, loop_base, unroll * wordSize); 4667 bnez(cnt_reg, loop); 4668 } 4669 4670 BLOCK_COMMENT("} zero_words"); 4671 } 4672 4673 // base: Address of a buffer to be filled, 8 bytes aligned. 4674 // cnt: Count in 8-byte unit. 4675 // value: Value to be filled with. 4676 // base will point to the end of the buffer after filling. 4677 void MacroAssembler::fill_words(Register base, Register cnt, Register value) { 4678 // Algorithm: 4679 // 4680 // t0 = cnt & 7 4681 // cnt -= t0 4682 // p += t0 4683 // switch (t0): 4684 // switch start: 4685 // do while cnt 4686 // cnt -= 8 4687 // p[-8] = value 4688 // case 7: 4689 // p[-7] = value 4690 // case 6: 4691 // p[-6] = value 4692 // // ... 4693 // case 1: 4694 // p[-1] = value 4695 // case 0: 4696 // p += 8 4697 // do-while end 4698 // switch end 4699 4700 assert_different_registers(base, cnt, value, t0, t1); 4701 4702 Label fini, skip, entry, loop; 4703 const int unroll = 8; // Number of sd instructions we'll unroll 4704 4705 beqz(cnt, fini); 4706 4707 andi(t0, cnt, unroll - 1); 4708 sub(cnt, cnt, t0); 4709 // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 4710 shadd(base, t0, base, t1, 3); 4711 la(t1, entry); 4712 slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) 4713 sub(t1, t1, t0); 4714 jr(t1); 4715 4716 bind(loop); 4717 add(base, base, unroll * 8); 4718 for (int i = -unroll; i < 0; i++) { 4719 sd(value, Address(base, i * 8)); 4720 } 4721 bind(entry); 4722 sub(cnt, cnt, unroll); 4723 bgez(cnt, loop); 4724 4725 bind(fini); 4726 } 4727 4728 // Zero blocks of memory by using CBO.ZERO. 4729 // 4730 // Aligns the base address first sufficiently for CBO.ZERO, then uses 4731 // CBO.ZERO repeatedly for every full block. cnt is the size to be 4732 // zeroed in HeapWords. Returns the count of words left to be zeroed 4733 // in cnt. 4734 // 4735 // NOTE: This is intended to be used in the zero_blocks() stub. If 4736 // you want to use it elsewhere, note that cnt must be >= CacheLineSize. 4737 void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) { 4738 Label initial_table_end, loop; 4739 4740 // Align base with cache line size. 4741 neg(tmp1, base); 4742 andi(tmp1, tmp1, CacheLineSize - 1); 4743 4744 // tmp1: the number of bytes to be filled to align the base with cache line size. 
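// Below, tmp1 >> 3 is that prefix expressed in words (subtracted from cnt),
// and tmp1 >> 1 is the same prefix expressed as bytes of code to skip: one
// 4-byte sd per 8-byte word, so jumping back tmp1/2 bytes from
// initial_table_end executes exactly the stores needed to reach the
// cache-line boundary.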
4745 add(base, base, tmp1); 4746 srai(tmp2, tmp1, 3); 4747 sub(cnt, cnt, tmp2); 4748 srli(tmp2, tmp1, 1); 4749 la(tmp1, initial_table_end); 4750 sub(tmp2, tmp1, tmp2); 4751 jr(tmp2); 4752 for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) { 4753 sd(zr, Address(base, i)); 4754 } 4755 bind(initial_table_end); 4756 4757 mv(tmp1, CacheLineSize / wordSize); 4758 bind(loop); 4759 cbo_zero(base); 4760 sub(cnt, cnt, tmp1); 4761 add(base, base, CacheLineSize); 4762 bge(cnt, tmp1, loop); 4763 } 4764 4765 // java.lang.Math.round(float a) 4766 // Returns the closest int to the argument, with ties rounding to positive infinity. 4767 void MacroAssembler::java_round_float(Register dst, FloatRegister src, FloatRegister ftmp) { 4768 // this instructions calling sequence provides performance improvement on all tested devices; 4769 // don't change it without re-verification 4770 Label done; 4771 mv(t0, jint_cast(0.5f)); 4772 fmv_w_x(ftmp, t0); 4773 4774 // dst = 0 if NaN 4775 feq_s(t0, src, src); // replacing fclass with feq as performance optimization 4776 mv(dst, zr); 4777 beqz(t0, done); 4778 4779 // dst = (src + 0.5f) rounded down towards negative infinity 4780 // Adding 0.5f to some floats exceeds the precision limits for a float and rounding takes place. 4781 // RDN is required for fadd_s, RNE gives incorrect results: 4782 // -------------------------------------------------------------------- 4783 // fadd.s rne (src + 0.5f): src = 8388609.000000 ftmp = 8388610.000000 4784 // fcvt.w.s rdn: ftmp = 8388610.000000 dst = 8388610 4785 // -------------------------------------------------------------------- 4786 // fadd.s rdn (src + 0.5f): src = 8388609.000000 ftmp = 8388609.000000 4787 // fcvt.w.s rdn: ftmp = 8388609.000000 dst = 8388609 4788 // -------------------------------------------------------------------- 4789 fadd_s(ftmp, src, ftmp, RoundingMode::rdn); 4790 fcvt_w_s(dst, ftmp, RoundingMode::rdn); 4791 4792 bind(done); 4793 } 4794 4795 // java.lang.Math.round(double a) 4796 // Returns the closest long to the argument, with ties rounding to positive infinity. 
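// A few reference values for the behaviour implemented below (they match
// java.lang.Math.round(double)): round(2.5) == 3, round(-2.5) == -2,
// round(0.49999999999999994) == 0, round(NaN) == 0. Adding 0.5 with RDN
// rounding and then converting with RDN (truncation toward negative infinity)
// reproduces exactly this, with the NaN case handled explicitly first.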
4797 void MacroAssembler::java_round_double(Register dst, FloatRegister src, FloatRegister ftmp) { 4798 // this instructions calling sequence provides performance improvement on all tested devices; 4799 // don't change it without re-verification 4800 Label done; 4801 mv(t0, julong_cast(0.5)); 4802 fmv_d_x(ftmp, t0); 4803 4804 // dst = 0 if NaN 4805 feq_d(t0, src, src); // replacing fclass with feq as performance optimization 4806 mv(dst, zr); 4807 beqz(t0, done); 4808 4809 // dst = (src + 0.5) rounded down towards negative infinity 4810 fadd_d(ftmp, src, ftmp, RoundingMode::rdn); // RDN is required here otherwise some inputs produce incorrect results 4811 fcvt_l_d(dst, ftmp, RoundingMode::rdn); 4812 4813 bind(done); 4814 } 4815 4816 #define FCVT_SAFE(FLOATCVT, FLOATSIG) \ 4817 void MacroAssembler::FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ 4818 Label done; \ 4819 assert_different_registers(dst, tmp); \ 4820 fclass_##FLOATSIG(tmp, src); \ 4821 mv(dst, zr); \ 4822 /* check if src is NaN */ \ 4823 andi(tmp, tmp, fclass_mask::nan); \ 4824 bnez(tmp, done); \ 4825 FLOATCVT(dst, src); \ 4826 bind(done); \ 4827 } 4828 4829 FCVT_SAFE(fcvt_w_s, s); 4830 FCVT_SAFE(fcvt_l_s, s); 4831 FCVT_SAFE(fcvt_w_d, d); 4832 FCVT_SAFE(fcvt_l_d, d); 4833 4834 #undef FCVT_SAFE 4835 4836 #define FCMP(FLOATTYPE, FLOATSIG) \ 4837 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ 4838 FloatRegister Rs2, int unordered_result) { \ 4839 Label Ldone; \ 4840 if (unordered_result < 0) { \ 4841 /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ 4842 /* installs 1 if gt else 0 */ \ 4843 flt_##FLOATSIG(result, Rs2, Rs1); \ 4844 /* Rs1 > Rs2, install 1 */ \ 4845 bgtz(result, Ldone); \ 4846 feq_##FLOATSIG(result, Rs1, Rs2); \ 4847 addi(result, result, -1); \ 4848 /* Rs1 = Rs2, install 0 */ \ 4849 /* NaN or Rs1 < Rs2, install -1 */ \ 4850 bind(Ldone); \ 4851 } else { \ 4852 /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ 4853 /* installs 1 if gt or unordered else 0 */ \ 4854 flt_##FLOATSIG(result, Rs1, Rs2); \ 4855 /* Rs1 < Rs2, install -1 */ \ 4856 bgtz(result, Ldone); \ 4857 feq_##FLOATSIG(result, Rs1, Rs2); \ 4858 addi(result, result, -1); \ 4859 /* Rs1 = Rs2, install 0 */ \ 4860 /* NaN or Rs1 > Rs2, install 1 */ \ 4861 bind(Ldone); \ 4862 neg(result, result); \ 4863 } \ 4864 } 4865 4866 FCMP(float, s); 4867 FCMP(double, d); 4868 4869 #undef FCMP 4870 4871 // Zero words; len is in bytes 4872 // Destroys all registers except addr 4873 // len must be a nonzero multiple of wordSize 4874 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { 4875 assert_different_registers(addr, len, tmp, t0, t1); 4876 4877 #ifdef ASSERT 4878 { 4879 Label L; 4880 andi(t0, len, BytesPerWord - 1); 4881 beqz(t0, L); 4882 stop("len is not a multiple of BytesPerWord"); 4883 bind(L); 4884 } 4885 #endif // ASSERT 4886 4887 #ifndef PRODUCT 4888 block_comment("zero memory"); 4889 #endif // PRODUCT 4890 4891 Label loop; 4892 Label entry; 4893 4894 // Algorithm: 4895 // 4896 // t0 = cnt & 7 4897 // cnt -= t0 4898 // p += t0 4899 // switch (t0) { 4900 // do { 4901 // cnt -= 8 4902 // p[-8] = 0 4903 // case 7: 4904 // p[-7] = 0 4905 // case 6: 4906 // p[-6] = 0 4907 // ... 
4908 // case 1: 4909 // p[-1] = 0 4910 // case 0: 4911 // p += 8 4912 // } while (cnt) 4913 // } 4914 4915 const int unroll = 8; // Number of sd(zr) instructions we'll unroll 4916 4917 srli(len, len, LogBytesPerWord); 4918 andi(t0, len, unroll - 1); // t0 = cnt % unroll 4919 sub(len, len, t0); // cnt -= unroll 4920 // tmp always points to the end of the region we're about to zero 4921 shadd(tmp, t0, addr, t1, LogBytesPerWord); 4922 la(t1, entry); 4923 slli(t0, t0, 2); 4924 sub(t1, t1, t0); 4925 jr(t1); 4926 bind(loop); 4927 sub(len, len, unroll); 4928 for (int i = -unroll; i < 0; i++) { 4929 sd(zr, Address(tmp, i * wordSize)); 4930 } 4931 bind(entry); 4932 add(tmp, tmp, unroll * wordSize); 4933 bnez(len, loop); 4934 } 4935 4936 // shift left by shamt and add 4937 // Rd = (Rs1 << shamt) + Rs2 4938 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { 4939 if (UseZba) { 4940 if (shamt == 1) { 4941 sh1add(Rd, Rs1, Rs2); 4942 return; 4943 } else if (shamt == 2) { 4944 sh2add(Rd, Rs1, Rs2); 4945 return; 4946 } else if (shamt == 3) { 4947 sh3add(Rd, Rs1, Rs2); 4948 return; 4949 } 4950 } 4951 4952 if (shamt != 0) { 4953 assert_different_registers(Rs2, tmp); 4954 slli(tmp, Rs1, shamt); 4955 add(Rd, Rs2, tmp); 4956 } else { 4957 add(Rd, Rs1, Rs2); 4958 } 4959 } 4960 4961 void MacroAssembler::zero_extend(Register dst, Register src, int bits) { 4962 switch (bits) { 4963 case 32: 4964 if (UseZba) { 4965 zext_w(dst, src); 4966 return; 4967 } 4968 break; 4969 case 16: 4970 if (UseZbb) { 4971 zext_h(dst, src); 4972 return; 4973 } 4974 break; 4975 case 8: 4976 if (UseZbb) { 4977 zext_b(dst, src); 4978 return; 4979 } 4980 break; 4981 default: 4982 break; 4983 } 4984 slli(dst, src, XLEN - bits); 4985 srli(dst, dst, XLEN - bits); 4986 } 4987 4988 void MacroAssembler::sign_extend(Register dst, Register src, int bits) { 4989 switch (bits) { 4990 case 32: 4991 sext_w(dst, src); 4992 return; 4993 case 16: 4994 if (UseZbb) { 4995 sext_h(dst, src); 4996 return; 4997 } 4998 break; 4999 case 8: 5000 if (UseZbb) { 5001 sext_b(dst, src); 5002 return; 5003 } 5004 break; 5005 default: 5006 break; 5007 } 5008 slli(dst, src, XLEN - bits); 5009 srai(dst, dst, XLEN - bits); 5010 } 5011 5012 void MacroAssembler::cmp_x2i(Register dst, Register src1, Register src2, 5013 Register tmp, bool is_signed) { 5014 if (src1 == src2) { 5015 mv(dst, zr); 5016 return; 5017 } 5018 Label done; 5019 Register left = src1; 5020 Register right = src2; 5021 if (dst == src1) { 5022 assert_different_registers(dst, src2, tmp); 5023 mv(tmp, src1); 5024 left = tmp; 5025 } else if (dst == src2) { 5026 assert_different_registers(dst, src1, tmp); 5027 mv(tmp, src2); 5028 right = tmp; 5029 } 5030 5031 // installs 1 if gt else 0 5032 if (is_signed) { 5033 slt(dst, right, left); 5034 } else { 5035 sltu(dst, right, left); 5036 } 5037 bnez(dst, done); 5038 if (is_signed) { 5039 slt(dst, left, right); 5040 } else { 5041 sltu(dst, left, right); 5042 } 5043 // dst = -1 if lt; else if eq , dst = 0 5044 neg(dst, dst); 5045 bind(done); 5046 } 5047 5048 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) 5049 { 5050 cmp_x2i(dst, src1, src2, tmp); 5051 } 5052 5053 void MacroAssembler::cmp_ul2i(Register dst, Register src1, Register src2, Register tmp) { 5054 cmp_x2i(dst, src1, src2, tmp, false); 5055 } 5056 5057 void MacroAssembler::cmp_uw2i(Register dst, Register src1, Register src2, Register tmp) { 5058 cmp_x2i(dst, src1, src2, tmp, false); 5059 } 5060 5061 // The java_calling_convention 
describes stack locations as ideal slots on 5062 // a frame with no abi restrictions. Since we must observe abi restrictions 5063 // (like the placement of the register window) the slots must be biased by 5064 // the following value. 5065 static int reg2offset_in(VMReg r) { 5066 // Account for saved fp and ra 5067 // This should really be in_preserve_stack_slots 5068 return r->reg2stack() * VMRegImpl::stack_slot_size; 5069 } 5070 5071 static int reg2offset_out(VMReg r) { 5072 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; 5073 } 5074 5075 // On 64 bit we will store integer like items to the stack as 5076 // 64 bits items (riscv64 abi) even though java would only store 5077 // 32bits for a parameter. On 32bit it will simply be 32 bits 5078 // So this routine will do 32->32 on 32bit and 32->64 on 64bit 5079 void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { 5080 if (src.first()->is_stack()) { 5081 if (dst.first()->is_stack()) { 5082 // stack to stack 5083 ld(tmp, Address(fp, reg2offset_in(src.first()))); 5084 sd(tmp, Address(sp, reg2offset_out(dst.first()))); 5085 } else { 5086 // stack to reg 5087 lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); 5088 } 5089 } else if (dst.first()->is_stack()) { 5090 // reg to stack 5091 sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); 5092 } else { 5093 if (dst.first() != src.first()) { 5094 sign_extend(dst.first()->as_Register(), src.first()->as_Register(), 32); 5095 } 5096 } 5097 } 5098 5099 // An oop arg. Must pass a handle not the oop itself 5100 void MacroAssembler::object_move(OopMap* map, 5101 int oop_handle_offset, 5102 int framesize_in_slots, 5103 VMRegPair src, 5104 VMRegPair dst, 5105 bool is_receiver, 5106 int* receiver_offset) { 5107 assert_cond(map != nullptr && receiver_offset != nullptr); 5108 5109 // must pass a handle. First figure out the location we use as a handle 5110 Register rHandle = dst.first()->is_stack() ? 

// An oop arg. Must pass a handle not the oop itself
void MacroAssembler::object_move(OopMap* map,
                                 int oop_handle_offset,
                                 int framesize_in_slots,
                                 VMRegPair src,
                                 VMRegPair dst,
                                 bool is_receiver,
                                 int* receiver_offset) {
  assert_cond(map != nullptr && receiver_offset != nullptr);

  // must pass a handle. First figure out the location we use as a handle
  Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();

  // See if the oop is null; if it is, we need no handle

  if (src.first()->is_stack()) {
    // Oop is already on the stack as an argument
    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
    if (is_receiver) {
      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
    }

    ld(t0, Address(fp, reg2offset_in(src.first())));
    la(rHandle, Address(fp, reg2offset_in(src.first())));
    // conditionally move a null
    Label notZero1;
    bnez(t0, notZero1);
    mv(rHandle, zr);
    bind(notZero1);
  } else {

    // Oop is in a register we must store it to the space we reserve
    // on the stack for oop_handles and pass a handle if oop is non-null

    const Register rOop = src.first()->as_Register();
    int oop_slot = -1;
    if (rOop == j_rarg0) {
      oop_slot = 0;
    } else if (rOop == j_rarg1) {
      oop_slot = 1;
    } else if (rOop == j_rarg2) {
      oop_slot = 2;
    } else if (rOop == j_rarg3) {
      oop_slot = 3;
    } else if (rOop == j_rarg4) {
      oop_slot = 4;
    } else if (rOop == j_rarg5) {
      oop_slot = 5;
    } else if (rOop == j_rarg6) {
      oop_slot = 6;
    } else {
      assert(rOop == j_rarg7, "wrong register");
      oop_slot = 7;
    }

    oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
    int offset = oop_slot * VMRegImpl::stack_slot_size;

    map->set_oop(VMRegImpl::stack2reg(oop_slot));
    // Store oop in handle area, may be null
    sd(rOop, Address(sp, offset));
    if (is_receiver) {
      *receiver_offset = offset;
    }

    // rOop may be the same register as rHandle
    if (rOop == rHandle) {
      Label isZero;
      beqz(rOop, isZero);
      la(rHandle, Address(sp, offset));
      bind(isZero);
    } else {
      Label notZero2;
      la(rHandle, Address(sp, offset));
      bnez(rOop, notZero2);
      mv(rHandle, zr);
      bind(notZero2);
    }
  }

  // If the arg is on the stack, store the handle there; otherwise it is
  // already in the correct register.
  if (dst.first()->is_stack()) {
    sd(rHandle, Address(sp, reg2offset_out(dst.first())));
  }
}
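
// Informal sketch of the handle convention implemented by object_move above
// (names here are illustrative, not HotSpot API):
//
//   if (oop == nullptr) {
//     handle = nullptr;                  // null oops are passed as null handles
//   } else {
//     handle = &stack_slot_holding_oop;  // the slot is recorded in the OopMap
//   }
//
// The native callee thus receives an indirection instead of a raw oop, so a
// GC at a safepoint can relocate the oop by updating the recorded stack slot.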

// A float arg. May need to be moved between stack slots, loaded into an
// integer register, or moved between float registers.
void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) {
  assert((src.first()->is_stack() && dst.first()->is_stack()) ||
         (src.first()->is_reg() && dst.first()->is_reg()) ||
         (src.first()->is_stack() && dst.first()->is_reg()), "Unexpected error");
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      lwu(tmp, Address(fp, reg2offset_in(src.first())));
      sw(tmp, Address(sp, reg2offset_out(dst.first())));
    } else if (dst.first()->is_Register()) {
      lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
    } else {
      ShouldNotReachHere();
    }
  } else if (src.first() != dst.first()) {
    if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
      fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
    } else {
      ShouldNotReachHere();
    }
  }
}

// A long move
void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      ld(tmp, Address(fp, reg2offset_in(src.first())));
      sd(tmp, Address(sp, reg2offset_out(dst.first())));
    } else {
      // stack to reg
      ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
  } else {
    if (dst.first() != src.first()) {
      mv(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}

// A double move
void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) {
  assert((src.first()->is_stack() && dst.first()->is_stack()) ||
         (src.first()->is_reg() && dst.first()->is_reg()) ||
         (src.first()->is_stack() && dst.first()->is_reg()), "Unexpected error");
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      ld(tmp, Address(fp, reg2offset_in(src.first())));
      sd(tmp, Address(sp, reg2offset_out(dst.first())));
    } else if (dst.first()->is_Register()) {
      ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
    } else {
      ShouldNotReachHere();
    }
  } else if (src.first() != dst.first()) {
    if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
      fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
    } else {
      ShouldNotReachHere();
    }
  }
}

void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) {
  assert(bit_pos < 64, "invalid bit range");
  if (UseZbs) {
    bexti(Rd, Rs, bit_pos);
    return;
  }
  int64_t imm = (int64_t)(1UL << bit_pos);
  if (is_simm12(imm)) {
    // Mask fits in a 12-bit immediate; the tested bit stays at its original
    // position in Rd (callers are expected to test Rd only for zero/non-zero).
    and_imm12(Rd, Rs, imm);
  } else {
    srli(Rd, Rs, bit_pos);
    and_imm12(Rd, Rd, 1);
  }
}
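
// Informal summary of the state used by the LM_LIGHTWEIGHT fast paths below
// (see the inline comments for the authoritative details):
//  - the two low "lock bits" of the object's markWord: 0b01 = unlocked,
//    0b00 = lightweight-locked, 0b10 = inflated (ObjectMonitor);
//  - a small per-thread lock-stack (JavaThread::lock_stack_top_offset())
//    recording the oops this thread currently holds lightweight locks on.
// Everything beyond these fast paths (contention, inflation, a full
// lock-stack) is delegated to the runtime through the 'slow' label.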

// Implements lightweight-locking.
//
// - obj: the object to be locked
// - tmp1, tmp2, tmp3: temporary registers, will be destroyed
// - slow: branched to if locking fails
void MacroAssembler::lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
  assert_different_registers(obj, tmp1, tmp2, tmp3, t0);

  Label push;
  const Register top = tmp1;
  const Register mark = tmp2;
  const Register t = tmp3;

  // Preload the markWord. It is important that this is the first
  // instruction emitted as it is part of C1's null check semantics.
  ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));

  // Check if the lock-stack is full.
  lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
  mv(t, (unsigned)LockStack::end_offset());
  bge(top, t, slow, /* is_far */ true);

  // Check for recursion.
  add(t, xthread, top);
  ld(t, Address(t, -oopSize));
  beq(obj, t, push);

  // Check header for monitor (0b10).
  test_bit(t, mark, exact_log2(markWord::monitor_value));
  bnez(t, slow, /* is_far */ true);

  // Try to lock. Transition lock-bits 0b01 => 0b00
  assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a la");
  ori(mark, mark, markWord::unlocked_value);
  xori(t, mark, markWord::unlocked_value);
  cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
          /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t);
  bne(mark, t, slow, /* is_far */ true);

  bind(push);
  // After successful lock, push object on lock-stack.
  add(t, xthread, top);
  sd(obj, Address(t));
  addw(top, top, oopSize);
  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
}
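
// Note (informal): recursive lightweight locking is represented purely on the
// lock-stack. If the current top-of-stack entry already names obj,
// lightweight_lock above simply pushes obj again without touching the
// markWord; lightweight_unlock below pops and, if the entry underneath still
// names obj, returns without a cmpxchg.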

// Implements lightweight-unlocking.
//
// - obj: the object to be unlocked
// - tmp1, tmp2, tmp3: temporary registers
// - slow: branched to if unlocking fails
void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
  assert_different_registers(obj, tmp1, tmp2, tmp3, t0);

#ifdef ASSERT
  {
    // Check for lock-stack underflow.
    Label stack_ok;
    lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
    mv(tmp2, (unsigned)LockStack::start_offset());
    bge(tmp1, tmp2, stack_ok);
    STOP("Lock-stack underflow");
    bind(stack_ok);
  }
#endif

  Label unlocked, push_and_slow;
  const Register top = tmp1;
  const Register mark = tmp2;
  const Register t = tmp3;

  // Check if obj is top of lock-stack.
  lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
  subw(top, top, oopSize);
  add(t, xthread, top);
  ld(t, Address(t));
  bne(obj, t, slow, /* is_far */ true);

  // Pop lock-stack.
  DEBUG_ONLY(add(t, xthread, top);)
  DEBUG_ONLY(sd(zr, Address(t));)
  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));

  // Check if recursive.
  add(t, xthread, top);
  ld(t, Address(t, -oopSize));
  beq(obj, t, unlocked);

  // Not recursive. Check header for monitor (0b10).
  ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
  test_bit(t, mark, exact_log2(markWord::monitor_value));
  bnez(t, push_and_slow);

#ifdef ASSERT
  // Check header not unlocked (0b01).
  Label not_unlocked;
  test_bit(t, mark, exact_log2(markWord::unlocked_value));
  beqz(t, not_unlocked);
  stop("lightweight_unlock already unlocked");
  bind(not_unlocked);
#endif

  // Try to unlock. Transition lock bits 0b00 => 0b01
  assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
  ori(t, mark, markWord::unlocked_value);
  cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
          /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ t);
  beq(mark, t, unlocked);

  bind(push_and_slow);
  // Restore lock-stack and handle the unlock in runtime.
  DEBUG_ONLY(add(t, xthread, top);)
  DEBUG_ONLY(sd(obj, Address(t));)
  addw(top, top, oopSize);
  sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
  j(slow);

  bind(unlocked);
}
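
// Informal example of the lock-stack and mark-word evolution for nested
// locking of the same object 'a' on one thread (reading aid only; monitor
// inflation and contention go through the runtime slow paths):
//   lightweight_lock(a)    lock-stack: [a]     lock bits 0b01 -> 0b00 (cmpxchg)
//   lightweight_lock(a)    lock-stack: [a, a]  recursion fast path, no cmpxchg
//   lightweight_unlock(a)  lock-stack: [a]     recursive fast path, mark unchanged
//   lightweight_unlock(a)  lock-stack: []      lock bits 0b00 -> 0b01 (cmpxchg)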